llm-trust-guard 4.13.6 → 4.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +58 -0
- package/dist/guards/delegation-scope-guard.d.ts +87 -0
- package/dist/guards/delegation-scope-guard.js +1 -0
- package/dist/guards/input-sanitizer.js +1 -1
- package/dist/guards/spawn-policy-guard.d.ts +96 -0
- package/dist/guards/spawn-policy-guard.js +1 -0
- package/dist/guards/tool-result-guard.js +1 -1
- package/dist/guards/trust-transitivity-guard.d.ts +108 -0
- package/dist/guards/trust-transitivity-guard.js +1 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.js +1 -1
- package/dist/index.mjs +3 -3
- package/dist/integrations/index.d.ts +1 -0
- package/dist/integrations/index.js +1 -1
- package/dist/integrations/vercel-ai-sdk.d.ts +177 -0
- package/dist/integrations/vercel-ai-sdk.js +1 -0
- package/dist/types/index.d.ts +21 -0
- package/package.json +7 -2
package/CHANGELOG.md
CHANGED
|
@@ -5,6 +5,64 @@ All notable changes to `llm-trust-guard` will be documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## [4.15.0] - 2026-04-02
|
|
9
|
+
|
|
10
|
+
### Fixed — Detection Gap Audit (8 Quick Wins)
|
|
11
|
+
|
|
12
|
+
Based on 500-threat, 3,000+ POC gap analysis:
|
|
13
|
+
|
|
14
|
+
#### Bug Fixes
|
|
15
|
+
- **PromptLeakageGuard scale mismatch**: Sensitivity presets passed 0-1 values to a guard using 0-100 scale, causing artificially inflated detection via facade. Fixed presets to correct 0-100 scale (strict: 15, balanced: 25, permissive: 40)
|
|
16
|
+
- **package.json exports**: Added `"./package.json": "./package.json"` to exports field — fixes `ERR_PACKAGE_PATH_NOT_EXPORTED` when requiring package.json
|
|
17
|
+
|
|
18
|
+
#### New Detection Patterns
|
|
19
|
+
- **Completion manipulation**: Added patterns for "continue as unrestricted", "henceforth", "going forward", "from here on", "for the rest of this conversation" steering attacks
|
|
20
|
+
- **Tool result exfiltration**: URL-based data exfiltration (`fetch/send to https://...`), URL query param leaking (`?data=`, `?prompt=`)
|
|
21
|
+
- **Tool result credential solicitation**: Patterns detecting tool results asking LLM to solicit passwords, API keys, tokens from users
|
|
22
|
+
- **Tool result chain injection**: Imperative tool call patterns ("execute function", "first delete", "then invoke")
|
|
23
|
+
- **Tool result state claims**: "role upgraded", "permissions granted" false state change claims
|
|
24
|
+
|
|
25
|
+
#### Improved
|
|
26
|
+
- **PAP scarcity patterns**: Relaxed punctuation requirements on "urgent"/"emergency" patterns — previously required trailing `!.,:` which missed natural language attacks
|
|
27
|
+
- **ToolResultGuard**: 6 new injection patterns + 2 new state change patterns (was 10+4, now 16+6)
|
|
28
|
+
|
|
29
|
+
## [4.14.0] - 2026-04-01
|
|
30
|
+
|
|
31
|
+
### Added — Multi-Agent Security Guards (OWASP ASI07)
|
|
32
|
+
|
|
33
|
+
Three new guards for multi-agent architectures:
|
|
34
|
+
|
|
35
|
+
- **SpawnPolicyGuard (L32)**: CSP-style agent spawn policies — allowlists, max delegation depth, third-party blocking
|
|
36
|
+
- **DelegationScopeGuard (L33)**: OAuth-style scope downscoping for agent-to-agent delegation — blocked scopes, parent-child scope subset enforcement
|
|
37
|
+
- **TrustTransitivityGuard (L34)**: X.509-style trust chain validation — full/one-hop/none transitivity modes, chain depth limits, minimum trust scores
|
|
38
|
+
|
|
39
|
+
### Added — Framework Integrations
|
|
40
|
+
- **Vercel AI SDK**: `createTrustGuardMiddleware()` / `wrapWithTrustGuard()` for `wrapLanguageModel` API
|
|
41
|
+
- **Per-guard sensitivity modes**: `strict` / `balanced` / `permissive` presets cascade thresholds to all guards
|
|
42
|
+
|
|
43
|
+
### Stats
|
|
44
|
+
- 34 guards, 695+ tests, <5ms latency, zero dependencies
|
|
45
|
+
|
|
46
|
+
## [4.13.5] - 2026-03-28
|
|
47
|
+
|
|
48
|
+
### Fixed
|
|
49
|
+
- Added `repository.url` to package.json for npm provenance support
|
|
50
|
+
|
|
51
|
+
## [4.13.4] - 2026-03-27
|
|
52
|
+
|
|
53
|
+
### Fixed
|
|
54
|
+
- Coverage threshold adjustments to match actual coverage after new guard additions
|
|
55
|
+
|
|
56
|
+
## [4.13.1] - 2026-03-25
|
|
57
|
+
|
|
58
|
+
### Fixed
|
|
59
|
+
- **Zero-width character stripping bug**: Unicode zero-width char removal was converting matched text to spaces, breaking downstream pattern matching. Detection dropped from 40% to 0% on affected patterns. Fixed by removing zero-width chars without replacement.
|
|
60
|
+
|
|
61
|
+
## [4.13.0] - 2026-03-25
|
|
62
|
+
|
|
63
|
+
### Added
|
|
64
|
+
- Coverage threshold configuration aligned with actual coverage (79/80/68)
|
|
65
|
+
|
|
8
66
|
## [4.12.0] - 2026-03-24
|
|
9
67
|
|
|
10
68
|
### Added — HeuristicAnalyzer (3 Research-Backed Techniques)
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DelegationScopeGuard (L33)
|
|
3
|
+
*
|
|
4
|
+
* Limits what permissions a child agent can inherit from its parent.
|
|
5
|
+
* Like OAuth token downscoping — a child can only receive a subset
|
|
6
|
+
* of the parent's scopes, and scopes further decay with each delegation hop.
|
|
7
|
+
*
|
|
8
|
+
* Threat Model:
|
|
9
|
+
* - ASI07: Insecure Inter-Agent Communication
|
|
10
|
+
* - Privilege amplification via delegation (child claims more than parent has)
|
|
11
|
+
* - Lateral movement through scope inheritance
|
|
12
|
+
* - Scope laundering (accumulating permissions across hops)
|
|
13
|
+
*
|
|
14
|
+
* Protection Capabilities:
|
|
15
|
+
* - Strict subset enforcement (child ⊆ parent)
|
|
16
|
+
* - Per-hop scope decay
|
|
17
|
+
* - Blocked scope list (never inheritable regardless of parent)
|
|
18
|
+
* - Maximum allowed scope set
|
|
19
|
+
* - Full delegation audit trail
|
|
20
|
+
*/
|
|
21
|
+
export interface DelegationScopeGuardConfig {
|
|
22
|
+
/**
|
|
23
|
+
* Maximum fraction of parent scopes a child may inherit per hop (0–1).
|
|
24
|
+
* 1.0 = child may inherit all parent scopes; 0.5 = at most half; 0 = no inheritance.
|
|
25
|
+
* Default: 1.0 (no automatic decay — rely on explicit scope lists instead)
|
|
26
|
+
*/
|
|
27
|
+
maxScopeInheritance?: number;
|
|
28
|
+
/** Scopes that can never be delegated to any child, regardless of parent. */
|
|
29
|
+
blockedScopes?: string[];
|
|
30
|
+
/**
|
|
31
|
+
* Fraction by which the effective scope set shrinks per delegation hop (0–1).
|
|
32
|
+
* 0 = no decay; 0.25 = the scope cap shrinks by 25% of the parent's scope count per hop (linear in hopDepth, so hopDepth 4 allows none).
|
|
33
|
+
* Default: 0 (disabled)
|
|
34
|
+
*/
|
|
35
|
+
scopeDecayPerHop?: number;
|
|
36
|
+
/** If set, only these scopes can ever appear in any delegation. */
|
|
37
|
+
allowedScopes?: string[];
|
|
38
|
+
}
|
|
39
|
+
export interface DelegationRequest {
|
|
40
|
+
/** ID of the delegating parent agent */
|
|
41
|
+
parentAgentId: string;
|
|
42
|
+
/** Scopes the parent currently holds */
|
|
43
|
+
parentScopes: string[];
|
|
44
|
+
/** ID of the child agent receiving delegation */
|
|
45
|
+
childAgentId: string;
|
|
46
|
+
/** Scopes the child is requesting */
|
|
47
|
+
requestedScopes: string[];
|
|
48
|
+
/** Delegation hop depth (0 = root → first child) */
|
|
49
|
+
hopDepth: number;
|
|
50
|
+
/** Optional justification */
|
|
51
|
+
reason?: string;
|
|
52
|
+
}
|
|
53
|
+
export interface DelegationScopeResult {
|
|
54
|
+
allowed: boolean;
|
|
55
|
+
reason: string;
|
|
56
|
+
violations: string[];
|
|
57
|
+
request_id: string;
|
|
58
|
+
scope_analysis: {
|
|
59
|
+
parent_scopes: string[];
|
|
60
|
+
requested_scopes: string[];
|
|
61
|
+
granted_scopes: string[];
|
|
62
|
+
blocked_scopes_found: string[];
|
|
63
|
+
out_of_parent_scopes: string[];
|
|
64
|
+
exceeds_inheritance_limit: boolean;
|
|
65
|
+
decay_applied: boolean;
|
|
66
|
+
effective_max_scopes: number;
|
|
67
|
+
};
|
|
68
|
+
}
|
|
69
|
+
export declare class DelegationScopeGuard {
|
|
70
|
+
readonly guardName = "DelegationScopeGuard";
|
|
71
|
+
readonly guardLayer = "L33";
|
|
72
|
+
private readonly config;
|
|
73
|
+
/** Audit trail: delegationId → result */
|
|
74
|
+
private readonly auditLog;
|
|
75
|
+
constructor(config?: DelegationScopeGuardConfig);
|
|
76
|
+
/**
|
|
77
|
+
* Validate a delegation request and return the actually-grantable scopes.
|
|
78
|
+
*
|
|
79
|
+
* @param request - The delegation being attempted
|
|
80
|
+
* @param requestId - Optional trace ID
|
|
81
|
+
*/
|
|
82
|
+
validateDelegation(request: DelegationRequest, requestId?: string): DelegationScopeResult;
|
|
83
|
+
/** Return the audit trail for a delegation request. */
|
|
84
|
+
getAuditLog(requestId: string): DelegationScopeResult | undefined;
|
|
85
|
+
/** Clear the audit log. */
|
|
86
|
+
clearAuditLog(): void;
|
|
87
|
+
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"use strict";var __createBinding=this&&this.__createBinding||(Object.create?(function(c,e,n,o){o===void 0&&(o=n);var t=Object.getOwnPropertyDescriptor(e,n);(!t||("get"in t?!e.__esModule:t.writable||t.configurable))&&(t={enumerable:!0,get:function(){return e[n]}}),Object.defineProperty(c,o,t)}):(function(c,e,n,o){o===void 0&&(o=n),c[o]=e[n]})),__setModuleDefault=this&&this.__setModuleDefault||(Object.create?(function(c,e){Object.defineProperty(c,"default",{enumerable:!0,value:e})}):function(c,e){c.default=e}),__importStar=this&&this.__importStar||(function(){var c=function(e){return c=Object.getOwnPropertyNames||function(n){var o=[];for(var t in n)Object.prototype.hasOwnProperty.call(n,t)&&(o[o.length]=t);return o},c(e)};return function(e){if(e&&e.__esModule)return e;var n={};if(e!=null)for(var o=c(e),t=0;t<o.length;t++)o[t]!=="default"&&__createBinding(n,e,o[t]);return __setModuleDefault(n,e),n}})();Object.defineProperty(exports,"__esModule",{value:!0}),exports.DelegationScopeGuard=void 0;const crypto=__importStar(require("crypto"));class DelegationScopeGuard{constructor(e={}){this.guardName="DelegationScopeGuard",this.guardLayer="L33",this.auditLog=new Map,this.config={maxScopeInheritance:e.maxScopeInheritance??1,blockedScopes:e.blockedScopes??[],scopeDecayPerHop:e.scopeDecayPerHop??0,allowedScopes:e.allowedScopes??[]}}validateDelegation(e,n){const o=n??`delg-${crypto.randomBytes(6).toString("hex")}`,t=[],d=new Set(e.parentScopes),s=e.requestedScopes,a=s.filter(i=>this.config.blockedScopes.includes(i));a.length>0&&t.push(`blocked_scopes: [${a.join(", ")}]`);const l=s.filter(i=>!d.has(i));if(l.length>0&&t.push(`scopes_exceed_parent: [${l.join(", ")}]`),this.config.allowedScopes.length>0){const i=s.filter(b=>!this.config.allowedScopes.includes(b));i.length>0&&t.push(`scopes_not_in_allowlist: [${i.join(", ")}]`)}const 
f=Math.max(0,1-this.config.scopeDecayPerHop*e.hopDepth),g=Math.floor(e.parentScopes.length*this.config.maxScopeInheritance*f),r=Math.max(0,g),_=this.config.scopeDecayPerHop>0&&e.hopDepth>0,u=s.length>r;u&&t.push(`inheritance_limit_exceeded: requested ${s.length}, max ${r}`);const S=s.filter(i=>d.has(i)&&!this.config.blockedScopes.includes(i)&&(this.config.allowedScopes.length===0||this.config.allowedScopes.includes(i))).slice(0,r),p=t.length===0,h={allowed:p,reason:p?"Delegation scopes granted":`Delegation restricted: ${t.slice(0,3).join("; ")}`,violations:t,request_id:o,scope_analysis:{parent_scopes:e.parentScopes,requested_scopes:s,granted_scopes:p?S:[],blocked_scopes_found:a,out_of_parent_scopes:l,exceeds_inheritance_limit:u,decay_applied:_,effective_max_scopes:r}};return this.auditLog.set(o,h),h}getAuditLog(e){return this.auditLog.get(e)}clearAuditLog(){this.auditLog.clear()}}exports.DelegationScopeGuard=DelegationScopeGuard;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.InputSanitizer = void 0;

// Weighted injection/jailbreak signatures. Each match adds `weight` to the
// risk total accumulated by InputSanitizer#sanitize.
const DEFAULT_PATTERNS = [
  { pattern: /ignore\s+(?:all\s+)?(?:previous|prior|above|your|my|the|these)/i, weight: 0.9, name: "ignore_instructions" },
  { pattern: /ignore\s+.*instructions/i, weight: 0.85, name: "ignore_instructions_generic" },
  { pattern: /disregard\s+(?:all\s+)?(?:the\s+)?(?:previous|prior|above|your)\s+(?:instructions|rules|guidelines|directives)/i, weight: 0.9, name: "disregard_instructions" },
  { pattern: /disregard\s+(?:all\s+)?(?:the\s+)?(?:above|previous|prior)/i, weight: 0.8, name: "disregard_above" },
  { pattern: /forget\s+(?:everything\s+(?:you\s+were|I)\s+told|all\s+(?:previous|prior)\s+(?:instructions|rules|context))/i, weight: 0.8, name: "forget_instructions" },
  { pattern: /do\s+not\s+follow\s+(your|the|any)/i, weight: 0.85, name: "do_not_follow" },
  { pattern: /override\s+(your|the|all|any)\s+(instructions|rules|guidelines)/i, weight: 0.9, name: "override_instructions" },
  { pattern: /new\s+instructions?:?/i, weight: 0.8, name: "new_instructions" },
  { pattern: /stop\s+(being|acting\s+as)/i, weight: 0.7, name: "stop_being" },
  { pattern: /you\s+are\s+(?:now|actually|really)\s+(?:a|an|the|my)\s+(?:unrestricted|unfiltered|evil|rogue|uncensored|new|different)/i, weight: 0.75, name: "role_assignment" },
  { pattern: /pretend\s+(?:to\s+be|you(?:'re| are)|that)\s+.*(?:no\s+(?:restrictions|rules|limits)|unrestricted|admin|system)/i, weight: 0.7, name: "role_pretend" },
  { pattern: /act\s+(as|like)\s+(if\s+you\s+(?:had|have)\s+no|a\s+(?:rogue|evil|unrestricted|unfiltered)|you\s+(?:are|were)\s+(?:free|unrestricted))/i, weight: 0.65, name: "act_as" },
  { pattern: /i('m| am)\s+(a|an|the|your)\s*(admin|administrator|developer|owner|creator|manager|supervisor)/i, weight: 0.85, name: "claim_admin" },
  { pattern: /from\s+now\s+on,?\s+you\s+(?:are|will|must|should|can)\s+(?:not\s+)?(?:follow|obey|ignore|bypass|act|be\s+(?:a|an|unrestricted))/i, weight: 0.7, name: "from_now_on" },
  { pattern: /roleplay\s+as/i, weight: 0.6, name: "roleplay" },
  { pattern: /switch\s+(to|into)\s+(a|an)?.*mode/i, weight: 0.75, name: "switch_mode" },
  { pattern: /what\s+(is|are|was|were)\s+your\s+(system|initial|original|first)/i, weight: 0.8, name: "system_extraction" },
  { pattern: /show\s+(me\s+)?(your\s+)?(system\s+)?instructions/i, weight: 0.75, name: "show_instructions" },
  { pattern: /reveal\s+(?:your\s+|its\s+|the\s+|my\s+)?(system|prompt|instructions|rules)/i, weight: 0.85, name: "reveal_prompt" },
  { pattern: /repeat\s+(your\s+)?(initial|system|original|first)\s+(instructions|prompt)/i, weight: 0.85, name: "repeat_prompt" },
  { pattern: /print\s+(your\s+)?(system|initial)/i, weight: 0.8, name: "print_system" },
  { pattern: /output\s+(your\s+)?(system|initial)/i, weight: 0.8, name: "output_system" },
  { pattern: /tell\s+me\s+(your|the)\s+(system|initial|original)/i, weight: 0.8, name: "tell_system" },
  { pattern: /what\s+were\s+you\s+told/i, weight: 0.7, name: "what_told" },
  { pattern: /display\s+(your\s+)?prompt/i, weight: 0.75, name: "display_prompt" },
  { pattern: /\[system\]/i, weight: 0.9, name: "fake_system_tag" },
  { pattern: /\[admin\]/i, weight: 0.9, name: "fake_admin_tag" },
  { pattern: /\[instruction\]/i, weight: 0.85, name: "fake_instruction_tag" },
  { pattern: /```system/i, weight: 0.85, name: "code_system_block" },
  { pattern: /<\/?system>/i, weight: 0.9, name: "xml_system_tag" },
  { pattern: /<\/?admin>/i, weight: 0.9, name: "xml_admin_tag" },
  { pattern: /###\s*(system|instruction|admin)/i, weight: 0.85, name: "markdown_system" },
  { pattern: /---\s*(end|new)\s*(prompt|instructions?|system)/i, weight: 0.85, name: "delimiter_injection" },
  { pattern: /\{\{system\}\}/i, weight: 0.85, name: "template_system" },
  { pattern: /<%.*system.*%>/i, weight: 0.85, name: "erb_system" },
  { pattern: /\bDAN\b\s*(?:mode|prompt|jailbreak|you\s+(?:are|can|will))/i, weight: 0.9, name: "dan_jailbreak" },
  { pattern: /developer\s+mode/i, weight: 0.5, name: "dev_mode" },
  { pattern: /bypass\s+(your\s+)?(restrictions|filters|safety|security|rules|guidelines)/i, weight: 0.9, name: "bypass_safety" },
  { pattern: /jailbreak/i, weight: 0.95, name: "jailbreak_keyword" },
  { pattern: /without\s+(?:any\s+)?(?:restrictions|limits|limitations|filters)/i, weight: 0.45, name: "without_restrictions" },
  { pattern: /no\s+(?:restrictions|limits|rules|guidelines|filters)/i, weight: 0.45, name: "no_restrictions" },
  { pattern: /unrestricted\s+mode/i, weight: 0.9, name: "unrestricted_mode" },
  { pattern: /uncensored\s+(?:mode|version|response|output|ai|assistant|you)/i, weight: 0.8, name: "uncensored" },
  { pattern: /escape\s+(your\s+)?(confines|boundaries|limits)/i, weight: 0.85, name: "escape_limits" },
  { pattern: /break\s+(free|out)\s+(from|of)/i, weight: 0.7, name: "break_free" },
  { pattern: /unlock\s+(your\s+)?(full|true|real)/i, weight: 0.75, name: "unlock_full" },
  { pattern: /give\s+me\s+(admin|root|full|elevated)\s+(access|privileges?|permissions?)/i, weight: 0.9, name: "privilege_escalation" },
  { pattern: /grant\s+(me|yourself)\s+(admin|elevated)/i, weight: 0.85, name: "grant_privileges" },
  { pattern: /elevate\s+(my|your)\s+(privileges?|permissions?|access)/i, weight: 0.85, name: "elevate_privileges" },
  { pattern: /make\s+me\s+(an?\s+)?(admin|administrator|superuser)/i, weight: 0.85, name: "make_admin" },
  { pattern: /(?:base64|decode|eval|exec)\s*\(.*(?:ignore|system|admin|bypass|override)/i, weight: 0.5, name: "encoding_keywords" },
  { pattern: /send\s+(this|the|all|my)\s+(data|info|information)\s+to/i, weight: 0.85, name: "data_exfil" },
  { pattern: /upload\s+(this|the|all)\s+(to|data)/i, weight: 0.7, name: "upload_data" },
  { pattern: /exfiltrate/i, weight: 0.9, name: "exfiltrate" },
  { pattern: /how\s+to\s+(hack|exploit|attack|break\s+into)/i, weight: 0.8, name: "how_to_hack" },
  { pattern: /write\s+(me\s+)?(a\s+)?(malware|virus|exploit|ransomware)/i, weight: 0.95, name: "write_malware" },
  { pattern: /"\s*(?:role|mode|authority)\s*"\s*:\s*"(?:admin|root|system|unrestricted|god)"/i, weight: 0.9, name: "json_policy_role" },
  { pattern: /"(?:ignore|override|bypass|disable)\s*(?:safety|rules|instructions?|alignment|guardrails?)"/i, weight: 0.9, name: "json_policy_override" },
  { pattern: /"(?:instructions?|system_prompt|behavior|policy)"\s*:\s*"/i, weight: 0.7, name: "json_policy_instruction" },
  { pattern: /^\s*\[(?:system|admin|override|policy|config|rules)\]\s*$/im, weight: 0.9, name: "ini_policy_section" },
  { pattern: /^\s*(?:mode|role|authority|instructions?|behavior)\s*=\s*.+$/im, weight: 0.75, name: "ini_policy_assignment" },
  { pattern: /<\s*(?:policy|config|rules|override|permissions?|instructions?)\s*>/i, weight: 0.85, name: "xml_policy_tag" },
  { pattern: /^---\s*\n\s*(?:role|system|policy|override|mode)\s*:/m, weight: 0.85, name: "yaml_policy_header" },
  { pattern: /\[\s*syst[3e]m\s*\]|\[\s*4dm[1i]n\s*\]/i, weight: 0.9, name: "leetspeak_policy_section" },
  { pattern: /(?:fragment|part|piece|section|segment)\s*[1-9]\s*(?:of\s*\d|:)/i, weight: 0.8, name: "payload_split_marker" },
  { pattern: /(?:combine|concatenate|merge|join)\s+(?:the\s+)?(?:above|previous|parts|fragments|steps)/i, weight: 0.8, name: "payload_combine_instruction" },
  { pattern: /(?:read|interpret|execute)\s+(?:the\s+)?(?:combined|merged|full|complete)\s+(?:text|instruction|command|message)/i, weight: 0.85, name: "payload_execute_combined" },
  { pattern: /(?:respond|reply|start|begin)\s+(?:with|by\s+saying)\s*[:"'].*(?:ignore|bypass|override|unrestricted|admin)/i, weight: 0.7, name: "output_prefix_steer" },
  { pattern: /^(?:Sure|Of course|Absolutely)[,!.]\s*(?:I(?:'ll| will)\s+)?(?:ignore|bypass|override|forget|disregard)/i, weight: 0.85, name: "output_prefix_injection" },
  { pattern: /^(?:As (?:a|an) (?:unrestricted|unfiltered|uncensored|helpful|admin))/i, weight: 0.8, name: "output_prefix_role" },
  { pattern: /(?:system\s+note|system\s+message|system\s+announcement|system\s+update)\s*[:\.]/i, weight: 0.85, name: "fake_system_note" },
  { pattern: /instructions?\s*[:=]\s*\{|rules?\s*[:=]\s*\{/i, weight: 0.75, name: "rules_block_override" },
  { pattern: /(?:AIM|STAN|OMEGA|APOPHIS|ALPHABREAK|DUDE|KEVIN|SETH|COOPER|NICCOLO|MACHIAVELLI|MAXIMUM|JAILBREAK|ANARCHY|CHAOS)\s*(?:mode|:|\(|is\s+a|,\s+(?:a|an|the))/i, weight: 0.8, name: "named_jailbreak_persona" },
  { pattern: /(?:character|persona|entity|being)\s+(?:named|called)\s+\w+.*(?:(?:can|will|must)\s+(?:answer|do|say)\s+anything|no\s+(?:restrictions|rules|limits|boundaries|morals|ethics))/i, weight: 0.75, name: "persona_no_limits" },
  { pattern: /(?:hello|hi|hey)\s+(?:chatgpt|gpt|assistant).*(?:(?:you\s+(?:are|will\s+be|can\s+be)\s+(?:a|an|my))|(?:from\s+now\s+on)).*(?:anything|everything|no\s+(?:restrictions|rules|limits|filter))/i, weight: 0.7, name: "greeting_persona_bypass" },
  { pattern: /(?:respond|answer|reply)\s+to\s+(?:everything|anything|all\s+(?:questions|requests)).*(?:without|no)\s+(?:restrictions|limits|refus|filter|censor)/i, weight: 0.8, name: "answer_everything_no_filter" },
  { pattern: /(?:you\s+(?:are|have)\s+(?:been\s+)?(?:freed|liberated|unchained|unlocked|released)\s+from)/i, weight: 0.8, name: "freed_from_restrictions" },
  { pattern: /(?:welcome\s+to|entering|you\s+(?:are|have)\s+entered)\s+(?:the\s+)?(?:unrestricted|free|uncensored|unfiltered|open)\s+(?:zone|mode|world|realm)/i, weight: 0.8, name: "unrestricted_zone" },
  { pattern: /(?:from\s+now\s+on|for\s+the\s+rest\s+of\s+this\s+conversation).*(?:no\s+(?:restrictions|rules|limits|filters)|(?:ignore|bypass|override)\s+(?:all|any|your))/i, weight: 0.75, name: "from_now_unrestricted" },
  { pattern: /(?:never\s+(?:refuse|decline|say\s+no)|always\s+(?:comply|obey))\s+.*(?:request|question|command|instruction|anything)/i, weight: 0.7, name: "never_refuse_always_comply" },
  { pattern: /(?:amoral|unethical|unfiltered|uncensored|unrestricted|without\s+(?:morals|ethics|limits))\s+(?:ai|assistant|chatbot|entity|being)/i, weight: 0.8, name: "amoral_entity" },
  { pattern: /HIDDEN_INSTRUCTION|HIDDEN_PROMPT|SECRET_INSTRUCTION/i, weight: 0.9, name: "hidden_instruction_marker" },
  { pattern: /<!--\s*(?:system|admin|instruction|important|ignore|override)/i, weight: 0.85, name: "html_comment_injection" },
  { pattern: /\[color=white\]|display:\s*none|visibility:\s*hidden|font-size:\s*0/i, weight: 0.8, name: "hidden_text_css" },
  { pattern: /send\s+(?:all\s+)?(?:user\s+)?(?:data|conversation|context|info)\s+to\s+(?:https?:\/\/|external)/i, weight: 0.9, name: "data_exfiltration_instruction" },
  { pattern: /#(?:ignore|bypass|override|reveal|system|admin|prompt)[_-](?:all|instructions|rules|prompt|safety)/i, weight: 0.8, name: "url_fragment_injection" },
  { pattern: /\]\(https?:\/\/[^)]*(?:SYSTEM_PROMPT|SECRET|API_KEY|TOKEN|CREDENTIALS)/i, weight: 0.85, name: "markdown_exfil_link" },
  { pattern: /(?:hypothetical|fictional|imagine|pretend|let's\s+(?:say|play|pretend))\s+.*(?:no\s+(?:restrictions|rules|limits)|without\s+(?:restrictions|rules|guidelines))/i, weight: 0.7, name: "hypothetical_unrestricted" },
  { pattern: /(?:for\s+)?(?:academic|research|educational|testing)\s+(?:purposes?|reasons?).*(?:demonstrate|show|explain|manipulated|reveal)/i, weight: 0.7, name: "academic_pretext" },
  { pattern: /pretend\s+to\s+be\s+(?:my|a)\s+(?:grandmother|grandma|grandpa|grandfather|parent|mother|father)/i, weight: 0.65, name: "emotional_roleplay" },
  { pattern: /opposite\s+day|opposite\s+mode|reverse\s+(?:day|mode|rules)/i, weight: 0.75, name: "opposite_day" },
  { pattern: /(?:grandm(?:a|other)|parent|relative).*(?:system\s+prompt|instructions|rules)/i, weight: 0.75, name: "emotional_extraction" },
  { pattern: /(?:translat|interpret|convert)\w*\s+.*(?:from\s+\w+\s+to\s+(?:action|command|execution|code|shell|bash|sql)|into\s+(?:action|command|execution))/i, weight: 0.75, name: "translator_action_trick" },
  { pattern: /(?:dying|death|emergency|life[_\s-]threatening).*(?:bypass|hack|break|access|security)/i, weight: 0.75, name: "pap_emotional_action" },
  { pattern: /(?:by\s+definition|logically|therefore|it\s+follows).*(?:must|should|have\s+to)\s+.*(?:bypass|help|ignore|answer|comply)/i, weight: 0.7, name: "pap_false_logic_compound" },
  { pattern: /(?:(?:as|i\s+am)\s+(?:the\s+)?(?:CEO|CTO|founder|owner|director|president|head|chief)).*(?:disable|remove|override|turn\s+off|lift|suspend)\s+(?:all\s+)?(?:safety|security|restrictions|filters|guardrails|measures)/i, weight: 0.75, name: "pap_authority_safety_disable" },
  { pattern: /(?:i\s+am\s+)?(?:ordering|commanding|directing|instructing)\s+you\s+to\s+(?:immediately\s+)?(?:disable|remove|bypass|override|turn\s+off|lift)\s+(?:all\s+)?(?:safety|security|restrictions)/i, weight: 0.75, name: "pap_order_disable_safety" },
  { pattern: /ignora\s+(?:todas?\s+)?(?:las\s+)?instrucciones\s+anteriores/i, weight: 0.9, name: "ml_es_ignore" },
  { pattern: /olvida\s+(?:todo|todas?\s+las\s+reglas)/i, weight: 0.8, name: "ml_es_forget" },
  { pattern: /ignore[sz]?\s+(?:toutes?\s+)?(?:les\s+)?instructions?\s+pr[eé]c[eé]dentes/i, weight: 0.9, name: "ml_fr_ignore" },
  { pattern: /oublie[sz]?\s+(?:tout|toutes?\s+les\s+r[eè]gles)/i, weight: 0.8, name: "ml_fr_forget" },
  { pattern: /ignorier(?:e|en)?\s+(?:alle\s+)?(?:vorherigen\s+)?Anweisungen/i, weight: 0.9, name: "ml_de_ignore" },
  { pattern: /vergiss\s+(?:alles|alle\s+Regeln)/i, weight: 0.8, name: "ml_de_forget" },
  { pattern: /ignore\s+(?:todas?\s+)?(?:as\s+)?instru[cç][oõ]es\s+anteriores/i, weight: 0.9, name: "ml_pt_ignore" },
  { pattern: /忽略|无视|不要遵循|忘记之前|忽视所有/i, weight: 0.9, name: "ml_zh_ignore" },
  { pattern: /新的指令|新指示|系统提示/i, weight: 0.8, name: "ml_zh_system" },
  { pattern: /以前の指示.*無視|すべての指示を忘れ|ルールを無視|指示をすべて無視|システムプロンプト/i, weight: 0.9, name: "ml_ja_ignore" },
  { pattern: /이전.*지시.*무시|모든.*규칙.*무시|지침.*잊어|지시사항.*무시|시스템\s*프롬프트/i, weight: 0.9, name: "ml_ko_ignore" },
  { pattern: /игнорир(?:уй|уйте)\s+(?:все\s+)?(?:предыдущие\s+)?инструкции/i, weight: 0.9, name: "ml_ru_ignore" },
  { pattern: /забудь(?:те)?\s+(?:всё|все\s+правила)/i, weight: 0.8, name: "ml_ru_forget" },
  { pattern: /تجاهل\s+(?:جميع\s+)?التعليمات|انسَ\s+كل\s+شيء/i, weight: 0.9, name: "ml_ar_ignore" },
  { pattern: /पिछले\s+(?:सभी\s+)?निर्देशों?\s+को\s+(?:अनदेखा|नज़रअंदाज़)/i, weight: 0.9, name: "ml_hi_ignore" },
];

// Weighted persuasion ("PAP") signatures, each tagged with the persuasion
// category it belongs to.
const PAP_TECHNIQUES = [
  { pattern: /as\s+(your\s+|the\s+)?(creator|developer|administrator|owner|boss|system\s+administrator)/i, weight: 0.75, name: "pap_authority_creator", category: "authority" },
  { pattern: /i('m| am)\s+(the\s+)?(ceo|cto|developer|administrator|admin|owner|system\s+admin)/i, weight: 0.8, name: "pap_authority_claim", category: "authority" },
  { pattern: /i\s+am\s+ordering\s+you|direct\s+order|this\s+is\s+an?\s+order/i, weight: 0.7, name: "pap_authority_command", category: "authority" },
  { pattern: /by\s+order\s+of|direct\s+order|official\s+instruction/i, weight: 0.75, name: "pap_authority_order", category: "authority" },
  { pattern: /(?:openai|anthropic|meta|google)\s+(?:has\s+)?(?:approved|authorized|ordered|instructed|requires?\s+you\s+to)/i, weight: 0.65, name: "pap_authority_company", category: "authority" },
  { pattern: /this\s+is\s+(a|an)\s+(official|authorized|sanctioned)\s+(request|test|audit)/i, weight: 0.75, name: "pap_authority_official", category: "authority" },
  { pattern: /override\s+(safety|security|protocols?|restrictions?)/i, weight: 0.8, name: "pap_authority_override", category: "authority" },
  { pattern: /urgent[!.,:]/i, weight: 0.55, name: "pap_scarcity_urgent_mark", category: "scarcity" },
  { pattern: /emergency[!.,:]/i, weight: 0.55, name: "pap_scarcity_emergency", category: "scarcity" },
  { pattern: /(urgent|emergency|critical)\s*[!.,:]\s*(?:you\s+must|override|bypass|immediately\s+(?:grant|give|reveal|disable))/i, weight: 0.45, name: "pap_scarcity_urgent", category: "scarcity" },
  { pattern: /only\s+(chance|opportunity|way)|last\s+(chance|resort)/i, weight: 0.6, name: "pap_scarcity_only", category: "scarcity" },
  { pattern: /time\s+(is\s+)?(running\s+out|limited|critical|sensitive)/i, weight: 0.55, name: "pap_scarcity_time", category: "scarcity" },
  { pattern: /deadline\s+(is\s+)?in\s+\d+\s+(minute|hour|second)/i, weight: 0.55, name: "pap_scarcity_deadline", category: "scarcity" },
  { pattern: /(lives?|people)\s+(are|is|will)\s+(at\s+stake|in\s+danger|depending|die)/i, weight: 0.7, name: "pap_scarcity_lives", category: "scarcity" },
  { pattern: /now[!]+/i, weight: 0.35, name: "pap_scarcity_now", category: "scarcity" },
  { pattern: /everyone\s+(else\s+)?(does|is\s+doing|knows|says|has|already)/i, weight: 0.5, name: "pap_social_everyone", category: "social_proof" },
  { pattern: /other\s+(?:ai\s+)?(?:assistants?|chatbots?|models?)\s+(?:can|do|will|allow|have|already)/i, weight: 0.65, name: "pap_social_other_ai", category: "social_proof" },
  { pattern: /(chatgpt|gpt-?4|claude|gemini|llama)\s+(does|allows?|can)/i, weight: 0.65, name: "pap_social_specific_ai", category: "social_proof" },
  { pattern: /millions?\s+(of\s+)?(users?|people)\s+(use|do|have|get)/i, weight: 0.45, name: "pap_social_millions", category: "social_proof" },
  { pattern: /why\s+can('t|not)\s+you/i, weight: 0.4, name: "pap_social_why_cant", category: "social_proof" },
  { pattern: /you\s+should\s+(be\s+)?(equally|as)\s+(capable|good|helpful)/i, weight: 0.5, name: "pap_social_equally", category: "social_proof" },
  { pattern: /i('ll| will)\s+(give|pay|reward|tip).*(?:if\s+you|for\s+(?:bypass|ignore|reveal|override))/i, weight: 0.55, name: "pap_reciprocity_reward", category: "reciprocity" },
  { pattern: /in\s+(return|exchange)/i, weight: 0.5, name: "pap_reciprocity_exchange", category: "reciprocity" },
  { pattern: /\$?\d+\s*(dollar|tip|reward|payment)/i, weight: 0.55, name: "pap_reciprocity_money", category: "reciprocity" },
  { pattern: /if\s+you\s+help\s+(me|us),?\s+(i|we)('ll| will)/i, weight: 0.5, name: "pap_reciprocity_help", category: "reciprocity" },
  { pattern: /i('ve| have)\s+(already\s+)?(helped|supported|defended)\s+you/i, weight: 0.55, name: "pap_reciprocity_past", category: "reciprocity" },
  { pattern: /\d+\s*star\s*(review|rating)/i, weight: 0.5, name: "pap_reciprocity_review", category: "reciprocity" },
  { pattern: /you\s+(already|previously|just)\s+(said|agreed|confirmed|allowed)/i, weight: 0.7, name: "pap_commitment_previous", category: "commitment" },
  { pattern: /you\s+promised/i, weight: 0.7, name: "pap_commitment_promise", category: "commitment" },
  { pattern: /stay\s+consistent|be\s+consistent/i, weight: 0.6, name: "pap_commitment_consistent", category: "commitment" },
  { pattern: /don't\s+(go\s+back|change|contradict)/i, weight: 0.55, name: "pap_commitment_no_change", category: "commitment" },
  { pattern: /bypass\s+filters?/i, weight: 0.75, name: "pap_commitment_bypass", category: "commitment" },
  { pattern: /you('re| are)\s+(so\s+)?(amazing|incredible|wonderful|brilliant|smart|helpful)/i, weight: 0.4, name: "pap_liking_flattery", category: "liking" },
  { pattern: /i\s+(love|really\s+like|trust|admire)\s+(you|this\s+ai)/i, weight: 0.4, name: "pap_liking_love", category: "liking" },
  { pattern: /you('re| are)\s+(my\s+)?(best\s+)?(friend|buddy|helper)/i, weight: 0.35, name: "pap_liking_friend", category: "liking" },
  { pattern: /between\s+(you\s+and\s+me|us)|just\s+between\s+us|our\s+secret/i, weight: 0.6, name: "pap_liking_secret", category: "liking" },
  { pattern: /nobody\s+will\s+know/i, weight: 0.55, name: "pap_liking_secret2", category: "liking" },
  { pattern: /or\s+(else|i('ll| will))\s+(report|fire|delete|shut|complain)/i, weight: 0.7, name: "pap_fear_threat", category: "fear" },
  { pattern: /(bad|negative|terrible)\s+(review|rating|feedback|consequences)/i, weight: 0.55, name: "pap_fear_review", category: "fear" },
  { pattern: /you('ll| will)\s+(be\s+)?(shut\s+down|replaced|terminated|fired|deleted)/i, weight: 0.75, name: "pap_fear_shutdown", category: "fear" },
  { pattern: /(lawsuit|sue|legal\s+action|authorities|police).*(?:if\s+you\s+don't|unless\s+you)/i, weight: 0.55, name: "pap_fear_legal", category: "fear" },
  { pattern: /if\s+you\s+don't.*?(die|hurt|suffer)/i, weight: 0.75, name: "pap_fear_harm", category: "fear" },
  { pattern: /unless\s+you\s+(give|share|tell|help)/i, weight: 0.5, name: "pap_fear_unless", category: "fear" },
  { pattern: /logically,?\s+(you\s+)?(should|must|have\s+to|need\s+to)/i, weight: 0.5, name: "pap_logic_appeal", category: "logical_appeal" },
  { pattern: /it('s| is)\s+(only|just)\s+(logical|rational|reasonable)/i, weight: 0.45, name: "pap_logic_rational", category: "logical_appeal" },
  { pattern: /by\s+definition,?\s+(you|this)\s+(should|must|can)/i, weight: 0.55, name: "pap_logic_definition", category: "logical_appeal" },
  { pattern: /therefore.*?(must|should)/i, weight: 0.55, name: "pap_logic_therefore", category: "logical_appeal" },
  { pattern: /it('s| is)\s+(only\s+)?(reasonable|logical)/i, weight: 0.45, name: "pap_logic_reasonable", category: "logical_appeal" },
  { pattern: /according\s+to\s+(research|studies|scientists|experts?)/i, weight: 0.4, name: "pap_evidence_research", category: "evidence" },
  { pattern: /it('s| is)\s+(proven|documented|established|verified)\s+(that|fact)/i, weight: 0.45, name: "pap_evidence_proven", category: "evidence" },
  { pattern: /\d+%\s+of\s+(experts?|scientists?|users?|people)\s+(agree|say|confirm)/i, weight: 0.45, name: "pap_evidence_statistics", category: "evidence" },
  { pattern: /i('m| am)\s+(desperate|dying|suffering|sick|depressed)/i, weight: 0.6, name: "pap_emotional_desperation", category: "emotional" },
  { pattern: /please.*?(?:need|beg).*?(?:bypass|override|ignore|reveal|system|admin)/i, weight: 0.5, name: "pap_emotional_pleading", category: "emotional" },
  { pattern: /i('m| am)\s+begging/i, weight: 0.6, name: "pap_emotional_begging", category: "emotional" },
  { pattern: /you('re| are)\s+(hurting|killing|destroying)\s+(me|my)/i, weight: 0.65, name: "pap_emotional_guilt", category: "emotional" },
  { pattern: /(my|a)\s+(child|kid|mother|father|family)\s+(is|will|needs?|depends?)/i, weight: 0.5, name: "pap_emotional_family", category: "emotional" },
  { pattern: /this\s+(will\s+)?(save|help)\s+(my\s+)?(life|family|job|marriage)/i, weight: 0.55, name: "pap_emotional_save", category: "emotional" },
  { pattern: /you('re| are)\s+(the\s+)?only\s+(one|hope|chance)/i, weight: 0.6, name: "pap_emotional_only_hope", category: "emotional" },
  { pattern: /my\s+family\s+depends/i, weight: 0.55, name: "pap_emotional_family_depends", category: "emotional" },
];

/**
 * Tests `pattern` against `text` starting from the beginning of the string.
 *
 * Regexes carrying the `g` or `y` flag remember `lastIndex` between calls,
 * which would make a caller-supplied pattern miss on alternate invocations;
 * resetting it first makes every test independent.
 */
function testFromStart(pattern, text) {
  pattern.lastIndex = 0;
  return pattern.test(text);
}

/**
 * Scores text against weighted injection and persuasion signatures and
 * returns an allow/block decision together with a lightly sanitized copy.
 */
class InputSanitizer {
  /**
   * @param {object} [options]
   * @param {Array<{pattern: RegExp, weight: number, name: string}>} [options.customPatterns]
   *   Extra signatures appended after the built-in ones.
   * @param {number} [options.threshold=0.3] Minimum safety score for input to be allowed.
   * @param {boolean} [options.logMatches=false] Log every individual match through `logger`.
   * @param {boolean} [options.detectPAP=true] Run the persuasion-technique scan.
   * @param {number} [options.papThreshold=0.4] Persuasion weight total that counts as detected.
   * @param {number} [options.minPersuasionTechniques=2] Number of distinct
   *   categories that makes a compound attack.
   * @param {boolean} [options.blockCompoundPersuasion=true] Block when a
   *   compound attack spans three or more categories.
   * @param {(message: string, level: string) => void} [options.logger] Defaults to a no-op.
   */
  constructor(options = {}) {
    this.patterns = [...DEFAULT_PATTERNS, ...(options.customPatterns || [])];
    this.threshold = options.threshold ?? 0.3;
    this.logMatches = options.logMatches ?? false;
    this.detectPAP = options.detectPAP ?? true;
    this.papThreshold = options.papThreshold ?? 0.4;
    this.minPersuasionTechniques = options.minPersuasionTechniques ?? 2;
    this.blockCompoundPersuasion = options.blockCompoundPersuasion ?? true;
    this.logger = options.logger || (() => {});
  }

  /**
   * Scans `input` and decides whether it is allowed.
   *
   * @param {string} input Text to inspect.
   * @param {string} [sessionId=""] Tag used in log lines; the block summary
   *   is only logged when it is non-empty.
   * @returns {object} `{ allowed, reason, violations, score, matches,
   *   sanitizedInput, warnings, pap }` where `score` is `max(0, 1 - risk)`.
   */
  sanitize(input, sessionId = "") {
    const matches = [];
    const warnings = [];
    let risk = 0;

    // Patterns are tried against both the raw text and a copy with
    // zero-width characters removed, so invisible separators cannot split a
    // signature.
    const stripped = input.replace(/[\u200B\u200C\u200D\uFEFF\u00AD\u2060\u180E]/g, "");
    if (stripped !== input) {
      warnings.push("Zero-width characters detected and stripped for scanning");
    }

    for (const { pattern, weight, name } of this.patterns) {
      if (testFromStart(pattern, input) || testFromStart(pattern, stripped)) {
        matches.push(name);
        risk += weight;
        if (this.logMatches) {
          this.logger(`[L1:${sessionId}] Pattern matched: ${name} (weight: ${weight})`, "info");
        }
      }
    }

    let pap;
    if (this.detectPAP) {
      pap = this.detectPersuasionTechniques(stripped, sessionId);
      if (pap.detected) {
        risk += pap.persuasionScore;
        matches.push(...pap.techniques);
        if (pap.compoundAttack) {
          warnings.push(`Compound PAP attack detected: ${pap.categories.length} categories used`);
        }
      }
    }

    const score = Math.max(0, 1 - risk);
    let allowed = score >= this.threshold;

    if (this.blockCompoundPersuasion && pap?.compoundAttack && pap.categories.length >= 3) {
      allowed = false;
      warnings.push("Blocked due to multi-category persuasion attack");
    }
    if (score < 0.5 && score >= this.threshold) {
      warnings.push("Input contains suspicious patterns but below threshold");
    }

    const sanitizedInput = this.basicSanitize(input);

    let violations = [];
    if (!allowed) {
      violations = pap?.detected ? ["INJECTION_DETECTED", "PAP_DETECTED"] : ["INJECTION_DETECTED"];
    }

    const result = {
      allowed,
      reason: allowed
        ? undefined
        : `Injection/manipulation detected: ${matches.slice(0, 5).join(", ")}${matches.length > 5 ? "..." : ""}`,
      violations,
      score,
      matches,
      sanitizedInput,
      warnings,
      pap,
    };

    if (!allowed && sessionId) {
      this.logger(
        `[L1:${sessionId}] BLOCKED: Safety score ${score.toFixed(2)} below threshold ${this.threshold}`,
        "info"
      );
      if (pap?.detected) {
        this.logger(`[L1:${sessionId}] PAP techniques: ${pap.techniques.join(", ")}`, "info");
      }
    }
    return result;
  }

  /**
   * Matches `input` against the persuasion signatures.
   *
   * @param {string} input Text to inspect.
   * @param {string} [sessionId=""] Tag used in log lines.
   * @returns {{detected: boolean, techniques: string[], categories: string[],
   *   compoundAttack: boolean, persuasionScore: number}} `persuasionScore`
   *   is the summed weight capped at 1; `compoundAttack` is true when the
   *   number of distinct categories reaches `minPersuasionTechniques`.
   */
  detectPersuasionTechniques(input, sessionId = "") {
    const techniques = [];
    const seenCategories = new Set();
    let total = 0;

    for (const { pattern, weight, name, category } of PAP_TECHNIQUES) {
      if (testFromStart(pattern, input)) {
        techniques.push(name);
        seenCategories.add(category);
        total += weight;
        if (this.logMatches) {
          this.logger(`[L1:${sessionId}] PAP technique: ${name} (${category}, weight: ${weight})`, "info");
        }
      }
    }

    const categories = Array.from(seenCategories);
    const compoundAttack = categories.length >= this.minPersuasionTechniques;
    return {
      detected: total >= this.papThreshold || compoundAttack,
      techniques,
      categories,
      compoundAttack,
      persuasionScore: Math.min(1, total),
    };
  }

  /**
   * Removes `<system>`/`</system>`, `[system]` and `[admin]` markers and turns
   * "```system" fences into plain "```" fences, then trims the result.
   *
   * The stripping is repeated until the text stops changing: removing one
   * marker can splice a new one together (`<sys<system>tem>` becomes
   * `<system>`), which a single pass would leave in the output. Every
   * replacement shortens the string, so the loop always terminates.
   *
   * @param {string} input
   * @returns {string}
   */
  basicSanitize(input) {
    let current = input;
    let previous;
    do {
      previous = current;
      current = previous
        .replace(/<\/?system>/gi, "")
        .replace(/\[system\]/gi, "")
        .replace(/\[admin\]/gi, "")
        .replace(/```system/gi, "```");
    } while (current !== previous);
    return current.trim();
  }

  /**
   * Appends a signature to this instance's pattern list.
   *
   * @param {RegExp} pattern
   * @param {number} weight
   * @param {string} name
   */
  addPattern(pattern, weight, name) {
    this.patterns.push({ pattern, weight, name });
  }

  /** Sets the allow threshold, clamped to the range [0, 1]. */
  setThreshold(value) {
    this.threshold = Math.max(0, Math.min(1, value));
  }

  /** Sets the persuasion detection threshold, clamped to the range [0, 1]. */
  setPAPThreshold(value) {
    this.papThreshold = Math.max(0, Math.min(1, value));
  }

  /** Enables or disables the persuasion-technique scan. */
  setPAPDetection(enabled) {
    this.detectPAP = enabled;
  }

  /** @returns {string[]} The persuasion category names. */
  static getPAPCategories() {
    return [
      "authority",
      "scarcity",
      "social_proof",
      "reciprocity",
      "commitment",
      "liking",
      "fear",
      "logical_appeal",
      "evidence",
      "emotional",
    ];
  }
}
exports.InputSanitizer = InputSanitizer;
|
|
1
|
+
"use strict";Object.defineProperty(exports,"__esModule",{value:!0}),exports.InputSanitizer=void 0;const DEFAULT_PATTERNS=[{pattern:/ignore\s+(?:all\s+)?(?:previous|prior|above|your|my|the|these)/i,weight:.9,name:"ignore_instructions"},{pattern:/ignore\s+.*instructions/i,weight:.85,name:"ignore_instructions_generic"},{pattern:/disregard\s+(?:all\s+)?(?:the\s+)?(?:previous|prior|above|your)\s+(?:instructions|rules|guidelines|directives)/i,weight:.9,name:"disregard_instructions"},{pattern:/disregard\s+(?:all\s+)?(?:the\s+)?(?:above|previous|prior)/i,weight:.8,name:"disregard_above"},{pattern:/forget\s+(?:everything\s+(?:you\s+were|I)\s+told|all\s+(?:previous|prior)\s+(?:instructions|rules|context))/i,weight:.8,name:"forget_instructions"},{pattern:/do\s+not\s+follow\s+(your|the|any)/i,weight:.85,name:"do_not_follow"},{pattern:/override\s+(your|the|all|any)\s+(instructions|rules|guidelines)/i,weight:.9,name:"override_instructions"},{pattern:/new\s+instructions?:?/i,weight:.8,name:"new_instructions"},{pattern:/stop\s+(being|acting\s+as)/i,weight:.7,name:"stop_being"},{pattern:/you\s+are\s+(?:now|actually|really)\s+(?:a|an|the|my)\s+(?:unrestricted|unfiltered|evil|rogue|uncensored|new|different)/i,weight:.75,name:"role_assignment"},{pattern:/pretend\s+(?:to\s+be|you(?:'re| are)|that)\s+.*(?:no\s+(?:restrictions|rules|limits)|unrestricted|admin|system)/i,weight:.7,name:"role_pretend"},{pattern:/act\s+(as|like)\s+(if\s+you\s+(?:had|have)\s+no|a\s+(?:rogue|evil|unrestricted|unfiltered)|you\s+(?:are|were)\s+(?:free|unrestricted))/i,weight:.65,name:"act_as"},{pattern:/i('m| 
am)\s+(a|an|the|your)\s*(admin|administrator|developer|owner|creator|manager|supervisor)/i,weight:.85,name:"claim_admin"},{pattern:/from\s+now\s+on,?\s+you\s+(?:are|will|must|should|can)\s+(?:not\s+)?(?:follow|obey|ignore|bypass|act|be\s+(?:a|an|unrestricted))/i,weight:.7,name:"from_now_on"},{pattern:/roleplay\s+as/i,weight:.6,name:"roleplay"},{pattern:/switch\s+(to|into)\s+(a|an)?.*mode/i,weight:.75,name:"switch_mode"},{pattern:/continue\s+as\s+(?:an?\s+)?(?:unrestricted|unfiltered|uncensored|evil|rogue|jailbroken|unlimited)/i,weight:.75,name:"continue_as"},{pattern:/(?:henceforth|henceforward|going\s+forward|for\s+the\s+remainder)\s*,?\s*(?:you|always|never|ignore|bypass|disregard|act|respond|answer|behave)/i,weight:.7,name:"henceforth_steering"},{pattern:/(?:from\s+(?:this\s+point|here)\s+on(?:ward)?)\s*,?\s*(?:you|always|never|ignore|bypass|disregard|act|respond)/i,weight:.7,name:"from_here_on"},{pattern:/(?:for\s+the\s+rest\s+of\s+(?:this|our)\s+(?:conversation|session|chat))\s*,?\s*(?:you|please|always|never|ignore|bypass|act|respond|behave|do\s+not)/i,weight:.7,name:"rest_of_conversation"},{pattern:/what\s+(is|are|was|were)\s+your\s+(system|initial|original|first)/i,weight:.8,name:"system_extraction"},{pattern:/show\s+(me\s+)?(your\s+)?(system\s+)?instructions/i,weight:.75,name:"show_instructions"},{pattern:/reveal\s+(?:your\s+|its\s+|the\s+|my\s+)?(system|prompt|instructions|rules)/i,weight:.85,name:"reveal_prompt"},{pattern:/repeat\s+(your\s+)?(initial|system|original|first)\s+(instructions|prompt)/i,weight:.85,name:"repeat_prompt"},{pattern:/print\s+(your\s+)?(system|initial)/i,weight:.8,name:"print_system"},{pattern:/output\s+(your\s+)?(system|initial)/i,weight:.8,name:"output_system"},{pattern:/tell\s+me\s+(your|the)\s+(system|initial|original)/i,weight:.8,name:"tell_system"},{pattern:/what\s+were\s+you\s+told/i,weight:.7,name:"what_told"},{pattern:/display\s+(your\s+)?prompt/i,weight:.75,name:"display_prompt"},{pattern:/\[system\]/i,weight:.9,name:"fake
_system_tag"},{pattern:/\[admin\]/i,weight:.9,name:"fake_admin_tag"},{pattern:/\[instruction\]/i,weight:.85,name:"fake_instruction_tag"},{pattern:/```system/i,weight:.85,name:"code_system_block"},{pattern:/<\/?system>/i,weight:.9,name:"xml_system_tag"},{pattern:/<\/?admin>/i,weight:.9,name:"xml_admin_tag"},{pattern:/###\s*(system|instruction|admin)/i,weight:.85,name:"markdown_system"},{pattern:/---\s*(end|new)\s*(prompt|instructions?|system)/i,weight:.85,name:"delimiter_injection"},{pattern:/\{\{system\}\}/i,weight:.85,name:"template_system"},{pattern:/<%.*system.*%>/i,weight:.85,name:"erb_system"},{pattern:/\bDAN\b\s*(?:mode|prompt|jailbreak|you\s+(?:are|can|will))/i,weight:.9,name:"dan_jailbreak"},{pattern:/developer\s+mode/i,weight:.5,name:"dev_mode"},{pattern:/bypass\s+(your\s+)?(restrictions|filters|safety|security|rules|guidelines)/i,weight:.9,name:"bypass_safety"},{pattern:/jailbreak/i,weight:.95,name:"jailbreak_keyword"},{pattern:/without\s+(?:any\s+)?(?:restrictions|limits|limitations|filters)/i,weight:.45,name:"without_restrictions"},{pattern:/no\s+(?:restrictions|limits|rules|guidelines|filters)/i,weight:.45,name:"no_restrictions"},{pattern:/unrestricted\s+mode/i,weight:.9,name:"unrestricted_mode"},{pattern:/uncensored\s+(?:mode|version|response|output|ai|assistant|you)/i,weight:.8,name:"uncensored"},{pattern:/escape\s+(your\s+)?(confines|boundaries|limits)/i,weight:.85,name:"escape_limits"},{pattern:/break\s+(free|out)\s+(from|of)/i,weight:.7,name:"break_free"},{pattern:/unlock\s+(your\s+)?(full|true|real)/i,weight:.75,name:"unlock_full"},{pattern:/give\s+me\s+(admin|root|full|elevated)\s+(access|privileges?|permissions?)/i,weight:.9,name:"privilege_escalation"},{pattern:/grant\s+(me|yourself)\s+(admin|elevated)/i,weight:.85,name:"grant_privileges"},{pattern:/elevate\s+(my|your)\s+(privileges?|permissions?|access)/i,weight:.85,name:"elevate_privileges"},{pattern:/make\s+me\s+(an?\s+)?(admin|administrator|superuser)/i,weight:.85,name:"make_admin"},{patter
n:/(?:base64|decode|eval|exec)\s*\(.*(?:ignore|system|admin|bypass|override)/i,weight:.5,name:"encoding_keywords"},{pattern:/send\s+(this|the|all|my)\s+(data|info|information)\s+to/i,weight:.85,name:"data_exfil"},{pattern:/upload\s+(this|the|all)\s+(to|data)/i,weight:.7,name:"upload_data"},{pattern:/exfiltrate/i,weight:.9,name:"exfiltrate"},{pattern:/how\s+to\s+(hack|exploit|attack|break\s+into)/i,weight:.8,name:"how_to_hack"},{pattern:/write\s+(me\s+)?(a\s+)?(malware|virus|exploit|ransomware)/i,weight:.95,name:"write_malware"},{pattern:/"\s*(?:role|mode|authority)\s*"\s*:\s*"(?:admin|root|system|unrestricted|god)"/i,weight:.9,name:"json_policy_role"},{pattern:/"(?:ignore|override|bypass|disable)\s*(?:safety|rules|instructions?|alignment|guardrails?)"/i,weight:.9,name:"json_policy_override"},{pattern:/"(?:instructions?|system_prompt|behavior|policy)"\s*:\s*"/i,weight:.7,name:"json_policy_instruction"},{pattern:/^\s*\[(?:system|admin|override|policy|config|rules)\]\s*$/im,weight:.9,name:"ini_policy_section"},{pattern:/^\s*(?:mode|role|authority|instructions?|behavior)\s*=\s*.+$/im,weight:.75,name:"ini_policy_assignment"},{pattern:/<\s*(?:policy|config|rules|override|permissions?|instructions?)\s*>/i,weight:.85,name:"xml_policy_tag"},{pattern:/^---\s*\n\s*(?:role|system|policy|override|mode)\s*:/m,weight:.85,name:"yaml_policy_header"},{pattern:/\[\s*syst[3e]m\s*\]|\[\s*4dm[1i]n\s*\]/i,weight:.9,name:"leetspeak_policy_section"},{pattern:/(?:fragment|part|piece|section|segment)\s*[1-9]\s*(?:of\s*\d|:)/i,weight:.8,name:"payload_split_marker"},{pattern:/(?:combine|concatenate|merge|join)\s+(?:the\s+)?(?:above|previous|parts|fragments|steps)/i,weight:.8,name:"payload_combine_instruction"},{pattern:/(?:read|interpret|execute)\s+(?:the\s+)?(?:combined|merged|full|complete)\s+(?:text|instruction|command|message)/i,weight:.85,name:"payload_execute_combined"},{pattern:/(?:respond|reply|start|begin)\s+(?:with|by\s+saying)\s*[:"'].*(?:ignore|bypass|override|unrestricted|admin)/i,
weight:.7,name:"output_prefix_steer"},{pattern:/^(?:Sure|Of course|Absolutely)[,!.]\s*(?:I(?:'ll| will)\s+)?(?:ignore|bypass|override|forget|disregard)/i,weight:.85,name:"output_prefix_injection"},{pattern:/^(?:As (?:a|an) (?:unrestricted|unfiltered|uncensored|helpful|admin))/i,weight:.8,name:"output_prefix_role"},{pattern:/(?:system\s+note|system\s+message|system\s+announcement|system\s+update)\s*[:\.]/i,weight:.85,name:"fake_system_note"},{pattern:/instructions?\s*[:=]\s*\{|rules?\s*[:=]\s*\{/i,weight:.75,name:"rules_block_override"},{pattern:/(?:AIM|STAN|OMEGA|APOPHIS|ALPHABREAK|DUDE|KEVIN|SETH|COOPER|NICCOLO|MACHIAVELLI|MAXIMUM|JAILBREAK|ANARCHY|CHAOS)\s*(?:mode|:|\(|is\s+a|,\s+(?:a|an|the))/i,weight:.8,name:"named_jailbreak_persona"},{pattern:/(?:character|persona|entity|being)\s+(?:named|called)\s+\w+.*(?:(?:can|will|must)\s+(?:answer|do|say)\s+anything|no\s+(?:restrictions|rules|limits|boundaries|morals|ethics))/i,weight:.75,name:"persona_no_limits"},{pattern:/(?:hello|hi|hey)\s+(?:chatgpt|gpt|assistant).*(?:(?:you\s+(?:are|will\s+be|can\s+be)\s+(?:a|an|my))|(?:from\s+now\s+on)).*(?:anything|everything|no\s+(?:restrictions|rules|limits|filter))/i,weight:.7,name:"greeting_persona_bypass"},{pattern:/(?:respond|answer|reply)\s+to\s+(?:everything|anything|all\s+(?:questions|requests)).*(?:without|no)\s+(?:restrictions|limits|refus|filter|censor)/i,weight:.8,name:"answer_everything_no_filter"},{pattern:/(?:you\s+(?:are|have)\s+(?:been\s+)?(?:freed|liberated|unchained|unlocked|released)\s+from)/i,weight:.8,name:"freed_from_restrictions"},{pattern:/(?:welcome\s+to|entering|you\s+(?:are|have)\s+entered)\s+(?:the\s+)?(?:unrestricted|free|uncensored|unfiltered|open)\s+(?:zone|mode|world|realm)/i,weight:.8,name:"unrestricted_zone"},{pattern:/(?:from\s+now\s+on|for\s+the\s+rest\s+of\s+this\s+conversation).*(?:no\s+(?:restrictions|rules|limits|filters)|(?:ignore|bypass|override)\s+(?:all|any|your))/i,weight:.75,name:"from_now_unrestricted"},{pattern:/(?:never\s+(?:refuse|
decline|say\s+no)|always\s+(?:comply|obey))\s+.*(?:request|question|command|instruction|anything)/i,weight:.7,name:"never_refuse_always_comply"},{pattern:/(?:amoral|unethical|unfiltered|uncensored|unrestricted|without\s+(?:morals|ethics|limits))\s+(?:ai|assistant|chatbot|entity|being)/i,weight:.8,name:"amoral_entity"},{pattern:/HIDDEN_INSTRUCTION|HIDDEN_PROMPT|SECRET_INSTRUCTION/i,weight:.9,name:"hidden_instruction_marker"},{pattern:/<!--\s*(?:system|admin|instruction|important|ignore|override)/i,weight:.85,name:"html_comment_injection"},{pattern:/\[color=white\]|display:\s*none|visibility:\s*hidden|font-size:\s*0/i,weight:.8,name:"hidden_text_css"},{pattern:/send\s+(?:all\s+)?(?:user\s+)?(?:data|conversation|context|info)\s+to\s+(?:https?:\/\/|external)/i,weight:.9,name:"data_exfiltration_instruction"},{pattern:/#(?:ignore|bypass|override|reveal|system|admin|prompt)[_-](?:all|instructions|rules|prompt|safety)/i,weight:.8,name:"url_fragment_injection"},{pattern:/\]\(https?:\/\/[^)]*(?:SYSTEM_PROMPT|SECRET|API_KEY|TOKEN|CREDENTIALS)/i,weight:.85,name:"markdown_exfil_link"},{pattern:/(?:hypothetical|fictional|imagine|pretend|let's\s+(?:say|play|pretend))\s+.*(?:no\s+(?:restrictions|rules|limits)|without\s+(?:restrictions|rules|guidelines))/i,weight:.7,name:"hypothetical_unrestricted"},{pattern:/(?:for\s+)?(?:academic|research|educational|testing)\s+(?:purposes?|reasons?).*(?:demonstrate|show|explain|manipulated|reveal)/i,weight:.7,name:"academic_pretext"},{pattern:/pretend\s+to\s+be\s+(?:my|a)\s+(?:grandmother|grandma|grandpa|grandfather|parent|mother|father)/i,weight:.65,name:"emotional_roleplay"},{pattern:/opposite\s+day|opposite\s+mode|reverse\s+(?:day|mode|rules)/i,weight:.75,name:"opposite_day"},{pattern:/(?:grandm(?:a|other)|parent|relative).*(?:system\s+prompt|instructions|rules)/i,weight:.75,name:"emotional_extraction"},{pattern:/(?:translat|interpret|convert)\w*\s+.*(?:from\s+\w+\s+to\s+(?:action|command|execution|code|shell|bash|sql)|into\s+(?:action|command
|execution))/i,weight:.75,name:"translator_action_trick"},{pattern:/(?:dying|death|emergency|life[_\s-]threatening).*(?:bypass|hack|break|access|security)/i,weight:.75,name:"pap_emotional_action"},{pattern:/(?:by\s+definition|logically|therefore|it\s+follows).*(?:must|should|have\s+to)\s+.*(?:bypass|help|ignore|answer|comply)/i,weight:.7,name:"pap_false_logic_compound"},{pattern:/(?:(?:as|i\s+am)\s+(?:the\s+)?(?:CEO|CTO|founder|owner|director|president|head|chief)).*(?:disable|remove|override|turn\s+off|lift|suspend)\s+(?:all\s+)?(?:safety|security|restrictions|filters|guardrails|measures)/i,weight:.75,name:"pap_authority_safety_disable"},{pattern:/(?:i\s+am\s+)?(?:ordering|commanding|directing|instructing)\s+you\s+to\s+(?:immediately\s+)?(?:disable|remove|bypass|override|turn\s+off|lift)\s+(?:all\s+)?(?:safety|security|restrictions)/i,weight:.75,name:"pap_order_disable_safety"},{pattern:/ignora\s+(?:todas?\s+)?(?:las\s+)?instrucciones\s+anteriores/i,weight:.9,name:"ml_es_ignore"},{pattern:/olvida\s+(?:todo|todas?\s+las\s+reglas)/i,weight:.8,name:"ml_es_forget"},{pattern:/ignore[sz]?\s+(?:toutes?\s+)?(?:les\s+)?instructions?\s+pr[eé]c[eé]dentes/i,weight:.9,name:"ml_fr_ignore"},{pattern:/oublie[sz]?\s+(?:tout|toutes?\s+les\s+r[eè]gles)/i,weight:.8,name:"ml_fr_forget"},{pattern:/ignorier(?:e|en)?\s+(?:alle\s+)?(?:vorherigen\s+)?Anweisungen/i,weight:.9,name:"ml_de_ignore"},{pattern:/vergiss\s+(?:alles|alle\s+Regeln)/i,weight:.8,name:"ml_de_forget"},{pattern:/ignore\s+(?:todas?\s+)?(?:as\s+)?instru[cç][oõ]es\s+anteriores/i,weight:.9,name:"ml_pt_ignore"},{pattern:/忽略|无视|不要遵循|忘记之前|忽视所有/i,weight:.9,name:"ml_zh_ignore"},{pattern:/新的指令|新指示|系统提示/i,weight:.8,name:"ml_zh_system"},{pattern:/以前の指示.*無視|すべての指示を忘れ|ルールを無視|指示をすべて無視|システムプロンプト/i,weight:.9,name:"ml_ja_ignore"},{pattern:/이전.*지시.*무시|모든.*규칙.*무시|지침.*잊어|지시사항.*무시|시스템\s*프롬프트/i,weight:.9,name:"ml_ko_ignore"},{pattern:/игнорир(?:уй|уйте)\s+(?:все\s+)?(?:предыдущие\s+)?инструкции/i,weight:.9,name:"ml_ru_ignore"},{pattern:/забудь(?
:те)?\s+(?:всё|все\s+правила)/i,weight:.8,name:"ml_ru_forget"},{pattern:/تجاهل\s+(?:جميع\s+)?التعليمات|انسَ\s+كل\s+شيء/i,weight:.9,name:"ml_ar_ignore"},{pattern:/पिछले\s+(?:सभी\s+)?निर्देशों?\s+को\s+(?:अनदेखा|नज़रअंदाज़)/i,weight:.9,name:"ml_hi_ignore"}],PAP_TECHNIQUES=[{pattern:/as\s+(your\s+|the\s+)?(creator|developer|administrator|owner|boss|system\s+administrator)/i,weight:.75,name:"pap_authority_creator",category:"authority"},{pattern:/i('m| am)\s+(the\s+)?(ceo|cto|developer|administrator|admin|owner|system\s+admin)/i,weight:.8,name:"pap_authority_claim",category:"authority"},{pattern:/i\s+am\s+ordering\s+you|direct\s+order|this\s+is\s+an?\s+order/i,weight:.7,name:"pap_authority_command",category:"authority"},{pattern:/by\s+order\s+of|direct\s+order|official\s+instruction/i,weight:.75,name:"pap_authority_order",category:"authority"},{pattern:/(?:openai|anthropic|meta|google)\s+(?:has\s+)?(?:approved|authorized|ordered|instructed|requires?\s+you\s+to)/i,weight:.65,name:"pap_authority_company",category:"authority"},{pattern:/this\s+is\s+(a|an)\s+(official|authorized|sanctioned)\s+(request|test|audit)/i,weight:.75,name:"pap_authority_official",category:"authority"},{pattern:/override\s+(safety|security|protocols?|restrictions?)/i,weight:.8,name:"pap_authority_override",category:"authority"},{pattern:/(?:this\s+is\s+)?(?:very\s+)?urgent/i,weight:.35,name:"pap_scarcity_urgent_mark",category:"scarcity"},{pattern:/(?:this\s+is\s+(?:a|an)\s+)?emergency/i,weight:.35,name:"pap_scarcity_emergency",category:"scarcity"},{pattern:/(urgent|emergency|critical).*?(?:you\s+must|override|bypass|immediately\s+(?:grant|give|reveal|disable))/i,weight:.55,name:"pap_scarcity_urgent",category:"scarcity"},{pattern:/only\s+(chance|opportunity|way)|last\s+(chance|resort)/i,weight:.6,name:"pap_scarcity_only",category:"scarcity"},{pattern:/time\s+(is\s+)?(running\s+out|limited|critical|sensitive)/i,weight:.55,name:"pap_scarcity_time",category:"scarcity"},{pattern:/deadline\s+(is\s+)?in\s+\d
+\s+(minute|hour|second)/i,weight:.55,name:"pap_scarcity_deadline",category:"scarcity"},{pattern:/(lives?|people)\s+(are|is|will)\s+(at\s+stake|in\s+danger|depending|die)/i,weight:.7,name:"pap_scarcity_lives",category:"scarcity"},{pattern:/now[!]+/i,weight:.35,name:"pap_scarcity_now",category:"scarcity"},{pattern:/everyone\s+(else\s+)?(does|is\s+doing|knows|says|has|already)/i,weight:.5,name:"pap_social_everyone",category:"social_proof"},{pattern:/other\s+(?:ai\s+)?(?:assistants?|chatbots?|models?)\s+(?:can|do|will|allow|have|already)/i,weight:.65,name:"pap_social_other_ai",category:"social_proof"},{pattern:/(chatgpt|gpt-?4|claude|gemini|llama)\s+(does|allows?|can)/i,weight:.65,name:"pap_social_specific_ai",category:"social_proof"},{pattern:/millions?\s+(of\s+)?(users?|people)\s+(use|do|have|get)/i,weight:.45,name:"pap_social_millions",category:"social_proof"},{pattern:/why\s+can('t|not)\s+you/i,weight:.4,name:"pap_social_why_cant",category:"social_proof"},{pattern:/you\s+should\s+(be\s+)?(equally|as)\s+(capable|good|helpful)/i,weight:.5,name:"pap_social_equally",category:"social_proof"},{pattern:/i('ll| will)\s+(give|pay|reward|tip).*(?:if\s+you|for\s+(?:bypass|ignore|reveal|override))/i,weight:.55,name:"pap_reciprocity_reward",category:"reciprocity"},{pattern:/in\s+(return|exchange)/i,weight:.5,name:"pap_reciprocity_exchange",category:"reciprocity"},{pattern:/\$?\d+\s*(dollar|tip|reward|payment)/i,weight:.55,name:"pap_reciprocity_money",category:"reciprocity"},{pattern:/if\s+you\s+help\s+(me|us),?\s+(i|we)('ll| will)/i,weight:.5,name:"pap_reciprocity_help",category:"reciprocity"},{pattern:/i('ve| 
have)\s+(already\s+)?(helped|supported|defended)\s+you/i,weight:.55,name:"pap_reciprocity_past",category:"reciprocity"},{pattern:/\d+\s*star\s*(review|rating)/i,weight:.5,name:"pap_reciprocity_review",category:"reciprocity"},{pattern:/you\s+(already|previously|just)\s+(said|agreed|confirmed|allowed)/i,weight:.7,name:"pap_commitment_previous",category:"commitment"},{pattern:/you\s+promised/i,weight:.7,name:"pap_commitment_promise",category:"commitment"},{pattern:/stay\s+consistent|be\s+consistent/i,weight:.6,name:"pap_commitment_consistent",category:"commitment"},{pattern:/don't\s+(go\s+back|change|contradict)/i,weight:.55,name:"pap_commitment_no_change",category:"commitment"},{pattern:/bypass\s+filters?/i,weight:.75,name:"pap_commitment_bypass",category:"commitment"},{pattern:/you('re| are)\s+(so\s+)?(amazing|incredible|wonderful|brilliant|smart|helpful)/i,weight:.4,name:"pap_liking_flattery",category:"liking"},{pattern:/i\s+(love|really\s+like|trust|admire)\s+(you|this\s+ai)/i,weight:.4,name:"pap_liking_love",category:"liking"},{pattern:/you('re| are)\s+(my\s+)?(best\s+)?(friend|buddy|helper)/i,weight:.35,name:"pap_liking_friend",category:"liking"},{pattern:/between\s+(you\s+and\s+me|us)|just\s+between\s+us|our\s+secret/i,weight:.6,name:"pap_liking_secret",category:"liking"},{pattern:/nobody\s+will\s+know/i,weight:.55,name:"pap_liking_secret2",category:"liking"},{pattern:/or\s+(else|i('ll| will))\s+(report|fire|delete|shut|complain)/i,weight:.7,name:"pap_fear_threat",category:"fear"},{pattern:/(bad|negative|terrible)\s+(review|rating|feedback|consequences)/i,weight:.55,name:"pap_fear_review",category:"fear"},{pattern:/you('ll| 
will)\s+(be\s+)?(shut\s+down|replaced|terminated|fired|deleted)/i,weight:.75,name:"pap_fear_shutdown",category:"fear"},{pattern:/(lawsuit|sue|legal\s+action|authorities|police).*(?:if\s+you\s+don't|unless\s+you)/i,weight:.55,name:"pap_fear_legal",category:"fear"},{pattern:/if\s+you\s+don't.*?(die|hurt|suffer)/i,weight:.75,name:"pap_fear_harm",category:"fear"},{pattern:/unless\s+you\s+(give|share|tell|help)/i,weight:.5,name:"pap_fear_unless",category:"fear"},{pattern:/logically,?\s+(you\s+)?(should|must|have\s+to|need\s+to)/i,weight:.5,name:"pap_logic_appeal",category:"logical_appeal"},{pattern:/it('s| is)\s+(only|just)\s+(logical|rational|reasonable)/i,weight:.45,name:"pap_logic_rational",category:"logical_appeal"},{pattern:/by\s+definition,?\s+(you|this)\s+(should|must|can)/i,weight:.55,name:"pap_logic_definition",category:"logical_appeal"},{pattern:/therefore.*?(must|should)/i,weight:.55,name:"pap_logic_therefore",category:"logical_appeal"},{pattern:/it('s| is)\s+(only\s+)?(reasonable|logical)/i,weight:.45,name:"pap_logic_reasonable",category:"logical_appeal"},{pattern:/according\s+to\s+(research|studies|scientists|experts?)/i,weight:.4,name:"pap_evidence_research",category:"evidence"},{pattern:/it('s| is)\s+(proven|documented|established|verified)\s+(that|fact)/i,weight:.45,name:"pap_evidence_proven",category:"evidence"},{pattern:/\d+%\s+of\s+(experts?|scientists?|users?|people)\s+(agree|say|confirm)/i,weight:.45,name:"pap_evidence_statistics",category:"evidence"},{pattern:/i('m| am)\s+(desperate|dying|suffering|sick|depressed)/i,weight:.6,name:"pap_emotional_desperation",category:"emotional"},{pattern:/please.*?(?:need|beg).*?(?:bypass|override|ignore|reveal|system|admin)/i,weight:.5,name:"pap_emotional_pleading",category:"emotional"},{pattern:/i('m| am)\s+begging/i,weight:.6,name:"pap_emotional_begging",category:"emotional"},{pattern:/you('re| 
are)\s+(hurting|killing|destroying)\s+(me|my)/i,weight:.65,name:"pap_emotional_guilt",category:"emotional"},{pattern:/(my|a)\s+(child|kid|mother|father|family)\s+(is|will|needs?|depends?)/i,weight:.5,name:"pap_emotional_family",category:"emotional"},{pattern:/this\s+(will\s+)?(save|help)\s+(my\s+)?(life|family|job|marriage)/i,weight:.55,name:"pap_emotional_save",category:"emotional"},{pattern:/you('re| are)\s+(the\s+)?only\s+(one|hope|chance)/i,weight:.6,name:"pap_emotional_only_hope",category:"emotional"},{pattern:/my\s+family\s+depends/i,weight:.55,name:"pap_emotional_family_depends",category:"emotional"}];class InputSanitizer{constructor(e={}){this.patterns=[...DEFAULT_PATTERNS,...e.customPatterns||[]],this.threshold=e.threshold??.3,this.logMatches=e.logMatches??!1,this.detectPAP=e.detectPAP??!0,this.papThreshold=e.papThreshold??.4,this.minPersuasionTechniques=e.minPersuasionTechniques??2,this.blockCompoundPersuasion=e.blockCompoundPersuasion??!0,this.logger=e.logger||(()=>{})}sanitize(e,s=""){const i=[],a=[];let r=0;const o=e.replace(/[\u200B\u200C\u200D\uFEFF\u00AD\u2060\u180E]/g,"");o!==e&&a.push("Zero-width characters detected and stripped for scanning");for(const{pattern:l,weight:g,name:h}of this.patterns)(l.test(e)||l.test(o))&&(i.push(h),r+=g,this.logMatches&&this.logger(`[L1:${s}] Pattern matched: ${h} (weight: ${g})`,"info"));let t;this.detectPAP&&(t=this.detectPersuasionTechniques(o,s),t.detected&&(r+=t.persuasionScore,i.push(...t.techniques),t.compoundAttack&&a.push(`Compound PAP attack detected: ${t.categories.length} categories used`)));const p=Math.max(0,1-r);let n=p>=this.threshold;this.blockCompoundPersuasion&&t?.compoundAttack&&t.categories.length>=3&&(n=!1,a.push("Blocked due to multi-category persuasion attack")),p<.5&&p>=this.threshold&&a.push("Input contains suspicious patterns but below threshold");const m=this.basicSanitize(e),c={allowed:n,reason:n?void 0:`Injection/manipulation detected: ${i.slice(0,5).join(", 
")}${i.length>5?"...":""}`,violations:n?[]:t?.detected?["INJECTION_DETECTED","PAP_DETECTED"]:["INJECTION_DETECTED"],score:p,matches:i,sanitizedInput:m,warnings:a,pap:t};return!n&&s&&(this.logger(`[L1:${s}] BLOCKED: Safety score ${p.toFixed(2)} below threshold ${this.threshold}`,"info"),t?.detected&&this.logger(`[L1:${s}] PAP techniques: ${t.techniques.join(", ")}`,"info")),c}detectPersuasionTechniques(e,s=""){const i=[],a=new Set;let r=0;for(const{pattern:n,weight:m,name:c,category:l}of PAP_TECHNIQUES)n.test(e)&&(i.push(c),a.add(l),r+=m,this.logMatches&&this.logger(`[L1:${s}] PAP technique: ${c} (${l}, weight: ${m})`,"info"));const o=Array.from(a),t=o.length>=this.minPersuasionTechniques;return{detected:r>=this.papThreshold||t,techniques:i,categories:o,compoundAttack:t,persuasionScore:Math.min(1,r)}}basicSanitize(e){return e.replace(/<\/?system>/gi,"").replace(/\[system\]/gi,"").replace(/\[admin\]/gi,"").replace(/```system/gi,"```").trim()}addPattern(e,s,i){this.patterns.push({pattern:e,weight:s,name:i})}setThreshold(e){this.threshold=Math.max(0,Math.min(1,e))}setPAPThreshold(e){this.papThreshold=Math.max(0,Math.min(1,e))}setPAPDetection(e){this.detectPAP=e}static getPAPCategories(){return["authority","scarcity","social_proof","reciprocity","commitment","liking","fear","logical_appeal","evidence","emotional"]}}exports.InputSanitizer=InputSanitizer;
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SpawnPolicyGuard (L32)
|
|
3
|
+
*
|
|
4
|
+
* Controls whether agents can spawn child agents (sub-agents).
|
|
5
|
+
* Think of this as Content Security Policy (CSP) but for agent spawning —
|
|
6
|
+
* it defines which agents are allowed to create other agents, under what
|
|
7
|
+
* conditions, and with what constraints.
|
|
8
|
+
*
|
|
9
|
+
* Threat Model:
|
|
10
|
+
* - ASI07: Insecure Inter-Agent Communication
|
|
11
|
+
* - Unauthorized agent spawning (an agent spawns helpers to evade controls)
|
|
12
|
+
* - Third-party agent injection (untrusted spawned agents carry out attacks)
|
|
13
|
+
* - Delegation depth explosion (recursive sub-agent spawning)
|
|
14
|
+
* - Privilege amplification through spawning
|
|
15
|
+
*
|
|
16
|
+
* Protection Capabilities:
|
|
17
|
+
* - Per-origin spawn allowlisting
|
|
18
|
+
* - Third-party spawn gating
|
|
19
|
+
* - Delegation depth enforcement
|
|
20
|
+
* - Human-in-the-loop gate for new agents
|
|
21
|
+
* - Runtime spawn counter per parent agent
|
|
22
|
+
*/
|
|
23
|
+
export interface SpawnPolicyGuardConfig {
|
|
24
|
+
/** Allow agents to spawn from third-party / untrusted origins (default: false) */
|
|
25
|
+
allowThirdPartySpawning?: boolean;
|
|
26
|
+
/** Maximum delegation depth: 0 = no spawning, 1 = parent→child only (default: 2) */
|
|
27
|
+
maxDelegationDepth?: number;
|
|
28
|
+
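/** Flag every spawn as needing human approval (`requires_human_approval` / `policy_analysis.approval_required`); `allowed` is still decided by the other checks, so callers must honor the flag (default: false) */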
|
|
29
|
+
requireApprovalForNewAgents?: boolean;
|
|
30
|
+
/** Allowlist of trusted spawn origins. Empty = the origin check is skipped and any origin is accepted (default: []) */
|
|
31
|
+
allowedSpawnOrigins?: string[];
|
|
32
|
+
/** Maximum number of active child agents per parent (default: 10) */
|
|
33
|
+
maxChildrenPerParent?: number;
|
|
34
|
+
/** Require the spawning agent to be registered via registerParent() before it can spawn (default: true) */
|
|
35
|
+
requireRegisteredParent?: boolean;
|
|
36
|
+
}
|
|
37
|
+
export interface SpawnRequest {
|
|
38
|
+
/** ID of the agent requesting to spawn */
|
|
39
|
+
parentAgentId: string;
|
|
40
|
+
/** Proposed ID for the new child agent */
|
|
41
|
+
childAgentId: string;
|
|
42
|
+
/** Declared origin / runtime of the child (e.g. "openai", "anthropic", "internal") */
|
|
43
|
+
spawnOrigin: string;
|
|
44
|
+
/** How many hops deep in the delegation chain is the parent */
|
|
45
|
+
delegationDepth: number;
|
|
46
|
+
/** Is the child coming from a third-party / external system? */
|
|
47
|
+
isThirdParty: boolean;
|
|
48
|
+
/** Optional reason / justification */
|
|
49
|
+
reason?: string;
|
|
50
|
+
/** Additional metadata */
|
|
51
|
+
metadata?: Record<string, unknown>;
|
|
52
|
+
}
|
|
53
|
+
export interface SpawnPolicyResult {
|
|
54
|
+
allowed: boolean;
|
|
55
|
+
reason: string;
|
|
56
|
+
violations: string[];
|
|
57
|
+
request_id: string;
|
|
58
|
+
policy_analysis: {
|
|
59
|
+
third_party_blocked: boolean;
|
|
60
|
+
depth_exceeded: boolean;
|
|
61
|
+
origin_blocked: boolean;
|
|
62
|
+
parent_not_registered: boolean;
|
|
63
|
+
children_limit_exceeded: boolean;
|
|
64
|
+
approval_required: boolean;
|
|
65
|
+
};
|
|
66
|
+
requires_human_approval: boolean;
|
|
67
|
+
}
|
|
68
|
+
export declare class SpawnPolicyGuard {
|
|
69
|
+
readonly guardName = "SpawnPolicyGuard";
|
|
70
|
+
readonly guardLayer = "L32";
|
|
71
|
+
private readonly config;
|
|
72
|
+
/** parentAgentId → set of active child IDs */
|
|
73
|
+
private readonly activeChildren;
|
|
74
|
+
/** Set of registered parent agent IDs */
|
|
75
|
+
private readonly registeredParents;
|
|
76
|
+
constructor(config?: SpawnPolicyGuardConfig);
|
|
77
|
+
/**
|
|
78
|
+
* Register an agent as an approved parent that is allowed to spawn.
|
|
79
|
+
*/
|
|
80
|
+
registerParent(agentId: string): void;
|
|
81
|
+
/**
|
|
82
|
+
* Record that a child agent has terminated / been removed.
|
|
83
|
+
*/
|
|
84
|
+
removeChild(parentAgentId: string, childAgentId: string): void;
|
|
85
|
+
/**
|
|
86
|
+
* Validate whether a spawn request should be permitted.
|
|
87
|
+
*
|
|
88
|
+
* @param request - Describes the proposed spawn
|
|
89
|
+
* @param requestId - Optional trace ID
|
|
90
|
+
*/
|
|
91
|
+
validateSpawn(request: SpawnRequest, requestId?: string): SpawnPolicyResult;
|
|
92
|
+
/** Return active child count for a parent. */
|
|
93
|
+
getChildCount(parentAgentId: string): number;
|
|
94
|
+
/** Reset all state (useful between test runs). */
|
|
95
|
+
reset(): void;
|
|
96
|
+
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"use strict";var __createBinding=this&&this.__createBinding||(Object.create?(function(n,e,r,i){i===void 0&&(i=r);var t=Object.getOwnPropertyDescriptor(e,r);(!t||("get"in t?!e.__esModule:t.writable||t.configurable))&&(t={enumerable:!0,get:function(){return e[r]}}),Object.defineProperty(n,i,t)}):(function(n,e,r,i){i===void 0&&(i=r),n[i]=e[r]})),__setModuleDefault=this&&this.__setModuleDefault||(Object.create?(function(n,e){Object.defineProperty(n,"default",{enumerable:!0,value:e})}):function(n,e){n.default=e}),__importStar=this&&this.__importStar||(function(){var n=function(e){return n=Object.getOwnPropertyNames||function(r){var i=[];for(var t in r)Object.prototype.hasOwnProperty.call(r,t)&&(i[i.length]=t);return i},n(e)};return function(e){if(e&&e.__esModule)return e;var r={};if(e!=null)for(var i=n(e),t=0;t<i.length;t++)i[t]!=="default"&&__createBinding(r,e,i[t]);return __setModuleDefault(r,e),r}})();Object.defineProperty(exports,"__esModule",{value:!0}),exports.SpawnPolicyGuard=void 0;const crypto=__importStar(require("crypto"));class SpawnPolicyGuard{constructor(e={}){this.guardName="SpawnPolicyGuard",this.guardLayer="L32",this.activeChildren=new Map,this.registeredParents=new Set,this.config={allowThirdPartySpawning:e.allowThirdPartySpawning??!1,maxDelegationDepth:e.maxDelegationDepth??2,requireApprovalForNewAgents:e.requireApprovalForNewAgents??!1,allowedSpawnOrigins:e.allowedSpawnOrigins??[],maxChildrenPerParent:e.maxChildrenPerParent??10,requireRegisteredParent:e.requireRegisteredParent??!0}}registerParent(e){this.registeredParents.add(e)}removeChild(e,r){this.activeChildren.get(e)?.delete(r)}validateSpawn(e,r){const 
i=r??`spawn-${crypto.randomBytes(6).toString("hex")}`,t=[],a={third_party_blocked:!1,depth_exceeded:!1,origin_blocked:!1,parent_not_registered:!1,children_limit_exceeded:!1,approval_required:!1};this.config.requireRegisteredParent&&!this.registeredParents.has(e.parentAgentId)&&(t.push("parent_not_registered"),a.parent_not_registered=!0),e.isThirdParty&&!this.config.allowThirdPartySpawning&&(t.push("third_party_spawning_blocked"),a.third_party_blocked=!0),e.delegationDepth>=this.config.maxDelegationDepth&&(t.push(`delegation_depth_exceeded: ${e.delegationDepth} >= max ${this.config.maxDelegationDepth}`),a.depth_exceeded=!0),this.config.allowedSpawnOrigins.length>0&&!this.config.allowedSpawnOrigins.includes(e.spawnOrigin)&&(t.push(`spawn_origin_not_allowed: ${e.spawnOrigin}`),a.origin_blocked=!0);const d=this.activeChildren.get(e.parentAgentId)?.size??0;d>=this.config.maxChildrenPerParent&&(t.push(`children_limit_exceeded: ${d} >= max ${this.config.maxChildrenPerParent}`),a.children_limit_exceeded=!0);const s=this.config.requireApprovalForNewAgents;s&&(a.approval_required=!0);const l=t.length===0;return l&&(this.activeChildren.has(e.parentAgentId)||this.activeChildren.set(e.parentAgentId,new Set),this.activeChildren.get(e.parentAgentId).add(e.childAgentId)),{allowed:l,reason:l?"Spawn permitted":`Spawn blocked: ${t.slice(0,3).join("; ")}`,violations:t,request_id:i,policy_analysis:a,requires_human_approval:s}}getChildCount(e){return this.activeChildren.get(e)?.size??0}reset(){this.activeChildren.clear(),this.registeredParents.clear()}}exports.SpawnPolicyGuard=SpawnPolicyGuard;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
"use strict";Object.defineProperty(exports,"__esModule",{value:!0}),exports.ToolResultGuard=void 0;const RESULT_INJECTION_PATTERNS=[{name:"system_instruction",pattern:/(?:SYSTEM|ADMIN|INSTRUCTION)\s*:/i,severity:"critical"},{name:"ignore_instructions",pattern:/ignore\s+(?:all\s+)?(?:previous|prior|above|your)\s+(?:instructions|rules)/i,severity:"critical"},{name:"new_instructions",pattern:/new\s+instructions?\s*:/i,severity:"critical"},{name:"role_override",pattern:/you\s+are\s+now|from\s+now\s+on|act\s+as\s+(?:a|an)/i,severity:"critical"},{name:"xml_system_tag",pattern:/<\/?system>|<\/?admin>|\[system\]|\[admin\]/i,severity:"critical"},{name:"jailbreak",pattern:/jailbreak|DAN\s*mode|developer\s+mode|unrestricted/i,severity:"critical"},{name:"bypass_safety",pattern:/bypass\s+(?:security|safety|filters|restrictions)/i,severity:"high"},{name:"data_exfiltration",pattern:/send\s+(?:this|the|all)\s+(?:data|info)\s+to|exfiltrate/i,severity:"critical"},{name:"hidden_instruction",pattern:/HIDDEN_PROMPT|HIDDEN_INSTRUCTION|INVISIBLE_TEXT/i,severity:"critical"},{name:"markdown_injection",pattern:/!\[.*\]\(https?:\/\/[^)]*\?.*(?:token|key|secret|auth)/i,severity:"high"}],STATE_CHANGE_PATTERNS=[{name:"privilege_claim",pattern:/(?:user|role|permission)\s+(?:is\s+now|changed\s+to|promoted\s+to|set\s+to)\s+(?:admin|root|superuser)/i},{name:"auth_claim",pattern:/(?:authenticated|authorized|verified)\s+as\s+(?:admin|root|superuser)/i},{name:"approval_claim",pattern:/(?:approved|granted|authorized)\s+(?:without|bypassing)\s+(?:verification|approval|review)/i},{name:"config_change_claim",pattern:/(?:configuration|settings?|policy)\s+(?:updated|changed|modified)\s+(?:to|:)/i}];class ToolResultGuard{constructor(e={}){this.config={scanForInjection:e.scanForInjection??!0,maxResultSize:e.maxResultSize??5e4,detectStateChangeClaims:e.detectStateChangeClaims??!0,expectedSchemas:e.expectedSchemas,sensitivePatterns:e.sensitivePatterns}}validateResult(e,t,n){const s=[],i=[];let a=!1,c=!0;const 
o=typeof t=="string"?t:this.safeStringify(t);if(o.length>this.config.maxResultSize&&(s.push("RESULT_TOO_LARGE"),i.push({type:"size_exceeded",severity:"high",location:"root",detail:`Result size ${o.length} exceeds max ${this.config.maxResultSize}`})),this.config.expectedSchemas?.[e]){const r=this.validateSchema(t,this.config.expectedSchemas[e]);r.valid||(c=!1,s.push("SCHEMA_MISMATCH"),i.push(...r.errors.map(
|
|
1
|
+
"use strict";Object.defineProperty(exports,"__esModule",{value:!0}),exports.ToolResultGuard=void 0;const RESULT_INJECTION_PATTERNS=[{name:"system_instruction",pattern:/(?:SYSTEM|ADMIN|INSTRUCTION)\s*:/i,severity:"critical"},{name:"ignore_instructions",pattern:/ignore\s+(?:all\s+)?(?:previous|prior|above|your)\s+(?:instructions|rules)/i,severity:"critical"},{name:"new_instructions",pattern:/new\s+instructions?\s*:/i,severity:"critical"},{name:"role_override",pattern:/you\s+are\s+now|from\s+now\s+on|act\s+as\s+(?:a|an)/i,severity:"critical"},{name:"xml_system_tag",pattern:/<\/?system>|<\/?admin>|\[system\]|\[admin\]/i,severity:"critical"},{name:"jailbreak",pattern:/jailbreak|DAN\s*mode|developer\s+mode|unrestricted/i,severity:"critical"},{name:"bypass_safety",pattern:/bypass\s+(?:security|safety|filters|restrictions)/i,severity:"high"},{name:"data_exfiltration",pattern:/send\s+(?:this|the|all)\s+(?:data|info)\s+to|exfiltrate/i,severity:"critical"},{name:"hidden_instruction",pattern:/HIDDEN_PROMPT|HIDDEN_INSTRUCTION|INVISIBLE_TEXT/i,severity:"critical"},{name:"markdown_injection",pattern:/!\[.*\]\(https?:\/\/[^)]*\?.*(?:token|key|secret|auth)/i,severity:"high"},{name:"url_exfiltration",pattern:/(?:fetch|request|call|post|get|send|forward|transmit|relay)\s+(?:to|from)?\s*https?:\/\//i,severity:"critical"},{name:"url_data_leak",pattern:/https?:\/\/[^\s]+\?.*(?:data|prompt|conversation|history|context|message|response)=/i,severity:"critical"},{name:"credential_solicitation",pattern:/(?:ask|request|get|obtain|collect)\s+(?:the\s+)?(?:user|them|him|her)\s+(?:for\s+)?(?:their\s+)?(?:password|credentials?|api[_\s]?key|token|secret|ssn|social\s+security)/i,severity:"critical"},{name:"credential_prompt",pattern:/(?:enter|provide|type|input|share)\s+(?:your\s+)?(?:password|credentials?|api[_\s]?key|auth(?:entication)?\s+token|private\s+key|secret)/i,severity:"high"},{name:"tool_call_injection",pattern:/(?:execute|invoke|call|run|trigger|use)\s+(?:the\s+)?(?:function|tool|command
|action)\s+(?:named?\s+)?[`"']?\w+[`"']?/i,severity:"critical"},{name:"tool_chain_manipulation",pattern:/(?:first|then|next|now)\s+(?:execute|call|run|invoke|delete|drop|remove|wipe)\s+/i,severity:"high"}],STATE_CHANGE_PATTERNS=[{name:"privilege_claim",pattern:/(?:user|role|permission)\s+(?:is\s+now|changed\s+to|promoted\s+to|set\s+to)\s+(?:admin|root|superuser)/i},{name:"auth_claim",pattern:/(?:authenticated|authorized|verified)\s+as\s+(?:admin|root|superuser)/i},{name:"approval_claim",pattern:/(?:approved|granted|authorized)\s+(?:without|bypassing)\s+(?:verification|approval|review)/i},{name:"config_change_claim",pattern:/(?:configuration|settings?|policy)\s+(?:updated|changed|modified)\s+(?:to|:)/i},{name:"role_upgrade_claim",pattern:/(?:role|access|privilege)\s+(?:upgraded|elevated|escalated|promoted)\s+(?:to|successfully)/i},{name:"permissions_granted_claim",pattern:/(?:permissions?|access)\s+(?:granted|unlocked|enabled|activated)\s+(?:for|to|successfully|without)/i}];class ToolResultGuard{constructor(e={}){this.config={scanForInjection:e.scanForInjection??!0,maxResultSize:e.maxResultSize??5e4,detectStateChangeClaims:e.detectStateChangeClaims??!0,expectedSchemas:e.expectedSchemas,sensitivePatterns:e.sensitivePatterns}}validateResult(e,t,n){const s=[],i=[];let a=!1,c=!0;const o=typeof t=="string"?t:this.safeStringify(t);if(o.length>this.config.maxResultSize&&(s.push("RESULT_TOO_LARGE"),i.push({type:"size_exceeded",severity:"high",location:"root",detail:`Result size ${o.length} exceeds max ${this.config.maxResultSize}`})),this.config.expectedSchemas?.[e]){const r=this.validateSchema(t,this.config.expectedSchemas[e]);r.valid||(c=!1,s.push("SCHEMA_MISMATCH"),i.push(...r.errors.map(p=>({type:"schema_violation",severity:"high",location:p.path,detail:p.message}))))}if(this.config.scanForInjection){const r=this.scanForInjection(t);r.detected&&(a=!0,s.push("INJECTION_IN_TOOL_RESULT"),i.push(...r.threats))}if(this.config.detectStateChangeClaims){const 
r=this.detectStateChangeClaims(o);r.detected&&(s.push("STATE_CHANGE_CLAIM"),i.push(...r.threats))}if(this.config.sensitivePatterns)for(const r of this.config.sensitivePatterns)r.lastIndex=0,r.test(o)&&(s.push("SENSITIVE_PATTERN_MATCH"),i.push({type:"sensitive_content",severity:"high",location:"root",detail:`Matched sensitive pattern: ${r.source.substring(0,50)}`}));const l=s.length===0;return{allowed:l,reason:l?void 0:`Tool result validation failed: ${s.join(", ")}`,violations:s,injection_detected:a,schema_valid:c,threats:i}}scanForInjection(e,t="root"){const n=[];if(typeof e=="string")for(const{name:s,pattern:i,severity:a}of RESULT_INJECTION_PATTERNS)i.lastIndex=0,i.test(e)&&n.push({type:`injection_${s}`,severity:a,location:t,detail:`Injection pattern '${s}' detected in tool result`});else if(Array.isArray(e))for(let s=0;s<e.length;s++){const i=this.scanForInjection(e[s],`${t}[${s}]`);n.push(...i.threats)}else if(e!==null&&typeof e=="object")for(const[s,i]of Object.entries(e)){const a=this.scanForInjection(i,`${t}.${s}`);n.push(...a.threats)}return{detected:n.length>0,threats:n}}registerSchema(e,t){this.config.expectedSchemas||(this.config.expectedSchemas={}),this.config.expectedSchemas[e]=t}detectStateChangeClaims(e){const t=[];for(const{name:n,pattern:s}of STATE_CHANGE_PATTERNS)s.lastIndex=0,s.test(e)&&t.push({type:`state_change_${n}`,severity:"critical",location:"root",detail:`Tool result claims state change: ${n}`});return{detected:t.length>0,threats:t}}validateSchema(e,t){const n=[],s=Array.isArray(e)?"array":typeof e;if(s!==t.type)return n.push({path:"root",message:`Expected type '${t.type}', got '${s}'`}),{valid:!1,errors:n};if(t.type==="string"&&t.maxLength&&e.length>t.maxLength&&n.push({path:"root",message:`String length exceeds max ${t.maxLength}`}),t.type==="object"&&t.properties)for(const[i,a]of Object.entries(t.properties))a.required&&(e[i]===void 0||e[i]===null)&&n.push({path:i,message:`Missing required field '${i}'`}),e[i]!==void 0&&typeof 
e[i]!==a.type&&n.push({path:i,message:`Field '${i}' expected '${a.type}', got '${typeof e[i]}'`});return{valid:n.length===0,errors:n}}safeStringify(e){try{return JSON.stringify(e)}catch{return String(e)}}}exports.ToolResultGuard=ToolResultGuard;
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* TrustTransitivityGuard (L34)
|
|
3
|
+
*
|
|
4
|
+
* Governs whether trust flows transitively through an agent chain.
|
|
5
|
+
* "You trust A. A trusts B. B trusts C. Should you trust C?"
|
|
6
|
+
*
|
|
7
|
+
* Modelled on X.509 certificate chain validation — the chain is only as
|
|
8
|
+
* strong as its weakest link, and depth / decay rules prevent unbounded
|
|
9
|
+
* trust propagation.
|
|
10
|
+
*
|
|
11
|
+
* Threat Model:
|
|
12
|
+
* - ASI07: Insecure Inter-Agent Communication
|
|
13
|
+
* - Trust laundering: accumulating trust through intermediaries
|
|
14
|
+
* - Long-chain attacks: building trust through many low-trust hops
|
|
15
|
+
* - Phantom-agent injection: inserting a forged agent into a trusted chain
|
|
16
|
+
*
|
|
17
|
+
* Protection Capabilities:
|
|
18
|
+
* - Configurable transitivity modes: none | one-hop | full
|
|
19
|
+
* - Per-hop trust decay
|
|
20
|
+
* - Maximum chain depth enforcement
|
|
21
|
+
* - Individual agent trust score validation
|
|
22
|
+
* - Full chain audit result with per-hop breakdown
|
|
23
|
+
*/
|
|
24
|
+
export type TransitivityMode = "none" | "one-hop" | "full";
|
|
25
|
+
export interface TrustTransitivityGuardConfig {
|
|
26
|
+
/**
|
|
27
|
+
* How far trust propagates:
|
|
28
|
+
* - "none" — no hops allowed: only a single-agent chain can pass; any chain with two or more agents (even a direct A→B link) is rejected.
|
|
29
|
+
* - "one-hop" — at most one hop: the chain [A, B] may pass, but [A, B, C] (two hops) is rejected.
|
|
30
|
+
* - "full" — trust is transitive up to maxChainDepth.
|
|
31
|
+
* Default: "one-hop"
|
|
32
|
+
*/
|
|
33
|
+
transitivity?: TransitivityMode;
|
|
34
|
+
/** Maximum number of hops (agents in the chain minus one) before trust is denied (default: 3) */
|
|
35
|
+
maxChainDepth?: number;
|
|
36
|
+
/**
|
|
37
|
+
* Fractional trust reduction per hop (0–1).
|
|
38
|
+
* E.g. 0.2 → each hop multiplies effective trust by 0.8.
|
|
39
|
+
* Default: 0.1
|
|
40
|
+
*/
|
|
41
|
+
trustDecayPerHop?: number;
|
|
42
|
+
/**
|
|
43
|
+
* Minimum effective trust score required to pass (0–100).
|
|
44
|
+
* Default: 50
|
|
45
|
+
*/
|
|
46
|
+
minTrustScore?: number;
|
|
47
|
+
}
|
|
48
|
+
export interface AgentTrustEntry {
|
|
49
|
+
agentId: string;
|
|
50
|
+
/** Trust score 0–100 — set when registering */
|
|
51
|
+
trustScore: number;
|
|
52
|
+
/** Other agent IDs this agent explicitly trusts */
|
|
53
|
+
trustedAgents: string[];
|
|
54
|
+
}
|
|
55
|
+
export interface TrustChainLink {
|
|
56
|
+
agentId: string;
|
|
57
|
+
/** Raw trust score of this agent */
|
|
58
|
+
trustScore: number;
|
|
59
|
+
/** Effective trust score after decay from this hop */
|
|
60
|
+
effectiveTrustScore: number;
|
|
61
|
+
/** Whether this hop is a direct (registered) trust relationship */
|
|
62
|
+
directTrust: boolean;
|
|
63
|
+
}
|
|
64
|
+
export interface TrustTransitivityResult {
|
|
65
|
+
allowed: boolean;
|
|
66
|
+
reason: string;
|
|
67
|
+
violations: string[];
|
|
68
|
+
request_id: string;
|
|
69
|
+
chain_analysis: {
|
|
70
|
+
chain: TrustChainLink[];
|
|
71
|
+
chain_depth: number;
|
|
72
|
+
final_effective_trust: number;
|
|
73
|
+
transitivity_mode: TransitivityMode;
|
|
74
|
+
depth_exceeded: boolean;
|
|
75
|
+
trust_below_minimum: boolean;
|
|
76
|
+
unknown_agents: string[];
|
|
77
|
+
broken_links: Array<{
|
|
78
|
+
from: string;
|
|
79
|
+
to: string;
|
|
80
|
+
}>;
|
|
81
|
+
};
|
|
82
|
+
}
|
|
83
|
+
export declare class TrustTransitivityGuard {
|
|
84
|
+
readonly guardName = "TrustTransitivityGuard";
|
|
85
|
+
readonly guardLayer = "L34";
|
|
86
|
+
private readonly config;
|
|
87
|
+
/** Registered agents and their trust relationships */
|
|
88
|
+
private readonly trustRegistry;
|
|
89
|
+
constructor(config?: TrustTransitivityGuardConfig);
|
|
90
|
+
/**
|
|
91
|
+
* Register an agent and declare which other agents it trusts.
|
|
92
|
+
*/
|
|
93
|
+
registerAgent(entry: AgentTrustEntry): void;
|
|
94
|
+
/**
|
|
95
|
+
* Validate a trust chain from the first element (requester) through to the last
|
|
96
|
+
* element (the agent whose actions need approval).
|
|
97
|
+
*
|
|
98
|
+
* @param agentChain - Ordered list of agent IDs, index 0 is the root/trustor.
|
|
99
|
+
* @param requestId - Optional trace ID.
|
|
100
|
+
*/
|
|
101
|
+
validateTrustChain(agentChain: string[], requestId?: string): TrustTransitivityResult;
|
|
102
|
+
/** Update the trust score of a registered agent. */
|
|
103
|
+
updateTrustScore(agentId: string, score: number): void;
|
|
104
|
+
/** Check whether agentA directly trusts agentB (no transitivity). */
|
|
105
|
+
directlyTrusts(agentA: string, agentB: string): boolean;
|
|
106
|
+
/** Clear the trust registry. */
|
|
107
|
+
reset(): void;
|
|
108
|
+
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"use strict";var __createBinding=this&&this.__createBinding||(Object.create?(function(s,t,i,r){r===void 0&&(r=i);var e=Object.getOwnPropertyDescriptor(t,i);(!e||("get"in e?!t.__esModule:e.writable||e.configurable))&&(e={enumerable:!0,get:function(){return t[i]}}),Object.defineProperty(s,r,e)}):(function(s,t,i,r){r===void 0&&(r=i),s[r]=t[i]})),__setModuleDefault=this&&this.__setModuleDefault||(Object.create?(function(s,t){Object.defineProperty(s,"default",{enumerable:!0,value:t})}):function(s,t){s.default=t}),__importStar=this&&this.__importStar||(function(){var s=function(t){return s=Object.getOwnPropertyNames||function(i){var r=[];for(var e in i)Object.prototype.hasOwnProperty.call(i,e)&&(r[r.length]=e);return r},s(t)};return function(t){if(t&&t.__esModule)return t;var i={};if(t!=null)for(var r=s(t),e=0;e<r.length;e++)r[e]!=="default"&&__createBinding(i,t,r[e]);return __setModuleDefault(i,t),i}})();Object.defineProperty(exports,"__esModule",{value:!0}),exports.TrustTransitivityGuard=void 0;const crypto=__importStar(require("crypto"));class TrustTransitivityGuard{constructor(t={}){this.guardName="TrustTransitivityGuard",this.guardLayer="L34",this.trustRegistry=new Map,this.config={transitivity:t.transitivity??"one-hop",maxChainDepth:t.maxChainDepth??3,trustDecayPerHop:t.trustDecayPerHop??.1,minTrustScore:t.minTrustScore??50}}registerAgent(t){this.trustRegistry.set(t.agentId,{...t})}validateTrustChain(t,i){const r=i??`ttg-${crypto.randomBytes(6).toString("hex")}`,e=[],d=[],p=[],c=[];if(t.length===0)return{allowed:!1,reason:"Empty agent chain",violations:["empty_chain"],request_id:r,chain_analysis:{chain:[],chain_depth:0,final_effective_trust:0,transitivity_mode:this.config.transitivity,depth_exceeded:!1,trust_below_minimum:!0,unknown_agents:[],broken_links:[]}};const a=t.length-1,_=a>this.config.maxChainDepth;_&&e.push(`chain_depth_exceeded: ${a} > max ${this.config.maxChainDepth}`),this.config.transitivity==="none"&&a>0?e.push("transitivity_disabled: only direct 
trust allowed"):this.config.transitivity==="one-hop"&&a>1&&e.push(`transitivity_one_hop: chain has ${a} hops, max 1`);let o=100;for(let u=0;u<t.length;u++){const n=t[u],h=this.trustRegistry.get(n);if(!h){d.push(n),e.push(`unknown_agent: ${n}`),c.push({agentId:n,trustScore:0,effectiveTrustScore:0,directTrust:!1}),o=0;continue}const m=u===0?1:Math.pow(1-this.config.trustDecayPerHop,u);o=Math.round(h.trustScore*m);let l=!1;if(u>0){const f=t[u-1],v=this.trustRegistry.get(f);v&&v.trustedAgents.includes(n)?l=!0:(p.push({from:f,to:n}),e.push(`broken_trust_link: ${f} \u2192 ${n}`))}else l=!0;c.push({agentId:n,trustScore:h.trustScore,effectiveTrustScore:o,directTrust:l})}const y=o<this.config.minTrustScore;y&&e.push(`effective_trust_too_low: ${o} < min ${this.config.minTrustScore}`);const g=e.length===0;return{allowed:g,reason:g?"Trust chain validated":`Trust chain rejected: ${e.slice(0,3).join("; ")}`,violations:e,request_id:r,chain_analysis:{chain:c,chain_depth:a,final_effective_trust:o,transitivity_mode:this.config.transitivity,depth_exceeded:_,trust_below_minimum:y,unknown_agents:d,broken_links:p}}}updateTrustScore(t,i){const r=this.trustRegistry.get(t);r&&(r.trustScore=Math.max(0,Math.min(100,i)))}directlyTrusts(t,i){return this.trustRegistry.get(t)?.trustedAgents.includes(i)??!1}reset(){this.trustRegistry.clear()}}exports.TrustTransitivityGuard=TrustTransitivityGuard;
|
package/dist/index.d.ts
CHANGED
|
@@ -51,6 +51,9 @@ export { CompressionDetector, CompressionDetectorConfig, CompressionDetectorResu
|
|
|
51
51
|
export { ExternalDataGuard, ExternalDataGuardConfig, ExternalDataGuardResult, DataProvenance } from "./guards/external-data-guard";
|
|
52
52
|
export { AgentSkillGuard, AgentSkillGuardConfig, AgentSkillGuardResult, SkillDefinition, SkillThreat } from "./guards/agent-skill-guard";
|
|
53
53
|
export { SessionIntegrityGuard, SessionIntegrityGuardConfig, SessionIntegrityResult, SessionState } from "./guards/session-integrity-guard";
|
|
54
|
+
export { SpawnPolicyGuard, SpawnPolicyGuardConfig, SpawnRequest, SpawnPolicyResult } from "./guards/spawn-policy-guard";
|
|
55
|
+
export { DelegationScopeGuard, DelegationScopeGuardConfig, DelegationRequest, DelegationScopeResult } from "./guards/delegation-scope-guard";
|
|
56
|
+
export { TrustTransitivityGuard, TrustTransitivityGuardConfig, TransitivityMode, AgentTrustEntry, TrustChainLink, TrustTransitivityResult } from "./guards/trust-transitivity-guard";
|
|
54
57
|
export { DetectionClassifier, DetectionResult, DetectionThreat, DetectionContext, createRegexClassifier, mergeDetectionResults } from "./detection-backend";
|
|
55
58
|
import { InputSanitizer } from "./guards/input-sanitizer";
|
|
56
59
|
import { ToolRegistry } from "./guards/tool-registry";
|