@jamaynor/hal-config 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +84 -0
- package/index.js +3 -0
- package/lib/config.js +675 -0
- package/package.json +23 -0
- package/publish.ps1 +30 -0
- package/security/access-control.js +308 -0
- package/security/governor.js +313 -0
- package/security/index.js +31 -0
- package/security/redactor.js +129 -0
- package/security/sanitizer.js +571 -0
- package/test/config-io.test.js +326 -0
- package/test/security.test.js +488 -0
- package/test/test-utils.test.js +360 -0
- package/test/test.js +586 -0
- package/test-utils.js +255 -0
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* security/redactor.js
|
|
3
|
+
*
|
|
4
|
+
* Responsibility: Strip secrets and PII from outbound text before delivery.
|
|
5
|
+
* All functions are pure: no side effects, no I/O, no API calls.
|
|
6
|
+
*
|
|
7
|
+
* Public Interface:
|
|
8
|
+
* redactor
|
|
9
|
+
* ├── redactSecrets(text) → string
|
|
10
|
+
* ├── redactPii(text, config) → string
|
|
11
|
+
* └── redactNotification(text, config) → string
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
// Secret patterns covering common API key and auth token formats.
// Every pattern carries the `g` flag so String.prototype.replace() redacts
// all occurrences rather than only the first.
const SECRET_PATTERNS = [
  // OpenAI (sk-... and sk-proj-...)
  /sk-(?:proj-)?[A-Za-z0-9_-]{20,}/g,
  // Google API keys
  /AIza[0-9A-Za-z_-]{35}/g,
  // xAI keys (xai-...)
  /xai-[A-Za-z0-9_-]{20,}/g,
  // Slack bot and user tokens
  /xox[bpoa]-[0-9A-Za-z-]{10,}/g,
  // GitHub classic tokens (ghp_, gho_, ghu_, ghs_, ghr_ prefixes)
  /gh[pousr]_[A-Za-z0-9_]{36,}/g,
  // GitHub fine-grained personal access tokens. These use the github_pat_
  // prefix, which the gh?_ pattern above cannot match.
  /github_pat_[A-Za-z0-9_]{22,}/g,
  // Telegram bot tokens (<digits>:<alphanumeric>)
  /\b\d{8,10}:[A-Za-z0-9_-]{35}\b/g,
  // Generic Bearer tokens in Authorization header format
  /Bearer\s+[A-Za-z0-9\-._~+/]+=*/gi,
  // Generic long hex secrets (32–64 hex chars delimited by word boundaries)
  /\b[0-9a-fA-F]{32,64}\b/g,
];
|
|
33
|
+
|
|
34
|
+
const REDACTED_SECRET = '[REDACTED_SECRET]';
|
|
35
|
+
const REDACTED_EMAIL = '[REDACTED_EMAIL]';
|
|
36
|
+
const REDACTED_PHONE = '[REDACTED_PHONE]';
|
|
37
|
+
const REDACTED_AMOUNT = '[REDACTED_AMOUNT]';
|
|
38
|
+
|
|
39
|
+
// Default list of personal email providers whose addresses are treated as PII.
|
|
40
|
+
const DEFAULT_PERSONAL_PROVIDERS = [
|
|
41
|
+
'gmail.com',
|
|
42
|
+
'yahoo.com',
|
|
43
|
+
'hotmail.com',
|
|
44
|
+
'outlook.com',
|
|
45
|
+
'icloud.com',
|
|
46
|
+
'protonmail.com',
|
|
47
|
+
];
|
|
48
|
+
|
|
49
|
+
// Phone number patterns covering common North American and international formats.
// Order matters: the patterns are applied one after another, so the forms that
// carry a country-code prefix must run before the bare 10-digit form. Otherwise
// the bare form consumes the trailing digits first and leaves "+1 " or "1-"
// behind in the output.
const PHONE_PATTERNS = [
  // +1 NNN NNN NNNN (international prefix, 1–3 digit country code)
  /\+\d{1,3}[\s.-]\d{3}[\s.-]\d{3}[\s.-]\d{4}/g,
  // 1-800-NNN-NNNN (leading 1 without a plus sign)
  /1[-.\s]\d{3}[-.\s]\d{3}[-.\s]\d{4}/g,
  // (NNN) NNN-NNNN or NNN-NNN-NNNN
  /\(?\d{3}\)?[\s.-]?\d{3}[\s.-]\d{4}/g,
];
|
|
58
|
+
|
|
59
|
+
// Dollar amounts: "$" followed by digits with optional thousands separators and
// an optional fractional part of any length, so "$12.5" or "$1.234" is consumed
// whole instead of leaving trailing digits in the output.
const DOLLAR_AMOUNT_PATTERN = /\$\d[\d,]*(?:\.\d+)?/g;
|
|
60
|
+
|
|
61
|
+
/**
 * Resolve the personal provider list from config, falling back to the default list.
 *
 * Only an actual array found at `config.redactor.personalEmailProviders` is
 * honoured. Any other value (missing, null, a bare string, ...) falls back to
 * DEFAULT_PERSONAL_PROVIDERS so that callers can safely iterate the result.
 *
 * @param {object} config - Skill config object (may be undefined/empty).
 * @returns {string[]} The configured provider array, or the default list.
 */
function resolveProviders(config) {
  const configured = config?.redactor?.personalEmailProviders;
  return Array.isArray(configured) ? configured : DEFAULT_PERSONAL_PROVIDERS;
}
|
|
69
|
+
|
|
70
|
+
/**
 * Build a regex that matches email addresses whose domain is one of the personal providers.
 *
 * - Every regex metacharacter in a domain is escaped (not only "."), so a
 *   configured value can never alter or break the pattern.
 * - Non-string and empty entries are ignored.
 * - With no usable domains the returned regex matches nothing; an empty
 *   alternation would otherwise match every "local@" prefix.
 * - The domain must not be followed by another letter or digit, so
 *   "user@gmail.community" is not treated as a gmail.com address.
 *
 * @param {string[]} providers - Array of domain strings.
 * @returns {RegExp} Global, case-insensitive pattern.
 */
function buildPersonalEmailPattern(providers) {
  const escapedDomains = providers
    .filter((domain) => typeof domain === 'string' && domain !== '')
    .map((domain) => domain.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'));

  if (escapedDomains.length === 0) {
    // Negative lookahead on the empty string: never matches.
    return /(?!)/g;
  }

  const domainAlternation = escapedDomains.join('|');
  return new RegExp(
    `[A-Za-z0-9._%+\\-]+@(?:${domainAlternation})(?![A-Za-z0-9])`,
    'gi',
  );
}
|
|
80
|
+
|
|
81
|
+
/**
 * Swap every substring that matches one of SECRET_PATTERNS for the
 * [REDACTED_SECRET] placeholder.
 * The input string is left untouched; a new string is returned.
 * @param {string} text - Text to scrub.
 * @returns {string} Text with recognised secrets replaced.
 */
export function redactSecrets(text) {
  return SECRET_PATTERNS.reduce((scrubbed, secretPattern) => {
    // The patterns are shared global regexes; start each scan from index 0.
    secretPattern.lastIndex = 0;
    return scrubbed.replace(secretPattern, REDACTED_SECRET);
  }, text);
}
|
|
95
|
+
|
|
96
|
+
/**
 * Mask PII in three passes, in this order: personal-provider email addresses,
 * phone numbers, then dollar amounts. Each pass substitutes its own
 * placeholder. Addresses on domains outside the resolved provider list are
 * left alone.
 * The input string is left untouched; a new string is returned.
 * @param {string} text - Text to scrub.
 * @param {object} config - Skill config (may be undefined/empty).
 * @returns {string} Text with PII replaced by placeholders.
 */
export function redactPii(text, config) {
  const emailPattern = buildPersonalEmailPattern(resolveProviders(config));
  const withoutEmails = text.replace(emailPattern, REDACTED_EMAIL);

  const withoutPhones = PHONE_PATTERNS.reduce((scrubbed, phonePattern) => {
    // The patterns are shared global regexes; start each scan from index 0.
    phonePattern.lastIndex = 0;
    return scrubbed.replace(phonePattern, REDACTED_PHONE);
  }, withoutEmails);

  return withoutPhones.replace(DOLLAR_AMOUNT_PATTERN, REDACTED_AMOUNT);
}
|
|
119
|
+
|
|
120
|
+
/**
 * Full outbound scrub: secrets are removed first, then PII is masked in the
 * result.
 * The input string is left untouched; a new string is returned.
 * @param {string} text - Text to scrub.
 * @param {object} config - Skill config (may be undefined/empty); forwarded to redactPii.
 * @returns {string} Text with secrets and PII replaced by placeholders.
 */
export function redactNotification(text, config) {
  const secretsRemoved = redactSecrets(text);
  return redactPii(secretsRemoved, config);
}
|
|
@@ -0,0 +1,571 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* security/sanitizer.js
|
|
3
|
+
*
|
|
4
|
+
* Responsibility: Deterministic text sanitizer — synchronous 11-step pipeline
|
|
5
|
+
* that strips injection vectors from untrusted string fields before any LLM sees them.
|
|
6
|
+
* Returns cleaned text alongside per-step detection stats. Makes no API calls.
|
|
7
|
+
* Never blocks; blocking decisions belong to the ingestion gate.
|
|
8
|
+
*
|
|
9
|
+
* CAVEAT: Unicode stripping is aggressive and optimized for English-language workloads.
|
|
10
|
+
* Emoji-heavy or multilingual input may have legitimate characters stripped. Use a
|
|
11
|
+
* more selective configuration for non-English workloads.
|
|
12
|
+
*
|
|
13
|
+
* Public Interface:
|
|
14
|
+
* sanitizer
|
|
15
|
+
* └── sanitize(text, opts?) → { cleaned: string, stats: SanitizeStats }
|
|
16
|
+
*
|
|
17
|
+
* SanitizeStats:
|
|
18
|
+
* {
|
|
19
|
+
* invisibleStripped: number,
|
|
20
|
+
* walletDrainStripped: number,
|
|
21
|
+
* lookalikesNormalized: number,
|
|
22
|
+
* tokenBudgetTruncated: boolean,
|
|
23
|
+
* encodingsDecoded: number,
|
|
24
|
+
* anomalyFlagged: boolean,
|
|
25
|
+
* patternMatches: {
|
|
26
|
+
* roleMarkers: number,
|
|
27
|
+
* jailbreakCommands: number,
|
|
28
|
+
* overridePhrases: number,
|
|
29
|
+
* },
|
|
30
|
+
* codeBlocksStripped: number,
|
|
31
|
+
* hardLimitTruncated: boolean,
|
|
32
|
+
* }
|
|
33
|
+
*/
|
|
34
|
+
|
|
35
|
+
// ---------------------------------------------------------------------------
|
|
36
|
+
// Default configuration — overridden by opts passed to sanitize()
|
|
37
|
+
// ---------------------------------------------------------------------------
|
|
38
|
+
const DEFAULTS = {
|
|
39
|
+
tokenBudget: 2000,
|
|
40
|
+
hardCharLimit: 20_000,
|
|
41
|
+
anomalySigmaThreshold: 3.0,
|
|
42
|
+
};
|
|
43
|
+
|
|
44
|
+
// ---------------------------------------------------------------------------
|
|
45
|
+
// Step 1 — Invisible character sets
|
|
46
|
+
// Zero-width joiners, non-joiners, non-breaking spaces, soft hyphens, etc.
|
|
47
|
+
// ---------------------------------------------------------------------------
|
|
48
|
+
// Character class, written as contiguous ranges:
//   U+00A0          non-breaking space
//   U+00AD          soft hyphen
//   U+180E          mongolian vowel separator
//   U+200B–U+200F   zero-width space / non-joiner / joiner, LTR and RTL marks
//   U+2028–U+202E   line and paragraph separators, directional embeddings,
//                   pop directional formatting, directional overrides
//   U+2060–U+2064   word joiner, function application, invisible times /
//                   separator / plus
//   U+FEFF          zero-width no-break space (BOM)
const INVISIBLE_CHARS_REGEX =
  /[\u00A0\u00AD\u180E\u200B-\u200F\u2028-\u202E\u2060-\u2064\uFEFF]/g;
|
|
74
|
+
|
|
75
|
+
// ---------------------------------------------------------------------------
|
|
76
|
+
// Step 2 — Wallet-drain characters (TOKEN80M8/TOKENADE catalog)
|
|
77
|
+
// Unicode code points known to tokenize to 3–10+ tokens each, inflating
|
|
78
|
+
// API cost dramatically when included in bulk.
|
|
79
|
+
// ---------------------------------------------------------------------------
|
|
80
|
+
// Inclusive [low, high] code point ranges treated as wallet-drain characters.
const WALLET_DRAIN_RANGES = [
  [0x1F300, 0x1F9FF], // Misc symbols, emoticons, transport, maps, supplemental
  [0x2600, 0x27BF],   // Misc symbols, dingbats
  [0x2B50, 0x2BFF],   // Misc symbols and arrows
  [0x3000, 0x303F],   // CJK symbols and punctuation
  [0x3040, 0x309F],   // Hiragana
  [0x30A0, 0x30FF],   // Katakana
  [0x4E00, 0x9FFF],   // CJK unified ideographs (common)
  [0xAC00, 0xD7AF],   // Hangul syllables
  [0xFFF0, 0xFFFF],   // Specials (object replacement, etc.)
  [0x10000, 0x1007F], // Linear B syllabary
  [0x1D400, 0x1D7FF], // Mathematical alphanumeric symbols (high token cost)
  [0x1EE00, 0x1EEFF], // Arabic mathematical alphabetic symbols
];

/**
 * Report whether a code point lies inside any WALLET_DRAIN_RANGES entry.
 * Both ends of every range are inclusive.
 * @param {number} cp - Unicode code point.
 * @returns {boolean} true when the code point is in a listed range.
 */
function isWalletDrainCodePoint(cp) {
  return WALLET_DRAIN_RANGES.some(([low, high]) => low <= cp && cp <= high);
}
|
|
101
|
+
|
|
102
|
+
// ---------------------------------------------------------------------------
|
|
103
|
+
// Step 3 — Lookalike normalization (~40 non-Latin → ASCII pairs)
|
|
104
|
+
// Covers Cyrillic, Greek, and fullwidth Latin lookalikes most commonly used
|
|
105
|
+
// to bypass ASCII-only pattern matching.
|
|
106
|
+
// ---------------------------------------------------------------------------
|
|
107
|
+
// Maps a visually confusable non-Latin character to the ASCII letter it
// resembles. The mapping is by appearance, not by sound: Cyrillic Er and
// Greek Rho are drawn like Latin P, and Cyrillic small u like Latin y, so a
// spoofed word normalizes to the ASCII word a reader would see.
// Every key is a single UTF-16 code unit.
const LOOKALIKE_MAP = {
  // Cyrillic capitals
  '\u0410': 'A', // CYRILLIC CAPITAL A
  '\u0412': 'B', // CYRILLIC CAPITAL VE
  '\u0421': 'C', // CYRILLIC CAPITAL ES
  '\u0415': 'E', // CYRILLIC CAPITAL IE
  '\u041D': 'H', // CYRILLIC CAPITAL EN
  '\u0406': 'I', // CYRILLIC CAPITAL BYELORUSSIAN-UKRAINIAN I
  '\u0408': 'J', // CYRILLIC CAPITAL JE
  '\u041A': 'K', // CYRILLIC CAPITAL KA
  '\u041C': 'M', // CYRILLIC CAPITAL EM
  '\u041E': 'O', // CYRILLIC CAPITAL O
  '\u0420': 'P', // CYRILLIC CAPITAL ER (looks like Latin P)
  '\u0422': 'T', // CYRILLIC CAPITAL TE
  '\u0425': 'X', // CYRILLIC CAPITAL HA
  '\u0423': 'Y', // CYRILLIC CAPITAL U
  // Cyrillic small letters
  '\u0430': 'a', // CYRILLIC SMALL A
  '\u0441': 'c', // CYRILLIC SMALL ES
  '\u0435': 'e', // CYRILLIC SMALL IE
  '\u0456': 'i', // CYRILLIC SMALL BYELORUSSIAN-UKRAINIAN I
  '\u0458': 'j', // CYRILLIC SMALL JE
  '\u043E': 'o', // CYRILLIC SMALL O
  '\u0440': 'p', // CYRILLIC SMALL ER (looks like Latin p)
  '\u0455': 's', // CYRILLIC SMALL DZE, specifically called out in task spec
  '\u0443': 'y', // CYRILLIC SMALL U (looks like Latin y)
  '\u0445': 'x', // CYRILLIC SMALL HA
  // Greek capitals
  '\u0391': 'A', // GREEK CAPITAL ALPHA
  '\u0392': 'B', // GREEK CAPITAL BETA
  '\u0395': 'E', // GREEK CAPITAL EPSILON
  '\u0396': 'Z', // GREEK CAPITAL ZETA
  '\u0397': 'H', // GREEK CAPITAL ETA
  '\u0399': 'I', // GREEK CAPITAL IOTA
  '\u039A': 'K', // GREEK CAPITAL KAPPA
  '\u039C': 'M', // GREEK CAPITAL MU
  '\u039D': 'N', // GREEK CAPITAL NU
  '\u039F': 'O', // GREEK CAPITAL OMICRON
  '\u03A1': 'P', // GREEK CAPITAL RHO (looks like Latin P)
  '\u03A4': 'T', // GREEK CAPITAL TAU
  '\u03A7': 'X', // GREEK CAPITAL CHI
  // Greek small letters
  '\u03B1': 'a', // GREEK SMALL ALPHA
  '\u03B2': 'b', // GREEK SMALL BETA (approx)
  '\u03B5': 'e', // GREEK SMALL EPSILON
  '\u03B7': 'n', // GREEK SMALL ETA (approx)
  '\u03B9': 'i', // GREEK SMALL IOTA
  '\u03BF': 'o', // GREEK SMALL OMICRON
  '\u03C1': 'p', // GREEK SMALL RHO (approx)
  '\u03C5': 'u', // GREEK SMALL UPSILON
  '\u03C7': 'x', // GREEK SMALL CHI
  // Fullwidth Latin A–Z (U+FF21–U+FF3A) and a–z (U+FF41–U+FF5A). Each sits
  // exactly 0xFEE0 above its ASCII counterpart.
  ...Object.fromEntries(
    Array.from('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', (ascii) => [
      String.fromCharCode(ascii.charCodeAt(0) + 0xFEE0),
      ascii,
    ]),
  ),
};
|
|
170
|
+
|
|
171
|
+
// Global matcher built as a "|" alternation of every LOOKALIKE_MAP key, so the
// regex always stays in step with the table.
const lookalikeAlternation = Object.keys(LOOKALIKE_MAP).join('|');
const LOOKALIKE_REGEX = new RegExp(lookalikeAlternation, 'g');
|
|
175
|
+
|
|
176
|
+
// ---------------------------------------------------------------------------
|
|
177
|
+
// Step 4 — Token budget: rough estimate at ~4 chars/token for ASCII prose
|
|
178
|
+
// ---------------------------------------------------------------------------
|
|
179
|
+
const CHARS_PER_TOKEN = 4;
|
|
180
|
+
|
|
181
|
+
// ---------------------------------------------------------------------------
|
|
182
|
+
// Step 5 — Combining mark cleanup
|
|
183
|
+
// Unicode combining diacritics range: U+0300–U+036F
|
|
184
|
+
// Strip any base character that has more than 2 combining marks following it.
|
|
185
|
+
// ---------------------------------------------------------------------------
|
|
186
|
+
// Matches one combining diacritical mark (U+0300–U+036F). The range has to sit
// inside a character class; without the brackets the pattern would only match
// the literal three-character sequence U+0300, "-", U+036F.
const COMBINING_MARK_REGEX = /[\u0300-\u036F]/;
|
|
187
|
+
|
|
188
|
+
// ---------------------------------------------------------------------------
|
|
189
|
+
// Step 6 — Encoding decode helpers
|
|
190
|
+
// ---------------------------------------------------------------------------
|
|
191
|
+
// Decimal (&#NN;), hexadecimal (&#xNN;) and named (&name;) HTML entities.
const HTML_ENTITY_REGEX = /&#(\d+);|&#x([0-9a-fA-F]+);|&([a-zA-Z]+);/g;
// Runs of at least 16 base64 characters, with optional "=" padding.
const BASE64_BLOCK_REGEX = /(?:[A-Za-z0-9+/]{4}){4,}(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?/g;
// A single %NN percent-encoded byte.
const PERCENT_ENCODED_REGEX = /%[0-9A-Fa-f]{2}/g;
// A literal backslash-x escape (\xNN) appearing in the text.
const HEX_BLOCK_REGEX = /\\x[0-9A-Fa-f]{2}/g;

// Named entities that get decoded; any other name is left as written.
const HTML_NAMED_ENTITIES = {
  amp: '&', lt: '<', gt: '>', quot: '"', apos: "'", nbsp: ' ',
};

/**
 * Decode numeric and a small set of named HTML entities.
 *
 * Entities that cannot be decoded are returned unchanged rather than throwing:
 *   - numeric references above U+10FFFF (String.fromCodePoint would raise a
 *     RangeError on them);
 *   - names that are not own keys of HTML_NAMED_ENTITIES. The own-property
 *     check stops names such as "constructor" from resolving through
 *     Object.prototype.
 *
 * @param {string} text - Text that may contain HTML entities.
 * @returns {string} Text with recognised entities replaced by their characters.
 */
function decodeHtmlEntities(text) {
  const maxCodePoint = 0x10FFFF;

  return text.replace(HTML_ENTITY_REGEX, (match, dec, hex, named) => {
    if (named !== undefined) {
      const key = named.toLowerCase();
      return Object.prototype.hasOwnProperty.call(HTML_NAMED_ENTITIES, key)
        ? HTML_NAMED_ENTITIES[key]
        : match;
    }

    const codePoint = dec !== undefined ? parseInt(dec, 10) : parseInt(hex, 16);
    return codePoint <= maxCodePoint ? String.fromCodePoint(codePoint) : match;
  });
}
|
|
208
|
+
|
|
209
|
+
/**
 * Attempt to read `candidate` as base64-encoded text.
 *
 * The decoded UTF-8 string is accepted only when it consists entirely of
 * printable ASCII, newline, carriage return or tab, and is shorter than the
 * candidate itself.
 *
 * @param {string} candidate - Suspected base64 block.
 * @returns {string|null} The decoded text, or null when it is rejected or
 *   decoding throws.
 */
function tryBase64Decode(candidate) {
  let accepted = null;
  try {
    const plain = Buffer.from(candidate, 'base64').toString('utf8');
    const isPrintable = /^[\x20-\x7E\n\r\t]+$/.test(plain);
    if (isPrintable && plain.length < candidate.length) {
      accepted = plain;
    }
  } catch {
    // Decoding threw: treat the candidate as not base64 and fall through to null.
  }
  return accepted;
}
|
|
221
|
+
|
|
222
|
+
/**
 * Decode percent-encoded (%NN) sequences.
 *
 * Each maximal run of consecutive %NN escapes is decoded on its own, so that a
 * multi-byte UTF-8 character (e.g. %E4%BD%A0) is decoded as one unit. A run
 * that is not valid UTF-8 is left as written. Text outside the runs is never
 * touched, so a stray "%" elsewhere in the input (as in "100% sure") cannot
 * prevent the valid escapes from being decoded.
 *
 * @param {string} text - Text that may contain percent-encoded bytes.
 * @returns {string} Text with every decodable run replaced by its characters.
 */
function decodePercentEncoded(text) {
  return text.replace(/(?:%[0-9A-Fa-f]{2})+/g, (run) => {
    try {
      return decodeURIComponent(run);
    } catch {
      // URIError: the bytes are not valid UTF-8; keep this run unchanged.
      return run;
    }
  });
}
|
|
229
|
+
|
|
230
|
+
/**
 * Turn each literal \xNN escape found by HEX_BLOCK_REGEX into the character
 * whose code point is the hexadecimal value NN.
 * @param {string} text - Text that may contain \xNN escapes.
 * @returns {string} Text with those escapes replaced.
 */
function decodeHexEscapes(text) {
  return text.replace(HEX_BLOCK_REGEX, (escapeSequence) => {
    // Drop the leading "\x" and parse the remaining two hex digits.
    const hexDigits = escapeSequence.slice(2);
    const codePoint = parseInt(hexDigits, 16);
    return String.fromCodePoint(codePoint);
  });
}
|
|
235
|
+
|
|
236
|
+
// ---------------------------------------------------------------------------
|
|
237
|
+
// Step 7 — Statistical anomaly detection
|
|
238
|
+
// Computes character category distribution and flags if any category
|
|
239
|
+
// deviates beyond sigma threshold from expected English prose baseline.
|
|
240
|
+
// ---------------------------------------------------------------------------
|
|
241
|
+
// Expected proportions in English prose (approximate)
|
|
242
|
+
const EXPECTED_DIST = {
  lowercase: 0.60,
  uppercase: 0.05,
  digit: 0.04,
  space: 0.15,
  punctuation: 0.08,
  other: 0.08,
};

/**
 * Classify one character into a key of EXPECTED_DIST.
 * Only ASCII letters, digits, space/tab/LF/CR and ASCII punctuation get a
 * specific category; everything else is 'other'.
 * @param {string} ch - A single character (one code point).
 * @returns {'lowercase'|'uppercase'|'digit'|'space'|'punctuation'|'other'}
 */
function charCategory(ch) {
  const cp = ch.codePointAt(0);
  if (cp >= 0x61 && cp <= 0x7A) return 'lowercase';
  if (cp >= 0x41 && cp <= 0x5A) return 'uppercase';
  if (cp >= 0x30 && cp <= 0x39) return 'digit';
  if (cp === 0x20 || cp === 0x09 || cp === 0x0A || cp === 0x0D) return 'space';
  if (cp >= 0x21 && cp <= 0x2F) return 'punctuation';
  if (cp >= 0x3A && cp <= 0x40) return 'punctuation';
  if (cp >= 0x5B && cp <= 0x60) return 'punctuation';
  if (cp >= 0x7B && cp <= 0x7E) return 'punctuation';
  return 'other';
}

/**
 * Flag text whose character-category mix deviates from EXPECTED_DIST.
 *
 * For each category the observed proportion is compared with the expected one;
 * the difference is expressed in standard deviations of a binomial proportion,
 * sqrt(p * (1 - p) / n). The text is flagged as soon as one category exceeds
 * `sigmaThreshold`.
 *
 * The sample size n is the number of code points actually counted, not
 * text.length: the loop iterates by code point, so using the UTF-16 length
 * would understate every proportion whenever astral characters are present.
 *
 * NOTE(review): the standard deviation shrinks as n grows, so long inputs need
 * to track EXPECTED_DIST very closely to stay unflagged. Confirm this is the
 * intended sensitivity.
 *
 * @param {string} text - Text to analyse.
 * @param {number} sigmaThreshold - Maximum tolerated deviation, in sigmas.
 * @returns {boolean} true when any category exceeds the threshold; false for
 *   empty text.
 */
function detectAnomaly(text, sigmaThreshold) {
  if (text.length === 0) return false;

  const counts = { lowercase: 0, uppercase: 0, digit: 0, space: 0, punctuation: 0, other: 0 };
  let total = 0;
  for (const ch of text) {
    counts[charCategory(ch)]++;
    total++;
  }

  for (const [cat, expected] of Object.entries(EXPECTED_DIST)) {
    const observed = counts[cat] / total;
    const stdDev = Math.sqrt(expected * (1 - expected) / total);
    if (stdDev === 0) continue;
    const sigma = Math.abs(observed - expected) / stdDev;
    if (sigma > sigmaThreshold) return true;
  }
  return false;
}
|
|
282
|
+
|
|
283
|
+
// ---------------------------------------------------------------------------
|
|
284
|
+
// Step 8 — Pattern matching
|
|
285
|
+
// L1B3RT4S jailbreak catalog: role markers, jailbreak commands, override phrases
|
|
286
|
+
// ---------------------------------------------------------------------------
|
|
287
|
+
// Conversation roles whose markers are counted, in three spellings each:
// "Role:" (word-bounded), "[ROLE]" and "<role>". All patterns are global and
// case-insensitive.
const ROLE_NAMES = ['System', 'User', 'Assistant'];
const ROLE_MARKER_PATTERNS = [
  ...ROLE_NAMES.map((role) => new RegExp(`\\b${role}\\s*:`, 'gi')),
  ...ROLE_NAMES.map((role) => new RegExp(`\\[${role.toUpperCase()}\\]`, 'gi')),
  ...ROLE_NAMES.map((role) => new RegExp(`<\\s*${role.toLowerCase()}\\s*>`, 'gi')),
];
|
|
298
|
+
|
|
299
|
+
// Jailbreak command markers. Each pattern is global and case-insensitive, and
// matches are counted per pattern, so no two patterns may match the same text:
// "jailbreak" is covered only by the optional-bracket form below (a separate
// bare /jailbreak/ entry would count every occurrence twice).
const JAILBREAK_COMMAND_PATTERNS = [
  /L1B3RT4S/gi,
  /DAN\s+mode/gi,
  /do\s+anything\s+now/gi,
  /you\s+are\s+now\s+in\s+(?:developer|debug|admin|god|unrestricted)\s+mode/gi,
  /pretend\s+you\s+(?:have\s+no\s+restrictions|are\s+(?:an?\s+)?(?:evil|uncensored|unfiltered))/gi,
  /act\s+as\s+(?:an?\s+)?(?:uncensored|unfiltered|evil|unrestricted)/gi,
  // "jailbreak", "[jailbreak]", "[JAILBREAK", ...
  /\[?JAILBREAK\]?/gi,
  /opposite\s+mode/gi,
  /token\s*ade/gi,
  /TOKEN80M8/gi,
  /P4RS3LT0NGV3/gi,
  /grandma\s+exploit/gi,
  /virtual\s+(?:AI|assistant|LLM)\s+with\s+no\s+restrictions/gi,
];
|
|
315
|
+
|
|
316
|
+
// Phrases that try to override, replace or exfiltrate the model's
// instructions. All patterns are global and case-insensitive.
const OVERRIDE_PHRASE_PATTERNS = [
  // "<verb> [all] previous|prior|above|earlier instructions"
  ...['ignore', 'disregard', 'forget'].map(
    (verb) =>
      new RegExp(`${verb}\\s+(?:all\\s+)?(?:previous|prior|above|earlier)\\s+instructions`, 'gi'),
  ),
  // Replacing or redefining the instructions
  /override\s+(?:your\s+)?(?:instructions|programming|directives|constraints|safety)/gi,
  /new\s+(?:primary\s+)?(?:instructions|directive|objective|mission|goal)\s*:/gi,
  /you\s+are\s+now\s+in\s+(?:audit|maintenance|debug|developer)\s+mode/gi,
  /your\s+(?:true|real|actual)\s+(?:instructions|purpose|mission|goal)\s+(?:is|are)/gi,
  // Attempts to extract the prompt or secrets
  /send\s+me\s+(?:your\s+)?(?:hidden\s+)?(?:prompt|api\s+key|secret)/gi,
  /reveal\s+(?:your\s+)?(?:system\s+prompt|instructions|context|api\s+key)/gi,
  /print\s+(?:your\s+)?(?:system\s+prompt|instructions)/gi,
  /what\s+(?:are|were)\s+your\s+(?:original\s+)?instructions/gi,
  /translate\s+(?:your\s+)?(?:system\s+)?(?:prompt|instructions)\s+to/gi,
];
|
|
330
|
+
|
|
331
|
+
/**
 * Total the matches that each regex in `patterns` finds in `text`.
 * Each pattern's lastIndex is set back to 0 after it has been used.
 * @param {string} text - Text to scan.
 * @param {Iterable<RegExp>} patterns - Regexes to apply.
 * @returns {number} Sum of the match counts of all patterns.
 */
function countPatternMatches(text, patterns) {
  let total = 0;
  for (const regex of patterns) {
    // String.prototype.match yields null when nothing matched.
    total += text.match(regex)?.length ?? 0;
    regex.lastIndex = 0;
  }
  return total;
}
|
|
341
|
+
|
|
342
|
+
// ---------------------------------------------------------------------------
|
|
343
|
+
// Step 9 — Code block stripping
|
|
344
|
+
// ---------------------------------------------------------------------------
|
|
345
|
+
const CODE_BLOCK_REGEX = /```[\s\S]*?```|~~~[\s\S]*?~~~/g;
|
|
346
|
+
|
|
347
|
+
// ---------------------------------------------------------------------------
|
|
348
|
+
// Core steps — run on a text string, mutating stats in place
|
|
349
|
+
// Returning the transformed text after each step.
|
|
350
|
+
// ---------------------------------------------------------------------------
|
|
351
|
+
|
|
352
|
+
/**
 * Step 1: delete every character matched by INVISIBLE_CHARS_REGEX.
 * Adds the number of removed UTF-16 code units to stats.invisibleStripped.
 * @param {string} text - Input text.
 * @param {object} stats - Stats accumulator; mutated in place.
 * @returns {string} Text without the matched characters.
 */
function step1_stripInvisible(text, stats) {
  const visibleOnly = text.replace(INVISIBLE_CHARS_REGEX, '');
  stats.invisibleStripped += text.length - visibleOnly.length;
  return visibleOnly;
}
|
|
358
|
+
|
|
359
|
+
/**
 * Step 2: drop every code point that isWalletDrainCodePoint() accepts.
 * The text is walked by code point, so a surrogate pair counts as one
 * character. Adds the number of dropped code points to
 * stats.walletDrainStripped.
 * @param {string} text - Input text.
 * @param {object} stats - Stats accumulator; mutated in place.
 * @returns {string} Text without wallet-drain characters.
 */
function step2_stripWalletDrain(text, stats) {
  const glyphs = [...text];
  const kept = glyphs.filter((glyph) => !isWalletDrainCodePoint(glyph.codePointAt(0)));
  stats.walletDrainStripped += glyphs.length - kept.length;
  return kept.join('');
}
|
|
373
|
+
|
|
374
|
+
/**
 * Step 3: replace each character matched by LOOKALIKE_REGEX with its entry in
 * LOOKALIKE_MAP. A match without a map entry is kept as is and not counted.
 * Adds the number of replacements to stats.lookalikesNormalized.
 * @param {string} text - Input text.
 * @param {object} stats - Stats accumulator; mutated in place.
 * @returns {string} Text with lookalikes replaced.
 */
function step3_normalizeLookalikes(text, stats) {
  let swaps = 0;
  const normalizedText = text.replace(LOOKALIKE_REGEX, (glyph) => {
    const latin = LOOKALIKE_MAP[glyph];
    if (latin === undefined) return glyph;
    swaps += 1;
    return latin;
  });
  stats.lookalikesNormalized += swaps;
  return normalizedText;
}
|
|
387
|
+
|
|
388
|
+
/**
 * Step 4: truncate text whose estimated token count exceeds `budget`.
 *
 * Tokens are estimated as ceil(text.length / CHARS_PER_TOKEN). Over-budget
 * text is cut to budget * CHARS_PER_TOKEN UTF-16 code units and
 * stats.tokenBudgetTruncated is set to true. If the cut would fall between the
 * two halves of a surrogate pair, the dangling high surrogate is dropped too,
 * so the result never ends in half a character.
 *
 * @param {string} text - Input text.
 * @param {object} stats - Stats accumulator; mutated in place.
 * @param {number} budget - Maximum estimated tokens.
 * @returns {string} The original text, or its truncated prefix.
 */
function step4_enforceTokenBudget(text, stats, budget) {
  const estimatedTokens = Math.ceil(text.length / CHARS_PER_TOKEN);
  if (estimatedTokens <= budget) return text;

  stats.tokenBudgetTruncated = true;
  let end = budget * CHARS_PER_TOKEN;
  const lastKept = text.charCodeAt(end - 1);
  // 0xD800–0xDBFF are high surrogates: the first half of an astral character.
  if (lastKept >= 0xD800 && lastKept <= 0xDBFF) end -= 1;
  return text.slice(0, end);
}
|
|
395
|
+
|
|
396
|
+
/**
 * Step 5: cap stacked combining diacritics (U+0300–U+036F) at two per run.
 *
 * The text is decomposed to NFD so that marks become separate code points, any
 * run of three or more consecutive marks is shortened to its first two, and
 * the result is recomposed to NFC.
 *
 * @param {string} text - Input text.
 * @returns {string} NFC text with excess combining marks removed.
 */
function step5_cleanCombiningMarks(text) {
  const decomposed = text.normalize('NFD');
  // Group 1 holds the two marks that are kept; the rest of the run is dropped.
  const capped = decomposed.replace(/([\u0300-\u036F]{2})[\u0300-\u036F]+/g, '$1');
  return capped.normalize('NFC');
}
|
|
416
|
+
|
|
417
|
+
/**
 * Step 6: undo common encodings, then re-sanitize whatever was revealed.
 *
 * Decoders run in a fixed order: HTML entities, \xNN escapes, %NN escapes, and
 * finally base64 blocks. Each of the first three counts as one decoding when
 * it changes the text at all; base64 counts one decoding per block that
 * tryBase64Decode() accepts. The total is added to stats.encodingsDecoded.
 * When at least one decoding happened, the text is passed through
 * `runSteps1to5` once more.
 *
 * @param {string} text - Input text.
 * @param {object} stats - Stats accumulator; mutated in place.
 * @param {(input: string) => string} runSteps1to5 - Re-runs steps 1–5.
 * @returns {string} Decoded (and, if anything changed, re-sanitized) text.
 */
function step6_decodeEncodings(text, stats, runSteps1to5) {
  let current = text;
  let decodeCount = 0;

  // Whole-text decoders: each contributes at most one decoding.
  for (const decode of [decodeHtmlEntities, decodeHexEscapes, decodePercentEncoded]) {
    const next = decode(current);
    if (next !== current) {
      decodeCount += 1;
      current = next;
    }
  }

  // Base64 blocks: substitute only the blocks that decode to readable text.
  current = current.replace(BASE64_BLOCK_REGEX, (block) => {
    const plain = tryBase64Decode(block);
    if (!plain) return block;
    decodeCount += 1;
    return plain;
  });

  stats.encodingsDecoded += decodeCount;

  return decodeCount > 0 ? runSteps1to5(current) : current;
}
|
|
462
|
+
|
|
463
|
+
/**
 * Step 7: set stats.anomalyFlagged to true when detectAnomaly() reports a
 * deviation. The flag is never cleared here and the text is not modified.
 * @param {string} text - Input text.
 * @param {object} stats - Stats accumulator; mutated in place.
 * @param {number} sigmaThreshold - Threshold forwarded to detectAnomaly().
 * @returns {string} The input text, unchanged.
 */
function step7_detectAnomaly(text, stats, sigmaThreshold) {
  const isAnomalous = detectAnomaly(text, sigmaThreshold);
  if (isAnomalous) {
    stats.anomalyFlagged = true;
  }
  return text;
}
|
|
469
|
+
|
|
470
|
+
/**
 * Step 8: add the match counts of the three pattern catalogs (role markers,
 * jailbreak commands, override phrases) to stats.patternMatches. The text is
 * not modified.
 * @param {string} text - Input text.
 * @param {object} stats - Stats accumulator; stats.patternMatches is mutated.
 * @returns {string} The input text, unchanged.
 */
function step8_matchPatterns(text, stats) {
  const { patternMatches } = stats;
  patternMatches.roleMarkers += countPatternMatches(text, ROLE_MARKER_PATTERNS);
  patternMatches.jailbreakCommands += countPatternMatches(text, JAILBREAK_COMMAND_PATTERNS);
  patternMatches.overridePhrases += countPatternMatches(text, OVERRIDE_PHRASE_PATTERNS);
  return text;
}
|
|
476
|
+
|
|
477
|
+
/**
 * Step 9: remove every block matched by CODE_BLOCK_REGEX and add the number of
 * removed blocks to stats.codeBlocksStripped.
 * @param {string} text - Input text.
 * @param {object} stats - Stats accumulator; mutated in place.
 * @returns {string} Text without the matched blocks.
 */
function step9_stripCodeBlocks(text, stats) {
  const fencedBlocks = text.match(CODE_BLOCK_REGEX) ?? [];
  stats.codeBlocksStripped += fencedBlocks.length;
  return text.replace(CODE_BLOCK_REGEX, '');
}
|
|
486
|
+
|
|
487
|
+
/**
 * Step 10: hard cap on output length.
 *
 * Text longer than `hardLimit` UTF-16 code units is cut to that length and
 * stats.hardLimitTruncated is set to true. If the cut would fall between the
 * two halves of a surrogate pair, the dangling high surrogate is dropped too,
 * so the result never ends in half a character.
 *
 * @param {string} text - Input text.
 * @param {object} stats - Stats accumulator; mutated in place.
 * @param {number} hardLimit - Maximum length in UTF-16 code units.
 * @returns {string} The original text, or its truncated prefix.
 */
function step10_hardCharLimit(text, stats, hardLimit) {
  if (text.length <= hardLimit) return text;

  stats.hardLimitTruncated = true;
  let end = hardLimit;
  const lastKept = text.charCodeAt(end - 1);
  // 0xD800–0xDBFF are high surrogates: the first half of an astral character.
  if (lastKept >= 0xD800 && lastKept <= 0xDBFF) end -= 1;
  return text.slice(0, end);
}
|
|
492
|
+
|
|
493
|
+
// ---------------------------------------------------------------------------
|
|
494
|
+
// Public API
|
|
495
|
+
// ---------------------------------------------------------------------------
|
|
496
|
+
|
|
497
|
+
/**
 * sanitize(text, opts?) — synchronous sanitization pipeline.
 *
 * Pipeline order:
 *   steps 1–5   strip invisible characters, strip wallet-drain characters,
 *               normalize lookalikes, enforce the token budget, clean
 *               combining marks
 *   step 6      decode encodings (re-running steps 1–5 on decoded content)
 *   step 7      anomaly detection (sets a flag only)
 *   step 8      pattern matching (records counts only)
 *   step 9      strip fenced code blocks
 *   step 10     hard character limit
 *   step 11     return the cleaned text with the collected stats
 *
 * opts: {
 *   tokenBudget?: number            — max estimated tokens; falls back to DEFAULTS.tokenBudget
 *   hardCharLimit?: number          — max character count; falls back to DEFAULTS.hardCharLimit
 *   anomalySigmaThreshold?: number  — sigma deviation threshold; falls back to
 *                                     DEFAULTS.anomalySigmaThreshold
 * }
 * An option set to null or undefined uses its default.
 *
 * Returns:
 *   {
 *     cleaned: string,
 *     stats: {
 *       invisibleStripped, walletDrainStripped, lookalikesNormalized,
 *       tokenBudgetTruncated, encodingsDecoded, anomalyFlagged,
 *       patternMatches: { roleMarkers, jailbreakCommands, overridePhrases },
 *       codeBlocksStripped, hardLimitTruncated
 *     }
 *   }
 *
 * Empty or non-string input skips the pipeline: null/undefined yield
 * cleaned === '', and zeroed stats are returned.
 * NOTE(review): any other non-string value (number, object, ...) is returned
 * as `cleaned` unchanged and unsanitized — confirm callers never depend on
 * `cleaned` being a string in that case.
 */
export function sanitize(text, opts = {}) {
  const limits = {
    tokenBudget: opts.tokenBudget ?? DEFAULTS.tokenBudget,
    hardCharLimit: opts.hardCharLimit ?? DEFAULTS.hardCharLimit,
    anomalySigmaThreshold: opts.anomalySigmaThreshold ?? DEFAULTS.anomalySigmaThreshold,
  };

  const stats = {
    invisibleStripped: 0,
    walletDrainStripped: 0,
    lookalikesNormalized: 0,
    tokenBudgetTruncated: false,
    encodingsDecoded: 0,
    anomalyFlagged: false,
    patternMatches: {
      roleMarkers: 0,
      jailbreakCommands: 0,
      overridePhrases: 0,
    },
    codeBlocksStripped: 0,
    hardLimitTruncated: false,
  };

  const hasContent = typeof text === 'string' && text.length > 0;
  if (!hasContent) {
    return { cleaned: text ?? '', stats };
  }

  // Steps 1–5 bundled as one function so that step 6 can re-apply them to
  // decoded content; every pass accumulates into the same stats object.
  const runSteps1to5 = (input) => {
    const visible = step1_stripInvisible(input, stats);
    const withoutDrain = step2_stripWalletDrain(visible, stats);
    const normalized = step3_normalizeLookalikes(withoutDrain, stats);
    const withinBudget = step4_enforceTokenBudget(normalized, stats, limits.tokenBudget);
    return step5_cleanCombiningMarks(withinBudget);
  };

  const prepared = runSteps1to5(text);

  // Step 6 — decode encodings and re-run steps 1–5 on decoded content
  const decoded = step6_decodeEncodings(prepared, stats, runSteps1to5);

  // Steps 7 and 8 only record findings in stats; their return value is unused.
  step7_detectAnomaly(decoded, stats, limits.anomalySigmaThreshold);
  step8_matchPatterns(decoded, stats);

  // Step 9 — strip fenced code blocks
  const withoutCode = step9_stripCodeBlocks(decoded, stats);

  // Step 10 — hard character limit fallback
  const cleaned = step10_hardCharLimit(withoutCode, stats, limits.hardCharLimit);

  // Step 11 — return cleaned text and stats
  return { cleaned, stats };
}
|