mask-privacy 3.0.0 → 3.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -17
- package/dist/index.d.mts +58 -27
- package/dist/index.d.ts +58 -27
- package/dist/index.js +394 -310
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +394 -310
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
- package/src/core/dlp/assessor.ts +3 -26
- package/src/core/dlp/handlers.ts +44 -31
- package/src/core/dlp/index.ts +0 -2
- package/src/core/dlp/registry.ts +119 -107
- package/src/core/dlp/scorer.ts +4 -4
- package/src/core/fpe.ts +85 -32
- package/src/core/fpe_utils.ts +20 -20
- package/src/core/scanner.ts +146 -151
- package/src/core/span.ts +76 -0
- package/src/core/transformers_scanner.ts +2 -2
- package/src/core/vault.ts +2 -1
- package/tests/async.test.ts +2 -2
- package/tests/dlp_hardened.test.ts +21 -0
- package/tests/fpe.test.ts +4 -4
- package/tests/hooks.test.ts +2 -2
- package/tests/langchain.test.ts +2 -2
- package/tests/llamaindex.test.ts +1 -1
- package/tests/scanner.test.ts +0 -1
- package/tests/substring.test.ts +1 -1
- package/tests/vault.test.ts +1 -1
package/dist/index.js
CHANGED
|
@@ -256,10 +256,13 @@ var init_exceptions = __esm({
|
|
|
256
256
|
function looksLikeToken(value) {
|
|
257
257
|
if (typeof value !== "string") return false;
|
|
258
258
|
const v7 = value.trim();
|
|
259
|
-
if (v7.startsWith("tkn-") && v7.includes("@
|
|
260
|
-
|
|
259
|
+
if (v7.startsWith("tkn-") && v7.includes("@")) {
|
|
260
|
+
const parts = v7.split("@");
|
|
261
|
+
if (parts.length === 2 && parts[0].length >= 12 && parts[1].includes(".")) {
|
|
262
|
+
return true;
|
|
263
|
+
}
|
|
261
264
|
}
|
|
262
|
-
if (
|
|
265
|
+
if (/^\+[1-9]\d{0,3}-555-\d{7}$/.test(v7)) {
|
|
263
266
|
return true;
|
|
264
267
|
}
|
|
265
268
|
if (v7.startsWith("000-00-") && v7.length === 11) {
|
|
@@ -271,16 +274,13 @@ function looksLikeToken(value) {
|
|
|
271
274
|
if (v7.startsWith("000000") && v7.length === 9) {
|
|
272
275
|
return true;
|
|
273
276
|
}
|
|
274
|
-
if (v7.startsWith("
|
|
275
|
-
return true;
|
|
276
|
-
}
|
|
277
|
-
if (v7.length === 11 && v7.startsWith("990000") && /^\d+$/.test(v7) && parseInt(v7[v7.length - 1], 10) % 2 === 0) {
|
|
277
|
+
if (v7.length === 9 && v7.startsWith("000") && /[A-Z]$/.test(v7)) {
|
|
278
278
|
return true;
|
|
279
279
|
}
|
|
280
|
-
if (
|
|
280
|
+
if (/^[A-Z]{2}00[A-F0-9]{4,16}$/.test(v7)) {
|
|
281
281
|
return true;
|
|
282
282
|
}
|
|
283
|
-
if (
|
|
283
|
+
if (/^<(PER|LOC|ORG):[^>]+>$/.test(v7)) {
|
|
284
284
|
return true;
|
|
285
285
|
}
|
|
286
286
|
if (v7.startsWith("[TKN-") && v7.endsWith("]")) {
|
|
@@ -292,7 +292,7 @@ var TOKEN_PATTERN;
|
|
|
292
292
|
var init_fpe_utils = __esm({
|
|
293
293
|
"src/core/fpe_utils.ts"() {
|
|
294
294
|
TOKEN_PATTERN = new RegExp(
|
|
295
|
-
"tkn-[a-f0-9]{8,64}@
|
|
295
|
+
"tkn-[a-f0-9]{8,64}@[A-Za-z0-9.\\-]+\\.[A-Za-z]{2,}|\\+[1-9]\\d{0,3}-555-\\d{7}|000-00-\\d{4}|4000-0000-0000-\\d{4}|000000\\d{3}|000\\d{5}[A-Z]|[A-Z]{2}00[A-F0-9]{4,16}|<(?:PER|LOC|ORG):[^>]+>|\\[TKN-[a-f0-9]{8,64}\\]",
|
|
296
296
|
// Opaque
|
|
297
297
|
"g"
|
|
298
298
|
);
|
|
@@ -341,42 +341,87 @@ async function _hmacDigits(plaintext, n6, offset = 0) {
|
|
|
341
341
|
}
|
|
342
342
|
return result.join("");
|
|
343
343
|
}
|
|
344
|
-
async function
|
|
345
|
-
const
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
344
|
+
/**
 * Select one element of `array`, using digits derived from `plaintext`.
 *
 * Eight digits are requested from `_hmacDigits`, parsed as a base-10 integer,
 * and reduced modulo the array length to obtain the index.
 *
 * @param {string} plaintext - Value the selection is derived from.
 * @param {Array} array - Candidate values to choose between.
 * @returns {Promise<*>} The element at the derived index.
 */
async function _pickFromArray(plaintext, array) {
  const index = parseInt(await _hmacDigits(plaintext, 8), 10) % array.length;
  return array[index];
}
|
|
349
|
+
/**
 * Compute the Luhn check digit to append to a partial number.
 *
 * Walking from the rightmost digit of `partialNum`, every other digit
 * (starting with the rightmost) is doubled, and 9 is subtracted from any
 * doubled value above 9. The check digit is whatever brings the total up to
 * a multiple of 10.
 *
 * @param {string} partialNum - Digits of the number without its check digit.
 * @returns {string} The single check digit, as a string.
 */
function _computeLuhnDigit(partialNum) {
  const total = partialNum
    .split("")
    .map(Number)
    .reverse()
    .reduce((acc, value, offsetFromRight) => {
      // Offsets 0, 2, 4, ... from the right are the doubled positions.
      if (offsetFromRight % 2 !== 0) return acc + value;
      const doubled = value * 2;
      return acc + (doubled > 9 ? doubled - 9 : doubled);
    }, 0);
  return ((10 - total % 10) % 10).toString();
}
|
|
364
|
+
/**
 * Look up the control letter for a Spanish ID number.
 *
 * @param {number} num - Numeric part of the identifier.
 * @returns {string} The letter at position `num % 23` of the 23-letter table.
 */
function _computeEsIdCheck(num) {
  const controlLetters = "TRWAGMYFPDXBNJZSQVHLCKE";
  const position = num % 23;
  return controlLetters[position];
}
|
|
367
|
+
/**
 * Build a format-preserving placeholder token for a sensitive value.
 *
 * The value is trimmed first. When `entityType` is "UNKNOWN" (or falsy) the
 * type is sniffed by testing the module-level regexes in a fixed order; the
 * first one that matches wins. The token shape then depends on the type:
 *
 * - email:    `tkn-<hex>@<original domain>` ("email.com" if not exactly one "@")
 * - phone:    `+<cc>-555-<7 digits>` (cc taken from a leading "+<digits>", else "1")
 * - US SSN:   `000-00-<4 digits>`
 * - card:     `4000-0000-0000-<3 digits + Luhn check digit>`
 * - routing:  `000000<3 digits>`
 * - IBAN:     `<first two letters upper-cased, or "US">00<hex>`
 * - ES DNI:   `000<5 digits><control letter>`
 * - person / location / organization: `<PER:..>`, `<LOC:..>`, `<ORG:.._Inc>`
 * - anything else: `[TKN-<hex>]`
 *
 * Hex and digit material comes from the `_hmacHex` / `_hmacDigits` helpers.
 *
 * @param {string} rawText - The sensitive value to tokenise.
 * @param {string} [entityType="UNKNOWN"] - Entity label; case-insensitive.
 * @returns {Promise<string>} The generated token.
 */
async function generateFPEToken(rawText, entityType = "UNKNOWN") {
  const text = rawText.trim();
  let type = (entityType || "UNKNOWN").toUpperCase();

  if (type === "UNKNOWN") {
    // Order matters: earlier detectors take precedence over later ones.
    const detectors = [
      [_EMAIL_RE, "EMAIL_ADDRESS"],
      [_SSN_RE, "US_SSN"],
      [_CC_RE, "CREDIT_CARD"],
      [_ROUTING_RE, "US_ROUTING_NUMBER"],
      [_ES_ID_RE, "ES_DNI"],
      [_IBAN_RE, "INTL_BANK_IBAN"],
      [_PHONE_RE, "PHONE_NUMBER"]
    ];
    const hit = detectors.find(([pattern]) => pattern.test(text));
    if (hit) type = hit[1];
  }

  switch (type) {
    case "EMAIL_ADDRESS":
    case "EMAIL_ADDR": {
      const pieces = text.split("@");
      const domain = pieces.length === 2 ? pieces[1] : "email.com";
      return `tkn-${await _hmacHex(text)}@${domain}`;
    }
    case "PHONE_NUMBER":
    case "PHONE_NUM":
    case "PHONE_NUM_INTL": {
      const prefix = text.match(/^\+([1-9]\d{0,3})/);
      const cc = prefix ? prefix[1] : "1";
      return `+${cc}-555-${await _hmacDigits(text, 7)}`;
    }
    case "US_SSN":
      return `000-00-${await _hmacDigits(text, 4)}`;
    case "CREDIT_CARD":
    case "CREDIT_CARD_NUMBER": {
      const base = `400000000000${await _hmacDigits(text, 3)}`;
      const full = base + _computeLuhnDigit(base);
      // Render as four dash-separated groups of four characters.
      return [0, 4, 8, 12].map((start) => full.slice(start, start + 4)).join("-");
    }
    case "US_ROUTING_NUMBER":
    case "US_ABA_ROUTING":
      return `000000${await _hmacDigits(text, 3)}`;
    case "INTL_BANK_IBAN":
    case "IBAN_CODE": {
      const leading = text.slice(0, 2);
      const countryCode = /[a-zA-Z]{2}/.test(leading) ? leading.toUpperCase() : "US";
      return `${countryCode}00${(await _hmacHex(text, 8)).toUpperCase()}`;
    }
    case "ES_DNI": {
      const digits = `000${await _hmacDigits(text, 5)}`;
      return digits + _computeEsIdCheck(parseInt(digits, 10));
    }
    case "PERSON":
    case "PERSON_NAME": {
      const first = await _pickFromArray(text, _FIRST_NAMES);
      const last = await _pickFromArray(text + "last", _LAST_NAMES);
      return `<PER:${first}_${last}>`;
    }
    case "LOCATION":
    case "PHYS_ADDRESS": {
      const city = await _pickFromArray(text, _CITIES);
      return `<LOC:${city}>`;
    }
    case "ORGANIZATION": {
      const stem = await _pickFromArray(text, _LAST_NAMES);
      return `<ORG:${stem}_Inc>`;
    }
    default:
      return `[TKN-${await _hmacHex(text)}]`;
  }
}
|
|
379
|
-
var _masterKey, _EMAIL_RE, _PHONE_RE, _SSN_RE, _CC_RE, _ROUTING_RE,
|
|
424
|
+
var _masterKey, _EMAIL_RE, _PHONE_RE, _SSN_RE, _CC_RE, _ROUTING_RE, _ES_ID_RE, _IBAN_RE, _FIRST_NAMES, _LAST_NAMES, _CITIES;
|
|
380
425
|
var init_fpe = __esm({
|
|
381
426
|
"src/core/fpe.ts"() {
|
|
382
427
|
init_config();
|
|
@@ -385,14 +430,15 @@ var init_fpe = __esm({
|
|
|
385
430
|
init_fpe_utils();
|
|
386
431
|
_masterKey = null;
|
|
387
432
|
_EMAIL_RE = /^[^@\s]+@[^@\s]+\.[^@\s]+$/;
|
|
388
|
-
_PHONE_RE =
|
|
433
|
+
_PHONE_RE = /(?<!\d)(?:\+?1?[\s\-.]?\(?\d{3}\)?[\s\-.]?\d{3}[\s\-.]?\d{4}|\d{3}[\s\-.]?\d{4})(?!\d)/;
|
|
389
434
|
_SSN_RE = /^\d{3}-\d{2}-\d{4}$/;
|
|
390
435
|
_CC_RE = /^(?:\d{4}[ \-]?){3}\d{4}$/;
|
|
391
436
|
_ROUTING_RE = /^\d{9}$/;
|
|
392
|
-
|
|
393
|
-
_SAUDI_NID_RE = /^1\d{9}$/;
|
|
394
|
-
_UAE_EID_RE = /^784-\d{4}-\d{7}-\d$/;
|
|
437
|
+
_ES_ID_RE = /^(?:\d{8}[A-Z]|[XYZ]\d{7}[A-Z])$/;
|
|
395
438
|
_IBAN_RE = /^[A-Z]{2}\d{2}[A-Z0-9]{4,30}$/;
|
|
439
|
+
_FIRST_NAMES = ["Taylor", "Jordan", "Casey", "Morgan", "Riley", "Avery", "Rowan", "Quinn", "Charlie", "Peyton", "Blake", "Dakota", "Reese", "Skyler", "Finley", "Eden", "Harley", "Rory", "Emerson", "Remi"];
|
|
440
|
+
_LAST_NAMES = ["Smith", "Johnson", "Williams", "Brown", "Jones", "Garcia", "Miller", "Davis", "Rodriguez", "Martinez", "Hernandez", "Lopez", "Gonzalez", "Wilson", "Anderson", "Thomas", "Taylor", "Moore", "Jackson", "Martin"];
|
|
441
|
+
_CITIES = ["London", "Paris", "Berlin", "Tokyo", "Rome", "Madrid", "Vienna", "Sydney", "Toronto", "Chicago", "Seattle", "Austin", "Boston", "Denver", "Dallas", "Miami", "Seoul", "Dubai", "Mumbai", "Cairo"];
|
|
396
442
|
}
|
|
397
443
|
});
|
|
398
444
|
|
|
@@ -42993,7 +43039,7 @@ async function encode(rawText, options = {}) {
|
|
|
42993
43039
|
return existingToken;
|
|
42994
43040
|
}
|
|
42995
43041
|
}
|
|
42996
|
-
const token = await generateFPEToken(text);
|
|
43042
|
+
const token = await generateFPEToken(text, options.entityType || "UNKNOWN");
|
|
42997
43043
|
const ciphertext = cryptoEngine.encrypt(text);
|
|
42998
43044
|
const ttl = options.ttl || DEFAULT_TTL;
|
|
42999
43045
|
await vault.store(token, ciphertext, ttl, ptHash);
|
|
@@ -43445,19 +43491,8 @@ var SCRIPT_SIGNATURES; exports.LanguageContextResolver = void 0;
|
|
|
43445
43491
|
var init_assessor = __esm({
|
|
43446
43492
|
"src/core/dlp/assessor.ts"() {
|
|
43447
43493
|
SCRIPT_SIGNATURES = [
|
|
43448
|
-
// CJK / East-Asian — checked first because they are unambiguous
|
|
43449
|
-
{ tag: "zh", pattern: /[\u4e00-\u9fff\u3400-\u4dbf]/g },
|
|
43450
|
-
{ tag: "ja", pattern: /[\u3040-\u309f\u30a0-\u30ff\u31f0-\u31ff]/g },
|
|
43451
|
-
// Arabic script — covers Standard Arabic, Urdu overlap, etc.
|
|
43452
|
-
{ tag: "ar", pattern: /[\u0600-\u06ff\u0750-\u077f\u08a0-\u08ff\ufb50-\ufdff\ufe70-\ufeff]/g },
|
|
43453
|
-
// Turkish — distinguished by dotless-i (ı), soft-g (ğ), ş, and cedilla ç
|
|
43454
|
-
{ tag: "tr", pattern: /[ğıİşŞ]/g },
|
|
43455
|
-
// German — umlauts and Eszett
|
|
43456
|
-
{ tag: "de", pattern: /[äöüÄÖÜß]/g },
|
|
43457
43494
|
// Spanish — ñ and inverted punctuation
|
|
43458
|
-
{ tag: "es", pattern: /[ñÑ¡¿]/g }
|
|
43459
|
-
// French — cedilla, accented vowels with circumflex / diaeresis
|
|
43460
|
-
{ tag: "fr", pattern: /[àâçéèêëïîôùûüÿœæ]/gi }
|
|
43495
|
+
{ tag: "es", pattern: /[ñÑ¡¿]/g }
|
|
43461
43496
|
];
|
|
43462
43497
|
exports.LanguageContextResolver = class {
|
|
43463
43498
|
constructor(charThreshold = 1) {
|
|
@@ -43515,34 +43550,12 @@ var init_registry = __esm({
|
|
|
43515
43550
|
})(exports.SensitiveCategory || {});
|
|
43516
43551
|
LOCALE_NAME_RULES = {
|
|
43517
43552
|
en: [
|
|
43518
|
-
/\b[A-Z][a-z]+ [A-Z][a-z]+(?:\s+[A-Z][a-z]+)?\b/g,
|
|
43519
|
-
/\b(?:Mr|Mrs|Ms|Dr|Prof)\.?\s+[A-Z][a-z]+\b/g
|
|
43553
|
+
/\b[A-Z][a-z\-\']+ [A-Z][a-z\-\']+(?:\s+[A-Z][a-z\-\']+)?\b/g,
|
|
43554
|
+
/\b(?:Mr|Mrs|Ms|Dr|Prof)\.?\s+[A-Z][a-z\-\']+\b/g
|
|
43520
43555
|
],
|
|
43521
43556
|
es: [
|
|
43522
|
-
/\b[A-Z][a-
|
|
43523
|
-
/\b(?:Sr|Sra|Srta)\.?\s+[A-Z][a-
|
|
43524
|
-
],
|
|
43525
|
-
fr: [
|
|
43526
|
-
/\b[A-Z][a-zàâçéèêëïîôùûü]+ [A-Z][a-zàâçéèêëïîôùûü]+\b/g,
|
|
43527
|
-
/\b(?:M|Mme|Mlle)\.?\s+[A-Z][a-zàâçéèêëïîôùûü]+\b/g
|
|
43528
|
-
],
|
|
43529
|
-
de: [
|
|
43530
|
-
/\b[A-Z][a-zäöüß]+ [A-Z][a-zäöüß]+\b/g,
|
|
43531
|
-
/\b(?:Herr|Frau)\.?\s+[A-Z][a-zäöüß]+\b/g
|
|
43532
|
-
],
|
|
43533
|
-
tr: [
|
|
43534
|
-
/\b[A-ZÇĞİÖŞÜ][a-zçğıöşü]+ [A-ZÇĞİÖŞÜ][a-zçğıöşü]+\b/g,
|
|
43535
|
-
/\b(?:Bay|Bayan|Sayın)\.?\s+[A-ZÇĞİÖŞÜ][a-zçğıöşü]+\b/g
|
|
43536
|
-
],
|
|
43537
|
-
ar: [
|
|
43538
|
-
/[\u0621-\u064a][\u0600-\u06ff]+ [\u0621-\u064a][\u0600-\u06ff]+/g,
|
|
43539
|
-
/(?:أبو|أم|ابن|بنت)\s+[\u0621-\u064a][\u0600-\u06ff]+/gi
|
|
43540
|
-
],
|
|
43541
|
-
ja: [
|
|
43542
|
-
/\b[A-Z][a-z]+(?:moto|yama|kawa|mura|ta|da|shi|no)\s+[A-Z][a-z]+\b/g
|
|
43543
|
-
],
|
|
43544
|
-
zh: [
|
|
43545
|
-
/\b[A-Z][a-z]{1,3}\s+[A-Z][a-z]+\b/g
|
|
43557
|
+
/\b[A-Z][a-záéíóúñ\-\']+ [A-Z][a-záéíóúñ\-\']+(?:\s+[A-Z][a-záéíóúñ\-\']+)?\b/g,
|
|
43558
|
+
/\b(?:Sr|Sra|Srta)\.?\s+[A-Z][a-záéíóúñ\-\']+\b/g
|
|
43546
43559
|
]
|
|
43547
43560
|
};
|
|
43548
43561
|
LOCALE_ADDRESS_RULES = {
|
|
@@ -43550,26 +43563,8 @@ var init_registry = __esm({
|
|
|
43550
43563
|
/\b\d{1,5}\s+[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*\s+(?:Street|St|Avenue|Ave|Road|Rd|Boulevard|Blvd|Lane|Ln|Drive|Dr|Court|Ct|Way)\b/g,
|
|
43551
43564
|
/\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*,\s*[A-Z]{2}\s+\d{5}(?:-\d{4})?\b/g
|
|
43552
43565
|
],
|
|
43553
|
-
|
|
43554
|
-
/\b
|
|
43555
|
-
],
|
|
43556
|
-
de: [
|
|
43557
|
-
/\b[A-ZÄÖÜa-zäöüß]+(?:straße|strasse|weg|gasse|platz)\s+\d{1,4}\b/g
|
|
43558
|
-
],
|
|
43559
|
-
tr: [
|
|
43560
|
-
/\b[A-ZÇĞİÖŞÜa-zçğıöşü]+\s+(?:Cad|Sok|Mah)\.?\s+/gi,
|
|
43561
|
-
/\b\d{5}\s+[A-ZÇĞİÖŞÜa-zçğıöşü]+\/[A-ZÇĞİÖŞÜa-zçğıöşü]+\b/g
|
|
43562
|
-
],
|
|
43563
|
-
ar: [
|
|
43564
|
-
/شارع\s+[\u0600-\u06ff]+/g,
|
|
43565
|
-
/حي\s+[\u0600-\u06ff]+/g,
|
|
43566
|
-
/(?:ص\.ب|P\.?O\.?\s*Box)\s*\d{3,6}/gi
|
|
43567
|
-
],
|
|
43568
|
-
uk_postcode: [
|
|
43569
|
-
/\b[A-Z]{1,2}\d{1,2}[A-Z]?\s*\d[A-Z]{2}\b/g
|
|
43570
|
-
],
|
|
43571
|
-
ca_postal: [
|
|
43572
|
-
/\b[A-Z]\d[A-Z]\s*\d[A-Z]\d\b/g
|
|
43566
|
+
es: [
|
|
43567
|
+
/\b(?:Calle|Carrera|Avenida|Paseo|Plaza)\s+[A-ZÀ-ÖØ-Ý][a-zà-öø-ÿ]+\b/gi
|
|
43573
43568
|
]
|
|
43574
43569
|
};
|
|
43575
43570
|
RAW_PATTERNS = [
|
|
@@ -43577,7 +43572,6 @@ var init_registry = __esm({
|
|
|
43577
43572
|
[
|
|
43578
43573
|
"US_SSN",
|
|
43579
43574
|
"\\b(?!000|666|9\\d{2})\\d{3}-(?!00)\\d{2}-(?!0000)\\d{4}\\b",
|
|
43580
|
-
"g",
|
|
43581
43575
|
["ssn", "social security", "tax id", "taxpayer"],
|
|
43582
43576
|
0.95,
|
|
43583
43577
|
"FINANCIAL" /* FINANCIAL */,
|
|
@@ -43586,7 +43580,6 @@ var init_registry = __esm({
|
|
|
43586
43580
|
[
|
|
43587
43581
|
"CREDIT_CARD_NUMBER",
|
|
43588
43582
|
"\\b(?:4\\d{3}|5[1-5]\\d{2}|3[47]\\d{2}|6(?:011|5\\d{2}))[- ]?\\d{4}[- ]?\\d{4}[- ]?\\d{4}\\b",
|
|
43589
|
-
"g",
|
|
43590
43583
|
["card", "credit", "visa", "mastercard", "amex", "payment"],
|
|
43591
43584
|
0.97,
|
|
43592
43585
|
"FINANCIAL" /* FINANCIAL */,
|
|
@@ -43595,7 +43588,6 @@ var init_registry = __esm({
|
|
|
43595
43588
|
[
|
|
43596
43589
|
"INTL_BANK_IBAN",
|
|
43597
43590
|
"\\b[A-Z]{2}\\d{2}[A-Z0-9]{4}\\d{7}[A-Z0-9]{0,16}\\b",
|
|
43598
|
-
"g",
|
|
43599
43591
|
["iban", "swift", "sepa", "wire", "bank transfer"],
|
|
43600
43592
|
0.96,
|
|
43601
43593
|
"FINANCIAL" /* FINANCIAL */,
|
|
@@ -43604,7 +43596,6 @@ var init_registry = __esm({
|
|
|
43604
43596
|
[
|
|
43605
43597
|
"CRYPTO_BTC",
|
|
43606
43598
|
"\\b(?:[13][a-km-zA-HJ-NP-Z1-9]{25,34}|bc1[a-z0-9]{39,59})\\b",
|
|
43607
|
-
"g",
|
|
43608
43599
|
["bitcoin", "btc", "wallet", "crypto"],
|
|
43609
43600
|
0.94,
|
|
43610
43601
|
"FINANCIAL" /* FINANCIAL */,
|
|
@@ -43613,7 +43604,6 @@ var init_registry = __esm({
|
|
|
43613
43604
|
[
|
|
43614
43605
|
"CRYPTO_ETH",
|
|
43615
43606
|
"\\b0x[a-fA-F0-9]{40}\\b",
|
|
43616
|
-
"g",
|
|
43617
43607
|
["ethereum", "eth", "wallet", "0x"],
|
|
43618
43608
|
0.93,
|
|
43619
43609
|
"FINANCIAL" /* FINANCIAL */,
|
|
@@ -43621,8 +43611,7 @@ var init_registry = __esm({
|
|
|
43621
43611
|
],
|
|
43622
43612
|
[
|
|
43623
43613
|
"US_ABA_ROUTING",
|
|
43624
|
-
|
|
43625
|
-
"g",
|
|
43614
|
+
/(?<!\d)\d{9}(?!\d)/,
|
|
43626
43615
|
["routing", "aba", "wire", "bank"],
|
|
43627
43616
|
0.88,
|
|
43628
43617
|
"FINANCIAL" /* FINANCIAL */,
|
|
@@ -43630,17 +43619,15 @@ var init_registry = __esm({
|
|
|
43630
43619
|
],
|
|
43631
43620
|
[
|
|
43632
43621
|
"BANK_ACCT_NUM",
|
|
43633
|
-
|
|
43634
|
-
"g",
|
|
43622
|
+
/(?<!\d)\d{8,17}(?!\d)/,
|
|
43635
43623
|
["account", "checking", "savings", "deposit", "bank"],
|
|
43636
|
-
0.
|
|
43624
|
+
0.5,
|
|
43637
43625
|
"FINANCIAL" /* FINANCIAL */,
|
|
43638
|
-
|
|
43626
|
+
"luhn_soft"
|
|
43639
43627
|
],
|
|
43640
43628
|
[
|
|
43641
43629
|
"SWIFT_BIC",
|
|
43642
43630
|
"\\b[A-Z]{4}[A-Z]{2}[A-Z0-9]{2}(?:[A-Z0-9]{3})?\\b",
|
|
43643
|
-
"gi",
|
|
43644
43631
|
["swift", "bic", "bank code", "transfer"],
|
|
43645
43632
|
0.6,
|
|
43646
43633
|
"FINANCIAL" /* FINANCIAL */,
|
|
@@ -43650,7 +43637,6 @@ var init_registry = __esm({
|
|
|
43650
43637
|
[
|
|
43651
43638
|
"EMAIL_ADDR",
|
|
43652
43639
|
"\\b[A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z]{2,}\\b",
|
|
43653
|
-
"g",
|
|
43654
43640
|
["email", "mail", "contact", "address"],
|
|
43655
43641
|
0.99,
|
|
43656
43642
|
"CONTACT" /* CONTACT */,
|
|
@@ -43658,26 +43644,23 @@ var init_registry = __esm({
|
|
|
43658
43644
|
],
|
|
43659
43645
|
[
|
|
43660
43646
|
"PHONE_NUM",
|
|
43661
|
-
|
|
43662
|
-
"g",
|
|
43647
|
+
/(?<!\d)(?:\+?[1-9]\d{0,3}[-.\s]?)?\(?\d{1,4}\)?[-.\s]?\d{1,4}[-.\s]?\d{1,4}[-.\s]?\d{1,9}(?!\d)/,
|
|
43663
43648
|
["phone", "call", "mobile", "tel", "whatsapp", "number"],
|
|
43664
|
-
0.
|
|
43649
|
+
0.8,
|
|
43665
43650
|
"CONTACT" /* CONTACT */,
|
|
43666
43651
|
null
|
|
43667
43652
|
],
|
|
43668
43653
|
[
|
|
43669
43654
|
"PHONE_NUM_INTL",
|
|
43670
|
-
|
|
43671
|
-
"g",
|
|
43655
|
+
/(?<!\d)\+(?:[1-9]\d{0,3})[-.\s]?\(?\d{1,5}\)?(?:[-.\s]?\d{2,4}){2,4}(?!\d)/,
|
|
43672
43656
|
["phone", "call", "mobile", "tel"],
|
|
43673
|
-
0.
|
|
43657
|
+
0.8,
|
|
43674
43658
|
"CONTACT" /* CONTACT */,
|
|
43675
43659
|
null
|
|
43676
43660
|
],
|
|
43677
43661
|
[
|
|
43678
43662
|
"IPV4_ADDR",
|
|
43679
43663
|
"\\b(?:(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d?)\\.){3}(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d?)\\b",
|
|
43680
|
-
"g",
|
|
43681
43664
|
["ip", "server", "host", "network", "address"],
|
|
43682
43665
|
0.94,
|
|
43683
43666
|
"CONTACT" /* CONTACT */,
|
|
@@ -43686,7 +43669,6 @@ var init_registry = __esm({
|
|
|
43686
43669
|
[
|
|
43687
43670
|
"IPV6_ADDR",
|
|
43688
43671
|
"\\b(?:[A-Fa-f0-9]{1,4}:){7}[A-Fa-f0-9]{1,4}\\b",
|
|
43689
|
-
"g",
|
|
43690
43672
|
["ipv6", "ip", "network", "server"],
|
|
43691
43673
|
0.93,
|
|
43692
43674
|
"CONTACT" /* CONTACT */,
|
|
@@ -43695,7 +43677,6 @@ var init_registry = __esm({
|
|
|
43695
43677
|
[
|
|
43696
43678
|
"HW_MAC_ADDR",
|
|
43697
43679
|
"\\b(?:[0-9A-Fa-f]{2}[:-]){5}[0-9A-Fa-f]{2}\\b",
|
|
43698
|
-
"g",
|
|
43699
43680
|
["mac", "hardware", "network", "device"],
|
|
43700
43681
|
0.91,
|
|
43701
43682
|
"CONTACT" /* CONTACT */,
|
|
@@ -43705,7 +43686,6 @@ var init_registry = __esm({
|
|
|
43705
43686
|
[
|
|
43706
43687
|
"BIRTH_DATE",
|
|
43707
43688
|
"\\b(?:0[1-9]|1[0-2])[/-](?:0[1-9]|[12]\\d|3[01])[/-](?:19|20)\\d{2}\\b",
|
|
43708
|
-
"g",
|
|
43709
43689
|
["birth", "dob", "born", "birthday", "date of birth"],
|
|
43710
43690
|
0.88,
|
|
43711
43691
|
"PERSONAL" /* PERSONAL */,
|
|
@@ -43714,16 +43694,14 @@ var init_registry = __esm({
|
|
|
43714
43694
|
[
|
|
43715
43695
|
"US_DRIVERS_LIC",
|
|
43716
43696
|
"\\b(?:[A-Z]\\d{7,12}|\\d{7,12}[A-Z]?)\\b",
|
|
43717
|
-
"g",
|
|
43718
43697
|
["driver", "license", "licence", "dl", "dmv"],
|
|
43719
|
-
0.
|
|
43698
|
+
0.55,
|
|
43720
43699
|
"PERSONAL" /* PERSONAL */,
|
|
43721
43700
|
null
|
|
43722
43701
|
],
|
|
43723
43702
|
[
|
|
43724
43703
|
"US_PASSPORT_NUM",
|
|
43725
43704
|
"\\b[A-Z]\\d{8}\\b",
|
|
43726
|
-
"g",
|
|
43727
43705
|
["passport", "travel", "visa", "immigration"],
|
|
43728
43706
|
0.87,
|
|
43729
43707
|
"PERSONAL" /* PERSONAL */,
|
|
@@ -43733,7 +43711,6 @@ var init_registry = __esm({
|
|
|
43733
43711
|
[
|
|
43734
43712
|
"VEHICLE_VIN",
|
|
43735
43713
|
"\\b[A-HJ-NPR-Z0-9]{17}\\b",
|
|
43736
|
-
"g",
|
|
43737
43714
|
["vin", "vehicle", "chassis", "automobile"],
|
|
43738
43715
|
0.92,
|
|
43739
43716
|
"VEHICLE" /* VEHICLE */,
|
|
@@ -43742,7 +43719,6 @@ var init_registry = __esm({
|
|
|
43742
43719
|
[
|
|
43743
43720
|
"VEHICLE_PLATE",
|
|
43744
43721
|
"\\b[A-Z0-9]{1,3}[\\-\\s][A-Z0-9]{1,4}[\\-\\s][A-Z0-9]{1,4}\\b",
|
|
43745
|
-
"g",
|
|
43746
43722
|
["plate", "registration", "vehicle", "plaka"],
|
|
43747
43723
|
0.45,
|
|
43748
43724
|
"VEHICLE" /* VEHICLE */,
|
|
@@ -43752,7 +43728,6 @@ var init_registry = __esm({
|
|
|
43752
43728
|
[
|
|
43753
43729
|
"MED_RECORD_ID",
|
|
43754
43730
|
"\\b(?:MRN|Patient ID|Medical Record)[:\\s]*[A-Z0-9]{6,10}\\b",
|
|
43755
|
-
"g",
|
|
43756
43731
|
["patient", "medical", "record", "mrn", "hospital"],
|
|
43757
43732
|
0.96,
|
|
43758
43733
|
"HEALTHCARE" /* HEALTHCARE */,
|
|
@@ -43761,7 +43736,6 @@ var init_registry = __esm({
|
|
|
43761
43736
|
[
|
|
43762
43737
|
"US_MEDICARE_ID",
|
|
43763
43738
|
"\\b\\d{3}-\\d{2}-\\d{4}[A-Z]\\b",
|
|
43764
|
-
"g",
|
|
43765
43739
|
["medicare", "cms", "beneficiary", "health insurance"],
|
|
43766
43740
|
0.91,
|
|
43767
43741
|
"HEALTHCARE" /* HEALTHCARE */,
|
|
@@ -43770,7 +43744,6 @@ var init_registry = __esm({
|
|
|
43770
43744
|
[
|
|
43771
43745
|
"US_DEA_NUM",
|
|
43772
43746
|
"\\b[A-Z]{2}\\d{7}\\b",
|
|
43773
|
-
"g",
|
|
43774
43747
|
["dea", "prescriber", "drug", "enforcement"],
|
|
43775
43748
|
0.89,
|
|
43776
43749
|
"HEALTHCARE" /* HEALTHCARE */,
|
|
@@ -43779,7 +43752,6 @@ var init_registry = __esm({
|
|
|
43779
43752
|
[
|
|
43780
43753
|
"US_NPI_NUM",
|
|
43781
43754
|
"\\b\\d{10}\\b",
|
|
43782
|
-
"g",
|
|
43783
43755
|
["npi", "provider", "national provider", "healthcare"],
|
|
43784
43756
|
0.87,
|
|
43785
43757
|
"HEALTHCARE" /* HEALTHCARE */,
|
|
@@ -43789,7 +43761,6 @@ var init_registry = __esm({
|
|
|
43789
43761
|
[
|
|
43790
43762
|
"US_EIN_TAX",
|
|
43791
43763
|
"\\b\\d{2}-\\d{7}\\b",
|
|
43792
|
-
"g",
|
|
43793
43764
|
["ein", "federal", "employer", "tax id"],
|
|
43794
43765
|
0.89,
|
|
43795
43766
|
"IDENTITY_US" /* IDENTITY_US */,
|
|
@@ -43799,71 +43770,33 @@ var init_registry = __esm({
|
|
|
43799
43770
|
[
|
|
43800
43771
|
"UK_NATL_INS",
|
|
43801
43772
|
"\\b[A-Z]{2}\\d{6}[A-Z]\\b",
|
|
43802
|
-
"g",
|
|
43803
43773
|
["nino", "national insurance", "ni number", "uk"],
|
|
43804
43774
|
0.9,
|
|
43805
43775
|
"IDENTITY_INTL" /* IDENTITY_INTL */,
|
|
43806
|
-
|
|
43776
|
+
"uk_nino"
|
|
43807
43777
|
],
|
|
43808
43778
|
[
|
|
43809
43779
|
"CA_SOCIAL_INS",
|
|
43810
43780
|
"\\b\\d{3}[-\\s]?\\d{3}[-\\s]?\\d{3}\\b",
|
|
43811
|
-
"g",
|
|
43812
43781
|
["sin", "social insurance", "canada", "canadian"],
|
|
43813
43782
|
0.89,
|
|
43814
43783
|
"IDENTITY_INTL" /* IDENTITY_INTL */,
|
|
43815
|
-
|
|
43784
|
+
"ca_sin"
|
|
43816
43785
|
],
|
|
43817
43786
|
[
|
|
43818
|
-
"
|
|
43819
|
-
"
|
|
43820
|
-
"
|
|
43821
|
-
|
|
43822
|
-
0.88,
|
|
43823
|
-
"IDENTITY_INTL" /* IDENTITY_INTL */,
|
|
43824
|
-
null
|
|
43825
|
-
],
|
|
43826
|
-
[
|
|
43827
|
-
"DE_STEUER_ID",
|
|
43828
|
-
"\\b\\d{2}\\s?\\d{3}\\s?\\d{3}\\s?\\d{3}\\b",
|
|
43829
|
-
"g",
|
|
43830
|
-
["steuer", "steuernummer", "finanzamt", "deutschland"],
|
|
43831
|
-
0.87,
|
|
43832
|
-
"IDENTITY_INTL" /* IDENTITY_INTL */,
|
|
43833
|
-
null
|
|
43834
|
-
],
|
|
43835
|
-
[
|
|
43836
|
-
"TR_TCID",
|
|
43837
|
-
"\\b[1-9]\\d{9}[02468]\\b",
|
|
43838
|
-
"g",
|
|
43839
|
-
["tc", "kimlik", "vatanda\u015Fl\u0131k", "n\xFCfus", "t\xFCrkiye"],
|
|
43840
|
-
0.92,
|
|
43841
|
-
"IDENTITY_INTL" /* IDENTITY_INTL */,
|
|
43842
|
-
"tcid"
|
|
43843
|
-
],
|
|
43844
|
-
[
|
|
43845
|
-
"SA_NATIONAL_ID",
|
|
43846
|
-
"\\b1\\d{9}\\b",
|
|
43847
|
-
"g",
|
|
43848
|
-
["\u0647\u0648\u064A\u0629", "\u0631\u0642\u0645 \u0627\u0644\u0647\u0648\u064A\u0629", "saudi", "\u0648\u0637\u0646\u064A\u0629", "identity"],
|
|
43849
|
-
0.91,
|
|
43850
|
-
"IDENTITY_INTL" /* IDENTITY_INTL */,
|
|
43851
|
-
"saudi_nid"
|
|
43852
|
-
],
|
|
43853
|
-
[
|
|
43854
|
-
"UAE_EMIRATES_ID",
|
|
43855
|
-
"\\b784-\\d{4}-\\d{7}-\\d\\b",
|
|
43856
|
-
"g",
|
|
43857
|
-
["emirates", "\u0647\u0648\u064A\u0629", "uae", "emirati", "identity"],
|
|
43858
|
-
0.93,
|
|
43787
|
+
"ES_DNI",
|
|
43788
|
+
"(?:\\d{8}[A-Z]|[XYZ]\\d{7}[A-Z])",
|
|
43789
|
+
["dni", "nie", "identidad", "nif", "spain"],
|
|
43790
|
+
0.94,
|
|
43859
43791
|
"IDENTITY_INTL" /* IDENTITY_INTL */,
|
|
43860
|
-
"
|
|
43792
|
+
"es_id",
|
|
43793
|
+
true,
|
|
43794
|
+
["*", "es"]
|
|
43861
43795
|
],
|
|
43862
43796
|
// ── CORPORATE ──────────────────────────────────────────────────────
|
|
43863
43797
|
[
|
|
43864
43798
|
"CORP_EMPLOYEE_ID",
|
|
43865
|
-
"
|
|
43866
|
-
"gi",
|
|
43799
|
+
"(?:EMP|EMPLOYEE|ID)[:\\s]?[A-Z0-9]{5,10}",
|
|
43867
43800
|
["employee", "staff", "personnel", "worker"],
|
|
43868
43801
|
0.55,
|
|
43869
43802
|
"CORPORATE" /* CORPORATE */,
|
|
@@ -43873,7 +43806,11 @@ var init_registry = __esm({
|
|
|
43873
43806
|
exports.DLPPatternRegistry = class {
|
|
43874
43807
|
constructor(loadGroups) {
|
|
43875
43808
|
this.catalogue = /* @__PURE__ */ new Map();
|
|
43809
|
+
this.localeCategoryRegexMap = /* @__PURE__ */ new Map();
|
|
43876
43810
|
this.buildCatalogue(loadGroups ?? null);
|
|
43811
|
+
for (const loc of ["*", "en", "es"]) {
|
|
43812
|
+
this.compileForLocale(loc);
|
|
43813
|
+
}
|
|
43877
43814
|
}
|
|
43878
43815
|
get typeNames() {
|
|
43879
43816
|
return [...this.catalogue.keys()];
|
|
@@ -43885,23 +43822,74 @@ var init_registry = __esm({
|
|
|
43885
43822
|
descriptorFor(typeName) {
|
|
43886
43823
|
return this.catalogue.get(typeName);
|
|
43887
43824
|
}
|
|
43888
|
-
/** Return locale-tuned name regexes, falling back to English. */
|
|
43889
43825
|
namePatternsFor(lang) {
|
|
43890
43826
|
return LOCALE_NAME_RULES[lang] ?? LOCALE_NAME_RULES["en"];
|
|
43891
43827
|
}
|
|
43892
|
-
/** Return locale-tuned address regexes, falling back to English. */
|
|
43893
43828
|
addressPatternsFor(lang) {
|
|
43894
43829
|
return LOCALE_ADDRESS_RULES[lang] ?? LOCALE_ADDRESS_RULES["en"];
|
|
43895
43830
|
}
|
|
43831
|
+
getCategoryRegexesMap(locale = "en") {
|
|
43832
|
+
if (!this.localeCategoryRegexMap.has(locale)) {
|
|
43833
|
+
this.compileForLocale(locale);
|
|
43834
|
+
}
|
|
43835
|
+
return this.localeCategoryRegexMap.get(locale);
|
|
43836
|
+
}
|
|
43837
|
+
getCategoryTypeMap(categoryName, locale = "en") {
|
|
43838
|
+
return this.localeCategoryRegexMap.get(locale)?.get(categoryName)?.typeOrder ?? [];
|
|
43839
|
+
}
|
|
43840
|
+
compileForLocale(locale) {
|
|
43841
|
+
const localePool = /* @__PURE__ */ new Map();
|
|
43842
|
+
for (const [typeName, desc] of this.catalogue.entries()) {
|
|
43843
|
+
if (desc.supportedLocales.includes("*") || desc.supportedLocales.includes(locale)) {
|
|
43844
|
+
const catKey = desc.category;
|
|
43845
|
+
if (!localePool.has(catKey)) localePool.set(catKey, []);
|
|
43846
|
+
localePool.get(catKey).push([typeName, desc]);
|
|
43847
|
+
}
|
|
43848
|
+
}
|
|
43849
|
+
const categoryMap = /* @__PURE__ */ new Map();
|
|
43850
|
+
for (const [catKey, entries] of localePool.entries()) {
|
|
43851
|
+
entries.sort(([, a6], [, b6]) => {
|
|
43852
|
+
const aVal = a6.validatorTag ? 0 : 1;
|
|
43853
|
+
const bVal = b6.validatorTag ? 0 : 1;
|
|
43854
|
+
if (aVal !== bVal) return aVal - bVal;
|
|
43855
|
+
return b6.compiledRe.source.length - a6.compiledRe.source.length;
|
|
43856
|
+
});
|
|
43857
|
+
const parts = [];
|
|
43858
|
+
const typeOrder = [];
|
|
43859
|
+
for (const [typeName, desc] of entries) {
|
|
43860
|
+
parts.push(`(?<${typeName}>${desc.compiledRe.source})`);
|
|
43861
|
+
typeOrder.push(typeName);
|
|
43862
|
+
}
|
|
43863
|
+
const combinedSource = parts.join("|");
|
|
43864
|
+
const needsI = entries.some(([, d6]) => d6.compiledRe.flags.includes("i"));
|
|
43865
|
+
const flags = needsI ? "gi" : "g";
|
|
43866
|
+
try {
|
|
43867
|
+
const re = new RegExp(combinedSource, flags);
|
|
43868
|
+
categoryMap.set(catKey, { re, typeOrder });
|
|
43869
|
+
} catch (err2) {
|
|
43870
|
+
console.error(`[DLPPatternRegistry] Locale [${locale}] category [${catKey}] failed:`, err2);
|
|
43871
|
+
}
|
|
43872
|
+
}
|
|
43873
|
+
this.localeCategoryRegexMap.set(locale, categoryMap);
|
|
43874
|
+
}
|
|
43896
43875
|
buildCatalogue(restrict) {
|
|
43897
|
-
for (const
|
|
43876
|
+
for (const entry of RAW_PATTERNS) {
|
|
43877
|
+
const [typeName, regexSource, terms, risk, cat, vtag, isHighEntropy, supportedLocales] = entry;
|
|
43898
43878
|
if (restrict !== null && !restrict.has(cat)) continue;
|
|
43879
|
+
let re;
|
|
43880
|
+
if (regexSource instanceof RegExp) {
|
|
43881
|
+
re = regexSource;
|
|
43882
|
+
} else {
|
|
43883
|
+
re = new RegExp(regexSource, "g");
|
|
43884
|
+
}
|
|
43899
43885
|
this.catalogue.set(typeName, {
|
|
43900
|
-
compiledRe:
|
|
43886
|
+
compiledRe: re,
|
|
43901
43887
|
proximityTerms: new Set(terms),
|
|
43902
43888
|
baseRisk: risk,
|
|
43903
43889
|
category: cat,
|
|
43904
|
-
validatorTag: vtag
|
|
43890
|
+
validatorTag: vtag,
|
|
43891
|
+
isHighEntropy: isHighEntropy ?? vtag !== null,
|
|
43892
|
+
supportedLocales: supportedLocales ?? ["*"]
|
|
43905
43893
|
});
|
|
43906
43894
|
}
|
|
43907
43895
|
}
|
|
@@ -43998,29 +43986,13 @@ function checkIpv4Octets(raw) {
|
|
|
43998
43986
|
}
|
|
43999
43987
|
return true;
|
|
44000
43988
|
}
|
|
44001
|
-
function
|
|
44002
|
-
const
|
|
44003
|
-
if (
|
|
44004
|
-
const d6 = digitsStr.split("").map(Number);
|
|
44005
|
-
if (d6[0] === 0) return false;
|
|
44006
|
-
if (d6[10] % 2 !== 0) return false;
|
|
44007
|
-
const oddSum = d6[0] + d6[2] + d6[4] + d6[6] + d6[8];
|
|
44008
|
-
const evenSum = d6[1] + d6[3] + d6[5] + d6[7];
|
|
44009
|
-
const computedD10 = ((oddSum * 7 - evenSum) % 10 + 10) % 10;
|
|
44010
|
-
if (computedD10 !== d6[9]) return false;
|
|
44011
|
-
const firstTenSum = d6.slice(0, 10).reduce((a6, b6) => a6 + b6, 0);
|
|
44012
|
-
if (firstTenSum % 10 !== d6[10]) return false;
|
|
44013
|
-
return true;
|
|
44014
|
-
}
|
|
44015
|
-
function checkSaudiNid(raw) {
|
|
44016
|
-
const digitsStr = raw.replace(/\D/g, "");
|
|
44017
|
-
if (digitsStr.length !== 10) return false;
|
|
44018
|
-
const d6 = digitsStr.split("").map(Number);
|
|
44019
|
-
if (d6[0] !== 1) return false;
|
|
43989
|
+
function checkCaSin(raw) {
|
|
43990
|
+
const digits = raw.replace(/\D/g, "");
|
|
43991
|
+
if (digits.length !== 9) return false;
|
|
44020
43992
|
let total = 0;
|
|
44021
|
-
for (let idx = 0; idx <
|
|
44022
|
-
let val =
|
|
44023
|
-
if (idx % 2 ===
|
|
43993
|
+
for (let idx = 0; idx < digits.length; idx++) {
|
|
43994
|
+
let val = parseInt(digits[idx], 10);
|
|
43995
|
+
if (idx % 2 === 1) {
|
|
44024
43996
|
val *= 2;
|
|
44025
43997
|
if (val > 9) val -= 9;
|
|
44026
43998
|
}
|
|
@@ -44028,7 +44000,30 @@ function checkSaudiNid(raw) {
|
|
|
44028
44000
|
}
|
|
44029
44001
|
return total % 10 === 0;
|
|
44030
44002
|
}
|
|
44031
|
-
|
|
44003
|
+
/**
 * Validate a UK National Insurance number candidate.
 *
 * Space characters are removed and the value is upper-cased; the result must
 * be exactly 9 characters long and match UK_NINO_REGEX.
 *
 * @param {string} raw - Candidate text as matched in the input.
 * @returns {boolean} True when the normalised candidate passes both checks.
 */
function checkUkNino(raw) {
  const candidate = raw.split(" ").join("").toUpperCase();
  return candidate.length === 9 && UK_NINO_REGEX.test(candidate);
}
|
|
44008
|
+
/**
 * Validate a Spanish DNI/NIE candidate by its control letter.
 *
 * Whitespace and hyphens are stripped and the value is upper-cased; it must
 * then be exactly 9 characters. A leading X, Y or Z is replaced by 0, 1 or 2
 * respectively; otherwise the first character must be a digit. The resulting
 * 8-digit number modulo 23 indexes the control-letter table, and the 9th
 * character must equal that letter.
 *
 * @param {string} raw - Candidate text as matched in the input.
 * @returns {boolean} True when the control letter matches.
 */
function checkEsId(raw) {
  const CONTROL_LETTERS = "TRWAGMYFPDXBNJZSQVHLCKE";
  const id = raw.replace(/[\s-]/g, "").toUpperCase();
  if (id.length !== 9) return false;

  const head = id[0];
  let numericPart;
  switch (head) {
    case "X":
      numericPart = "0" + id.slice(1, 8);
      break;
    case "Y":
      numericPart = "1" + id.slice(1, 8);
      break;
    case "Z":
      numericPart = "2" + id.slice(1, 8);
      break;
    default:
      if (!/^\d$/.test(head)) return false;
      numericPart = id.slice(0, 8);
  }

  if (!/^\d+$/.test(numericPart)) return false;
  const expected = CONTROL_LETTERS[parseInt(numericPart, 10) % 23];
  return id[8] === expected;
}
|
|
44026
|
+
var IBAN_COUNTRY_LENGTHS, VIN_TRANSLITERATION, VIN_WEIGHTS, UK_NINO_REGEX, VALIDATOR_DISPATCH; exports.DLPValidationEngine = void 0;
|
|
44032
44027
|
var init_handlers = __esm({
|
|
44033
44028
|
"src/core/dlp/handlers.ts"() {
|
|
44034
44029
|
IBAN_COUNTRY_LENGTHS = {
|
|
@@ -44131,6 +44126,7 @@ var init_handlers = __esm({
|
|
|
44131
44126
|
Z: 9
|
|
44132
44127
|
};
|
|
44133
44128
|
VIN_WEIGHTS = [8, 7, 6, 5, 4, 3, 2, 10, 0, 9, 8, 7, 6, 5, 4, 3, 2];
|
|
44129
|
+
UK_NINO_REGEX = /^(?!BG|GB|NK|KN|TN|NT|ZZ)[A-CEGHJ-PR-TW-Z]{2}[0-9]{6}[A-D]$/;
|
|
44134
44130
|
VALIDATOR_DISPATCH = {
|
|
44135
44131
|
luhn: checkLuhn,
|
|
44136
44132
|
ssn_area: checkSsnArea,
|
|
@@ -44139,8 +44135,9 @@ var init_handlers = __esm({
|
|
|
44139
44135
|
vin_format: checkVinFormat,
|
|
44140
44136
|
btc_format: checkBtcFormat,
|
|
44141
44137
|
ipv4: checkIpv4Octets,
|
|
44142
|
-
|
|
44143
|
-
|
|
44138
|
+
ca_sin: checkCaSin,
|
|
44139
|
+
uk_nino: checkUkNino,
|
|
44140
|
+
es_id: checkEsId
|
|
44144
44141
|
};
|
|
44145
44142
|
exports.DLPValidationEngine = class {
|
|
44146
44143
|
/**
|
|
@@ -44177,7 +44174,8 @@ var init_scorer = __esm({
|
|
|
44177
44174
|
keywordBoost: 0.1,
|
|
44178
44175
|
validatorOverride: 0.99,
|
|
44179
44176
|
maxConfidence: 0.99,
|
|
44180
|
-
penaltyFactor: 0.
|
|
44177
|
+
penaltyFactor: 0.99
|
|
44178
|
+
// Renamed functionally to validator failure penalty subtraction
|
|
44181
44179
|
};
|
|
44182
44180
|
exports.DLPConfidenceScorer = class {
|
|
44183
44181
|
constructor(overrides = {}) {
|
|
@@ -44196,7 +44194,7 @@ var init_scorer = __esm({
|
|
|
44196
44194
|
score(input) {
|
|
44197
44195
|
if (input.validatorPassed === true) return this.valOverride;
|
|
44198
44196
|
if (input.validatorPassed === false) {
|
|
44199
|
-
return Math.
|
|
44197
|
+
return Math.max(0, input.baseRisk - this.penalty);
|
|
44200
44198
|
}
|
|
44201
44199
|
const windowLo = Math.max(0, input.matchStart - this.window);
|
|
44202
44200
|
const windowHi = Math.min(input.fullText.length, input.matchEnd + this.window);
|
|
@@ -44222,6 +44220,47 @@ var init_scorer = __esm({
|
|
|
44222
44220
|
}
|
|
44223
44221
|
});
|
|
44224
44222
|
|
|
44223
|
+
// src/core/span.ts
|
|
44224
|
+
/**
 * Reduce a list of candidate spans to a set in which no two spans overlap.
 *
 * Candidates are visited by ascending `start`; ties go to the longer span,
 * then to the higher `confidence`. For each candidate:
 *   - it is kept if it begins at or after the end of the covered region;
 *   - it is dropped if it lies entirely inside the covered region;
 *   - if it straddles the end of the covered region, it replaces the most
 *     recently kept span only when its confidence is strictly higher,
 *     otherwise it is dropped.
 *
 * The input array is not modified, and the returned array holds the same
 * span objects that were passed in.
 *
 * NOTE(review): when a replacement is itself replaced by a later span, the
 * span removed first is not reinstated even if it no longer overlaps
 * anything that was kept — confirm this is intended.
 *
 * @param {Array<{start: number, end: number, confidence: number}>} spans
 * @returns {Array} Surviving spans ordered by DESCENDING `start`, so they
 *   can be substituted right-to-left without shifting earlier offsets.
 */
function resolveOverlaps(spans) {
  if (spans.length === 0) return [];

  const lengthOf = (s) => s.end - s.start;
  const byStartThenLengthThenConfidence = (left, right) => {
    if (left.start !== right.start) return left.start - right.start;
    const lengthGap = lengthOf(right) - lengthOf(left);
    return lengthGap !== 0 ? lengthGap : right.confidence - left.confidence;
  };

  const kept = [];
  let coveredUpTo = -1;
  [...spans].sort(byStartThenLengthThenConfidence).forEach((candidate) => {
    if (candidate.start < coveredUpTo) {
      // Entirely inside what is already covered: nothing new to mask.
      if (candidate.end <= coveredUpTo) return;
      // Straddles the boundary: only a strictly more confident span may
      // take the place of the span kept last.
      if (!(candidate.confidence > kept[kept.length - 1].confidence)) return;
      kept.pop();
    }
    kept.push(candidate);
    coveredUpTo = candidate.end;
  });

  return kept.sort((left, right) => right.start - left.start);
}
|
|
44251
|
+
/**
 * Rebuild `text` with each span's `[start, end)` range replaced by its
 * `maskedValue`.
 *
 * Replacements are applied from the rightmost span to the leftmost so that
 * a substitution never shifts the offsets of spans still to be applied.
 * The spans are ordered here on a copy, so the caller may pass them in any
 * order and the caller's array is left untouched. Spans whose `maskedValue`
 * is null or undefined are left as the original text.
 *
 * @param {string} text - The original, unmodified text the offsets refer to.
 * @param {Array<{start: number, end: number, maskedValue?: string}>} resolvedSpans
 *   Non-overlapping spans.
 * @returns {string} The text with every masked span substituted.
 */
function reconstruct(text, resolvedSpans) {
  // Array.prototype.sort is stable, so input that is already in descending
  // order is processed in exactly the order it was given.
  const rightToLeft = [...resolvedSpans].sort((a, b) => b.start - a.start);
  let result = text;
  for (const span of rightToLeft) {
    if (span.maskedValue == null) continue;
    result = result.slice(0, span.start) + span.maskedValue + result.slice(span.end);
  }
  return result;
}
|
|
44259
|
+
// Initializer registered for the "src/core/span.ts" module. Its body is
// empty, so invoking it runs no module-level statements.
// NOTE(review): __esm is defined outside this view — presumably the
// bundler's lazy-init helper; confirm.
var init_span = __esm({
|
|
44260
|
+
"src/core/span.ts"() {
|
|
44261
|
+
}
|
|
44262
|
+
});
|
|
44263
|
+
|
|
44225
44264
|
// node_modules/delayed-stream/lib/delayed_stream.js
|
|
44226
44265
|
var require_delayed_stream = __commonJS({
|
|
44227
44266
|
"node_modules/delayed-stream/lib/delayed_stream.js"(exports2, module) {
|
|
@@ -58892,7 +58931,7 @@ var init_transformers_scanner = __esm({
|
|
|
58892
58931
|
confidence = Math.min(1, confidence + 0.2);
|
|
58893
58932
|
}
|
|
58894
58933
|
if (confidence >= confidenceThreshold && !looksLikeToken(val) && val.length > 1) {
|
|
58895
|
-
const token = await encodeFn(val);
|
|
58934
|
+
const token = await encodeFn(val, { entityType });
|
|
58896
58935
|
entities.push({
|
|
58897
58936
|
type: entityType,
|
|
58898
58937
|
value: val,
|
|
@@ -58999,17 +59038,18 @@ var init_scanner = __esm({
|
|
|
58999
59038
|
init_registry();
|
|
59000
59039
|
init_handlers();
|
|
59001
59040
|
init_scorer();
|
|
59041
|
+
init_span();
|
|
59002
59042
|
_dlpLanguageResolver = new exports.LanguageContextResolver();
|
|
59003
59043
|
_dlpPatternRegistry = new exports.DLPPatternRegistry();
|
|
59004
59044
|
_dlpValidationEngine = new exports.DLPValidationEngine();
|
|
59005
59045
|
_dlpConfidenceScorer = new exports.DLPConfidenceScorer();
|
|
59006
59046
|
REGEX_PATTERNS = {
|
|
59007
59047
|
"EMAIL_ADDRESS": /[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+/g,
|
|
59008
|
-
"PHONE_NUMBER":
|
|
59009
|
-
"PHONE_NUMBER_INTL":
|
|
59010
|
-
"US_SSN":
|
|
59011
|
-
"CREDIT_CARD": /(?:\d{4}[ \-]?){3}\d{4}/g,
|
|
59012
|
-
"US_ROUTING_NUMBER":
|
|
59048
|
+
"PHONE_NUMBER": /(?<!\d)(?:\+?1?[\s\-.]?\(?\d{3}\)?[\s\-.]?\d{3}[\s\-.]?\d{4}|\d{3}[\s\-.]?\d{4})(?!\d)/g,
|
|
59049
|
+
"PHONE_NUMBER_INTL": /(?<!\d)\+(?:[1-9]\d{0,3})[-.\s]?\(?\d{1,5}\)?(?:[-.\s]?\d{2,4}){2,4}(?!\d)/g,
|
|
59050
|
+
"US_SSN": /(?<!\d)\d{3}-\d{2}-\d{4}(?!\d)/g,
|
|
59051
|
+
"CREDIT_CARD": /(?<!\d)(?:\d{4}[ \-]?){3}\d{4}(?!\d)/g,
|
|
59052
|
+
"US_ROUTING_NUMBER": /(?<!\d)\d{9}(?!\d)/g,
|
|
59013
59053
|
"US_PASSPORT": /\b[A-Z]\d{8}\b/g,
|
|
59014
59054
|
"DATE_OF_BIRTH": /\b(?:0[1-9]|1[0-2])\/(?:0[1-9]|[12]\d|3[01])\/(?:19|20)\d{2}\b|\b(?:19|20)\d{2}-(?:0[1-9]|1[0-2])-(?:0[1-9]|[12]\d|3[01])\b/g
|
|
59015
59055
|
};
|
|
@@ -59064,26 +59104,55 @@ var init_scanner = __esm({
|
|
|
59064
59104
|
const checksum = 3 * (d6[0] + d6[3] + d6[6]) + 7 * (d6[1] + d6[4] + d6[7]) + (d6[2] + d6[5] + d6[8]);
|
|
59065
59105
|
return checksum % 10 === 0;
|
|
59066
59106
|
}
|
|
59067
|
-
async
|
|
59107
|
+
async _tier0CollectSpans(text, confidenceThreshold) {
|
|
59068
59108
|
const detectedLanguage = _dlpLanguageResolver.resolve(text);
|
|
59069
|
-
const
|
|
59070
|
-
|
|
59071
|
-
|
|
59109
|
+
const spans = [];
|
|
59110
|
+
const categoryMap = _dlpPatternRegistry.getCategoryRegexesMap();
|
|
59111
|
+
for (const [catKey, { re, typeOrder }] of categoryMap.entries()) {
|
|
59112
|
+
const megaRe = new RegExp(re.source, re.flags);
|
|
59072
59113
|
let m6;
|
|
59073
|
-
while ((m6 =
|
|
59114
|
+
while ((m6 = megaRe.exec(text)) !== null) {
|
|
59115
|
+
const groups = m6.groups ?? {};
|
|
59116
|
+
let typeTag;
|
|
59117
|
+
for (const name of typeOrder) {
|
|
59118
|
+
if (groups[name] !== void 0) {
|
|
59119
|
+
typeTag = name;
|
|
59120
|
+
break;
|
|
59121
|
+
}
|
|
59122
|
+
}
|
|
59123
|
+
if (!typeTag) continue;
|
|
59074
59124
|
const matchedStr = m6[0];
|
|
59075
59125
|
if (looksLikeToken(matchedStr)) continue;
|
|
59126
|
+
const descriptor = _dlpPatternRegistry.descriptorFor(typeTag);
|
|
59127
|
+
if (!descriptor) continue;
|
|
59076
59128
|
const validatorResult = _dlpValidationEngine.run(descriptor.validatorTag, matchedStr);
|
|
59077
|
-
|
|
59078
|
-
|
|
59079
|
-
|
|
59080
|
-
|
|
59081
|
-
|
|
59082
|
-
|
|
59083
|
-
|
|
59084
|
-
}
|
|
59129
|
+
let conf;
|
|
59130
|
+
if (validatorResult === false) {
|
|
59131
|
+
if (descriptor.isHighEntropy) {
|
|
59132
|
+
conf = 0.85;
|
|
59133
|
+
} else {
|
|
59134
|
+
continue;
|
|
59135
|
+
}
|
|
59136
|
+
} else {
|
|
59137
|
+
conf = _dlpConfidenceScorer.score({
|
|
59138
|
+
baseRisk: descriptor.baseRisk,
|
|
59139
|
+
matchStart: m6.index,
|
|
59140
|
+
matchEnd: m6.index + matchedStr.length,
|
|
59141
|
+
fullText: text,
|
|
59142
|
+
proximityTerms: descriptor.proximityTerms,
|
|
59143
|
+
validatorPassed: validatorResult
|
|
59144
|
+
});
|
|
59145
|
+
}
|
|
59085
59146
|
if (conf >= confidenceThreshold) {
|
|
59086
|
-
|
|
59147
|
+
spans.push({
|
|
59148
|
+
start: m6.index,
|
|
59149
|
+
end: m6.index + matchedStr.length,
|
|
59150
|
+
entityType: typeTag,
|
|
59151
|
+
originalValue: matchedStr,
|
|
59152
|
+
confidence: conf,
|
|
59153
|
+
method: "dlp_heuristic",
|
|
59154
|
+
language: detectedLanguage
|
|
59155
|
+
});
|
|
59087
59156
|
}
|
|
59088
59157
|
}
|
|
59089
59158
|
}
|
|
@@ -59102,7 +59171,15 @@ var init_scanner = __esm({
|
|
|
59102
59171
|
validatorPassed: null
|
|
59103
59172
|
});
|
|
59104
59173
|
if (conf >= confidenceThreshold) {
|
|
59105
|
-
|
|
59174
|
+
spans.push({
|
|
59175
|
+
start: m6.index,
|
|
59176
|
+
end: m6.index + m6[0].length,
|
|
59177
|
+
entityType: "PERSON_NAME",
|
|
59178
|
+
originalValue: m6[0],
|
|
59179
|
+
confidence: conf,
|
|
59180
|
+
method: "dlp_heuristic",
|
|
59181
|
+
language: detectedLanguage
|
|
59182
|
+
});
|
|
59106
59183
|
}
|
|
59107
59184
|
}
|
|
59108
59185
|
}
|
|
@@ -59111,85 +59188,78 @@ var init_scanner = __esm({
|
|
|
59111
59188
|
let m6;
|
|
59112
59189
|
while ((m6 = re.exec(text)) !== null) {
|
|
59113
59190
|
if (looksLikeToken(m6[0])) continue;
|
|
59114
|
-
|
|
59115
|
-
|
|
59116
|
-
|
|
59117
|
-
|
|
59118
|
-
|
|
59119
|
-
|
|
59120
|
-
|
|
59121
|
-
|
|
59122
|
-
|
|
59123
|
-
occupiedEnd = hit.end;
|
|
59191
|
+
spans.push({
|
|
59192
|
+
start: m6.index,
|
|
59193
|
+
end: m6.index + m6[0].length,
|
|
59194
|
+
entityType: "PHYS_ADDRESS",
|
|
59195
|
+
originalValue: m6[0],
|
|
59196
|
+
confidence: 0.55,
|
|
59197
|
+
method: "dlp_heuristic",
|
|
59198
|
+
language: detectedLanguage
|
|
59199
|
+
});
|
|
59124
59200
|
}
|
|
59125
59201
|
}
|
|
59202
|
+
return spans;
|
|
59203
|
+
}
|
|
59204
|
+
/** Backward-compat wrapper — collects spans then single-pass encodes. */
|
|
59205
|
+
async _tier0Dlp(text, encodeFn, confidenceThreshold) {
|
|
59206
|
+
const spans = await this._tier0CollectSpans(text, confidenceThreshold);
|
|
59207
|
+
const resolved = resolveOverlaps(spans);
|
|
59126
59208
|
const entities = [];
|
|
59127
|
-
|
|
59128
|
-
|
|
59129
|
-
const token = await encodeFn(hit.val);
|
|
59130
|
-
excised = excised.slice(0, hit.start) + token + excised.slice(hit.end);
|
|
59209
|
+
await Promise.all(resolved.map(async (span) => {
|
|
59210
|
+
span.maskedValue = await encodeFn(span.originalValue, { entityType: span.entityType });
|
|
59131
59211
|
entities.push({
|
|
59132
|
-
type:
|
|
59133
|
-
value:
|
|
59134
|
-
method:
|
|
59135
|
-
confidence:
|
|
59136
|
-
masked_value:
|
|
59137
|
-
language:
|
|
59212
|
+
type: span.entityType,
|
|
59213
|
+
value: span.originalValue,
|
|
59214
|
+
method: span.method,
|
|
59215
|
+
confidence: span.confidence,
|
|
59216
|
+
masked_value: span.maskedValue,
|
|
59217
|
+
language: span.language
|
|
59138
59218
|
});
|
|
59139
|
-
}
|
|
59140
|
-
return [
|
|
59219
|
+
}));
|
|
59220
|
+
return [reconstruct(text, resolved), entities];
|
|
59141
59221
|
}
|
|
59142
|
-
async
|
|
59143
|
-
|
|
59144
|
-
let excised = text;
|
|
59145
|
-
let allMatches = [];
|
|
59222
|
+
async _tier1CollectSpans(text, boostEntities, aggressive, confidenceThreshold) {
|
|
59223
|
+
const spans = [];
|
|
59146
59224
|
for (const [entityType, pattern] of Object.entries(REGEX_PATTERNS)) {
|
|
59147
59225
|
const re = new RegExp(pattern.source, pattern.flags);
|
|
59148
59226
|
let match;
|
|
59149
59227
|
while ((match = re.exec(text)) !== null) {
|
|
59150
|
-
|
|
59151
|
-
if (
|
|
59152
|
-
|
|
59153
|
-
|
|
59154
|
-
if (entityType === "
|
|
59155
|
-
|
|
59156
|
-
|
|
59157
|
-
|
|
59158
|
-
|
|
59228
|
+
const val = match[0];
|
|
59229
|
+
if (looksLikeToken(val)) continue;
|
|
59230
|
+
let confidence = aggressive || boostEntities.has(entityType.toLowerCase().replace(/_/g, " ")) ? 1 : 0.95;
|
|
59231
|
+
if (entityType === "CREDIT_CARD" && _BaseScanner._luhnChecksum(val)) confidence = Math.max(confidence, 0.99);
|
|
59232
|
+
if (entityType === "US_ROUTING_NUMBER" && !_BaseScanner._abaChecksum(val)) continue;
|
|
59233
|
+
if (confidence >= confidenceThreshold) {
|
|
59234
|
+
spans.push({
|
|
59235
|
+
start: match.index,
|
|
59236
|
+
end: match.index + val.length,
|
|
59237
|
+
entityType,
|
|
59238
|
+
originalValue: val,
|
|
59239
|
+
confidence,
|
|
59240
|
+
method: "regex"
|
|
59241
|
+
});
|
|
59159
59242
|
}
|
|
59160
|
-
allMatches.push({
|
|
59161
|
-
start: match.index,
|
|
59162
|
-
end: match.index + match[0].length,
|
|
59163
|
-
type: entityType,
|
|
59164
|
-
value: match[0],
|
|
59165
|
-
confidence
|
|
59166
|
-
});
|
|
59167
|
-
}
|
|
59168
|
-
}
|
|
59169
|
-
allMatches.sort((a6, b6) => a6.start - b6.start || b6.end - b6.start - (a6.end - a6.start));
|
|
59170
|
-
let filtered = [];
|
|
59171
|
-
let lastEnd = -1;
|
|
59172
|
-
for (const m6 of allMatches) {
|
|
59173
|
-
if (m6.start >= lastEnd) {
|
|
59174
|
-
filtered.push(m6);
|
|
59175
|
-
lastEnd = m6.end;
|
|
59176
59243
|
}
|
|
59177
59244
|
}
|
|
59178
|
-
|
|
59179
|
-
|
|
59180
|
-
|
|
59181
|
-
|
|
59182
|
-
|
|
59183
|
-
|
|
59184
|
-
|
|
59185
|
-
|
|
59186
|
-
|
|
59187
|
-
|
|
59188
|
-
|
|
59189
|
-
|
|
59190
|
-
|
|
59191
|
-
|
|
59192
|
-
|
|
59245
|
+
return spans;
|
|
59246
|
+
}
|
|
59247
|
+
/** Backward-compat wrapper. */
|
|
59248
|
+
async _tier1Regex(text, encodeFn, boostEntities, aggressive, confidenceThreshold) {
|
|
59249
|
+
const spans = await this._tier1CollectSpans(text, boostEntities, aggressive, confidenceThreshold);
|
|
59250
|
+
const resolved = resolveOverlaps(spans);
|
|
59251
|
+
const entities = [];
|
|
59252
|
+
await Promise.all(resolved.map(async (span) => {
|
|
59253
|
+
span.maskedValue = await encodeFn(span.originalValue, { entityType: span.entityType });
|
|
59254
|
+
entities.push({
|
|
59255
|
+
type: span.entityType,
|
|
59256
|
+
value: span.originalValue,
|
|
59257
|
+
method: span.method,
|
|
59258
|
+
confidence: span.confidence,
|
|
59259
|
+
masked_value: span.maskedValue
|
|
59260
|
+
});
|
|
59261
|
+
}));
|
|
59262
|
+
return [reconstruct(text, resolved), entities];
|
|
59193
59263
|
}
|
|
59194
59264
|
async _tier2Nlp(text, encodeFn, boostEntities, aggressive, confidenceThreshold) {
|
|
59195
59265
|
return [text, []];
|
|
@@ -59209,13 +59279,18 @@ var init_scanner = __esm({
|
|
|
59209
59279
|
const _encode = options.encodeFn || encode;
|
|
59210
59280
|
const confidenceThreshold = options.confidenceThreshold ?? 0.7;
|
|
59211
59281
|
const boost = this._resolveBoost(options.context);
|
|
59212
|
-
|
|
59282
|
+
const allSpans = [];
|
|
59213
59283
|
if (pipeline.includes("dlp")) {
|
|
59214
|
-
|
|
59284
|
+
allSpans.push(...await this._tier0CollectSpans(text, confidenceThreshold));
|
|
59215
59285
|
}
|
|
59216
59286
|
if (pipeline.includes("regex") || pipeline.includes("checksum")) {
|
|
59217
|
-
|
|
59287
|
+
allSpans.push(...await this._tier1CollectSpans(text, boost, !!options.aggressive, confidenceThreshold));
|
|
59218
59288
|
}
|
|
59289
|
+
const resolved = resolveOverlaps(allSpans);
|
|
59290
|
+
await Promise.all(resolved.map(async (span) => {
|
|
59291
|
+
span.maskedValue = await _encode(span.originalValue, { entityType: span.entityType });
|
|
59292
|
+
}));
|
|
59293
|
+
let currentText = reconstruct(text, resolved);
|
|
59219
59294
|
if (pipeline.includes("nlp")) {
|
|
59220
59295
|
[currentText] = await this._tier2Nlp(currentText, _encode, boost, !!options.aggressive, confidenceThreshold);
|
|
59221
59296
|
}
|
|
@@ -59227,20 +59302,29 @@ var init_scanner = __esm({
|
|
|
59227
59302
|
const _encode = options.encodeFn || encode;
|
|
59228
59303
|
const confidenceThreshold = options.confidenceThreshold ?? 0.7;
|
|
59229
59304
|
const boost = this._resolveBoost(options.context);
|
|
59230
|
-
|
|
59231
|
-
|
|
59305
|
+
const allEntities = [];
|
|
59306
|
+
const allSpans = [];
|
|
59232
59307
|
if (pipeline.includes("dlp")) {
|
|
59233
|
-
|
|
59234
|
-
remaining = newText;
|
|
59235
|
-
allEntities.push(...tier0);
|
|
59308
|
+
allSpans.push(...await this._tier0CollectSpans(text, confidenceThreshold));
|
|
59236
59309
|
}
|
|
59237
59310
|
if (pipeline.includes("regex") || pipeline.includes("checksum")) {
|
|
59238
|
-
|
|
59239
|
-
|
|
59240
|
-
|
|
59241
|
-
|
|
59311
|
+
allSpans.push(...await this._tier1CollectSpans(text, boost, !!options.aggressive, confidenceThreshold));
|
|
59312
|
+
}
|
|
59313
|
+
const resolved = resolveOverlaps(allSpans);
|
|
59314
|
+
await Promise.all(resolved.map(async (span) => {
|
|
59315
|
+
span.maskedValue = await _encode(span.originalValue, { entityType: span.entityType });
|
|
59316
|
+
allEntities.push({
|
|
59317
|
+
type: span.entityType,
|
|
59318
|
+
value: span.originalValue,
|
|
59319
|
+
method: span.method,
|
|
59320
|
+
confidence: span.confidence,
|
|
59321
|
+
masked_value: span.maskedValue,
|
|
59322
|
+
language: span.language
|
|
59323
|
+
});
|
|
59324
|
+
}));
|
|
59325
|
+
const remaining = reconstruct(text, resolved);
|
|
59242
59326
|
if (pipeline.includes("nlp")) {
|
|
59243
|
-
const [
|
|
59327
|
+
const [, tier2] = await this._tier2Nlp(remaining, _encode, boost, !!options.aggressive, confidenceThreshold);
|
|
59244
59328
|
allEntities.push(...tier2);
|
|
59245
59329
|
}
|
|
59246
59330
|
return allEntities;
|