mask-privacy 3.1.0 → 3.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -17
- package/dist/index.d.mts +5 -11
- package/dist/index.d.ts +5 -11
- package/dist/index.js +7 -268
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +7 -268
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
- package/src/core/dlp/assessor.ts +3 -26
- package/src/core/dlp/handlers.ts +0 -98
- package/src/core/dlp/index.ts +0 -2
- package/src/core/dlp/registry.ts +3 -67
- package/src/core/dlp/scorer.ts +2 -2
- package/src/core/fpe.ts +1 -59
- package/src/core/fpe_utils.ts +0 -28
- package/tests/dlp_hardened.test.ts +0 -17
package/README.md
CHANGED
|
@@ -87,12 +87,6 @@ Mask prevents the misidentification of real data as tokens by using universally
|
|
|
87
87
|
|
|
88
88
|
This prefix-based approach ensures that the SDK does not inadvertently process valid PII as an existing token.
|
|
89
89
|
|
|
90
|
-
Additional collision-proof prefixes for international identifiers:
|
|
91
|
-
* Turkish TCID tokens use the `990000` prefix (no valid Kimlik number starts with `99`).
|
|
92
|
-
* Saudi NID tokens use the `100000` prefix (length-constrained to avoid overlap with real IDs).
|
|
93
|
-
* UAE Emirates ID tokens use the `784-0000-` prefix (zeroed sub-fields are structurally invalid).
|
|
94
|
-
* IBAN tokens zero the check digits (`XX00...`), which always fails ISO 7064 Mod-97 verification.
|
|
95
|
-
|
|
96
90
|
### 4. Enterprise Async Support
|
|
97
91
|
Mask is built from the ground up for high-concurrency Node.js environments. All core operations are asynchronous and promise-based. Calling `encode()`, `decode()`, or `scanAndTokenize()` allows your event loop to remain unblocked while handling PII tokenization tasks.
|
|
98
92
|
|
|
@@ -135,7 +129,7 @@ Mask includes the ability to detokenize PII embedded within larger text blocks (
|
|
|
135
129
|
|
|
136
130
|
## Multilingual PII Detection (Waterfall Pipeline)
|
|
137
131
|
|
|
138
|
-
Mask is built for the global enterprise.
|
|
132
|
+
Mask is built for the global enterprise. The TypeScript SDK implements a **3-Tier Waterfall Detection** strategy for high-precision PII detection in **English and Spanish** using local ONNX models.
|
|
139
133
|
|
|
140
134
|
### Supported Language Matrix
|
|
141
135
|
|
|
@@ -145,12 +139,6 @@ Mask provides first-class support for the following languages:
|
|
|
145
139
|
| :--- | :--- | :--- | :--- |
|
|
146
140
|
| **English** | `en` | ✅ Full | DistilBERT (Simple) |
|
|
147
141
|
| **Spanish** | `es` | ✅ Full | BERT Multilingual |
|
|
148
|
-
| **French** | `fr` | ✅ Full | BERT Multilingual |
|
|
149
|
-
| **German** | `de` | ✅ Full | BERT Multilingual |
|
|
150
|
-
| **Turkish** | `tr` | ✅ Full | BERT Multilingual |
|
|
151
|
-
| **Arabic** | `ar` | ✅ Full | BERT Multilingual |
|
|
152
|
-
| **Japanese** | `ja` | ✅ Full | BERT Multilingual |
|
|
153
|
-
| **Chinese** | `zh` | ✅ Full | BERT Multilingual |
|
|
154
142
|
|
|
155
143
|
### How the Waterfall Works: The Excising Mechanism
|
|
156
144
|
|
|
@@ -165,11 +153,11 @@ To maintain high performance, the TypeScript SDK does not simply run three separ
|
|
|
165
153
|
|
|
166
154
|
### Configuration & Environment Variables
|
|
167
155
|
|
|
168
|
-
Configure your
|
|
156
|
+
Configure your language environment using standard variables.
|
|
169
157
|
|
|
170
158
|
| Variable | Default | Description |
|
|
171
159
|
| :--- | :--- | :--- |
|
|
172
|
-
| `MASK_LANGUAGES` | `en` | Comma-separated
|
|
160
|
+
| `MASK_LANGUAGES` | `en` | Comma-separated language codes. Supported: `en`, `es`. |
|
|
173
161
|
| `MASK_NLP_MODEL` | *(varies)* | Override the default model (e.g., `Xenova/bert-base-multilingual-cased-ner-hrl`). |
|
|
174
162
|
| `MASK_MODEL_CACHE_DIR` | `~/.cache` | Local directory for storing serialized ONNX models. |
|
|
175
163
|
| `MASK_NLP_MAX_WORKERS` | `4` | Number of worker processes/threads for NLP analysis. |
|
|
@@ -221,8 +209,8 @@ The TypeScript SDK manages AI models automatically via **Transformers.js**. For
|
|
|
221
209
|
```bash
|
|
222
210
|
npm install @huggingface/transformers # Required extra
|
|
223
211
|
|
|
224
|
-
# Pre-cache models for
|
|
225
|
-
export MASK_LANGUAGES="en,es
|
|
212
|
+
# Pre-cache models for English and Spanish
|
|
213
|
+
export MASK_LANGUAGES="en,es"
|
|
226
214
|
npx mask-privacy cache-models
|
|
227
215
|
```
|
|
228
216
|
|
package/dist/index.d.mts
CHANGED
|
@@ -312,14 +312,8 @@ declare class MaskClient {
|
|
|
312
312
|
* Supported language tags:
|
|
313
313
|
* en — English (default / Latin-only fallback)
|
|
314
314
|
* es — Spanish
|
|
315
|
-
* fr — French
|
|
316
|
-
* de — German
|
|
317
|
-
* tr — Turkish
|
|
318
|
-
* ar — Arabic
|
|
319
|
-
* zh — Chinese
|
|
320
|
-
* ja — Japanese
|
|
321
315
|
*/
|
|
322
|
-
type LanguageTag = "en" | "es"
|
|
316
|
+
type LanguageTag = "en" | "es";
|
|
323
317
|
interface LanguageBreakdown {
|
|
324
318
|
language: LanguageTag;
|
|
325
319
|
breakdown: Record<string, number>;
|
|
@@ -332,8 +326,8 @@ interface LanguageBreakdown {
|
|
|
332
326
|
* @example
|
|
333
327
|
* ```ts
|
|
334
328
|
* const resolver = new LanguageContextResolver();
|
|
335
|
-
* const tag = resolver.resolve("
|
|
336
|
-
* // tag === "
|
|
329
|
+
* const tag = resolver.resolve("Hola, mi DNI es 12345678Z");
|
|
330
|
+
* // tag === "es"
|
|
337
331
|
* ```
|
|
338
332
|
*/
|
|
339
333
|
declare class LanguageContextResolver {
|
|
@@ -457,8 +451,8 @@ interface ScoreInput {
|
|
|
457
451
|
* baseRisk: 0.92,
|
|
458
452
|
* matchStart: 10,
|
|
459
453
|
* matchEnd: 21,
|
|
460
|
-
* fullText: "
|
|
461
|
-
* proximityTerms: new Set(["
|
|
454
|
+
* fullText: "Mi número de DNI es 12345678Z",
|
|
455
|
+
* proximityTerms: new Set(["dni", "número"]),
|
|
462
456
|
* validatorPassed: true,
|
|
463
457
|
* });
|
|
464
458
|
* // score === 0.99 (validator override)
|
package/dist/index.d.ts
CHANGED
|
@@ -312,14 +312,8 @@ declare class MaskClient {
|
|
|
312
312
|
* Supported language tags:
|
|
313
313
|
* en — English (default / Latin-only fallback)
|
|
314
314
|
* es — Spanish
|
|
315
|
-
* fr — French
|
|
316
|
-
* de — German
|
|
317
|
-
* tr — Turkish
|
|
318
|
-
* ar — Arabic
|
|
319
|
-
* zh — Chinese
|
|
320
|
-
* ja — Japanese
|
|
321
315
|
*/
|
|
322
|
-
type LanguageTag = "en" | "es"
|
|
316
|
+
type LanguageTag = "en" | "es";
|
|
323
317
|
interface LanguageBreakdown {
|
|
324
318
|
language: LanguageTag;
|
|
325
319
|
breakdown: Record<string, number>;
|
|
@@ -332,8 +326,8 @@ interface LanguageBreakdown {
|
|
|
332
326
|
* @example
|
|
333
327
|
* ```ts
|
|
334
328
|
* const resolver = new LanguageContextResolver();
|
|
335
|
-
* const tag = resolver.resolve("
|
|
336
|
-
* // tag === "
|
|
329
|
+
* const tag = resolver.resolve("Hola, mi DNI es 12345678Z");
|
|
330
|
+
* // tag === "es"
|
|
337
331
|
* ```
|
|
338
332
|
*/
|
|
339
333
|
declare class LanguageContextResolver {
|
|
@@ -457,8 +451,8 @@ interface ScoreInput {
|
|
|
457
451
|
* baseRisk: 0.92,
|
|
458
452
|
* matchStart: 10,
|
|
459
453
|
* matchEnd: 21,
|
|
460
|
-
* fullText: "
|
|
461
|
-
* proximityTerms: new Set(["
|
|
454
|
+
* fullText: "Mi número de DNI es 12345678Z",
|
|
455
|
+
* proximityTerms: new Set(["dni", "número"]),
|
|
462
456
|
* validatorPassed: true,
|
|
463
457
|
* });
|
|
464
458
|
* // score === 0.99 (validator override)
|
package/dist/index.js
CHANGED
|
@@ -274,21 +274,6 @@ function looksLikeToken(value) {
|
|
|
274
274
|
if (v7.startsWith("000000") && v7.length === 9) {
|
|
275
275
|
return true;
|
|
276
276
|
}
|
|
277
|
-
if (v7.startsWith("784-0000-") && v7.length === 18) {
|
|
278
|
-
return true;
|
|
279
|
-
}
|
|
280
|
-
if (v7.length === 11 && v7.startsWith("990000") && /^\d+$/.test(v7) && parseInt(v7[v7.length - 1], 10) % 2 === 0) {
|
|
281
|
-
return true;
|
|
282
|
-
}
|
|
283
|
-
if (v7.length === 10 && v7.startsWith("100000") && /^\d+$/.test(v7)) {
|
|
284
|
-
return true;
|
|
285
|
-
}
|
|
286
|
-
if (v7.length === 18 && v7.startsWith("88000019900101")) {
|
|
287
|
-
return true;
|
|
288
|
-
}
|
|
289
|
-
if (v7.length === 12 && v7.startsWith("000000")) {
|
|
290
|
-
return true;
|
|
291
|
-
}
|
|
292
277
|
if (v7.length === 9 && v7.startsWith("000") && /[A-Z]$/.test(v7)) {
|
|
293
278
|
return true;
|
|
294
279
|
}
|
|
@@ -307,7 +292,7 @@ var TOKEN_PATTERN;
|
|
|
307
292
|
var init_fpe_utils = __esm({
|
|
308
293
|
"src/core/fpe_utils.ts"() {
|
|
309
294
|
TOKEN_PATTERN = new RegExp(
|
|
310
|
-
"tkn-[a-f0-9]{8,64}@[A-Za-z0-9.\\-]+\\.[A-Za-z]{2,}|\\+[1-9]\\d{0,3}-555-\\d{7}|000-00-\\d{4}|4000-0000-0000-\\d{4}|000000\\d{3}|
|
|
295
|
+
"tkn-[a-f0-9]{8,64}@[A-Za-z0-9.\\-]+\\.[A-Za-z]{2,}|\\+[1-9]\\d{0,3}-555-\\d{7}|000-00-\\d{4}|4000-0000-0000-\\d{4}|000000\\d{3}|000\\d{5}[A-Z]|[A-Z]{2}00[A-F0-9]{4,16}|<(?:PER|LOC|ORG):[^>]+>|\\[TKN-[a-f0-9]{8,64}\\]",
|
|
311
296
|
// Opaque
|
|
312
297
|
"g"
|
|
313
298
|
);
|
|
@@ -376,24 +361,6 @@ function _computeLuhnDigit(partialNum) {
|
|
|
376
361
|
}
|
|
377
362
|
return ((10 - sum % 10) % 10).toString();
|
|
378
363
|
}
|
|
379
|
-
function _computeCnIdCheck(partial) {
|
|
380
|
-
const weights = [7, 9, 10, 5, 8, 4, 2, 1, 6, 3, 7, 9, 10, 5, 8, 4, 2];
|
|
381
|
-
const checkDigits = "10X98765432";
|
|
382
|
-
let total = 0;
|
|
383
|
-
for (let i6 = 0; i6 < 17; i6++) {
|
|
384
|
-
total += parseInt(partial[i6], 10) * weights[i6];
|
|
385
|
-
}
|
|
386
|
-
return checkDigits[total % 11];
|
|
387
|
-
}
|
|
388
|
-
function _computeJaIdCheck(partial) {
|
|
389
|
-
const weights = [6, 5, 4, 3, 2, 7, 6, 5, 4, 3, 2];
|
|
390
|
-
let total = 0;
|
|
391
|
-
for (let i6 = 0; i6 < 11; i6++) {
|
|
392
|
-
total += parseInt(partial[i6], 10) * weights[i6];
|
|
393
|
-
}
|
|
394
|
-
const remainder = total % 11;
|
|
395
|
-
return remainder <= 1 ? 0 : 11 - remainder;
|
|
396
|
-
}
|
|
397
364
|
function _computeEsIdCheck(num) {
|
|
398
365
|
return "TRWAGMYFPDXBNJZSQVHLCKE"[num % 23];
|
|
399
366
|
}
|
|
@@ -405,11 +372,6 @@ async function generateFPEToken(rawText, entityType = "UNKNOWN") {
|
|
|
405
372
|
else if (_SSN_RE.test(text)) type = "US_SSN";
|
|
406
373
|
else if (_CC_RE.test(text)) type = "CREDIT_CARD";
|
|
407
374
|
else if (_ROUTING_RE.test(text)) type = "US_ROUTING_NUMBER";
|
|
408
|
-
else if (_TCID_RE.test(text)) type = "TR_TCID";
|
|
409
|
-
else if (_SAUDI_NID_RE.test(text)) type = "SA_NATIONAL_ID";
|
|
410
|
-
else if (_UAE_EID_RE.test(text)) type = "UAE_EMIRATES_ID";
|
|
411
|
-
else if (_CN_ID_RE.test(text)) type = "CN_ID";
|
|
412
|
-
else if (_JA_ID_RE.test(text)) type = "JA_ID";
|
|
413
375
|
else if (_ES_ID_RE.test(text)) type = "ES_DNI";
|
|
414
376
|
else if (_IBAN_RE.test(text)) type = "INTL_BANK_IBAN";
|
|
415
377
|
else if (_PHONE_RE.test(text)) type = "PHONE_NUMBER";
|
|
@@ -436,35 +398,10 @@ async function generateFPEToken(rawText, entityType = "UNKNOWN") {
|
|
|
436
398
|
if (type === "US_ROUTING_NUMBER" || type === "US_ABA_ROUTING") {
|
|
437
399
|
return `000000${await _hmacDigits(text, 3)}`;
|
|
438
400
|
}
|
|
439
|
-
if (type === "TR_TCID") {
|
|
440
|
-
const core = await _hmacDigits(text, 4);
|
|
441
|
-
const partial = `990000${core}`;
|
|
442
|
-
let sum1_10 = 0;
|
|
443
|
-
for (let i6 = 0; i6 < partial.length; i6++) sum1_10 += parseInt(partial[i6], 10);
|
|
444
|
-
const d11Raw = sum1_10 % 10;
|
|
445
|
-
const d11 = d11Raw % 2 === 0 ? d11Raw : (d11Raw + 1) % 10;
|
|
446
|
-
return `${partial}${d11}`;
|
|
447
|
-
}
|
|
448
|
-
if (type === "SA_NATIONAL_ID") {
|
|
449
|
-
return `100000${await _hmacDigits(text, 4)}`;
|
|
450
|
-
}
|
|
451
|
-
if (type === "UAE_EMIRATES_ID") {
|
|
452
|
-
const base = `7840000${await _hmacDigits(text, 7)}`;
|
|
453
|
-
const checkDig = _computeLuhnDigit(base);
|
|
454
|
-
return `784-0000-${base.slice(7, 14)}-${checkDig}`;
|
|
455
|
-
}
|
|
456
401
|
if (type === "INTL_BANK_IBAN" || type === "IBAN_CODE") {
|
|
457
402
|
const countryCode = text.length >= 2 && /[a-zA-Z]{2}/.test(text.slice(0, 2)) ? text.slice(0, 2).toUpperCase() : "US";
|
|
458
403
|
return `${countryCode}00${(await _hmacHex(text, 8)).toUpperCase()}`;
|
|
459
404
|
}
|
|
460
|
-
if (type === "CN_ID") {
|
|
461
|
-
const base = `88000019900101${await _hmacDigits(text, 3)}`;
|
|
462
|
-
return base + _computeCnIdCheck(base);
|
|
463
|
-
}
|
|
464
|
-
if (type === "JA_ID") {
|
|
465
|
-
const base = `000000${await _hmacDigits(text, 5)}`;
|
|
466
|
-
return base + _computeJaIdCheck(base).toString();
|
|
467
|
-
}
|
|
468
405
|
if (type === "ES_DNI") {
|
|
469
406
|
const digits = `000${await _hmacDigits(text, 5)}`;
|
|
470
407
|
return digits + _computeEsIdCheck(parseInt(digits, 10));
|
|
@@ -484,7 +421,7 @@ async function generateFPEToken(rawText, entityType = "UNKNOWN") {
|
|
|
484
421
|
}
|
|
485
422
|
return `[TKN-${await _hmacHex(text)}]`;
|
|
486
423
|
}
|
|
487
|
-
var _masterKey, _EMAIL_RE, _PHONE_RE, _SSN_RE, _CC_RE, _ROUTING_RE,
|
|
424
|
+
var _masterKey, _EMAIL_RE, _PHONE_RE, _SSN_RE, _CC_RE, _ROUTING_RE, _ES_ID_RE, _IBAN_RE, _FIRST_NAMES, _LAST_NAMES, _CITIES;
|
|
488
425
|
var init_fpe = __esm({
|
|
489
426
|
"src/core/fpe.ts"() {
|
|
490
427
|
init_config();
|
|
@@ -497,13 +434,8 @@ var init_fpe = __esm({
|
|
|
497
434
|
_SSN_RE = /^\d{3}-\d{2}-\d{4}$/;
|
|
498
435
|
_CC_RE = /^(?:\d{4}[ \-]?){3}\d{4}$/;
|
|
499
436
|
_ROUTING_RE = /^\d{9}$/;
|
|
500
|
-
_TCID_RE = /^[1-9]\d{9}[02468]$/;
|
|
501
|
-
_SAUDI_NID_RE = /^1\d{9}$/;
|
|
502
|
-
_UAE_EID_RE = /^784-\d{4}-\d{7}-\d$/;
|
|
503
|
-
_IBAN_RE = /^[A-Z]{2}\d{2}[A-Z0-9]{4,30}$/;
|
|
504
|
-
_CN_ID_RE = /^[1-9]\d{5}(?:18|19|20)\d{2}(?:0[1-9]|1[0-2])(?:0[1-9]|[12]\d|3[01])\d{3}[0-9Xx]$/;
|
|
505
|
-
_JA_ID_RE = /^\d{12}$/;
|
|
506
437
|
_ES_ID_RE = /^(?:\d{8}[A-Z]|[XYZ]\d{7}[A-Z])$/;
|
|
438
|
+
_IBAN_RE = /^[A-Z]{2}\d{2}[A-Z0-9]{4,30}$/;
|
|
507
439
|
_FIRST_NAMES = ["Taylor", "Jordan", "Casey", "Morgan", "Riley", "Avery", "Rowan", "Quinn", "Charlie", "Peyton", "Blake", "Dakota", "Reese", "Skyler", "Finley", "Eden", "Harley", "Rory", "Emerson", "Remi"];
|
|
508
440
|
_LAST_NAMES = ["Smith", "Johnson", "Williams", "Brown", "Jones", "Garcia", "Miller", "Davis", "Rodriguez", "Martinez", "Hernandez", "Lopez", "Gonzalez", "Wilson", "Anderson", "Thomas", "Taylor", "Moore", "Jackson", "Martin"];
|
|
509
441
|
_CITIES = ["London", "Paris", "Berlin", "Tokyo", "Rome", "Madrid", "Vienna", "Sydney", "Toronto", "Chicago", "Seattle", "Austin", "Boston", "Denver", "Dallas", "Miami", "Seoul", "Dubai", "Mumbai", "Cairo"];
|
|
@@ -43559,19 +43491,8 @@ var SCRIPT_SIGNATURES; exports.LanguageContextResolver = void 0;
|
|
|
43559
43491
|
var init_assessor = __esm({
|
|
43560
43492
|
"src/core/dlp/assessor.ts"() {
|
|
43561
43493
|
SCRIPT_SIGNATURES = [
|
|
43562
|
-
// CJK / East-Asian — checked first because they are unambiguous
|
|
43563
|
-
{ tag: "zh", pattern: /[\u4e00-\u9fff\u3400-\u4dbf]/g },
|
|
43564
|
-
{ tag: "ja", pattern: /[\u3040-\u309f\u30a0-\u30ff\u31f0-\u31ff]/g },
|
|
43565
|
-
// Arabic script — covers Standard Arabic, Urdu overlap, etc.
|
|
43566
|
-
{ tag: "ar", pattern: /[\u0600-\u06ff\u0750-\u077f\u08a0-\u08ff\ufb50-\ufdff\ufe70-\ufeff]/g },
|
|
43567
|
-
// Turkish — distinguished by dotless-i (ı), soft-g (ğ), ş, and cedilla ç
|
|
43568
|
-
{ tag: "tr", pattern: /[ğıİşŞ]/g },
|
|
43569
|
-
// German — umlauts and Eszett
|
|
43570
|
-
{ tag: "de", pattern: /[äöüÄÖÜß]/g },
|
|
43571
43494
|
// Spanish — ñ and inverted punctuation
|
|
43572
|
-
{ tag: "es", pattern: /[ñÑ¡¿]/g }
|
|
43573
|
-
// French — cedilla, accented vowels with circumflex / diaeresis
|
|
43574
|
-
{ tag: "fr", pattern: /[àâçéèêëïîôùûüÿœæ]/gi }
|
|
43495
|
+
{ tag: "es", pattern: /[ñÑ¡¿]/g }
|
|
43575
43496
|
];
|
|
43576
43497
|
exports.LanguageContextResolver = class {
|
|
43577
43498
|
constructor(charThreshold = 1) {
|
|
@@ -43635,28 +43556,6 @@ var init_registry = __esm({
|
|
|
43635
43556
|
es: [
|
|
43636
43557
|
/\b[A-Z][a-záéíóúñ\-\']+ [A-Z][a-záéíóúñ\-\']+(?:\s+[A-Z][a-záéíóúñ\-\']+)?\b/g,
|
|
43637
43558
|
/\b(?:Sr|Sra|Srta)\.?\s+[A-Z][a-záéíóúñ\-\']+\b/g
|
|
43638
|
-
],
|
|
43639
|
-
fr: [
|
|
43640
|
-
/\b[A-Z][a-zàâçéèêëïîôùûü\-\']+ [A-Z][a-zàâçéèêëïîôùûü\-\']+\b/g,
|
|
43641
|
-
/\b(?:M|Mme|Mlle)\.?\s+[A-Z][a-zàâçéèêëïîôùûü\-\+\']+\b/g
|
|
43642
|
-
],
|
|
43643
|
-
de: [
|
|
43644
|
-
/\b[A-Z][a-zäöüß\-\']+ [A-Z][a-zäöüß\-\']+\b/g,
|
|
43645
|
-
/\b(?:Herr|Frau)\.?\s+[A-Z][a-zäöüß\-\']+\b/g
|
|
43646
|
-
],
|
|
43647
|
-
tr: [
|
|
43648
|
-
/\b[A-ZÇĞİÖŞÜ][a-zçğıöşü]+ [A-ZÇĞİÖŞÜ][a-zçğıöşü]+\b/g,
|
|
43649
|
-
/\b(?:Bay|Bayan|Sayın)\.?\s+[A-ZÇĞİÖŞÜ][a-zçğıöşü]+\b/g
|
|
43650
|
-
],
|
|
43651
|
-
ar: [
|
|
43652
|
-
/[\u0621-\u064a][\u0600-\u06ff]+ [\u0621-\u064a][\u0600-\u06ff]+/g,
|
|
43653
|
-
/(?:أبو|أم|ابن|بنت)\s+[\u0621-\u064a][\u0600-\u06ff]+/gi
|
|
43654
|
-
],
|
|
43655
|
-
ja: [
|
|
43656
|
-
/\b[A-Z][a-z]+(?:moto|yama|kawa|mura|ta|da|shi|no)\s+[A-Z][a-z]+\b/g
|
|
43657
|
-
],
|
|
43658
|
-
zh: [
|
|
43659
|
-
/\b[A-Z][a-z]{1,3}\s+[A-Z][a-z]+\b/g
|
|
43660
43559
|
]
|
|
43661
43560
|
};
|
|
43662
43561
|
LOCALE_ADDRESS_RULES = {
|
|
@@ -43664,26 +43563,8 @@ var init_registry = __esm({
|
|
|
43664
43563
|
/\b\d{1,5}\s+[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*\s+(?:Street|St|Avenue|Ave|Road|Rd|Boulevard|Blvd|Lane|Ln|Drive|Dr|Court|Ct|Way)\b/g,
|
|
43665
43564
|
/\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*,\s*[A-Z]{2}\s+\d{5}(?:-\d{4})?\b/g
|
|
43666
43565
|
],
|
|
43667
|
-
|
|
43668
|
-
/\b
|
|
43669
|
-
],
|
|
43670
|
-
de: [
|
|
43671
|
-
/\b[A-ZÄÖÜa-zäöüß]+(?:straße|strasse|weg|gasse|platz)\s+\d{1,4}\b/g
|
|
43672
|
-
],
|
|
43673
|
-
tr: [
|
|
43674
|
-
/\b[A-ZÇĞİÖŞÜa-zçğıöşü]+\s+(?:Cad|Sok|Mah)\.?\s+/gi,
|
|
43675
|
-
/\b\d{5}\s+[A-ZÇĞİÖŞÜa-zçğıöşü]+\/[A-ZÇĞİÖŞÜa-zçğıöşü]+\b/g
|
|
43676
|
-
],
|
|
43677
|
-
ar: [
|
|
43678
|
-
/شارع\s+[\u0600-\u06ff]+/g,
|
|
43679
|
-
/حي\s+[\u0600-\u06ff]+/g,
|
|
43680
|
-
/(?:ص\.ب|P\.?O\.?\s*Box)\s*\d{3,6}/gi
|
|
43681
|
-
],
|
|
43682
|
-
uk_postcode: [
|
|
43683
|
-
/\b[A-Z]{1,2}\d{1,2}[A-Z]?\s*\d[A-Z]{2}\b/g
|
|
43684
|
-
],
|
|
43685
|
-
ca_postal: [
|
|
43686
|
-
/\b[A-Z]\d[A-Z]\s*\d[A-Z]\d\b/g
|
|
43566
|
+
es: [
|
|
43567
|
+
/\b(?:Calle|Carrera|Avenida|Paseo|Plaza)\s+[A-ZÀ-ÖØ-Ý][a-zà-öø-ÿ]+\b/gi
|
|
43687
43568
|
]
|
|
43688
43569
|
};
|
|
43689
43570
|
RAW_PATTERNS = [
|
|
@@ -43902,68 +43783,6 @@ var init_registry = __esm({
|
|
|
43902
43783
|
"IDENTITY_INTL" /* IDENTITY_INTL */,
|
|
43903
43784
|
"ca_sin"
|
|
43904
43785
|
],
|
|
43905
|
-
[
|
|
43906
|
-
"FR_INSEE_NUM",
|
|
43907
|
-
"\\b[12]\\d{2}[01]\\d\\d{8}\\d{2}\\b",
|
|
43908
|
-
["insee", "s\xE9curit\xE9 sociale", "france", "num\xE9ro"],
|
|
43909
|
-
0.88,
|
|
43910
|
-
"IDENTITY_INTL" /* IDENTITY_INTL */,
|
|
43911
|
-
"fr_insee"
|
|
43912
|
-
],
|
|
43913
|
-
[
|
|
43914
|
-
"DE_STEUER_ID",
|
|
43915
|
-
"\\b\\d{2}\\s?\\d{3}\\s?\\d{3}\\s?\\d{3}\\b",
|
|
43916
|
-
["steuer", "steuernummer", "finanzamt", "deutschland"],
|
|
43917
|
-
0.87,
|
|
43918
|
-
"IDENTITY_INTL" /* IDENTITY_INTL */,
|
|
43919
|
-
null
|
|
43920
|
-
],
|
|
43921
|
-
[
|
|
43922
|
-
"TR_TCID",
|
|
43923
|
-
"\\b[1-9]\\d{9}[02468]\\b",
|
|
43924
|
-
["tc", "kimlik", "vatanda\u015Fl\u0131k", "n\xFCfus", "t\xFCrkiye"],
|
|
43925
|
-
0.92,
|
|
43926
|
-
"IDENTITY_INTL" /* IDENTITY_INTL */,
|
|
43927
|
-
"tcid"
|
|
43928
|
-
],
|
|
43929
|
-
[
|
|
43930
|
-
"SA_NATIONAL_ID",
|
|
43931
|
-
"\\b1\\d{9}\\b",
|
|
43932
|
-
["\u0647\u0648\u064A\u0629", "\u0631\u0642\u0645 \u0627\u0644\u0647\u0648\u064A\u0629", "saudi", "\u0648\u0637\u0646\u064A\u0629", "identity"],
|
|
43933
|
-
0.91,
|
|
43934
|
-
"IDENTITY_INTL" /* IDENTITY_INTL */,
|
|
43935
|
-
"saudi_nid"
|
|
43936
|
-
],
|
|
43937
|
-
[
|
|
43938
|
-
"UAE_EMIRATES_ID",
|
|
43939
|
-
"784-\\d{4}-\\d{7}-\\d",
|
|
43940
|
-
["emirates", "\u0647\u0648\u064A\u0629", "uae", "emirati", "identity"],
|
|
43941
|
-
0.93,
|
|
43942
|
-
"IDENTITY_INTL" /* IDENTITY_INTL */,
|
|
43943
|
-
"luhn",
|
|
43944
|
-
true,
|
|
43945
|
-
["*", "ar"]
|
|
43946
|
-
],
|
|
43947
|
-
[
|
|
43948
|
-
"CN_ID",
|
|
43949
|
-
"[1-9]\\d{5}(?:18|19|20)\\d{2}(?:0[1-9]|1[0-2])(0[1-9]|[12]\\d|3[01])\\d{3}[0-9Xx]",
|
|
43950
|
-
["\u8EAB\u4EFD\u8BC1", "\u8EAB\u4EFD\u53F7\u7801", "id card", "china"],
|
|
43951
|
-
0.95,
|
|
43952
|
-
"IDENTITY_INTL" /* IDENTITY_INTL */,
|
|
43953
|
-
"cn_id",
|
|
43954
|
-
true,
|
|
43955
|
-
["*", "zh"]
|
|
43956
|
-
],
|
|
43957
|
-
[
|
|
43958
|
-
"JA_MY_NUMBER",
|
|
43959
|
-
"\\d{12}",
|
|
43960
|
-
["\u30DE\u30A4\u30CA\u30F3\u30D0\u30FC", "\u500B\u4EBA\u756A\u53F7", "my number", "japan"],
|
|
43961
|
-
0.94,
|
|
43962
|
-
"IDENTITY_INTL" /* IDENTITY_INTL */,
|
|
43963
|
-
"ja_id",
|
|
43964
|
-
true,
|
|
43965
|
-
["*", "ja"]
|
|
43966
|
-
],
|
|
43967
43786
|
[
|
|
43968
43787
|
"ES_DNI",
|
|
43969
43788
|
"(?:\\d{8}[A-Z]|[XYZ]\\d{7}[A-Z])",
|
|
@@ -43974,16 +43793,6 @@ var init_registry = __esm({
|
|
|
43974
43793
|
true,
|
|
43975
43794
|
["*", "es"]
|
|
43976
43795
|
],
|
|
43977
|
-
[
|
|
43978
|
-
"INTL_PASSPORT",
|
|
43979
|
-
"[A-Z0-9]{6,12}",
|
|
43980
|
-
["passport", "travel", "immigration", "visa"],
|
|
43981
|
-
0.6,
|
|
43982
|
-
"IDENTITY_INTL" /* IDENTITY_INTL */,
|
|
43983
|
-
null,
|
|
43984
|
-
true,
|
|
43985
|
-
["*"]
|
|
43986
|
-
],
|
|
43987
43796
|
// ── CORPORATE ──────────────────────────────────────────────────────
|
|
43988
43797
|
[
|
|
43989
43798
|
"CORP_EMPLOYEE_ID",
|
|
@@ -43999,7 +43808,7 @@ var init_registry = __esm({
|
|
|
43999
43808
|
this.catalogue = /* @__PURE__ */ new Map();
|
|
44000
43809
|
this.localeCategoryRegexMap = /* @__PURE__ */ new Map();
|
|
44001
43810
|
this.buildCatalogue(loadGroups ?? null);
|
|
44002
|
-
for (const loc of ["*", "en", "es"
|
|
43811
|
+
for (const loc of ["*", "en", "es"]) {
|
|
44003
43812
|
this.compileForLocale(loc);
|
|
44004
43813
|
}
|
|
44005
43814
|
}
|
|
@@ -44177,47 +43986,6 @@ function checkIpv4Octets(raw) {
|
|
|
44177
43986
|
}
|
|
44178
43987
|
return true;
|
|
44179
43988
|
}
|
|
44180
|
-
function checkTcidNumber(raw) {
|
|
44181
|
-
const digitsStr = raw.replace(/\D/g, "");
|
|
44182
|
-
if (digitsStr.length !== 11) return false;
|
|
44183
|
-
const d6 = digitsStr.split("").map(Number);
|
|
44184
|
-
if (d6[0] === 0) return false;
|
|
44185
|
-
if (d6[10] % 2 !== 0) return false;
|
|
44186
|
-
const oddSum = d6[0] + d6[2] + d6[4] + d6[6] + d6[8];
|
|
44187
|
-
const evenSum = d6[1] + d6[3] + d6[5] + d6[7];
|
|
44188
|
-
const computedD10 = ((oddSum * 7 - evenSum) % 10 + 10) % 10;
|
|
44189
|
-
if (computedD10 !== d6[9]) return false;
|
|
44190
|
-
const firstTenSum = d6.slice(0, 10).reduce((a6, b6) => a6 + b6, 0);
|
|
44191
|
-
if (firstTenSum % 10 !== d6[10]) return false;
|
|
44192
|
-
return true;
|
|
44193
|
-
}
|
|
44194
|
-
function checkSaudiNid(raw) {
|
|
44195
|
-
const digitsStr = raw.replace(/\D/g, "");
|
|
44196
|
-
if (digitsStr.length !== 10) return false;
|
|
44197
|
-
const d6 = digitsStr.split("").map(Number);
|
|
44198
|
-
if (d6[0] !== 1) return false;
|
|
44199
|
-
let total = 0;
|
|
44200
|
-
for (let idx = 0; idx < 10; idx++) {
|
|
44201
|
-
let val = d6[idx];
|
|
44202
|
-
if (idx % 2 === 0) {
|
|
44203
|
-
val *= 2;
|
|
44204
|
-
if (val > 9) val -= 9;
|
|
44205
|
-
}
|
|
44206
|
-
total += val;
|
|
44207
|
-
}
|
|
44208
|
-
return total % 10 === 0;
|
|
44209
|
-
}
|
|
44210
|
-
function checkFrInsee(raw) {
|
|
44211
|
-
let cleaned = raw.replace(/ /g, "").toUpperCase();
|
|
44212
|
-
if (cleaned.length !== 15) return false;
|
|
44213
|
-
cleaned = cleaned.replace(/2A/g, "19").replace(/2B/g, "18");
|
|
44214
|
-
if (!/^\d+$/.test(cleaned)) return false;
|
|
44215
|
-
const baseNumberStr = cleaned.slice(0, 13);
|
|
44216
|
-
const expectedKey = parseInt(cleaned.slice(13), 10);
|
|
44217
|
-
const baseNumber = BigInt(baseNumberStr);
|
|
44218
|
-
const calculatedKey = 97n - baseNumber % 97n;
|
|
44219
|
-
return Number(calculatedKey) === expectedKey;
|
|
44220
|
-
}
|
|
44221
43989
|
function checkCaSin(raw) {
|
|
44222
43990
|
const digits = raw.replace(/\D/g, "");
|
|
44223
43991
|
if (digits.length !== 9) return false;
|
|
@@ -44237,30 +44005,6 @@ function checkUkNino(raw) {
|
|
|
44237
44005
|
if (cleaned.length !== 9) return false;
|
|
44238
44006
|
return UK_NINO_REGEX.test(cleaned);
|
|
44239
44007
|
}
|
|
44240
|
-
function checkCnId(raw) {
|
|
44241
|
-
const cleaned = raw.replace(/[^0-9X]/gi, "").toUpperCase();
|
|
44242
|
-
if (cleaned.length !== 18) return false;
|
|
44243
|
-
const weights = [7, 9, 10, 5, 8, 4, 2, 1, 6, 3, 7, 9, 10, 5, 8, 4, 2];
|
|
44244
|
-
const checkDigits = "10X98765432";
|
|
44245
|
-
let total = 0;
|
|
44246
|
-
for (let i6 = 0; i6 < 17; i6++) {
|
|
44247
|
-
total += parseInt(cleaned[i6], 10) * weights[i6];
|
|
44248
|
-
}
|
|
44249
|
-
return cleaned[17] === checkDigits[total % 11];
|
|
44250
|
-
}
|
|
44251
|
-
function checkJaId(raw) {
|
|
44252
|
-
const digitsStr = raw.replace(/\D/g, "");
|
|
44253
|
-
if (digitsStr.length !== 12) return false;
|
|
44254
|
-
const d6 = digitsStr.split("").map(Number);
|
|
44255
|
-
const weights = [6, 5, 4, 3, 2, 7, 6, 5, 4, 3, 2];
|
|
44256
|
-
let total = 0;
|
|
44257
|
-
for (let i6 = 0; i6 < 11; i6++) {
|
|
44258
|
-
total += d6[i6] * weights[i6];
|
|
44259
|
-
}
|
|
44260
|
-
const remainder = total % 11;
|
|
44261
|
-
const expected = remainder <= 1 ? 0 : 11 - remainder;
|
|
44262
|
-
return d6[11] === expected;
|
|
44263
|
-
}
|
|
44264
44008
|
function checkEsId(raw) {
|
|
44265
44009
|
const cleaned = raw.replace(/[\s-]/g, "").toUpperCase();
|
|
44266
44010
|
if (cleaned.length !== 9) return false;
|
|
@@ -44391,13 +44135,8 @@ var init_handlers = __esm({
|
|
|
44391
44135
|
vin_format: checkVinFormat,
|
|
44392
44136
|
btc_format: checkBtcFormat,
|
|
44393
44137
|
ipv4: checkIpv4Octets,
|
|
44394
|
-
tcid: checkTcidNumber,
|
|
44395
|
-
saudi_nid: checkSaudiNid,
|
|
44396
|
-
fr_insee: checkFrInsee,
|
|
44397
44138
|
ca_sin: checkCaSin,
|
|
44398
44139
|
uk_nino: checkUkNino,
|
|
44399
|
-
cn_id: checkCnId,
|
|
44400
|
-
ja_id: checkJaId,
|
|
44401
44140
|
es_id: checkEsId
|
|
44402
44141
|
};
|
|
44403
44142
|
exports.DLPValidationEngine = class {
|