mask-privacy 3.1.0 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -87,12 +87,6 @@ Mask prevents the misidentification of real data as tokens by using universally
87
87
 
88
88
  This prefix-based approach ensures that the SDK does not inadvertently process valid PII as an existing token.
89
89
 
90
- Additional collision-proof prefixes for international identifiers:
91
- * Turkish TCID tokens use the `990000` prefix (no valid Kimlik number starts with `99`).
92
- * Saudi NID tokens use the `100000` prefix (length-constrained to avoid overlap with real IDs).
93
- * UAE Emirates ID tokens use the `784-0000-` prefix (zeroed sub-fields are structurally invalid).
94
- * IBAN tokens zero the check digits (`XX00...`), which always fails ISO 7064 Mod-97 verification.
95
-
96
90
  ### 4. Enterprise Async Support
97
91
  Mask is built from the ground up for high-concurrency Node.js environments. All core operations are asynchronous and promise-based. Calling `encode()`, `decode()`, or `scanAndTokenize()` allows your event loop to remain unblocked while handling PII tokenization tasks.
98
92
 
@@ -135,7 +129,7 @@ Mask includes the ability to detokenize PII embedded within larger text blocks (
135
129
 
136
130
  ## Multilingual PII Detection (Waterfall Pipeline)
137
131
 
138
- Mask is built for the global enterprise. While many privacy tools are English-centric, the TypeScript SDK implements a **3-Tier Waterfall Detection** strategy designed for high-performance PII detection across 8 major languages using local ONNX models.
132
+ Mask is built for the global enterprise. The TypeScript SDK implements a **3-Tier Waterfall Detection** strategy for high-precision PII detection in **English and Spanish** using local ONNX models.
139
133
 
140
134
  ### Supported Language Matrix
141
135
 
@@ -145,12 +139,6 @@ Mask provides first-class support for the following languages:
145
139
  | :--- | :--- | :--- | :--- |
146
140
  | **English** | `en` | ✅ Full | DistilBERT (Simple) |
147
141
  | **Spanish** | `es` | ✅ Full | BERT Multilingual |
148
- | **French** | `fr` | ✅ Full | BERT Multilingual |
149
- | **German** | `de` | ✅ Full | BERT Multilingual |
150
- | **Turkish** | `tr` | ✅ Full | BERT Multilingual |
151
- | **Arabic** | `ar` | ✅ Full | BERT Multilingual |
152
- | **Japanese** | `ja` | ✅ Full | BERT Multilingual |
153
- | **Chinese** | `zh` | ✅ Full | BERT Multilingual |
154
142
 
155
143
  ### How the Waterfall Works: The Excising Mechanism
156
144
 
@@ -165,11 +153,11 @@ To maintain high performance, the TypeScript SDK does not simply run three separ
165
153
 
166
154
  ### Configuration & Environment Variables
167
155
 
168
- Configure your multilingual environment using standard variables. These are parsed at runtime when the `LocalTransformersScanner` is initialized.
156
+ Configure your language environment using standard variables.
169
157
 
170
158
  | Variable | Default | Description |
171
159
  | :--- | :--- | :--- |
172
- | `MASK_LANGUAGES` | `en` | Comma-separated list of languages (e.g., `en,es,fr,ar`). |
160
+ | `MASK_LANGUAGES` | `en` | Comma-separated language codes. Supported: `en`, `es`. |
173
161
  | `MASK_NLP_MODEL` | *(varies)* | Override the default model (e.g., `Xenova/bert-base-multilingual-cased-ner-hrl`). |
174
162
  | `MASK_MODEL_CACHE_DIR` | `~/.cache` | Local directory for storing serialized ONNX models. |
175
163
  | `MASK_NLP_MAX_WORKERS` | `4` | Number of worker processes/threads for NLP analysis. |
@@ -221,8 +209,8 @@ The TypeScript SDK manages AI models automatically via **Transformers.js**. For
221
209
  ```bash
222
210
  npm install @huggingface/transformers # Required extra
223
211
 
224
- # Pre-cache models for your required languages
225
- export MASK_LANGUAGES="en,es,fr"
212
+ # Pre-cache models for English and Spanish
213
+ export MASK_LANGUAGES="en,es"
226
214
  npx mask-privacy cache-models
227
215
  ```
228
216
 
package/dist/index.d.mts CHANGED
@@ -312,14 +312,8 @@ declare class MaskClient {
312
312
  * Supported language tags:
313
313
  * en — English (default / Latin-only fallback)
314
314
  * es — Spanish
315
- * fr — French
316
- * de — German
317
- * tr — Turkish
318
- * ar — Arabic
319
- * zh — Chinese
320
- * ja — Japanese
321
315
  */
322
- type LanguageTag = "en" | "es" | "fr" | "de" | "tr" | "ar" | "zh" | "ja";
316
+ type LanguageTag = "en" | "es";
323
317
  interface LanguageBreakdown {
324
318
  language: LanguageTag;
325
319
  breakdown: Record<string, number>;
@@ -332,8 +326,8 @@ interface LanguageBreakdown {
332
326
  * @example
333
327
  * ```ts
334
328
  * const resolver = new LanguageContextResolver();
335
- * const tag = resolver.resolve("Merhaba, TC Kimlik Numaram 12345678901");
336
- * // tag === "tr"
329
+ * const tag = resolver.resolve("Hola, mi DNI es 12345678Z");
330
+ * // tag === "es"
337
331
  * ```
338
332
  */
339
333
  declare class LanguageContextResolver {
@@ -457,8 +451,8 @@ interface ScoreInput {
457
451
  * baseRisk: 0.92,
458
452
  * matchStart: 10,
459
453
  * matchEnd: 21,
460
- * fullText: "TC Kimlik No: 10000000146",
461
- * proximityTerms: new Set(["kimlik", "tc"]),
454
+ * fullText: "Mi número de DNI es 12345678Z",
455
+ * proximityTerms: new Set(["dni", "número"]),
462
456
  * validatorPassed: true,
463
457
  * });
464
458
  * // score === 0.99 (validator override)
package/dist/index.d.ts CHANGED
@@ -312,14 +312,8 @@ declare class MaskClient {
312
312
  * Supported language tags:
313
313
  * en — English (default / Latin-only fallback)
314
314
  * es — Spanish
315
- * fr — French
316
- * de — German
317
- * tr — Turkish
318
- * ar — Arabic
319
- * zh — Chinese
320
- * ja — Japanese
321
315
  */
322
- type LanguageTag = "en" | "es" | "fr" | "de" | "tr" | "ar" | "zh" | "ja";
316
+ type LanguageTag = "en" | "es";
323
317
  interface LanguageBreakdown {
324
318
  language: LanguageTag;
325
319
  breakdown: Record<string, number>;
@@ -332,8 +326,8 @@ interface LanguageBreakdown {
332
326
  * @example
333
327
  * ```ts
334
328
  * const resolver = new LanguageContextResolver();
335
- * const tag = resolver.resolve("Merhaba, TC Kimlik Numaram 12345678901");
336
- * // tag === "tr"
329
+ * const tag = resolver.resolve("Hola, mi DNI es 12345678Z");
330
+ * // tag === "es"
337
331
  * ```
338
332
  */
339
333
  declare class LanguageContextResolver {
@@ -457,8 +451,8 @@ interface ScoreInput {
457
451
  * baseRisk: 0.92,
458
452
  * matchStart: 10,
459
453
  * matchEnd: 21,
460
- * fullText: "TC Kimlik No: 10000000146",
461
- * proximityTerms: new Set(["kimlik", "tc"]),
454
+ * fullText: "Mi número de DNI es 12345678Z",
455
+ * proximityTerms: new Set(["dni", "número"]),
462
456
  * validatorPassed: true,
463
457
  * });
464
458
  * // score === 0.99 (validator override)
package/dist/index.js CHANGED
@@ -274,21 +274,6 @@ function looksLikeToken(value) {
274
274
  if (v7.startsWith("000000") && v7.length === 9) {
275
275
  return true;
276
276
  }
277
- if (v7.startsWith("784-0000-") && v7.length === 18) {
278
- return true;
279
- }
280
- if (v7.length === 11 && v7.startsWith("990000") && /^\d+$/.test(v7) && parseInt(v7[v7.length - 1], 10) % 2 === 0) {
281
- return true;
282
- }
283
- if (v7.length === 10 && v7.startsWith("100000") && /^\d+$/.test(v7)) {
284
- return true;
285
- }
286
- if (v7.length === 18 && v7.startsWith("88000019900101")) {
287
- return true;
288
- }
289
- if (v7.length === 12 && v7.startsWith("000000")) {
290
- return true;
291
- }
292
277
  if (v7.length === 9 && v7.startsWith("000") && /[A-Z]$/.test(v7)) {
293
278
  return true;
294
279
  }
@@ -307,7 +292,7 @@ var TOKEN_PATTERN;
307
292
  var init_fpe_utils = __esm({
308
293
  "src/core/fpe_utils.ts"() {
309
294
  TOKEN_PATTERN = new RegExp(
310
- "tkn-[a-f0-9]{8,64}@[A-Za-z0-9.\\-]+\\.[A-Za-z]{2,}|\\+[1-9]\\d{0,3}-555-\\d{7}|000-00-\\d{4}|4000-0000-0000-\\d{4}|000000\\d{3}|990000\\d{4}[02468]|100000\\d{4}|784-0000-\\d{7}-\\d|88000019900101\\d{3}[0-9X]|000000\\d{6}|000\\d{5}[A-Z]|[A-Z]{2}00[A-F0-9]{4,16}|<(?:PER|LOC|ORG):[^>]+>|\\[TKN-[a-f0-9]{8,64}\\]",
295
+ "tkn-[a-f0-9]{8,64}@[A-Za-z0-9.\\-]+\\.[A-Za-z]{2,}|\\+[1-9]\\d{0,3}-555-\\d{7}|000-00-\\d{4}|4000-0000-0000-\\d{4}|000000\\d{3}|000\\d{5}[A-Z]|[A-Z]{2}00[A-F0-9]{4,16}|<(?:PER|LOC|ORG):[^>]+>|\\[TKN-[a-f0-9]{8,64}\\]",
311
296
  // Opaque
312
297
  "g"
313
298
  );
@@ -376,24 +361,6 @@ function _computeLuhnDigit(partialNum) {
376
361
  }
377
362
  return ((10 - sum % 10) % 10).toString();
378
363
  }
379
- function _computeCnIdCheck(partial) {
380
- const weights = [7, 9, 10, 5, 8, 4, 2, 1, 6, 3, 7, 9, 10, 5, 8, 4, 2];
381
- const checkDigits = "10X98765432";
382
- let total = 0;
383
- for (let i6 = 0; i6 < 17; i6++) {
384
- total += parseInt(partial[i6], 10) * weights[i6];
385
- }
386
- return checkDigits[total % 11];
387
- }
388
- function _computeJaIdCheck(partial) {
389
- const weights = [6, 5, 4, 3, 2, 7, 6, 5, 4, 3, 2];
390
- let total = 0;
391
- for (let i6 = 0; i6 < 11; i6++) {
392
- total += parseInt(partial[i6], 10) * weights[i6];
393
- }
394
- const remainder = total % 11;
395
- return remainder <= 1 ? 0 : 11 - remainder;
396
- }
397
364
  function _computeEsIdCheck(num) {
398
365
  return "TRWAGMYFPDXBNJZSQVHLCKE"[num % 23];
399
366
  }
@@ -405,11 +372,6 @@ async function generateFPEToken(rawText, entityType = "UNKNOWN") {
405
372
  else if (_SSN_RE.test(text)) type = "US_SSN";
406
373
  else if (_CC_RE.test(text)) type = "CREDIT_CARD";
407
374
  else if (_ROUTING_RE.test(text)) type = "US_ROUTING_NUMBER";
408
- else if (_TCID_RE.test(text)) type = "TR_TCID";
409
- else if (_SAUDI_NID_RE.test(text)) type = "SA_NATIONAL_ID";
410
- else if (_UAE_EID_RE.test(text)) type = "UAE_EMIRATES_ID";
411
- else if (_CN_ID_RE.test(text)) type = "CN_ID";
412
- else if (_JA_ID_RE.test(text)) type = "JA_ID";
413
375
  else if (_ES_ID_RE.test(text)) type = "ES_DNI";
414
376
  else if (_IBAN_RE.test(text)) type = "INTL_BANK_IBAN";
415
377
  else if (_PHONE_RE.test(text)) type = "PHONE_NUMBER";
@@ -436,35 +398,10 @@ async function generateFPEToken(rawText, entityType = "UNKNOWN") {
436
398
  if (type === "US_ROUTING_NUMBER" || type === "US_ABA_ROUTING") {
437
399
  return `000000${await _hmacDigits(text, 3)}`;
438
400
  }
439
- if (type === "TR_TCID") {
440
- const core = await _hmacDigits(text, 4);
441
- const partial = `990000${core}`;
442
- let sum1_10 = 0;
443
- for (let i6 = 0; i6 < partial.length; i6++) sum1_10 += parseInt(partial[i6], 10);
444
- const d11Raw = sum1_10 % 10;
445
- const d11 = d11Raw % 2 === 0 ? d11Raw : (d11Raw + 1) % 10;
446
- return `${partial}${d11}`;
447
- }
448
- if (type === "SA_NATIONAL_ID") {
449
- return `100000${await _hmacDigits(text, 4)}`;
450
- }
451
- if (type === "UAE_EMIRATES_ID") {
452
- const base = `7840000${await _hmacDigits(text, 7)}`;
453
- const checkDig = _computeLuhnDigit(base);
454
- return `784-0000-${base.slice(7, 14)}-${checkDig}`;
455
- }
456
401
  if (type === "INTL_BANK_IBAN" || type === "IBAN_CODE") {
457
402
  const countryCode = text.length >= 2 && /[a-zA-Z]{2}/.test(text.slice(0, 2)) ? text.slice(0, 2).toUpperCase() : "US";
458
403
  return `${countryCode}00${(await _hmacHex(text, 8)).toUpperCase()}`;
459
404
  }
460
- if (type === "CN_ID") {
461
- const base = `88000019900101${await _hmacDigits(text, 3)}`;
462
- return base + _computeCnIdCheck(base);
463
- }
464
- if (type === "JA_ID") {
465
- const base = `000000${await _hmacDigits(text, 5)}`;
466
- return base + _computeJaIdCheck(base).toString();
467
- }
468
405
  if (type === "ES_DNI") {
469
406
  const digits = `000${await _hmacDigits(text, 5)}`;
470
407
  return digits + _computeEsIdCheck(parseInt(digits, 10));
@@ -484,7 +421,7 @@ async function generateFPEToken(rawText, entityType = "UNKNOWN") {
484
421
  }
485
422
  return `[TKN-${await _hmacHex(text)}]`;
486
423
  }
487
- var _masterKey, _EMAIL_RE, _PHONE_RE, _SSN_RE, _CC_RE, _ROUTING_RE, _TCID_RE, _SAUDI_NID_RE, _UAE_EID_RE, _IBAN_RE, _CN_ID_RE, _JA_ID_RE, _ES_ID_RE, _FIRST_NAMES, _LAST_NAMES, _CITIES;
424
+ var _masterKey, _EMAIL_RE, _PHONE_RE, _SSN_RE, _CC_RE, _ROUTING_RE, _ES_ID_RE, _IBAN_RE, _FIRST_NAMES, _LAST_NAMES, _CITIES;
488
425
  var init_fpe = __esm({
489
426
  "src/core/fpe.ts"() {
490
427
  init_config();
@@ -497,13 +434,8 @@ var init_fpe = __esm({
497
434
  _SSN_RE = /^\d{3}-\d{2}-\d{4}$/;
498
435
  _CC_RE = /^(?:\d{4}[ \-]?){3}\d{4}$/;
499
436
  _ROUTING_RE = /^\d{9}$/;
500
- _TCID_RE = /^[1-9]\d{9}[02468]$/;
501
- _SAUDI_NID_RE = /^1\d{9}$/;
502
- _UAE_EID_RE = /^784-\d{4}-\d{7}-\d$/;
503
- _IBAN_RE = /^[A-Z]{2}\d{2}[A-Z0-9]{4,30}$/;
504
- _CN_ID_RE = /^[1-9]\d{5}(?:18|19|20)\d{2}(?:0[1-9]|1[0-2])(?:0[1-9]|[12]\d|3[01])\d{3}[0-9Xx]$/;
505
- _JA_ID_RE = /^\d{12}$/;
506
437
  _ES_ID_RE = /^(?:\d{8}[A-Z]|[XYZ]\d{7}[A-Z])$/;
438
+ _IBAN_RE = /^[A-Z]{2}\d{2}[A-Z0-9]{4,30}$/;
507
439
  _FIRST_NAMES = ["Taylor", "Jordan", "Casey", "Morgan", "Riley", "Avery", "Rowan", "Quinn", "Charlie", "Peyton", "Blake", "Dakota", "Reese", "Skyler", "Finley", "Eden", "Harley", "Rory", "Emerson", "Remi"];
508
440
  _LAST_NAMES = ["Smith", "Johnson", "Williams", "Brown", "Jones", "Garcia", "Miller", "Davis", "Rodriguez", "Martinez", "Hernandez", "Lopez", "Gonzalez", "Wilson", "Anderson", "Thomas", "Taylor", "Moore", "Jackson", "Martin"];
509
441
  _CITIES = ["London", "Paris", "Berlin", "Tokyo", "Rome", "Madrid", "Vienna", "Sydney", "Toronto", "Chicago", "Seattle", "Austin", "Boston", "Denver", "Dallas", "Miami", "Seoul", "Dubai", "Mumbai", "Cairo"];
@@ -43559,19 +43491,8 @@ var SCRIPT_SIGNATURES; exports.LanguageContextResolver = void 0;
43559
43491
  var init_assessor = __esm({
43560
43492
  "src/core/dlp/assessor.ts"() {
43561
43493
  SCRIPT_SIGNATURES = [
43562
- // CJK / East-Asian — checked first because they are unambiguous
43563
- { tag: "zh", pattern: /[\u4e00-\u9fff\u3400-\u4dbf]/g },
43564
- { tag: "ja", pattern: /[\u3040-\u309f\u30a0-\u30ff\u31f0-\u31ff]/g },
43565
- // Arabic script — covers Standard Arabic, Urdu overlap, etc.
43566
- { tag: "ar", pattern: /[\u0600-\u06ff\u0750-\u077f\u08a0-\u08ff\ufb50-\ufdff\ufe70-\ufeff]/g },
43567
- // Turkish — distinguished by dotless-i (ı), soft-g (ğ), ş, and cedilla ç
43568
- { tag: "tr", pattern: /[ğıİşŞ]/g },
43569
- // German — umlauts and Eszett
43570
- { tag: "de", pattern: /[äöüÄÖÜß]/g },
43571
43494
  // Spanish — ñ and inverted punctuation
43572
- { tag: "es", pattern: /[ñÑ¡¿]/g },
43573
- // French — cedilla, accented vowels with circumflex / diaeresis
43574
- { tag: "fr", pattern: /[àâçéèêëïîôùûüÿœæ]/gi }
43495
+ { tag: "es", pattern: /[ñÑ¡¿]/g }
43575
43496
  ];
43576
43497
  exports.LanguageContextResolver = class {
43577
43498
  constructor(charThreshold = 1) {
@@ -43635,28 +43556,6 @@ var init_registry = __esm({
43635
43556
  es: [
43636
43557
  /\b[A-Z][a-záéíóúñ\-\']+ [A-Z][a-záéíóúñ\-\']+(?:\s+[A-Z][a-záéíóúñ\-\']+)?\b/g,
43637
43558
  /\b(?:Sr|Sra|Srta)\.?\s+[A-Z][a-záéíóúñ\-\']+\b/g
43638
- ],
43639
- fr: [
43640
- /\b[A-Z][a-zàâçéèêëïîôùûü\-\']+ [A-Z][a-zàâçéèêëïîôùûü\-\']+\b/g,
43641
- /\b(?:M|Mme|Mlle)\.?\s+[A-Z][a-zàâçéèêëïîôùûü\-\+\']+\b/g
43642
- ],
43643
- de: [
43644
- /\b[A-Z][a-zäöüß\-\']+ [A-Z][a-zäöüß\-\']+\b/g,
43645
- /\b(?:Herr|Frau)\.?\s+[A-Z][a-zäöüß\-\']+\b/g
43646
- ],
43647
- tr: [
43648
- /\b[A-ZÇĞİÖŞÜ][a-zçğıöşü]+ [A-ZÇĞİÖŞÜ][a-zçğıöşü]+\b/g,
43649
- /\b(?:Bay|Bayan|Sayın)\.?\s+[A-ZÇĞİÖŞÜ][a-zçğıöşü]+\b/g
43650
- ],
43651
- ar: [
43652
- /[\u0621-\u064a][\u0600-\u06ff]+ [\u0621-\u064a][\u0600-\u06ff]+/g,
43653
- /(?:أبو|أم|ابن|بنت)\s+[\u0621-\u064a][\u0600-\u06ff]+/gi
43654
- ],
43655
- ja: [
43656
- /\b[A-Z][a-z]+(?:moto|yama|kawa|mura|ta|da|shi|no)\s+[A-Z][a-z]+\b/g
43657
- ],
43658
- zh: [
43659
- /\b[A-Z][a-z]{1,3}\s+[A-Z][a-z]+\b/g
43660
43559
  ]
43661
43560
  };
43662
43561
  LOCALE_ADDRESS_RULES = {
@@ -43664,26 +43563,8 @@ var init_registry = __esm({
43664
43563
  /\b\d{1,5}\s+[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*\s+(?:Street|St|Avenue|Ave|Road|Rd|Boulevard|Blvd|Lane|Ln|Drive|Dr|Court|Ct|Way)\b/g,
43665
43564
  /\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*,\s*[A-Z]{2}\s+\d{5}(?:-\d{4})?\b/g
43666
43565
  ],
43667
- fr: [
43668
- /\b\d{1,4}\s+(?:rue|avenue|boulevard|place|chemin)\s+[A-ZÀ-ÖØ-Ý][a-zà-öø-ÿ]+\b/gi
43669
- ],
43670
- de: [
43671
- /\b[A-ZÄÖÜa-zäöüß]+(?:straße|strasse|weg|gasse|platz)\s+\d{1,4}\b/g
43672
- ],
43673
- tr: [
43674
- /\b[A-ZÇĞİÖŞÜa-zçğıöşü]+\s+(?:Cad|Sok|Mah)\.?\s+/gi,
43675
- /\b\d{5}\s+[A-ZÇĞİÖŞÜa-zçğıöşü]+\/[A-ZÇĞİÖŞÜa-zçğıöşü]+\b/g
43676
- ],
43677
- ar: [
43678
- /شارع\s+[\u0600-\u06ff]+/g,
43679
- /حي\s+[\u0600-\u06ff]+/g,
43680
- /(?:ص\.ب|P\.?O\.?\s*Box)\s*\d{3,6}/gi
43681
- ],
43682
- uk_postcode: [
43683
- /\b[A-Z]{1,2}\d{1,2}[A-Z]?\s*\d[A-Z]{2}\b/g
43684
- ],
43685
- ca_postal: [
43686
- /\b[A-Z]\d[A-Z]\s*\d[A-Z]\d\b/g
43566
+ es: [
43567
+ /\b(?:Calle|Carrera|Avenida|Paseo|Plaza)\s+[A-ZÀ-ÖØ-Ý][a-zà-öø-ÿ]+\b/gi
43687
43568
  ]
43688
43569
  };
43689
43570
  RAW_PATTERNS = [
@@ -43902,68 +43783,6 @@ var init_registry = __esm({
43902
43783
  "IDENTITY_INTL" /* IDENTITY_INTL */,
43903
43784
  "ca_sin"
43904
43785
  ],
43905
- [
43906
- "FR_INSEE_NUM",
43907
- "\\b[12]\\d{2}[01]\\d\\d{8}\\d{2}\\b",
43908
- ["insee", "s\xE9curit\xE9 sociale", "france", "num\xE9ro"],
43909
- 0.88,
43910
- "IDENTITY_INTL" /* IDENTITY_INTL */,
43911
- "fr_insee"
43912
- ],
43913
- [
43914
- "DE_STEUER_ID",
43915
- "\\b\\d{2}\\s?\\d{3}\\s?\\d{3}\\s?\\d{3}\\b",
43916
- ["steuer", "steuernummer", "finanzamt", "deutschland"],
43917
- 0.87,
43918
- "IDENTITY_INTL" /* IDENTITY_INTL */,
43919
- null
43920
- ],
43921
- [
43922
- "TR_TCID",
43923
- "\\b[1-9]\\d{9}[02468]\\b",
43924
- ["tc", "kimlik", "vatanda\u015Fl\u0131k", "n\xFCfus", "t\xFCrkiye"],
43925
- 0.92,
43926
- "IDENTITY_INTL" /* IDENTITY_INTL */,
43927
- "tcid"
43928
- ],
43929
- [
43930
- "SA_NATIONAL_ID",
43931
- "\\b1\\d{9}\\b",
43932
- ["\u0647\u0648\u064A\u0629", "\u0631\u0642\u0645 \u0627\u0644\u0647\u0648\u064A\u0629", "saudi", "\u0648\u0637\u0646\u064A\u0629", "identity"],
43933
- 0.91,
43934
- "IDENTITY_INTL" /* IDENTITY_INTL */,
43935
- "saudi_nid"
43936
- ],
43937
- [
43938
- "UAE_EMIRATES_ID",
43939
- "784-\\d{4}-\\d{7}-\\d",
43940
- ["emirates", "\u0647\u0648\u064A\u0629", "uae", "emirati", "identity"],
43941
- 0.93,
43942
- "IDENTITY_INTL" /* IDENTITY_INTL */,
43943
- "luhn",
43944
- true,
43945
- ["*", "ar"]
43946
- ],
43947
- [
43948
- "CN_ID",
43949
- "[1-9]\\d{5}(?:18|19|20)\\d{2}(?:0[1-9]|1[0-2])(0[1-9]|[12]\\d|3[01])\\d{3}[0-9Xx]",
43950
- ["\u8EAB\u4EFD\u8BC1", "\u8EAB\u4EFD\u53F7\u7801", "id card", "china"],
43951
- 0.95,
43952
- "IDENTITY_INTL" /* IDENTITY_INTL */,
43953
- "cn_id",
43954
- true,
43955
- ["*", "zh"]
43956
- ],
43957
- [
43958
- "JA_MY_NUMBER",
43959
- "\\d{12}",
43960
- ["\u30DE\u30A4\u30CA\u30F3\u30D0\u30FC", "\u500B\u4EBA\u756A\u53F7", "my number", "japan"],
43961
- 0.94,
43962
- "IDENTITY_INTL" /* IDENTITY_INTL */,
43963
- "ja_id",
43964
- true,
43965
- ["*", "ja"]
43966
- ],
43967
43786
  [
43968
43787
  "ES_DNI",
43969
43788
  "(?:\\d{8}[A-Z]|[XYZ]\\d{7}[A-Z])",
@@ -43974,16 +43793,6 @@ var init_registry = __esm({
43974
43793
  true,
43975
43794
  ["*", "es"]
43976
43795
  ],
43977
- [
43978
- "INTL_PASSPORT",
43979
- "[A-Z0-9]{6,12}",
43980
- ["passport", "travel", "immigration", "visa"],
43981
- 0.6,
43982
- "IDENTITY_INTL" /* IDENTITY_INTL */,
43983
- null,
43984
- true,
43985
- ["*"]
43986
- ],
43987
43796
  // ── CORPORATE ──────────────────────────────────────────────────────
43988
43797
  [
43989
43798
  "CORP_EMPLOYEE_ID",
@@ -43999,7 +43808,7 @@ var init_registry = __esm({
43999
43808
  this.catalogue = /* @__PURE__ */ new Map();
44000
43809
  this.localeCategoryRegexMap = /* @__PURE__ */ new Map();
44001
43810
  this.buildCatalogue(loadGroups ?? null);
44002
- for (const loc of ["*", "en", "es", "fr", "de", "tr", "ar", "ja", "zh"]) {
43811
+ for (const loc of ["*", "en", "es"]) {
44003
43812
  this.compileForLocale(loc);
44004
43813
  }
44005
43814
  }
@@ -44177,47 +43986,6 @@ function checkIpv4Octets(raw) {
44177
43986
  }
44178
43987
  return true;
44179
43988
  }
44180
- function checkTcidNumber(raw) {
44181
- const digitsStr = raw.replace(/\D/g, "");
44182
- if (digitsStr.length !== 11) return false;
44183
- const d6 = digitsStr.split("").map(Number);
44184
- if (d6[0] === 0) return false;
44185
- if (d6[10] % 2 !== 0) return false;
44186
- const oddSum = d6[0] + d6[2] + d6[4] + d6[6] + d6[8];
44187
- const evenSum = d6[1] + d6[3] + d6[5] + d6[7];
44188
- const computedD10 = ((oddSum * 7 - evenSum) % 10 + 10) % 10;
44189
- if (computedD10 !== d6[9]) return false;
44190
- const firstTenSum = d6.slice(0, 10).reduce((a6, b6) => a6 + b6, 0);
44191
- if (firstTenSum % 10 !== d6[10]) return false;
44192
- return true;
44193
- }
44194
- function checkSaudiNid(raw) {
44195
- const digitsStr = raw.replace(/\D/g, "");
44196
- if (digitsStr.length !== 10) return false;
44197
- const d6 = digitsStr.split("").map(Number);
44198
- if (d6[0] !== 1) return false;
44199
- let total = 0;
44200
- for (let idx = 0; idx < 10; idx++) {
44201
- let val = d6[idx];
44202
- if (idx % 2 === 0) {
44203
- val *= 2;
44204
- if (val > 9) val -= 9;
44205
- }
44206
- total += val;
44207
- }
44208
- return total % 10 === 0;
44209
- }
44210
- function checkFrInsee(raw) {
44211
- let cleaned = raw.replace(/ /g, "").toUpperCase();
44212
- if (cleaned.length !== 15) return false;
44213
- cleaned = cleaned.replace(/2A/g, "19").replace(/2B/g, "18");
44214
- if (!/^\d+$/.test(cleaned)) return false;
44215
- const baseNumberStr = cleaned.slice(0, 13);
44216
- const expectedKey = parseInt(cleaned.slice(13), 10);
44217
- const baseNumber = BigInt(baseNumberStr);
44218
- const calculatedKey = 97n - baseNumber % 97n;
44219
- return Number(calculatedKey) === expectedKey;
44220
- }
44221
43989
  function checkCaSin(raw) {
44222
43990
  const digits = raw.replace(/\D/g, "");
44223
43991
  if (digits.length !== 9) return false;
@@ -44237,30 +44005,6 @@ function checkUkNino(raw) {
44237
44005
  if (cleaned.length !== 9) return false;
44238
44006
  return UK_NINO_REGEX.test(cleaned);
44239
44007
  }
44240
- function checkCnId(raw) {
44241
- const cleaned = raw.replace(/[^0-9X]/gi, "").toUpperCase();
44242
- if (cleaned.length !== 18) return false;
44243
- const weights = [7, 9, 10, 5, 8, 4, 2, 1, 6, 3, 7, 9, 10, 5, 8, 4, 2];
44244
- const checkDigits = "10X98765432";
44245
- let total = 0;
44246
- for (let i6 = 0; i6 < 17; i6++) {
44247
- total += parseInt(cleaned[i6], 10) * weights[i6];
44248
- }
44249
- return cleaned[17] === checkDigits[total % 11];
44250
- }
44251
- function checkJaId(raw) {
44252
- const digitsStr = raw.replace(/\D/g, "");
44253
- if (digitsStr.length !== 12) return false;
44254
- const d6 = digitsStr.split("").map(Number);
44255
- const weights = [6, 5, 4, 3, 2, 7, 6, 5, 4, 3, 2];
44256
- let total = 0;
44257
- for (let i6 = 0; i6 < 11; i6++) {
44258
- total += d6[i6] * weights[i6];
44259
- }
44260
- const remainder = total % 11;
44261
- const expected = remainder <= 1 ? 0 : 11 - remainder;
44262
- return d6[11] === expected;
44263
- }
44264
44008
  function checkEsId(raw) {
44265
44009
  const cleaned = raw.replace(/[\s-]/g, "").toUpperCase();
44266
44010
  if (cleaned.length !== 9) return false;
@@ -44391,13 +44135,8 @@ var init_handlers = __esm({
44391
44135
  vin_format: checkVinFormat,
44392
44136
  btc_format: checkBtcFormat,
44393
44137
  ipv4: checkIpv4Octets,
44394
- tcid: checkTcidNumber,
44395
- saudi_nid: checkSaudiNid,
44396
- fr_insee: checkFrInsee,
44397
44138
  ca_sin: checkCaSin,
44398
44139
  uk_nino: checkUkNino,
44399
- cn_id: checkCnId,
44400
- ja_id: checkJaId,
44401
44140
  es_id: checkEsId
44402
44141
  };
44403
44142
  exports.DLPValidationEngine = class {