mask-privacy 3.2.0 → 3.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.mts CHANGED
@@ -96,8 +96,9 @@ declare class BaseScanner {
96
96
  protected _tier0CollectSpans(text: string, confidenceThreshold: number): Promise<Span[]>;
97
97
  /** Backward-compat wrapper — collects spans then single-pass encodes. */
98
98
  protected _tier0Dlp(text: string, encodeFn: (val: string, options?: any) => Promise<string>, confidenceThreshold: number): Promise<[string, any[]]>;
99
+ /** Tier 1 — Deterministic detection (Legacy: Redirected to DLP) */
99
100
  protected _tier1CollectSpans(text: string, boostEntities: Set<string>, aggressive: boolean, confidenceThreshold: number): Promise<Span[]>;
100
- /** Backward-compat wrapper. */
101
+ /** Backward-compat wrapper. Redirected to DLP. */
101
102
  protected _tier1Regex(text: string, encodeFn: (val: string, options?: any) => Promise<string>, boostEntities: Set<string>, aggressive: boolean, confidenceThreshold: number): Promise<[string, any[]]>;
102
103
  protected _tier2Nlp(text: string, encodeFn: (val: string, options?: any) => Promise<string>, boostEntities: Set<string>, aggressive: boolean, confidenceThreshold: number): Promise<[string, any[]]>;
103
104
  protected _resolveBoost(context?: string | null): Set<string>;
@@ -480,7 +481,7 @@ declare class DLPConfidenceScorer {
480
481
  * Provides format-preserving encryption, local/distributed vaulting,
481
482
  * and framework-agnostic tool interception hooks.
482
483
  */
483
- declare const VERSION = "2.0.0";
484
+ declare const VERSION = "3.3.0";
484
485
 
485
486
  /**
486
487
  * Detect PII entities in text and return a list of objects with metadata.
package/dist/index.d.ts CHANGED
@@ -96,8 +96,9 @@ declare class BaseScanner {
96
96
  protected _tier0CollectSpans(text: string, confidenceThreshold: number): Promise<Span[]>;
97
97
  /** Backward-compat wrapper — collects spans then single-pass encodes. */
98
98
  protected _tier0Dlp(text: string, encodeFn: (val: string, options?: any) => Promise<string>, confidenceThreshold: number): Promise<[string, any[]]>;
99
+ /** Tier 1 — Deterministic detection (Legacy: Redirected to DLP) */
99
100
  protected _tier1CollectSpans(text: string, boostEntities: Set<string>, aggressive: boolean, confidenceThreshold: number): Promise<Span[]>;
100
- /** Backward-compat wrapper. */
101
+ /** Backward-compat wrapper. Redirected to DLP. */
101
102
  protected _tier1Regex(text: string, encodeFn: (val: string, options?: any) => Promise<string>, boostEntities: Set<string>, aggressive: boolean, confidenceThreshold: number): Promise<[string, any[]]>;
102
103
  protected _tier2Nlp(text: string, encodeFn: (val: string, options?: any) => Promise<string>, boostEntities: Set<string>, aggressive: boolean, confidenceThreshold: number): Promise<[string, any[]]>;
103
104
  protected _resolveBoost(context?: string | null): Set<string>;
@@ -480,7 +481,7 @@ declare class DLPConfidenceScorer {
480
481
  * Provides format-preserving encryption, local/distributed vaulting,
481
482
  * and framework-agnostic tool interception hooks.
482
483
  */
483
- declare const VERSION = "2.0.0";
484
+ declare const VERSION = "3.3.0";
484
485
 
485
486
  /**
486
487
  * Detect PII entities in text and return a list of objects with metadata.
package/dist/index.js CHANGED
@@ -43580,7 +43580,7 @@ var init_registry = __esm({
43580
43580
  [
43581
43581
  "CREDIT_CARD_NUMBER",
43582
43582
  "\\b(?:4\\d{3}|5[1-5]\\d{2}|3[47]\\d{2}|6(?:011|5\\d{2}))[- ]?\\d{4}[- ]?\\d{4}[- ]?\\d{4}\\b",
43583
- ["card", "credit", "visa", "mastercard", "amex", "payment"],
43583
+ ["card", "credit", "visa", "mastercard", "amex", "payment", "tarjeta", "credito", "debito", "pago"],
43584
43584
  0.97,
43585
43585
  "FINANCIAL" /* FINANCIAL */,
43586
43586
  "luhn"
@@ -43588,7 +43588,7 @@ var init_registry = __esm({
43588
43588
  [
43589
43589
  "INTL_BANK_IBAN",
43590
43590
  "\\b[A-Z]{2}\\d{2}[A-Z0-9]{4}\\d{7}[A-Z0-9]{0,16}\\b",
43591
- ["iban", "swift", "sepa", "wire", "bank transfer"],
43591
+ ["iban", "swift", "sepa", "wire", "bank transfer", "cuenta", "banco", "transferencia"],
43592
43592
  0.96,
43593
43593
  "FINANCIAL" /* FINANCIAL */,
43594
43594
  "iban"
@@ -43625,6 +43625,16 @@ var init_registry = __esm({
43625
43625
  "FINANCIAL" /* FINANCIAL */,
43626
43626
  "luhn_soft"
43627
43627
  ],
43628
+ [
43629
+ "ES_CCC",
43630
+ "\\b\\d{4}[-\\s]?\\d{4}[-\\s]?\\d{2}[-\\s]?\\d{10}\\b",
43631
+ ["cuenta", "ccc", "banco", "sucursal", "entidad", "codigo cuenta cliente"],
43632
+ 0.9,
43633
+ "FINANCIAL" /* FINANCIAL */,
43634
+ "es_ccc",
43635
+ true,
43636
+ ["*", "es"]
43637
+ ],
43628
43638
  [
43629
43639
  "SWIFT_BIC",
43630
43640
  "\\b[A-Z]{4}[A-Z]{2}[A-Z0-9]{2}(?:[A-Z0-9]{3})?\\b",
@@ -43637,15 +43647,15 @@ var init_registry = __esm({
43637
43647
  [
43638
43648
  "EMAIL_ADDR",
43639
43649
  "\\b[A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z]{2,}\\b",
43640
- ["email", "mail", "contact", "address"],
43650
+ ["email", "mail", "contact", "address", "correo", "electronico"],
43641
43651
  0.99,
43642
43652
  "CONTACT" /* CONTACT */,
43643
43653
  null
43644
43654
  ],
43645
43655
  [
43646
43656
  "PHONE_NUM",
43647
- /(?<!\d)(?:\+?[1-9]\d{0,3}[-.\s]?)?\(?\d{1,4}\)?[-.\s]?\d{1,4}[-.\s]?\d{1,4}[-.\s]?\d{1,9}(?!\d)/,
43648
- ["phone", "call", "mobile", "tel", "whatsapp", "number"],
43657
+ /(?<!\d)(?:\+?[1-9]\d{0,3}[-.\s]?)?\(?\d{1,4}\)?[-.\s]?\d{1,4}[-.\s]?\d{1,4}[-.\s]?\d{1,4}[-.\s]?\d{1,9}(?!\d)/,
43658
+ ["phone", "call", "mobile", "tel", "whatsapp", "number", "tel\xE9fono", "telefono", "movil", "celular", "llamada"],
43649
43659
  0.8,
43650
43660
  "CONTACT" /* CONTACT */,
43651
43661
  null
@@ -43685,8 +43695,8 @@ var init_registry = __esm({
43685
43695
  // ── PERSONAL ───────────────────────────────────────────────────────
43686
43696
  [
43687
43697
  "BIRTH_DATE",
43688
- "\\b(?:0[1-9]|1[0-2])[/-](?:0[1-9]|[12]\\d|3[01])[/-](?:19|20)\\d{2}\\b",
43689
- ["birth", "dob", "born", "birthday", "date of birth"],
43698
+ "\\b(?:(?:0[1-9]|1[0-2])[/-](?:0[1-9]|[12]\\d|3[01])[/-](?:19|20)\\d{2}|(?:19|20)\\d{2}[/-](?:0[1-9]|1[0-2])[/-](?:0[1-9]|[12]\\d|3[01]))\\b",
43699
+ ["birth", "dob", "born", "birthday", "date of birth", "nacimiento", "fecha", "cumplea\xF1os"],
43690
43700
  0.88,
43691
43701
  "PERSONAL" /* PERSONAL */,
43692
43702
  null
@@ -43793,6 +43803,16 @@ var init_registry = __esm({
43793
43803
  true,
43794
43804
  ["*", "es"]
43795
43805
  ],
43806
+ [
43807
+ "ES_NUSS",
43808
+ "\\b\\d{2}[-\\s]?\\d{8}[-\\s]?\\d{2}\\b",
43809
+ ["seguridad social", "nuss", "naf", "afiliacion"],
43810
+ 0.9,
43811
+ "IDENTITY_INTL" /* IDENTITY_INTL */,
43812
+ "es_nuss",
43813
+ true,
43814
+ ["*", "es"]
43815
+ ],
43796
43816
  // ── CORPORATE ──────────────────────────────────────────────────────
43797
43817
  [
43798
43818
  "CORP_EMPLOYEE_ID",
@@ -44023,6 +44043,38 @@ function checkEsId(raw) {
44023
44043
  const validLetters = "TRWAGMYFPDXBNJZSQVHLCKE";
44024
44044
  return cleaned[8] === validLetters[num % 23];
44025
44045
  }
44046
+ function checkEsNuss(raw) {
44047
+ const digits = raw.replace(/\D/g, "");
44048
+ if (digits.length !== 12) return false;
44049
+ const a6 = parseInt(digits.slice(0, 2), 10);
44050
+ const b6 = parseInt(digits.slice(2, 10), 10);
44051
+ const c6 = parseInt(digits.slice(10), 10);
44052
+ let check;
44053
+ if (b6 < 1e7) {
44054
+ check = (a6 * 1e7 + b6) % 97;
44055
+ } else {
44056
+ check = Number(BigInt(digits.slice(0, 10)) % 97n);
44057
+ }
44058
+ return check === c6;
44059
+ }
44060
+ function checkEsCcc(raw) {
44061
+ const digits = raw.replace(/\D/g, "");
44062
+ if (digits.length !== 20) return false;
44063
+ const weights = [1, 2, 4, 8, 5, 10, 9, 7, 3, 6];
44064
+ const calcDigit = (block) => {
44065
+ let s6 = 0;
44066
+ for (let i6 = 0; i6 < block.length; i6++) {
44067
+ s6 += parseInt(block[i6], 10) * weights[i6];
44068
+ }
44069
+ let rem = 11 - s6 % 11;
44070
+ if (rem === 10) return 1;
44071
+ if (rem === 11) return 0;
44072
+ return rem;
44073
+ };
44074
+ const d1 = calcDigit("00" + digits.slice(0, 8));
44075
+ const d22 = calcDigit(digits.slice(10));
44076
+ return parseInt(digits[8], 10) === d1 && parseInt(digits[9], 10) === d22;
44077
+ }
44026
44078
  var IBAN_COUNTRY_LENGTHS, VIN_TRANSLITERATION, VIN_WEIGHTS, UK_NINO_REGEX, VALIDATOR_DISPATCH; exports.DLPValidationEngine = void 0;
44027
44079
  var init_handlers = __esm({
44028
44080
  "src/core/dlp/handlers.ts"() {
@@ -44129,6 +44181,7 @@ var init_handlers = __esm({
44129
44181
  UK_NINO_REGEX = /^(?!BG|GB|NK|KN|TN|NT|ZZ)[A-CEGHJ-PR-TW-Z]{2}[0-9]{6}[A-D]$/;
44130
44182
  VALIDATOR_DISPATCH = {
44131
44183
  luhn: checkLuhn,
44184
+ luhn_soft: checkLuhn,
44132
44185
  ssn_area: checkSsnArea,
44133
44186
  iban: checkIbanStructure,
44134
44187
  aba_check: checkAbaRouting,
@@ -44137,7 +44190,9 @@ var init_handlers = __esm({
44137
44190
  ipv4: checkIpv4Octets,
44138
44191
  ca_sin: checkCaSin,
44139
44192
  uk_nino: checkUkNino,
44140
- es_id: checkEsId
44193
+ es_id: checkEsId,
44194
+ es_nuss: checkEsNuss,
44195
+ es_ccc: checkEsCcc
44141
44196
  };
44142
44197
  exports.DLPValidationEngine = class {
44143
44198
  /**
@@ -58926,6 +58981,7 @@ var init_transformers_scanner = __esm({
58926
58981
  const end = r6.end;
58927
58982
  const val = text.slice(start, end);
58928
58983
  const entityType = this._mapEntityType(r6.entity);
58984
+ if (!this._supportedEntities.includes(entityType)) continue;
58929
58985
  let confidence = r6.score || 0.7;
58930
58986
  if (aggressive || boostEntities.has(entityType.toLowerCase().replace(/_/g, " "))) {
58931
58987
  confidence = Math.min(1, confidence + 0.2);
@@ -59028,7 +59084,7 @@ function getScanner() {
59028
59084
  }
59029
59085
  return scannerInstance;
59030
59086
  }
59031
- var _dlpLanguageResolver, _dlpPatternRegistry, _dlpValidationEngine, _dlpConfidenceScorer, REGEX_PATTERNS, CONTEXT_KEYWORDS; exports.BaseScanner = void 0; exports.PresidioScanner = void 0; var scannerInstance;
59087
+ var _dlpLanguageResolver, _dlpPatternRegistry, _dlpValidationEngine, _dlpConfidenceScorer; exports.BaseScanner = void 0; exports.PresidioScanner = void 0; var scannerInstance;
59032
59088
  var init_scanner = __esm({
59033
59089
  "src/core/scanner.ts"() {
59034
59090
  init_config();
@@ -59043,39 +59099,12 @@ var init_scanner = __esm({
59043
59099
  _dlpPatternRegistry = new exports.DLPPatternRegistry();
59044
59100
  _dlpValidationEngine = new exports.DLPValidationEngine();
59045
59101
  _dlpConfidenceScorer = new exports.DLPConfidenceScorer();
59046
- REGEX_PATTERNS = {
59047
- "EMAIL_ADDRESS": /[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+/g,
59048
- "PHONE_NUMBER": /(?<!\d)(?:\+?1?[\s\-.]?\(?\d{3}\)?[\s\-.]?\d{3}[\s\-.]?\d{4}|\d{3}[\s\-.]?\d{4})(?!\d)/g,
59049
- "PHONE_NUMBER_INTL": /(?<!\d)\+(?:[1-9]\d{0,3})[-.\s]?\(?\d{1,5}\)?(?:[-.\s]?\d{2,4}){2,4}(?!\d)/g,
59050
- "US_SSN": /(?<!\d)\d{3}-\d{2}-\d{4}(?!\d)/g,
59051
- "CREDIT_CARD": /(?<!\d)(?:\d{4}[ \-]?){3}\d{4}(?!\d)/g,
59052
- "US_ROUTING_NUMBER": /(?<!\d)\d{9}(?!\d)/g,
59053
- "US_PASSPORT": /\b[A-Z]\d{8}\b/g,
59054
- "DATE_OF_BIRTH": /\b(?:0[1-9]|1[0-2])\/(?:0[1-9]|[12]\d|3[01])\/(?:19|20)\d{2}\b|\b(?:19|20)\d{2}-(?:0[1-9]|1[0-2])-(?:0[1-9]|[12]\d|3[01])\b/g
59055
- };
59056
- CONTEXT_KEYWORDS = /* @__PURE__ */ new Set([
59057
- "account number",
59058
- "ssn",
59059
- "phone",
59060
- "credit card",
59061
- "iban",
59062
- "bank",
59063
- "email",
59064
- "pii",
59065
- "personal info"
59066
- ]);
59067
- exports.BaseScanner = class _BaseScanner {
59102
+ exports.BaseScanner = class {
59068
59103
  constructor() {
59069
59104
  this._supportedEntities = [
59070
- "EMAIL_ADDRESS",
59071
- "PHONE_NUMBER",
59072
- "US_SSN",
59073
- "CREDIT_CARD",
59074
- "US_BANK_NUMBER",
59075
- "CRYPTO",
59076
- "IBAN_CODE",
59077
- "IP_ADDRESS",
59078
- "PERSON"
59105
+ "PERSON",
59106
+ "LOCATION",
59107
+ "ORGANIZATION"
59079
59108
  ];
59080
59109
  }
59081
59110
  setSupportedEntities(entities) {
@@ -59219,47 +59248,13 @@ var init_scanner = __esm({
59219
59248
  }));
59220
59249
  return [reconstruct(text, resolved), entities];
59221
59250
  }
59251
+ /** Tier 1 — Deterministic detection (Legacy: Redirected to DLP) */
59222
59252
  async _tier1CollectSpans(text, boostEntities, aggressive, confidenceThreshold) {
59223
- const spans = [];
59224
- for (const [entityType, pattern] of Object.entries(REGEX_PATTERNS)) {
59225
- const re = new RegExp(pattern.source, pattern.flags);
59226
- let match;
59227
- while ((match = re.exec(text)) !== null) {
59228
- const val = match[0];
59229
- if (looksLikeToken(val)) continue;
59230
- let confidence = aggressive || boostEntities.has(entityType.toLowerCase().replace(/_/g, " ")) ? 1 : 0.95;
59231
- if (entityType === "CREDIT_CARD" && _BaseScanner._luhnChecksum(val)) confidence = Math.max(confidence, 0.99);
59232
- if (entityType === "US_ROUTING_NUMBER" && !_BaseScanner._abaChecksum(val)) continue;
59233
- if (confidence >= confidenceThreshold) {
59234
- spans.push({
59235
- start: match.index,
59236
- end: match.index + val.length,
59237
- entityType,
59238
- originalValue: val,
59239
- confidence,
59240
- method: "regex"
59241
- });
59242
- }
59243
- }
59244
- }
59245
- return spans;
59253
+ return this._tier0CollectSpans(text, confidenceThreshold);
59246
59254
  }
59247
- /** Backward-compat wrapper. */
59255
+ /** Backward-compat wrapper. Redirected to DLP. */
59248
59256
  async _tier1Regex(text, encodeFn, boostEntities, aggressive, confidenceThreshold) {
59249
- const spans = await this._tier1CollectSpans(text, boostEntities, aggressive, confidenceThreshold);
59250
- const resolved = resolveOverlaps(spans);
59251
- const entities = [];
59252
- await Promise.all(resolved.map(async (span) => {
59253
- span.maskedValue = await encodeFn(span.originalValue, { entityType: span.entityType });
59254
- entities.push({
59255
- type: span.entityType,
59256
- value: span.originalValue,
59257
- method: span.method,
59258
- confidence: span.confidence,
59259
- masked_value: span.maskedValue
59260
- });
59261
- }));
59262
- return [reconstruct(text, resolved), entities];
59257
+ return this._tier0Dlp(text, encodeFn, confidenceThreshold);
59263
59258
  }
59264
59259
  async _tier2Nlp(text, encodeFn, boostEntities, aggressive, confidenceThreshold) {
59265
59260
  return [text, []];
@@ -59268,24 +59263,26 @@ var init_scanner = __esm({
59268
59263
  if (!context) return /* @__PURE__ */ new Set();
59269
59264
  const lowered = context.toLowerCase();
59270
59265
  const boosted = /* @__PURE__ */ new Set();
59271
- for (const kw of CONTEXT_KEYWORDS) {
59272
- if (lowered.includes(kw)) boosted.add(kw);
59266
+ for (const [, desc] of _dlpPatternRegistry.iterDescriptors()) {
59267
+ for (const term of desc.proximityTerms) {
59268
+ if (lowered.includes(term)) {
59269
+ boosted.add(desc.category.toLowerCase());
59270
+ break;
59271
+ }
59272
+ }
59273
59273
  }
59274
59274
  return boosted;
59275
59275
  }
59276
59276
  async scanAndTokenize(text, options = {}) {
59277
59277
  if (!text || typeof text !== "string") return text;
59278
- const pipeline = options.pipeline || ["dlp", "regex", "checksum", "nlp"];
59278
+ const pipeline = options.pipeline || ["dlp", "nlp"];
59279
59279
  const _encode = options.encodeFn || encode;
59280
59280
  const confidenceThreshold = options.confidenceThreshold ?? 0.7;
59281
59281
  const boost = this._resolveBoost(options.context);
59282
59282
  const allSpans = [];
59283
- if (pipeline.includes("dlp")) {
59283
+ if (pipeline.includes("dlp") || pipeline.includes("regex") || pipeline.includes("checksum")) {
59284
59284
  allSpans.push(...await this._tier0CollectSpans(text, confidenceThreshold));
59285
59285
  }
59286
- if (pipeline.includes("regex") || pipeline.includes("checksum")) {
59287
- allSpans.push(...await this._tier1CollectSpans(text, boost, !!options.aggressive, confidenceThreshold));
59288
- }
59289
59286
  const resolved = resolveOverlaps(allSpans);
59290
59287
  await Promise.all(resolved.map(async (span) => {
59291
59288
  span.maskedValue = await _encode(span.originalValue, { entityType: span.entityType });
@@ -59298,18 +59295,15 @@ var init_scanner = __esm({
59298
59295
  }
59299
59296
  async scanAndReturnEntities(text, options = {}) {
59300
59297
  if (!text || typeof text !== "string") return [];
59301
- const pipeline = options.pipeline || ["dlp", "regex", "checksum", "nlp"];
59298
+ const pipeline = options.pipeline || ["dlp", "nlp"];
59302
59299
  const _encode = options.encodeFn || encode;
59303
59300
  const confidenceThreshold = options.confidenceThreshold ?? 0.7;
59304
59301
  const boost = this._resolveBoost(options.context);
59305
59302
  const allEntities = [];
59306
59303
  const allSpans = [];
59307
- if (pipeline.includes("dlp")) {
59304
+ if (pipeline.includes("dlp") || pipeline.includes("regex") || pipeline.includes("checksum")) {
59308
59305
  allSpans.push(...await this._tier0CollectSpans(text, confidenceThreshold));
59309
59306
  }
59310
- if (pipeline.includes("regex") || pipeline.includes("checksum")) {
59311
- allSpans.push(...await this._tier1CollectSpans(text, boost, !!options.aggressive, confidenceThreshold));
59312
- }
59313
59307
  const resolved = resolveOverlaps(allSpans);
59314
59308
  await Promise.all(resolved.map(async (span) => {
59315
59309
  span.maskedValue = await _encode(span.originalValue, { entityType: span.entityType });
@@ -59625,7 +59619,7 @@ init_handlers();
59625
59619
  init_scorer();
59626
59620
 
59627
59621
  // src/index.ts
59628
- var VERSION = "2.0.0";
59622
+ var VERSION = "3.3.0";
59629
59623
  async function detectEntitiesWithConfidence(text, options = {}) {
59630
59624
  const scanner = getScanner();
59631
59625
  return await scanner.scanAndReturnEntities(text, options);