mask-privacy 3.2.0 → 3.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -43555,7 +43555,7 @@ var init_registry = __esm({
43555
43555
  [
43556
43556
  "CREDIT_CARD_NUMBER",
43557
43557
  "\\b(?:4\\d{3}|5[1-5]\\d{2}|3[47]\\d{2}|6(?:011|5\\d{2}))[- ]?\\d{4}[- ]?\\d{4}[- ]?\\d{4}\\b",
43558
- ["card", "credit", "visa", "mastercard", "amex", "payment"],
43558
+ ["card", "credit", "visa", "mastercard", "amex", "payment", "tarjeta", "credito", "debito", "pago"],
43559
43559
  0.97,
43560
43560
  "FINANCIAL" /* FINANCIAL */,
43561
43561
  "luhn"
@@ -43563,7 +43563,7 @@ var init_registry = __esm({
43563
43563
  [
43564
43564
  "INTL_BANK_IBAN",
43565
43565
  "\\b[A-Z]{2}\\d{2}[A-Z0-9]{4}\\d{7}[A-Z0-9]{0,16}\\b",
43566
- ["iban", "swift", "sepa", "wire", "bank transfer"],
43566
+ ["iban", "swift", "sepa", "wire", "bank transfer", "cuenta", "banco", "transferencia"],
43567
43567
  0.96,
43568
43568
  "FINANCIAL" /* FINANCIAL */,
43569
43569
  "iban"
@@ -43600,6 +43600,16 @@ var init_registry = __esm({
43600
43600
  "FINANCIAL" /* FINANCIAL */,
43601
43601
  "luhn_soft"
43602
43602
  ],
43603
+ [
43604
+ "ES_CCC",
43605
+ "\\b\\d{4}[-\\s]?\\d{4}[-\\s]?\\d{2}[-\\s]?\\d{10}\\b",
43606
+ ["cuenta", "ccc", "banco", "sucursal", "entidad", "codigo cuenta cliente"],
43607
+ 0.9,
43608
+ "FINANCIAL" /* FINANCIAL */,
43609
+ "es_ccc",
43610
+ true,
43611
+ ["*", "es"]
43612
+ ],
43603
43613
  [
43604
43614
  "SWIFT_BIC",
43605
43615
  "\\b[A-Z]{4}[A-Z]{2}[A-Z0-9]{2}(?:[A-Z0-9]{3})?\\b",
@@ -43612,15 +43622,15 @@ var init_registry = __esm({
43612
43622
  [
43613
43623
  "EMAIL_ADDR",
43614
43624
  "\\b[A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z]{2,}\\b",
43615
- ["email", "mail", "contact", "address"],
43625
+ ["email", "mail", "contact", "address", "correo", "electronico"],
43616
43626
  0.99,
43617
43627
  "CONTACT" /* CONTACT */,
43618
43628
  null
43619
43629
  ],
43620
43630
  [
43621
43631
  "PHONE_NUM",
43622
- /(?<!\d)(?:\+?[1-9]\d{0,3}[-.\s]?)?\(?\d{1,4}\)?[-.\s]?\d{1,4}[-.\s]?\d{1,4}[-.\s]?\d{1,9}(?!\d)/,
43623
- ["phone", "call", "mobile", "tel", "whatsapp", "number"],
43632
+ /(?<!\d)(?:\+?[1-9]\d{0,3}[-.\s]?)?\(?\d{1,4}\)?[-.\s]?\d{1,4}[-.\s]?\d{1,4}[-.\s]?\d{1,4}[-.\s]?\d{1,9}(?!\d)/,
43633
+ ["phone", "call", "mobile", "tel", "whatsapp", "number", "tel\xE9fono", "telefono", "movil", "celular", "llamada"],
43624
43634
  0.8,
43625
43635
  "CONTACT" /* CONTACT */,
43626
43636
  null
@@ -43660,8 +43670,8 @@ var init_registry = __esm({
43660
43670
  // ── PERSONAL ───────────────────────────────────────────────────────
43661
43671
  [
43662
43672
  "BIRTH_DATE",
43663
- "\\b(?:0[1-9]|1[0-2])[/-](?:0[1-9]|[12]\\d|3[01])[/-](?:19|20)\\d{2}\\b",
43664
- ["birth", "dob", "born", "birthday", "date of birth"],
43673
+ "\\b(?:(?:0[1-9]|1[0-2])[/-](?:0[1-9]|[12]\\d|3[01])[/-](?:19|20)\\d{2}|(?:19|20)\\d{2}[/-](?:0[1-9]|1[0-2])[/-](?:0[1-9]|[12]\\d|3[01]))\\b",
43674
+ ["birth", "dob", "born", "birthday", "date of birth", "nacimiento", "fecha", "cumplea\xF1os"],
43665
43675
  0.88,
43666
43676
  "PERSONAL" /* PERSONAL */,
43667
43677
  null
@@ -43768,6 +43778,16 @@ var init_registry = __esm({
43768
43778
  true,
43769
43779
  ["*", "es"]
43770
43780
  ],
43781
+ [
43782
+ "ES_NUSS",
43783
+ "\\b\\d{2}[-\\s]?\\d{8}[-\\s]?\\d{2}\\b",
43784
+ ["seguridad social", "nuss", "naf", "afiliacion"],
43785
+ 0.9,
43786
+ "IDENTITY_INTL" /* IDENTITY_INTL */,
43787
+ "es_nuss",
43788
+ true,
43789
+ ["*", "es"]
43790
+ ],
43771
43791
  // ── CORPORATE ──────────────────────────────────────────────────────
43772
43792
  [
43773
43793
  "CORP_EMPLOYEE_ID",
@@ -43998,6 +44018,38 @@ function checkEsId(raw) {
43998
44018
  const validLetters = "TRWAGMYFPDXBNJZSQVHLCKE";
43999
44019
  return cleaned[8] === validLetters[num % 23];
44000
44020
  }
44021
/**
 * Validate the mod-97 check digits of a 12-digit Spanish social-security
 * style number (registered under the "es_nuss" validator key).
 *
 * After stripping every non-digit character the value must contain exactly
 * 12 digits, read as: a 2-digit prefix, an 8-digit serial, and a 2-digit
 * control value.
 *
 * @param {string} raw - Candidate text; separators such as spaces or dashes are ignored.
 * @returns {boolean} `true` when the control value matches, `false` otherwise
 *   (including for non-string input or a wrong digit count).
 */
function checkEsNuss(raw) {
  // Non-string input cannot be a match; fail validation instead of throwing.
  if (typeof raw !== "string") return false;
  const digits = raw.replace(/\D/g, "");
  if (digits.length !== 12) return false;
  const prefix = Number.parseInt(digits.slice(0, 2), 10);
  const serial = Number.parseInt(digits.slice(2, 10), 10);
  const expected = Number.parseInt(digits.slice(10), 10);
  // A serial below 10,000,000 (i.e. written with a leading zero) is combined
  // with the prefix as if it had 7 digits; otherwise the first ten digits are
  // used as-is. Both values stay below 10^10, far inside the exact-integer
  // range of Number, so no BigInt arithmetic is required.
  const base = serial < 1e7 ? prefix * 1e7 + serial : prefix * 1e8 + serial;
  return base % 97 === expected;
}
44035
/**
 * Validate the two control digits of a 20-digit Spanish bank account code
 * (registered under the "es_ccc" validator key).
 *
 * After stripping every non-digit character the value must contain exactly
 * 20 digits. Digits 0-7 are checked by digit 8 and digits 10-19 are checked
 * by digit 9, both using the same weighted mod-11 scheme.
 *
 * @param {string} raw - Candidate text; separators such as spaces or dashes are ignored.
 * @returns {boolean} `true` when both control digits match, `false` otherwise
 *   (including for non-string input or a wrong digit count).
 */
function checkEsCcc(raw) {
  // Non-string input cannot be a match; fail validation instead of throwing.
  if (typeof raw !== "string") return false;
  const digits = raw.replace(/\D/g, "");
  if (digits.length !== 20) return false;
  const weights = [1, 2, 4, 8, 5, 10, 9, 7, 3, 6];
  // Weighted sum over a 10-digit block, folded into a single control digit.
  const calcDigit = (block) => {
    let sum = 0;
    for (let i = 0; i < weights.length; i++) {
      sum += Number.parseInt(block[i], 10) * weights[i];
    }
    const rem = 11 - (sum % 11);
    // 10 and 11 do not fit in one digit; they map to 1 and 0 respectively.
    if (rem === 10) return 1;
    if (rem === 11) return 0;
    return rem;
  };
  // The first block has only 8 digits, so it is left-padded with "00" to
  // line up with the 10-entry weight table.
  const firstControl = calcDigit("00" + digits.slice(0, 8));
  const secondControl = calcDigit(digits.slice(10));
  return (
    Number.parseInt(digits[8], 10) === firstControl &&
    Number.parseInt(digits[9], 10) === secondControl
  );
}
44001
44053
  var IBAN_COUNTRY_LENGTHS, VIN_TRANSLITERATION, VIN_WEIGHTS, UK_NINO_REGEX, VALIDATOR_DISPATCH, DLPValidationEngine;
44002
44054
  var init_handlers = __esm({
44003
44055
  "src/core/dlp/handlers.ts"() {
@@ -44104,6 +44156,7 @@ var init_handlers = __esm({
44104
44156
  UK_NINO_REGEX = /^(?!BG|GB|NK|KN|TN|NT|ZZ)[A-CEGHJ-PR-TW-Z]{2}[0-9]{6}[A-D]$/;
44105
44157
  VALIDATOR_DISPATCH = {
44106
44158
  luhn: checkLuhn,
44159
+ luhn_soft: checkLuhn,
44107
44160
  ssn_area: checkSsnArea,
44108
44161
  iban: checkIbanStructure,
44109
44162
  aba_check: checkAbaRouting,
@@ -44112,7 +44165,9 @@ var init_handlers = __esm({
44112
44165
  ipv4: checkIpv4Octets,
44113
44166
  ca_sin: checkCaSin,
44114
44167
  uk_nino: checkUkNino,
44115
- es_id: checkEsId
44168
+ es_id: checkEsId,
44169
+ es_nuss: checkEsNuss,
44170
+ es_ccc: checkEsCcc
44116
44171
  };
44117
44172
  DLPValidationEngine = class {
44118
44173
  /**
@@ -58901,6 +58956,7 @@ var init_transformers_scanner = __esm({
58901
58956
  const end = r6.end;
58902
58957
  const val = text.slice(start, end);
58903
58958
  const entityType = this._mapEntityType(r6.entity);
58959
+ if (!this._supportedEntities.includes(entityType)) continue;
58904
58960
  let confidence = r6.score || 0.7;
58905
58961
  if (aggressive || boostEntities.has(entityType.toLowerCase().replace(/_/g, " "))) {
58906
58962
  confidence = Math.min(1, confidence + 0.2);
@@ -59003,7 +59059,7 @@ function getScanner() {
59003
59059
  }
59004
59060
  return scannerInstance;
59005
59061
  }
59006
- var _dlpLanguageResolver, _dlpPatternRegistry, _dlpValidationEngine, _dlpConfidenceScorer, REGEX_PATTERNS, CONTEXT_KEYWORDS, BaseScanner, PresidioScanner, scannerInstance;
59062
+ var _dlpLanguageResolver, _dlpPatternRegistry, _dlpValidationEngine, _dlpConfidenceScorer, BaseScanner, PresidioScanner, scannerInstance;
59007
59063
  var init_scanner = __esm({
59008
59064
  "src/core/scanner.ts"() {
59009
59065
  init_config();
@@ -59018,39 +59074,12 @@ var init_scanner = __esm({
59018
59074
  _dlpPatternRegistry = new DLPPatternRegistry();
59019
59075
  _dlpValidationEngine = new DLPValidationEngine();
59020
59076
  _dlpConfidenceScorer = new DLPConfidenceScorer();
59021
- REGEX_PATTERNS = {
59022
- "EMAIL_ADDRESS": /[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+/g,
59023
- "PHONE_NUMBER": /(?<!\d)(?:\+?1?[\s\-.]?\(?\d{3}\)?[\s\-.]?\d{3}[\s\-.]?\d{4}|\d{3}[\s\-.]?\d{4})(?!\d)/g,
59024
- "PHONE_NUMBER_INTL": /(?<!\d)\+(?:[1-9]\d{0,3})[-.\s]?\(?\d{1,5}\)?(?:[-.\s]?\d{2,4}){2,4}(?!\d)/g,
59025
- "US_SSN": /(?<!\d)\d{3}-\d{2}-\d{4}(?!\d)/g,
59026
- "CREDIT_CARD": /(?<!\d)(?:\d{4}[ \-]?){3}\d{4}(?!\d)/g,
59027
- "US_ROUTING_NUMBER": /(?<!\d)\d{9}(?!\d)/g,
59028
- "US_PASSPORT": /\b[A-Z]\d{8}\b/g,
59029
- "DATE_OF_BIRTH": /\b(?:0[1-9]|1[0-2])\/(?:0[1-9]|[12]\d|3[01])\/(?:19|20)\d{2}\b|\b(?:19|20)\d{2}-(?:0[1-9]|1[0-2])-(?:0[1-9]|[12]\d|3[01])\b/g
59030
- };
59031
- CONTEXT_KEYWORDS = /* @__PURE__ */ new Set([
59032
- "account number",
59033
- "ssn",
59034
- "phone",
59035
- "credit card",
59036
- "iban",
59037
- "bank",
59038
- "email",
59039
- "pii",
59040
- "personal info"
59041
- ]);
59042
- BaseScanner = class _BaseScanner {
59077
+ BaseScanner = class {
59043
59078
  constructor() {
59044
59079
  this._supportedEntities = [
59045
- "EMAIL_ADDRESS",
59046
- "PHONE_NUMBER",
59047
- "US_SSN",
59048
- "CREDIT_CARD",
59049
- "US_BANK_NUMBER",
59050
- "CRYPTO",
59051
- "IBAN_CODE",
59052
- "IP_ADDRESS",
59053
- "PERSON"
59080
+ "PERSON",
59081
+ "LOCATION",
59082
+ "ORGANIZATION"
59054
59083
  ];
59055
59084
  }
59056
59085
  setSupportedEntities(entities) {
@@ -59194,47 +59223,13 @@ var init_scanner = __esm({
59194
59223
  }));
59195
59224
  return [reconstruct(text, resolved), entities];
59196
59225
  }
59226
+ /** Tier 1 — Deterministic detection (Legacy: Redirected to DLP) */
59197
59227
  async _tier1CollectSpans(text, boostEntities, aggressive, confidenceThreshold) {
59198
- const spans = [];
59199
- for (const [entityType, pattern] of Object.entries(REGEX_PATTERNS)) {
59200
- const re = new RegExp(pattern.source, pattern.flags);
59201
- let match;
59202
- while ((match = re.exec(text)) !== null) {
59203
- const val = match[0];
59204
- if (looksLikeToken(val)) continue;
59205
- let confidence = aggressive || boostEntities.has(entityType.toLowerCase().replace(/_/g, " ")) ? 1 : 0.95;
59206
- if (entityType === "CREDIT_CARD" && _BaseScanner._luhnChecksum(val)) confidence = Math.max(confidence, 0.99);
59207
- if (entityType === "US_ROUTING_NUMBER" && !_BaseScanner._abaChecksum(val)) continue;
59208
- if (confidence >= confidenceThreshold) {
59209
- spans.push({
59210
- start: match.index,
59211
- end: match.index + val.length,
59212
- entityType,
59213
- originalValue: val,
59214
- confidence,
59215
- method: "regex"
59216
- });
59217
- }
59218
- }
59219
- }
59220
- return spans;
59228
+ return this._tier0CollectSpans(text, confidenceThreshold);
59221
59229
  }
59222
- /** Backward-compat wrapper. */
59230
+ /** Backward-compat wrapper. Redirected to DLP. */
59223
59231
  async _tier1Regex(text, encodeFn, boostEntities, aggressive, confidenceThreshold) {
59224
- const spans = await this._tier1CollectSpans(text, boostEntities, aggressive, confidenceThreshold);
59225
- const resolved = resolveOverlaps(spans);
59226
- const entities = [];
59227
- await Promise.all(resolved.map(async (span) => {
59228
- span.maskedValue = await encodeFn(span.originalValue, { entityType: span.entityType });
59229
- entities.push({
59230
- type: span.entityType,
59231
- value: span.originalValue,
59232
- method: span.method,
59233
- confidence: span.confidence,
59234
- masked_value: span.maskedValue
59235
- });
59236
- }));
59237
- return [reconstruct(text, resolved), entities];
59232
+ return this._tier0Dlp(text, encodeFn, confidenceThreshold);
59238
59233
  }
59239
59234
  async _tier2Nlp(text, encodeFn, boostEntities, aggressive, confidenceThreshold) {
59240
59235
  return [text, []];
@@ -59243,24 +59238,26 @@ var init_scanner = __esm({
59243
59238
  if (!context) return /* @__PURE__ */ new Set();
59244
59239
  const lowered = context.toLowerCase();
59245
59240
  const boosted = /* @__PURE__ */ new Set();
59246
- for (const kw of CONTEXT_KEYWORDS) {
59247
- if (lowered.includes(kw)) boosted.add(kw);
59241
+ for (const [, desc] of _dlpPatternRegistry.iterDescriptors()) {
59242
+ for (const term of desc.proximityTerms) {
59243
+ if (lowered.includes(term)) {
59244
+ boosted.add(desc.category.toLowerCase());
59245
+ break;
59246
+ }
59247
+ }
59248
59248
  }
59249
59249
  return boosted;
59250
59250
  }
59251
59251
  async scanAndTokenize(text, options = {}) {
59252
59252
  if (!text || typeof text !== "string") return text;
59253
- const pipeline = options.pipeline || ["dlp", "regex", "checksum", "nlp"];
59253
+ const pipeline = options.pipeline || ["dlp", "nlp"];
59254
59254
  const _encode = options.encodeFn || encode;
59255
59255
  const confidenceThreshold = options.confidenceThreshold ?? 0.7;
59256
59256
  const boost = this._resolveBoost(options.context);
59257
59257
  const allSpans = [];
59258
- if (pipeline.includes("dlp")) {
59258
+ if (pipeline.includes("dlp") || pipeline.includes("regex") || pipeline.includes("checksum")) {
59259
59259
  allSpans.push(...await this._tier0CollectSpans(text, confidenceThreshold));
59260
59260
  }
59261
- if (pipeline.includes("regex") || pipeline.includes("checksum")) {
59262
- allSpans.push(...await this._tier1CollectSpans(text, boost, !!options.aggressive, confidenceThreshold));
59263
- }
59264
59261
  const resolved = resolveOverlaps(allSpans);
59265
59262
  await Promise.all(resolved.map(async (span) => {
59266
59263
  span.maskedValue = await _encode(span.originalValue, { entityType: span.entityType });
@@ -59273,18 +59270,15 @@ var init_scanner = __esm({
59273
59270
  }
59274
59271
  async scanAndReturnEntities(text, options = {}) {
59275
59272
  if (!text || typeof text !== "string") return [];
59276
- const pipeline = options.pipeline || ["dlp", "regex", "checksum", "nlp"];
59273
+ const pipeline = options.pipeline || ["dlp", "nlp"];
59277
59274
  const _encode = options.encodeFn || encode;
59278
59275
  const confidenceThreshold = options.confidenceThreshold ?? 0.7;
59279
59276
  const boost = this._resolveBoost(options.context);
59280
59277
  const allEntities = [];
59281
59278
  const allSpans = [];
59282
- if (pipeline.includes("dlp")) {
59279
+ if (pipeline.includes("dlp") || pipeline.includes("regex") || pipeline.includes("checksum")) {
59283
59280
  allSpans.push(...await this._tier0CollectSpans(text, confidenceThreshold));
59284
59281
  }
59285
- if (pipeline.includes("regex") || pipeline.includes("checksum")) {
59286
- allSpans.push(...await this._tier1CollectSpans(text, boost, !!options.aggressive, confidenceThreshold));
59287
- }
59288
59282
  const resolved = resolveOverlaps(allSpans);
59289
59283
  await Promise.all(resolved.map(async (span) => {
59290
59284
  span.maskedValue = await _encode(span.originalValue, { entityType: span.entityType });
@@ -59600,7 +59594,7 @@ init_handlers();
59600
59594
  init_scorer();
59601
59595
 
59602
59596
  // src/index.ts
59603
- var VERSION = "2.0.0";
59597
+ var VERSION = "3.4.0";
59604
59598
  async function detectEntitiesWithConfidence(text, options = {}) {
59605
59599
  const scanner = getScanner();
59606
59600
  return await scanner.scanAndReturnEntities(text, options);