mask-privacy 3.2.0 → 3.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mask-privacy",
3
- "version": "3.2.0",
3
+ "version": "3.4.0",
4
4
  "description": "Enterprise-grade AI Data Loss Prevention (DLP) SDK for TypeScript",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
@@ -198,12 +198,58 @@ export function checkEsId(raw: string): boolean {
198
198
  return cleaned[8] === validLetters[num % 23];
199
199
  }
200
200
 
201
+ // ── Spanish Social Security (NUSS) ──────────────────────────────────────────
202
+
203
+ export function checkEsNuss(raw: string): boolean {
204
+ const digits = raw.replace(/\D/g, "");
205
+ if (digits.length !== 12) return false;
206
+
207
+ const a = parseInt(digits.slice(0, 2), 10); // Province
208
+ const b = parseInt(digits.slice(2, 10), 10); // Number
209
+ const c = parseInt(digits.slice(10), 10); // Control
210
+
211
+ let check: number;
212
+ if (b < 10000000) {
213
+ check = (a * 10000000 + b) % 97;
214
+ } else {
215
+ check = Number(BigInt(digits.slice(0, 10)) % 97n);
216
+ }
217
+
218
+ return check === c;
219
+ }
220
+
221
+ // ── Spanish Bank Account (CCC) ──────────────────────────────────────────────
222
+
223
+ export function checkEsCcc(raw: string): boolean {
224
+ const digits = raw.replace(/\D/g, "");
225
+ if (digits.length !== 20) return false;
226
+
227
+ const weights = [1, 2, 4, 8, 5, 10, 9, 7, 3, 6];
228
+
229
+ const calcDigit = (block: string): number => {
230
+ let s = 0;
231
+ for (let i = 0; i < block.length; i++) {
232
+ s += parseInt(block[i], 10) * weights[i];
233
+ }
234
+ let rem = 11 - (s % 11);
235
+ if (rem === 10) return 1;
236
+ if (rem === 11) return 0;
237
+ return rem;
238
+ };
239
+
240
+ const d1 = calcDigit("00" + digits.slice(0, 8));
241
+ const d2 = calcDigit(digits.slice(10));
242
+
243
+ return parseInt(digits[8], 10) === d1 && parseInt(digits[9], 10) === d2;
244
+ }
245
+
201
246
  // ── Dispatcher ─────────────────────────────────────────────────────────────
202
247
 
203
248
  type ValidatorFn = (raw: string) => boolean;
204
249
 
205
250
  const VALIDATOR_DISPATCH: Record<string, ValidatorFn> = {
206
251
  luhn: checkLuhn,
252
+ luhn_soft: checkLuhn,
207
253
  ssn_area: checkSsnArea,
208
254
  iban: checkIbanStructure,
209
255
  aba_check: checkAbaRouting,
@@ -213,6 +259,8 @@ const VALIDATOR_DISPATCH: Record<string, ValidatorFn> = {
213
259
  ca_sin: checkCaSin,
214
260
  uk_nino: checkUkNino,
215
261
  es_id: checkEsId,
262
+ es_nuss: checkEsNuss,
263
+ es_ccc: checkEsCcc,
216
264
  };
217
265
 
218
266
  /**
@@ -79,10 +79,10 @@ const RAW_PATTERNS: RawEntry[] = [
79
79
  ["ssn", "social security", "tax id", "taxpayer"], 0.95, SensitiveCategory.FINANCIAL, "ssn_area"],
80
80
 
81
81
  ["CREDIT_CARD_NUMBER", "\\b(?:4\\d{3}|5[1-5]\\d{2}|3[47]\\d{2}|6(?:011|5\\d{2}))[- ]?\\d{4}[- ]?\\d{4}[- ]?\\d{4}\\b",
82
- ["card", "credit", "visa", "mastercard", "amex", "payment"], 0.97, SensitiveCategory.FINANCIAL, "luhn"],
82
+ ["card", "credit", "visa", "mastercard", "amex", "payment", "tarjeta", "credito", "debito", "pago"], 0.97, SensitiveCategory.FINANCIAL, "luhn"],
83
83
 
84
84
  ["INTL_BANK_IBAN", "\\b[A-Z]{2}\\d{2}[A-Z0-9]{4}\\d{7}[A-Z0-9]{0,16}\\b",
85
- ["iban", "swift", "sepa", "wire", "bank transfer"], 0.96, SensitiveCategory.FINANCIAL, "iban"],
85
+ ["iban", "swift", "sepa", "wire", "bank transfer", "cuenta", "banco", "transferencia"], 0.96, SensitiveCategory.FINANCIAL, "iban"],
86
86
 
87
87
  ["CRYPTO_BTC", "\\b(?:[13][a-km-zA-HJ-NP-Z1-9]{25,34}|bc1[a-z0-9]{39,59})\\b",
88
88
  ["bitcoin", "btc", "wallet", "crypto"], 0.94, SensitiveCategory.FINANCIAL, "btc_format"],
@@ -96,15 +96,18 @@ const RAW_PATTERNS: RawEntry[] = [
96
96
  ["BANK_ACCT_NUM", /(?<!\d)\d{8,17}(?!\d)/,
97
97
  ["account", "checking", "savings", "deposit", "bank"], 0.50, SensitiveCategory.FINANCIAL, "luhn_soft"],
98
98
 
99
+ ["ES_CCC", "\\b\\d{4}[-\\s]?\\d{4}[-\\s]?\\d{2}[-\\s]?\\d{10}\\b",
100
+ ["cuenta", "ccc", "banco", "sucursal", "entidad", "codigo cuenta cliente"], 0.90, SensitiveCategory.FINANCIAL, "es_ccc", true, ["*", "es"]],
101
+
99
102
  ["SWIFT_BIC", "\\b[A-Z]{4}[A-Z]{2}[A-Z0-9]{2}(?:[A-Z0-9]{3})?\\b",
100
103
  ["swift", "bic", "bank code", "transfer"], 0.60, SensitiveCategory.FINANCIAL, null],
101
104
 
102
105
  // ── CONTACT ────────────────────────────────────────────────────────
103
106
  ["EMAIL_ADDR", "\\b[A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z]{2,}\\b",
104
- ["email", "mail", "contact", "address"], 0.99, SensitiveCategory.CONTACT, null],
107
+ ["email", "mail", "contact", "address", "correo", "electronico"], 0.99, SensitiveCategory.CONTACT, null],
105
108
 
106
- ["PHONE_NUM", /(?<!\d)(?:\+?[1-9]\d{0,3}[-.\s]?)?\(?\d{1,4}\)?[-.\s]?\d{1,4}[-.\s]?\d{1,4}[-.\s]?\d{1,9}(?!\d)/,
107
- ["phone", "call", "mobile", "tel", "whatsapp", "number"], 0.80, SensitiveCategory.CONTACT, null],
109
+ ["PHONE_NUM", /(?<!\d)(?:\+?[1-9]\d{0,3}[-.\s]?)?\(?\d{1,4}\)?[-.\s]?\d{1,4}[-.\s]?\d{1,4}[-.\s]?\d{1,4}[-.\s]?\d{1,9}(?!\d)/,
110
+ ["phone", "call", "mobile", "tel", "whatsapp", "number", "teléfono", "telefono", "movil", "celular", "llamada"], 0.80, SensitiveCategory.CONTACT, null],
108
111
 
109
112
  ["PHONE_NUM_INTL", /(?<!\d)\+(?:[1-9]\d{0,3})[-.\s]?\(?\d{1,5}\)?(?:[-.\s]?\d{2,4}){2,4}(?!\d)/,
110
113
  ["phone", "call", "mobile", "tel"], 0.80, SensitiveCategory.CONTACT, null],
@@ -119,8 +122,8 @@ const RAW_PATTERNS: RawEntry[] = [
119
122
  ["mac", "hardware", "network", "device"], 0.91, SensitiveCategory.CONTACT, null],
120
123
 
121
124
  // ── PERSONAL ───────────────────────────────────────────────────────
122
- ["BIRTH_DATE", "\\b(?:0[1-9]|1[0-2])[/-](?:0[1-9]|[12]\\d|3[01])[/-](?:19|20)\\d{2}\\b",
123
- ["birth", "dob", "born", "birthday", "date of birth"], 0.88, SensitiveCategory.PERSONAL, null],
125
+ ["BIRTH_DATE", "\\b(?:(?:0[1-9]|1[0-2])[/-](?:0[1-9]|[12]\\d|3[01])[/-](?:19|20)\\d{2}|(?:19|20)\\d{2}[/-](?:0[1-9]|1[0-2])[/-](?:0[1-9]|[12]\\d|3[01]))\\b",
126
+ ["birth", "dob", "born", "birthday", "date of birth", "nacimiento", "fecha", "cumpleaños"], 0.88, SensitiveCategory.PERSONAL, null],
124
127
 
125
128
  ["US_DRIVERS_LIC", "\\b(?:[A-Z]\\d{7,12}|\\d{7,12}[A-Z]?)\\b",
126
129
  ["driver", "license", "licence", "dl", "dmv"], 0.55, SensitiveCategory.PERSONAL, null],
@@ -162,6 +165,9 @@ const RAW_PATTERNS: RawEntry[] = [
162
165
  ["ES_DNI", "(?:\\d{8}[A-Z]|[XYZ]\\d{7}[A-Z])",
163
166
  ["dni", "nie", "identidad", "nif", "spain"], 0.94, SensitiveCategory.IDENTITY_INTL, "es_id", true, ["*", "es"]],
164
167
 
168
+ ["ES_NUSS", "\\b\\d{2}[-\\s]?\\d{8}[-\\s]?\\d{2}\\b",
169
+ ["seguridad social", "nuss", "naf", "afiliacion"], 0.90, SensitiveCategory.IDENTITY_INTL, "es_nuss", true, ["*", "es"]],
170
+
165
171
  // ── CORPORATE ──────────────────────────────────────────────────────
166
172
  ["CORP_EMPLOYEE_ID", "(?:EMP|EMPLOYEE|ID)[:\\s]?[A-Z0-9]{5,10}",
167
173
  ["employee", "staff", "personnel", "worker"], 0.55, SensitiveCategory.CORPORATE, null],
@@ -27,31 +27,12 @@ const _dlpPatternRegistry = new DLPPatternRegistry();
27
27
  const _dlpValidationEngine = new DLPValidationEngine();
28
28
  const _dlpConfidenceScorer = new DLPConfidenceScorer();
29
29
 
30
- /** Regex patterns for Tier 1 deterministic detection */
31
- export const REGEX_PATTERNS: Record<string, RegExp> = {
32
- "EMAIL_ADDRESS": /[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+/g,
33
- "PHONE_NUMBER": /(?<!\d)(?:\+?1?[\s\-.]?\(?\d{3}\)?[\s\-.]?\d{3}[\s\-.]?\d{4}|\d{3}[\s\-.]?\d{4})(?!\d)/g,
34
- "PHONE_NUMBER_INTL": /(?<!\d)\+(?:[1-9]\d{0,3})[-.\s]?\(?\d{1,5}\)?(?:[-.\s]?\d{2,4}){2,4}(?!\d)/g,
35
- "US_SSN": /(?<!\d)\d{3}-\d{2}-\d{4}(?!\d)/g,
36
- "CREDIT_CARD": /(?<!\d)(?:\d{4}[ \-]?){3}\d{4}(?!\d)/g,
37
- "US_ROUTING_NUMBER": /(?<!\d)\d{9}(?!\d)/g,
38
- "US_PASSPORT": /\b[A-Z]\d{8}\b/g,
39
- "DATE_OF_BIRTH": /\b(?:0[1-9]|1[0-2])\/(?:0[1-9]|[12]\d|3[01])\/(?:19|20)\d{2}\b|\b(?:19|20)\d{2}-(?:0[1-9]|1[0-2])-(?:0[1-9]|[12]\d|3[01])\b/g,
40
- };
41
-
42
- /** Keywords whose presence boosts detection aggressiveness */
43
- export const CONTEXT_KEYWORDS = new Set([
44
- "account number", "ssn", "phone", "credit card",
45
- "iban", "bank", "email", "pii", "personal info",
46
- ]);
47
-
48
30
  export class BaseScanner {
49
31
  protected _supportedEntities: string[];
50
32
 
51
33
  constructor() {
52
34
  this._supportedEntities = [
53
- "EMAIL_ADDRESS", "PHONE_NUMBER", "US_SSN", "CREDIT_CARD",
54
- "US_BANK_NUMBER", "CRYPTO", "IBAN_CODE", "IP_ADDRESS", "PERSON",
35
+ "PERSON", "LOCATION", "ORGANIZATION",
55
36
  ];
56
37
  }
57
38
 
@@ -190,32 +171,17 @@ export class BaseScanner {
190
171
  return [reconstruct(text, resolved), entities];
191
172
  }
192
173
 
174
+ /** Tier 1 — Deterministic detection (Legacy: Redirected to DLP) */
193
175
  protected async _tier1CollectSpans(
194
176
  text: string,
195
177
  boostEntities: Set<string>,
196
178
  aggressive: boolean,
197
179
  confidenceThreshold: number,
198
180
  ): Promise<Span[]> {
199
- const spans: Span[] = [];
200
- for (const [entityType, pattern] of Object.entries(REGEX_PATTERNS)) {
201
- const re = new RegExp(pattern.source, pattern.flags);
202
- let match: RegExpExecArray | null;
203
- while ((match = re.exec(text)) !== null) {
204
- const val = match[0];
205
- if (looksLikeToken(val)) continue;
206
- let confidence = (aggressive || boostEntities.has(entityType.toLowerCase().replace(/_/g, ' '))) ? 1.0 : 0.95;
207
- if (entityType === 'CREDIT_CARD' && BaseScanner._luhnChecksum(val)) confidence = Math.max(confidence, 0.99);
208
- if (entityType === 'US_ROUTING_NUMBER' && !BaseScanner._abaChecksum(val)) continue;
209
- if (confidence >= confidenceThreshold) {
210
- spans.push({ start: match.index, end: match.index + val.length,
211
- entityType, originalValue: val, confidence, method: 'regex' });
212
- }
213
- }
214
- }
215
- return spans;
181
+ return this._tier0CollectSpans(text, confidenceThreshold);
216
182
  }
217
183
 
218
- /** Backward-compat wrapper. */
184
+ /** Backward-compat wrapper. Redirected to DLP. */
219
185
  protected async _tier1Regex(
220
186
  text: string,
221
187
  encodeFn: (val: string, options?: any) => Promise<string>,
@@ -223,15 +189,7 @@ export class BaseScanner {
223
189
  aggressive: boolean,
224
190
  confidenceThreshold: number,
225
191
  ): Promise<[string, any[]]> {
226
- const spans = await this._tier1CollectSpans(text, boostEntities, aggressive, confidenceThreshold);
227
- const resolved = resolveOverlaps(spans);
228
- const entities: any[] = [];
229
- await Promise.all(resolved.map(async (span) => {
230
- span.maskedValue = await encodeFn(span.originalValue, { entityType: span.entityType });
231
- entities.push({ type: span.entityType, value: span.originalValue,
232
- method: span.method, confidence: span.confidence, masked_value: span.maskedValue });
233
- }));
234
- return [reconstruct(text, resolved), entities];
192
+ return this._tier0Dlp(text, encodeFn, confidenceThreshold);
235
193
  }
236
194
 
237
195
  protected async _tier2Nlp(
@@ -248,8 +206,14 @@ export class BaseScanner {
248
206
  if (!context) return new Set();
249
207
  const lowered = context.toLowerCase();
250
208
  const boosted = new Set<string>();
251
- for (const kw of CONTEXT_KEYWORDS) {
252
- if (lowered.includes(kw)) boosted.add(kw);
209
+ // Scan registry descriptors to see if any proximity terms match the context
210
+ for (const [, desc] of _dlpPatternRegistry.iterDescriptors()) {
211
+ for (const term of desc.proximityTerms) {
212
+ if (lowered.includes(term)) {
213
+ boosted.add(desc.category.toLowerCase());
214
+ break;
215
+ }
216
+ }
253
217
  }
254
218
  return boosted;
255
219
  }
@@ -266,7 +230,7 @@ export class BaseScanner {
266
230
  ): Promise<string> {
267
231
  if (!text || typeof text !== 'string') return text;
268
232
 
269
- const pipeline = options.pipeline || ['dlp', 'regex', 'checksum', 'nlp'];
233
+ const pipeline = options.pipeline || ['dlp', 'nlp'];
270
234
  const _encode = options.encodeFn || encode;
271
235
  const confidenceThreshold = options.confidenceThreshold ?? 0.7;
272
236
  const boost = this._resolveBoost(options.context);
@@ -274,12 +238,9 @@ export class BaseScanner {
274
238
  // ── Span-accumulation phase (no string mutation) ─────────────────────
275
239
  const allSpans: Span[] = [];
276
240
 
277
- if (pipeline.includes('dlp')) {
241
+ if (pipeline.includes('dlp') || pipeline.includes('regex') || pipeline.includes('checksum')) {
278
242
  allSpans.push(...await this._tier0CollectSpans(text, confidenceThreshold));
279
243
  }
280
- if (pipeline.includes('regex') || pipeline.includes('checksum')) {
281
- allSpans.push(...await this._tier1CollectSpans(text, boost, !!options.aggressive, confidenceThreshold));
282
- }
283
244
 
284
245
  // ── Single-pass resolve + reconstruct ────────────────────────────────
285
246
  const resolved = resolveOverlaps(allSpans);
@@ -308,7 +269,7 @@ export class BaseScanner {
308
269
  ): Promise<any[]> {
309
270
  if (!text || typeof text !== 'string') return [];
310
271
 
311
- const pipeline = options.pipeline || ['dlp', 'regex', 'checksum', 'nlp'];
272
+ const pipeline = options.pipeline || ['dlp', 'nlp'];
312
273
  const _encode = options.encodeFn || encode;
313
274
  const confidenceThreshold = options.confidenceThreshold ?? 0.7;
314
275
  const boost = this._resolveBoost(options.context);
@@ -316,12 +277,9 @@ export class BaseScanner {
316
277
 
317
278
  // ── Span-accumulation phase ──────────────────────────────────────────
318
279
  const allSpans: Span[] = [];
319
- if (pipeline.includes('dlp')) {
280
+ if (pipeline.includes('dlp') || pipeline.includes('regex') || pipeline.includes('checksum')) {
320
281
  allSpans.push(...await this._tier0CollectSpans(text, confidenceThreshold));
321
282
  }
322
- if (pipeline.includes('regex') || pipeline.includes('checksum')) {
323
- allSpans.push(...await this._tier1CollectSpans(text, boost, !!options.aggressive, confidenceThreshold));
324
- }
325
283
 
326
284
  const resolved = resolveOverlaps(allSpans);
327
285
  await Promise.all(resolved.map(async (span) => {
@@ -129,6 +129,8 @@ export class LocalTransformersScanner extends BaseScanner {
129
129
  const val = text.slice(start, end);
130
130
 
131
131
  const entityType = this._mapEntityType(r.entity);
132
+ if (!this._supportedEntities.includes(entityType)) continue;
133
+
132
134
  let confidence = r.score || 0.7;
133
135
 
134
136
  if (aggressive || boostEntities.has(entityType.toLowerCase().replace(/_/g, " "))) {
package/src/index.ts CHANGED
@@ -6,7 +6,7 @@
6
6
  * and framework-agnostic tool interception hooks.
7
7
  */
8
8
 
9
- export const VERSION = "2.0.0";
9
+ export const VERSION = "3.4.0";
10
10
 
11
11
  export {
12
12
  getVault,
@@ -1,8 +1,16 @@
1
1
  import { describe, test, expect } from '@jest/globals';
2
- import { REGEX_PATTERNS, PresidioScanner } from '../src/core/scanner';
2
+ import { DLPPatternRegistry } from '../src/core/dlp/registry';
3
+ import { checkAbaRouting } from '../src/core/dlp/handlers';
4
+
5
+ const registry = new DLPPatternRegistry();
6
+ const getPattern = (name: string) => {
7
+ const desc = registry.descriptorFor(name);
8
+ if (!desc) throw new Error(`Pattern ${name} not found in registry`);
9
+ return new RegExp(desc.compiledRe.source, 'g');
10
+ };
3
11
 
4
12
  describe('TestInternationalPhonePatterns', () => {
5
- const pattern = new RegExp(REGEX_PATTERNS["PHONE_NUMBER_INTL"].source, 'g');
13
+ const pattern = getPattern("PHONE_NUM_INTL");
6
14
 
7
15
  test.each([
8
16
  "+44 20 7946 0958",
@@ -27,7 +35,7 @@ describe('TestInternationalPhonePatterns', () => {
27
35
  });
28
36
 
29
37
  describe('TestUSRoutingNumber', () => {
30
- const pattern = new RegExp(REGEX_PATTERNS["US_ROUTING_NUMBER"].source, 'g');
38
+ const pattern = getPattern("US_ABA_ROUTING");
31
39
 
32
40
  test('test_regex_matches_9_digit_number', () => {
33
41
  pattern.lastIndex = 0;
@@ -40,23 +48,20 @@ describe('TestUSRoutingNumber', () => {
40
48
  });
41
49
 
42
50
  test('test_aba_checksum_valid', () => {
43
- // @ts-ignore - accessing protected static for test
44
- expect(PresidioScanner._abaChecksum("021000021")).toBe(true);
51
+ expect(checkAbaRouting("021000021")).toBe(true);
45
52
  });
46
53
 
47
54
  test('test_aba_checksum_invalid', () => {
48
- // @ts-ignore
49
- expect(PresidioScanner._abaChecksum("123456789")).toBe(false);
55
+ expect(checkAbaRouting("123456789")).toBe(false);
50
56
  });
51
57
 
52
58
  test('test_aba_checksum_wrong_length', () => {
53
- // @ts-ignore
54
- expect(PresidioScanner._abaChecksum("12345")).toBe(false);
59
+ expect(checkAbaRouting("12345")).toBe(false);
55
60
  });
56
61
  });
57
62
 
58
63
  describe('TestUSPassport', () => {
59
- const pattern = new RegExp(REGEX_PATTERNS["US_PASSPORT"].source, 'g');
64
+ const pattern = getPattern("US_PASSPORT_NUM");
60
65
 
61
66
  test.each([
62
67
  "C12345678",
@@ -79,7 +84,7 @@ describe('TestUSPassport', () => {
79
84
  });
80
85
 
81
86
  describe('TestDateOfBirth', () => {
82
- const pattern = new RegExp(REGEX_PATTERNS["DATE_OF_BIRTH"].source, 'g');
87
+ const pattern = getPattern("BIRTH_DATE");
83
88
 
84
89
  test.each([
85
90
  "01/15/1990",
@@ -1,4 +1,4 @@
1
- import { CryptoEngine } from './src/core/crypto';
1
+ import { CryptoEngine } from '../src/core/crypto';
2
2
  import * as process from 'process';
3
3
 
4
4
  async function test() {
package/tsconfig.json CHANGED
@@ -17,6 +17,6 @@
17
17
  "types": ["node", "jest"],
18
18
  "typeRoots": ["./node_modules/@types"]
19
19
  },
20
- "include": ["src/**/*"],
21
- "exclude": ["node_modules", "tests", "dist"]
20
+ "include": ["src/**/*", "tests/**/*"],
21
+ "exclude": ["node_modules", "dist"]
22
22
  }