mask-privacy 3.2.0 → 3.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -16
- package/dist/index.d.mts +3 -2
- package/dist/index.d.ts +3 -2
- package/dist/index.js +85 -91
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +85 -91
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
- package/src/core/dlp/handlers.ts +48 -0
- package/src/core/dlp/registry.ts +13 -7
- package/src/core/scanner.ts +17 -59
- package/src/core/transformers_scanner.ts +2 -0
- package/src/index.ts +1 -1
- package/tests/scanner.test.ts +16 -11
- package/tests/test_cross.ts +1 -1
- package/tsconfig.json +2 -2
package/dist/index.mjs
CHANGED
|
@@ -43555,7 +43555,7 @@ var init_registry = __esm({
|
|
|
43555
43555
|
[
|
|
43556
43556
|
"CREDIT_CARD_NUMBER",
|
|
43557
43557
|
"\\b(?:4\\d{3}|5[1-5]\\d{2}|3[47]\\d{2}|6(?:011|5\\d{2}))[- ]?\\d{4}[- ]?\\d{4}[- ]?\\d{4}\\b",
|
|
43558
|
-
["card", "credit", "visa", "mastercard", "amex", "payment"],
|
|
43558
|
+
["card", "credit", "visa", "mastercard", "amex", "payment", "tarjeta", "credito", "debito", "pago"],
|
|
43559
43559
|
0.97,
|
|
43560
43560
|
"FINANCIAL" /* FINANCIAL */,
|
|
43561
43561
|
"luhn"
|
|
@@ -43563,7 +43563,7 @@ var init_registry = __esm({
|
|
|
43563
43563
|
[
|
|
43564
43564
|
"INTL_BANK_IBAN",
|
|
43565
43565
|
"\\b[A-Z]{2}\\d{2}[A-Z0-9]{4}\\d{7}[A-Z0-9]{0,16}\\b",
|
|
43566
|
-
["iban", "swift", "sepa", "wire", "bank transfer"],
|
|
43566
|
+
["iban", "swift", "sepa", "wire", "bank transfer", "cuenta", "banco", "transferencia"],
|
|
43567
43567
|
0.96,
|
|
43568
43568
|
"FINANCIAL" /* FINANCIAL */,
|
|
43569
43569
|
"iban"
|
|
@@ -43600,6 +43600,16 @@ var init_registry = __esm({
|
|
|
43600
43600
|
"FINANCIAL" /* FINANCIAL */,
|
|
43601
43601
|
"luhn_soft"
|
|
43602
43602
|
],
|
|
43603
|
+
[
|
|
43604
|
+
"ES_CCC",
|
|
43605
|
+
"\\b\\d{4}[-\\s]?\\d{4}[-\\s]?\\d{2}[-\\s]?\\d{10}\\b",
|
|
43606
|
+
["cuenta", "ccc", "banco", "sucursal", "entidad", "codigo cuenta cliente"],
|
|
43607
|
+
0.9,
|
|
43608
|
+
"FINANCIAL" /* FINANCIAL */,
|
|
43609
|
+
"es_ccc",
|
|
43610
|
+
true,
|
|
43611
|
+
["*", "es"]
|
|
43612
|
+
],
|
|
43603
43613
|
[
|
|
43604
43614
|
"SWIFT_BIC",
|
|
43605
43615
|
"\\b[A-Z]{4}[A-Z]{2}[A-Z0-9]{2}(?:[A-Z0-9]{3})?\\b",
|
|
@@ -43612,15 +43622,15 @@ var init_registry = __esm({
|
|
|
43612
43622
|
[
|
|
43613
43623
|
"EMAIL_ADDR",
|
|
43614
43624
|
"\\b[A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z]{2,}\\b",
|
|
43615
|
-
["email", "mail", "contact", "address"],
|
|
43625
|
+
["email", "mail", "contact", "address", "correo", "electronico"],
|
|
43616
43626
|
0.99,
|
|
43617
43627
|
"CONTACT" /* CONTACT */,
|
|
43618
43628
|
null
|
|
43619
43629
|
],
|
|
43620
43630
|
[
|
|
43621
43631
|
"PHONE_NUM",
|
|
43622
|
-
/(?<!\d)(?:\+?[1-9]\d{0,3}[-.\s]?)?\(?\d{1,4}\)?[-.\s]?\d{1,4}[-.\s]?\d{1,4}[-.\s]?\d{1,9}(?!\d)/,
|
|
43623
|
-
["phone", "call", "mobile", "tel", "whatsapp", "number"],
|
|
43632
|
+
/(?<!\d)(?:\+?[1-9]\d{0,3}[-.\s]?)?\(?\d{1,4}\)?[-.\s]?\d{1,4}[-.\s]?\d{1,4}[-.\s]?\d{1,4}[-.\s]?\d{1,9}(?!\d)/,
|
|
43633
|
+
["phone", "call", "mobile", "tel", "whatsapp", "number", "tel\xE9fono", "telefono", "movil", "celular", "llamada"],
|
|
43624
43634
|
0.8,
|
|
43625
43635
|
"CONTACT" /* CONTACT */,
|
|
43626
43636
|
null
|
|
@@ -43660,8 +43670,8 @@ var init_registry = __esm({
|
|
|
43660
43670
|
// ── PERSONAL ───────────────────────────────────────────────────────
|
|
43661
43671
|
[
|
|
43662
43672
|
"BIRTH_DATE",
|
|
43663
|
-
"\\b(?:0[1-9]|1[0-2])[/-](?:0[1-9]|[12]\\d|3[01])[/-](?:19|20)\\d{2}\\b",
|
|
43664
|
-
["birth", "dob", "born", "birthday", "date of birth"],
|
|
43673
|
+
"\\b(?:(?:0[1-9]|1[0-2])[/-](?:0[1-9]|[12]\\d|3[01])[/-](?:19|20)\\d{2}|(?:19|20)\\d{2}[/-](?:0[1-9]|1[0-2])[/-](?:0[1-9]|[12]\\d|3[01]))\\b",
|
|
43674
|
+
["birth", "dob", "born", "birthday", "date of birth", "nacimiento", "fecha", "cumplea\xF1os"],
|
|
43665
43675
|
0.88,
|
|
43666
43676
|
"PERSONAL" /* PERSONAL */,
|
|
43667
43677
|
null
|
|
@@ -43768,6 +43778,16 @@ var init_registry = __esm({
|
|
|
43768
43778
|
true,
|
|
43769
43779
|
["*", "es"]
|
|
43770
43780
|
],
|
|
43781
|
+
[
|
|
43782
|
+
"ES_NUSS",
|
|
43783
|
+
"\\b\\d{2}[-\\s]?\\d{8}[-\\s]?\\d{2}\\b",
|
|
43784
|
+
["seguridad social", "nuss", "naf", "afiliacion"],
|
|
43785
|
+
0.9,
|
|
43786
|
+
"IDENTITY_INTL" /* IDENTITY_INTL */,
|
|
43787
|
+
"es_nuss",
|
|
43788
|
+
true,
|
|
43789
|
+
["*", "es"]
|
|
43790
|
+
],
|
|
43771
43791
|
// ── CORPORATE ──────────────────────────────────────────────────────
|
|
43772
43792
|
[
|
|
43773
43793
|
"CORP_EMPLOYEE_ID",
|
|
@@ -43998,6 +44018,38 @@ function checkEsId(raw) {
|
|
|
43998
44018
|
const validLetters = "TRWAGMYFPDXBNJZSQVHLCKE";
|
|
43999
44019
|
return cleaned[8] === validLetters[num % 23];
|
|
44000
44020
|
}
|
|
44021
|
+
function checkEsNuss(raw) {
|
|
44022
|
+
const digits = raw.replace(/\D/g, "");
|
|
44023
|
+
if (digits.length !== 12) return false;
|
|
44024
|
+
const a6 = parseInt(digits.slice(0, 2), 10);
|
|
44025
|
+
const b6 = parseInt(digits.slice(2, 10), 10);
|
|
44026
|
+
const c6 = parseInt(digits.slice(10), 10);
|
|
44027
|
+
let check;
|
|
44028
|
+
if (b6 < 1e7) {
|
|
44029
|
+
check = (a6 * 1e7 + b6) % 97;
|
|
44030
|
+
} else {
|
|
44031
|
+
check = Number(BigInt(digits.slice(0, 10)) % 97n);
|
|
44032
|
+
}
|
|
44033
|
+
return check === c6;
|
|
44034
|
+
}
|
|
44035
|
+
function checkEsCcc(raw) {
|
|
44036
|
+
const digits = raw.replace(/\D/g, "");
|
|
44037
|
+
if (digits.length !== 20) return false;
|
|
44038
|
+
const weights = [1, 2, 4, 8, 5, 10, 9, 7, 3, 6];
|
|
44039
|
+
const calcDigit = (block) => {
|
|
44040
|
+
let s6 = 0;
|
|
44041
|
+
for (let i6 = 0; i6 < block.length; i6++) {
|
|
44042
|
+
s6 += parseInt(block[i6], 10) * weights[i6];
|
|
44043
|
+
}
|
|
44044
|
+
let rem = 11 - s6 % 11;
|
|
44045
|
+
if (rem === 10) return 1;
|
|
44046
|
+
if (rem === 11) return 0;
|
|
44047
|
+
return rem;
|
|
44048
|
+
};
|
|
44049
|
+
const d1 = calcDigit("00" + digits.slice(0, 8));
|
|
44050
|
+
const d22 = calcDigit(digits.slice(10));
|
|
44051
|
+
return parseInt(digits[8], 10) === d1 && parseInt(digits[9], 10) === d22;
|
|
44052
|
+
}
|
|
44001
44053
|
var IBAN_COUNTRY_LENGTHS, VIN_TRANSLITERATION, VIN_WEIGHTS, UK_NINO_REGEX, VALIDATOR_DISPATCH, DLPValidationEngine;
|
|
44002
44054
|
var init_handlers = __esm({
|
|
44003
44055
|
"src/core/dlp/handlers.ts"() {
|
|
@@ -44104,6 +44156,7 @@ var init_handlers = __esm({
|
|
|
44104
44156
|
UK_NINO_REGEX = /^(?!BG|GB|NK|KN|TN|NT|ZZ)[A-CEGHJ-PR-TW-Z]{2}[0-9]{6}[A-D]$/;
|
|
44105
44157
|
VALIDATOR_DISPATCH = {
|
|
44106
44158
|
luhn: checkLuhn,
|
|
44159
|
+
luhn_soft: checkLuhn,
|
|
44107
44160
|
ssn_area: checkSsnArea,
|
|
44108
44161
|
iban: checkIbanStructure,
|
|
44109
44162
|
aba_check: checkAbaRouting,
|
|
@@ -44112,7 +44165,9 @@ var init_handlers = __esm({
|
|
|
44112
44165
|
ipv4: checkIpv4Octets,
|
|
44113
44166
|
ca_sin: checkCaSin,
|
|
44114
44167
|
uk_nino: checkUkNino,
|
|
44115
|
-
es_id: checkEsId
|
|
44168
|
+
es_id: checkEsId,
|
|
44169
|
+
es_nuss: checkEsNuss,
|
|
44170
|
+
es_ccc: checkEsCcc
|
|
44116
44171
|
};
|
|
44117
44172
|
DLPValidationEngine = class {
|
|
44118
44173
|
/**
|
|
@@ -58901,6 +58956,7 @@ var init_transformers_scanner = __esm({
|
|
|
58901
58956
|
const end = r6.end;
|
|
58902
58957
|
const val = text.slice(start, end);
|
|
58903
58958
|
const entityType = this._mapEntityType(r6.entity);
|
|
58959
|
+
if (!this._supportedEntities.includes(entityType)) continue;
|
|
58904
58960
|
let confidence = r6.score || 0.7;
|
|
58905
58961
|
if (aggressive || boostEntities.has(entityType.toLowerCase().replace(/_/g, " "))) {
|
|
58906
58962
|
confidence = Math.min(1, confidence + 0.2);
|
|
@@ -59003,7 +59059,7 @@ function getScanner() {
|
|
|
59003
59059
|
}
|
|
59004
59060
|
return scannerInstance;
|
|
59005
59061
|
}
|
|
59006
|
-
var _dlpLanguageResolver, _dlpPatternRegistry, _dlpValidationEngine, _dlpConfidenceScorer, REGEX_PATTERNS, CONTEXT_KEYWORDS, BaseScanner, PresidioScanner, scannerInstance;
|
|
59062
|
+
var _dlpLanguageResolver, _dlpPatternRegistry, _dlpValidationEngine, _dlpConfidenceScorer, BaseScanner, PresidioScanner, scannerInstance;
|
|
59007
59063
|
var init_scanner = __esm({
|
|
59008
59064
|
"src/core/scanner.ts"() {
|
|
59009
59065
|
init_config();
|
|
@@ -59018,39 +59074,12 @@ var init_scanner = __esm({
|
|
|
59018
59074
|
_dlpPatternRegistry = new DLPPatternRegistry();
|
|
59019
59075
|
_dlpValidationEngine = new DLPValidationEngine();
|
|
59020
59076
|
_dlpConfidenceScorer = new DLPConfidenceScorer();
|
|
59021
|
-
REGEX_PATTERNS = {
|
59022
|
-
"EMAIL_ADDRESS": /[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+/g,
|
|
59023
|
-
"PHONE_NUMBER": /(?<!\d)(?:\+?1?[\s\-.]?\(?\d{3}\)?[\s\-.]?\d{3}[\s\-.]?\d{4}|\d{3}[\s\-.]?\d{4})(?!\d)/g,
|
|
59024
|
-
"PHONE_NUMBER_INTL": /(?<!\d)\+(?:[1-9]\d{0,3})[-.\s]?\(?\d{1,5}\)?(?:[-.\s]?\d{2,4}){2,4}(?!\d)/g,
|
|
59025
|
-
"US_SSN": /(?<!\d)\d{3}-\d{2}-\d{4}(?!\d)/g,
|
|
59026
|
-
"CREDIT_CARD": /(?<!\d)(?:\d{4}[ \-]?){3}\d{4}(?!\d)/g,
|
|
59027
|
-
"US_ROUTING_NUMBER": /(?<!\d)\d{9}(?!\d)/g,
|
|
59028
|
-
"US_PASSPORT": /\b[A-Z]\d{8}\b/g,
|
|
59029
|
-
"DATE_OF_BIRTH": /\b(?:0[1-9]|1[0-2])\/(?:0[1-9]|[12]\d|3[01])\/(?:19|20)\d{2}\b|\b(?:19|20)\d{2}-(?:0[1-9]|1[0-2])-(?:0[1-9]|[12]\d|3[01])\b/g
|
|
59030
|
-
};
|
|
59031
|
-
CONTEXT_KEYWORDS = /* @__PURE__ */ new Set([
|
|
59032
|
-
"account number",
|
|
59033
|
-
"ssn",
|
|
59034
|
-
"phone",
|
|
59035
|
-
"credit card",
|
|
59036
|
-
"iban",
|
|
59037
|
-
"bank",
|
|
59038
|
-
"email",
|
|
59039
|
-
"pii",
|
|
59040
|
-
"personal info"
|
|
59041
|
-
]);
|
|
59042
|
-
BaseScanner = class _BaseScanner {
|
|
59077
|
+
BaseScanner = class {
|
|
59043
59078
|
constructor() {
|
|
59044
59079
|
this._supportedEntities = [
|
|
59045
|
-
"
|
|
59046
|
-
"
|
|
59047
|
-
"
|
|
59048
|
-
"CREDIT_CARD",
|
|
59049
|
-
"US_BANK_NUMBER",
|
|
59050
|
-
"CRYPTO",
|
|
59051
|
-
"IBAN_CODE",
|
|
59052
|
-
"IP_ADDRESS",
|
|
59053
|
-
"PERSON"
|
|
59080
|
+
"PERSON",
|
|
59081
|
+
"LOCATION",
|
|
59082
|
+
"ORGANIZATION"
|
|
59054
59083
|
];
|
|
59055
59084
|
}
|
|
59056
59085
|
setSupportedEntities(entities) {
|
|
@@ -59194,47 +59223,13 @@ var init_scanner = __esm({
|
|
|
59194
59223
|
}));
|
|
59195
59224
|
return [reconstruct(text, resolved), entities];
|
|
59196
59225
|
}
|
|
59226
|
+
/** Tier 1 — Deterministic detection (Legacy: Redirected to DLP) */
|
|
59197
59227
|
async _tier1CollectSpans(text, boostEntities, aggressive, confidenceThreshold) {
|
|
59198
|
-
const spans = [];
|
59199
|
-
for (const [entityType, pattern] of Object.entries(REGEX_PATTERNS)) {
|
|
59200
|
-
const re = new RegExp(pattern.source, pattern.flags);
|
|
59201
|
-
let match;
|
|
59202
|
-
while ((match = re.exec(text)) !== null) {
|
|
59203
|
-
const val = match[0];
|
|
59204
|
-
if (looksLikeToken(val)) continue;
|
|
59205
|
-
let confidence = aggressive || boostEntities.has(entityType.toLowerCase().replace(/_/g, " ")) ? 1 : 0.95;
|
|
59206
|
-
if (entityType === "CREDIT_CARD" && _BaseScanner._luhnChecksum(val)) confidence = Math.max(confidence, 0.99);
|
|
59207
|
-
if (entityType === "US_ROUTING_NUMBER" && !_BaseScanner._abaChecksum(val)) continue;
|
|
59208
|
-
if (confidence >= confidenceThreshold) {
|
|
59209
|
-
spans.push({
|
|
59210
|
-
start: match.index,
|
|
59211
|
-
end: match.index + val.length,
|
|
59212
|
-
entityType,
|
|
59213
|
-
originalValue: val,
|
|
59214
|
-
confidence,
|
|
59215
|
-
method: "regex"
|
|
59216
|
-
});
|
|
59217
|
-
}
|
|
59218
|
-
}
|
|
59219
|
-
}
|
|
59220
|
-
return spans;
|
|
59228
|
+
return this._tier0CollectSpans(text, confidenceThreshold);
|
|
59221
59229
|
}
|
|
59222
|
-
/** Backward-compat wrapper. */
|
|
59230
|
+
/** Backward-compat wrapper. Redirected to DLP. */
|
|
59223
59231
|
async _tier1Regex(text, encodeFn, boostEntities, aggressive, confidenceThreshold) {
|
|
59224
|
-
const spans = await this._tier1CollectSpans(text, boostEntities, aggressive, confidenceThreshold);
|
59225
|
-
const resolved = resolveOverlaps(spans);
|
|
59226
|
-
const entities = [];
|
|
59227
|
-
await Promise.all(resolved.map(async (span) => {
|
|
59228
|
-
span.maskedValue = await encodeFn(span.originalValue, { entityType: span.entityType });
|
|
59229
|
-
entities.push({
|
|
59230
|
-
type: span.entityType,
|
|
59231
|
-
value: span.originalValue,
|
|
59232
|
-
method: span.method,
|
|
59233
|
-
confidence: span.confidence,
|
|
59234
|
-
masked_value: span.maskedValue
|
|
59235
|
-
});
|
|
59236
|
-
}));
|
|
59237
|
-
return [reconstruct(text, resolved), entities];
|
|
59232
|
+
return this._tier0Dlp(text, encodeFn, confidenceThreshold);
|
|
59238
59233
|
}
|
|
59239
59234
|
async _tier2Nlp(text, encodeFn, boostEntities, aggressive, confidenceThreshold) {
|
|
59240
59235
|
return [text, []];
|
|
@@ -59243,24 +59238,26 @@ var init_scanner = __esm({
|
|
|
59243
59238
|
if (!context) return /* @__PURE__ */ new Set();
|
|
59244
59239
|
const lowered = context.toLowerCase();
|
|
59245
59240
|
const boosted = /* @__PURE__ */ new Set();
|
|
59246
|
-
for (const
|
|
59247
|
-
|
|
59241
|
+
for (const [, desc] of _dlpPatternRegistry.iterDescriptors()) {
|
|
59242
|
+
for (const term of desc.proximityTerms) {
|
|
59243
|
+
if (lowered.includes(term)) {
|
|
59244
|
+
boosted.add(desc.category.toLowerCase());
|
|
59245
|
+
break;
|
|
59246
|
+
}
|
|
59247
|
+
}
|
|
59248
59248
|
}
|
|
59249
59249
|
return boosted;
|
|
59250
59250
|
}
|
|
59251
59251
|
async scanAndTokenize(text, options = {}) {
|
|
59252
59252
|
if (!text || typeof text !== "string") return text;
|
|
59253
|
-
const pipeline = options.pipeline || ["dlp", "
|
|
59253
|
+
const pipeline = options.pipeline || ["dlp", "nlp"];
|
|
59254
59254
|
const _encode = options.encodeFn || encode;
|
|
59255
59255
|
const confidenceThreshold = options.confidenceThreshold ?? 0.7;
|
|
59256
59256
|
const boost = this._resolveBoost(options.context);
|
|
59257
59257
|
const allSpans = [];
|
|
59258
|
-
if (pipeline.includes("dlp")) {
|
|
59258
|
+
if (pipeline.includes("dlp") || pipeline.includes("regex") || pipeline.includes("checksum")) {
|
|
59259
59259
|
allSpans.push(...await this._tier0CollectSpans(text, confidenceThreshold));
|
|
59260
59260
|
}
|
|
59261
|
-
if (pipeline.includes("regex") || pipeline.includes("checksum")) {
|
|
59262
|
-
allSpans.push(...await this._tier1CollectSpans(text, boost, !!options.aggressive, confidenceThreshold));
|
|
59263
|
-
}
|
|
59264
59261
|
const resolved = resolveOverlaps(allSpans);
|
|
59265
59262
|
await Promise.all(resolved.map(async (span) => {
|
|
59266
59263
|
span.maskedValue = await _encode(span.originalValue, { entityType: span.entityType });
|
|
@@ -59273,18 +59270,15 @@ var init_scanner = __esm({
|
|
|
59273
59270
|
}
|
|
59274
59271
|
async scanAndReturnEntities(text, options = {}) {
|
|
59275
59272
|
if (!text || typeof text !== "string") return [];
|
|
59276
|
-
const pipeline = options.pipeline || ["dlp", "
|
|
59273
|
+
const pipeline = options.pipeline || ["dlp", "nlp"];
|
|
59277
59274
|
const _encode = options.encodeFn || encode;
|
|
59278
59275
|
const confidenceThreshold = options.confidenceThreshold ?? 0.7;
|
|
59279
59276
|
const boost = this._resolveBoost(options.context);
|
|
59280
59277
|
const allEntities = [];
|
|
59281
59278
|
const allSpans = [];
|
|
59282
|
-
if (pipeline.includes("dlp")) {
|
|
59279
|
+
if (pipeline.includes("dlp") || pipeline.includes("regex") || pipeline.includes("checksum")) {
|
|
59283
59280
|
allSpans.push(...await this._tier0CollectSpans(text, confidenceThreshold));
|
|
59284
59281
|
}
|
|
59285
|
-
if (pipeline.includes("regex") || pipeline.includes("checksum")) {
|
|
59286
|
-
allSpans.push(...await this._tier1CollectSpans(text, boost, !!options.aggressive, confidenceThreshold));
|
|
59287
|
-
}
|
|
59288
59282
|
const resolved = resolveOverlaps(allSpans);
|
|
59289
59283
|
await Promise.all(resolved.map(async (span) => {
|
|
59290
59284
|
span.maskedValue = await _encode(span.originalValue, { entityType: span.entityType });
|
|
@@ -59600,7 +59594,7 @@ init_handlers();
|
|
|
59600
59594
|
init_scorer();
|
|
59601
59595
|
|
|
59602
59596
|
// src/index.ts
|
|
59603
|
-
var VERSION = "3.2.0";
|
|
59597
|
+
var VERSION = "3.4.0";
|
|
59604
59598
|
async function detectEntitiesWithConfidence(text, options = {}) {
|
|
59605
59599
|
const scanner = getScanner();
|
|
59606
59600
|
return await scanner.scanAndReturnEntities(text, options);
|