mask-privacy 3.0.0 → 3.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -17
- package/dist/index.d.mts +58 -27
- package/dist/index.d.ts +58 -27
- package/dist/index.js +394 -310
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +394 -310
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
- package/src/core/dlp/assessor.ts +3 -26
- package/src/core/dlp/handlers.ts +44 -31
- package/src/core/dlp/index.ts +0 -2
- package/src/core/dlp/registry.ts +119 -107
- package/src/core/dlp/scorer.ts +4 -4
- package/src/core/fpe.ts +85 -32
- package/src/core/fpe_utils.ts +20 -20
- package/src/core/scanner.ts +146 -151
- package/src/core/span.ts +76 -0
- package/src/core/transformers_scanner.ts +2 -2
- package/src/core/vault.ts +2 -1
- package/tests/async.test.ts +2 -2
- package/tests/dlp_hardened.test.ts +21 -0
- package/tests/fpe.test.ts +4 -4
- package/tests/hooks.test.ts +2 -2
- package/tests/langchain.test.ts +2 -2
- package/tests/llamaindex.test.ts +1 -1
- package/tests/scanner.test.ts +0 -1
- package/tests/substring.test.ts +1 -1
- package/tests/vault.test.ts +1 -1
package/README.md
CHANGED
|
@@ -87,12 +87,6 @@ Mask prevents the misidentification of real data as tokens by using universally
|
|
|
87
87
|
|
|
88
88
|
This prefix-based approach ensures that the SDK does not inadvertently process valid PII as an existing token.
|
|
89
89
|
|
|
90
|
-
Additional collision-proof prefixes for international identifiers:
|
|
91
|
-
* Turkish TCID tokens use the `990000` prefix (no valid Kimlik number starts with `99`).
|
|
92
|
-
* Saudi NID tokens use the `100000` prefix (length-constrained to avoid overlap with real IDs).
|
|
93
|
-
* UAE Emirates ID tokens use the `784-0000-` prefix (zeroed sub-fields are structurally invalid).
|
|
94
|
-
* IBAN tokens zero the check digits (`XX00...`), which always fails ISO 7064 Mod-97 verification.
|
|
95
|
-
|
|
96
90
|
### 4. Enterprise Async Support
|
|
97
91
|
Mask is built from the ground up for high-concurrency Node.js environments. All core operations are asynchronous and promise-based. Calling `encode()`, `decode()`, or `scanAndTokenize()` allows your event loop to remain unblocked while handling PII tokenization tasks.
|
|
98
92
|
|
|
@@ -135,7 +129,7 @@ Mask includes the ability to detokenize PII embedded within larger text blocks (
|
|
|
135
129
|
|
|
136
130
|
## Multilingual PII Detection (Waterfall Pipeline)
|
|
137
131
|
|
|
138
|
-
Mask is built for the global enterprise.
|
|
132
|
+
Mask is built for the global enterprise. The TypeScript SDK implements a **3-Tier Waterfall Detection** strategy for high-precision PII detection in **English and Spanish** using local ONNX models.
|
|
139
133
|
|
|
140
134
|
### Supported Language Matrix
|
|
141
135
|
|
|
@@ -145,12 +139,6 @@ Mask provides first-class support for the following languages:
|
|
|
145
139
|
| :--- | :--- | :--- | :--- |
|
|
146
140
|
| **English** | `en` | ✅ Full | DistilBERT (Simple) |
|
|
147
141
|
| **Spanish** | `es` | ✅ Full | BERT Multilingual |
|
|
148
|
-
| **French** | `fr` | ✅ Full | BERT Multilingual |
|
|
149
|
-
| **German** | `de` | ✅ Full | BERT Multilingual |
|
|
150
|
-
| **Turkish** | `tr` | ✅ Full | BERT Multilingual |
|
|
151
|
-
| **Arabic** | `ar` | ✅ Full | BERT Multilingual |
|
|
152
|
-
| **Japanese** | `ja` | ✅ Full | BERT Multilingual |
|
|
153
|
-
| **Chinese** | `zh` | ✅ Full | BERT Multilingual |
|
|
154
142
|
|
|
155
143
|
### How the Waterfall Works: The Excising Mechanism
|
|
156
144
|
|
|
@@ -165,11 +153,11 @@ To maintain high performance, the TypeScript SDK does not simply run three separ
|
|
|
165
153
|
|
|
166
154
|
### Configuration & Environment Variables
|
|
167
155
|
|
|
168
|
-
Configure your
|
|
156
|
+
Configure your language environment using standard variables.
|
|
169
157
|
|
|
170
158
|
| Variable | Default | Description |
|
|
171
159
|
| :--- | :--- | :--- |
|
|
172
|
-
| `MASK_LANGUAGES` | `en` | Comma-separated
|
|
160
|
+
| `MASK_LANGUAGES` | `en` | Comma-separated language codes. Supported: `en`, `es`. |
|
|
173
161
|
| `MASK_NLP_MODEL` | *(varies)* | Override the default model (e.g., `Xenova/bert-base-multilingual-cased-ner-hrl`). |
|
|
174
162
|
| `MASK_MODEL_CACHE_DIR` | `~/.cache` | Local directory for storing serialized ONNX models. |
|
|
175
163
|
| `MASK_NLP_MAX_WORKERS` | `4` | Number of worker processes/threads for NLP analysis. |
|
|
@@ -221,8 +209,8 @@ The TypeScript SDK manages AI models automatically via **Transformers.js**. For
|
|
|
221
209
|
```bash
|
|
222
210
|
npm install @huggingface/transformers # Required extra
|
|
223
211
|
|
|
224
|
-
# Pre-cache models for
|
|
225
|
-
export MASK_LANGUAGES="en,es
|
|
212
|
+
# Pre-cache models for English and Spanish
|
|
213
|
+
export MASK_LANGUAGES="en,es"
|
|
226
214
|
npx mask-privacy cache-models
|
|
227
215
|
```
|
|
228
216
|
|
package/dist/index.d.mts
CHANGED
|
@@ -14,6 +14,7 @@ type EncodeOptions = {
|
|
|
14
14
|
ttl?: number;
|
|
15
15
|
searchBuckets?: ('year' | 'month' | 'day' | 'numeric')[];
|
|
16
16
|
searchBucketSize?: number;
|
|
17
|
+
entityType?: string;
|
|
17
18
|
};
|
|
18
19
|
/**
|
|
19
20
|
* Tokenise rawText, encrypt it, store in vault, return the FPE token.
|
|
@@ -49,9 +50,40 @@ declare function looksLikeToken(value: string | any): boolean;
|
|
|
49
50
|
/** Clear the cached master key. Useful in tests. */
|
|
50
51
|
declare function resetMasterKey(): void;
|
|
51
52
|
/**
|
|
52
|
-
* Return a **deterministic**, format-preserving token for rawText.
|
|
53
|
+
* Return a **deterministic**, format-preserving token for rawText using its entityType.
|
|
54
|
+
*/
|
|
55
|
+
declare function generateFPEToken(rawText: string, entityType?: string): Promise<string>;
|
|
56
|
+
|
|
57
|
+
/**
|
|
58
|
+
* Span Resolution Engine — Sweep-Line Overlap Resolver (TypeScript).
|
|
59
|
+
*
|
|
60
|
+
* All detection tiers now return Span objects instead of mutating the text.
|
|
61
|
+
* resolveOverlaps() chooses the winning span in every conflicting region,
|
|
62
|
+
* and reconstruct() rebuilds the string exactly once.
|
|
63
|
+
*/
|
|
64
|
+
interface Span {
|
|
65
|
+
start: number;
|
|
66
|
+
end: number;
|
|
67
|
+
entityType: string;
|
|
68
|
+
originalValue: string;
|
|
69
|
+
confidence: number;
|
|
70
|
+
method: string;
|
|
71
|
+
language?: string;
|
|
72
|
+
maskedValue?: string;
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
/**
|
|
76
|
+
* Entity Detection Scanner — Tiered Waterfall Pipeline.
|
|
77
|
+
*
|
|
78
|
+
* Scans unstructured text to identify PII (Emails, Phones, SSNs, Credit Cards,
|
|
79
|
+
* Names) and replaces them in-place with Format-Preserving Encryption (FPE)
|
|
80
|
+
* tokens.
|
|
81
|
+
*
|
|
82
|
+
* Detection Architecture (Waterfall):
|
|
83
|
+
* Tier 0 — DLP Heuristic: Multilingual, 50+ types, checksum validators
|
|
84
|
+
* Tier 1 — Deterministic: Regex + Checksum (fast, provable, auditable)
|
|
85
|
+
* Tier 2 — Probabilistic: Local NLP via Transformers (catches names/orgs)
|
|
53
86
|
*/
|
|
54
|
-
declare function generateFPEToken(rawText: string): Promise<string>;
|
|
55
87
|
|
|
56
88
|
declare class BaseScanner {
|
|
57
89
|
protected _supportedEntities: string[];
|
|
@@ -61,19 +93,23 @@ declare class BaseScanner {
|
|
|
61
93
|
protected static _luhnChecksum(ccNumber: string): boolean;
|
|
62
94
|
/** Validate a US ABA routing number using the checksum algorithm. */
|
|
63
95
|
protected static _abaChecksum(routingNumber: string): boolean;
|
|
64
|
-
protected
|
|
65
|
-
|
|
66
|
-
protected
|
|
96
|
+
protected _tier0CollectSpans(text: string, confidenceThreshold: number): Promise<Span[]>;
|
|
97
|
+
/** Backward-compat wrapper — collects spans then single-pass encodes. */
|
|
98
|
+
protected _tier0Dlp(text: string, encodeFn: (val: string, options?: any) => Promise<string>, confidenceThreshold: number): Promise<[string, any[]]>;
|
|
99
|
+
protected _tier1CollectSpans(text: string, boostEntities: Set<string>, aggressive: boolean, confidenceThreshold: number): Promise<Span[]>;
|
|
100
|
+
/** Backward-compat wrapper. */
|
|
101
|
+
protected _tier1Regex(text: string, encodeFn: (val: string, options?: any) => Promise<string>, boostEntities: Set<string>, aggressive: boolean, confidenceThreshold: number): Promise<[string, any[]]>;
|
|
102
|
+
protected _tier2Nlp(text: string, encodeFn: (val: string, options?: any) => Promise<string>, boostEntities: Set<string>, aggressive: boolean, confidenceThreshold: number): Promise<[string, any[]]>;
|
|
67
103
|
protected _resolveBoost(context?: string | null): Set<string>;
|
|
68
104
|
scanAndTokenize(text: string, options?: {
|
|
69
|
-
encodeFn?: (val: string) => Promise<string>;
|
|
105
|
+
encodeFn?: (val: string, options?: any) => Promise<string>;
|
|
70
106
|
pipeline?: string[];
|
|
71
107
|
confidenceThreshold?: number;
|
|
72
108
|
context?: string | null;
|
|
73
109
|
aggressive?: boolean;
|
|
74
110
|
}): Promise<string>;
|
|
75
111
|
scanAndReturnEntities(text: string, options?: {
|
|
76
|
-
encodeFn?: (val: string) => Promise<string>;
|
|
112
|
+
encodeFn?: (val: string, options?: any) => Promise<string>;
|
|
77
113
|
pipeline?: string[];
|
|
78
114
|
confidenceThreshold?: number;
|
|
79
115
|
context?: string | null;
|
|
@@ -107,7 +143,7 @@ declare class LocalTransformersScanner extends BaseScanner {
|
|
|
107
143
|
* Map Transformer entity types to Mask internal entity types.
|
|
108
144
|
*/
|
|
109
145
|
private _mapEntityType;
|
|
110
|
-
protected _tier2Nlp(text: string, encodeFn: (val: string) => Promise<string>, boostEntities: Set<string>, aggressive: boolean, confidenceThreshold: number): Promise<[string, any[]]>;
|
|
146
|
+
protected _tier2Nlp(text: string, encodeFn: (val: string, options?: any) => Promise<string>, boostEntities: Set<string>, aggressive: boolean, confidenceThreshold: number): Promise<[string, any[]]>;
|
|
111
147
|
/**
|
|
112
148
|
* Merges sub-tokens and entities of the same type while precisely tracking
|
|
113
149
|
* offsets in the original text.
|
|
@@ -276,14 +312,8 @@ declare class MaskClient {
|
|
|
276
312
|
* Supported language tags:
|
|
277
313
|
* en — English (default / Latin-only fallback)
|
|
278
314
|
* es — Spanish
|
|
279
|
-
* fr — French
|
|
280
|
-
* de — German
|
|
281
|
-
* tr — Turkish
|
|
282
|
-
* ar — Arabic
|
|
283
|
-
* zh — Chinese
|
|
284
|
-
* ja — Japanese
|
|
285
315
|
*/
|
|
286
|
-
type LanguageTag = "en" | "es"
|
|
316
|
+
type LanguageTag = "en" | "es";
|
|
287
317
|
interface LanguageBreakdown {
|
|
288
318
|
language: LanguageTag;
|
|
289
319
|
breakdown: Record<string, number>;
|
|
@@ -296,8 +326,8 @@ interface LanguageBreakdown {
|
|
|
296
326
|
* @example
|
|
297
327
|
* ```ts
|
|
298
328
|
* const resolver = new LanguageContextResolver();
|
|
299
|
-
* const tag = resolver.resolve("
|
|
300
|
-
* // tag === "
|
|
329
|
+
* const tag = resolver.resolve("Hola, mi DNI es 12345678Z");
|
|
330
|
+
* // tag === "es"
|
|
301
331
|
* ```
|
|
302
332
|
*/
|
|
303
333
|
declare class LanguageContextResolver {
|
|
@@ -338,27 +368,28 @@ interface PatternDescriptor {
|
|
|
338
368
|
baseRisk: number;
|
|
339
369
|
category: SensitiveCategory;
|
|
340
370
|
validatorTag: string | null;
|
|
371
|
+
isHighEntropy: boolean;
|
|
372
|
+
supportedLocales: string[];
|
|
341
373
|
}
|
|
342
374
|
/**
|
|
343
375
|
* Immutable catalogue of sensitive-data regex signatures.
|
|
344
|
-
*
|
|
345
|
-
* @example
|
|
346
|
-
* ```ts
|
|
347
|
-
* const reg = new DLPPatternRegistry(); // load everything
|
|
348
|
-
* const reg = new DLPPatternRegistry(new Set([SensitiveCategory.FINANCIAL]));
|
|
349
|
-
* ```
|
|
350
376
|
*/
|
|
351
377
|
declare class DLPPatternRegistry {
|
|
352
378
|
private readonly catalogue;
|
|
379
|
+
private readonly localeCategoryRegexMap;
|
|
353
380
|
constructor(loadGroups?: ReadonlySet<SensitiveCategory>);
|
|
354
381
|
get typeNames(): string[];
|
|
355
382
|
/** Yield [typeName, descriptor] pairs. */
|
|
356
383
|
iterDescriptors(): IterableIterator<[string, PatternDescriptor]>;
|
|
357
384
|
descriptorFor(typeName: string): PatternDescriptor | undefined;
|
|
358
|
-
/** Return locale-tuned name regexes, falling back to English. */
|
|
359
385
|
namePatternsFor(lang: LanguageTag | string): RegExp[];
|
|
360
|
-
/** Return locale-tuned address regexes, falling back to English. */
|
|
361
386
|
addressPatternsFor(lang: LanguageTag | string): RegExp[];
|
|
387
|
+
getCategoryRegexesMap(locale?: string): Map<string, {
|
|
388
|
+
re: RegExp;
|
|
389
|
+
typeOrder: string[];
|
|
390
|
+
}>;
|
|
391
|
+
getCategoryTypeMap(categoryName: string, locale?: string): string[];
|
|
392
|
+
private compileForLocale;
|
|
362
393
|
private buildCatalogue;
|
|
363
394
|
}
|
|
364
395
|
|
|
@@ -420,8 +451,8 @@ interface ScoreInput {
|
|
|
420
451
|
* baseRisk: 0.92,
|
|
421
452
|
* matchStart: 10,
|
|
422
453
|
* matchEnd: 21,
|
|
423
|
-
* fullText: "
|
|
424
|
-
* proximityTerms: new Set(["
|
|
454
|
+
* fullText: "Mi número de DNI es 12345678Z",
|
|
455
|
+
* proximityTerms: new Set(["dni", "número"]),
|
|
425
456
|
* validatorPassed: true,
|
|
426
457
|
* });
|
|
427
458
|
* // score === 0.99 (validator override)
|
package/dist/index.d.ts
CHANGED
|
@@ -14,6 +14,7 @@ type EncodeOptions = {
|
|
|
14
14
|
ttl?: number;
|
|
15
15
|
searchBuckets?: ('year' | 'month' | 'day' | 'numeric')[];
|
|
16
16
|
searchBucketSize?: number;
|
|
17
|
+
entityType?: string;
|
|
17
18
|
};
|
|
18
19
|
/**
|
|
19
20
|
* Tokenise rawText, encrypt it, store in vault, return the FPE token.
|
|
@@ -49,9 +50,40 @@ declare function looksLikeToken(value: string | any): boolean;
|
|
|
49
50
|
/** Clear the cached master key. Useful in tests. */
|
|
50
51
|
declare function resetMasterKey(): void;
|
|
51
52
|
/**
|
|
52
|
-
* Return a **deterministic**, format-preserving token for rawText.
|
|
53
|
+
* Return a **deterministic**, format-preserving token for rawText using its entityType.
|
|
54
|
+
*/
|
|
55
|
+
declare function generateFPEToken(rawText: string, entityType?: string): Promise<string>;
|
|
56
|
+
|
|
57
|
+
/**
|
|
58
|
+
* Span Resolution Engine — Sweep-Line Overlap Resolver (TypeScript).
|
|
59
|
+
*
|
|
60
|
+
* All detection tiers now return Span objects instead of mutating the text.
|
|
61
|
+
* resolveOverlaps() chooses the winning span in every conflicting region,
|
|
62
|
+
* and reconstruct() rebuilds the string exactly once.
|
|
63
|
+
*/
|
|
64
|
+
interface Span {
|
|
65
|
+
start: number;
|
|
66
|
+
end: number;
|
|
67
|
+
entityType: string;
|
|
68
|
+
originalValue: string;
|
|
69
|
+
confidence: number;
|
|
70
|
+
method: string;
|
|
71
|
+
language?: string;
|
|
72
|
+
maskedValue?: string;
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
/**
|
|
76
|
+
* Entity Detection Scanner — Tiered Waterfall Pipeline.
|
|
77
|
+
*
|
|
78
|
+
* Scans unstructured text to identify PII (Emails, Phones, SSNs, Credit Cards,
|
|
79
|
+
* Names) and replaces them in-place with Format-Preserving Encryption (FPE)
|
|
80
|
+
* tokens.
|
|
81
|
+
*
|
|
82
|
+
* Detection Architecture (Waterfall):
|
|
83
|
+
* Tier 0 — DLP Heuristic: Multilingual, 50+ types, checksum validators
|
|
84
|
+
* Tier 1 — Deterministic: Regex + Checksum (fast, provable, auditable)
|
|
85
|
+
* Tier 2 — Probabilistic: Local NLP via Transformers (catches names/orgs)
|
|
53
86
|
*/
|
|
54
|
-
declare function generateFPEToken(rawText: string): Promise<string>;
|
|
55
87
|
|
|
56
88
|
declare class BaseScanner {
|
|
57
89
|
protected _supportedEntities: string[];
|
|
@@ -61,19 +93,23 @@ declare class BaseScanner {
|
|
|
61
93
|
protected static _luhnChecksum(ccNumber: string): boolean;
|
|
62
94
|
/** Validate a US ABA routing number using the checksum algorithm. */
|
|
63
95
|
protected static _abaChecksum(routingNumber: string): boolean;
|
|
64
|
-
protected
|
|
65
|
-
|
|
66
|
-
protected
|
|
96
|
+
protected _tier0CollectSpans(text: string, confidenceThreshold: number): Promise<Span[]>;
|
|
97
|
+
/** Backward-compat wrapper — collects spans then single-pass encodes. */
|
|
98
|
+
protected _tier0Dlp(text: string, encodeFn: (val: string, options?: any) => Promise<string>, confidenceThreshold: number): Promise<[string, any[]]>;
|
|
99
|
+
protected _tier1CollectSpans(text: string, boostEntities: Set<string>, aggressive: boolean, confidenceThreshold: number): Promise<Span[]>;
|
|
100
|
+
/** Backward-compat wrapper. */
|
|
101
|
+
protected _tier1Regex(text: string, encodeFn: (val: string, options?: any) => Promise<string>, boostEntities: Set<string>, aggressive: boolean, confidenceThreshold: number): Promise<[string, any[]]>;
|
|
102
|
+
protected _tier2Nlp(text: string, encodeFn: (val: string, options?: any) => Promise<string>, boostEntities: Set<string>, aggressive: boolean, confidenceThreshold: number): Promise<[string, any[]]>;
|
|
67
103
|
protected _resolveBoost(context?: string | null): Set<string>;
|
|
68
104
|
scanAndTokenize(text: string, options?: {
|
|
69
|
-
encodeFn?: (val: string) => Promise<string>;
|
|
105
|
+
encodeFn?: (val: string, options?: any) => Promise<string>;
|
|
70
106
|
pipeline?: string[];
|
|
71
107
|
confidenceThreshold?: number;
|
|
72
108
|
context?: string | null;
|
|
73
109
|
aggressive?: boolean;
|
|
74
110
|
}): Promise<string>;
|
|
75
111
|
scanAndReturnEntities(text: string, options?: {
|
|
76
|
-
encodeFn?: (val: string) => Promise<string>;
|
|
112
|
+
encodeFn?: (val: string, options?: any) => Promise<string>;
|
|
77
113
|
pipeline?: string[];
|
|
78
114
|
confidenceThreshold?: number;
|
|
79
115
|
context?: string | null;
|
|
@@ -107,7 +143,7 @@ declare class LocalTransformersScanner extends BaseScanner {
|
|
|
107
143
|
* Map Transformer entity types to Mask internal entity types.
|
|
108
144
|
*/
|
|
109
145
|
private _mapEntityType;
|
|
110
|
-
protected _tier2Nlp(text: string, encodeFn: (val: string) => Promise<string>, boostEntities: Set<string>, aggressive: boolean, confidenceThreshold: number): Promise<[string, any[]]>;
|
|
146
|
+
protected _tier2Nlp(text: string, encodeFn: (val: string, options?: any) => Promise<string>, boostEntities: Set<string>, aggressive: boolean, confidenceThreshold: number): Promise<[string, any[]]>;
|
|
111
147
|
/**
|
|
112
148
|
* Merges sub-tokens and entities of the same type while precisely tracking
|
|
113
149
|
* offsets in the original text.
|
|
@@ -276,14 +312,8 @@ declare class MaskClient {
|
|
|
276
312
|
* Supported language tags:
|
|
277
313
|
* en — English (default / Latin-only fallback)
|
|
278
314
|
* es — Spanish
|
|
279
|
-
* fr — French
|
|
280
|
-
* de — German
|
|
281
|
-
* tr — Turkish
|
|
282
|
-
* ar — Arabic
|
|
283
|
-
* zh — Chinese
|
|
284
|
-
* ja — Japanese
|
|
285
315
|
*/
|
|
286
|
-
type LanguageTag = "en" | "es"
|
|
316
|
+
type LanguageTag = "en" | "es";
|
|
287
317
|
interface LanguageBreakdown {
|
|
288
318
|
language: LanguageTag;
|
|
289
319
|
breakdown: Record<string, number>;
|
|
@@ -296,8 +326,8 @@ interface LanguageBreakdown {
|
|
|
296
326
|
* @example
|
|
297
327
|
* ```ts
|
|
298
328
|
* const resolver = new LanguageContextResolver();
|
|
299
|
-
* const tag = resolver.resolve("
|
|
300
|
-
* // tag === "
|
|
329
|
+
* const tag = resolver.resolve("Hola, mi DNI es 12345678Z");
|
|
330
|
+
* // tag === "es"
|
|
301
331
|
* ```
|
|
302
332
|
*/
|
|
303
333
|
declare class LanguageContextResolver {
|
|
@@ -338,27 +368,28 @@ interface PatternDescriptor {
|
|
|
338
368
|
baseRisk: number;
|
|
339
369
|
category: SensitiveCategory;
|
|
340
370
|
validatorTag: string | null;
|
|
371
|
+
isHighEntropy: boolean;
|
|
372
|
+
supportedLocales: string[];
|
|
341
373
|
}
|
|
342
374
|
/**
|
|
343
375
|
* Immutable catalogue of sensitive-data regex signatures.
|
|
344
|
-
*
|
|
345
|
-
* @example
|
|
346
|
-
* ```ts
|
|
347
|
-
* const reg = new DLPPatternRegistry(); // load everything
|
|
348
|
-
* const reg = new DLPPatternRegistry(new Set([SensitiveCategory.FINANCIAL]));
|
|
349
|
-
* ```
|
|
350
376
|
*/
|
|
351
377
|
declare class DLPPatternRegistry {
|
|
352
378
|
private readonly catalogue;
|
|
379
|
+
private readonly localeCategoryRegexMap;
|
|
353
380
|
constructor(loadGroups?: ReadonlySet<SensitiveCategory>);
|
|
354
381
|
get typeNames(): string[];
|
|
355
382
|
/** Yield [typeName, descriptor] pairs. */
|
|
356
383
|
iterDescriptors(): IterableIterator<[string, PatternDescriptor]>;
|
|
357
384
|
descriptorFor(typeName: string): PatternDescriptor | undefined;
|
|
358
|
-
/** Return locale-tuned name regexes, falling back to English. */
|
|
359
385
|
namePatternsFor(lang: LanguageTag | string): RegExp[];
|
|
360
|
-
/** Return locale-tuned address regexes, falling back to English. */
|
|
361
386
|
addressPatternsFor(lang: LanguageTag | string): RegExp[];
|
|
387
|
+
getCategoryRegexesMap(locale?: string): Map<string, {
|
|
388
|
+
re: RegExp;
|
|
389
|
+
typeOrder: string[];
|
|
390
|
+
}>;
|
|
391
|
+
getCategoryTypeMap(categoryName: string, locale?: string): string[];
|
|
392
|
+
private compileForLocale;
|
|
362
393
|
private buildCatalogue;
|
|
363
394
|
}
|
|
364
395
|
|
|
@@ -420,8 +451,8 @@ interface ScoreInput {
|
|
|
420
451
|
* baseRisk: 0.92,
|
|
421
452
|
* matchStart: 10,
|
|
422
453
|
* matchEnd: 21,
|
|
423
|
-
* fullText: "
|
|
424
|
-
* proximityTerms: new Set(["
|
|
454
|
+
* fullText: "Mi número de DNI es 12345678Z",
|
|
455
|
+
* proximityTerms: new Set(["dni", "número"]),
|
|
425
456
|
* validatorPassed: true,
|
|
426
457
|
* });
|
|
427
458
|
* // score === 0.99 (validator override)
|