mask-privacy 3.0.0 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +53 -16
- package/dist/index.d.ts +53 -16
- package/dist/index.js +536 -191
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +536 -191
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
- package/src/core/dlp/handlers.ts +111 -0
- package/src/core/dlp/registry.ts +138 -62
- package/src/core/dlp/scorer.ts +2 -2
- package/src/core/fpe.ts +137 -26
- package/src/core/fpe_utils.ts +35 -7
- package/src/core/scanner.ts +146 -151
- package/src/core/span.ts +76 -0
- package/src/core/transformers_scanner.ts +2 -2
- package/src/core/vault.ts +2 -1
- package/tests/async.test.ts +2 -2
- package/tests/dlp_hardened.test.ts +38 -0
- package/tests/fpe.test.ts +4 -4
- package/tests/hooks.test.ts +2 -2
- package/tests/langchain.test.ts +2 -2
- package/tests/llamaindex.test.ts +1 -1
- package/tests/scanner.test.ts +0 -1
- package/tests/substring.test.ts +1 -1
- package/tests/vault.test.ts +1 -1
package/dist/index.d.mts
CHANGED
|
@@ -14,6 +14,7 @@ type EncodeOptions = {
|
|
|
14
14
|
ttl?: number;
|
|
15
15
|
searchBuckets?: ('year' | 'month' | 'day' | 'numeric')[];
|
|
16
16
|
searchBucketSize?: number;
|
|
17
|
+
entityType?: string;
|
|
17
18
|
};
|
|
18
19
|
/**
|
|
19
20
|
* Tokenise rawText, encrypt it, store in vault, return the FPE token.
|
|
@@ -49,9 +50,40 @@ declare function looksLikeToken(value: string | any): boolean;
|
|
|
49
50
|
/** Clear the cached master key. Useful in tests. */
|
|
50
51
|
declare function resetMasterKey(): void;
|
|
51
52
|
/**
|
|
52
|
-
* Return a **deterministic**, format-preserving token for rawText.
|
|
53
|
+
* Return a **deterministic**, format-preserving token for rawText using its entityType.
|
|
54
|
+
*/
|
|
55
|
+
declare function generateFPEToken(rawText: string, entityType?: string): Promise<string>;
|
|
56
|
+
|
|
57
|
+
/**
|
|
58
|
+
* Span Resolution Engine — Sweep-Line Overlap Resolver (TypeScript).
|
|
59
|
+
*
|
|
60
|
+
* All detection tiers now return Span objects instead of mutating the text.
|
|
61
|
+
* resolveOverlaps() chooses the winning span in every conflicting region,
|
|
62
|
+
* and reconstruct() rebuilds the string exactly once.
|
|
63
|
+
*/
|
|
64
|
+
interface Span {
|
|
65
|
+
start: number;
|
|
66
|
+
end: number;
|
|
67
|
+
entityType: string;
|
|
68
|
+
originalValue: string;
|
|
69
|
+
confidence: number;
|
|
70
|
+
method: string;
|
|
71
|
+
language?: string;
|
|
72
|
+
maskedValue?: string;
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
/**
|
|
76
|
+
* Entity Detection Scanner — Tiered Waterfall Pipeline.
|
|
77
|
+
*
|
|
78
|
+
* Scans unstructured text to identify PII (Emails, Phones, SSNs, Credit Cards,
|
|
79
|
+
* Names) and replaces them in-place with Format-Preserving Encryption (FPE)
|
|
80
|
+
* tokens.
|
|
81
|
+
*
|
|
82
|
+
* Detection Architecture (Waterfall):
|
|
83
|
+
* Tier 0 — DLP Heuristic: Multilingual, 50+ types, checksum validators
|
|
84
|
+
* Tier 1 — Deterministic: Regex + Checksum (fast, provable, auditable)
|
|
85
|
+
* Tier 2 — Probabilistic: Local NLP via Transformers (catches names/orgs)
|
|
53
86
|
*/
|
|
54
|
-
declare function generateFPEToken(rawText: string): Promise<string>;
|
|
55
87
|
|
|
56
88
|
declare class BaseScanner {
|
|
57
89
|
protected _supportedEntities: string[];
|
|
@@ -61,19 +93,23 @@ declare class BaseScanner {
|
|
|
61
93
|
protected static _luhnChecksum(ccNumber: string): boolean;
|
|
62
94
|
/** Validate a US ABA routing number using the checksum algorithm. */
|
|
63
95
|
protected static _abaChecksum(routingNumber: string): boolean;
|
|
64
|
-
protected
|
|
65
|
-
|
|
66
|
-
protected
|
|
96
|
+
protected _tier0CollectSpans(text: string, confidenceThreshold: number): Promise<Span[]>;
|
|
97
|
+
/** Backward-compat wrapper — collects spans then single-pass encodes. */
|
|
98
|
+
protected _tier0Dlp(text: string, encodeFn: (val: string, options?: any) => Promise<string>, confidenceThreshold: number): Promise<[string, any[]]>;
|
|
99
|
+
protected _tier1CollectSpans(text: string, boostEntities: Set<string>, aggressive: boolean, confidenceThreshold: number): Promise<Span[]>;
|
|
100
|
+
/** Backward-compat wrapper. */
|
|
101
|
+
protected _tier1Regex(text: string, encodeFn: (val: string, options?: any) => Promise<string>, boostEntities: Set<string>, aggressive: boolean, confidenceThreshold: number): Promise<[string, any[]]>;
|
|
102
|
+
protected _tier2Nlp(text: string, encodeFn: (val: string, options?: any) => Promise<string>, boostEntities: Set<string>, aggressive: boolean, confidenceThreshold: number): Promise<[string, any[]]>;
|
|
67
103
|
protected _resolveBoost(context?: string | null): Set<string>;
|
|
68
104
|
scanAndTokenize(text: string, options?: {
|
|
69
|
-
encodeFn?: (val: string) => Promise<string>;
|
|
105
|
+
encodeFn?: (val: string, options?: any) => Promise<string>;
|
|
70
106
|
pipeline?: string[];
|
|
71
107
|
confidenceThreshold?: number;
|
|
72
108
|
context?: string | null;
|
|
73
109
|
aggressive?: boolean;
|
|
74
110
|
}): Promise<string>;
|
|
75
111
|
scanAndReturnEntities(text: string, options?: {
|
|
76
|
-
encodeFn?: (val: string) => Promise<string>;
|
|
112
|
+
encodeFn?: (val: string, options?: any) => Promise<string>;
|
|
77
113
|
pipeline?: string[];
|
|
78
114
|
confidenceThreshold?: number;
|
|
79
115
|
context?: string | null;
|
|
@@ -107,7 +143,7 @@ declare class LocalTransformersScanner extends BaseScanner {
|
|
|
107
143
|
* Map Transformer entity types to Mask internal entity types.
|
|
108
144
|
*/
|
|
109
145
|
private _mapEntityType;
|
|
110
|
-
protected _tier2Nlp(text: string, encodeFn: (val: string) => Promise<string>, boostEntities: Set<string>, aggressive: boolean, confidenceThreshold: number): Promise<[string, any[]]>;
|
|
146
|
+
protected _tier2Nlp(text: string, encodeFn: (val: string, options?: any) => Promise<string>, boostEntities: Set<string>, aggressive: boolean, confidenceThreshold: number): Promise<[string, any[]]>;
|
|
111
147
|
/**
|
|
112
148
|
* Merges sub-tokens and entities of the same type while precisely tracking
|
|
113
149
|
* offsets in the original text.
|
|
@@ -338,27 +374,28 @@ interface PatternDescriptor {
|
|
|
338
374
|
baseRisk: number;
|
|
339
375
|
category: SensitiveCategory;
|
|
340
376
|
validatorTag: string | null;
|
|
377
|
+
isHighEntropy: boolean;
|
|
378
|
+
supportedLocales: string[];
|
|
341
379
|
}
|
|
342
380
|
/**
|
|
343
381
|
* Immutable catalogue of sensitive-data regex signatures.
|
|
344
|
-
*
|
|
345
|
-
* @example
|
|
346
|
-
* ```ts
|
|
347
|
-
* const reg = new DLPPatternRegistry(); // load everything
|
|
348
|
-
* const reg = new DLPPatternRegistry(new Set([SensitiveCategory.FINANCIAL]));
|
|
349
|
-
* ```
|
|
350
382
|
*/
|
|
351
383
|
declare class DLPPatternRegistry {
|
|
352
384
|
private readonly catalogue;
|
|
385
|
+
private readonly localeCategoryRegexMap;
|
|
353
386
|
constructor(loadGroups?: ReadonlySet<SensitiveCategory>);
|
|
354
387
|
get typeNames(): string[];
|
|
355
388
|
/** Yield [typeName, descriptor] pairs. */
|
|
356
389
|
iterDescriptors(): IterableIterator<[string, PatternDescriptor]>;
|
|
357
390
|
descriptorFor(typeName: string): PatternDescriptor | undefined;
|
|
358
|
-
/** Return locale-tuned name regexes, falling back to English. */
|
|
359
391
|
namePatternsFor(lang: LanguageTag | string): RegExp[];
|
|
360
|
-
/** Return locale-tuned address regexes, falling back to English. */
|
|
361
392
|
addressPatternsFor(lang: LanguageTag | string): RegExp[];
|
|
393
|
+
getCategoryRegexesMap(locale?: string): Map<string, {
|
|
394
|
+
re: RegExp;
|
|
395
|
+
typeOrder: string[];
|
|
396
|
+
}>;
|
|
397
|
+
getCategoryTypeMap(categoryName: string, locale?: string): string[];
|
|
398
|
+
private compileForLocale;
|
|
362
399
|
private buildCatalogue;
|
|
363
400
|
}
|
|
364
401
|
|
package/dist/index.d.ts
CHANGED
|
@@ -14,6 +14,7 @@ type EncodeOptions = {
|
|
|
14
14
|
ttl?: number;
|
|
15
15
|
searchBuckets?: ('year' | 'month' | 'day' | 'numeric')[];
|
|
16
16
|
searchBucketSize?: number;
|
|
17
|
+
entityType?: string;
|
|
17
18
|
};
|
|
18
19
|
/**
|
|
19
20
|
* Tokenise rawText, encrypt it, store in vault, return the FPE token.
|
|
@@ -49,9 +50,40 @@ declare function looksLikeToken(value: string | any): boolean;
|
|
|
49
50
|
/** Clear the cached master key. Useful in tests. */
|
|
50
51
|
declare function resetMasterKey(): void;
|
|
51
52
|
/**
|
|
52
|
-
* Return a **deterministic**, format-preserving token for rawText.
|
|
53
|
+
* Return a **deterministic**, format-preserving token for rawText using its entityType.
|
|
54
|
+
*/
|
|
55
|
+
declare function generateFPEToken(rawText: string, entityType?: string): Promise<string>;
|
|
56
|
+
|
|
57
|
+
/**
|
|
58
|
+
* Span Resolution Engine — Sweep-Line Overlap Resolver (TypeScript).
|
|
59
|
+
*
|
|
60
|
+
* All detection tiers now return Span objects instead of mutating the text.
|
|
61
|
+
* resolveOverlaps() chooses the winning span in every conflicting region,
|
|
62
|
+
* and reconstruct() rebuilds the string exactly once.
|
|
63
|
+
*/
|
|
64
|
+
interface Span {
|
|
65
|
+
start: number;
|
|
66
|
+
end: number;
|
|
67
|
+
entityType: string;
|
|
68
|
+
originalValue: string;
|
|
69
|
+
confidence: number;
|
|
70
|
+
method: string;
|
|
71
|
+
language?: string;
|
|
72
|
+
maskedValue?: string;
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
/**
|
|
76
|
+
* Entity Detection Scanner — Tiered Waterfall Pipeline.
|
|
77
|
+
*
|
|
78
|
+
* Scans unstructured text to identify PII (Emails, Phones, SSNs, Credit Cards,
|
|
79
|
+
* Names) and replaces them in-place with Format-Preserving Encryption (FPE)
|
|
80
|
+
* tokens.
|
|
81
|
+
*
|
|
82
|
+
* Detection Architecture (Waterfall):
|
|
83
|
+
* Tier 0 — DLP Heuristic: Multilingual, 50+ types, checksum validators
|
|
84
|
+
* Tier 1 — Deterministic: Regex + Checksum (fast, provable, auditable)
|
|
85
|
+
* Tier 2 — Probabilistic: Local NLP via Transformers (catches names/orgs)
|
|
53
86
|
*/
|
|
54
|
-
declare function generateFPEToken(rawText: string): Promise<string>;
|
|
55
87
|
|
|
56
88
|
declare class BaseScanner {
|
|
57
89
|
protected _supportedEntities: string[];
|
|
@@ -61,19 +93,23 @@ declare class BaseScanner {
|
|
|
61
93
|
protected static _luhnChecksum(ccNumber: string): boolean;
|
|
62
94
|
/** Validate a US ABA routing number using the checksum algorithm. */
|
|
63
95
|
protected static _abaChecksum(routingNumber: string): boolean;
|
|
64
|
-
protected
|
|
65
|
-
|
|
66
|
-
protected
|
|
96
|
+
protected _tier0CollectSpans(text: string, confidenceThreshold: number): Promise<Span[]>;
|
|
97
|
+
/** Backward-compat wrapper — collects spans then single-pass encodes. */
|
|
98
|
+
protected _tier0Dlp(text: string, encodeFn: (val: string, options?: any) => Promise<string>, confidenceThreshold: number): Promise<[string, any[]]>;
|
|
99
|
+
protected _tier1CollectSpans(text: string, boostEntities: Set<string>, aggressive: boolean, confidenceThreshold: number): Promise<Span[]>;
|
|
100
|
+
/** Backward-compat wrapper. */
|
|
101
|
+
protected _tier1Regex(text: string, encodeFn: (val: string, options?: any) => Promise<string>, boostEntities: Set<string>, aggressive: boolean, confidenceThreshold: number): Promise<[string, any[]]>;
|
|
102
|
+
protected _tier2Nlp(text: string, encodeFn: (val: string, options?: any) => Promise<string>, boostEntities: Set<string>, aggressive: boolean, confidenceThreshold: number): Promise<[string, any[]]>;
|
|
67
103
|
protected _resolveBoost(context?: string | null): Set<string>;
|
|
68
104
|
scanAndTokenize(text: string, options?: {
|
|
69
|
-
encodeFn?: (val: string) => Promise<string>;
|
|
105
|
+
encodeFn?: (val: string, options?: any) => Promise<string>;
|
|
70
106
|
pipeline?: string[];
|
|
71
107
|
confidenceThreshold?: number;
|
|
72
108
|
context?: string | null;
|
|
73
109
|
aggressive?: boolean;
|
|
74
110
|
}): Promise<string>;
|
|
75
111
|
scanAndReturnEntities(text: string, options?: {
|
|
76
|
-
encodeFn?: (val: string) => Promise<string>;
|
|
112
|
+
encodeFn?: (val: string, options?: any) => Promise<string>;
|
|
77
113
|
pipeline?: string[];
|
|
78
114
|
confidenceThreshold?: number;
|
|
79
115
|
context?: string | null;
|
|
@@ -107,7 +143,7 @@ declare class LocalTransformersScanner extends BaseScanner {
|
|
|
107
143
|
* Map Transformer entity types to Mask internal entity types.
|
|
108
144
|
*/
|
|
109
145
|
private _mapEntityType;
|
|
110
|
-
protected _tier2Nlp(text: string, encodeFn: (val: string) => Promise<string>, boostEntities: Set<string>, aggressive: boolean, confidenceThreshold: number): Promise<[string, any[]]>;
|
|
146
|
+
protected _tier2Nlp(text: string, encodeFn: (val: string, options?: any) => Promise<string>, boostEntities: Set<string>, aggressive: boolean, confidenceThreshold: number): Promise<[string, any[]]>;
|
|
111
147
|
/**
|
|
112
148
|
* Merges sub-tokens and entities of the same type while precisely tracking
|
|
113
149
|
* offsets in the original text.
|
|
@@ -338,27 +374,28 @@ interface PatternDescriptor {
|
|
|
338
374
|
baseRisk: number;
|
|
339
375
|
category: SensitiveCategory;
|
|
340
376
|
validatorTag: string | null;
|
|
377
|
+
isHighEntropy: boolean;
|
|
378
|
+
supportedLocales: string[];
|
|
341
379
|
}
|
|
342
380
|
/**
|
|
343
381
|
* Immutable catalogue of sensitive-data regex signatures.
|
|
344
|
-
*
|
|
345
|
-
* @example
|
|
346
|
-
* ```ts
|
|
347
|
-
* const reg = new DLPPatternRegistry(); // load everything
|
|
348
|
-
* const reg = new DLPPatternRegistry(new Set([SensitiveCategory.FINANCIAL]));
|
|
349
|
-
* ```
|
|
350
382
|
*/
|
|
351
383
|
declare class DLPPatternRegistry {
|
|
352
384
|
private readonly catalogue;
|
|
385
|
+
private readonly localeCategoryRegexMap;
|
|
353
386
|
constructor(loadGroups?: ReadonlySet<SensitiveCategory>);
|
|
354
387
|
get typeNames(): string[];
|
|
355
388
|
/** Yield [typeName, descriptor] pairs. */
|
|
356
389
|
iterDescriptors(): IterableIterator<[string, PatternDescriptor]>;
|
|
357
390
|
descriptorFor(typeName: string): PatternDescriptor | undefined;
|
|
358
|
-
/** Return locale-tuned name regexes, falling back to English. */
|
|
359
391
|
namePatternsFor(lang: LanguageTag | string): RegExp[];
|
|
360
|
-
/** Return locale-tuned address regexes, falling back to English. */
|
|
361
392
|
addressPatternsFor(lang: LanguageTag | string): RegExp[];
|
|
393
|
+
getCategoryRegexesMap(locale?: string): Map<string, {
|
|
394
|
+
re: RegExp;
|
|
395
|
+
typeOrder: string[];
|
|
396
|
+
}>;
|
|
397
|
+
getCategoryTypeMap(categoryName: string, locale?: string): string[];
|
|
398
|
+
private compileForLocale;
|
|
362
399
|
private buildCatalogue;
|
|
363
400
|
}
|
|
364
401
|
|