mask-privacy 3.0.0 → 3.1.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
package/dist/index.d.mts CHANGED
@@ -14,6 +14,7 @@ type EncodeOptions = {
14
14
  ttl?: number;
15
15
  searchBuckets?: ('year' | 'month' | 'day' | 'numeric')[];
16
16
  searchBucketSize?: number;
17
+ entityType?: string;
17
18
  };
18
19
  /**
19
20
  * Tokenise rawText, encrypt it, store in vault, return the FPE token.
@@ -49,9 +50,40 @@ declare function looksLikeToken(value: string | any): boolean;
49
50
  /** Clear the cached master key. Useful in tests. */
50
51
  declare function resetMasterKey(): void;
51
52
  /**
52
- * Return a **deterministic**, format-preserving token for rawText.
53
+ * Return a **deterministic**, format-preserving token for rawText using its entityType.
54
+ */
55
+ declare function generateFPEToken(rawText: string, entityType?: string): Promise<string>;
56
+
57
+ /**
58
+ * Span Resolution Engine — Sweep-Line Overlap Resolver (TypeScript).
59
+ *
60
+ * All detection tiers now return Span objects instead of mutating the text.
61
+ * resolveOverlaps() chooses the winning span in every conflicting region,
62
+ * and reconstruct() rebuilds the string exactly once.
63
+ */
64
+ interface Span {
65
+ start: number;
66
+ end: number;
67
+ entityType: string;
68
+ originalValue: string;
69
+ confidence: number;
70
+ method: string;
71
+ language?: string;
72
+ maskedValue?: string;
73
+ }
74
+
75
+ /**
76
+ * Entity Detection Scanner — Tiered Waterfall Pipeline.
77
+ *
78
+ * Scans unstructured text to identify PII (Emails, Phones, SSNs, Credit Cards,
79
+ * Names) and replaces them in-place with Format-Preserving Encryption (FPE)
80
+ * tokens.
81
+ *
82
+ * Detection Architecture (Waterfall):
83
+ * Tier 0 — DLP Heuristic: Multilingual, 50+ types, checksum validators
84
+ * Tier 1 — Deterministic: Regex + Checksum (fast, provable, auditable)
85
+ * Tier 2 — Probabilistic: Local NLP via Transformers (catches names/orgs)
53
86
  */
54
- declare function generateFPEToken(rawText: string): Promise<string>;
55
87
 
56
88
  declare class BaseScanner {
57
89
  protected _supportedEntities: string[];
@@ -61,19 +93,23 @@ declare class BaseScanner {
61
93
  protected static _luhnChecksum(ccNumber: string): boolean;
62
94
  /** Validate a US ABA routing number using the checksum algorithm. */
63
95
  protected static _abaChecksum(routingNumber: string): boolean;
64
- protected _tier0Dlp(text: string, encodeFn: (val: string) => Promise<string>, confidenceThreshold: number): Promise<[string, any[]]>;
65
- protected _tier1Regex(text: string, encodeFn: (val: string) => Promise<string>, boostEntities: Set<string>, aggressive: boolean, confidenceThreshold: number): Promise<[string, any[]]>;
66
- protected _tier2Nlp(text: string, encodeFn: (val: string) => Promise<string>, boostEntities: Set<string>, aggressive: boolean, confidenceThreshold: number): Promise<[string, any[]]>;
96
+ protected _tier0CollectSpans(text: string, confidenceThreshold: number): Promise<Span[]>;
97
+ /** Backward-compat wrapper collects spans then single-pass encodes. */
98
+ protected _tier0Dlp(text: string, encodeFn: (val: string, options?: any) => Promise<string>, confidenceThreshold: number): Promise<[string, any[]]>;
99
+ protected _tier1CollectSpans(text: string, boostEntities: Set<string>, aggressive: boolean, confidenceThreshold: number): Promise<Span[]>;
100
+ /** Backward-compat wrapper. */
101
+ protected _tier1Regex(text: string, encodeFn: (val: string, options?: any) => Promise<string>, boostEntities: Set<string>, aggressive: boolean, confidenceThreshold: number): Promise<[string, any[]]>;
102
+ protected _tier2Nlp(text: string, encodeFn: (val: string, options?: any) => Promise<string>, boostEntities: Set<string>, aggressive: boolean, confidenceThreshold: number): Promise<[string, any[]]>;
67
103
  protected _resolveBoost(context?: string | null): Set<string>;
68
104
  scanAndTokenize(text: string, options?: {
69
- encodeFn?: (val: string) => Promise<string>;
105
+ encodeFn?: (val: string, options?: any) => Promise<string>;
70
106
  pipeline?: string[];
71
107
  confidenceThreshold?: number;
72
108
  context?: string | null;
73
109
  aggressive?: boolean;
74
110
  }): Promise<string>;
75
111
  scanAndReturnEntities(text: string, options?: {
76
- encodeFn?: (val: string) => Promise<string>;
112
+ encodeFn?: (val: string, options?: any) => Promise<string>;
77
113
  pipeline?: string[];
78
114
  confidenceThreshold?: number;
79
115
  context?: string | null;
@@ -107,7 +143,7 @@ declare class LocalTransformersScanner extends BaseScanner {
107
143
  * Map Transformer entity types to Mask internal entity types.
108
144
  */
109
145
  private _mapEntityType;
110
- protected _tier2Nlp(text: string, encodeFn: (val: string) => Promise<string>, boostEntities: Set<string>, aggressive: boolean, confidenceThreshold: number): Promise<[string, any[]]>;
146
+ protected _tier2Nlp(text: string, encodeFn: (val: string, options?: any) => Promise<string>, boostEntities: Set<string>, aggressive: boolean, confidenceThreshold: number): Promise<[string, any[]]>;
111
147
  /**
112
148
  * Merges sub-tokens and entities of the same type while precisely tracking
113
149
  * offsets in the original text.
@@ -338,27 +374,28 @@ interface PatternDescriptor {
338
374
  baseRisk: number;
339
375
  category: SensitiveCategory;
340
376
  validatorTag: string | null;
377
+ isHighEntropy: boolean;
378
+ supportedLocales: string[];
341
379
  }
342
380
  /**
343
381
  * Immutable catalogue of sensitive-data regex signatures.
344
- *
345
- * @example
346
- * ```ts
347
- * const reg = new DLPPatternRegistry(); // load everything
348
- * const reg = new DLPPatternRegistry(new Set([SensitiveCategory.FINANCIAL]));
349
- * ```
350
382
  */
351
383
  declare class DLPPatternRegistry {
352
384
  private readonly catalogue;
385
+ private readonly localeCategoryRegexMap;
353
386
  constructor(loadGroups?: ReadonlySet<SensitiveCategory>);
354
387
  get typeNames(): string[];
355
388
  /** Yield [typeName, descriptor] pairs. */
356
389
  iterDescriptors(): IterableIterator<[string, PatternDescriptor]>;
357
390
  descriptorFor(typeName: string): PatternDescriptor | undefined;
358
- /** Return locale-tuned name regexes, falling back to English. */
359
391
  namePatternsFor(lang: LanguageTag | string): RegExp[];
360
- /** Return locale-tuned address regexes, falling back to English. */
361
392
  addressPatternsFor(lang: LanguageTag | string): RegExp[];
393
+ getCategoryRegexesMap(locale?: string): Map<string, {
394
+ re: RegExp;
395
+ typeOrder: string[];
396
+ }>;
397
+ getCategoryTypeMap(categoryName: string, locale?: string): string[];
398
+ private compileForLocale;
362
399
  private buildCatalogue;
363
400
  }
364
401
 
package/dist/index.d.ts CHANGED
@@ -14,6 +14,7 @@ type EncodeOptions = {
14
14
  ttl?: number;
15
15
  searchBuckets?: ('year' | 'month' | 'day' | 'numeric')[];
16
16
  searchBucketSize?: number;
17
+ entityType?: string;
17
18
  };
18
19
  /**
19
20
  * Tokenise rawText, encrypt it, store in vault, return the FPE token.
@@ -49,9 +50,40 @@ declare function looksLikeToken(value: string | any): boolean;
49
50
  /** Clear the cached master key. Useful in tests. */
50
51
  declare function resetMasterKey(): void;
51
52
  /**
52
- * Return a **deterministic**, format-preserving token for rawText.
53
+ * Return a **deterministic**, format-preserving token for rawText using its entityType.
54
+ */
55
+ declare function generateFPEToken(rawText: string, entityType?: string): Promise<string>;
56
+
57
+ /**
58
+ * Span Resolution Engine — Sweep-Line Overlap Resolver (TypeScript).
59
+ *
60
+ * All detection tiers now return Span objects instead of mutating the text.
61
+ * resolveOverlaps() chooses the winning span in every conflicting region,
62
+ * and reconstruct() rebuilds the string exactly once.
63
+ */
64
+ interface Span {
65
+ start: number;
66
+ end: number;
67
+ entityType: string;
68
+ originalValue: string;
69
+ confidence: number;
70
+ method: string;
71
+ language?: string;
72
+ maskedValue?: string;
73
+ }
74
+
75
+ /**
76
+ * Entity Detection Scanner — Tiered Waterfall Pipeline.
77
+ *
78
+ * Scans unstructured text to identify PII (Emails, Phones, SSNs, Credit Cards,
79
+ * Names) and replaces them in-place with Format-Preserving Encryption (FPE)
80
+ * tokens.
81
+ *
82
+ * Detection Architecture (Waterfall):
83
+ * Tier 0 — DLP Heuristic: Multilingual, 50+ types, checksum validators
84
+ * Tier 1 — Deterministic: Regex + Checksum (fast, provable, auditable)
85
+ * Tier 2 — Probabilistic: Local NLP via Transformers (catches names/orgs)
53
86
  */
54
- declare function generateFPEToken(rawText: string): Promise<string>;
55
87
 
56
88
  declare class BaseScanner {
57
89
  protected _supportedEntities: string[];
@@ -61,19 +93,23 @@ declare class BaseScanner {
61
93
  protected static _luhnChecksum(ccNumber: string): boolean;
62
94
  /** Validate a US ABA routing number using the checksum algorithm. */
63
95
  protected static _abaChecksum(routingNumber: string): boolean;
64
- protected _tier0Dlp(text: string, encodeFn: (val: string) => Promise<string>, confidenceThreshold: number): Promise<[string, any[]]>;
65
- protected _tier1Regex(text: string, encodeFn: (val: string) => Promise<string>, boostEntities: Set<string>, aggressive: boolean, confidenceThreshold: number): Promise<[string, any[]]>;
66
- protected _tier2Nlp(text: string, encodeFn: (val: string) => Promise<string>, boostEntities: Set<string>, aggressive: boolean, confidenceThreshold: number): Promise<[string, any[]]>;
96
+ protected _tier0CollectSpans(text: string, confidenceThreshold: number): Promise<Span[]>;
97
+ /** Backward-compat wrapper collects spans then single-pass encodes. */
98
+ protected _tier0Dlp(text: string, encodeFn: (val: string, options?: any) => Promise<string>, confidenceThreshold: number): Promise<[string, any[]]>;
99
+ protected _tier1CollectSpans(text: string, boostEntities: Set<string>, aggressive: boolean, confidenceThreshold: number): Promise<Span[]>;
100
+ /** Backward-compat wrapper. */
101
+ protected _tier1Regex(text: string, encodeFn: (val: string, options?: any) => Promise<string>, boostEntities: Set<string>, aggressive: boolean, confidenceThreshold: number): Promise<[string, any[]]>;
102
+ protected _tier2Nlp(text: string, encodeFn: (val: string, options?: any) => Promise<string>, boostEntities: Set<string>, aggressive: boolean, confidenceThreshold: number): Promise<[string, any[]]>;
67
103
  protected _resolveBoost(context?: string | null): Set<string>;
68
104
  scanAndTokenize(text: string, options?: {
69
- encodeFn?: (val: string) => Promise<string>;
105
+ encodeFn?: (val: string, options?: any) => Promise<string>;
70
106
  pipeline?: string[];
71
107
  confidenceThreshold?: number;
72
108
  context?: string | null;
73
109
  aggressive?: boolean;
74
110
  }): Promise<string>;
75
111
  scanAndReturnEntities(text: string, options?: {
76
- encodeFn?: (val: string) => Promise<string>;
112
+ encodeFn?: (val: string, options?: any) => Promise<string>;
77
113
  pipeline?: string[];
78
114
  confidenceThreshold?: number;
79
115
  context?: string | null;
@@ -107,7 +143,7 @@ declare class LocalTransformersScanner extends BaseScanner {
107
143
  * Map Transformer entity types to Mask internal entity types.
108
144
  */
109
145
  private _mapEntityType;
110
- protected _tier2Nlp(text: string, encodeFn: (val: string) => Promise<string>, boostEntities: Set<string>, aggressive: boolean, confidenceThreshold: number): Promise<[string, any[]]>;
146
+ protected _tier2Nlp(text: string, encodeFn: (val: string, options?: any) => Promise<string>, boostEntities: Set<string>, aggressive: boolean, confidenceThreshold: number): Promise<[string, any[]]>;
111
147
  /**
112
148
  * Merges sub-tokens and entities of the same type while precisely tracking
113
149
  * offsets in the original text.
@@ -338,27 +374,28 @@ interface PatternDescriptor {
338
374
  baseRisk: number;
339
375
  category: SensitiveCategory;
340
376
  validatorTag: string | null;
377
+ isHighEntropy: boolean;
378
+ supportedLocales: string[];
341
379
  }
342
380
  /**
343
381
  * Immutable catalogue of sensitive-data regex signatures.
344
- *
345
- * @example
346
- * ```ts
347
- * const reg = new DLPPatternRegistry(); // load everything
348
- * const reg = new DLPPatternRegistry(new Set([SensitiveCategory.FINANCIAL]));
349
- * ```
350
382
  */
351
383
  declare class DLPPatternRegistry {
352
384
  private readonly catalogue;
385
+ private readonly localeCategoryRegexMap;
353
386
  constructor(loadGroups?: ReadonlySet<SensitiveCategory>);
354
387
  get typeNames(): string[];
355
388
  /** Yield [typeName, descriptor] pairs. */
356
389
  iterDescriptors(): IterableIterator<[string, PatternDescriptor]>;
357
390
  descriptorFor(typeName: string): PatternDescriptor | undefined;
358
- /** Return locale-tuned name regexes, falling back to English. */
359
391
  namePatternsFor(lang: LanguageTag | string): RegExp[];
360
- /** Return locale-tuned address regexes, falling back to English. */
361
392
  addressPatternsFor(lang: LanguageTag | string): RegExp[];
393
+ getCategoryRegexesMap(locale?: string): Map<string, {
394
+ re: RegExp;
395
+ typeOrder: string[];
396
+ }>;
397
+ getCategoryTypeMap(categoryName: string, locale?: string): string[];
398
+ private compileForLocale;
362
399
  private buildCatalogue;
363
400
  }
364
401