mask-privacy 3.0.0 → 3.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -17
- package/dist/index.d.mts +58 -27
- package/dist/index.d.ts +58 -27
- package/dist/index.js +394 -310
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +394 -310
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
- package/src/core/dlp/assessor.ts +3 -26
- package/src/core/dlp/handlers.ts +44 -31
- package/src/core/dlp/index.ts +0 -2
- package/src/core/dlp/registry.ts +119 -107
- package/src/core/dlp/scorer.ts +4 -4
- package/src/core/fpe.ts +85 -32
- package/src/core/fpe_utils.ts +20 -20
- package/src/core/scanner.ts +146 -151
- package/src/core/span.ts +76 -0
- package/src/core/transformers_scanner.ts +2 -2
- package/src/core/vault.ts +2 -1
- package/tests/async.test.ts +2 -2
- package/tests/dlp_hardened.test.ts +21 -0
- package/tests/fpe.test.ts +4 -4
- package/tests/hooks.test.ts +2 -2
- package/tests/langchain.test.ts +2 -2
- package/tests/llamaindex.test.ts +1 -1
- package/tests/scanner.test.ts +0 -1
- package/tests/substring.test.ts +1 -1
- package/tests/vault.test.ts +1 -1
package/src/core/span.ts
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
/**
 * Span Resolution Engine — sweep-line overlap resolver (TypeScript).
 *
 * Detection tiers return Span objects instead of mutating the text.
 * resolveOverlaps() chooses the winning span in every conflicting region,
 * and reconstruct() applies the winners' replacements to the text.
 */

/**
 * A detected region of the input text.
 *
 * Offsets are half-open: `start` is the first character covered and `end`
 * is one past the last, matching how reconstruct() slices the text.
 */
export interface Span {
  start: number;
  end: number;
  entityType: string;
  originalValue: string;
  confidence: number;
  method: string; // "dlp_heuristic" | "regex" | "nlp"
  language?: string;
  /** Replacement text; spans without it are left untouched by reconstruct(). */
  maskedValue?: string;
}

/**
 * True when a span's offsets can index into a string: both finite,
 * non-negative start, and end not before start.
 */
function isWellFormedSpan(span: Span): boolean {
  return (
    Number.isFinite(span.start) &&
    Number.isFinite(span.end) &&
    span.start >= 0 &&
    span.end >= span.start
  );
}

/**
 * Return a non-overlapping subset of spans, ordered by descending start.
 *
 * Algorithm:
 *   1. Drop spans whose offsets are malformed (see isWellFormedSpan).
 *   2. Sort by start ASC, length DESC (prefer longer), confidence DESC.
 *   3. Walk left-to-right tracking occupiedEnd.
 *   4. Spans fully inside the accepted region are discarded.
 *   5. Partial overlaps resolve by confidence: a strictly higher confidence
 *      replaces the most recently accepted span; ties keep the earlier one.
 *
 * The input array is not modified.
 *
 * @param spans Candidate spans, in any order.
 * @returns The winning spans, sorted so the highest start comes first.
 */
export function resolveOverlaps(spans: Span[]): Span[] {
  // Malformed offsets previously either crashed the sweep (a negative start
  // reached the partial-overlap branch with nothing accepted yet) or produced
  // spans that cannot be applied to a string, so they are filtered out here.
  // filter() also gives us a fresh array that is safe to sort in place.
  const candidates = spans.filter(isWellFormedSpan);
  if (candidates.length === 0) return [];

  candidates.sort((a, b) => {
    if (a.start !== b.start) return a.start - b.start;
    const lenDiff = (b.end - b.start) - (a.end - a.start);
    if (lenDiff !== 0) return lenDiff;
    return b.confidence - a.confidence;
  });

  const resolved: Span[] = [];
  let occupiedEnd = -1;

  for (const span of candidates) {
    if (span.start >= occupiedEnd) {
      // Starts at or after everything accepted so far — no conflict.
      resolved.push(span);
      occupiedEnd = span.end;
      continue;
    }

    if (span.end <= occupiedEnd) {
      // Fully inside the already-accepted region — discard.
      continue;
    }

    // Partial overlap with the most recently accepted span — keep the one
    // with the higher confidence.
    const last = resolved[resolved.length - 1];
    if (last !== undefined && span.confidence > last.confidence) {
      resolved[resolved.length - 1] = span;
      occupiedEnd = span.end;
    }
  }

  // Return descending start order for right-to-left reconstruction.
  return resolved.sort((a, b) => b.start - a.start);
}

/**
 * Rebuild text by replacing each span's [start, end) range with its
 * maskedValue.
 *
 * Replacements are applied from the highest start offset down, so an edit
 * never shifts the offsets of spans that are still to be applied. The spans
 * are ordered internally (stable sort on a copy), which means callers may
 * pass them in any order; input already in descending start order — such as
 * the output of resolveOverlaps() — is processed exactly as given.
 *
 * @param text The original text the span offsets refer to.
 * @param resolvedSpans Non-overlapping spans; those whose maskedValue is
 *   null or undefined are skipped.
 * @returns The text with every populated span replaced.
 */
export function reconstruct(text: string, resolvedSpans: Span[]): string {
  const rightToLeft = [...resolvedSpans].sort((a, b) => b.start - a.start);

  let result = text;
  for (const span of rightToLeft) {
    if (span.maskedValue == null) continue;
    result = result.slice(0, span.start) + span.maskedValue + result.slice(span.end);
  }
  return result;
}
|
|
package/src/core/transformers_scanner.ts
CHANGED
|
@@ -96,7 +96,7 @@ export class LocalTransformersScanner extends BaseScanner {
|
|
|
96
96
|
|
|
97
97
|
protected async _tier2Nlp(
|
|
98
98
|
text: string,
|
|
99
|
-
encodeFn: (val: string) => Promise<string>,
|
|
99
|
+
encodeFn: (val: string, options?: any) => Promise<string>,
|
|
100
100
|
boostEntities: Set<string>,
|
|
101
101
|
aggressive: boolean,
|
|
102
102
|
confidenceThreshold: number,
|
|
@@ -136,7 +136,7 @@ export class LocalTransformersScanner extends BaseScanner {
|
|
|
136
136
|
}
|
|
137
137
|
|
|
138
138
|
if (confidence >= confidenceThreshold && !looksLikeToken(val) && val.length > 1) {
|
|
139
|
-
const token = await encodeFn(val);
|
|
139
|
+
const token = await encodeFn(val, { entityType: entityType });
|
|
140
140
|
entities.push({
|
|
141
141
|
type: entityType,
|
|
142
142
|
value: val,
|
package/src/core/vault.ts
CHANGED
|
@@ -538,6 +538,7 @@ export type EncodeOptions = {
|
|
|
538
538
|
ttl?: number;
|
|
539
539
|
searchBuckets?: ('year' | 'month' | 'day' | 'numeric')[];
|
|
540
540
|
searchBucketSize?: number;
|
|
541
|
+
entityType?: string;
|
|
541
542
|
};
|
|
542
543
|
|
|
543
544
|
/**
|
|
@@ -565,7 +566,7 @@ export async function encode(rawText: string, options: EncodeOptions = {}): Prom
|
|
|
565
566
|
}
|
|
566
567
|
|
|
567
568
|
// 2. Generate new token
|
|
568
|
-
const token = await generateFPEToken(text);
|
|
569
|
+
const token = await generateFPEToken(text, options.entityType || 'UNKNOWN');
|
|
569
570
|
|
|
570
571
|
// 3. Encrypt the plaintext before it touches the vault
|
|
571
572
|
const ciphertext = cryptoEngine.encrypt(text);
|
package/tests/async.test.ts
CHANGED
|
@@ -21,7 +21,7 @@ describe('TestAsyncWrappers', () => {
|
|
|
21
21
|
test('test_module_level_async_wrappers', async () => {
|
|
22
22
|
const token = await aencode("test@async.com");
|
|
23
23
|
expect(looksLikeToken(token)).toBe(true);
|
|
24
|
-
expect(token).toMatch(/@
|
|
24
|
+
expect(token).toMatch(/@async\.com$/);
|
|
25
25
|
|
|
26
26
|
const plaintext = await adecode(token);
|
|
27
27
|
expect(plaintext).toBe("test@async.com");
|
|
@@ -43,6 +43,6 @@ describe('TestAsyncWrappers', () => {
|
|
|
43
43
|
const safeText = await client.ascanAndTokenize(text);
|
|
44
44
|
expect(safeText).not.toContain("bob@example.com");
|
|
45
45
|
expect(safeText).toContain("tkn-");
|
|
46
|
-
expect(safeText).toMatch(/@
|
|
46
|
+
expect(safeText).toMatch(/@example\.com$/);
|
|
47
47
|
});
|
|
48
48
|
});
|
|
package/tests/dlp_hardened.test.ts
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
/* global describe, test, expect */
import { getScanner } from '../src/index';

/**
 * Async helper that forwards `text` and `options` to scanAndTokenize() on the
 * scanner returned by getScanner().
 */
async function ascanAndTokenize(text: string, options: any = {}) {
  return getScanner().scanAndTokenize(text, options);
}

describe('Multilingual ID Hardening (TS)', () => {
  test('Locale-Aware Precision (ES_DNI)', async () => {
    // Spanish DNI in English context
    const raw = "My DNI is 12345678Z";
    const masked = await ascanAndTokenize(raw, { pipeline: ['dlp'] });

    // Should mask using the ES_DNI generator (starts with 000)
    expect(masked).toContain("000");
    // The raw identifier itself must not survive masking; without this check
    // the test would still pass if a token were added but the DNI left in place.
    expect(masked).not.toContain("12345678Z");
  });
});
|
package/tests/fpe.test.ts
CHANGED
|
@@ -17,7 +17,7 @@ describe('TestFPETokenGeneration', () => {
|
|
|
17
17
|
|
|
18
18
|
test('test_email_format', async () => {
|
|
19
19
|
const token = await generateFPEToken("user@company.io");
|
|
20
|
-
expect(token.endsWith("@
|
|
20
|
+
expect(token.endsWith("@company.io")).toBe(true);
|
|
21
21
|
expect(token.startsWith("tkn-")).toBe(true);
|
|
22
22
|
expect(token).toMatch(/^[^@]+@[^@]+\.[^@]+$/);
|
|
23
23
|
});
|
|
@@ -60,7 +60,7 @@ describe('TestFPETokenGeneration', () => {
|
|
|
60
60
|
const t1 = await generateFPEToken("a@b.com");
|
|
61
61
|
const t2 = await generateFPEToken("a@b.com");
|
|
62
62
|
expect(t1).toBe(t2);
|
|
63
|
-
expect(t1.endsWith("@
|
|
63
|
+
expect(t1.endsWith("@b.com")).toBe(true);
|
|
64
64
|
});
|
|
65
65
|
|
|
66
66
|
test('test_different_inputs_different_tokens', () => {
|
|
@@ -85,7 +85,7 @@ describe('TestFPETokenGeneration', () => {
|
|
|
85
85
|
});
|
|
86
86
|
|
|
87
87
|
test('test_whitespace_stripped_determinism', async () => {
|
|
88
|
-
expect(await generateFPEToken(" someone@
|
|
88
|
+
expect(await generateFPEToken(" someone@example.com ")).toBe(await generateFPEToken("someone@example.com"));
|
|
89
89
|
});
|
|
90
90
|
|
|
91
91
|
test('test_fail_fast_when_key_missing', async () => {
|
|
@@ -100,7 +100,7 @@ describe('TestFPETokenGeneration', () => {
|
|
|
100
100
|
|
|
101
101
|
describe('TestLooksLikeToken', () => {
|
|
102
102
|
test('test_email_token', () => {
|
|
103
|
-
expect(looksLikeToken("tkn-
|
|
103
|
+
expect(looksLikeToken("tkn-abcd1234abcd@example.com")).toBe(true);
|
|
104
104
|
});
|
|
105
105
|
|
|
106
106
|
test('test_phone_token', () => {
|
package/tests/hooks.test.ts
CHANGED
|
@@ -60,7 +60,7 @@ describe('TestHooks', () => {
|
|
|
60
60
|
test('test_encodes_raw_email', async () => {
|
|
61
61
|
const result = await deepEncodePII({"email": "test@example.com"});
|
|
62
62
|
expect(looksLikeToken(result.email)).toBe(true);
|
|
63
|
-
expect(result.email).toMatch(/@
|
|
63
|
+
expect(result.email).toMatch(/@example\.com$/);
|
|
64
64
|
});
|
|
65
65
|
|
|
66
66
|
test('test_does_not_double_encode_token', async () => {
|
|
@@ -91,7 +91,7 @@ describe('TestHooks', () => {
|
|
|
91
91
|
const args = "Contact us at support@example.com for help.";
|
|
92
92
|
const result = await deepEncodePII(args);
|
|
93
93
|
expect(typeof result).toBe('string');
|
|
94
|
-
expect(result).toContain("@
|
|
94
|
+
expect(result).toContain("@example.com");
|
|
95
95
|
expect(result).not.toContain("support@example.com");
|
|
96
96
|
});
|
|
97
97
|
|
package/tests/langchain.test.ts
CHANGED
|
@@ -41,7 +41,7 @@ describe('TestLangchainHooks', () => {
|
|
|
41
41
|
const result = await secure.run(token, "Welcome");
|
|
42
42
|
|
|
43
43
|
expect(result.target).not.toBe("user@example.com");
|
|
44
|
-
expect(result.target).toMatch(/@
|
|
44
|
+
expect(result.target).toMatch(/@example\.com$/);
|
|
45
45
|
});
|
|
46
46
|
});
|
|
47
47
|
|
|
@@ -89,7 +89,7 @@ describe('TestLangchainHooks', () => {
|
|
|
89
89
|
|
|
90
90
|
const result = await sendEmail(token, "Hello");
|
|
91
91
|
expect(result).not.toContain("dev@mask.ai");
|
|
92
|
-
expect(result).toContain("@
|
|
92
|
+
expect(result).toContain("@mask.ai");
|
|
93
93
|
});
|
|
94
94
|
|
|
95
95
|
test('test_secure_tool_preserves_non_pii', async () => {
|
package/tests/llamaindex.test.ts
CHANGED
|
@@ -42,7 +42,7 @@ describe('TestLlamaindexHooks', () => {
|
|
|
42
42
|
const result = await secureTool.run(token, "Give me the records");
|
|
43
43
|
|
|
44
44
|
expect(result.target).not.toBe("admin@hospital.com");
|
|
45
|
-
expect(result.target).toMatch(/@
|
|
45
|
+
expect(result.target).toMatch(/@hospital\.com$/);
|
|
46
46
|
});
|
|
47
47
|
});
|
|
48
48
|
|
package/tests/scanner.test.ts
CHANGED
package/tests/substring.test.ts
CHANGED
|
@@ -50,7 +50,7 @@ describe('TestSubstringDetokenization', () => {
|
|
|
50
50
|
});
|
|
51
51
|
|
|
52
52
|
test('test_detokenize_text_lenient', async () => {
|
|
53
|
-
const bogus = "tkn-12345678@
|
|
53
|
+
const bogus = "tkn-12345678@example.com";
|
|
54
54
|
const paragraph = `Hello ${bogus}`;
|
|
55
55
|
|
|
56
56
|
const restored = await detokenizeText(paragraph);
|
package/tests/vault.test.ts
CHANGED
|
@@ -46,7 +46,7 @@ describe('TestEncodeDecodePublicAPI', () => {
|
|
|
46
46
|
|
|
47
47
|
test('test_roundtrip_email', async () => {
|
|
48
48
|
const token = await encode("user@example.com");
|
|
49
|
-
expect(token.endsWith("@
|
|
49
|
+
expect(token.endsWith("@example.com")).toBe(true);
|
|
50
50
|
expect(await decode(token)).toBe("user@example.com");
|
|
51
51
|
});
|
|
52
52
|
|