mask-privacy 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.test_audit.db +0 -0
- package/README.md +250 -0
- package/dist/index.d.mts +257 -0
- package/dist/index.d.ts +257 -0
- package/dist/index.js +58820 -0
- package/dist/index.js.map +1 -0
- package/dist/index.mjs +58777 -0
- package/dist/index.mjs.map +1 -0
- package/examples/secure_vault/email_tool.ts +13 -0
- package/examples/test_agent.ts +50 -0
- package/jest.config.js +10 -0
- package/package.json +37 -0
- package/src/client.ts +135 -0
- package/src/core/crypto.ts +100 -0
- package/src/core/exceptions.ts +23 -0
- package/src/core/fpe.ts +185 -0
- package/src/core/key_provider.ts +158 -0
- package/src/core/scanner.ts +308 -0
- package/src/core/utils.ts +76 -0
- package/src/core/vault.ts +540 -0
- package/src/index.ts +85 -0
- package/src/integrations/adk_hooks.ts +56 -0
- package/src/integrations/langchain_hooks.ts +87 -0
- package/src/integrations/llamaindex_hooks.ts +80 -0
- package/src/telemetry/audit_logger.ts +168 -0
- package/tests/async.test.ts +47 -0
- package/tests/audit_logger.test.ts +55 -0
- package/tests/exceptions.test.ts +75 -0
- package/tests/fail_strategy.test.ts +84 -0
- package/tests/fpe.test.ts +126 -0
- package/tests/hooks.test.ts +107 -0
- package/tests/key_provider.test.ts +68 -0
- package/tests/langchain.test.ts +101 -0
- package/tests/llamaindex.test.ts +75 -0
- package/tests/scanner.test.ts +107 -0
- package/tests/smoke.test.ts +6 -0
- package/tests/substring.test.ts +59 -0
- package/tests/vault.test.ts +101 -0
- package/tests/vault_backends.test.ts +124 -0
- package/tsconfig.json +22 -0
- package/tsup.config.ts +11 -0
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/**
 * Demo email tool: prints the outgoing message to stdout and returns a
 * confirmation string.
 *
 * DEMO-ONLY: LEAKS PII TO STDOUT AND RESPONSES. The address, subject and body
 * are logged verbatim, and the address and subject are echoed in the return
 * value. It exists to demonstrate Just-In-Time detokenization in action.
 *
 * @param emailAddress Recipient address; logged and included in the result.
 * @param subject Subject line; logged and included in the result.
 * @param message Body text; logged only.
 * @returns A human-readable confirmation containing the address and subject.
 */
export function sendSecureEmail(emailAddress: string, subject: string, message: string): string {
  console.log("\n[tool execution] smtplib email sender");
  console.log(`Executing API request to send email to:\n----> ${emailAddress} <----`);
  console.log(`Subject: ${subject}`);
  console.log(`Body: ${message}\n`);

  return `Successfully sent email to ${emailAddress} with subject: '${subject}'`;
}
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
import * as path from 'path';
|
|
2
|
+
import { aencode } from '../src/index';
|
|
3
|
+
import { decryptBeforeTool } from '../src/integrations/adk_hooks';
|
|
4
|
+
import { sendSecureEmail } from './secure_vault/email_tool';
|
|
5
|
+
|
|
6
|
+
/** Minimal stand-in for a framework tool object; only carries the tool name. */
class MockTool {
  name: string;

  constructor() {
    this.name = "send_secure_email";
  }
}
|
|
9
|
+
|
|
10
|
+
/** Minimal stand-in for a framework tool context; only carries the agent name. */
class MockToolContext {
  agent_name: string;

  constructor() {
    this.agent_name = "secure_data_assistant";
  }
}
|
|
13
|
+
|
|
14
|
+
/**
 * End-to-end console walkthrough of the tokenize -> LLM -> detokenize flow.
 *
 * NON-PRODUCTION: this demo prints the plaintext email and the
 * token -> plaintext mapping to stdout.
 */
async function runDemo() {
  console.log("\nStarting Mask JIT Micro-Vault detokenization demo (NON-PRODUCTION)...");

  // Step 1: the application swaps the user's email for a token.
  const plaintextEmail = "user1@example.com";
  const emailToken = await aencode(plaintextEmail);

  console.log("\n[app] Intercepted PII. Storing in Micro-Vault...");
  console.log(`[app] Vault mapping: ${emailToken} -> ${plaintextEmail}`);

  // Step 2: only the token is placed in the LLM context.
  console.log(`\n[mask -> llm] Passing tokenized context to LLM:`);
  console.log(` Context: {'user:email': '${emailToken}'}`);

  // Step 3: simulate the LLM issuing a tool call that carries the token.
  console.log("\n[llm -> mask] LLM reasoned successfully. Calling tool `send_secure_email` with tokenized argument...");
  const toolArgs: any = {
    emailAddress: emailToken,
    subject: "Welcome to Mask!",
    message: "Your Micro-Vault architecture is secure.",
  };

  // Step 4: the Mask pre-hook runs before the tool executes.
  console.log("\n[mask jit detokenization hook]");
  const tool = new MockTool();
  const toolContext = new MockToolContext();

  await decryptBeforeTool(tool, toolArgs, toolContext);

  // Step 5: run the real tool with whatever the hook left in the arguments.
  console.log("\n[system] Executing tool with detokenized payload (prints plaintext PII in this demo):");
  sendSecureEmail(toolArgs.emailAddress, toolArgs.subject, toolArgs.message);

  console.log("\nVerification complete: The LLM only saw the token, but the tool triggered with the plaintext.");
}

runDemo().catch(console.error);
|
package/jest.config.js
ADDED
package/package.json
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "mask-privacy",
|
|
3
|
+
"version": "1.0.2",
|
|
4
|
+
"description": "",
|
|
5
|
+
"main": "index.js",
|
|
6
|
+
"scripts": {
|
|
7
|
+
"test": "jest",
|
|
8
|
+
"demo": "ts-node examples/test_agent.ts",
|
|
9
|
+
"build": "tsup",
|
|
10
|
+
"dev": "tsup --watch",
|
|
11
|
+
"lint": "tsc --noEmit"
|
|
12
|
+
},
|
|
13
|
+
"keywords": [],
|
|
14
|
+
"author": "",
|
|
15
|
+
"license": "ISC",
|
|
16
|
+
"type": "commonjs",
|
|
17
|
+
"devDependencies": {
|
|
18
|
+
"@aws-sdk/client-dynamodb": "^3.1012.0",
|
|
19
|
+
"@types/jest": "^30.0.0",
|
|
20
|
+
"@types/node": "^25.5.0",
|
|
21
|
+
"axios": "^1.13.6",
|
|
22
|
+
"better-sqlite3": "^12.8.0",
|
|
23
|
+
"fernet": "^0.3.3",
|
|
24
|
+
"ioredis": "^5.10.0",
|
|
25
|
+
"jest": "^30.3.0",
|
|
26
|
+
"memjs": "^1.3.2",
|
|
27
|
+
"ts-jest": "^29.4.6",
|
|
28
|
+
"ts-node": "^10.9.2",
|
|
29
|
+
"tsup": "^8.5.1",
|
|
30
|
+
"typescript": "^5.9.3"
|
|
31
|
+
},
|
|
32
|
+
"dependencies": {
|
|
33
|
+
"@aws-sdk/lib-dynamodb": "^3.1012.0",
|
|
34
|
+
"@langchain/core": "^1.1.33",
|
|
35
|
+
"llamaindex": "^0.12.1"
|
|
36
|
+
}
|
|
37
|
+
}
|
package/src/client.ts
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Explicit Client initialization for the Mask SDK.
|
|
3
|
+
*
|
|
4
|
+
* Provides MaskClient — a unified, explicitly-configured client that
|
|
5
|
+
* bundles vault, crypto, scanner, and audit logger into a single object.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { createHash } from 'crypto';

import { BaseVault, getVault, decode, encode, detokenizeText } from './core/vault';
import { CryptoEngine, getCryptoEngine } from './core/crypto';
import { PresidioScanner, getScanner } from './core/scanner';
import { generateFPEToken, looksLikeToken } from './core/fpe';
import { AuditLogger, getAuditLogger } from './telemetry/audit_logger';
|
|
13
|
+
|
|
14
|
+
/**
 * Explicitly-configured Mask client bundling a vault, crypto engine, scanner
 * and audit logger.
 *
 * Any component not supplied to the constructor falls back to the
 * corresponding `get*()` accessor.
 */
export class MaskClient {
  public vault: BaseVault;
  public crypto: CryptoEngine;
  public scanner: PresidioScanner;
  public auditLogger: AuditLogger;
  /** backward compat alias */
  public logger: AuditLogger;
  /** TTL handed to `vault.store()` for every newly created token. */
  public ttl: number;

  /**
   * Initialise the client with specific component instances.
   *
   * @param options.vault Vault backend; defaults to `getVault()`.
   * @param options.crypto Crypto engine; defaults to `getCryptoEngine()`.
   * @param options.scanner PII scanner; defaults to `getScanner()`.
   * @param options.auditLogger Audit logger; defaults to `getAuditLogger()`.
   * @param options.ttl TTL passed to the vault on store; defaults to 600.
   */
  constructor(options: {
    vault?: BaseVault;
    crypto?: CryptoEngine;
    scanner?: PresidioScanner;
    auditLogger?: AuditLogger;
    ttl?: number;
  } = {}) {
    this.vault = options.vault ?? getVault();
    this.crypto = options.crypto ?? getCryptoEngine();
    this.scanner = options.scanner ?? getScanner();
    this.auditLogger = options.auditLogger ?? getAuditLogger();
    this.logger = this.auditLogger;
    // `??` (not `||`) so an explicit `ttl: 0` is forwarded to the vault
    // instead of being silently replaced by the default.
    this.ttl = options.ttl ?? 600;

    // Ensure the audit logger is running
    this.auditLogger.start();
  }

  /**
   * Tokenise rawText, encrypt it, and store it in the vault.
   *
   * Includes deduplication: if the same (trimmed) plaintext has been encoded
   * before and its token can still be retrieved, the existing token is
   * returned and nothing new is stored.
   *
   * @param rawText Plaintext to protect. If it already looks like a Mask
   *   token it is returned unchanged (and untrimmed).
   * @returns The token that stands in for the plaintext.
   */
  async encode(rawText: string): Promise<string> {
    // Token Guard: never re-encode a value that is already a Mask token
    if (looksLikeToken(rawText)) {
      return rawText;
    }

    // Normalise whitespace so " Alice " and "Alice" share the same hash
    const text = rawText.trim();

    // 1. Deduplication check, keyed by the SHA-256 of the trimmed plaintext.
    const ptHash = createHash('sha256').update(text, 'utf-8').digest('hex');

    const existingToken = await this.vault.getTokenByPlaintextHash(ptHash);
    // The reverse-lookup entry alone is not enough: also require that the
    // token itself can still be retrieved.
    if (existingToken && (await this.vault.retrieve(existingToken)) !== null) {
      this.logger.log("encode", existingToken, "opaque");
      return existingToken;
    }

    // 2. Generate deterministic token
    const token = generateFPEToken(text);

    // 3. Encrypt
    const ciphertext = this.crypto.encrypt(text);

    // 4. Store with reverse lookup hash
    await this.vault.store(token, ciphertext, this.ttl, ptHash);

    this.logger.log("encode", token, "opaque");
    return token;
  }

  /**
   * Retrieve token from vault and decrypt it.
   *
   * Best-effort by design: when the vault has no entry, or decryption throws,
   * the token itself is returned and an audit event is logged instead of
   * raising.
   *
   * @param token Token to resolve.
   * @returns The plaintext, or `token` unchanged when it cannot be resolved.
   */
  async decode(token: string): Promise<string> {
    const ciphertext = await this.vault.retrieve(token);
    if (ciphertext === null) {
      this.logger.log("expired", token, "opaque");
      return token;
    }

    try {
      const plaintext = this.crypto.decrypt(ciphertext);
      this.logger.log("decode", token, "opaque");
      return plaintext;
    } catch {
      this.logger.log("error", token, "opaque", "decryption_failed");
      return token;
    }
  }

  /** Scan text with this client's scanner and replace detected values via `encode`. */
  async scanAndTokenize(text: string): Promise<string> {
    return await this.scanner.scanAndTokenize(text, {
      encodeFn: (val) => this.encode(val)
    });
  }

  /** Async wrapper for encode (parity with Python aencode). */
  async aencode(rawText: string): Promise<string> {
    return await this.encode(rawText);
  }

  /** Async wrapper for decode (parity with Python adecode). */
  async adecode(token: string): Promise<string> {
    return await this.decode(token);
  }

  /** Async wrapper for scanAndTokenize (parity with Python ascan_and_tokenize). */
  async ascanAndTokenize(text: string): Promise<string> {
    return await this.scanAndTokenize(text);
  }

  /**
   * Find and replace all tokens within text with their plaintext.
   *
   * NOTE(review): this delegates to the module-level `detokenizeText` from
   * `./core/vault` and does not pass `this.vault` / `this.crypto`. Confirm
   * that a client built with custom instances is honoured here.
   */
  async detokenizeText(text: string): Promise<string> {
    return await detokenizeText(text);
  }

  /** Async wrapper for detokenizeText (parity with Python adetokenize_text). */
  async adetokenizeText(text: string): Promise<string> {
    return await this.detokenizeText(text);
  }
}
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Core cryptography engine for Mask SDK.
|
|
3
|
+
*
|
|
4
|
+
* Provides a CryptoEngine singleton that handles Envelope Encryption,
|
|
5
|
+
* ensuring that plaintext PII is encrypted locally before being
|
|
6
|
+
* transmitted and stored in distributed vaults (Redis/Memcached/DynamoDB).
|
|
7
|
+
*
|
|
8
|
+
* Requires MASK_ENCRYPTION_KEY to be set in the environment.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import * as process from 'process';
|
|
12
|
+
import { getKeyProvider } from './key_provider';
|
|
13
|
+
|
|
14
|
+
const fernet = require('fernet');
|
|
15
|
+
const cryptoNode = require('crypto');
|
|
16
|
+
import { MaskDecryptionError } from './exceptions';
|
|
17
|
+
|
|
18
|
+
/**
 * Singleton wrapper around the `fernet` package used to encrypt vault payloads.
 *
 * Obtain the shared instance with `CryptoEngine.getInstance()`; call
 * `CryptoEngine.reset()` to force the key to be re-read on next use.
 */
export class CryptoEngine {
  private static _instance: CryptoEngine | null = null;
  /** The `fernet.Secret` built from the active encryption key. */
  private _fernet: any;

  private constructor() {
    this._init();
  }

  /** Return the shared engine, constructing it on first use. */
  public static getInstance(): CryptoEngine {
    if (this._instance === null) {
      this._instance = new CryptoEngine();
    }
    return this._instance;
  }

  /** Clear the singleton instance to force re-initialization (useful for key rotation). */
  public static reset(): void {
    this._instance = null;
  }

  /**
   * Initialize the underlying Fernet secret.
   *
   * The encryption key is retrieved from the active KeyProvider. If no key is
   * available, a throwaway key is generated, written to
   * `process.env.MASK_ENCRYPTION_KEY`, and a warning is printed — intended for
   * local/test/demo use only.
   *
   * @throws Error when the key is rejected by `fernet.Secret`.
   */
  private _init(): void {
    const keyFromProvider = getKeyProvider().getEncryptionKey();
    let key: string;
    if (!keyFromProvider) {
      key = cryptoNode.randomBytes(32).toString('base64');
      process.env.MASK_ENCRYPTION_KEY = key;
      console.warn(
        "MASK_ENCRYPTION_KEY not set. Using a generated throwaway key. DO NOT USE THIS IN PRODUCTION."
      );
    } else {
      key = keyFromProvider;
    }

    try {
      // fernet Secret expects a base64 encoded string
      this._fernet = new fernet.Secret(key);
    } catch {
      throw new Error(
        "Invalid MASK_ENCRYPTION_KEY. Must be a valid url-safe base64-encoded " +
        "Fernet key."
      );
    }
  }

  /**
   * Encrypt plaintext into a Fernet token string.
   *
   * No `iv` is supplied, so the library generates a fresh one per token. A
   * fixed IV would make equal plaintexts encrypt to equal ciphertext blocks.
   * Existing tokens remain decryptable because the IV travels inside the token.
   */
  public encrypt(plaintext: string): string {
    const token = new fernet.Token({
      secret: this._fernet,
      time: Date.now()
    });
    // The fernet npm package encode returns a string
    return token.encode(plaintext);
  }

  /**
   * Decrypt a Fernet token string back to plaintext.
   *
   * @throws MaskDecryptionError on any failure (wrong key, corrupt payload).
   */
  public decrypt(ciphertext: string): string {
    try {
      const token = new fernet.Token({
        secret: this._fernet,
        token: ciphertext,
        ttl: 0 // No TTL check by default to match Python's Fernet default
      });
      return token.decode();
    } catch {
      console.error("Failed to decrypt vault payload. Check your MASK_ENCRYPTION_KEY.");
      throw new MaskDecryptionError("Decryption failed");
    }
  }
}
|
|
96
|
+
|
|
97
|
+
/** Convenience accessor for the shared `CryptoEngine` singleton. */
export function getCryptoEngine(): CryptoEngine {
  const engine = CryptoEngine.getInstance();
  return engine;
}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Custom exception hierarchy for the Mask SDK.
|
|
3
|
+
*
|
|
4
|
+
* Provides specific exceptions so callers can implement targeted
|
|
5
|
+
* retry/fallback logic instead of catching generic Error.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
/** Base class for every error raised by the Mask SDK. */
export class MaskError extends Error {
  /**
   * @param message Human-readable description of the failure.
   */
  constructor(message: string) {
    super(message);
    // Repair the prototype chain first. When compiled down to ES5,
    // `super(...)` on Error returns a plain Error, so `instanceof` checks and
    // `this.constructor` are wrong until this runs.
    Object.setPrototypeOf(this, new.target.prototype);
    // `new.target` is the class actually being constructed, so subclasses
    // report their own name regardless of the statement order above.
    this.name = new.target.name;
  }
}

/** Raised when a vault backend (Redis, DynamoDB) is unreachable. */
export class MaskVaultConnectionError extends MaskError {}

/** Raised when CryptoEngine.decrypt() fails (bad key, corrupt data). */
export class MaskDecryptionError extends MaskError {}

/** Raised when spaCy / Presidio analysis exceeds the time budget. */
export class MaskNLPTimeout extends MaskError {}
|
package/src/core/fpe.ts
ADDED
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Format-Preserving Encryption (FPE) token generation.
|
|
3
|
+
*
|
|
4
|
+
* Generates structurally valid, **deterministic** tokens that preserve the
|
|
5
|
+
* format of the original data type so downstream tools, schemas, and
|
|
6
|
+
* validators continue to work without modification.
|
|
7
|
+
*
|
|
8
|
+
* Determinism is achieved via HMAC-SHA256 keyed with a master key, ensuring
|
|
9
|
+
* the same plaintext always produces the same token. This preserves entity
|
|
10
|
+
* relationships for LLMs (e.g. "John" is always [TKN-abc]) without leaking
|
|
11
|
+
* the identity.
|
|
12
|
+
*
|
|
13
|
+
* Supported formats:
|
|
14
|
+
* - Email → tkn-<hex>@email.com
|
|
15
|
+
* - Phone → +1-555-<7 digits>
|
|
16
|
+
* - SSN → 000-00-<4 digits>
|
|
17
|
+
* - CC → 4000-0000-0000-<4 digits>
|
|
18
|
+
* - Routing→ 000000<3 digits>
|
|
19
|
+
* - Default→ [TKN-<hex>]
|
|
20
|
+
*/
|
|
21
|
+
|
|
22
|
+
import * as crypto from 'crypto';
|
|
23
|
+
import * as process from 'process';
|
|
24
|
+
import { getKeyProvider } from './key_provider';
|
|
25
|
+
|
|
26
|
+
// Master key management
|
|
27
|
+
|
|
28
|
+
/** Cached HMAC key bytes; `null` until first use or after `resetMasterKey()`. */
let _masterKey: Buffer | null = null;

/**
 * Return the HMAC master key, resolving and caching it on first call.
 *
 * The key string comes from the key provider. When the provider yields
 * nothing, a random hex key is generated, written to
 * `process.env.MASK_MASTER_KEY`, and a warning is printed.
 */
function _getMasterKey(): Buffer {
  if (_masterKey !== null) {
    return _masterKey;
  }

  let keyMaterial = getKeyProvider().getMasterKey() || "";
  if (!keyMaterial) {
    // No configured key: fall back to a session-local one (non-persistent).
    keyMaterial = crypto.randomBytes(32).toString('hex');
    process.env.MASK_MASTER_KEY = keyMaterial;
    console.warn(
      "MASK_MASTER_KEY not set. Using an ephemeral session key. " +
      "Tokens will NOT be reproducible across process restarts."
    );
  }

  // The key string is used as raw UTF-8 bytes; it is not hex- or base64-decoded.
  _masterKey = Buffer.from(keyMaterial, 'utf-8');
  return _masterKey;
}

/** Drop the cached master key so the next use resolves it again. Useful in tests. */
export function resetMasterKey(): void {
  _masterKey = null;
}
|
|
52
|
+
|
|
53
|
+
// Detectors — order matters: first match wins
|
|
54
|
+
|
|
55
|
+
/** Whole-string email shape: something@something.something, no whitespace or extra '@'. */
const _EMAIL_RE = /^[^@\s]+@[^@\s]+\.[^@\s]+$/;

/** Whole-string phone shape: 10 digits with optional +/1 prefix and separators, or a bare 7-digit number. */
const _PHONE_RE = /^\+?1?[\s\-.]?\(?\d{3}\)?[\s\-.]?\d{3}[\s\-.]?\d{4}$|^\d{3}[\s\-.]?\d{4}$/;

/** Whole-string SSN shape: ddd-dd-dddd (hyphens required). */
const _SSN_RE = /^\d{3}-\d{2}-\d{4}$/;

/** Whole-string 16-digit card shape, groups of four optionally separated by space or hyphen. */
const _CC_RE = /^(?:\d{4}[ \-]?){3}\d{4}$/;

/** Whole-string 9-digit routing-number shape. */
const _ROUTING_RE = /^\d{9}$/;
|
|
60
|
+
|
|
61
|
+
// Deterministic helpers (HMAC-based)
|
|
62
|
+
|
|
63
|
+
/**
 * Return the first *n* hex characters of HMAC-SHA256(masterKey, plaintext).
 *
 * @param plaintext Value to derive the characters from (hashed as UTF-8).
 * @param n Number of leading hex characters to keep; defaults to 8.
 */
function _hmacHex(plaintext: string, n: number = 8): string {
  const mac = crypto.createHmac('sha256', _getMasterKey());
  mac.update(plaintext, 'utf-8');
  const hexDigest = mac.digest('hex');
  return hexDigest.slice(0, n);
}
|
|
71
|
+
|
|
72
|
+
/**
 * Return *n* deterministic decimal digits derived from HMAC(key, plaintext).
 *
 * Each hex nibble of the HMAC-SHA256 digest, starting at `offset`, is folded
 * to a digit with modulo 10. The result is zero-padded if the digest runs out.
 *
 * NOTE: folding 16 nibble values modulo 10 makes digits 0-5 twice as likely
 * as 6-9. This is kept as is, because changing the mapping would change every
 * token already issued.
 *
 * @param plaintext Value to derive the digits from (hashed as UTF-8).
 * @param n Number of digits to return; values <= 0 yield an empty string.
 * @param offset Index of the first digest nibble to use; negative values are
 *   treated as 0.
 */
function _hmacDigits(plaintext: string, n: number, offset: number = 0): string {
  const digest = crypto
    .createHmac('sha256', _getMasterKey())
    .update(plaintext, 'utf-8')
    .digest('hex');

  // Clamp so that n <= 0 cannot run past the requested length and a negative
  // offset cannot index before the digest (which would emit "NaN").
  const count = Math.max(0, n);
  const start = Math.max(0, offset);

  const digits = Array.from(
    digest.slice(start, start + count),
    (nibble) => (parseInt(nibble, 16) % 10).toString()
  ).join("");

  // Safety: pad with zeros if fewer than `count` nibbles remained after `start`.
  return digits.padEnd(count, "0");
}
|
|
95
|
+
|
|
96
|
+
// Public API
|
|
97
|
+
|
|
98
|
+
/**
 * Build the **deterministic**, format-preserving token for rawText.
 *
 * The input is trimmed, then matched against the detectors in a fixed order
 * (email, phone, SSN, credit card, routing number). The first match decides
 * the token shape; anything else gets the opaque `[TKN-<hex>]` form.
 *
 * @param rawText Plaintext value to tokenise.
 * @returns A token whose shape mirrors the detected data type.
 */
export function generateFPEToken(rawText: string): string {
  const value = rawText.trim();

  // Ordered detector -> formatter pairs; order matters because the first
  // detector that matches wins.
  const formatters: ReadonlyArray<[RegExp, () => string]> = [
    [_EMAIL_RE, () => `tkn-${_hmacHex(value)}@email.com`],
    [_PHONE_RE, () => `+1-555-${_hmacDigits(value, 7)}`],
    [_SSN_RE, () => `000-00-${_hmacDigits(value, 4)}`],
    // Standard 16-digit credit card (format: 4000-0000-0000-XXXX)
    [_CC_RE, () => `4000-0000-0000-${_hmacDigits(value, 4)}`],
    // US ABA Routing Number (format: 000000XXX)
    [_ROUTING_RE, () => `000000${_hmacDigits(value, 3)}`],
  ];

  for (const [detector, format] of formatters) {
    if (detector.test(value)) {
      return format();
    }
  }

  // Opaque fallback
  return `[TKN-${_hmacHex(value)}]`;
}
|
|
133
|
+
|
|
134
|
+
/**
 * Regex that matches ANY valid Mask token.
 * Used for sub-string detokenization (finding tokens inside paragraphs).
 *
 * The all-numeric shapes carry digit boundaries so a token is not matched
 * inside a longer run of digits (e.g. `000000123` inside `91000000123`).
 *
 * The pattern is built from a string, not a regex literal, so the lookbehind
 * does not depend on the compile target. It carries the `g` flag and is
 * therefore stateful under `.test()` / `.exec()`; prefer `String.replace`,
 * `match` or `matchAll`.
 */
export const TOKEN_PATTERN = new RegExp(
  [
    "tkn-[a-f0-9]{8,64}@email\\.com",        // Email
    "\\+1-555-\\d{7}(?!\\d)",                // Phone
    "(?<!\\d)000-00-\\d{4}(?!\\d)",          // SSN
    "(?<!\\d)4000-0000-0000-\\d{4}(?!\\d)",  // CC
    "(?<!\\d)000000\\d{3}(?!\\d)",           // Routing
    "\\[TKN-[a-f0-9]{8,64}\\]",              // Opaque
  ].join("|"),
  "g"
);
|
|
147
|
+
|
|
148
|
+
/**
 * Anchored, whole-string versions of the token shapes this module emits:
 * email, phone, SSN, credit card, routing number and opaque fallback.
 */
const _TOKEN_SHAPES: readonly RegExp[] = [
  /^tkn-[a-f0-9]{8,64}@email\.com$/,   // Email tokens: tkn-<hex>@email.com
  /^\+1-555-\d{7}$/,                   // Phone tokens: +1-555-XXXXXXX
  /^000-00-\d{4}$/,                    // SSN tokens: 000-00-XXXX
  /^4000-0000-0000-\d{4}$/,            // Credit card tokens: 4000-0000-0000-XXXX
  /^000000\d{3}$/,                     // Routing tokens: 000000XXX
  /^\[TKN-[a-f0-9]{8,64}\]$/,          // Opaque fallback tokens: [TKN-<hex>]
];

/**
 * Heuristic: return true if value (after trimming) has exactly the shape of a
 * Mask token.
 *
 * The whole value must match a token shape, hex/digit payload included. A
 * prefix/suffix check is not enough: a real address such as
 * `tkn-bob@email.com` would be mistaken for a token.
 *
 * @param value Candidate string.
 * @returns `true` when the trimmed value matches one of the token shapes.
 */
export function looksLikeToken(value: string): boolean {
  const candidate = value.trim();
  return _TOKEN_SHAPES.some((shape) => shape.test(candidate));
}
|