mask-privacy 4.0.0 → 4.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/core/fpe.ts CHANGED
@@ -1,15 +1,12 @@
1
1
  /**
2
- * Format-Preserving Encryption (FPE) token generation.
3
- *
4
- * Generates structurally valid, **deterministic** tokens that preserve the
5
- * format of the original data type so downstream tools, schemas, and
6
- * validators continue to work without modification.
2
+ * Deterministic Pseudonymization (DP) token generation using NIST SP 800-38G FF1.
7
3
  */
8
4
 
9
5
  import * as crypto from 'crypto';
10
6
  import { config } from '../config';
11
7
  import { getKeyProvider } from './key_provider';
12
8
  import { MaskSecurityError } from './exceptions';
9
+ import { FF1 } from './ff1';
13
10
  import {
14
11
  FIRST_NAMES as _BIJECTIVE_NAMES,
15
12
  CONNECTORS as _BIJECTIVE_CONNECTORS,
@@ -18,32 +15,23 @@ import {
18
15
  SYLLABLES as _BIJECTIVE_SYLLABLES
19
16
  } from './synthesisLibrary';
20
17
 
21
-
22
- // Master key management
23
-
24
18
  let _masterKey: Buffer | null = null;
25
19
 
26
- /** Return the HMAC master key, lazily initialised from the key provider. */
27
20
  async function _getMasterKey(): Promise<Buffer> {
28
21
  if (_masterKey === null) {
29
22
  const provider = getKeyProvider();
30
23
  let raw = await provider.getMasterKey();
31
24
 
32
25
  if (!raw) {
33
- // Fallback to encryption key if no master key is set
34
26
  raw = await provider.getEncryptionKey() || "";
35
27
  }
36
28
 
37
29
  if (!raw) {
38
30
  if (config.MASK_DEV_MODE) {
39
- // Auto-generate a session-local key (non-persistent)
40
31
  raw = crypto.randomBytes(32).toString('hex');
41
- // Update process.env for any other legacy paths that might check it
42
32
  process.env.MASK_MASTER_KEY = raw;
43
33
  } else {
44
- throw new MaskSecurityError(
45
- "MASK_MASTER_KEY not set. Set it or use MASK_DEV_MODE=true for dev."
46
- );
34
+ throw new MaskSecurityError("MASK_MASTER_KEY not set.");
47
35
  }
48
36
  }
49
37
  _masterKey = Buffer.from(raw, 'utf-8');
@@ -51,149 +39,71 @@ async function _getMasterKey(): Promise<Buffer> {
51
39
  return _masterKey;
52
40
  }
53
41
 
54
- /** Clear the cached master key. Useful in tests. */
55
42
  export function resetMasterKey(): void {
56
43
  _masterKey = null;
57
44
  }
58
45
 
59
- // Detectors — order matters: first match wins
46
+ async function _getAesKey(): Promise<Buffer> {
47
+ // Salt the derivation with the tenant ID to guarantee per-tenant FF1
48
+ // uniqueness — two tenants with the same plaintext must never produce
49
+ // the same FPE token (cross-tenant collision prevention).
50
+ const masterKey = await _getMasterKey();
51
+ return crypto.createHmac('sha256', masterKey).update(config.MASK_TENANT_ID, 'utf-8').digest();
52
+ }
60
53
 
61
54
  const _EMAIL_RE = /^[^@\s]+@[^@\s]+\.[^@\s]+$/;
62
55
  const _PHONE_RE = /(?<!\d)(?:\+?1?[\s\-.]?\(?\d{3}\)?[\s\-.]?\d{3}[\s\-.]?\d{4}|\d{3}[\s\-.]?\d{4}|\+\d{2,3}[\s\-.]?\d{3}[\s\-.]?\d{3}[\s\-.]?\d{3,4})(?!\d)/;
63
- const _PHONE_INTL_RE = /(?<!\d)\+(?:[1-9]\d{0,3})[-.\s]?\(?\d{1,5}\)?(?:[-.\s]?\d{2,4}){2,4}(?!\d)/;
64
56
  const _SSN_RE = /^\d{3}-\d{2}-\d{4}$/;
65
57
  const _CC_RE = /^(?:\d{4}[ \-]?){3}\d{4}$/;
66
58
  const _ROUTING_RE = /^\d{9}$/;
67
59
  const _ES_ID_RE = /^(?:\d{8}[A-Z]|[XYZ]\d{7}[A-Z])$/;
68
60
  const _IBAN_RE = /^[A-Z]{2}\d{2}[A-Z0-9]{4,30}$/;
69
61
 
70
- // Deterministic helpers (HMAC-based)
71
-
72
- /** Return *n* deterministic hex characters derived from HMAC(key, plaintext). */
73
62
  async function _hmacHex(plaintext: string, n: number = 8): Promise<string> {
74
63
  const masterKey = await _getMasterKey();
75
- const digest = crypto
76
- .createHmac('sha256', masterKey)
77
- .update(plaintext, 'utf-8')
78
- .digest('hex');
64
+ const digest = crypto.createHmac('sha256', masterKey).update(plaintext, 'utf-8').digest('hex');
79
65
  return digest.slice(0, n);
80
66
  }
81
67
 
82
- /**
83
- * Return a deterministic 128-bit BigInt from HMAC(key, plaintext).
84
- *
85
- * Uses the first 16 bytes (128 bits) of the SHA-256 HMAC digest,
86
- * providing a namespace of 2^128 (~3.4 × 10^38). This replaces the
87
- * old nibble-by-nibble modulo-10 approach which suffered from severe
88
- * distribution bias in short fields (3-4 digits).
89
- */
90
- async function _hmacInt(plaintext: string): Promise<bigint> {
91
- const masterKey = await _getMasterKey();
92
- const raw = crypto
93
- .createHmac('sha256', masterKey)
94
- .update(plaintext, 'utf-8')
95
- .digest();
96
- // Read first 16 bytes as a big-endian unsigned integer
97
- let result = 0n;
98
- for (let i = 0; i < 16; i++) {
99
- result = (result << 8n) | BigInt(raw[i]);
100
- }
101
- return result;
102
- }
103
-
104
- /**
105
- * Return *n* deterministic decimal digits from HMAC(key, plaintext).
106
- *
107
- * Uses full-integer division of a 128-bit HMAC-derived seed instead of
108
- * per-nibble modulo-10, which eliminates the distribution bias that
109
- * caused collisions in short numeric fields (routing numbers, SSN
110
- * suffixes). The offset parameter salts the input to derive
111
- * independent digit sequences from the same plaintext.
112
- */
113
- async function _hmacDigits(plaintext: string, n: number, offset: number = 0): Promise<string> {
114
- const salted = offset ? `${plaintext}::${offset}` : plaintext;
115
- const seed = await _hmacInt(salted);
116
- const modulus = 10n ** BigInt(n);
117
- return (seed % modulus).toString().padStart(n, '0');
118
- }
119
-
120
68
  // ── Bijective Synthesis Engine ─────────────────────────────────────────────
121
69
 
122
- export class FF1 {
123
- /** NIST SP 800-38G FF1 implementation (simplified for 64-bit domains). */
124
- constructor(private key: Buffer, private tweak: Buffer) {}
125
-
126
- encrypt(n: bigint): bigint {
127
- /** Encrypts 64-bit bigint n using FF1 (10 rounds). */
128
- let A = n >> 32n;
129
- let B = n & 0xFFFFFFFFn;
130
- const radix = 2n ** 32n;
131
-
132
- for (let i = 0; i < 10; i++) {
133
- const tweakInfoBuffer = Buffer.alloc(8);
134
- tweakInfoBuffer.writeUInt32BE(i, 0);
135
- tweakInfoBuffer.writeUInt32BE(Number(B), 4);
136
- const tweakInfoCombined = Buffer.concat([this.tweak, tweakInfoBuffer]);
137
-
138
- const h = crypto.createHmac('sha256', this.key)
139
- .update(tweakInfoCombined)
140
- .digest();
141
-
142
- const roundVal = BigInt(h.readUInt32BE(0));
143
-
144
- const Anext = B;
145
- const Bnext = (A + roundVal) % radix;
146
- A = Anext;
147
- B = Bnext;
148
- }
149
-
150
- return (A << 32n) | B;
151
- }
152
-
153
- decrypt(n: bigint): bigint {
154
- /** Decrypts 64-bit bigint n using FF1 (10 rounds in reverse). */
155
- let A = n >> 32n;
156
- let B = n & 0xFFFFFFFFn;
157
- const radix = 2n ** 32n;
158
-
159
- for (let i = 9; i >= 0; i--) {
160
- const tweakInfoBuffer = Buffer.alloc(8);
161
- tweakInfoBuffer.writeUInt32BE(i, 0);
162
- tweakInfoBuffer.writeUInt32BE(Number(A), 4);
163
- const tweakInfoCombined = Buffer.concat([this.tweak, tweakInfoBuffer]);
164
-
165
- const h = crypto.createHmac('sha256', this.key)
166
- .update(tweakInfoCombined)
167
- .digest();
168
-
169
- const roundVal = BigInt(h.readUInt32BE(0));
170
-
171
- const Bprev = A;
172
- const Aprev = (B - roundVal + radix) % radix;
173
- A = Aprev;
174
- B = Bprev;
175
- }
176
-
177
- return (A << 32n) | B;
178
- }
179
- }
180
-
181
70
  async function _getBijectiveTweak(): Promise<Buffer> {
182
- const masterKey = await _getMasterKey();
183
- let base = config.MASK_TENANT_ID;
71
+ /**
72
+ * Derive the FF1 tweak deterministically from the tenant ID.
73
+ *
74
+ * IMPORTANT: The tweak is intentionally time-independent. Historical use of
75
+ * MASK_SALT_ROTATION (MONTHLY/YEARLY) caused permanent data loss when the
76
+ * calendar rolled over because old tokens could no longer be re-derived.
77
+ * Use MASK_KEYRING for key rotation instead; MASK_SALT_ROTATION is now a
78
+ * no-op and will emit a console.warn if set to a non-NONE value.
79
+ */
184
80
  if (config.MASK_SALT_ROTATION !== 'NONE') {
185
- const now = new Date();
186
- if (config.MASK_SALT_ROTATION === 'MONTHLY') {
187
- base += `-${now.getUTCFullYear()}-${now.getUTCMonth() + 1}`;
188
- } else if (config.MASK_SALT_ROTATION === 'YEARLY') {
189
- base += `-${now.getUTCFullYear()}`;
190
- }
81
+ console.warn(
82
+ `[mask] MASK_SALT_ROTATION=${config.MASK_SALT_ROTATION} is deprecated and ignored. ` +
83
+ 'Time-based tweaks caused permanent data loss on month/year rollovers. ' +
84
+ 'Use MASK_KEYRING for key rotation instead.'
85
+ );
191
86
  }
192
- return crypto.createHmac('sha256', masterKey).update(base, 'utf-8').digest();
87
+ const masterKey = await _getMasterKey();
88
+ return crypto.createHmac('sha256', masterKey).update(config.MASK_TENANT_ID, 'utf-8').digest();
89
+ }
90
+
91
+ async function _encryptBijectiveFF1(text: string): Promise<bigint> {
92
+ const canonical = text.toLowerCase().trim();
93
+ const hash = crypto.createHash('sha256').update(canonical, 'utf-8').digest();
94
+ // Hash to 64-bit int, then to 20-digit string
95
+ const inputInt = hash.readBigUInt64BE(0);
96
+ const inputStr = inputInt.toString().padStart(20, '0');
97
+
98
+ const aesKey = await _getAesKey();
99
+ const tweak = await _getBijectiveTweak();
100
+ const engine = new FF1(aesKey, tweak, 10);
101
+
102
+ const cipherStr = engine.encrypt(inputStr);
103
+ return BigInt(cipherStr) % (2n ** 64n);
193
104
  }
194
105
 
195
106
  function _renderBijectivePerson(bits: bigint): string {
196
- /** Render a 64-bit cipher into a human-readable name (Bijective Synthesis). */
197
107
  const firstIdx = Number(bits & 0x7FFn); // 11 bits (2048)
198
108
  const connIdx = Number((bits >> 11n) & 0x3Fn); // 6 bits (64)
199
109
  const rootIdx = Number((bits >> 17n) & 0xFFFn); // 12 bits (4096)
@@ -210,7 +120,6 @@ function _renderBijectivePerson(bits: bigint): string {
210
120
 
211
121
  const paddedNumeric = numeric.toString().padStart(4, '0');
212
122
 
213
- // Format Shuffle
214
123
  if (formatIdx === 0) return `${first} ${conn} ${surname}-${paddedNumeric}`;
215
124
  if (formatIdx === 1) return `${surname}, ${first}-${paddedNumeric}`;
216
125
  if (formatIdx === 2) return `${first[0]}. ${surname}-${paddedNumeric}`;
@@ -220,7 +129,6 @@ function _renderBijectivePerson(bits: bigint): string {
220
129
  }
221
130
 
222
131
  function _renderBijectiveLocation(bits: bigint): string {
223
- /** Render a 64-bit cipher into a bijective location name. */
224
132
  const s1 = Number(bits & 0x3FFn);
225
133
  const s2 = Number((bits >> 10n) & 0x3FFn);
226
134
  const s3 = Number((bits >> 20n) & 0x3FFn);
@@ -230,17 +138,6 @@ function _renderBijectiveLocation(bits: bigint): string {
230
138
  return `${city}-${tag.toString().padStart(3, '0')}`;
231
139
  }
232
140
 
233
- // ── Legacy Semantic Token Banks (Redirected in Bijective Mode) ──────────────
234
- // Seed lists are imported from semanticBanks.ts, maintaining architecture
235
- // parity with python/semantic_banks.py
236
-
237
- /** Return a deterministic item from an array using full 128-bit entropy. */
238
- async function _pickFromArray(plaintext: string, array: string[]): Promise<string> {
239
- const seed = await _hmacInt(plaintext);
240
- return array[Number(seed % BigInt(array.length))];
241
- }
242
-
243
- /** Compute Luhn check digit */
244
141
  function _computeLuhnDigit(partialNum: string): string {
245
142
  const digits = partialNum.split("").map(Number);
246
143
  let sum = 0;
@@ -257,18 +154,15 @@ function _computeLuhnDigit(partialNum: string): string {
257
154
  return ((10 - (sum % 10)) % 10).toString();
258
155
  }
259
156
 
260
-
261
-
262
157
  function _computeEsIdCheck(num: number): string {
263
158
  return "TRWAGMYFPDXBNJZSQVHLCKE"[num % 23];
264
159
  }
265
160
 
266
- // Public API
161
+ function _stripCcSeparators(text: string): string {
162
+ return text.replace(/[\s\-]/g, '');
163
+ }
267
164
 
268
- /**
269
- * Return a **deterministic**, format-preserving token for rawText using its entityType.
270
- */
271
- export async function generateFPEToken(rawText: string, entityType: string = 'UNKNOWN'): Promise<string> {
165
+ export async function generateDPToken(rawText: string, entityType: string = 'UNKNOWN'): Promise<string> {
272
166
  const text = rawText.trim();
273
167
  let type = (entityType || "UNKNOWN").toUpperCase();
274
168
 
@@ -291,22 +185,52 @@ export async function generateFPEToken(rawText: string, entityType: string = 'UN
291
185
  if (type === "PHONE_NUMBER" || type === "PHONE_NUM" || type === "PHONE_NUM_INTL") {
292
186
  const m = text.match(/^\+([1-9]\d{0,3})/);
293
187
  const cc = m ? m[1] : "1";
294
- return `+${cc}-555-${await _hmacDigits(text, 7)}`;
188
+ const digits = text.replace(/\D/g, "");
189
+ if (digits.length >= 7) {
190
+ const last7 = digits.slice(-7);
191
+ const engine = new FF1(await _getAesKey(), Buffer.from("PHONE"), 10);
192
+ const enc = engine.encrypt(last7);
193
+ return `+${cc}-555-${enc}`;
194
+ }
295
195
  }
296
196
 
297
197
  if (type === "US_SSN") {
298
- return `000-00-${await _hmacDigits(text, 4)}`;
198
+ const digits = text.replace(/-/g, "");
199
+ if (digits.length === 9) {
200
+ const engine = new FF1(await _getAesKey(), Buffer.from("US_SSN"), 10);
201
+ const enc = engine.encrypt(digits);
202
+ return `${enc.slice(0,3)}-${enc.slice(3,5)}-${enc.slice(5,9)}`;
203
+ }
299
204
  }
300
205
 
301
206
  if (type === "CREDIT_CARD" || type === "CREDIT_CARD_NUMBER") {
302
- const base = `400000000000${await _hmacDigits(text, 3)}`;
303
- const checkDig = _computeLuhnDigit(base);
304
- const full = base + checkDig;
305
- return `${full.slice(0,4)}-${full.slice(4,8)}-${full.slice(8,12)}-${full.slice(12,16)}`;
207
+ const digits = _stripCcSeparators(text);
208
+ if (digits.length === 16) {
209
+ const bin6 = digits.slice(0, 6);
210
+ const last4 = digits.slice(12, 16);
211
+ const middle6 = digits.slice(6, 12);
212
+
213
+ const engine = new FF1(await _getAesKey(), Buffer.from("CREDIT_CARD"), 10);
214
+ const encMiddle = engine.encrypt(middle6);
215
+
216
+ const base15 = bin6 + encMiddle + last4.slice(0, 3);
217
+ const checkDig = _computeLuhnDigit(base15);
218
+ const full = bin6 + encMiddle + last4.slice(0, 3) + checkDig;
219
+ return `${full.slice(0, 4)}-${full.slice(4, 8)}-${full.slice(8, 12)}-${full.slice(12, 16)}`;
220
+ } else {
221
+ const fallbackDigits = digits.padEnd(16, '0').slice(0, 16);
222
+ const engine = new FF1(await _getAesKey(), Buffer.from("CREDIT_CARD"), 10);
223
+ const encMiddle = engine.encrypt(fallbackDigits.slice(6, 12));
224
+ const full = fallbackDigits.slice(0, 6) + encMiddle + fallbackDigits.slice(12);
225
+ return `${full.slice(0, 4)}-${full.slice(4, 8)}-${full.slice(8, 12)}-${full.slice(12, 16)}`;
226
+ }
306
227
  }
307
228
 
308
229
  if (type === "US_ROUTING_NUMBER" || type === "US_ABA_ROUTING") {
309
- return `000000${await _hmacDigits(text, 3)}`;
230
+ if (text.length === 9 && /^\d+$/.test(text)) {
231
+ const engine = new FF1(await _getAesKey(), Buffer.from("US_ROUTING"), 10);
232
+ return engine.encrypt(text);
233
+ }
310
234
  }
311
235
 
312
236
  if (type === "INTL_BANK_IBAN" || type === "IBAN_CODE") {
@@ -315,31 +239,27 @@ export async function generateFPEToken(rawText: string, entityType: string = 'UN
315
239
  }
316
240
 
317
241
  if (type === "ES_ID" || type === "ES_DNI") {
318
- const digits = `000${await _hmacDigits(text, 5)}`;
319
- return digits + _computeEsIdCheck(parseInt(digits, 10));
242
+ let digits = text.toUpperCase().replace(/[A-Z]/g, "");
243
+ if (digits) {
244
+ digits = digits.padStart(8, "0");
245
+ const engine = new FF1(await _getAesKey(), Buffer.from("ES_ID"), 10);
246
+ const enc = engine.encrypt(digits.slice(-5));
247
+ const tokenDigits = `000${enc}`;
248
+ return tokenDigits + _computeEsIdCheck(parseInt(tokenDigits, 10));
249
+ }
320
250
  }
321
251
 
322
252
  if (type === "PERSON" || type === "PERSON_NAME") {
323
253
  if (config.MASK_BIJECTIVE_MODE) {
324
- const canonical = text.toLowerCase().trim();
325
- const hash = crypto.createHash('sha256').update(canonical, 'utf-8').digest();
326
- const inputInt = hash.readBigUInt64BE(0);
327
- const masterKey = await _getMasterKey();
328
- const engine = new FF1(masterKey.slice(0, 16), await _getBijectiveTweak());
329
- const cipher = engine.encrypt(inputInt);
330
- return _renderBijectivePerson(cipher);
254
+ const cipherBits = await _encryptBijectiveFF1(text);
255
+ return _renderBijectivePerson(cipherBits);
331
256
  }
332
257
  return `[TKN-PERSON-${await _hmacHex(text)}]`;
333
258
  }
334
259
  if (type === "LOCATION" || type === "PHYS_ADDRESS") {
335
260
  if (config.MASK_BIJECTIVE_MODE) {
336
- const canonical = text.toLowerCase().trim();
337
- const hash = crypto.createHash('sha256').update(canonical, 'utf-8').digest();
338
- const inputInt = hash.readBigUInt64BE(0);
339
- const masterKey = await _getMasterKey();
340
- const engine = new FF1(masterKey.slice(0, 16), await _getBijectiveTweak());
341
- const cipher = engine.encrypt(inputInt);
342
- return _renderBijectiveLocation(cipher);
261
+ const cipherBits = await _encryptBijectiveFF1(text);
262
+ return _renderBijectiveLocation(cipherBits);
343
263
  }
344
264
  return `[TKN-LOC-${await _hmacHex(text)}]`;
345
265
  }
@@ -350,4 +270,6 @@ export async function generateFPEToken(rawText: string, entityType: string = 'UN
350
270
  return `[TKN-${await _hmacHex(text)}]`;
351
271
  }
352
272
 
273
+ export const generateFPEToken = generateDPToken;
274
+
353
275
  export * from './fpe_utils';
@@ -12,14 +12,14 @@
12
12
  export const TOKEN_PATTERN = new RegExp(
13
13
  "tkn-[a-f0-9]{8,64}@[A-Za-z0-9.\\-]+\\.[A-Za-z]{2,}" + // Email
14
14
  "|\\+[1-9]\\d{0,3}-555-\\d{7}" + // Phone
15
- "|000-00-\\d{4}" + // SSN
16
- "|4000-0000-0000-\\d{4}" + // CC
17
- "|000000\\d{3}" + // Routing
18
- "|000\\d{5}[A-Z]" + // Spanish DNI token
15
+ "|\\d{3}-\\d{2}-\\d{4}" + // SSN
16
+ "|\\d{4}-\\d{4}-\\d{4}-\\d{4}" + // CC
17
+ "|\\b\\d{9}\\b" + // Routing
18
+ "|\\b000\\d{5}[A-Z]\\b" + // Spanish DNI token
19
19
  "|[A-Z]{2}00[A-F0-9]{4,16}" + // IBAN token
20
20
  "|<(?:PER|LOC|ORG):[^>]+>" + // NLP Semantic tokens V4
21
21
  "|\\b[A-Z][a-zA-Z, ]+-[0-9]{3,4}\\b" + // Bijective Name/Loc
22
- "|\\\\[TKN-[a-f0-9]{8,64}\\\\]", // Opaque
22
+ "|\\[TKN-[^\\]]+\\]", // Opaque
23
23
  "g"
24
24
  );
25
25
 
@@ -43,18 +43,18 @@ export function looksLikeToken(value: string | any): boolean {
43
43
  return true;
44
44
  }
45
45
 
46
- // SSN tokens: 000-00-XXXX
47
- if (v.startsWith("000-00-") && v.length === 11) {
46
+ // SSN tokens: XXX-XX-XXXX
47
+ if (/^\d{3}-\d{2}-\d{4}$/.test(v)) {
48
48
  return true;
49
49
  }
50
50
 
51
- // Credit card tokens: 4000-0000-0000-XXXX
52
- if (v.startsWith("4000-0000-0000-") && v.length === 19) {
51
+ // Credit card tokens: XXXX-XXXX-XXXX-XXXX
52
+ if (/^\d{4}-\d{4}-\d{4}-\d{4}$/.test(v)) {
53
53
  return true;
54
54
  }
55
55
 
56
- // Routing tokens: 000000XXX
57
- if (v.startsWith("000000") && v.length === 9) {
56
+ // Routing tokens: XXXXXXXXX
57
+ if (v.length === 9 && /^\d+$/.test(v)) {
58
58
  return true;
59
59
  }
60
60
 
@@ -92,4 +92,50 @@ export function looksLikeToken(value: string | any): boolean {
92
92
  return false;
93
93
  }
94
94
 
95
+ /**
96
+ * Strict token check safe for use inside audit log redaction (_deepMask).
97
+ *
98
+ * Unlike looksLikeToken(), this function excludes patterns that are AMBIGUOUS
99
+ * with real sensitive data (raw Credit Card and SSN formats). It only returns
100
+ * true when the value carries an unambiguous FPE watermark that real PII
101
+ * cannot share.
102
+ *
103
+ * This prevents real PANs / SSNs from bypassing redaction and being written
104
+ * to SOC 2 / HIPAA audit logs in plaintext — a PCI DSS Level 1 failure.
105
+ */
106
+ export function isUnambiguouslySafeToken(value: string | any): boolean {
107
+ if (typeof value !== 'string') return false;
108
+ const v = value.trim();
95
109
 
110
+ // Email FPE token: tkn-<hex>@domain.tld
111
+ if (v.startsWith("tkn-") && v.includes("@")) {
112
+ const parts = v.split("@");
113
+ if (parts.length === 2 && parts[0].length >= 12 && parts[1].includes(".")) {
114
+ return true;
115
+ }
116
+ }
117
+
118
+ // Phone FPE token: +CC-555-XXXXXXX (555 exchange is synthetic watermark)
119
+ if (/^\+[1-9]\d{0,3}-555-\d{7}$/.test(v)) return true;
120
+
121
+ // Spanish DNI FPE token: always starts 000 (real DNIs never start 000)
122
+ if (/^000\d{5}[A-Z]$/.test(v)) return true;
123
+
124
+ // IBAN FPE token: XX00... (real IBANs never have 00 as check digits)
125
+ if (/^[A-Z]{2}00[A-F0-9]{4,16}$/.test(v)) return true;
126
+
127
+ // Semantic NLP tokens: <PER:...>, <LOC:...>, <ORG:...>
128
+ if (/^<(PER|LOC|ORG):[^>]+>$/.test(v)) return true;
129
+
130
+ // Opaque fallback tokens: [TKN-...]
131
+ if (v.startsWith("[TKN-") && v.endsWith("]")) return true;
132
+
133
+ // Bijective Name/Location tokens: always end -DDDD (synthetic pattern)
134
+ if (/^[A-Z][a-zA-Z, ]+-[0-9]{3,4}$/.test(v)) return true;
135
+
136
+ // NOTE: Raw SSN (\d{3}-\d{2}-\d{4}), CC (\d{4}-\d{4}-\d{4}-\d{4}),
137
+ // and routing (\d{9}) patterns are intentionally EXCLUDED because real
138
+ // PII shares these exact formats. Use looksLikeToken() only for
139
+ // detokenization (where context guarantees a token is present).
140
+ return false;
141
+ }
@@ -33,6 +33,17 @@ export abstract class BaseKeyProvider {
33
33
 
34
34
  /** Return the HMAC master key, or null to auto-generate. */
35
35
  abstract getMasterKey(): Promise<string | null> | string | null;
36
+
37
+ /**
38
+ * Return a JSON keyring string (e.g. from KMS / Secrets Manager), or null
39
+ * to fall back to the MASK_KEYRING environment variable.
40
+ *
41
+ * Override in KMS-backed providers to source the full keyring from a
42
+ * secure external store, removing the need for MASK_KEYRING in env vars.
43
+ */
44
+ getKeyring(): Promise<string | null> | string | null {
45
+ return null;
46
+ }
36
47
  }
37
48
 
38
49
  /**
@@ -58,6 +69,11 @@ export class EnvKeyProvider extends BaseKeyProvider {
58
69
  let key = config.MASK_MASTER_KEY;
59
70
  return key || null;
60
71
  }
72
+
73
+ /** Return MASK_KEYRING from environment (default behaviour). */
74
+ async getKeyring(): Promise<string | null> {
75
+ return config.MASK_KEYRING || null;
76
+ }
61
77
  }
62
78
 
63
79
  /**
@@ -141,6 +157,30 @@ export class AwsKmsKeyProvider extends BaseKeyProvider {
141
157
  async getMasterKey(): Promise<string | null> {
142
158
  return await this.getEncryptionKey();
143
159
  }
160
+
161
+ /**
162
+ * Retrieve the JSON keyring from AWS Secrets Manager.
163
+ *
164
+ * If MASK_KEYRING_SECRET_ID is set, this provider fetches the full JSON
165
+ * keyring document from Secrets Manager, enabling zero-downtime key
166
+ * rotation without writing key material to environment variables.
167
+ */
168
+ async getKeyring(): Promise<string | null> {
169
+ const secretId = process.env.MASK_KEYRING_SECRET_ID;
170
+ if (!secretId) return null;
171
+ try {
172
+ const { GetSecretValueCommand } = require('@aws-sdk/client-secrets-manager');
173
+ const client = await this._getSecretsClient();
174
+ const response = await client.send(new GetSecretValueCommand({ SecretId: secretId }));
175
+ const keyringStr = response.SecretString;
176
+ if (!keyringStr) throw new Error('MASK_KEYRING_SECRET_ID returned an empty secret.');
177
+ console.info(`Keyring loaded from AWS Secrets Manager (secret: ${secretId})`);
178
+ return keyringStr;
179
+ } catch (e) {
180
+ console.error('Failed to retrieve keyring from AWS Secrets Manager:', e);
181
+ throw e;
182
+ }
183
+ }
144
184
  }
145
185
 
146
186
  /**
@@ -176,6 +216,25 @@ export class AzureKeyVaultProvider extends BaseKeyProvider {
176
216
  async getMasterKey(): Promise<string | null> {
177
217
  return await this.getEncryptionKey();
178
218
  }
219
+
220
+ /**
221
+ * Retrieve the JSON keyring from Azure Key Vault.
222
+ * Looks for a secret named `<secretName>-keyring`.
223
+ */
224
+ async getKeyring(): Promise<string | null> {
225
+ const keyringSecretName = this.secretName + '-keyring';
226
+ try {
227
+ const client = await this._getClient();
228
+ const secret = await client.getSecret(keyringSecretName);
229
+ if (secret.value) {
230
+ console.info(`Keyring loaded from Azure Key Vault (secret: ${keyringSecretName})`);
231
+ return secret.value;
232
+ }
233
+ } catch {
234
+ // Secret may not exist; fall back to env
235
+ }
236
+ return null;
237
+ }
179
238
  }
180
239
 
181
240
  /**
@@ -207,6 +266,27 @@ export class HashiCorpVaultProvider extends BaseKeyProvider {
207
266
  async getMasterKey(): Promise<string | null> {
208
267
  return await this.getEncryptionKey();
209
268
  }
269
+
270
+ /**
271
+ * Retrieve the JSON keyring from HashiCorp Vault.
272
+ * Looks for a `keyring` key in the secret at `secretPath`.
273
+ */
274
+ async getKeyring(): Promise<string | null> {
275
+ try {
276
+ const axios = require('axios');
277
+ const url = `${this.vaultAddr}/v1/${this.secretPath}`;
278
+ const response = await axios.get(url, { headers: { 'X-Vault-Token': this._token } });
279
+ const data = response.data?.data?.data || response.data?.data;
280
+ const keyringStr = data?.keyring;
281
+ if (keyringStr) {
282
+ console.info(`Keyring loaded from HashiCorp Vault (path: ${this.secretPath})`);
283
+ return keyringStr;
284
+ }
285
+ } catch (e) {
286
+ console.error('HashiCorp Vault keyring retrieval failed:', e);
287
+ }
288
+ return null;
289
+ }
210
290
  }
211
291
 
212
292
  // Singleton accessor