n8n-nodes-redactor 3.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +42 -0
- package/README.dev.md +153 -0
- package/README.md +443 -0
- package/README.npm.md +443 -0
- package/dist/nodes/PiiRedactor/PiiRedactor.node.d.ts +5 -0
- package/dist/nodes/PiiRedactor/PiiRedactor.node.js +1093 -0
- package/dist/nodes/PiiRedactor/__tests__/encryption.test.d.ts +1 -0
- package/dist/nodes/PiiRedactor/__tests__/encryption.test.js +200 -0
- package/dist/nodes/PiiRedactor/__tests__/engine.test.d.ts +1 -0
- package/dist/nodes/PiiRedactor/__tests__/engine.test.js +524 -0
- package/dist/nodes/PiiRedactor/__tests__/operations.test.d.ts +1 -0
- package/dist/nodes/PiiRedactor/__tests__/operations.test.js +316 -0
- package/dist/nodes/PiiRedactor/__tests__/patterns-global.test.d.ts +1 -0
- package/dist/nodes/PiiRedactor/__tests__/patterns-global.test.js +427 -0
- package/dist/nodes/PiiRedactor/__tests__/patterns.test.d.ts +1 -0
- package/dist/nodes/PiiRedactor/__tests__/patterns.test.js +481 -0
- package/dist/nodes/PiiRedactor/__tests__/phase1.test.d.ts +1 -0
- package/dist/nodes/PiiRedactor/__tests__/phase1.test.js +343 -0
- package/dist/nodes/PiiRedactor/__tests__/phase3.test.d.ts +1 -0
- package/dist/nodes/PiiRedactor/__tests__/phase3.test.js +275 -0
- package/dist/nodes/PiiRedactor/__tests__/phase4.test.d.ts +1 -0
- package/dist/nodes/PiiRedactor/__tests__/phase4.test.js +184 -0
- package/dist/nodes/PiiRedactor/__tests__/presidio.test.d.ts +1 -0
- package/dist/nodes/PiiRedactor/__tests__/presidio.test.js +170 -0
- package/dist/nodes/PiiRedactor/__tests__/security.test.d.ts +1 -0
- package/dist/nodes/PiiRedactor/__tests__/security.test.js +178 -0
- package/dist/nodes/PiiRedactor/__tests__/semantic.test.d.ts +1 -0
- package/dist/nodes/PiiRedactor/__tests__/semantic.test.js +319 -0
- package/dist/nodes/PiiRedactor/__tests__/vault.test.d.ts +1 -0
- package/dist/nodes/PiiRedactor/__tests__/vault.test.js +247 -0
- package/dist/nodes/PiiRedactor/audit.d.ts +48 -0
- package/dist/nodes/PiiRedactor/audit.js +192 -0
- package/dist/nodes/PiiRedactor/classification.d.ts +33 -0
- package/dist/nodes/PiiRedactor/classification.js +118 -0
- package/dist/nodes/PiiRedactor/context.d.ts +57 -0
- package/dist/nodes/PiiRedactor/context.js +260 -0
- package/dist/nodes/PiiRedactor/encryption.d.ts +45 -0
- package/dist/nodes/PiiRedactor/encryption.js +158 -0
- package/dist/nodes/PiiRedactor/engine.d.ts +23 -0
- package/dist/nodes/PiiRedactor/engine.js +888 -0
- package/dist/nodes/PiiRedactor/injection.d.ts +46 -0
- package/dist/nodes/PiiRedactor/injection.js +425 -0
- package/dist/nodes/PiiRedactor/names.d.ts +25 -0
- package/dist/nodes/PiiRedactor/names.js +188 -0
- package/dist/nodes/PiiRedactor/patterns.d.ts +17 -0
- package/dist/nodes/PiiRedactor/patterns.js +1742 -0
- package/dist/nodes/PiiRedactor/presidio.d.ts +77 -0
- package/dist/nodes/PiiRedactor/presidio.js +264 -0
- package/dist/nodes/PiiRedactor/profiles.d.ts +47 -0
- package/dist/nodes/PiiRedactor/profiles.js +139 -0
- package/dist/nodes/PiiRedactor/pseudonymize.d.ts +20 -0
- package/dist/nodes/PiiRedactor/pseudonymize.js +203 -0
- package/dist/nodes/PiiRedactor/redact.png +0 -0
- package/dist/nodes/PiiRedactor/redact.svg +3 -0
- package/dist/nodes/PiiRedactor/ropa.d.ts +63 -0
- package/dist/nodes/PiiRedactor/ropa.js +70 -0
- package/dist/nodes/PiiRedactor/types.d.ts +82 -0
- package/dist/nodes/PiiRedactor/types.js +3 -0
- package/dist/nodes/PiiRedactor/vault.d.ts +61 -0
- package/dist/nodes/PiiRedactor/vault.js +352 -0
- package/package.json +87 -0
|
@@ -0,0 +1,1742 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.PII_PATTERNS = void 0;
|
|
4
|
+
exports.getPatternOptions = getPatternOptions;
|
|
5
|
+
exports.getPatternsByNames = getPatternsByNames;
|
|
6
|
+
/**
 * Luhn (mod 10) checksum validation.
 *
 * Used by the pattern table in this file to confirm candidate payment card
 * numbers and Canadian SINs. IBANs are NOT covered by this check; they use
 * the mod-97 algorithm in `ibanCheck`.
 *
 * Every non-digit character (spaces, dashes, ...) is stripped before the
 * checksum is computed, so formatted input such as "4111 1111 1111 1111"
 * is accepted as-is.
 *
 * @param {string} num - Candidate number, optionally containing separators.
 * @returns {boolean} `true` when the digits satisfy the Luhn checksum;
 *   `false` when they do not, or when the input contains no digits at all.
 */
function luhnCheck(num) {
    const digits = num.replace(/\D/g, '');
    // With no digits the sum below would be 0 and "0 % 10 === 0" would
    // report the empty input as valid — reject it explicitly.
    if (digits.length === 0) {
        return false;
    }
    let sum = 0;
    // Walk right-to-left; every second digit (starting with the one left of
    // the check digit) is doubled.
    let alternate = false;
    for (let i = digits.length - 1; i >= 0; i--) {
        let n = parseInt(digits[i], 10);
        if (alternate) {
            n *= 2;
            // Equivalent to adding the two decimal digits of the product.
            if (n > 9) {
                n -= 9;
            }
        }
        sum += n;
        alternate = !alternate;
    }
    return sum % 10 === 0;
}
|
|
25
|
+
/**
 * IBAN checksum validation (ISO 13616, mod-97).
 *
 * Whitespace is removed and the input is upper-cased before checking, so
 * both "DE89 3704 0044 0532 0130 00" and "de89370400440532013000" are
 * accepted.
 *
 * @param {string} iban - Candidate IBAN, optionally grouped with whitespace.
 * @returns {boolean} `true` when the candidate is 15-34 characters long,
 *   consists of a 2-letter country code, 2 check digits and an alphanumeric
 *   remainder, and its mod-97 remainder equals 1; otherwise `false`.
 */
function ibanCheck(iban) {
    const cleaned = iban.replace(/\s/g, '').toUpperCase();
    if (cleaned.length < 15 || cleaned.length > 34) {
        return false;
    }
    // Reject anything that is not strictly alphanumeric in IBAN layout.
    // Without this guard, separators such as '-' or '.' would reach the
    // numeric conversion and be silently mis-parsed instead of rejected.
    if (!/^[A-Z]{2}\d{2}[A-Z0-9]+$/.test(cleaned)) {
        return false;
    }
    // Move the country code and check digits (first 4 chars) to the end.
    const rearranged = cleaned.slice(4) + cleaned.slice(0, 4);
    // Running mod-97 over the decimal expansion: digits contribute one
    // decimal place, letters (A=10 ... Z=35) contribute two. Keeping only
    // the remainder avoids building a number too large for a double.
    let remainder = 0;
    for (let i = 0; i < rearranged.length; i++) {
        const code = rearranged.charCodeAt(i);
        if (code <= 57) {
            // '0'..'9'
            remainder = (remainder * 10 + (code - 48)) % 97;
        }
        else {
            // 'A'..'Z' → 10..35
            remainder = (remainder * 100 + (code - 55)) % 97;
        }
    }
    return remainder === 1;
}
|
|
41
|
+
/**
|
|
42
|
+
* All built-in PII patterns — 30+ types organized by category.
|
|
43
|
+
*/
|
|
44
|
+
exports.PII_PATTERNS = [
|
|
45
|
+
// ═══════════════════════════════════════════
|
|
46
|
+
// CONTACT
|
|
47
|
+
// ═══════════════════════════════════════════
|
|
48
|
+
{
|
|
49
|
+
name: 'email',
|
|
50
|
+
label: 'EMAIL',
|
|
51
|
+
category: 'contact',
|
|
52
|
+
regex: /[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}/g,
|
|
53
|
+
description: 'Email addresses',
|
|
54
|
+
},
|
|
55
|
+
{
|
|
56
|
+
name: 'phone',
|
|
57
|
+
label: 'PHONE',
|
|
58
|
+
category: 'contact',
|
|
59
|
+
regex: /(?:\+\d{1,3}[\s\-.])?(?:\(\d{2,4}\)|\d{2,4})[\s\-.]\d{3,4}[\s\-.]\d{3,8}/g,
|
|
60
|
+
description: 'Phone numbers (international formats)',
|
|
61
|
+
},
|
|
62
|
+
{
|
|
63
|
+
name: 'phoneUK',
|
|
64
|
+
label: 'PHONE_UK',
|
|
65
|
+
category: 'contact',
|
|
66
|
+
regex: /\b(?:0|\+44)\s?\d{2,4}\s?\d{3,4}\s?\d{3,4}\b/g,
|
|
67
|
+
description: 'UK phone numbers',
|
|
68
|
+
},
|
|
69
|
+
{
|
|
70
|
+
name: 'phoneDE',
|
|
71
|
+
label: 'PHONE_DE',
|
|
72
|
+
category: 'contact',
|
|
73
|
+
regex: /(?:0|\+49)\s?\d{2,4}[\s\/]\d{3,8}(?:[\s\-]\d{1,5})?/g,
|
|
74
|
+
description: 'German phone numbers (089 1234 5678, +49 30 12345678)',
|
|
75
|
+
},
|
|
76
|
+
{
|
|
77
|
+
name: 'phoneAT',
|
|
78
|
+
label: 'PHONE_AT',
|
|
79
|
+
category: 'contact',
|
|
80
|
+
regex: /(?:0|\+43)\s?\d{1,4}[\s\/]\d{3,8}(?:[\s\-]\d{1,5})?/g,
|
|
81
|
+
description: 'Austrian phone numbers',
|
|
82
|
+
},
|
|
83
|
+
{
|
|
84
|
+
name: 'phoneCH',
|
|
85
|
+
label: 'PHONE_CH',
|
|
86
|
+
category: 'contact',
|
|
87
|
+
regex: /(?:0|\+41)\s?\d{2}\s?\d{3}\s?\d{2}\s?\d{2}/g,
|
|
88
|
+
description: 'Swiss phone numbers (079 123 45 67)',
|
|
89
|
+
},
|
|
90
|
+
{
|
|
91
|
+
name: 'phoneFR',
|
|
92
|
+
label: 'PHONE_FR',
|
|
93
|
+
category: 'contact',
|
|
94
|
+
regex: /\b(?:0|\+33)\s?\d[\s.]\d{2}[\s.]\d{2}[\s.]\d{2}[\s.]\d{2}\b/g,
|
|
95
|
+
description: 'French phone numbers (06 12 34 56 78)',
|
|
96
|
+
},
|
|
97
|
+
{
|
|
98
|
+
name: 'phoneNL',
|
|
99
|
+
label: 'PHONE_NL',
|
|
100
|
+
category: 'contact',
|
|
101
|
+
regex: /\b(?:0|\+31)\s?\d{1,3}[\s\-]\d{3,4}[\s\-]\d{2,4}\b/g,
|
|
102
|
+
description: 'Dutch phone numbers',
|
|
103
|
+
},
|
|
104
|
+
// ═══════════════════════════════════════════
|
|
105
|
+
// IDENTITY — Person names (heuristic)
|
|
106
|
+
// ═══════════════════════════════════════════
|
|
107
|
+
{
|
|
108
|
+
name: 'personName',
|
|
109
|
+
label: 'PERSON',
|
|
110
|
+
category: 'identity',
|
|
111
|
+
regex: /\b(?:Mr\.?|Mrs\.?|Ms\.?|Miss|Dr\.?|Prof\.?|Sr\.?|Jr\.?|Herr|Frau|Monsieur|Madame|Señor|Señora)\s+[A-Z][a-zA-ZÀ-ÿ]+(?:\s+[A-Z][a-zA-ZÀ-ÿ]+){0,3}\b/g,
|
|
112
|
+
description: 'Person names with title prefixes (Mr., Mrs., Dr., etc.)',
|
|
113
|
+
},
|
|
114
|
+
// ═══════════════════════════════════════════
|
|
115
|
+
// IDENTITY — Government IDs
|
|
116
|
+
// ═══════════════════════════════════════════
|
|
117
|
+
{
|
|
118
|
+
name: 'ssn',
|
|
119
|
+
label: 'SSN',
|
|
120
|
+
category: 'identity',
|
|
121
|
+
regex: /\b\d{3}-\d{2}-\d{4}\b/g,
|
|
122
|
+
description: 'US Social Security Numbers',
|
|
123
|
+
},
|
|
124
|
+
{
|
|
125
|
+
name: 'passportUS',
|
|
126
|
+
label: 'PASSPORT_US',
|
|
127
|
+
category: 'identity',
|
|
128
|
+
regex: /\b[A-Z]\d{8}\b/g,
|
|
129
|
+
description: 'US passport numbers',
|
|
130
|
+
},
|
|
131
|
+
{
|
|
132
|
+
name: 'passportEU',
|
|
133
|
+
label: 'PASSPORT_EU',
|
|
134
|
+
category: 'identity',
|
|
135
|
+
regex: /\b[A-Z]{2}\d{7}\b/g,
|
|
136
|
+
description: 'EU passport numbers (2 letters + 7 digits)',
|
|
137
|
+
},
|
|
138
|
+
{
|
|
139
|
+
name: 'driverLicenseUS',
|
|
140
|
+
label: 'DRIVER_LICENSE_US',
|
|
141
|
+
category: 'identity',
|
|
142
|
+
regex: /\b[A-Z]\d{3}-\d{4}-\d{4}\b/g,
|
|
143
|
+
description: 'US driver license (common format)',
|
|
144
|
+
},
|
|
145
|
+
{
|
|
146
|
+
name: 'nationalIdDE',
|
|
147
|
+
label: 'NATIONAL_ID_DE',
|
|
148
|
+
category: 'identity',
|
|
149
|
+
regex: /\b[CFGHJKLMNPRTVWXYZ0-9]{9}\d\b/g,
|
|
150
|
+
description: 'German ID card number (10 chars)',
|
|
151
|
+
},
|
|
152
|
+
{
|
|
153
|
+
name: 'nhsNumber',
|
|
154
|
+
label: 'NHS_NUMBER',
|
|
155
|
+
category: 'identity',
|
|
156
|
+
regex: /\b\d{3}\s?\d{3}\s?\d{4}\b/g,
|
|
157
|
+
validate: (match) => {
|
|
158
|
+
// NHS number checksum: weighted mod 11
|
|
159
|
+
const digits = match.replace(/\s/g, '');
|
|
160
|
+
if (digits.length !== 10)
|
|
161
|
+
return false;
|
|
162
|
+
const weights = [10, 9, 8, 7, 6, 5, 4, 3, 2];
|
|
163
|
+
let sum = 0;
|
|
164
|
+
for (let i = 0; i < 9; i++) {
|
|
165
|
+
sum += parseInt(digits[i], 10) * weights[i];
|
|
166
|
+
}
|
|
167
|
+
const remainder = sum % 11;
|
|
168
|
+
const check = 11 - remainder;
|
|
169
|
+
if (check === 11)
|
|
170
|
+
return parseInt(digits[9], 10) === 0;
|
|
171
|
+
if (check === 10)
|
|
172
|
+
return false; // invalid
|
|
173
|
+
return parseInt(digits[9], 10) === check;
|
|
174
|
+
},
|
|
175
|
+
description: 'UK NHS numbers (with checksum validation)',
|
|
176
|
+
},
|
|
177
|
+
{
|
|
178
|
+
name: 'taxIdUS',
|
|
179
|
+
label: 'EIN',
|
|
180
|
+
category: 'identity',
|
|
181
|
+
regex: /\b\d{2}-\d{7}\b/g,
|
|
182
|
+
description: 'US Employer Identification Number (EIN)',
|
|
183
|
+
},
|
|
184
|
+
{
|
|
185
|
+
name: 'taxIdDE',
|
|
186
|
+
label: 'STEUER_ID_DE',
|
|
187
|
+
category: 'identity',
|
|
188
|
+
regex: /\b(?:Steuer[\-\s]?(?:ID|Id|Identifikationsnummer|Nr)[.:\s]*)\d{11}\b/gi,
|
|
189
|
+
description: 'German tax ID (Steuerliche Identifikationsnummer, 11 digits)',
|
|
190
|
+
},
|
|
191
|
+
{
|
|
192
|
+
name: 'taxIdDEPlain',
|
|
193
|
+
label: 'STEUER_ID_DE',
|
|
194
|
+
category: 'identity',
|
|
195
|
+
regex: /\b\d{2}\s?\d{3}\s?\d{3}\s?\d{3}\b/g,
|
|
196
|
+
validate: (match) => {
|
|
197
|
+
// German tax ID is exactly 11 digits, first digit != 0
|
|
198
|
+
const digits = match.replace(/\s/g, '');
|
|
199
|
+
return digits.length === 11 && digits[0] !== '0';
|
|
200
|
+
},
|
|
201
|
+
description: 'German tax ID (11 digits, standalone)',
|
|
202
|
+
},
|
|
203
|
+
{
|
|
204
|
+
name: 'sozialversicherungDE',
|
|
205
|
+
label: 'SOZIALVERS_DE',
|
|
206
|
+
category: 'identity',
|
|
207
|
+
regex: /\b\d{2}\s?\d{6}\s?[A-Z]\s?\d{3}\b/g,
|
|
208
|
+
description: 'German social insurance number (Sozialversicherungsnummer)',
|
|
209
|
+
},
|
|
210
|
+
{
|
|
211
|
+
name: 'sozialversicherungAT',
|
|
212
|
+
label: 'SOZIALVERS_AT',
|
|
213
|
+
category: 'identity',
|
|
214
|
+
regex: /\b\d{4}\s?\d{6}\b/g,
|
|
215
|
+
validate: (match) => {
|
|
216
|
+
// Austrian SV number: 4-digit serial + 6-digit DOB (DDMMYY)
|
|
217
|
+
const digits = match.replace(/\s/g, '');
|
|
218
|
+
if (digits.length !== 10)
|
|
219
|
+
return false;
|
|
220
|
+
const day = parseInt(digits.slice(4, 6), 10);
|
|
221
|
+
const month = parseInt(digits.slice(6, 8), 10);
|
|
222
|
+
return day >= 1 && day <= 31 && month >= 1 && month <= 12;
|
|
223
|
+
},
|
|
224
|
+
description: 'Austrian social insurance number (Sozialversicherungsnummer)',
|
|
225
|
+
},
|
|
226
|
+
{
|
|
227
|
+
name: 'ahvCH',
|
|
228
|
+
label: 'AHV_CH',
|
|
229
|
+
category: 'identity',
|
|
230
|
+
regex: /\b756\.\d{4}\.\d{4}\.\d{2}\b/g,
|
|
231
|
+
description: 'Swiss social insurance number (AHV/AVS: 756.XXXX.XXXX.XX)',
|
|
232
|
+
},
|
|
233
|
+
{
|
|
234
|
+
name: 'nationalIdFR',
|
|
235
|
+
label: 'NIR_FR',
|
|
236
|
+
category: 'identity',
|
|
237
|
+
regex: /\b[12]\s?\d{2}\s?\d{2}\s?\d{2}\s?\d{3}\s?\d{3}\s?\d{2}\b/g,
|
|
238
|
+
description: 'French national ID / social security (NIR: 13 digits + 2 check)',
|
|
239
|
+
},
|
|
240
|
+
{
|
|
241
|
+
name: 'codiceFiscaleIT',
|
|
242
|
+
label: 'CODICE_FISCALE_IT',
|
|
243
|
+
category: 'identity',
|
|
244
|
+
regex: /\b[A-Z]{6}\d{2}[A-EHLMPRST]\d{2}[A-Z]\d{3}[A-Z]\b/g,
|
|
245
|
+
description: 'Italian fiscal code (Codice Fiscale)',
|
|
246
|
+
},
|
|
247
|
+
{
|
|
248
|
+
name: 'dniES',
|
|
249
|
+
label: 'DNI_ES',
|
|
250
|
+
category: 'identity',
|
|
251
|
+
regex: /\b\d{8}[A-Z]\b/g,
|
|
252
|
+
description: 'Spanish national ID (DNI: 8 digits + letter)',
|
|
253
|
+
},
|
|
254
|
+
{
|
|
255
|
+
name: 'nieES',
|
|
256
|
+
label: 'NIE_ES',
|
|
257
|
+
category: 'identity',
|
|
258
|
+
regex: /\b[XYZ]\d{7}[A-Z]\b/g,
|
|
259
|
+
description: 'Spanish foreigner ID (NIE: X/Y/Z + 7 digits + letter)',
|
|
260
|
+
},
|
|
261
|
+
{
|
|
262
|
+
name: 'peselPL',
|
|
263
|
+
label: 'PESEL_PL',
|
|
264
|
+
category: 'identity',
|
|
265
|
+
regex: /\b\d{11}\b/g,
|
|
266
|
+
validate: (match) => {
|
|
267
|
+
// PESEL checksum: weighted mod 10
|
|
268
|
+
const w = [1, 3, 7, 9, 1, 3, 7, 9, 1, 3];
|
|
269
|
+
let sum = 0;
|
|
270
|
+
for (let i = 0; i < 10; i++) {
|
|
271
|
+
sum += parseInt(match[i], 10) * w[i];
|
|
272
|
+
}
|
|
273
|
+
const check = (10 - (sum % 10)) % 10;
|
|
274
|
+
return parseInt(match[10], 10) === check;
|
|
275
|
+
},
|
|
276
|
+
description: 'Polish national ID (PESEL: 11 digits, checksum validated)',
|
|
277
|
+
},
|
|
278
|
+
{
|
|
279
|
+
name: 'bsnNL',
|
|
280
|
+
label: 'BSN_NL',
|
|
281
|
+
category: 'identity',
|
|
282
|
+
regex: /\b\d{9}\b/g,
|
|
283
|
+
validate: (match) => {
|
|
284
|
+
// BSN 11-check: 9*d1 + 8*d2 + ... + 2*d8 - 1*d9 must be divisible by 11
|
|
285
|
+
const w = [9, 8, 7, 6, 5, 4, 3, 2, -1];
|
|
286
|
+
let sum = 0;
|
|
287
|
+
for (let i = 0; i < 9; i++) {
|
|
288
|
+
sum += parseInt(match[i], 10) * w[i];
|
|
289
|
+
}
|
|
290
|
+
return sum % 11 === 0 && sum !== 0;
|
|
291
|
+
},
|
|
292
|
+
description: 'Dutch citizen service number (BSN: 9 digits, 11-check validated)',
|
|
293
|
+
},
|
|
294
|
+
{
|
|
295
|
+
name: 'handelsregisterDE',
|
|
296
|
+
label: 'HRB_DE',
|
|
297
|
+
category: 'identity',
|
|
298
|
+
regex: /\b(?:HRA|HRB|GnR|PR|VR)\s?\d{3,6}\s?[A-Z]?\b/g,
|
|
299
|
+
description: 'German commercial register number (Handelsregisternummer)',
|
|
300
|
+
},
|
|
301
|
+
// ═══════════════════════════════════════════
|
|
302
|
+
// FINANCIAL
|
|
303
|
+
// ═══════════════════════════════════════════
|
|
304
|
+
{
|
|
305
|
+
name: 'creditCard',
|
|
306
|
+
label: 'CREDIT_CARD',
|
|
307
|
+
category: 'financial',
|
|
308
|
+
regex: /\b(?:\d{4}[\s\-]?){3}\d{4}\b/g,
|
|
309
|
+
validate: (match) => luhnCheck(match),
|
|
310
|
+
description: 'Credit/debit card numbers (Luhn validated)',
|
|
311
|
+
},
|
|
312
|
+
{
|
|
313
|
+
name: 'iban',
|
|
314
|
+
label: 'IBAN',
|
|
315
|
+
category: 'financial',
|
|
316
|
+
regex: /\b[A-Z]{2}\d{2}[\s]?[\dA-Z]{4}[\s]?(?:[\dA-Z]{4}[\s]?){2,7}[\dA-Z]{1,4}\b/g,
|
|
317
|
+
validate: (match) => ibanCheck(match),
|
|
318
|
+
description: 'International Bank Account Numbers (IBAN checksum validated)',
|
|
319
|
+
},
|
|
320
|
+
{
|
|
321
|
+
name: 'bic',
|
|
322
|
+
label: 'BIC_SWIFT',
|
|
323
|
+
category: 'financial',
|
|
324
|
+
regex: /\b[A-Z]{4}(?:AD|AE|AF|AG|AI|AL|AM|AO|AQ|AR|AS|AT|AU|AW|AX|AZ|BA|BB|BD|BE|BF|BG|BH|BI|BJ|BL|BM|BN|BO|BQ|BR|BS|BT|BV|BW|BY|BZ|CA|CC|CD|CF|CG|CH|CI|CK|CL|CM|CN|CO|CR|CU|CV|CW|CX|CY|CZ|DE|DJ|DK|DM|DO|DZ|EC|EE|EG|EH|ER|ES|ET|FI|FJ|FK|FM|FO|FR|GA|GB|GD|GE|GF|GG|GH|GI|GL|GM|GN|GP|GQ|GR|GS|GT|GU|GW|GY|HK|HM|HN|HR|HT|HU|ID|IE|IL|IM|IN|IO|IQ|IR|IS|IT|JE|JM|JO|JP|KE|KG|KH|KI|KM|KN|KP|KR|KW|KY|KZ|LA|LB|LC|LI|LK|LR|LS|LT|LU|LV|LY|MA|MC|MD|ME|MF|MG|MH|MK|ML|MM|MN|MO|MP|MQ|MR|MS|MT|MU|MV|MW|MX|MY|MZ|NA|NC|NE|NF|NG|NI|NL|NO|NP|NR|NU|NZ|OM|PA|PE|PF|PG|PH|PK|PL|PM|PN|PR|PS|PT|PW|PY|QA|RE|RO|RS|RU|RW|SA|SB|SC|SD|SE|SG|SH|SI|SJ|SK|SL|SM|SN|SO|SR|SS|ST|SV|SX|SY|SZ|TC|TD|TF|TG|TH|TJ|TK|TL|TM|TN|TO|TR|TT|TV|TW|TZ|UA|UG|UM|US|UY|UZ|VA|VC|VE|VG|VI|VN|VU|WF|WS|XK|YE|YT|ZA|ZM|ZW)[A-Z0-9]{2}(?:[A-Z0-9]{3})?\b/g,
|
|
325
|
+
description: 'BIC/SWIFT codes (with country code validation)',
|
|
326
|
+
},
|
|
327
|
+
{
|
|
328
|
+
name: 'vatEU',
|
|
329
|
+
label: 'VAT_EU',
|
|
330
|
+
category: 'financial',
|
|
331
|
+
regex: /\b(?:ATU\d{8}|BE[01]\d{9}|BG\d{9,10}|CY\d{8}[A-Z]|CZ\d{8,10}|DE\d{9}|DK\d{8}|EE\d{9}|EL\d{9}|ES[A-Z0-9]\d{7}[A-Z0-9]|FI\d{8}|FR[A-Z0-9]{2}\d{9}|HR\d{11}|HU\d{8}|IE\d{7}[A-Z]{1,2}|IT\d{11}|LT\d{9,12}|LU\d{8}|LV\d{11}|MT\d{8}|NL\d{9}B\d{2}|PL\d{10}|PT\d{9}|RO\d{2,10}|SE\d{12}|SI\d{8}|SK\d{10})\b/g,
|
|
332
|
+
description: 'EU VAT identification numbers (all member states)',
|
|
333
|
+
},
|
|
334
|
+
{
|
|
335
|
+
name: 'bankAccountUK',
|
|
336
|
+
label: 'BANK_ACCOUNT_UK',
|
|
337
|
+
category: 'financial',
|
|
338
|
+
regex: /\b\d{6}\s?\d{8}\b/g, // sort code + account
|
|
339
|
+
description: 'UK bank account (sort code + account number)',
|
|
340
|
+
},
|
|
341
|
+
// ═══════════════════════════════════════════
|
|
342
|
+
// NETWORK
|
|
343
|
+
// ═══════════════════════════════════════════
|
|
344
|
+
{
|
|
345
|
+
name: 'ipv4',
|
|
346
|
+
label: 'IP_ADDRESS',
|
|
347
|
+
category: 'network',
|
|
348
|
+
regex: /\b(?:(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\b/g,
|
|
349
|
+
description: 'IPv4 addresses',
|
|
350
|
+
},
|
|
351
|
+
{
|
|
352
|
+
name: 'ipv6',
|
|
353
|
+
label: 'IPV6_ADDRESS',
|
|
354
|
+
category: 'network',
|
|
355
|
+
regex: /\b(?:[0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}\b|\b(?:[0-9a-fA-F]{1,4}:){1,7}:\b|\b(?:[0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}\b/g,
|
|
356
|
+
description: 'IPv6 addresses',
|
|
357
|
+
},
|
|
358
|
+
{
|
|
359
|
+
name: 'macAddress',
|
|
360
|
+
label: 'MAC_ADDRESS',
|
|
361
|
+
category: 'network',
|
|
362
|
+
regex: /\b(?:[0-9a-fA-F]{2}[:\-]){5}[0-9a-fA-F]{2}\b/g,
|
|
363
|
+
description: 'MAC addresses',
|
|
364
|
+
},
|
|
365
|
+
{
|
|
366
|
+
name: 'ipv4Port',
|
|
367
|
+
label: 'IP_PORT',
|
|
368
|
+
category: 'network',
|
|
369
|
+
regex: /\b(?:(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(?:25[0-5]|2[0-4]\d|[01]?\d\d?):\d{1,5}\b/g,
|
|
370
|
+
description: 'IPv4 addresses with port (192.168.1.1:8080)',
|
|
371
|
+
},
|
|
372
|
+
{
|
|
373
|
+
name: 'url',
|
|
374
|
+
label: 'URL',
|
|
375
|
+
category: 'network',
|
|
376
|
+
regex: /https?:\/\/[^\s<>\"')\]]+/g,
|
|
377
|
+
description: 'URLs (http/https)',
|
|
378
|
+
},
|
|
379
|
+
// ═══════════════════════════════════════════
|
|
380
|
+
// LOCATION
|
|
381
|
+
// ═══════════════════════════════════════════
|
|
382
|
+
{
|
|
383
|
+
name: 'postalCodeUS',
|
|
384
|
+
label: 'ZIP_CODE',
|
|
385
|
+
category: 'location',
|
|
386
|
+
regex: /\b\d{5}(?:-\d{4})?\b/g,
|
|
387
|
+
description: 'US ZIP codes',
|
|
388
|
+
},
|
|
389
|
+
{
|
|
390
|
+
name: 'postalCodeUK',
|
|
391
|
+
label: 'POSTCODE_UK',
|
|
392
|
+
category: 'location',
|
|
393
|
+
regex: /\b[A-Z]{1,2}\d[A-Z\d]?\s?\d[A-Z]{2}\b/g,
|
|
394
|
+
description: 'UK postcodes',
|
|
395
|
+
},
|
|
396
|
+
{
|
|
397
|
+
name: 'postalCodeDE',
|
|
398
|
+
label: 'POSTCODE_DE',
|
|
399
|
+
category: 'location',
|
|
400
|
+
regex: /\b\d{5}\b/g,
|
|
401
|
+
description: 'German postal codes (5 digits)',
|
|
402
|
+
},
|
|
403
|
+
{
|
|
404
|
+
name: 'postalCodeAT',
|
|
405
|
+
label: 'POSTCODE_AT',
|
|
406
|
+
category: 'location',
|
|
407
|
+
regex: /\b[1-9]\d{3}\b/g,
|
|
408
|
+
description: 'Austrian postal codes (4 digits)',
|
|
409
|
+
},
|
|
410
|
+
{
|
|
411
|
+
name: 'postalCodeCH',
|
|
412
|
+
label: 'POSTCODE_CH',
|
|
413
|
+
category: 'location',
|
|
414
|
+
regex: /\b[1-9]\d{3}\b/g,
|
|
415
|
+
description: 'Swiss postal codes (4 digits)',
|
|
416
|
+
},
|
|
417
|
+
{
|
|
418
|
+
name: 'postalCodeFR',
|
|
419
|
+
label: 'POSTCODE_FR',
|
|
420
|
+
category: 'location',
|
|
421
|
+
regex: /\b(?:0[1-9]|[1-8]\d|9[0-5]|97[1-6])\d{3}\b/g,
|
|
422
|
+
description: 'French postal codes (5 digits, department prefix validated)',
|
|
423
|
+
},
|
|
424
|
+
{
|
|
425
|
+
name: 'postalCodeNL',
|
|
426
|
+
label: 'POSTCODE_NL',
|
|
427
|
+
category: 'location',
|
|
428
|
+
regex: /\b[1-9]\d{3}\s?[A-Z]{2}\b/g,
|
|
429
|
+
description: 'Dutch postal codes (1234 AB)',
|
|
430
|
+
},
|
|
431
|
+
{
|
|
432
|
+
name: 'postalCodeIT',
|
|
433
|
+
label: 'POSTCODE_IT',
|
|
434
|
+
category: 'location',
|
|
435
|
+
regex: /\b\d{5}\b/g,
|
|
436
|
+
description: 'Italian postal codes (5 digits)',
|
|
437
|
+
},
|
|
438
|
+
{
|
|
439
|
+
name: 'postalCodeES',
|
|
440
|
+
label: 'POSTCODE_ES',
|
|
441
|
+
category: 'location',
|
|
442
|
+
regex: /\b(?:0[1-9]|[1-4]\d|5[0-2])\d{3}\b/g,
|
|
443
|
+
description: 'Spanish postal codes (5 digits, province prefix validated)',
|
|
444
|
+
},
|
|
445
|
+
{
|
|
446
|
+
name: 'postalCodePL',
|
|
447
|
+
label: 'POSTCODE_PL',
|
|
448
|
+
category: 'location',
|
|
449
|
+
regex: /\b\d{2}-\d{3}\b/g,
|
|
450
|
+
description: 'Polish postal codes (XX-XXX)',
|
|
451
|
+
},
|
|
452
|
+
{
|
|
453
|
+
name: 'gpsCoordinates',
|
|
454
|
+
label: 'GPS_COORDS',
|
|
455
|
+
category: 'location',
|
|
456
|
+
regex: /[-+]?\d{1,2}\.\d{4,},\s?[-+]?\d{1,3}\.\d{4,}/g,
|
|
457
|
+
description: 'GPS coordinates (lat,lng with 4+ decimal places)',
|
|
458
|
+
},
|
|
459
|
+
// ═══════════════════════════════════════════
|
|
460
|
+
// LOCATION — Street addresses (labeled, multiformat)
|
|
461
|
+
// ═══════════════════════════════════════════
|
|
462
|
+
{
|
|
463
|
+
name: 'addressDE',
|
|
464
|
+
label: 'ADDRESS_DE',
|
|
465
|
+
category: 'location',
|
|
466
|
+
regex: /(?:[A-ZÀ-ÿa-zà-ÿ]+(?:straße|strasse|str\.|weg|gasse|platz|allee|ring|damm|ufer|chaussee))\s+\d{1,5}[a-z]?(?:\s*[,\/]\s*\d{5}\s+[A-ZÀ-ÿ][a-zà-ÿ]+(?:\s+[a-zà-ÿ]+\.?\s+[A-ZÀ-ÿ][a-zà-ÿ]+)?)?/gi,
|
|
467
|
+
description: 'German street addresses (Musterstraße 12, 80331 München)',
|
|
468
|
+
},
|
|
469
|
+
{
|
|
470
|
+
name: 'addressLabeledDE',
|
|
471
|
+
label: 'ADDRESS',
|
|
472
|
+
category: 'location',
|
|
473
|
+
regex: /(?:Adresse|Anschrift|Wohnort|Straße|Wohnadresse)[:\s]+[^\n,]{5,60}(?:,\s*\d{4,5}\s+[A-ZÀ-ÿ][a-zà-ÿ]+)?/gi,
|
|
474
|
+
description: 'Labeled German addresses (Adresse: ..., Anschrift: ...)',
|
|
475
|
+
},
|
|
476
|
+
{
|
|
477
|
+
name: 'addressLabeledEN',
|
|
478
|
+
label: 'ADDRESS',
|
|
479
|
+
category: 'location',
|
|
480
|
+
regex: /(?:Address|Home Address|Street Address|Mailing Address|Shipping Address|Billing Address)[:\s]+[^\n]{5,80}/gi,
|
|
481
|
+
description: 'Labeled English addresses (Address: ...)',
|
|
482
|
+
},
|
|
483
|
+
{
|
|
484
|
+
name: 'addressLabeledFR',
|
|
485
|
+
label: 'ADDRESS',
|
|
486
|
+
category: 'location',
|
|
487
|
+
regex: /(?:Adresse|Domicile|Résidence)[:\s]+[^\n]{5,80}/gi,
|
|
488
|
+
description: 'Labeled French addresses (Adresse: ...)',
|
|
489
|
+
},
|
|
490
|
+
{
|
|
491
|
+
name: 'addressEUStreetNumber',
|
|
492
|
+
label: 'STREET_ADDRESS',
|
|
493
|
+
category: 'location',
|
|
494
|
+
regex: /\b[A-ZÀ-ÿ][a-zà-ÿ]+(?:straat|laan|weg|plein|avenue|boulevard|rue|via|calle|plaza)\s+\d{1,5}[a-z]?\b/gi,
|
|
495
|
+
description: 'EU street addresses (Dutch/French/Italian/Spanish street types)',
|
|
496
|
+
},
|
|
497
|
+
// ═══════════════════════════════════════════
|
|
498
|
+
// TEMPORAL
|
|
499
|
+
// ═══════════════════════════════════════════
|
|
500
|
+
{
|
|
501
|
+
name: 'dateSlash',
|
|
502
|
+
label: 'DATE',
|
|
503
|
+
category: 'temporal',
|
|
504
|
+
regex: /\b\d{1,2}\/\d{1,2}\/\d{2,4}\b/g,
|
|
505
|
+
description: 'Dates (DD/MM/YYYY or MM/DD/YYYY)',
|
|
506
|
+
},
|
|
507
|
+
{
|
|
508
|
+
name: 'dateDash',
|
|
509
|
+
label: 'DATE',
|
|
510
|
+
category: 'temporal',
|
|
511
|
+
regex: /\b\d{4}-\d{2}-\d{2}\b/g,
|
|
512
|
+
description: 'ISO dates (YYYY-MM-DD)',
|
|
513
|
+
},
|
|
514
|
+
{
|
|
515
|
+
name: 'dateDot',
|
|
516
|
+
label: 'DATE',
|
|
517
|
+
category: 'temporal',
|
|
518
|
+
regex: /\b\d{1,2}\.\d{1,2}\.\d{2,4}\b/g,
|
|
519
|
+
description: 'Dates (DD.MM.YYYY — European format)',
|
|
520
|
+
},
|
|
521
|
+
{
|
|
522
|
+
name: 'dateOfBirth',
|
|
523
|
+
label: 'DOB',
|
|
524
|
+
category: 'temporal',
|
|
525
|
+
regex: /\b(?:DOB|Date\s+of\s+Birth|Geboren\s+am|Geburtsdatum|Né\(e\)\s+le|Fecha\s+de\s+nacimiento|Data\s+di\s+nascita|Geboortedatum)[:\s]+\d{1,2}[\/.]\d{1,2}[\/.]\d{2,4}\b/gi,
|
|
526
|
+
description: 'Date of birth with label prefix (EN/DE/FR/ES/IT/NL)',
|
|
527
|
+
},
|
|
528
|
+
// ═══════════════════════════════════════════
|
|
529
|
+
// MEDICAL
|
|
530
|
+
// ═══════════════════════════════════════════
|
|
531
|
+
{
|
|
532
|
+
name: 'medicalRecordNumber',
|
|
533
|
+
label: 'MRN',
|
|
534
|
+
category: 'medical',
|
|
535
|
+
regex: /\bMRN[:\s#]*\d{6,10}\b/gi,
|
|
536
|
+
description: 'Medical Record Numbers (MRN prefix)',
|
|
537
|
+
},
|
|
538
|
+
// ═══════════════════════════════════════════
|
|
539
|
+
// CRYPTO
|
|
540
|
+
// ═══════════════════════════════════════════
|
|
541
|
+
{
|
|
542
|
+
name: 'bitcoinAddress',
|
|
543
|
+
label: 'BTC_ADDRESS',
|
|
544
|
+
category: 'crypto',
|
|
545
|
+
regex: /\b(?:bc1|[13])[a-zA-HJ-NP-Z0-9]{25,39}\b/g,
|
|
546
|
+
description: 'Bitcoin addresses (Legacy + Bech32)',
|
|
547
|
+
},
|
|
548
|
+
{
|
|
549
|
+
name: 'ethereumAddress',
|
|
550
|
+
label: 'ETH_ADDRESS',
|
|
551
|
+
category: 'crypto',
|
|
552
|
+
regex: /\b0x[a-fA-F0-9]{40}\b/g,
|
|
553
|
+
description: 'Ethereum addresses',
|
|
554
|
+
},
|
|
555
|
+
// ═══════════════════════════════════════════
|
|
556
|
+
// IDENTITY — Global National IDs (extended)
|
|
557
|
+
// ═══════════════════════════════════════════
|
|
558
|
+
{
|
|
559
|
+
name: 'itinUS',
|
|
560
|
+
label: 'ITIN_US',
|
|
561
|
+
category: 'identity',
|
|
562
|
+
regex: /\b9\d{2}-[7-9]\d-\d{4}\b/g,
|
|
563
|
+
description: 'US Individual Taxpayer Identification Number (ITIN)',
|
|
564
|
+
},
|
|
565
|
+
{
|
|
566
|
+
name: 'sinCA',
|
|
567
|
+
label: 'SIN_CA',
|
|
568
|
+
category: 'identity',
|
|
569
|
+
regex: /\b\d{3}[\s\-]\d{3}[\s\-]\d{3}\b/g,
|
|
570
|
+
validate: (match) => luhnCheck(match),
|
|
571
|
+
description: 'Canadian Social Insurance Number (SIN, Luhn validated)',
|
|
572
|
+
},
|
|
573
|
+
{
|
|
574
|
+
name: 'ninoUK',
|
|
575
|
+
label: 'NINO_UK',
|
|
576
|
+
category: 'identity',
|
|
577
|
+
regex: /\b[A-CEGHJ-PR-TW-Z]{2}\s?\d{2}\s?\d{2}\s?\d{2}\s?[A-D]\b/g,
|
|
578
|
+
description: 'UK National Insurance Number (NINO)',
|
|
579
|
+
},
|
|
580
|
+
{
|
|
581
|
+
name: 'passportDE',
|
|
582
|
+
label: 'PASSPORT_DE',
|
|
583
|
+
category: 'identity',
|
|
584
|
+
regex: /\b[CFGHJK][0-9A-Z]{8}\b/g,
|
|
585
|
+
description: 'German passport number (Reisepassnummer)',
|
|
586
|
+
},
|
|
587
|
+
{
|
|
588
|
+
name: 'rrnBE',
|
|
589
|
+
label: 'RRN_BE',
|
|
590
|
+
category: 'identity',
|
|
591
|
+
regex: /\b\d{2}[.\-]?\d{2}[.\-]?\d{2}[.\-]?\d{3}[.\-]?\d{2}\b/g,
|
|
592
|
+
description: 'Belgian national register number (Rijksregisternummer)',
|
|
593
|
+
},
|
|
594
|
+
{
|
|
595
|
+
name: 'personnummerSE',
|
|
596
|
+
label: 'PERSONNUMMER_SE',
|
|
597
|
+
category: 'identity',
|
|
598
|
+
regex: /\b\d{6}[\-+]?\d{4}\b|\b\d{8}[\-+]?\d{4}\b/g,
|
|
599
|
+
description: 'Swedish personal identity number (Personnummer)',
|
|
600
|
+
},
|
|
601
|
+
{
|
|
602
|
+
name: 'fodselsnummerNO',
|
|
603
|
+
label: 'FODSELSNR_NO',
|
|
604
|
+
category: 'identity',
|
|
605
|
+
regex: /\b\d{6}\s?\d{5}\b/g,
|
|
606
|
+
description: 'Norwegian national ID (Fødselsnummer, 11 digits)',
|
|
607
|
+
},
|
|
608
|
+
{
|
|
609
|
+
name: 'cprDK',
|
|
610
|
+
label: 'CPR_DK',
|
|
611
|
+
category: 'identity',
|
|
612
|
+
regex: /\b\d{6}-?\d{4}\b/g,
|
|
613
|
+
description: 'Danish personal ID (CPR-nummer)',
|
|
614
|
+
},
|
|
615
|
+
{
|
|
616
|
+
name: 'hetuFI',
|
|
617
|
+
label: 'HETU_FI',
|
|
618
|
+
category: 'identity',
|
|
619
|
+
regex: /\b\d{6}[\-+A-Y]\d{3}[0-9A-Y]\b/g,
|
|
620
|
+
description: 'Finnish personal ID (Henkilötunnus / HETU)',
|
|
621
|
+
},
|
|
622
|
+
{
|
|
623
|
+
name: 'nifPT',
|
|
624
|
+
label: 'NIF_PT',
|
|
625
|
+
category: 'identity',
|
|
626
|
+
regex: /\b[1-9]\d{8}\b/g,
|
|
627
|
+
validate: (match) => {
|
|
628
|
+
const w = [9, 8, 7, 6, 5, 4, 3, 2];
|
|
629
|
+
let sum = 0;
|
|
630
|
+
for (let i = 0; i < 8; i++) {
|
|
631
|
+
sum += parseInt(match[i], 10) * w[i];
|
|
632
|
+
}
|
|
633
|
+
let check = 11 - (sum % 11);
|
|
634
|
+
if (check >= 10)
|
|
635
|
+
check = 0;
|
|
636
|
+
return parseInt(match[8], 10) === check;
|
|
637
|
+
},
|
|
638
|
+
description: 'Portuguese tax number (NIF, checksum validated)',
|
|
639
|
+
},
|
|
640
|
+
{
|
|
641
|
+
name: 'ppsIE',
|
|
642
|
+
label: 'PPS_IE',
|
|
643
|
+
category: 'identity',
|
|
644
|
+
regex: /\b\d{7}[A-W][A-IW]?\b/g,
|
|
645
|
+
description: 'Irish Personal Public Service number (PPS)',
|
|
646
|
+
},
|
|
647
|
+
{
|
|
648
|
+
name: 'tfnAU',
|
|
649
|
+
label: 'TFN_AU',
|
|
650
|
+
category: 'identity',
|
|
651
|
+
regex: /\b\d{3}\s?\d{3}\s?\d{3}\b/g,
|
|
652
|
+
validate: (match) => {
|
|
653
|
+
const digits = match.replace(/\s/g, '');
|
|
654
|
+
if (digits.length !== 9)
|
|
655
|
+
return false;
|
|
656
|
+
const w = [1, 4, 3, 7, 5, 8, 6, 9, 10];
|
|
657
|
+
let sum = 0;
|
|
658
|
+
for (let i = 0; i < 9; i++) {
|
|
659
|
+
sum += parseInt(digits[i], 10) * w[i];
|
|
660
|
+
}
|
|
661
|
+
return sum % 11 === 0;
|
|
662
|
+
},
|
|
663
|
+
description: 'Australian Tax File Number (TFN, checksum validated)',
|
|
664
|
+
},
|
|
665
|
+
{
|
|
666
|
+
name: 'medicareAU',
|
|
667
|
+
label: 'MEDICARE_AU',
|
|
668
|
+
category: 'identity',
|
|
669
|
+
regex: /\b\d{4}\s?\d{5}\s?\d\b/g,
|
|
670
|
+
description: 'Australian Medicare number',
|
|
671
|
+
},
|
|
672
|
+
{
|
|
673
|
+
name: 'irdNZ',
|
|
674
|
+
label: 'IRD_NZ',
|
|
675
|
+
category: 'identity',
|
|
676
|
+
regex: /\b\d{2,3}-?\d{3}-?\d{3}\b/g,
|
|
677
|
+
description: 'New Zealand IRD number',
|
|
678
|
+
},
|
|
679
|
+
{
|
|
680
|
+
name: 'myNumberJP',
|
|
681
|
+
label: 'MY_NUMBER_JP',
|
|
682
|
+
category: 'identity',
|
|
683
|
+
regex: /\b\d{4}\s?\d{4}\s?\d{4}\b/g,
|
|
684
|
+
description: 'Japanese My Number (Individual Number, 12 digits)',
|
|
685
|
+
},
|
|
686
|
+
{
|
|
687
|
+
name: 'rrnKR',
|
|
688
|
+
label: 'RRN_KR',
|
|
689
|
+
category: 'identity',
|
|
690
|
+
regex: /\b\d{6}[\s\-]?\d{7}\b/g,
|
|
691
|
+
description: 'South Korean Resident Registration Number (RRN)',
|
|
692
|
+
},
|
|
693
|
+
{
|
|
694
|
+
name: 'nricSG',
|
|
695
|
+
label: 'NRIC_SG',
|
|
696
|
+
category: 'identity',
|
|
697
|
+
regex: /\b[STFGM]\d{7}[A-Z]\b/g,
|
|
698
|
+
description: 'Singapore National Registration Identity Card (NRIC/FIN)',
|
|
699
|
+
},
|
|
700
|
+
{
|
|
701
|
+
name: 'aadhaarIN',
|
|
702
|
+
label: 'AADHAAR_IN',
|
|
703
|
+
category: 'identity',
|
|
704
|
+
regex: /\b\d{4}\s?\d{4}\s?\d{4}\b/g,
|
|
705
|
+
description: 'Indian Aadhaar number (12 digits)',
|
|
706
|
+
},
|
|
707
|
+
{
|
|
708
|
+
name: 'panIN',
|
|
709
|
+
label: 'PAN_IN',
|
|
710
|
+
category: 'identity',
|
|
711
|
+
regex: /\b[A-Z]{3}[ABCFGHLJPTK][A-Z]\d{4}[A-Z]\b/g,
|
|
712
|
+
description: 'Indian PAN card (Permanent Account Number)',
|
|
713
|
+
},
|
|
714
|
+
{
|
|
715
|
+
name: 'cpfBR',
|
|
716
|
+
label: 'CPF_BR',
|
|
717
|
+
category: 'identity',
|
|
718
|
+
regex: /\b\d{3}\.?\d{3}\.?\d{3}-?\d{2}\b/g,
|
|
719
|
+
description: 'Brazilian individual taxpayer ID (CPF)',
|
|
720
|
+
},
|
|
721
|
+
{
|
|
722
|
+
name: 'nifES',
|
|
723
|
+
label: 'NIF_ES',
|
|
724
|
+
category: 'identity',
|
|
725
|
+
regex: /\b[A-HJ-NP-SUVW]\d{7}[0-9A-J]\b/g,
|
|
726
|
+
description: 'Spanish company tax ID (NIF)',
|
|
727
|
+
},
|
|
728
|
+
{
|
|
729
|
+
name: 'cartaIdIT',
|
|
730
|
+
label: 'CARTA_ID_IT',
|
|
731
|
+
category: 'identity',
|
|
732
|
+
regex: /\b[A-Z]{2}\d{5}[A-Z]{2}\b/g,
|
|
733
|
+
description: 'Italian electronic identity card (Carta d\'Identità)',
|
|
734
|
+
},
|
|
735
|
+
{
|
|
736
|
+
name: 'driverLicenseCtx',
|
|
737
|
+
label: 'DRIVER_LICENSE',
|
|
738
|
+
category: 'identity',
|
|
739
|
+
regex: /(?:driver'?s?\s*(?:license|licence|lic)|DL)[#:\s]*[A-Z]{0,2}\d{4,13}/gi,
|
|
740
|
+
description: 'Driver license numbers (contextual, with label prefix)',
|
|
741
|
+
},
|
|
742
|
+
// ═══════════════════════════════════════════
|
|
743
|
+
// FINANCIAL — Extended
|
|
744
|
+
// ═══════════════════════════════════════════
|
|
745
|
+
{
|
|
746
|
+
name: 'abaRouting',
|
|
747
|
+
label: 'ABA_ROUTING',
|
|
748
|
+
category: 'financial',
|
|
749
|
+
regex: /\b(?:0[1-9]|[12]\d|3[0-2]|6[1-9]|7[0-2]|80)\d{7}\b/g,
|
|
750
|
+
validate: (match) => {
|
|
751
|
+
const d = match.split('').map(Number);
|
|
752
|
+
return (3 * (d[0] + d[3] + d[6]) + 7 * (d[1] + d[4] + d[7]) + (d[2] + d[5] + d[8])) % 10 === 0;
|
|
753
|
+
},
|
|
754
|
+
description: 'US ABA routing number (checksum validated)',
|
|
755
|
+
},
|
|
756
|
+
{
|
|
757
|
+
name: 'sortCodeUK',
|
|
758
|
+
label: 'SORT_CODE_UK',
|
|
759
|
+
category: 'financial',
|
|
760
|
+
regex: /\b\d{2}-\d{2}-\d{2}\b/g,
|
|
761
|
+
description: 'UK bank sort code',
|
|
762
|
+
},
|
|
763
|
+
{
|
|
764
|
+
name: 'amex',
|
|
765
|
+
label: 'AMEX',
|
|
766
|
+
category: 'financial',
|
|
767
|
+
regex: /\b3[47]\d{2}[\s\-]?\d{6}[\s\-]?\d{5}\b/g,
|
|
768
|
+
validate: (match) => luhnCheck(match),
|
|
769
|
+
description: 'American Express card numbers (Luhn validated)',
|
|
770
|
+
},
|
|
771
|
+
// ═══════════════════════════════════════════
|
|
772
|
+
// ENTERPRISE — Infrastructure & Secrets
|
|
773
|
+
// ═══════════════════════════════════════════
|
|
774
|
+
{
|
|
775
|
+
name: 'internalHostname',
|
|
776
|
+
label: 'INTERNAL_HOST',
|
|
777
|
+
category: 'enterprise',
|
|
778
|
+
regex: /\b[a-z][a-z0-9\-]+\.(?:internal|local|corp|intranet|lan|private|home\.arpa)(?:\.[a-z]+)*\b/gi,
|
|
779
|
+
description: 'Internal/intranet hostnames (.internal, .local, .corp, .lan)',
|
|
780
|
+
},
|
|
781
|
+
{
|
|
782
|
+
name: 'uncPath',
|
|
783
|
+
label: 'UNC_PATH',
|
|
784
|
+
category: 'enterprise',
|
|
785
|
+
regex: /\\\\[a-zA-Z0-9._\-]+\\[a-zA-Z0-9$._\- \\]+/g,
|
|
786
|
+
description: 'Windows UNC network paths (\\\\server\\share)',
|
|
787
|
+
},
|
|
788
|
+
{
|
|
789
|
+
name: 'ldapDN',
|
|
790
|
+
label: 'LDAP_DN',
|
|
791
|
+
category: 'enterprise',
|
|
792
|
+
regex: /(?:CN|OU|DC|O|C|L|ST)=[^,]+(?:,\s*(?:CN|OU|DC|O|C|L|ST)=[^,]+){1,}/gi,
|
|
793
|
+
description: 'LDAP Distinguished Names (CN=...,OU=...,DC=...)',
|
|
794
|
+
},
|
|
795
|
+
{
|
|
796
|
+
name: 'adUsername',
|
|
797
|
+
label: 'AD_USERNAME',
|
|
798
|
+
category: 'enterprise',
|
|
799
|
+
regex: /\b[A-Z][A-Z0-9]{1,15}\\[a-zA-Z][a-zA-Z0-9._\-]{1,20}\b/g,
|
|
800
|
+
description: 'Active Directory usernames (DOMAIN\\user)',
|
|
801
|
+
},
|
|
802
|
+
{
|
|
803
|
+
name: 'privateIp',
|
|
804
|
+
label: 'PRIVATE_IP',
|
|
805
|
+
category: 'enterprise',
|
|
806
|
+
regex: /\b(?:10\.\d{1,3}\.\d{1,3}\.\d{1,3}|172\.(?:1[6-9]|2\d|3[01])\.\d{1,3}\.\d{1,3}|192\.168\.\d{1,3}\.\d{1,3})\b/g,
|
|
807
|
+
description: 'Private/internal IP addresses (RFC 1918: 10.x, 172.16-31.x, 192.168.x)',
|
|
808
|
+
},
|
|
809
|
+
{
|
|
810
|
+
name: 'dbConnectionString',
|
|
811
|
+
label: 'DB_CONN_STRING',
|
|
812
|
+
category: 'enterprise',
|
|
813
|
+
regex: /(?:jdbc|mongodb(?:\+srv)?|postgres(?:ql)?|mysql|redis(?:s)?|mssql|mariadb):\/\/[^\s"']+/gi,
|
|
814
|
+
description: 'Database connection strings (JDBC, MongoDB, PostgreSQL, MySQL, Redis)',
|
|
815
|
+
},
|
|
816
|
+
{
|
|
817
|
+
name: 'connStringKV',
|
|
818
|
+
label: 'CONN_STRING',
|
|
819
|
+
category: 'enterprise',
|
|
820
|
+
regex: /(?:Server|Data Source|Host)=[^;]+;[^;]*(?:;[^;]*){0,10}(?:Password|Pwd)=[^;]+/gi,
|
|
821
|
+
description: 'Connection strings with credentials (ADO.NET / key=value style)',
|
|
822
|
+
},
|
|
823
|
+
{
|
|
824
|
+
name: 'awsAccessKey',
|
|
825
|
+
label: 'AWS_KEY',
|
|
826
|
+
category: 'enterprise',
|
|
827
|
+
regex: /\b(?:AKIA|ASIA)[A-Z0-9]{16}\b/g,
|
|
828
|
+
description: 'AWS Access Key IDs',
|
|
829
|
+
},
|
|
830
|
+
{
|
|
831
|
+
name: 'gcpApiKey',
|
|
832
|
+
label: 'GCP_KEY',
|
|
833
|
+
category: 'enterprise',
|
|
834
|
+
regex: /\bAIza[0-9A-Za-z_\-]{35}\b/g,
|
|
835
|
+
description: 'Google Cloud API keys',
|
|
836
|
+
},
|
|
837
|
+
{
|
|
838
|
+
name: 'stripeKey',
|
|
839
|
+
label: 'STRIPE_KEY',
|
|
840
|
+
category: 'enterprise',
|
|
841
|
+
regex: /\b[sr]k_(?:live|test)_[A-Za-z0-9]{24,}\b/g,
|
|
842
|
+
description: 'Stripe API keys (secret and publishable)',
|
|
843
|
+
},
|
|
844
|
+
{
|
|
845
|
+
name: 'openaiKey',
|
|
846
|
+
label: 'OPENAI_KEY',
|
|
847
|
+
category: 'enterprise',
|
|
848
|
+
regex: /\bsk-[A-Za-z0-9]{20,}\b/g,
|
|
849
|
+
description: 'OpenAI API keys',
|
|
850
|
+
},
|
|
851
|
+
{
|
|
852
|
+
name: 'githubToken',
|
|
853
|
+
label: 'GITHUB_TOKEN',
|
|
854
|
+
category: 'enterprise',
|
|
855
|
+
regex: /\b(?:ghp|gho|ghu|ghs|ghr)_[A-Za-z0-9_]{36,}\b/g,
|
|
856
|
+
description: 'GitHub personal access tokens',
|
|
857
|
+
},
|
|
858
|
+
{
|
|
859
|
+
name: 'slackToken',
|
|
860
|
+
label: 'SLACK_TOKEN',
|
|
861
|
+
category: 'enterprise',
|
|
862
|
+
regex: /\bxox[boaprs]-[A-Za-z0-9\-]+/g,
|
|
863
|
+
description: 'Slack API tokens',
|
|
864
|
+
},
|
|
865
|
+
{
|
|
866
|
+
name: 'bearerToken',
|
|
867
|
+
label: 'BEARER_TOKEN',
|
|
868
|
+
category: 'enterprise',
|
|
869
|
+
regex: /Bearer\s+[A-Za-z0-9\-._~+\/]+=*/g,
|
|
870
|
+
description: 'Bearer authentication tokens',
|
|
871
|
+
},
|
|
872
|
+
{
|
|
873
|
+
name: 'jwtToken',
|
|
874
|
+
label: 'JWT',
|
|
875
|
+
category: 'enterprise',
|
|
876
|
+
regex: /\beyJ[A-Za-z0-9_\-]*\.eyJ[A-Za-z0-9_\-]*\.[A-Za-z0-9_\-]+/g,
|
|
877
|
+
description: 'JSON Web Tokens (JWT)',
|
|
878
|
+
},
|
|
879
|
+
{
|
|
880
|
+
name: 'pemPrivateKey',
|
|
881
|
+
label: 'PRIVATE_KEY',
|
|
882
|
+
category: 'enterprise',
|
|
883
|
+
regex: /-----BEGIN (?:RSA |DSA |EC |OPENSSH |ENCRYPTED )?PRIVATE KEY-----/g,
|
|
884
|
+
description: 'PEM private key headers',
|
|
885
|
+
},
|
|
886
|
+
{
|
|
887
|
+
name: 'sshPublicKey',
|
|
888
|
+
label: 'SSH_KEY',
|
|
889
|
+
category: 'enterprise',
|
|
890
|
+
regex: /ssh-(?:rsa|dss|ed25519|ecdsa)\s+[A-Za-z0-9+\/=]{40,}/g,
|
|
891
|
+
description: 'SSH public keys',
|
|
892
|
+
},
|
|
893
|
+
{
|
|
894
|
+
name: 'genericSecret',
|
|
895
|
+
label: 'SECRET',
|
|
896
|
+
category: 'enterprise',
|
|
897
|
+
regex: /(?:password|passwd|pwd|secret|token|credential|auth_key|api_key|apikey|api_secret|api_token|access_token|secret_key|private_key|encryption_key)\s*[=:]\s*['"]?[^\s'"]{8,}['"]?/gi,
|
|
898
|
+
description: 'Secrets in config (password=, api_key=, secret=, token=, etc.)',
|
|
899
|
+
},
|
|
900
|
+
{
|
|
901
|
+
name: 'azureKey',
|
|
902
|
+
label: 'AZURE_KEY',
|
|
903
|
+
category: 'enterprise',
|
|
904
|
+
regex: /(?:AccountKey|SharedAccessSignature)\s*=\s*[A-Za-z0-9+\/=]{44,88}/gi,
|
|
905
|
+
description: 'Azure storage keys and SAS tokens',
|
|
906
|
+
},
|
|
907
|
+
{
|
|
908
|
+
name: 'slackWebhook',
|
|
909
|
+
label: 'SLACK_WEBHOOK',
|
|
910
|
+
category: 'enterprise',
|
|
911
|
+
regex: /https:\/\/hooks\.slack\.com\/services\/T[A-Z0-9]+\/B[A-Z0-9]+\/[A-Za-z0-9]+/g,
|
|
912
|
+
description: 'Slack webhook URLs',
|
|
913
|
+
},
|
|
914
|
+
// ═══════════════════════════════════════════
|
|
915
|
+
// MEDICAL — Extended (HIPAA)
|
|
916
|
+
// ═══════════════════════════════════════════
|
|
917
|
+
{
|
|
918
|
+
name: 'deaNumber',
|
|
919
|
+
label: 'DEA_NUMBER',
|
|
920
|
+
category: 'medical',
|
|
921
|
+
regex: /\b[ABCDEFGHJKLMNPRSTUXabcdefghjklmnprstux][A-Za-z9]\d{7}\b/g,
|
|
922
|
+
description: 'US DEA registration numbers',
|
|
923
|
+
},
|
|
924
|
+
{
|
|
925
|
+
name: 'npiNumber',
|
|
926
|
+
label: 'NPI',
|
|
927
|
+
category: 'medical',
|
|
928
|
+
regex: /(?:NPI)[#:\s]*\d{10}\b/gi,
|
|
929
|
+
description: 'US National Provider Identifier (NPI, contextual)',
|
|
930
|
+
},
|
|
931
|
+
{
|
|
932
|
+
name: 'rxNumber',
|
|
933
|
+
label: 'RX_NUMBER',
|
|
934
|
+
category: 'medical',
|
|
935
|
+
regex: /(?:Rx|RX|Prescription)\s*#?\s*:?\s*\d{6,12}\b/gi,
|
|
936
|
+
description: 'Prescription / Rx numbers',
|
|
937
|
+
},
|
|
938
|
+
// ═══════════════════════════════════════════
|
|
939
|
+
// VEHICLE
|
|
940
|
+
// ═══════════════════════════════════════════
|
|
941
|
+
{
|
|
942
|
+
name: 'vin',
|
|
943
|
+
label: 'VIN',
|
|
944
|
+
category: 'vehicle',
|
|
945
|
+
regex: /\b[A-HJ-NPR-Z0-9]{17}\b/g,
|
|
946
|
+
description: 'Vehicle Identification Number (VIN, 17 chars)',
|
|
947
|
+
},
|
|
948
|
+
{
|
|
949
|
+
name: 'licensePlateDE',
|
|
950
|
+
label: 'LICENSE_PLATE_DE',
|
|
951
|
+
category: 'vehicle',
|
|
952
|
+
regex: /\b[A-ZÄÖÜ]{1,3}\s[A-Z]{1,2}\s\d{1,4}[EH]?\b/g,
|
|
953
|
+
description: 'German license plates (M AB 1234, requires spaces)',
|
|
954
|
+
},
|
|
955
|
+
{
|
|
956
|
+
name: 'licensePlateUK',
|
|
957
|
+
label: 'LICENSE_PLATE_UK',
|
|
958
|
+
category: 'vehicle',
|
|
959
|
+
regex: /\b[A-Z]{2}\d{2}\s?[A-Z]{3}\b/g,
|
|
960
|
+
description: 'UK license plates (AB12 CDE)',
|
|
961
|
+
},
|
|
962
|
+
{
|
|
963
|
+
name: 'licensePlateFR',
|
|
964
|
+
label: 'LICENSE_PLATE_FR',
|
|
965
|
+
category: 'vehicle',
|
|
966
|
+
regex: /\b[A-Z]{2}-\d{3}-[A-Z]{2}\b/g,
|
|
967
|
+
description: 'French license plates (AB-123-CD)',
|
|
968
|
+
},
|
|
969
|
+
// ═══════════════════════════════════════════
|
|
970
|
+
// BIOMETRIC / DIGITAL IDENTITY
|
|
971
|
+
// ═══════════════════════════════════════════
|
|
972
|
+
{
|
|
973
|
+
name: 'uuid',
|
|
974
|
+
label: 'UUID',
|
|
975
|
+
category: 'biometric',
|
|
976
|
+
regex: /\b[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}\b/g,
|
|
977
|
+
description: 'UUIDs / GUIDs',
|
|
978
|
+
},
|
|
979
|
+
{
|
|
980
|
+
name: 'socialHandle',
|
|
981
|
+
label: 'SOCIAL_HANDLE',
|
|
982
|
+
category: 'biometric',
|
|
983
|
+
regex: /(?<=^|\s)@[a-zA-Z_][a-zA-Z0-9_.]{1,30}\b/g,
|
|
984
|
+
description: 'Social media handles (@username)',
|
|
985
|
+
},
|
|
986
|
+
{
|
|
987
|
+
name: 'imei',
|
|
988
|
+
label: 'IMEI',
|
|
989
|
+
category: 'biometric',
|
|
990
|
+
regex: /\b\d{2}-\d{6}-\d{6}-\d\b/g,
|
|
991
|
+
description: 'IMEI device identifiers (formatted)',
|
|
992
|
+
},
|
|
993
|
+
{
|
|
994
|
+
name: 'iccid',
|
|
995
|
+
label: 'ICCID',
|
|
996
|
+
category: 'biometric',
|
|
997
|
+
regex: /\b89\d{17,19}\b/g,
|
|
998
|
+
description: 'SIM card ICCID numbers',
|
|
999
|
+
},
|
|
1000
|
+
// ═══════════════════════════════════════════
|
|
1001
|
+
// CONTACT — Extended global phone numbers
|
|
1002
|
+
// ═══════════════════════════════════════════
|
|
1003
|
+
{
|
|
1004
|
+
name: 'phoneIT',
|
|
1005
|
+
label: 'PHONE_IT',
|
|
1006
|
+
category: 'contact',
|
|
1007
|
+
regex: /(?:\+?39[\s\-.])\d{2,4}[\s\-.]\d{4,8}/g,
|
|
1008
|
+
description: 'Italian phone numbers (requires +39 prefix)',
|
|
1009
|
+
},
|
|
1010
|
+
{
|
|
1011
|
+
name: 'phoneES',
|
|
1012
|
+
label: 'PHONE_ES',
|
|
1013
|
+
category: 'contact',
|
|
1014
|
+
regex: /\b(?:\+?34[\s\-.]?)?\d{3}[\s\-.]\d{3}[\s\-.]\d{3}\b/g,
|
|
1015
|
+
description: 'Spanish phone numbers',
|
|
1016
|
+
},
|
|
1017
|
+
{
|
|
1018
|
+
name: 'phoneAU',
|
|
1019
|
+
label: 'PHONE_AU',
|
|
1020
|
+
category: 'contact',
|
|
1021
|
+
regex: /\b(?:\+?61[\s\-.]?)?0?\d[\s\-.]\d{4}[\s\-.]\d{4}\b/g,
|
|
1022
|
+
description: 'Australian phone numbers',
|
|
1023
|
+
},
|
|
1024
|
+
{
|
|
1025
|
+
name: 'phoneIN',
|
|
1026
|
+
label: 'PHONE_IN',
|
|
1027
|
+
category: 'contact',
|
|
1028
|
+
regex: /\b(?:\+?91[\s\-.]?)?[6-9]\d{4}[\s\-.]\d{5}\b/g,
|
|
1029
|
+
description: 'Indian phone numbers',
|
|
1030
|
+
},
|
|
1031
|
+
{
|
|
1032
|
+
name: 'phoneJP',
|
|
1033
|
+
label: 'PHONE_JP',
|
|
1034
|
+
category: 'contact',
|
|
1035
|
+
regex: /\b(?:\+?81[\s\-.]?)?0?\d{1,4}[\s\-.]\d{1,4}[\s\-.]\d{4}\b/g,
|
|
1036
|
+
description: 'Japanese phone numbers',
|
|
1037
|
+
},
|
|
1038
|
+
{
|
|
1039
|
+
name: 'phoneBR',
|
|
1040
|
+
label: 'PHONE_BR',
|
|
1041
|
+
category: 'contact',
|
|
1042
|
+
regex: /\b(?:\+?55[\s\-.]?)?\(?\d{2}\)?[\s\-.]?\d{4,5}[\s\-.]\d{4}\b/g,
|
|
1043
|
+
description: 'Brazilian phone numbers',
|
|
1044
|
+
},
|
|
1045
|
+
{
|
|
1046
|
+
name: 'faxCtx',
|
|
1047
|
+
label: 'FAX',
|
|
1048
|
+
category: 'contact',
|
|
1049
|
+
regex: /(?:fax|facsimile|telefax)[:\s#]*(?:\+?\d[\d\s\-().]{7,20})/gi,
|
|
1050
|
+
description: 'Fax numbers (contextual, with label prefix)',
|
|
1051
|
+
},
|
|
1052
|
+
{
|
|
1053
|
+
name: 'phoneCN',
|
|
1054
|
+
label: 'PHONE_CN',
|
|
1055
|
+
category: 'contact',
|
|
1056
|
+
regex: /\b(?:\+?86[\s\-.]?)?1[3-9]\d{9}\b/g,
|
|
1057
|
+
description: 'Chinese mobile phone numbers',
|
|
1058
|
+
},
|
|
1059
|
+
{
|
|
1060
|
+
name: 'phoneKR',
|
|
1061
|
+
label: 'PHONE_KR',
|
|
1062
|
+
category: 'contact',
|
|
1063
|
+
regex: /\b(?:\+?82[\s\-.]?)?0?\d{1,2}[\s\-.]\d{3,4}[\s\-.]\d{4}\b/g,
|
|
1064
|
+
description: 'South Korean phone numbers',
|
|
1065
|
+
},
|
|
1066
|
+
{
|
|
1067
|
+
name: 'phoneRU',
|
|
1068
|
+
label: 'PHONE_RU',
|
|
1069
|
+
category: 'contact',
|
|
1070
|
+
regex: /\b(?:\+?7[\s\-.]?)?\d{3}[\s\-.]\d{3}[\s\-.]\d{2}[\s\-.]\d{2}\b/g,
|
|
1071
|
+
description: 'Russian phone numbers',
|
|
1072
|
+
},
|
|
1073
|
+
{
|
|
1074
|
+
name: 'phoneMX',
|
|
1075
|
+
label: 'PHONE_MX',
|
|
1076
|
+
category: 'contact',
|
|
1077
|
+
regex: /\b(?:\+?52[\s\-.]?)?\d{2,3}[\s\-.]\d{3,4}[\s\-.]\d{4}\b/g,
|
|
1078
|
+
description: 'Mexican phone numbers',
|
|
1079
|
+
},
|
|
1080
|
+
{
|
|
1081
|
+
name: 'phoneZA',
|
|
1082
|
+
label: 'PHONE_ZA',
|
|
1083
|
+
category: 'contact',
|
|
1084
|
+
regex: /\b(?:\+?27[\s\-.]?)?0?\d{2}[\s\-.]\d{3}[\s\-.]\d{4}\b/g,
|
|
1085
|
+
description: 'South African phone numbers',
|
|
1086
|
+
},
|
|
1087
|
+
// ═══════════════════════════════════════════
|
|
1088
|
+
// IDENTITY — Extended global (Asia, LATAM, MENA, Africa)
|
|
1089
|
+
// ═══════════════════════════════════════════
|
|
1090
|
+
{
|
|
1091
|
+
name: 'nationalIdCN',
|
|
1092
|
+
label: 'NATIONAL_ID_CN',
|
|
1093
|
+
category: 'identity',
|
|
1094
|
+
regex: /\b\d{6}(?:19|20)\d{2}(?:0[1-9]|1[0-2])(?:0[1-9]|[12]\d|3[01])\d{3}[\dXx]\b/g,
|
|
1095
|
+
description: 'Chinese Resident Identity Card number (18 digits)',
|
|
1096
|
+
},
|
|
1097
|
+
{
|
|
1098
|
+
name: 'nationalIdZA',
|
|
1099
|
+
label: 'NATIONAL_ID_ZA',
|
|
1100
|
+
category: 'identity',
|
|
1101
|
+
regex: /\b\d{6}\s?\d{4}\s?\d{3}\b/g,
|
|
1102
|
+
description: 'South African ID number (13 digits)',
|
|
1103
|
+
},
|
|
1104
|
+
{
|
|
1105
|
+
name: 'nationalIdTH',
|
|
1106
|
+
label: 'NATIONAL_ID_TH',
|
|
1107
|
+
category: 'identity',
|
|
1108
|
+
regex: /\b\d-\d{4}-\d{5}-\d{2}-\d\b/g,
|
|
1109
|
+
description: 'Thai National ID (13 digits with dashes)',
|
|
1110
|
+
},
|
|
1111
|
+
{
|
|
1112
|
+
name: 'nationalIdMY',
|
|
1113
|
+
label: 'NRIC_MY',
|
|
1114
|
+
category: 'identity',
|
|
1115
|
+
regex: /\b\d{6}-?\d{2}-?\d{4}\b/g,
|
|
1116
|
+
description: 'Malaysian NRIC (12 digits)',
|
|
1117
|
+
},
|
|
1118
|
+
{
|
|
1119
|
+
name: 'nationalIdTR',
|
|
1120
|
+
label: 'TC_KIMLIK_TR',
|
|
1121
|
+
category: 'identity',
|
|
1122
|
+
regex: /\b[1-9]\d{10}\b/g,
|
|
1123
|
+
description: 'Turkish national ID (TC Kimlik No, 11 digits)',
|
|
1124
|
+
},
|
|
1125
|
+
{
|
|
1126
|
+
name: 'curpMX',
|
|
1127
|
+
label: 'CURP_MX',
|
|
1128
|
+
category: 'identity',
|
|
1129
|
+
regex: /\b[A-Z]{4}\d{6}[HM][A-Z]{5}[A-Z0-9]\d\b/g,
|
|
1130
|
+
description: 'Mexican CURP (population registry code)',
|
|
1131
|
+
},
|
|
1132
|
+
{
|
|
1133
|
+
name: 'rfcMX',
|
|
1134
|
+
label: 'RFC_MX',
|
|
1135
|
+
category: 'identity',
|
|
1136
|
+
regex: /\b[A-Z&]{3,4}\d{6}[A-Z0-9]{3}\b/g,
|
|
1137
|
+
description: 'Mexican RFC (tax ID)',
|
|
1138
|
+
},
|
|
1139
|
+
{
|
|
1140
|
+
name: 'nationalIdHK',
|
|
1141
|
+
label: 'HKID',
|
|
1142
|
+
category: 'identity',
|
|
1143
|
+
regex: /\b[A-Z]{1,2}\d{6}\(?\d\)?\b/g,
|
|
1144
|
+
description: 'Hong Kong ID card number',
|
|
1145
|
+
},
|
|
1146
|
+
{
|
|
1147
|
+
name: 'nationalIdTW',
|
|
1148
|
+
label: 'NATIONAL_ID_TW',
|
|
1149
|
+
category: 'identity',
|
|
1150
|
+
regex: /\b[A-Z][12]\d{8}\b/g,
|
|
1151
|
+
description: 'Taiwan National ID number',
|
|
1152
|
+
},
|
|
1153
|
+
{
|
|
1154
|
+
name: 'emiratesIdUAE',
|
|
1155
|
+
label: 'EMIRATES_ID',
|
|
1156
|
+
category: 'identity',
|
|
1157
|
+
regex: /\b784-\d{4}-\d{7}-\d\b/g,
|
|
1158
|
+
description: 'UAE Emirates ID number',
|
|
1159
|
+
},
|
|
1160
|
+
{
|
|
1161
|
+
name: 'rgBR',
|
|
1162
|
+
label: 'RG_BR',
|
|
1163
|
+
category: 'identity',
|
|
1164
|
+
regex: /\b\d{2}\.?\d{3}\.?\d{3}-?\d\b/g,
|
|
1165
|
+
description: 'Brazilian RG (Registro Geral)',
|
|
1166
|
+
},
|
|
1167
|
+
{
|
|
1168
|
+
name: 'cnpjBR',
|
|
1169
|
+
label: 'CNPJ_BR',
|
|
1170
|
+
category: 'identity',
|
|
1171
|
+
regex: /\b\d{2}\.?\d{3}\.?\d{3}\/?\d{4}-?\d{2}\b/g,
|
|
1172
|
+
description: 'Brazilian CNPJ (company registration)',
|
|
1173
|
+
},
|
|
1174
|
+
{
|
|
1175
|
+
name: 'driverLicenseUK',
|
|
1176
|
+
label: 'DRIVER_LICENSE_UK',
|
|
1177
|
+
category: 'identity',
|
|
1178
|
+
regex: /\b[A-Z]{5}\d{6}[A-Z]{2}\d[A-Z]{2}\b/g,
|
|
1179
|
+
description: 'UK driving licence number (16 characters)',
|
|
1180
|
+
},
|
|
1181
|
+
{
|
|
1182
|
+
name: 'krankenversichertDE',
|
|
1183
|
+
label: 'HEALTH_INS_DE',
|
|
1184
|
+
category: 'identity',
|
|
1185
|
+
regex: /\b[A-Z]\d{9}\b/g,
|
|
1186
|
+
description: 'German health insurance number (Krankenversichertennummer)',
|
|
1187
|
+
},
|
|
1188
|
+
{
|
|
1189
|
+
name: 'romanianCNP',
|
|
1190
|
+
label: 'CNP_RO',
|
|
1191
|
+
category: 'identity',
|
|
1192
|
+
regex: /\b[1-9]\d{12}\b/g,
|
|
1193
|
+
description: 'Romanian personal numeric code (CNP, 13 digits)',
|
|
1194
|
+
},
|
|
1195
|
+
{
|
|
1196
|
+
name: 'croatianOIB',
|
|
1197
|
+
label: 'OIB_HR',
|
|
1198
|
+
category: 'identity',
|
|
1199
|
+
regex: /\b\d{11}\b/g,
|
|
1200
|
+
description: 'Croatian personal identification number (OIB, 11 digits)',
|
|
1201
|
+
},
|
|
1202
|
+
{
|
|
1203
|
+
name: 'mothersMaidenName',
|
|
1204
|
+
label: 'MAIDEN_NAME',
|
|
1205
|
+
category: 'identity',
|
|
1206
|
+
regex: /(?:mother'?s?\s*maiden\s*name|Geburtsname|nom\s+de\s+jeune\s+fille)[:\s]+[A-ZÀ-ÿ][a-zà-ÿ]+/gi,
|
|
1207
|
+
description: 'Mother\'s maiden name (security question, labeled)',
|
|
1208
|
+
},
|
|
1209
|
+
{
|
|
1210
|
+
name: 'passportCtx',
|
|
1211
|
+
label: 'PASSPORT',
|
|
1212
|
+
category: 'identity',
|
|
1213
|
+
regex: /(?:passport\s*(?:no|number|#|num))[:\s]*[A-Z0-9]{6,12}/gi,
|
|
1214
|
+
description: 'Passport numbers (generic contextual with label)',
|
|
1215
|
+
},
|
|
1216
|
+
{
|
|
1217
|
+
name: 'voterRegCtx',
|
|
1218
|
+
label: 'VOTER_REG',
|
|
1219
|
+
category: 'identity',
|
|
1220
|
+
regex: /(?:voter\s*(?:reg(?:istration)?|ID)\s*#?)[:\s]*[A-Z0-9]{5,15}/gi,
|
|
1221
|
+
description: 'Voter registration numbers (contextual)',
|
|
1222
|
+
},
|
|
1223
|
+
{
|
|
1224
|
+
name: 'militaryIdCtx',
|
|
1225
|
+
label: 'MILITARY_ID',
|
|
1226
|
+
category: 'identity',
|
|
1227
|
+
regex: /(?:military\s*ID|DOD\s*ID|CAC)[:\s#]*\d{10}/gi,
|
|
1228
|
+
description: 'Military / DOD ID numbers (contextual)',
|
|
1229
|
+
},
|
|
1230
|
+
{
|
|
1231
|
+
name: 'immigrationCtx',
|
|
1232
|
+
label: 'IMMIGRATION_ID',
|
|
1233
|
+
category: 'identity',
|
|
1234
|
+
regex: /(?:visa|immigration|I-94|A-number|alien\s*(?:reg(?:istration)?)?\s*#?)[:\s#]*[A-Z]?\d{7,13}/gi,
|
|
1235
|
+
description: 'Immigration / visa / alien registration numbers (contextual)',
|
|
1236
|
+
},
|
|
1237
|
+
{
|
|
1238
|
+
name: 'professionalLicenseCtx',
|
|
1239
|
+
label: 'PROF_LICENSE',
|
|
1240
|
+
category: 'identity',
|
|
1241
|
+
regex: /(?:bar\s*(?:number|#)|CPA\s*(?:license|#)|medical\s*license|nursing\s*license|license\s*#)[:\s]*[A-Z0-9\-]{4,15}/gi,
|
|
1242
|
+
description: 'Professional license numbers (bar, CPA, medical, nursing)',
|
|
1243
|
+
},
|
|
1244
|
+
{
|
|
1245
|
+
name: 'employeeIdCtx',
|
|
1246
|
+
label: 'EMPLOYEE_ID',
|
|
1247
|
+
category: 'identity',
|
|
1248
|
+
regex: /(?:employee\s*(?:ID|number|#|no)|Personalnummer|Mitarbeiternummer)[:\s]*[A-Z0-9\-]{3,15}/gi,
|
|
1249
|
+
description: 'Employee ID numbers (contextual, EN/DE)',
|
|
1250
|
+
},
|
|
1251
|
+
{
|
|
1252
|
+
name: 'fullNameCtx',
|
|
1253
|
+
label: 'FULL_NAME',
|
|
1254
|
+
category: 'identity',
|
|
1255
|
+
regex: /(?:full\s*name|first\s*name|last\s*name|surname|family\s*name|given\s*name|Vorname|Nachname|Familienname|nom\s*de\s*famille|prenom|nombre|apellido)[:\s]+[A-ZÀ-ÿ][a-zà-ÿ]+(?:\s+[A-ZÀ-ÿ][a-zà-ÿ]+){0,3}/gi,
|
|
1256
|
+
description: 'Full name, first name, last name (labeled, multi-language)',
|
|
1257
|
+
},
|
|
1258
|
+
{
|
|
1259
|
+
name: 'departmentCtx',
|
|
1260
|
+
label: 'DEPARTMENT',
|
|
1261
|
+
category: 'identity',
|
|
1262
|
+
regex: /(?:department|Abteilung|departement|service)[:\s]+[A-ZÀ-ÿa-zà-ÿ\s&\-]{2,40}/gi,
|
|
1263
|
+
description: 'Department name (contextual, EN/DE/FR)',
|
|
1264
|
+
},
|
|
1265
|
+
{
|
|
1266
|
+
name: 'jobTitleCtx',
|
|
1267
|
+
label: 'JOB_TITLE',
|
|
1268
|
+
category: 'identity',
|
|
1269
|
+
regex: /(?:position|job\s*title|role|Berufsbezeichnung|Stelle|titulo|poste)[:\s]+[A-ZÀ-ÿa-zà-ÿ\s\-\/]{2,50}/gi,
|
|
1270
|
+
description: 'Job title / position (contextual, EN/DE/FR/ES)',
|
|
1271
|
+
},
|
|
1272
|
+
{
|
|
1273
|
+
name: 'hiringDateCtx',
|
|
1274
|
+
label: 'HIRING_DATE',
|
|
1275
|
+
category: 'temporal',
|
|
1276
|
+
regex: /(?:hiring\s*date|start\s*date|date\s*of\s*hire|Eintrittsdatum|Einstellungsdatum|date\s*d'?embauche)[:\s]*\d{1,2}[\/.\-]\d{1,2}[\/.\-]\d{2,4}/gi,
|
|
1277
|
+
description: 'Hiring / start date (contextual, EN/DE/FR)',
|
|
1278
|
+
},
|
|
1279
|
+
{
|
|
1280
|
+
name: 'payrollCtx',
|
|
1281
|
+
label: 'PAYROLL',
|
|
1282
|
+
category: 'financial',
|
|
1283
|
+
regex: /(?:payroll\s*(?:#|number|no)|Lohnsteuernummer)[:\s]*[A-Z0-9\-]{4,15}/gi,
|
|
1284
|
+
description: 'Payroll numbers (contextual, EN/DE)',
|
|
1285
|
+
},
|
|
1286
|
+
{
|
|
1287
|
+
name: 'workersCompCtx',
|
|
1288
|
+
label: 'WORKERS_COMP',
|
|
1289
|
+
category: 'financial',
|
|
1290
|
+
regex: /(?:workers?\s*comp|WC)\s*(?:claim|#)[:\s]*[A-Z0-9\-]{5,15}/gi,
|
|
1291
|
+
description: 'Workers compensation claim numbers (contextual)',
|
|
1292
|
+
},
|
|
1293
|
+
// ═══════════════════════════════════════════
|
|
1294
|
+
// FINANCIAL — Extended (PCI-DSS, GLBA, SOX)
|
|
1295
|
+
// ═══════════════════════════════════════════
|
|
1296
|
+
{
|
|
1297
|
+
name: 'cardExpiry',
|
|
1298
|
+
label: 'CARD_EXPIRY',
|
|
1299
|
+
category: 'financial',
|
|
1300
|
+
regex: /\b(?:0[1-9]|1[0-2])\s*[\/\-]\s*(?:\d{2}|\d{4})\b/g,
|
|
1301
|
+
description: 'Credit/debit card expiry dates (MM/YY or MM/YYYY)',
|
|
1302
|
+
},
|
|
1303
|
+
{
|
|
1304
|
+
name: 'cvvCtx',
|
|
1305
|
+
label: 'CVV',
|
|
1306
|
+
category: 'financial',
|
|
1307
|
+
regex: /(?:CVV|CVC|CVV2|CVC2|CID|security\s*code)[:\s#]*\d{3,4}/gi,
|
|
1308
|
+
description: 'Card verification codes (CVV/CVC, contextual)',
|
|
1309
|
+
},
|
|
1310
|
+
{
|
|
1311
|
+
name: 'bankAccountCtx',
|
|
1312
|
+
label: 'BANK_ACCOUNT',
|
|
1313
|
+
category: 'financial',
|
|
1314
|
+
regex: /(?:account\s*(?:no|number|#|num)|acct)[:\s#]*\d{8,17}/gi,
|
|
1315
|
+
description: 'Bank account numbers (contextual with label)',
|
|
1316
|
+
},
|
|
1317
|
+
{
|
|
1318
|
+
name: 'insurancePolicyCtx',
|
|
1319
|
+
label: 'INSURANCE_POLICY',
|
|
1320
|
+
category: 'financial',
|
|
1321
|
+
regex: /(?:policy|insurance)\s*(?:#|number|no)[:\s]*[A-Z0-9\-]{5,20}/gi,
|
|
1322
|
+
description: 'Insurance policy numbers (contextual)',
|
|
1323
|
+
},
|
|
1324
|
+
{
|
|
1325
|
+
name: 'insuranceClaimCtx',
|
|
1326
|
+
label: 'INSURANCE_CLAIM',
|
|
1327
|
+
category: 'financial',
|
|
1328
|
+
regex: /(?:claim)\s*(?:#|number|no|ref)[:\s]*[A-Z0-9\-]{5,20}/gi,
|
|
1329
|
+
description: 'Insurance claim reference numbers (contextual)',
|
|
1330
|
+
},
|
|
1331
|
+
{
|
|
1332
|
+
name: 'wireTransferCtx',
|
|
1333
|
+
label: 'WIRE_TRANSFER',
|
|
1334
|
+
category: 'financial',
|
|
1335
|
+
regex: /(?:wire|transfer|remittance)\s*(?:#|ref|reference)[:\s]*[A-Z0-9]{6,20}/gi,
|
|
1336
|
+
description: 'Wire transfer reference numbers (contextual)',
|
|
1337
|
+
},
|
|
1338
|
+
{
|
|
1339
|
+
name: 'loanNumberCtx',
|
|
1340
|
+
label: 'LOAN_NUMBER',
|
|
1341
|
+
category: 'financial',
|
|
1342
|
+
regex: /(?:mortgage|loan)\s*(?:#|number|no|acct)[:\s]*[A-Z0-9\-]{6,20}/gi,
|
|
1343
|
+
description: 'Mortgage / loan account numbers (contextual)',
|
|
1344
|
+
},
|
|
1345
|
+
{
|
|
1346
|
+
name: 'salaryCtx',
|
|
1347
|
+
label: 'SALARY',
|
|
1348
|
+
category: 'financial',
|
|
1349
|
+
regex: /(?:salary|compensation|Gehalt|salaire|annual\s*pay|hourly\s*rate|income)[:\s]*[$€£¥]?\s?[\d,]+(?:\.\d{2})?/gi,
|
|
1350
|
+
description: 'Salary / compensation amounts (contextual, multi-currency)',
|
|
1351
|
+
},
|
|
1352
|
+
{
|
|
1353
|
+
name: 'taxReturnCtx',
|
|
1354
|
+
label: 'TAX_RETURN',
|
|
1355
|
+
category: 'financial',
|
|
1356
|
+
regex: /(?:W-?2|1099|W-?4|1040|AGI|adjusted\s*gross\s*income)[:\s]*[$]?[\d,.]+/gi,
|
|
1357
|
+
description: 'US tax form references and amounts (W-2, 1099, AGI)',
|
|
1358
|
+
},
|
|
1359
|
+
// ═══════════════════════════════════════════
|
|
1360
|
+
// MEDICAL — Extended (HIPAA 18 identifiers + codes)
|
|
1361
|
+
// ═══════════════════════════════════════════
|
|
1362
|
+
{
|
|
1363
|
+
name: 'ndcDrugCode',
|
|
1364
|
+
label: 'NDC_CODE',
|
|
1365
|
+
category: 'medical',
|
|
1366
|
+
regex: /\b\d{4,5}-\d{3,4}-\d{1,2}\b/g,
|
|
1367
|
+
description: 'National Drug Code (NDC)',
|
|
1368
|
+
},
|
|
1369
|
+
{
|
|
1370
|
+
name: 'healthPlanCtx',
|
|
1371
|
+
label: 'HEALTH_PLAN_ID',
|
|
1372
|
+
category: 'medical',
|
|
1373
|
+
regex: /(?:member\s*ID|beneficiary|subscriber|health\s*plan)[:\s#]*[A-Z0-9]{6,20}/gi,
|
|
1374
|
+
description: 'Health plan beneficiary / member ID (contextual)',
|
|
1375
|
+
},
|
|
1376
|
+
{
|
|
1377
|
+
name: 'bloodTypeCtx',
|
|
1378
|
+
label: 'BLOOD_TYPE',
|
|
1379
|
+
category: 'medical',
|
|
1380
|
+
regex: /(?:blood\s*type|blood\s*group)[:\s]*(?:A|B|AB|O)[+-]?/gi,
|
|
1381
|
+
description: 'Blood type (contextual)',
|
|
1382
|
+
},
|
|
1383
|
+
{
|
|
1384
|
+
name: 'deviceSerialCtx',
|
|
1385
|
+
label: 'DEVICE_SERIAL',
|
|
1386
|
+
category: 'medical',
|
|
1387
|
+
regex: /(?:serial\s*(?:#|number|no)|S\/N|UDI)[:\s]*[A-Z0-9\-]{6,20}/gi,
|
|
1388
|
+
description: 'Medical device serial numbers / UDI (contextual)',
|
|
1389
|
+
},
|
|
1390
|
+
// ═══════════════════════════════════════════
|
|
1391
|
+
// LOCATION — Extended postal codes (global)
|
|
1392
|
+
// ═══════════════════════════════════════════
|
|
1393
|
+
{
|
|
1394
|
+
name: 'postalCodeCA',
|
|
1395
|
+
label: 'POSTCODE_CA',
|
|
1396
|
+
category: 'location',
|
|
1397
|
+
regex: /\b[A-Z]\d[A-Z]\s?\d[A-Z]\d\b/g,
|
|
1398
|
+
description: 'Canadian postal codes (A1A 1A1)',
|
|
1399
|
+
},
|
|
1400
|
+
{
|
|
1401
|
+
name: 'postalCodeJP',
|
|
1402
|
+
label: 'POSTCODE_JP',
|
|
1403
|
+
category: 'location',
|
|
1404
|
+
regex: /\b\d{3}-\d{4}\b/g,
|
|
1405
|
+
description: 'Japanese postal codes (XXX-XXXX)',
|
|
1406
|
+
},
|
|
1407
|
+
{
|
|
1408
|
+
name: 'postalCodeBR',
|
|
1409
|
+
label: 'CEP_BR',
|
|
1410
|
+
category: 'location',
|
|
1411
|
+
regex: /\b\d{5}-?\d{3}\b/g,
|
|
1412
|
+
description: 'Brazilian postal codes / CEP',
|
|
1413
|
+
},
|
|
1414
|
+
{
|
|
1415
|
+
name: 'postalCodeIN',
|
|
1416
|
+
label: 'PIN_IN',
|
|
1417
|
+
category: 'location',
|
|
1418
|
+
regex: /\b[1-9]\d{5}\b/g,
|
|
1419
|
+
description: 'Indian PIN codes (6 digits)',
|
|
1420
|
+
},
|
|
1421
|
+
// ═══════════════════════════════════════════
|
|
1422
|
+
// ENTERPRISE — Extended API keys & secrets
|
|
1423
|
+
// ═══════════════════════════════════════════
|
|
1424
|
+
{
|
|
1425
|
+
name: 'sendgridKey',
|
|
1426
|
+
label: 'SENDGRID_KEY',
|
|
1427
|
+
category: 'enterprise',
|
|
1428
|
+
regex: /\bSG\.[A-Za-z0-9\-_]{22}\.[A-Za-z0-9\-_]{43}\b/g,
|
|
1429
|
+
description: 'SendGrid API keys',
|
|
1430
|
+
},
|
|
1431
|
+
{
|
|
1432
|
+
name: 'twilioKey',
|
|
1433
|
+
label: 'TWILIO_KEY',
|
|
1434
|
+
category: 'enterprise',
|
|
1435
|
+
regex: /\bSK[a-f0-9]{32}\b/g,
|
|
1436
|
+
description: 'Twilio API keys',
|
|
1437
|
+
},
|
|
1438
|
+
{
|
|
1439
|
+
name: 'mailgunKey',
|
|
1440
|
+
label: 'MAILGUN_KEY',
|
|
1441
|
+
category: 'enterprise',
|
|
1442
|
+
regex: /\bkey-[A-Za-z0-9]{32}\b/g,
|
|
1443
|
+
description: 'Mailgun API keys',
|
|
1444
|
+
},
|
|
1445
|
+
{
|
|
1446
|
+
name: 'gitlabToken',
|
|
1447
|
+
label: 'GITLAB_TOKEN',
|
|
1448
|
+
category: 'enterprise',
|
|
1449
|
+
regex: /\bglpat-[A-Za-z0-9\-]{20}\b/g,
|
|
1450
|
+
description: 'GitLab personal access tokens',
|
|
1451
|
+
},
|
|
1452
|
+
{
|
|
1453
|
+
name: 'anthropicKey',
|
|
1454
|
+
label: 'ANTHROPIC_KEY',
|
|
1455
|
+
category: 'enterprise',
|
|
1456
|
+
regex: /\bsk-ant-[A-Za-z0-9\-]{90,}\b/g,
|
|
1457
|
+
description: 'Anthropic / Claude API keys',
|
|
1458
|
+
},
|
|
1459
|
+
{
|
|
1460
|
+
name: 'npmToken',
|
|
1461
|
+
label: 'NPM_TOKEN',
|
|
1462
|
+
category: 'enterprise',
|
|
1463
|
+
regex: /\bnpm_[A-Za-z0-9]{36}\b/g,
|
|
1464
|
+
description: 'npm authentication tokens',
|
|
1465
|
+
},
|
|
1466
|
+
{
|
|
1467
|
+
name: 'shopifyKey',
|
|
1468
|
+
label: 'SHOPIFY_KEY',
|
|
1469
|
+
category: 'enterprise',
|
|
1470
|
+
regex: /\bshp(?:at|ss|pa)_[a-fA-F0-9]{32}\b/g,
|
|
1471
|
+
description: 'Shopify API keys',
|
|
1472
|
+
},
|
|
1473
|
+
{
|
|
1474
|
+
name: 'telegramBotToken',
|
|
1475
|
+
label: 'TELEGRAM_TOKEN',
|
|
1476
|
+
category: 'enterprise',
|
|
1477
|
+
regex: /\b\d{8,10}:[A-Za-z0-9_-]{35}\b/g,
|
|
1478
|
+
description: 'Telegram bot tokens',
|
|
1479
|
+
},
|
|
1480
|
+
{
|
|
1481
|
+
name: 'discordBotToken',
|
|
1482
|
+
label: 'DISCORD_TOKEN',
|
|
1483
|
+
category: 'enterprise',
|
|
1484
|
+
regex: /\b[A-Za-z0-9]{24}\.[A-Za-z0-9_-]{6}\.[A-Za-z0-9_-]{27}\b/g,
|
|
1485
|
+
description: 'Discord bot tokens',
|
|
1486
|
+
},
|
|
1487
|
+
{
|
|
1488
|
+
name: 'huggingfaceToken',
|
|
1489
|
+
label: 'HF_TOKEN',
|
|
1490
|
+
category: 'enterprise',
|
|
1491
|
+
regex: /\bhf_[A-Za-z0-9]{34,}\b/g,
|
|
1492
|
+
description: 'Hugging Face API tokens',
|
|
1493
|
+
},
|
|
1494
|
+
{
|
|
1495
|
+
name: 'digitaloceanToken',
|
|
1496
|
+
label: 'DO_TOKEN',
|
|
1497
|
+
category: 'enterprise',
|
|
1498
|
+
regex: /\bdop_v1_[a-f0-9]{64}\b/g,
|
|
1499
|
+
description: 'DigitalOcean personal access tokens',
|
|
1500
|
+
},
|
|
1501
|
+
{
|
|
1502
|
+
name: 'hashicorpVaultToken',
|
|
1503
|
+
label: 'VAULT_TOKEN',
|
|
1504
|
+
category: 'enterprise',
|
|
1505
|
+
regex: /\bhvs\.[A-Za-z0-9]{24,}\b/g,
|
|
1506
|
+
description: 'HashiCorp Vault tokens',
|
|
1507
|
+
},
|
|
1508
|
+
{
|
|
1509
|
+
name: 'httpBasicAuth',
|
|
1510
|
+
label: 'HTTP_BASIC_AUTH',
|
|
1511
|
+
category: 'enterprise',
|
|
1512
|
+
regex: /Authorization:\s*Basic\s+[A-Za-z0-9+\/=]+/g,
|
|
1513
|
+
description: 'HTTP Basic Authentication headers',
|
|
1514
|
+
},
|
|
1515
|
+
{
|
|
1516
|
+
name: 'apiKeyInUrl',
|
|
1517
|
+
label: 'API_KEY_URL',
|
|
1518
|
+
category: 'enterprise',
|
|
1519
|
+
regex: /[?&](?:api_?key|token|access_token|auth)=[A-Za-z0-9\-._~+\/]{8,}/gi,
|
|
1520
|
+
description: 'API keys exposed in URLs',
|
|
1521
|
+
},
|
|
1522
|
+
{
|
|
1523
|
+
name: 'x509Certificate',
|
|
1524
|
+
label: 'X509_CERT',
|
|
1525
|
+
category: 'enterprise',
|
|
1526
|
+
regex: /-----BEGIN CERTIFICATE-----/g,
|
|
1527
|
+
description: 'X.509 certificate headers',
|
|
1528
|
+
},
|
|
1529
|
+
{
|
|
1530
|
+
name: 'pgpPrivateKey',
|
|
1531
|
+
label: 'PGP_KEY',
|
|
1532
|
+
category: 'enterprise',
|
|
1533
|
+
regex: /-----BEGIN PGP PRIVATE KEY BLOCK-----/g,
|
|
1534
|
+
description: 'PGP private key block headers',
|
|
1535
|
+
},
|
|
1536
|
+
{
|
|
1537
|
+
name: 'envSecrets',
|
|
1538
|
+
label: 'ENV_SECRET',
|
|
1539
|
+
category: 'enterprise',
|
|
1540
|
+
regex: /\b(?:DATABASE_URL|DB_PASSWORD|DB_PASS|REDIS_URL|MONGO_URI|SECRET_KEY|ENCRYPTION_KEY|MASTER_KEY|SMTP_PASSWORD|MAIL_PASSWORD)=[^\s]+/g,
|
|
1541
|
+
description: 'Environment variable secrets (DATABASE_URL, DB_PASSWORD, etc.)',
|
|
1542
|
+
},
|
|
1543
|
+
{
|
|
1544
|
+
name: 'googleAnalyticsId',
|
|
1545
|
+
label: 'GA_ID',
|
|
1546
|
+
category: 'enterprise',
|
|
1547
|
+
regex: /\bUA-\d{4,10}-\d{1,4}\b|\bG-[A-Z0-9]{10}\b/g,
|
|
1548
|
+
description: 'Google Analytics tracking IDs',
|
|
1549
|
+
},
|
|
1550
|
+
// ═══════════════════════════════════════════
|
|
1551
|
+
// GDPR ARTICLE 9 — Sensitive keyword detection
|
|
1552
|
+
// ═══════════════════════════════════════════
|
|
1553
|
+
{
|
|
1554
|
+
name: 'racialOriginCtx',
|
|
1555
|
+
label: 'RACIAL_ORIGIN',
|
|
1556
|
+
category: 'identity',
|
|
1557
|
+
regex: /(?:race|ethnicity|ethnic\s*origin|Rasse|Herkunft|racial\s*background)[:\s]+[a-zA-ZÀ-ÿ\s]{2,30}/gi,
|
|
1558
|
+
description: 'Racial/ethnic origin (GDPR Art.9, contextual)',
|
|
1559
|
+
},
|
|
1560
|
+
{
|
|
1561
|
+
name: 'politicalOpinionCtx',
|
|
1562
|
+
label: 'POLITICAL_OPINION',
|
|
1563
|
+
category: 'identity',
|
|
1564
|
+
regex: /(?:political\s*(?:party|affiliation|opinion)|Partei(?:zugehörigkeit)?|parti\s*politique)[:\s]+[a-zA-ZÀ-ÿ\s]{2,40}/gi,
|
|
1565
|
+
description: 'Political opinions/affiliations (GDPR Art.9, contextual)',
|
|
1566
|
+
},
|
|
1567
|
+
{
|
|
1568
|
+
name: 'religiousBeliefCtx',
|
|
1569
|
+
label: 'RELIGIOUS_BELIEF',
|
|
1570
|
+
category: 'identity',
|
|
1571
|
+
regex: /(?:religion|religious\s*(?:belief|affiliation)|faith|Konfession|Religionszugehörigkeit|confession)[:\s]+[a-zA-ZÀ-ÿ\s]{2,30}/gi,
|
|
1572
|
+
description: 'Religious beliefs/affiliations (GDPR Art.9, contextual)',
|
|
1573
|
+
},
|
|
1574
|
+
{
|
|
1575
|
+
name: 'tradeUnionCtx',
|
|
1576
|
+
label: 'TRADE_UNION',
|
|
1577
|
+
category: 'identity',
|
|
1578
|
+
regex: /(?:trade\s*union|union\s*member(?:ship)?|Gewerkschaft(?:smitgliedschaft)?|syndicat)[:\s]+[a-zA-ZÀ-ÿ\s]{2,40}/gi,
|
|
1579
|
+
description: 'Trade union membership (GDPR Art.9, contextual)',
|
|
1580
|
+
},
|
|
1581
|
+
{
|
|
1582
|
+
name: 'sexualOrientationCtx',
|
|
1583
|
+
label: 'SEXUAL_ORIENTATION',
|
|
1584
|
+
category: 'identity',
|
|
1585
|
+
regex: /(?:sexual\s*orientation|sexuelle\s*Orientierung|orientation\s*sexuelle)[:\s]+[a-zA-ZÀ-ÿ\s]{2,20}/gi,
|
|
1586
|
+
description: 'Sexual orientation (GDPR Art.9, contextual)',
|
|
1587
|
+
},
|
|
1588
|
+
{
|
|
1589
|
+
name: 'disabilityCtx',
|
|
1590
|
+
label: 'DISABILITY',
|
|
1591
|
+
category: 'medical',
|
|
1592
|
+
regex: /(?:disability|disabled|Behinderung|Schwerbehindertenausweis|GdB)\s*(?:status|degree|grad)?[:\s]*(?:\d{1,3}%?|[a-zA-ZÀ-ÿ\s]{2,30})/gi,
|
|
1593
|
+
description: 'Disability status/degree (GDPR Art.9, BDSG, contextual)',
|
|
1594
|
+
},
|
|
1595
|
+
{
|
|
1596
|
+
name: 'criminalRecordCtx',
|
|
1597
|
+
label: 'CRIMINAL_RECORD',
|
|
1598
|
+
category: 'identity',
|
|
1599
|
+
regex: /(?:criminal\s*record|conviction|felony|misdemeanor|Vorstrafe|casier\s*judiciaire|arrest\s*record)[:\s]+[a-zA-ZÀ-ÿ\s]{2,40}/gi,
|
|
1600
|
+
description: 'Criminal record references (GDPR Art.10, contextual)',
|
|
1601
|
+
},
|
|
1602
|
+
// ═══════════════════════════════════════════
|
|
1603
|
+
// EDUCATION — FERPA
|
|
1604
|
+
// ═══════════════════════════════════════════
|
|
1605
|
+
{
|
|
1606
|
+
name: 'studentIdCtx',
|
|
1607
|
+
label: 'STUDENT_ID',
|
|
1608
|
+
category: 'identity',
|
|
1609
|
+
regex: /(?:student\s*(?:ID|number|#|no))[:\s]*[A-Z0-9]{4,15}/gi,
|
|
1610
|
+
description: 'Student ID numbers (FERPA, contextual)',
|
|
1611
|
+
},
|
|
1612
|
+
{
|
|
1613
|
+
name: 'gpaCtx',
|
|
1614
|
+
label: 'GPA',
|
|
1615
|
+
category: 'identity',
|
|
1616
|
+
regex: /(?:GPA|grade\s*point)[:\s]*[0-4]\.\d{1,2}/gi,
|
|
1617
|
+
description: 'Grade point average (FERPA, contextual)',
|
|
1618
|
+
},
|
|
1619
|
+
{
|
|
1620
|
+
name: 'testScoreCtx',
|
|
1621
|
+
label: 'TEST_SCORE',
|
|
1622
|
+
category: 'identity',
|
|
1623
|
+
regex: /(?:SAT|ACT|GRE|GMAT|LSAT|MCAT)\s*(?:score)?[:\s]*\d{2,4}/gi,
|
|
1624
|
+
description: 'Standardized test scores (SAT, ACT, GRE, etc.)',
|
|
1625
|
+
},
|
|
1626
|
+
{
|
|
1627
|
+
name: 'eduEmail',
|
|
1628
|
+
label: 'EDU_EMAIL',
|
|
1629
|
+
category: 'contact',
|
|
1630
|
+
regex: /[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.edu/g,
|
|
1631
|
+
description: 'Educational email addresses (.edu)',
|
|
1632
|
+
},
|
|
1633
|
+
// ═══════════════════════════════════════════
|
|
1634
|
+
// UTILITY / SERVICE ACCOUNTS
|
|
1635
|
+
// ═══════════════════════════════════════════
|
|
1636
|
+
{
|
|
1637
|
+
name: 'loyaltyNumberCtx',
|
|
1638
|
+
label: 'LOYALTY_NUMBER',
|
|
1639
|
+
category: 'financial',
|
|
1640
|
+
regex: /(?:loyalty|rewards?|frequent\s*flyer|mileage|membership|FF)\s*(?:#|number|card|ID)[:\s]*[A-Z0-9\-]{6,20}/gi,
|
|
1641
|
+
description: 'Loyalty / rewards / frequent flyer numbers (contextual)',
|
|
1642
|
+
},
|
|
1643
|
+
{
|
|
1644
|
+
name: 'utilityAccountCtx',
|
|
1645
|
+
label: 'UTILITY_ACCOUNT',
|
|
1646
|
+
category: 'financial',
|
|
1647
|
+
regex: /(?:electricity|gas|water|utility)\s*(?:account|acct|#)[:\s]*[A-Z0-9\-]{6,20}/gi,
|
|
1648
|
+
description: 'Utility account numbers (electricity, gas, water)',
|
|
1649
|
+
},
|
|
1650
|
+
// ═══════════════════════════════════════════
|
|
1651
|
+
// LEGAL / JUDICIAL
|
|
1652
|
+
// ═══════════════════════════════════════════
|
|
1653
|
+
{
|
|
1654
|
+
name: 'caseNumberCtx',
|
|
1655
|
+
label: 'CASE_NUMBER',
|
|
1656
|
+
category: 'identity',
|
|
1657
|
+
regex: /(?:case|docket)\s*(?:#|number|no)[:\s]*\d{1,2}[\-:]\w{2,4}[\-:]\d{3,8}/gi,
|
|
1658
|
+
description: 'Court case / docket numbers (contextual)',
|
|
1659
|
+
},
|
|
1660
|
+
{
|
|
1661
|
+
name: 'inmateNumberCtx',
|
|
1662
|
+
label: 'INMATE_NUMBER',
|
|
1663
|
+
category: 'identity',
|
|
1664
|
+
regex: /(?:inmate|prisoner|offender|booking)\s*(?:#|number|ID)[:\s]*[A-Z0-9\-]{4,15}/gi,
|
|
1665
|
+
description: 'Inmate / prisoner / booking numbers (contextual)',
|
|
1666
|
+
},
|
|
1667
|
+
// ═══════════════════════════════════════════
|
|
1668
|
+
// VEHICLE — Extended license plates
|
|
1669
|
+
// ═══════════════════════════════════════════
|
|
1670
|
+
{
|
|
1671
|
+
name: 'licensePlateNL',
|
|
1672
|
+
label: 'LICENSE_PLATE_NL',
|
|
1673
|
+
category: 'vehicle',
|
|
1674
|
+
regex: /\b\d{1,2}-[A-Z]{2,3}-\d{1,2}\b|\b[A-Z]{2}-\d{2,3}-[A-Z]{1,2}\b/g,
|
|
1675
|
+
description: 'Dutch license plates',
|
|
1676
|
+
},
|
|
1677
|
+
{
|
|
1678
|
+
name: 'licensePlateIT',
|
|
1679
|
+
label: 'LICENSE_PLATE_IT',
|
|
1680
|
+
category: 'vehicle',
|
|
1681
|
+
regex: /\b[A-Z]{2}\s?\d{3}\s?[A-Z]{2}\b/g,
|
|
1682
|
+
description: 'Italian license plates (AB 123 CD)',
|
|
1683
|
+
},
|
|
1684
|
+
{
|
|
1685
|
+
name: 'licensePlateES',
|
|
1686
|
+
label: 'LICENSE_PLATE_ES',
|
|
1687
|
+
category: 'vehicle',
|
|
1688
|
+
regex: /\b\d{4}\s?[BCDFGHJKLMNPRSTVWXYZ]{3}\b/g,
|
|
1689
|
+
description: 'Spanish license plates (1234 BCD)',
|
|
1690
|
+
},
|
|
1691
|
+
// ═══════════════════════════════════════════
|
|
1692
|
+
// BIOMETRIC — Genetic / DNA
|
|
1693
|
+
// ═══════════════════════════════════════════
|
|
1694
|
+
{
|
|
1695
|
+
name: 'dnaSequence',
|
|
1696
|
+
label: 'DNA_SEQUENCE',
|
|
1697
|
+
category: 'biometric',
|
|
1698
|
+
regex: /\b[ACGT]{20,}\b/g,
|
|
1699
|
+
description: 'DNA nucleotide sequences (20+ bases)',
|
|
1700
|
+
},
|
|
1701
|
+
{
|
|
1702
|
+
name: 'geneticTestCtx',
|
|
1703
|
+
label: 'GENETIC_TEST',
|
|
1704
|
+
category: 'biometric',
|
|
1705
|
+
regex: /(?:BRCA[12]|genetic\s*test|genome|genotype|SNP|allele|karyotype)[:\s]+[^\n]{3,40}/gi,
|
|
1706
|
+
description: 'Genetic test results and markers (BRCA1/2, SNP, contextual)',
|
|
1707
|
+
},
|
|
1708
|
+
// ═══════════════════════════════════════════
|
|
1709
|
+
// TEMPORAL — Extended
|
|
1710
|
+
// ═══════════════════════════════════════════
|
|
1711
|
+
{
|
|
1712
|
+
name: 'ageCtx',
|
|
1713
|
+
label: 'AGE',
|
|
1714
|
+
category: 'temporal',
|
|
1715
|
+
regex: /(?:age|Alter|edad|eta|âge)[:\s]+\d{1,3}\s*(?:years?|yr|ans?|Jahre?|años?|anni)?/gi,
|
|
1716
|
+
description: 'Age with label (contextual, multi-language)',
|
|
1717
|
+
},
|
|
1718
|
+
{
|
|
1719
|
+
name: 'dateOfDeathCtx',
|
|
1720
|
+
label: 'DATE_OF_DEATH',
|
|
1721
|
+
category: 'temporal',
|
|
1722
|
+
regex: /(?:date\s*of\s*death|deceased|Sterbedatum|died\s*on)[:\s]*\d{1,2}[\/.\-]\d{1,2}[\/.\-]\d{2,4}/gi,
|
|
1723
|
+
description: 'Date of death (HIPAA, contextual)',
|
|
1724
|
+
},
|
|
1725
|
+
];
|
|
1726
|
+
/**
 * Builds one option entry per pattern in `exports.PII_PATTERNS`, for the UI.
 *
 * The result is a flat list in the same order as `exports.PII_PATTERNS`;
 * the category is not used for grouping, it is only appended to the
 * display name in parentheses.
 *
 * @returns {Array<{name: string, value: string, description: string}>}
 *   `name` is the pattern's description (or its label when the description
 *   is missing/empty) followed by ` (<category>)`, `value` is the pattern's
 *   `name`, and `description` is the description-or-label text on its own.
 */
function getPatternOptions() {
    const toOption = ({ name, label, category, description }) => {
        // A falsy description (undefined or '') falls back to the label.
        const summary = description || label;
        return {
            name: `${summary} (${category})`,
            value: name,
            description: summary,
        };
    };
    return exports.PII_PATTERNS.map((pattern) => toOption(pattern));
}
|
|
1736
|
+
/**
 * Selects the entries of `exports.PII_PATTERNS` whose `name` is listed in
 * `names`.
 *
 * The result follows the order of `exports.PII_PATTERNS`, not the order of
 * `names`; duplicate or unknown names have no effect on the output.
 *
 * @param {Iterable<string>} names - Pattern names to look up. The value is
 *   passed straight to `new Set(...)`.
 * @returns {Array<object>} The matching pattern objects (same references as
 *   stored in `exports.PII_PATTERNS`).
 */
function getPatternsByNames(names) {
    // Build the lookup set once so each membership check is constant time.
    const wanted = new Set(names);
    const matches = [];
    exports.PII_PATTERNS.forEach((pattern) => {
        if (wanted.has(pattern.name)) {
            matches.push(pattern);
        }
    });
    return matches;
}
|