n8n-nodes-redactor 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of n8n-nodes-redactor might be problematic. Click here for more details.
- package/LICENSE +42 -0
- package/README.dev.md +134 -0
- package/README.md +376 -0
- package/README.npm.md +376 -0
- package/dist/nodes/PiiRedactor/PiiRedactor.node.d.ts +5 -0
- package/dist/nodes/PiiRedactor/PiiRedactor.node.js +872 -0
- package/dist/nodes/PiiRedactor/__tests__/engine.test.d.ts +1 -0
- package/dist/nodes/PiiRedactor/__tests__/engine.test.js +524 -0
- package/dist/nodes/PiiRedactor/__tests__/operations.test.d.ts +1 -0
- package/dist/nodes/PiiRedactor/__tests__/operations.test.js +316 -0
- package/dist/nodes/PiiRedactor/__tests__/patterns-global.test.d.ts +1 -0
- package/dist/nodes/PiiRedactor/__tests__/patterns-global.test.js +427 -0
- package/dist/nodes/PiiRedactor/__tests__/patterns.test.d.ts +1 -0
- package/dist/nodes/PiiRedactor/__tests__/patterns.test.js +481 -0
- package/dist/nodes/PiiRedactor/__tests__/phase1.test.d.ts +1 -0
- package/dist/nodes/PiiRedactor/__tests__/phase1.test.js +343 -0
- package/dist/nodes/PiiRedactor/__tests__/security.test.d.ts +1 -0
- package/dist/nodes/PiiRedactor/__tests__/security.test.js +178 -0
- package/dist/nodes/PiiRedactor/__tests__/semantic.test.d.ts +1 -0
- package/dist/nodes/PiiRedactor/__tests__/semantic.test.js +319 -0
- package/dist/nodes/PiiRedactor/__tests__/vault.test.d.ts +1 -0
- package/dist/nodes/PiiRedactor/__tests__/vault.test.js +247 -0
- package/dist/nodes/PiiRedactor/context.d.ts +57 -0
- package/dist/nodes/PiiRedactor/context.js +260 -0
- package/dist/nodes/PiiRedactor/engine.d.ts +17 -0
- package/dist/nodes/PiiRedactor/engine.js +813 -0
- package/dist/nodes/PiiRedactor/names.d.ts +25 -0
- package/dist/nodes/PiiRedactor/names.js +188 -0
- package/dist/nodes/PiiRedactor/patterns.d.ts +17 -0
- package/dist/nodes/PiiRedactor/patterns.js +1741 -0
- package/dist/nodes/PiiRedactor/redact.png +0 -0
- package/dist/nodes/PiiRedactor/redact.svg +3 -0
- package/dist/nodes/PiiRedactor/types.d.ts +78 -0
- package/dist/nodes/PiiRedactor/types.js +3 -0
- package/dist/nodes/PiiRedactor/vault.d.ts +60 -0
- package/dist/nodes/PiiRedactor/vault.js +299 -0
- package/package.json +87 -0
|
@@ -0,0 +1,813 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
 * Module-interop helper (has the shape of the helper the TypeScript compiler emits for re-exports).
 *
 * Exposes property `k` of `m` on `o` under the name `k2` (defaults to `k`).
 * An implementation already present on `this` is reused when there is one.
 *
 * When `Object.create` exists the property is defined through a descriptor:
 *  - no own descriptor on `m`, or a data property that is writable/configurable,
 *    or an accessor on a non-`__esModule` object -> an enumerable getter that reads `m[k]` on access;
 *  - otherwise the descriptor found on `m` is reused unchanged.
 * Without `Object.create` the current value is simply copied.
 */
var __createBinding = (this && this.__createBinding) || (function () {
    if (!Object.create) {
        // Fallback: plain value copy.
        return function (o, m, k, k2) {
            var exportName = k2 === undefined ? k : k2;
            o[exportName] = m[k];
        };
    }
    return function (o, m, k, k2) {
        var exportName = k2 === undefined ? k : k2;
        var descriptor = Object.getOwnPropertyDescriptor(m, k);
        var needsGetter;
        if (!descriptor) {
            needsGetter = true;
        }
        else if ("get" in descriptor) {
            needsGetter = !m.__esModule;
        }
        else {
            needsGetter = descriptor.writable || descriptor.configurable;
        }
        if (needsGetter) {
            descriptor = { enumerable: true, get: function () { return m[k]; } };
        }
        Object.defineProperty(o, exportName, descriptor);
    };
})();
|
|
13
|
+
/**
 * Module-interop helper: stores `v` as the "default" property of `o`.
 *
 * An implementation already present on `this` is reused when there is one.
 * With `Object.create` available the property is defined as an enumerable data property;
 * otherwise it is assigned directly.
 */
var __setModuleDefault = (this && this.__setModuleDefault) || (function () {
    if (Object.create) {
        return function (o, v) {
            var descriptor = { enumerable: true, value: v };
            Object.defineProperty(o, "default", descriptor);
        };
    }
    return function (o, v) {
        o["default"] = v;
    };
})();
|
|
18
|
+
/**
 * Module-interop helper: wraps a required module as a namespace-style object.
 *
 * - A truthy `mod` with `__esModule` set is returned as-is.
 * - Otherwise a fresh object is built: every own property name of `mod` except "default"
 *   is bound through `__createBinding`, and `mod` itself is installed via `__setModuleDefault`.
 *
 * An implementation already present on `this` is reused when there is one.
 */
var __importStar = (this && this.__importStar) || (function () {
    // Resolved on first use: the native lister when present, else a for-in based fallback.
    var listOwnNames = null;
    function fallbackOwnNames(o) {
        var names = [];
        for (var name in o) {
            if (Object.prototype.hasOwnProperty.call(o, name)) {
                names[names.length] = name;
            }
        }
        return names;
    }
    function ownKeys(o) {
        if (listOwnNames === null) {
            listOwnNames = Object.getOwnPropertyNames || fallbackOwnNames;
        }
        return listOwnNames(o);
    }
    return function (mod) {
        if (mod && mod.__esModule) {
            return mod;
        }
        var result = {};
        if (mod != null) {
            var names = ownKeys(mod);
            for (var i = 0; i < names.length; i++) {
                if (names[i] === "default") {
                    continue;
                }
                __createBinding(result, mod, names[i]);
            }
        }
        __setModuleDefault(result, mod);
        return result;
    };
})();
|
|
35
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
+
exports.redactValue = redactValue;
|
|
37
|
+
exports.restoreValue = restoreValue;
|
|
38
|
+
exports.buildReport = buildReport;
|
|
39
|
+
const crypto = __importStar(require("crypto"));
|
|
40
|
+
const patterns_1 = require("./patterns");
|
|
41
|
+
const context_1 = require("./context");
|
|
42
|
+
const names_1 = require("./names");
|
|
43
|
+
const context_2 = require("./context");
|
|
44
|
+
/**
 * Check whether a value matches any entry in the allow list (matching values should be SKIPPED).
 *
 * Entry matching by `entry.type`:
 *  - 'exact' (and any unrecognised type): strict equality with `entry.value`
 *  - 'contains': `entry.value` occurs inside `value`
 *  - 'regex': `entry.value` is compiled case-insensitively and tested against `value`
 *
 * Blank (`null`, `undefined` or empty-string) 'contains' / 'regex' entries never match.
 * Without this guard an empty row would match every value (`"x".includes("")` and
 * `new RegExp("")` are always true) and silently switch redaction off.
 *
 * NOTE(review): regex entries are compiled here without going through `isSafeRegex`;
 * confirm the caller validates user-supplied patterns.
 *
 * @param {string} value - Candidate text.
 * @param {Array<{type: string, value: string}>} [allowList] - Allow-list entries.
 * @returns {boolean} True when at least one entry matches.
 */
function isAllowListed(value, allowList) {
    if (!allowList || allowList.length === 0) {
        return false;
    }
    return allowList.some((entry) => {
        try {
            const isBlank = entry.value == null || entry.value === '';
            switch (entry.type) {
                case 'contains':
                    return !isBlank && value.includes(entry.value);
                case 'regex':
                    return !isBlank && new RegExp(entry.value, 'i').test(value);
                case 'exact':
                default:
                    return value === entry.value;
            }
        }
        catch {
            // Best effort: an invalid pattern or a non-string value counts as "no match".
            return false;
        }
    });
}
|
|
68
|
+
/**
 * Check whether a value matches any entry in the deny list (matching values are ALWAYS redacted).
 *
 * Entry matching by `entry.type`:
 *  - 'exact' (and any unrecognised type): strict equality with `entry.value`
 *  - 'contains': `entry.value` occurs inside `value`
 *  - 'regex': `entry.value` is compiled case-insensitively and tested against `value`
 *
 * Blank (`null`, `undefined` or empty-string) 'contains' / 'regex' entries never match.
 * Without this guard an empty row would match every value and force everything to be redacted.
 *
 * NOTE(review): regex entries are compiled here without going through `isSafeRegex`;
 * confirm the caller validates user-supplied patterns.
 *
 * @param {string} value - Candidate text.
 * @param {Array<{type: string, value: string}>} [denyList] - Deny-list entries.
 * @returns {string|null} The `value` of the first matching entry, or null when nothing matches.
 */
function getDenyListMatch(value, denyList) {
    if (!denyList || denyList.length === 0) {
        return null;
    }
    for (const entry of denyList) {
        try {
            const isBlank = entry.value == null || entry.value === '';
            let matched;
            switch (entry.type) {
                case 'contains':
                    matched = !isBlank && value.includes(entry.value);
                    break;
                case 'regex':
                    matched = !isBlank && new RegExp(entry.value, 'i').test(value);
                    break;
                case 'exact':
                default:
                    matched = value === entry.value;
            }
            if (matched) {
                return entry.value;
            }
        }
        catch {
            // Best effort: an invalid pattern or a non-string value is skipped, later entries still run.
            continue;
        }
    }
    return null;
}
|
|
100
|
+
/**
 * Build the placeholder token for a label/index pair, e.g. ("EMAIL", 3) -> "[EMAIL_3]".
 * The result depends only on the two arguments.
 *
 * @param {string} label - Pattern label placed before the underscore.
 * @param {number} index - Number placed after the underscore.
 * @returns {string} Token of the form "[<label>_<index>]".
 */
function generateToken(label, index) {
    return '['.concat(label, '_', index, ']');
}
|
|
106
|
+
/**
|
|
107
|
+
* Token format regex — used to skip already-redacted regions.
|
|
108
|
+
*/
|
|
109
|
+
// Anchored with ^ and $, so it only matches when the ENTIRE string is a token:
// "[" + one upper-case letter + any run of A-Z / 0-9 / "_" + "_" + decimal digits + "]".
// generateToken() output such as "[EMAIL_0]" has this shape when the label is upper-case.
// No "g" flag is set, so .test() keeps no lastIndex state between calls.
const TOKEN_RE = /^\[[A-Z][A-Z0-9_]*_\d+\]$/;
|
|
110
|
+
/**
 * Generate a masked version of a string.
 *
 * Rules, in order:
 *  1. Values of length <= 2 are fully replaced by "*".
 *  2. label "EMAIL" with an "@": first character of the local part and of the first
 *     domain label are kept, e.g. "john.doe@example.com" -> "j***@e***.com".
 *     The split happens at the LAST "@". If the local part or the first domain label is
 *     empty (e.g. "@example.com", "john@") the value falls through to the generic rule
 *     instead of emitting the text "undefined". No trailing dot is added when the
 *     domain has no dot.
 *  3. labels starting with "PHONE", or "FAX": "***" + last four digits (when >= 4 digits exist).
 *  4. "CREDIT_CARD" / "AMEX": "****-****-****-" + last four digits.
 *  5. Generic: first and last character kept, everything between replaced by "*".
 *
 * @param {string} value - Text to mask.
 * @param {string} label - Pattern label selecting the masking rule.
 * @returns {string} Masked text.
 */
function maskValue(value, label) {
    if (value.length <= 2) {
        return '*'.repeat(value.length);
    }
    if (label === 'EMAIL' && value.includes('@')) {
        const atIndex = value.lastIndexOf('@');
        const local = value.slice(0, atIndex);
        const [host, ...rest] = value.slice(atIndex + 1).split('.');
        // Only apply the e-mail format when both kept characters actually exist.
        if (local.length > 0 && host.length > 0) {
            const suffix = rest.length > 0 ? `.${rest.join('.')}` : '';
            return `${local[0]}***@${host[0]}***${suffix}`;
        }
    }
    if (label.startsWith('PHONE') || label === 'FAX') {
        const digits = value.replace(/\D/g, '');
        if (digits.length >= 4) {
            return `***${digits.slice(-4)}`;
        }
        // Fewer than four digits: fall through to the generic rule.
    }
    if (label === 'CREDIT_CARD' || label === 'AMEX') {
        const digits = value.replace(/\D/g, '');
        return `****-****-****-${digits.slice(-4)}`;
    }
    return value[0] + '*'.repeat(value.length - 2) + value[value.length - 1];
}
|
|
135
|
+
/**
 * Produce a short fingerprint of a value: the first 12 hex characters of its SHA-256 digest.
 *
 * @param {string} value - Text to hash.
 * @returns {string} 12-character lower-case hex prefix of the digest.
 */
function hashValue(value) {
    const hasher = crypto.createHash('sha256');
    hasher.update(value);
    const hexDigest = hasher.digest('hex');
    return hexDigest.slice(0, 12);
}
|
|
141
|
+
/**
 * Turn one matched string into its replacement according to the redaction mode.
 *
 * Modes:
 *  - 'token'  : returns a "[LABEL_n]" token and records the original in the vault.
 *               With `dedup` on, a token already stored for the same original is reused.
 *               `n` is the number of entries currently in the session (0 without a session).
 *               The vault entry carries the pattern's `category` when the pattern has that
 *               property, otherwise 'identity'.
 *  - 'mask'   : returns `maskValue(match, pattern.label)`.
 *  - 'hash'   : returns "[LABEL:<hashValue(match)>]".
 *  - 'redact' : returns the fixed text "[REDACTED]".
 *  - anything else: the match is returned unchanged.
 *
 * @param {string} match - Text that was matched.
 * @param {{label: string, category?: string}} pattern - Pattern that produced the match.
 * @param {string} mode - Redaction mode.
 * @param {object} vault - Store exposing findByOriginal / getSession / addEntry.
 * @param {string} sessionId - Vault session the entry belongs to.
 * @param {boolean} dedup - Reuse an existing token for an identical original.
 * @returns {string} Replacement text.
 */
function applyRedaction(match, pattern, mode, vault, sessionId, dedup) {
    if (mode === 'mask') {
        return maskValue(match, pattern.label);
    }
    if (mode === 'hash') {
        return `[${pattern.label}:${hashValue(match)}]`;
    }
    if (mode === 'redact') {
        return `[REDACTED]`;
    }
    if (mode !== 'token') {
        return match;
    }
    // Token mode: look for a reusable token first when de-duplication is requested.
    const reusable = dedup ? vault.findByOriginal(sessionId, match) : undefined;
    if (reusable) {
        return reusable.token;
    }
    const session = vault.getSession(sessionId);
    let index = 0;
    if (session) {
        index = Object.keys(session.entries).length;
    }
    const token = generateToken(pattern.label, index);
    const category = 'category' in pattern ? pattern.category : 'identity';
    vault.addEntry(sessionId, {
        token,
        original: match,
        patternLabel: pattern.label,
        category,
        createdAt: new Date().toISOString(),
    });
    return token;
}
|
|
174
|
+
/**
 * Decide whether the value at a JSON path should be processed under the field rules.
 *
 *  - 'allowlist': process only paths matched by at least one rule with mode 'include'.
 *  - 'denylist' : process every path except those matched by a rule with mode 'exclude'.
 *  - 'all' or any other mode: always process (the rules are not consulted).
 *
 * @param {string} fieldPath - Path of the field being visited.
 * @param {string} fieldMode - 'all' | 'allowlist' | 'denylist'.
 * @param {Array<{mode: string, field: string}>} fieldRules - Rules; `field` may contain wildcards.
 * @returns {boolean} True when the field should be processed.
 */
function shouldProcessField(fieldPath, fieldMode, fieldRules) {
    // Predicate factory: does a rule have the wanted mode AND match this path?
    const ruleHits = (wantedMode) => (rule) => rule.mode === wantedMode && fieldPathMatches(fieldPath, rule.field);
    switch (fieldMode) {
        case 'allowlist':
            return fieldRules.some(ruleHits('include'));
        case 'denylist':
            return !fieldRules.some(ruleHits('exclude'));
        default:
            return true;
    }
}
|
|
188
|
+
/**
 * Safe wildcard matching for JSON paths.
 *
 * The pattern is first escaped so every regex metacharacter is literal, then the two
 * supported wildcards are re-introduced on the escaped text:
 *  - "[*]" -> a numeric array index, e.g. "contacts[*].phone" matches "contacts[0].phone"
 *  - "*"   -> exactly one path segment (one or more characters other than "."),
 *             e.g. "*.email" matches "user.email" but not "a.b.email"
 *
 * The "[*]" rewrite must look for the ESCAPED form `\[\*\]`. The earlier expression
 * quantified the backslash instead of matching the star, so "[*]" was handled by the
 * generic wildcard and also accepted non-numeric content such as "contacts[abc]".
 *
 * @param {string} actual - Concrete path being tested.
 * @param {string} pattern - Rule pattern, optionally containing wildcards.
 * @returns {boolean} True when the whole path matches the pattern.
 */
function fieldPathMatches(actual, pattern) {
    // Escape ALL regex special characters first.
    const escaped = pattern.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    // Re-introduce the wildcards; the index form has to run before the generic one.
    const regexStr = escaped
        .replace(/\\\[\\\*\\\]/g, '\\[\\d+\\]') // \[\*\] -> \[\d+\]
        .replace(/\\\*/g, '[^.]+'); // \* -> [^.]+
    try {
        return new RegExp(`^${regexStr}$`).test(actual);
    }
    catch {
        // A pattern that still fails to compile simply matches nothing.
        return false;
    }
}
|
|
207
|
+
/**
 * Report whether a group that contains an unbounded quantifier (+, * or {n,}) is itself
 * repeated by an unbounded quantifier, e.g. (a+)+, (a*)*, ((a+))+, (a{1,})+.
 *
 * Escape sequences and character classes are replaced by a neutral atom first, so a
 * literal "\+" or "[+*]" is not mistaken for a quantifier.
 */
function hasNestedUnboundedQuantifier(regexStr) {
    const skeleton = regexStr.replace(/\\[\s\S]|\[(?:\\[\s\S]|[^\]\\])*\]/g, 'x');
    const unboundedAt = /[+*]|\{\d+,\}/y;
    const enclosingGroups = [];
    let repeats = false; // does the group currently being scanned contain an unbounded quantifier?
    for (let i = 0; i < skeleton.length; i++) {
        const ch = skeleton[i];
        if (ch === '(') {
            enclosingGroups.push(repeats);
            repeats = false;
            continue;
        }
        const isClose = ch === ')';
        // For ")" look at what follows the group, otherwise at the current position.
        unboundedAt.lastIndex = isClose ? i + 1 : i;
        const quantified = unboundedAt.test(skeleton);
        if (isClose) {
            if (repeats && quantified) {
                return true;
            }
            // A repeating inner group makes the enclosing group repeating as well.
            repeats = Boolean(enclosingGroups.pop()) || repeats;
        }
        else if (quantified) {
            repeats = true;
        }
    }
    return false;
}
/**
 * Validate a user-supplied regex for safety (prevent ReDoS).
 *
 * A pattern is rejected when any of the following holds:
 *  - it is longer than 500 characters;
 *  - two quantifiers are directly adjacent (this also rejects lazy forms such as "a+?");
 *  - a group containing an unbounded quantifier is itself repeated without bound,
 *    e.g. (a+)+, (a*)+, (a{1,})+ - the adjacency check alone misses these because ")"
 *    sits between the two quantifiers;
 *  - it does not compile.
 *
 * This is a conservative heuristic: some harmless patterns with nested repetition
 * (e.g. "\d+(?:\.\d+)*") are rejected too, and ambiguity built from alternation
 * (e.g. "(a|a)+") is not detected.
 *
 * @param {string} regexStr - Pattern source supplied by the user.
 * @returns {boolean} True when the pattern passed every check.
 */
function isSafeRegex(regexStr) {
    // Cheap size guard first so the scans below stay bounded.
    if (regexStr.length > 500) {
        return false;
    }
    // Reject directly adjacent quantifiers.
    if (/(\+|\*|\?|\{[^}]+\})\s*(\+|\*|\?|\{)/.test(regexStr)) {
        return false;
    }
    // Reject nested quantifiers like (a+)+, (a*)+, (a{1,})+.
    if (hasNestedUnboundedQuantifier(regexStr)) {
        return false;
    }
    // Finally make sure it compiles.
    try {
        new RegExp(regexStr);
        return true;
    }
    catch {
        return false;
    }
}
|
|
227
|
+
/**
|
|
228
|
+
* Semantic field-name mapping: when the JSON key name matches,
|
|
229
|
+
* redact the ENTIRE value regardless of regex matches.
|
|
230
|
+
* This catches structured data where the field name IS the context.
|
|
231
|
+
*/
|
|
232
|
+
const SEMANTIC_FIELD_MAP = [
|
|
233
|
+
// ═══════════════════════════════════════════
|
|
234
|
+
// PERSON NAMES (EN/DE/FR/ES/IT/PT/NL/SV/NO/DA/PL/RU/JA/KO/ZH/AR/TR)
|
|
235
|
+
// Covers: camelCase, snake_case, PascalCase, abbreviations, prefixed, suffixed
|
|
236
|
+
// ═══════════════════════════════════════════
|
|
237
|
+
{
|
|
238
|
+
keys: /^(?:name|first_?name|last_?name|full_?name|given_?name|family_?name|middle_?name|surname|maiden_?name|nick_?name|salutation|display_?name|contact_?name|customer_?name|client_?name|patient_?name|user_?name|member_?name|subscriber_?name|account_?name|holder_?name|card_?holder|cardholder_?name|beneficiary_?name|recipient_?name|sender_?name|author_?name|owner_?name|guardian_?name|parent_?name|spouse_?name|partner_?name|emergency_?contact|next_?of_?kin|applicant_?name|candidate_?name|student_?name|teacher_?name|employee_?name|manager_?name|supervisor_?name|guarantor|witness|signatory|cosigner|fname|lname|mname|fn|ln|mn|firstname|lastname|middlename|fullname|givenname|familyname|sur_?name|name_?first|name_?last|name_?full|legal_?name|birth_?name|married_?name|former_?name|known_?as|aka|vorname|nachname|familienname|geburtsname|anzeige_?name|kundenname|prenom|nom|nom_?de_?famille|nom_?complet|nom_?de_?jeune_?fille|nombre|apellido|apellidos|nombre_?completo|nome|cognome|nome_?completo|naam|achternaam|voornaam|volledige_?naam|fornamn|efternamn|fornavn|etternavn|imie|nazwisko|imya|familiya|sei|mei|shimei|seimei|xing|ming|xingming|isim|soyisim|soyad|ism|nasab)$/i,
|
|
239
|
+
label: 'PERSON_NAME',
|
|
240
|
+
category: 'identity',
|
|
241
|
+
},
|
|
242
|
+
// ═══════════════════════════════════════════
|
|
243
|
+
// EMPLOYEE / STAFF / WORKER IDs
|
|
244
|
+
// ═══════════════════════════════════════════
|
|
245
|
+
{
|
|
246
|
+
keys: /^(?:employee_?id|emp_?id|emp_?no|emp_?num|emp_?number|staff_?id|staff_?no|staff_?number|personnel_?(?:id|number|no|nr)|worker_?id|worker_?number|workforce_?id|badge_?(?:id|number|no)|clock_?(?:id|number)|payroll_?(?:id|number|no)|hr_?id|hr_?number|contractor_?id|temp_?id|intern_?id|associate_?id|team_?member_?id|personalnummer|mitarbeiter_?(?:id|nummer|nr|number)|arbeitnehmer_?nr|numero_?employe|matricule|numero_?dipendente|medewerkernummer|personeelsnummer|anstallningsnummer|ansattnummer|numer_?pracownika)$/i,
|
|
247
|
+
label: 'EMPLOYEE_ID',
|
|
248
|
+
category: 'identity',
|
|
249
|
+
},
|
|
250
|
+
// ═══════════════════════════════════════════
|
|
251
|
+
// DEPARTMENT / DIVISION / TEAM / BUSINESS UNIT
|
|
252
|
+
// ═══════════════════════════════════════════
|
|
253
|
+
{
|
|
254
|
+
keys: /^(?:department|dept|department_?name|dept_?name|dept_?code|division|division_?name|business_?unit|bu|cost_?center|cost_?centre|profit_?center|team|team_?name|group|group_?name|unit|unit_?name|section|section_?name|branch|branch_?name|office|office_?name|site|site_?name|location_?name|work_?location|facility|plant|factory|warehouse|abteilung|abteilungsname|bereich|referat|dienststelle|departement|service|direction|dipartimento|reparto|ufficio|afdeling|divisie|avdelning|avdeling|wydzial|dzial|otdel|bu_?name|org_?unit)$/i,
|
|
255
|
+
label: 'DEPARTMENT',
|
|
256
|
+
category: 'identity',
|
|
257
|
+
},
|
|
258
|
+
// ═══════════════════════════════════════════
|
|
259
|
+
// JOB TITLE / POSITION / ROLE / DESIGNATION / OCCUPATION
|
|
260
|
+
// ═══════════════════════════════════════════
|
|
261
|
+
{
|
|
262
|
+
keys: /^(?:position|job_?title|job_?role|job_?function|job_?type|job_?name|job_?description|job_?code|job_?level|job_?grade|job_?band|job_?family|job_?category|designation|occupation|profession|seniority|career_?level|functional_?title|working_?title|official_?title|employment_?type|contract_?type|work_?type|engagement_?type|berufsbezeichnung|stelle|stellenbezeichnung|position_?title|funktion|dienstbezeichnung|beruf|taetigkeit|poste|fonction|intitule_?du_?poste|titulo|cargo|puesto|posizione|ruolo|qualifica|mansione|functie|functietitel|befattning|stilling|stanowisko|zawod|dolzhnost|shokumei|yakushoku|zhiwei|zhicheng|gorev|unvan|pozisyon)$/i,
|
|
263
|
+
label: 'JOB_TITLE',
|
|
264
|
+
category: 'identity',
|
|
265
|
+
},
|
|
266
|
+
// ═══════════════════════════════════════════
|
|
267
|
+
// SALARY / COMPENSATION / PAY / INCOME / WAGES
|
|
268
|
+
// ═══════════════════════════════════════════
|
|
269
|
+
{
|
|
270
|
+
keys: /^(?:salary|compensation|wage|wages|income|pay|pay_?rate|hourly_?rate|daily_?rate|annual_?salary|monthly_?salary|weekly_?salary|yearly_?salary|base_?pay|base_?salary|gross_?pay|gross_?salary|net_?pay|net_?salary|take_?home|total_?comp|total_?compensation|total_?pay|total_?earnings|earnings|remuneration|stipend|allowance|bonus|bonus_?amount|commission|commission_?amount|overtime|overtime_?pay|severance|severance_?pay|pension|pension_?amount|retirement_?contribution|stock_?options|equity|rsu|vesting|deduction|deductions|tax_?withholding|withholding|benefits_?value|ctc|cost_?to_?company|package|comp_?package|offer_?amount|starting_?salary|current_?salary|previous_?salary|expected_?salary|desired_?salary|salary_?range|pay_?grade|pay_?band|pay_?scale|gehalt|brutto_?gehalt|netto_?gehalt|grundgehalt|jahresgehalt|monatsgehalt|lohn|bruttolohn|nettolohn|verguetung|entgelt|bezuege|zuschlag|praemie|provision|salaire|remuneration_?brute|remuneration_?nette|stipendio|retribuzione|salario|sueldo|remuneracao|loon|salaris|lon|lonn|wynagrodzenie|pensja|zarplata|oklad|kyuyo|nenshu|gongzi|xinshui|maas|ucret)$/i,
|
|
271
|
+
label: 'SALARY',
|
|
272
|
+
category: 'financial',
|
|
273
|
+
},
|
|
274
|
+
// ═══════════════════════════════════════════
|
|
275
|
+
// DATES: Hire, Start, End, Termination, Birth, Death, etc.
|
|
276
|
+
// ═══════════════════════════════════════════
|
|
277
|
+
{
|
|
278
|
+
keys: /^(?:hire_?date|start_?date|joining_?date|date_?of_?hire|date_?of_?joining|employment_?date|employment_?start|onboarding_?date|effective_?date|commencement_?date|appointment_?date|probation_?(?:start|end)_?date|termination_?date|end_?date|leaving_?date|resignation_?date|separation_?date|exit_?date|last_?day|last_?working_?day|notice_?date|retirement_?date|contract_?(?:start|end)_?date|trial_?(?:start|end)_?date|review_?date|appraisal_?date|promotion_?date|transfer_?date|anniversary_?date|tenure_?start|eintrittsdatum|einstellungsdatum|austrittsdatum|kuendigungsdatum|probezeitende|vertragsbeginn|vertragsende|date_?embauche|date_?de_?debut|date_?de_?fin|date_?sortie|fecha_?de_?inicio|fecha_?de_?fin|data_?assunzione|data_?cessazione|datum_?indiensttreding|datum_?uitdiensttreding|anstallningsdatum|ansettelsesdato|data_?zatrudnienia)$/i,
|
|
279
|
+
label: 'EMPLOYMENT_DATE',
|
|
280
|
+
category: 'temporal',
|
|
281
|
+
},
|
|
282
|
+
{
|
|
283
|
+
keys: /^(?:dob|date_?of_?birth|birth_?date|birthday|birth_?day|born|born_?on|born_?date|geburtsdatum|geburtstag|date_?de_?naissance|fecha_?de_?nacimiento|data_?di_?nascita|geboortedatum|fodelsedatum|fodselsdato|syntymapaiva|data_?de_?nascimento|data_?urodzenia|den_?rozhdeniya|seinengappi|shengri|dogum_?tarihi)$/i,
|
|
284
|
+
label: 'DOB',
|
|
285
|
+
category: 'temporal',
|
|
286
|
+
},
|
|
287
|
+
{
|
|
288
|
+
keys: /^(?:date_?of_?death|death_?date|deceased_?date|died_?on|died|sterbedatum|todesdatum|date_?de_?deces|fecha_?de_?defuncion|data_?di_?morte)$/i,
|
|
289
|
+
label: 'DATE_OF_DEATH',
|
|
290
|
+
category: 'temporal',
|
|
291
|
+
},
|
|
292
|
+
{
|
|
293
|
+
keys: /^(?:person_?age|patient_?age|customer_?age|employee_?age|age_?years|current_?age|alter|edad|eta|yaş|wiek|vozrast|nenrei|nianling)$/i,
|
|
294
|
+
label: 'AGE',
|
|
295
|
+
category: 'temporal',
|
|
296
|
+
},
|
|
297
|
+
// ═══════════════════════════════════════════
|
|
298
|
+
// ADDRESS / LOCATION (every variation in 15+ languages)
|
|
299
|
+
// ═══════════════════════════════════════════
|
|
300
|
+
{
|
|
301
|
+
keys: /^(?:address|street_?address|home_?address|mailing_?address|shipping_?address|billing_?address|postal_?address|residential_?address|permanent_?address|temporary_?address|current_?address|previous_?address|work_?address|office_?address|business_?address|delivery_?address|correspondence_?address|legal_?address|registered_?address|address_?line_?[123]|addr_?[123]|addr|street|street_?name|street_?line|street_?line_?[12]|house_?number|house_?no|building|apartment|apt|flat|suite|floor|room|unit_?number|block|lot|po_?box|post_?box|city|town|municipality|village|locality|suburb|district|borough|county|state|province|region|territory|country|country_?code|country_?name|nation|zip|zip_?code|zipcode|postal_?code|postcode|post_?code|pin_?code|area_?code|geo_?location|latitude|longitude|lat|lng|coordinates|place|place_?of_?birth|birth_?place|place_?of_?residence|domicile|residence|neighborhood|neighbourhood|quarter|canton|prefecture|commune|departement|land|bundesland|kreis|bezirk|ortsteil|stadtteil|gemeinde|anschrift|adresse|wohnadresse|wohnort|strasse|hausnummer|plz|postleitzahl|ort|stadt|wohnhaft|rue|ville|code_?postal|cedex|calle|direccion|codigo_?postal|localidad|poblacion|provincia|comunidad|via|indirizzo|cap|citta|comune|straat|huisnummer|postbus|plaats|woonplaats|gata|postnummer|sted|ulica|kod_?pocztowy|miasto|adres|gorod|pochta|jyusho|jutaku|dizhi|chengshi|youbian|adres|mahalle|ilce|il|posta_?kodu)$/i,
|
|
302
|
+
label: 'ADDRESS',
|
|
303
|
+
category: 'location',
|
|
304
|
+
},
|
|
305
|
+
// ═══════════════════════════════════════════
|
|
306
|
+
// PHONE / MOBILE / FAX / CONTACT NUMBERS
|
|
307
|
+
// ═══════════════════════════════════════════
|
|
308
|
+
{
|
|
309
|
+
keys: /^(?:phone|phone_?number|phone_?no|mobile|mobile_?number|mobile_?no|cell|cell_?phone|cell_?number|telephone|tel|tel_?number|fax|fax_?number|fax_?no|contact_?number|contact_?phone|home_?phone|work_?phone|office_?phone|business_?phone|direct_?line|direct_?dial|extension|ext|callback|callback_?number|sms_?number|whatsapp|whatsapp_?number|primary_?phone|secondary_?phone|alternate_?phone|emergency_?phone|telefon|telefonnummer|mobilnummer|handy|handynummer|festnetz|rufnummer|durchwahl|faxnummer|telephone_?portable|portable|fixe|numero_?de_?telephone|numero_?de_?portable|telefono|cellulare|numero_?di_?telefono|numero_?de_?telefone|celular|telefoon|mobiel|telefoonnummer|mobilnummer_?2|telefonnr|tlf|telefon_?komorkowy|telefon_?stacjonarny|nomer_?telefona|denwabangou|keitai|dianhua|shouji|telefon_?numarasi|cep_?telefonu)$/i,
|
|
310
|
+
label: 'PHONE',
|
|
311
|
+
category: 'contact',
|
|
312
|
+
},
|
|
313
|
+
// ═══════════════════════════════════════════
|
|
314
|
+
// EMAIL (every variation)
|
|
315
|
+
// ═══════════════════════════════════════════
|
|
316
|
+
{
|
|
317
|
+
keys: /^(?:email|email_?address|e_?mail|mail|mail_?address|primary_?email|secondary_?email|work_?email|personal_?email|business_?email|contact_?email|login_?email|notification_?email|recovery_?email|backup_?email|alternate_?email|alt_?email|reply_?to|from_?email|to_?email|cc|bcc|email_?id|e_?mail_?adresse|elektronische_?post|correo|correo_?electronico|indirizzo_?email|posta_?elettronica|emailadres|epostadress|epost|adres_?email|pochta|mail_?adresi|eposta)$/i,
|
|
318
|
+
label: 'EMAIL',
|
|
319
|
+
category: 'contact',
|
|
320
|
+
},
|
|
321
|
+
// ═══════════════════════════════════════════
|
|
322
|
+
// SSN / NATIONAL ID / TAX ID / GOV ID
|
|
323
|
+
// ═══════════════════════════════════════════
|
|
324
|
+
{
|
|
325
|
+
keys: /^(?:ssn|social_?security|social_?security_?number|social_?insurance|social_?insurance_?number|sin|national_?id|national_?id_?number|national_?identity|identity_?number|identity_?card|id_?number|id_?card|id_?no|id_?document|personal_?id|personal_?number|personal_?code|citizen_?id|resident_?id|registration_?number|civil_?id|civil_?registration|gov_?id|government_?id|tax_?id|tax_?number|tax_?identification|taxpayer_?id|tin|itin|ein|vat_?number|vat_?id|steuer_?id|steuer_?nummer|steueridentifikationsnummer|steuernummer|sozialversicherungsnummer|personalausweisnummer|ausweisnummer|nif|nie|dni|cif|nif_?numero|bsn|burger_?service_?nummer|nino|national_?insurance|national_?insurance_?number|pps|pps_?number|pesel|nip|regon|personnummer|fodselsnummer|cpr|cpr_?nummer|hetu|henkilotunnus|cpf|rg|cnpj|curp|rfc|aadhaar|aadhar|pan|pan_?number|nric|fin|my_?number|rrn|ahv|avs|codice_?fiscale|carta_?identita|carte_?identite|numero_?securite_?sociale|nir|rijksregisternummer|numero_?identite|kimlik|tc_?kimlik|emirates_?id|iqama)$/i,
|
|
326
|
+
label: 'NATIONAL_ID',
|
|
327
|
+
category: 'identity',
|
|
328
|
+
},
|
|
329
|
+
// ═══════════════════════════════════════════
|
|
330
|
+
// BANK ACCOUNT / FINANCIAL ACCOUNTS
|
|
331
|
+
// ═══════════════════════════════════════════
|
|
332
|
+
{
|
|
333
|
+
keys: /^(?:account_?number|account_?no|acct|acct_?number|acct_?no|bank_?account|bank_?account_?number|bank_?acct|checking_?account|savings_?account|current_?account|deposit_?account|iban|bic|swift|swift_?code|routing_?number|routing_?no|aba|sort_?code|bsb|bsb_?number|clabe|transit_?number|branch_?code|branch_?number|kontonummer|bankleitzahl|blz|konto|girokonto|sparkonto|numero_?de_?compte|rib|numero_?conto|numero_?de_?cuenta|rekeningnummer|bankgiro|plusgiro|numer_?konta|schet|kouza_?bangou)$/i,
|
|
334
|
+
label: 'ACCOUNT_NUMBER',
|
|
335
|
+
category: 'financial',
|
|
336
|
+
},
|
|
337
|
+
// ═══════════════════════════════════════════
|
|
338
|
+
// CREDIT / DEBIT CARD
|
|
339
|
+
// ═══════════════════════════════════════════
|
|
340
|
+
{
|
|
341
|
+
keys: /^(?:card_?number|credit_?card|credit_?card_?number|debit_?card|debit_?card_?number|cc_?number|cc_?no|pan|card_?no|card_?num|payment_?card|kartennummer|kreditkarte|kreditkartennummer|numero_?de_?carte|numero_?carta|numero_?de_?tarjeta|kaartnummer|cvv|cvc|cvv2|cvc2|cid|security_?code|card_?verification|card_?expiry|expiry_?date|expiration_?date|exp_?date|valid_?thru|valid_?through|card_?holder|cardholder_?name)$/i,
|
|
342
|
+
label: 'CREDIT_CARD',
|
|
343
|
+
category: 'financial',
|
|
344
|
+
},
|
|
345
|
+
// ═══════════════════════════════════════════
|
|
346
|
+
// INSURANCE (health, life, auto, property, etc.)
|
|
347
|
+
// ═══════════════════════════════════════════
|
|
348
|
+
{
|
|
349
|
+
keys: /^(?:insurance|insurance_?(?:id|number|no|policy|claim|type|provider|company|plan|group)|policy_?(?:number|no|id)|policy_?holder|claim_?(?:number|no|id|ref|reference)|health_?plan|health_?plan_?id|member_?id|member_?number|subscriber_?id|subscriber_?number|beneficiary_?id|beneficiary_?number|group_?number|group_?id|plan_?id|plan_?number|coverage_?id|enrollment_?id|certificate_?number|versicherungsnummer|versicherung|krankenversicherung|krankenkasse|police_?nummer|police_?nr|numero_?assurance|numero_?police|polizza|numero_?polizza|numero_?de_?seguro|verzekeringsnummer|polisnummer)$/i,
|
|
350
|
+
label: 'INSURANCE',
|
|
351
|
+
category: 'financial',
|
|
352
|
+
},
|
|
353
|
+
// ═══════════════════════════════════════════
|
|
354
|
+
// PASSPORT
|
|
355
|
+
// ═══════════════════════════════════════════
|
|
356
|
+
{
|
|
357
|
+
keys: /^(?:passport|passport_?number|passport_?no|passport_?id|passport_?num|travel_?document|travel_?doc_?number|reisepass|reisepassnummer|passnummer|numero_?passeport|passeport|passaporto|numero_?passaporto|numero_?de_?pasaporte|paspoort|paspoortnummer|passnummer_?2|pass_?nr|pasaporte|pasaporte_?numero)$/i,
|
|
358
|
+
label: 'PASSPORT',
|
|
359
|
+
category: 'identity',
|
|
360
|
+
},
|
|
361
|
+
// ═══════════════════════════════════════════
|
|
362
|
+
// DRIVER LICENSE
|
|
363
|
+
// ═══════════════════════════════════════════
|
|
364
|
+
{
|
|
365
|
+
keys: /^(?:driver_?license|drivers_?license|driver_?licence|drivers_?licence|driving_?license|driving_?licence|dl|dl_?number|dl_?no|license_?number|licence_?number|license_?no|licence_?no|permit_?number|learner_?permit|fuehrerschein|fuehrerscheinnummer|permis_?de_?conduire|numero_?permis|patente|patente_?di_?guida|permiso_?de_?conducir|carnet_?de_?conducir|rijbewijs|rijbewijsnummer|koerkort|forerkort|prawo_?jazdy|voditelskoe_?udostoverenie|unten_?menkyo|jiashi_?zheng|ehliyet|surucu_?belgesi)$/i,
|
|
366
|
+
label: 'DRIVER_LICENSE',
|
|
367
|
+
category: 'identity',
|
|
368
|
+
},
|
|
369
|
+
// ═══════════════════════════════════════════
|
|
370
|
+
// MEDICAL / HEALTH / PATIENT
|
|
371
|
+
// ═══════════════════════════════════════════
|
|
372
|
+
{
|
|
373
|
+
keys: /^(?:mrn|medical_?record|medical_?record_?number|patient_?id|patient_?number|patient_?name|health_?id|health_?card|health_?number|hospital_?id|hospital_?number|chart_?number|case_?number|encounter_?id|visit_?id|admission_?id|discharge_?id|diagnosis|diagnosis_?code|icd|icd_?code|icd_?10|procedure_?code|cpt|cpt_?code|ndc|ndc_?code|medication|medications|drug|drug_?name|prescription|prescription_?id|rx|rx_?number|dosage|blood_?type|blood_?group|allergy|allergies|condition|conditions|medical_?condition|disability|disability_?status|disability_?type|disability_?degree|impairment|mental_?health|psychiatric|immunization|vaccination|vaccine|lab_?result|test_?result|vital_?signs|bmi|weight|height|npi|npi_?number|dea|dea_?number|provider_?id|physician_?id|doctor_?name|treating_?physician|primary_?care|attending|referring|insurance_?diagnosis|pre_?existing|chronic|krankenakte|patientennummer|diagnose|medikament|rezept|blutgruppe|behinderung|grad_?der_?behinderung|gdb|schwerbehindertenausweis|dossier_?medical|numero_?patient|groupe_?sanguin|cartella_?clinica|historia_?clinica|prontuario)$/i,
|
|
374
|
+
label: 'MEDICAL',
|
|
375
|
+
category: 'medical',
|
|
376
|
+
},
|
|
377
|
+
// ═══════════════════════════════════════════
|
|
378
|
+
// COMPANY / ORGANIZATION / EMPLOYER / BUSINESS
|
|
379
|
+
// ═══════════════════════════════════════════
|
|
380
|
+
{
|
|
381
|
+
keys: /^(?:company|company_?name|organization|organisation|org|org_?name|employer|employer_?name|business|business_?name|corporation|corp|corp_?name|subsidiary|parent_?company|holding|group_?name|entity|entity_?name|legal_?entity|legal_?name|registered_?name|trading_?name|brand|brand_?name|vendor|vendor_?name|supplier|supplier_?name|partner|partner_?name|client|client_?name|agency|agency_?name|institute|institution|school|school_?name|university|university_?name|college|hospital|hospital_?name|practice|practice_?name|firm|firm_?name|llc|gmbh|ag|inc|ltd|plc|sa|srl|bv|nv|firma|unternehmen|unternehmensname|arbeitgeber|betrieb|handelsname|societe|raison_?sociale|denominazione|ragione_?sociale|empresa|nombre_?empresa|bedrijf|bedrijfsnaam|werkgever|foretag|arbetsgivare|virksomhed|arbejdsgiver|firma_?2|spolka|pracodawca|kompaniya|rabotodatel|kaisha|kigyou|gongsi|guyong_?danwei|sirket|isveren)$/i,
|
|
382
|
+
label: 'COMPANY',
|
|
383
|
+
category: 'identity',
|
|
384
|
+
},
|
|
385
|
+
// ═══════════════════════════════════════════
|
|
386
|
+
// EDUCATION / ACADEMIC
|
|
387
|
+
// ═══════════════════════════════════════════
|
|
388
|
+
{
|
|
389
|
+
keys: /^(?:student_?id|student_?number|student_?name|enrollment_?id|enrollment_?number|matriculation|matriculation_?number|matrikel|matrikelnummer|gpa|grade_?point|test_?score|sat_?score|act_?score|gre_?score|gmat_?score|degree|diploma|qualification|major|minor|field_?of_?study|program|faculty|school_?name|university|college|campus|class_?of|graduation|graduation_?date|grad_?year|transcript|academic_?record|education_?level|highest_?education|studiengang|abschluss|zeugnis|schulname|hochschule|diplome|scolarite|titolo_?di_?studio|expediente)$/i,
|
|
390
|
+
label: 'EDUCATION',
|
|
391
|
+
category: 'identity',
|
|
392
|
+
},
|
|
393
|
+
// ═══════════════════════════════════════════
|
|
394
|
+
// VEHICLE / TRANSPORT
|
|
395
|
+
// ═══════════════════════════════════════════
|
|
396
|
+
{
|
|
397
|
+
keys: /^(?:vin|vehicle_?identification|vehicle_?id|vehicle_?number|chassis_?number|license_?plate|plate_?number|plate_?no|registration|registration_?number|reg_?number|reg_?no|tag_?number|kennzeichen|fahrzeug_?id|fahrgestellnummer|immatriculation|plaque|targa|matricula|kenteken|registreringsnummer|numer_?rejestracyjny)$/i,
|
|
398
|
+
label: 'VEHICLE',
|
|
399
|
+
category: 'vehicle',
|
|
400
|
+
},
|
|
401
|
+
// ═══════════════════════════════════════════
|
|
402
|
+
// CREDENTIALS / AUTH / SECRETS / TOKENS
|
|
403
|
+
// ═══════════════════════════════════════════
|
|
404
|
+
{
|
|
405
|
+
keys: /^(?:password|passwd|pwd|pass_?word|secret|secret_?key|api_?key|api_?secret|api_?token|access_?token|refresh_?token|auth_?token|bearer_?token|session_?token|session_?key|private_?key|encryption_?key|signing_?key|master_?key|client_?secret|consumer_?key|consumer_?secret|oauth_?token|credentials|passwort|kennwort|schluessel|mot_?de_?passe|contrasena|clave|senha|wachtwoord|sleutel|losenord|haslo|parol|mima|sifre)$/i,
|
|
406
|
+
label: 'CREDENTIAL',
|
|
407
|
+
category: 'enterprise',
|
|
408
|
+
},
|
|
409
|
+
// ═══════════════════════════════════════════
|
|
410
|
+
// RACE / ETHNICITY / NATIONALITY / CITIZENSHIP
|
|
411
|
+
// ═══════════════════════════════════════════
|
|
412
|
+
{
|
|
413
|
+
keys: /^(?:race|ethnicity|ethnic|ethnic_?origin|ethnic_?group|nationality|citizenship|national_?origin|heritage|ancestry|background|rasse|herkunft|ethnische_?herkunft|nationalitaet|staatsangehoerigkeit|ethnie|origine|nationalite|citoyennete|raza|etnia|nacionalidad|ciudadania|razza|nazionalita|cittadinanza|nationaliteit|etnicitet|medborgarskap|narodowsc|rasa|natsionalnost|grazhdanstvo|kokuseki|minzu|guoji|milliyet|vatandaslik|uyruk)$/i,
|
|
414
|
+
label: 'ETHNICITY',
|
|
415
|
+
category: 'identity',
|
|
416
|
+
},
|
|
417
|
+
// ═══════════════════════════════════════════
|
|
418
|
+
// RELIGION / BELIEF / POLITICAL
|
|
419
|
+
// ═══════════════════════════════════════════
|
|
420
|
+
{
|
|
421
|
+
keys: /^(?:religion|religious|religious_?affiliation|religious_?belief|faith|creed|denomination|confession|church|mosque|synagogue|temple|spiritual|belief|philosophical_?belief|political_?(?:party|affiliation|opinion|preference|view)|party|party_?membership|union|union_?membership|trade_?union|konfession|religionszugehoerigkeit|glaubensbekenntnis|kirchenzugehoerigkeit|politische_?partei|gewerkschaft|religion_?2|culte|parti_?politique|syndicat|partido_?politico|sindicato|religie|politieke_?partij|vakbond|religion_?3|tro|parti|fagforening|religia|partia|zwiazek_?zawodowy|religiya|partiya|profsoyuz|shukyo|seitou|zongjiao|dangpai|din|mezhep|parti_?2|sendika)$/i,
|
|
422
|
+
label: 'BELIEF_POLITICAL',
|
|
423
|
+
category: 'identity',
|
|
424
|
+
},
|
|
425
|
+
// ═══════════════════════════════════════════
|
|
426
|
+
// GENDER / SEX / MARITAL STATUS / FAMILY
|
|
427
|
+
// ═══════════════════════════════════════════
|
|
428
|
+
{
|
|
429
|
+
keys: /^(?:gender|sex|gender_?identity|sexual_?orientation|sexuality|preferred_?pronouns|pronouns|marital_?status|civil_?status|relationship_?status|marriage|married|single|divorced|widowed|separated|domestic_?partner|spouse|partner|family_?status|number_?of_?(?:children|kids|dependents)|dependents|geschlecht|familienstand|sexuelle_?orientierung|sexe|etat_?civil|orientation_?sexuelle|genero|estado_?civil|orientacion_?sexual|sesso|stato_?civile|geslacht|burgerlijke_?staat|kon|civilstand|plec|stan_?cywilny|pol|semeynoe_?polozhenie|seibetsu|xingbie|hunyin|cinsiyet|medeni_?hal)$/i,
|
|
430
|
+
label: 'PERSONAL_STATUS',
|
|
431
|
+
category: 'identity',
|
|
432
|
+
},
|
|
433
|
+
// ═══════════════════════════════════════════
|
|
434
|
+
// LEGAL / JUDICIAL / CRIMINAL
|
|
435
|
+
// ═══════════════════════════════════════════
|
|
436
|
+
{
|
|
437
|
+
keys: /^(?:case_?number|case_?id|case_?ref|docket|docket_?number|court|court_?name|judge|attorney|lawyer|legal_?representative|legal_?counsel|criminal_?record|conviction|offense|offence|charge|charges|sentence|verdict|probation|parole|arrest|arrest_?date|booking|booking_?number|inmate|prisoner|offender|inmate_?(?:id|number)|mugshot|fingerprint|warrant|bail|bond|aktenzeichen|gerichtsaktenzeichen|strafregister|vorstrafe|verurteilung|anwalt|rechtsanwalt|richter|numero_?affaire|casier_?judiciaire|condamnation|avocat|juge|numero_?expediente|antecedentes_?penales|condena|abogado|juez)$/i,
|
|
438
|
+
label: 'LEGAL',
|
|
439
|
+
category: 'identity',
|
|
440
|
+
},
|
|
441
|
+
// ═══════════════════════════════════════════
|
|
442
|
+
// BIOMETRIC / PHYSICAL CHARACTERISTICS
|
|
443
|
+
// ═══════════════════════════════════════════
|
|
444
|
+
{
|
|
445
|
+
keys: /^(?:biometric|fingerprint|face_?id|facial|facial_?recognition|iris|retina|voiceprint|voice_?id|palm_?print|hand_?geometry|dna|genetic|genome|genotype|karyotype|blood_?type|blood_?group|eye_?color|hair_?color|skin_?color|height|weight|bmi|body_?mass|distinguishing_?marks|tattoo|scar|birthmark|photo|photograph|picture|image|avatar|headshot|portrait|mugshot|selfie|foto|bild|lichtbild|passfoto|passbild)$/i,
|
|
446
|
+
label: 'BIOMETRIC',
|
|
447
|
+
category: 'biometric',
|
|
448
|
+
},
|
|
449
|
+
// ═══════════════════════════════════════════
|
|
450
|
+
// UTILITY / SUBSCRIPTION / LOYALTY / MEMBERSHIP
|
|
451
|
+
// ═══════════════════════════════════════════
|
|
452
|
+
{
|
|
453
|
+
keys: /^(?:utility_?(?:account|number|id)|electricity_?(?:account|number)|gas_?(?:account|number)|water_?(?:account|number)|meter_?(?:number|id)|customer_?(?:number|id|no)|account_?(?:id|ref)|reference_?(?:number|id|no|code)|subscription_?(?:id|number)|membership_?(?:id|number|no)|member_?(?:number|no)|loyalty_?(?:id|number|card)|rewards?_?(?:id|number|card)|frequent_?flyer|frequent_?flyer_?(?:number|id)|mileage_?(?:number|id)|points_?(?:id|number)|library_?(?:card|id|number)|kundennummer|mitgliedsnummer|abonnement|vertragsnummer)$/i,
|
|
454
|
+
label: 'ACCOUNT_REF',
|
|
455
|
+
category: 'financial',
|
|
456
|
+
},
|
|
457
|
+
// ═══════════════════════════════════════════
|
|
458
|
+
// IP / HOSTNAME / SERVER / NETWORK
|
|
459
|
+
// ═══════════════════════════════════════════
|
|
460
|
+
{
|
|
461
|
+
keys: /^(?:ip|ip_?address|ipv4|ipv6|server|server_?name|server_?address|hostname|host|host_?name|domain|domain_?name|mac|mac_?address|subnet|gateway|proxy|vpn|dns|url|uri|endpoint|api_?url|webhook_?url|callback_?url|redirect_?url|origin|referrer|referer|user_?agent|device_?id|device_?name|machine_?name|computer_?name|workstation)$/i,
|
|
462
|
+
label: 'NETWORK',
|
|
463
|
+
category: 'network',
|
|
464
|
+
},
|
|
465
|
+
// ═══════════════════════════════════════════
|
|
466
|
+
// CONTRACTS / INVOICES / LEGAL DOCUMENTS
|
|
467
|
+
// ═══════════════════════════════════════════
|
|
468
|
+
{
|
|
469
|
+
keys: /^(?:contract_?(?:number|no|id|ref)|invoice_?(?:number|no|id|ref)|order_?(?:number|no|id|ref)|purchase_?order|po_?(?:number|no)|quote_?(?:number|no|id)|proposal_?(?:number|no|id)|agreement_?(?:number|no|id)|reference_?(?:number|no)|receipt_?(?:number|no|id)|transaction_?(?:id|ref|number)|payment_?(?:id|ref|reference)|billing_?(?:id|ref|number)|Rechnungsnummer|Vertragsnummer|Auftragsnummer|Bestellnummer|Angebotsnummer|numero_?(?:facture|contrat|commande)|numero_?(?:fattura|contratto|ordine))$/i,
|
|
470
|
+
label: 'DOCUMENT_REF',
|
|
471
|
+
category: 'financial',
|
|
472
|
+
},
|
|
473
|
+
// ═══════════════════════════════════════════
|
|
474
|
+
// FINANCIAL AMOUNTS / REVENUE / SENSITIVE NUMBERS
|
|
475
|
+
// ═══════════════════════════════════════════
|
|
476
|
+
{
|
|
477
|
+
keys: /^(?:amount|total|subtotal|grand_?total|tax_?amount|vat_?amount|discount|balance|outstanding|due|paid|refund|credit|debit|revenue|profit|loss|margin|cost|price|unit_?price|net_?amount|gross_?amount|fee|charge|interest|penalty|fine|deposit|withdrawal|transfer_?amount|Betrag|Gesamtbetrag|Steuerbetrag|Rabatt|Saldo|montant|somme|total_?ttc|total_?ht|importe|monto|importo)$/i,
|
|
478
|
+
label: 'FINANCIAL_AMOUNT',
|
|
479
|
+
category: 'financial',
|
|
480
|
+
},
|
|
481
|
+
// ═══════════════════════════════════════════
|
|
482
|
+
// SIGNATURE / AUTHORIZATION / CONSENT
|
|
483
|
+
// ═══════════════════════════════════════════
|
|
484
|
+
{
|
|
485
|
+
keys: /^(?:signature|signed_?by|authorized_?by|approved_?by|witnessed_?by|notarized_?by|certified_?by|consent|consent_?date|consent_?given|power_?of_?attorney|proxy|delegate|Unterschrift|unterzeichnet_?von|genehmigt_?von|Vollmacht)$/i,
|
|
486
|
+
label: 'SIGNATURE',
|
|
487
|
+
category: 'identity',
|
|
488
|
+
},
|
|
489
|
+
// ═══════════════════════════════════════════
|
|
490
|
+
// BANK / PAYMENT DETAILS IN DOCUMENTS
|
|
491
|
+
// ═══════════════════════════════════════════
|
|
492
|
+
{
|
|
493
|
+
keys: /^(?:bank_?name|bank|branch|branch_?name|beneficiary|beneficiary_?name|beneficiary_?account|payer|payer_?name|payee|payee_?name|remitter|sender_?(?:name|account|bank)|receiver_?(?:name|account|bank)|correspondent_?bank|intermediary_?bank|Bankname|Begünstigter|Zahlungsempfänger|Auftraggeber)$/i,
|
|
494
|
+
label: 'BANK_DETAIL',
|
|
495
|
+
category: 'financial',
|
|
496
|
+
},
|
|
497
|
+
// ═══════════════════════════════════════════
|
|
498
|
+
// TAX / FISCAL DETAILS
|
|
499
|
+
// ═══════════════════════════════════════════
|
|
500
|
+
{
|
|
501
|
+
keys: /^(?:tax_?(?:rate|class|bracket|status|year|period|return|filing)|fiscal_?(?:year|period|code)|withholding|exemption|deductible|taxable_?(?:income|amount)|gross_?income|net_?income|adjusted_?gross|agi|Steuerklasse|Steuerjahr|Steuererklärung|Freibetrag)$/i,
|
|
502
|
+
label: 'TAX_DETAIL',
|
|
503
|
+
category: 'financial',
|
|
504
|
+
},
|
|
505
|
+
// ═══════════════════════════════════════════
|
|
506
|
+
// LANGUAGE / CULTURAL IDENTITY (GDPR Art.9)
|
|
507
|
+
// ═══════════════════════════════════════════
|
|
508
|
+
{
|
|
509
|
+
keys: /^(?:language|mother_?tongue|native_?language|spoken_?language|preferred_?language|first_?language|primary_?language|sprache|muttersprache|langue|langue_?maternelle|idioma|lengua|lingua|taal|moedertaal|sprak|jezyk|yazyk|gengo|yuyan|dil|ana_?dil)$/i,
|
|
510
|
+
label: 'LANGUAGE',
|
|
511
|
+
category: 'identity',
|
|
512
|
+
},
|
|
513
|
+
// ═══════════════════════════════════════════
|
|
514
|
+
// NOTES / FREE TEXT (run regex, don't redact whole value)
|
|
515
|
+
// ═══════════════════════════════════════════
|
|
516
|
+
{
|
|
517
|
+
keys: /^(?:notes|note|comment|comments|description|desc|message|msg|body|text|content|remarks|remark|internal_?notes|agent_?notes|case_?notes|memo|memorandum|narrative|summary|abstract|details|detail|reason|explanation|history|log|changelog|audit|feedback|review|observation|assessment|evaluation|recommendation|instruction|instructions|notizen|bemerkung|kommentar|beschreibung|nachricht|anmerkung|vermerk|remarques|commentaire|observation_?2|nota|commento|descrizione|notas|comentario|descripcion|opmerkingen|beschrijving|anteckningar|kommentar_?2|uwagi|komentarz|opis|zametki|kommentariy|opisanie|bikou|beikou|beizhu|shuoming|not|aciklama|yorum)$/i,
|
|
518
|
+
label: '',
|
|
519
|
+
category: 'identity',
|
|
520
|
+
},
|
|
521
|
+
];
|
|
522
|
+
/**
 * Generic key names ("id", "ref", "created_by", ...) that say nothing about
 * personal data on their own.
 *
 * getSemanticLabel() consults this list only after no explicit semantic
 * mapping matched, and treats a hit as an identifier only when a sibling key
 * of the same object matches PII_INDICATOR_FIELDS.
 *
 * Matching is case-insensitive and anchored to the whole key; `_?` makes the
 * underscore optional so "createdBy" and "created_by" both match.
 */
const AMBIGUOUS_FIELDS = new RegExp(`^(?:${[
    // Bare identifiers and generic containers
    'id', 'code', 'number', 'no', 'num', 'nr', 'ref', 'reference', 'key',
    'identifier', 'value', 'data', 'info', 'source', 'origin',
    // "<action>_by" / "<action>_to" attribution fields
    'created_?by', 'updated_?by', 'modified_?by', 'assigned_?to', 'owned_?by',
    'submitted_?by', 'requested_?by', 'approved_?by', 'reviewed_?by',
    'reported_?by', 'signed_?by', 'verified_?by', 'processed_?by',
    'handled_?by', 'managed_?by',
    // Generic record vocabulary
    'contact', 'details', 'record', 'entry', 'item', 'subject', 'party',
    'counterparty',
    // "<entity>_id" references
    'client_?id', 'customer_?id', 'user_?id', 'member_?id', 'patient_?id',
    'case_?id', 'ticket_?id', 'account_?id', 'subscriber_?id',
].join('|')})$`, 'i');
|
|
527
|
+
/**
 * Key names whose presence marks the enclosing object as a record about a
 * person (or other entity carrying PII).
 *
 * getSemanticLabel() tests every sibling key against this pattern; a single
 * match is enough for keys listed in AMBIGUOUS_FIELDS to be redacted as well.
 *
 * Matching is case-insensitive and anchored to the whole key; `_?` makes the
 * underscore optional.
 */
const PII_INDICATOR_FIELDS = new RegExp(`^(?:${[
    // Names and contact details
    'name', 'first_?name', 'last_?name', 'full_?name', 'email', 'phone',
    'mobile',
    // Core identifiers and personal facts
    'ssn', 'iban', 'address', 'dob', 'birth', 'birthday', 'passport', 'salary',
    'compensation', 'national_?id', 'tax_?id',
    // Country-specific identifiers
    'nino', 'bsn', 'pesel', 'cpf', 'aadhaar',
    // Name fields in other languages
    'vorname', 'nachname', 'nom', 'prenom', 'nombre', 'apellido', 'nome',
    'cognome',
    // Compound person fields
    'social_?security', 'employee_?id', 'patient_?name', 'customer_?name',
    'contact_?name',
].join('|')})$`, 'i');
|
|
532
|
+
/**
 * Classify a field purely by its key name.
 *
 * Only the last dot-separated segment of `fieldKey` is considered, with any
 * array indices removed ("user.contacts[0].name" -> "name").
 *
 * @param {string} fieldKey - Bare key or dotted path of the field.
 * @param {string[]} [siblingKeys] - Keys of the object that holds the field;
 *   enables the context-aware check for ambiguous keys when provided.
 * @returns {{label: string, category: string} | null}
 *   - the label/category of the first SEMANTIC_FIELD_MAP entry whose `keys`
 *     pattern matches (the label may be the empty string, which callers in
 *     this file treat as "do not redact the whole value");
 *   - `{ label: 'IDENTIFIER', category: 'identity' }` when the key is
 *     ambiguous and at least one sibling key matches PII_INDICATOR_FIELDS;
 *   - `null` when nothing matches.
 */
function getSemanticLabel(fieldKey, siblingKeys) {
    // "a.b.c" -> "c"; a key without dots is its own last segment.
    const segments = fieldKey.split('.');
    const bareKey = segments[segments.length - 1].replace(/\[\d+\]/g, '');

    // Explicit mappings take precedence over the context-aware fallback.
    const explicit = SEMANTIC_FIELD_MAP.find((mapping) => mapping.keys.test(bareKey));
    if (explicit) {
        return { label: explicit.label, category: explicit.category };
    }

    // Ambiguous keys (e.g. a bare "id") only count when the surrounding
    // object looks like a person/PII record.
    const isAmbiguous = Boolean(siblingKeys) && AMBIGUOUS_FIELDS.test(bareKey);
    if (isAmbiguous && siblingKeys.some((sibling) => PII_INDICATOR_FIELDS.test(sibling))) {
        return { label: 'IDENTIFIER', category: 'identity' };
    }
    return null;
}
|
|
558
|
+
/**
 * Try to redact one primitive value based solely on its field name.
 *
 * Shared by the string and number branches of redactValue() so both apply
 * the same allow-list and confidence rules.
 *
 * @param {string} rawValue - The value as a string (numbers are stringified by the caller).
 * @param {object} ctx - Redaction context (allowList, confidenceThreshold, mode, dedup are read).
 * @param {object} vault - Token vault, passed through to applyRedaction().
 * @param {string} sessionId - Vault session id, passed through to applyRedaction().
 * @param {object[]} hits - Audit list; one entry is appended when a redaction happens.
 * @param {number} itemIndex - Index of the item being processed, recorded on the hit.
 * @param {string} currentPath - Dotted path of the field.
 * @param {string[]} [siblingKeys] - Keys of the enclosing object.
 * @returns {string|null} The replacement, or null when the field name does
 *   not warrant redacting the whole value.
 */
function redactBySemanticKey(rawValue, ctx, vault, sessionId, hits, itemIndex, currentPath, siblingKeys) {
    const semantic = getSemanticLabel(currentPath, siblingKeys);
    // No match, or an empty label: the whole value must not be replaced.
    if (!semantic || !semantic.label) {
        return null;
    }
    if (isAllowListed(rawValue, ctx.allowList)) {
        return null;
    }
    // Context-derived identifiers carry a different confidence than explicit mappings.
    const confidence = semantic.label === 'IDENTIFIER'
        ? context_1.AMBIGUOUS_FIELD_CONFIDENCE
        : context_1.SEMANTIC_CONFIDENCE;
    if (!(confidence >= (ctx.confidenceThreshold ?? 0))) {
        return null;
    }
    const replacement = applyRedaction(rawValue, { label: semantic.label, category: semantic.category }, ctx.mode, vault, sessionId, ctx.dedup);
    hits.push({
        token: replacement,
        original: '***', // the raw value is never written to the audit trail
        patternName: `semantic_${semantic.label}`,
        patternLabel: semantic.label,
        category: semantic.category,
        field: currentPath,
        itemIndex,
        confidence,
    });
    return replacement;
}
/**
 * Main redaction engine: recursively walks a JSON-like value and returns a
 * redacted copy. Combines field-name ("semantic") detection with the
 * text-based detection performed by redactText().
 *
 * - Strings: skipped entirely when shouldProcessField() rejects the path;
 *   otherwise redacted as a whole when the field name calls for it, else
 *   handed to redactText().
 * - Numbers: redacted as a whole (the result is then a string) when the
 *   field name calls for it; the same field rules and allow list as for
 *   strings are honoured.
 * - Arrays: each element is processed under `path[index]` and inherits the
 *   sibling keys of the object holding the array, so context-aware detection
 *   also covers arrays of primitives.
 * - Objects: rebuilt key by key; the object's own keys become the sibling
 *   context of its values.
 * - Anything else (booleans, null, undefined, ...) is returned unchanged.
 *
 * Semantic detection is skipped altogether when `ctx.skipSemantic` is set.
 *
 * @param {*} value - Value to redact.
 * @param {object} ctx - Redaction context.
 * @param {object} vault - Token vault.
 * @param {string} sessionId - Vault session id.
 * @param {object[]} hits - Audit list, appended to in place.
 * @param {number} itemIndex - Index of the item being processed.
 * @param {string} [currentPath=''] - Dotted path of `value` inside the item.
 * @param {string[]} [siblingKeys] - Keys of the object that holds `value`.
 * @returns {*} Redacted copy of `value`.
 */
function redactValue(value, ctx, vault, sessionId, hits, itemIndex, currentPath = '', siblingKeys) {
    if (typeof value === 'string') {
        if (!shouldProcessField(currentPath, ctx.fieldMode, ctx.fieldRules)) {
            return value;
        }
        // Blank strings are never replaced by field name alone.
        if (!ctx.skipSemantic && value.trim().length > 0) {
            const replacement = redactBySemanticKey(value, ctx, vault, sessionId, hits, itemIndex, currentPath, siblingKeys);
            if (replacement !== null) {
                return replacement;
            }
        }
        // Fall through to text-based detection.
        return redactText(value, ctx, vault, sessionId, hits, itemIndex, currentPath);
    }
    if (typeof value === 'number') {
        // Numbers have no free text to scan, so only field-name detection
        // applies - subject to the same field rules as strings.
        if (ctx.skipSemantic || !shouldProcessField(currentPath, ctx.fieldMode, ctx.fieldRules)) {
            return value;
        }
        const replacement = redactBySemanticKey(String(value), ctx, vault, sessionId, hits, itemIndex, currentPath, siblingKeys);
        return replacement === null ? value : replacement;
    }
    if (Array.isArray(value)) {
        return value.map((element, index) => redactValue(element, ctx, vault, sessionId, hits, itemIndex, `${currentPath}[${index}]`, siblingKeys));
    }
    if (value !== null && typeof value === 'object') {
        const entries = Object.entries(value);
        // Sibling keys give getSemanticLabel() the context for ambiguous keys.
        const ownKeys = entries.map(([key]) => key);
        // Object.fromEntries defines own data properties, so a literal
        // "__proto__" key is copied as data instead of changing the prototype.
        return Object.fromEntries(entries.map(([key, child]) => {
            const childPath = currentPath ? `${currentPath}.${key}` : key;
            return [key, redactValue(child, ctx, vault, sessionId, hits, itemIndex, childPath, ownKeys)];
        }));
    }
    return value;
}
|
|
632
|
+
/**
 * Redact PII inside a single text string.
 *
 * Four detection stages run in order, each on the output of the previous one:
 *   1. deny list      - user-supplied values/regexes that are always redacted;
 *   2. built-in regex - patterns selected by `ctx.enabledPatterns`, filtered by
 *                       validator, allow list and confidence threshold;
 *   3. name dictionary - free-text person names (skipped when `ctx.skipSemantic`);
 *   4. custom regex   - user-supplied patterns that pass isSafeRegex().
 *
 * Every redaction appends an entry to `hits`; the matched text itself is
 * never recorded (`original` is always '***').
 *
 * @param {string} text - Text to scan.
 * @param {object} ctx - Redaction context (confidenceThreshold, denyList,
 *   enabledPatterns, allowList, customPatterns, mode, dedup, skipSemantic).
 * @param {object} vault - Token vault, passed through to applyRedaction().
 * @param {string} sessionId - Vault session id.
 * @param {object[]} hits - Audit list, appended to in place.
 * @param {number} itemIndex - Index of the item being processed.
 * @param {string} fieldPath - Path of the field the text came from.
 * @returns {string} The text with detected values replaced.
 */
function redactText(text, ctx, vault, sessionId, hits, itemIndex, fieldPath) {
    const threshold = ctx.confidenceThreshold ?? 0;
    // Escape a literal so it can be embedded in a RegExp source.
    const escapeLiteral = (literal) => literal.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    // NOTE(review): TOKEN_RE is defined outside this block; if it carries the
    // g or y flag, .test() is stateful across calls - confirm it does not.
    let result = text;
    // ─── DENY LIST: always redact these values first ───
    if (ctx.denyList && ctx.denyList.length > 0) {
        for (const entry of ctx.denyList) {
            try {
                // An empty value would compile to a regex that matches at every
                // position and inject a token between all characters.
                if (entry.value == null || entry.value === '') {
                    continue;
                }
                let re;
                switch (entry.type) {
                    case 'regex':
                        if (!isSafeRegex(entry.value)) {
                            continue;
                        }
                        re = new RegExp(entry.value, 'gi');
                        break;
                    case 'contains':
                        re = new RegExp(escapeLiteral(entry.value), 'gi');
                        break;
                    default: // exact
                        re = new RegExp(`\\b${escapeLiteral(entry.value)}\\b`, 'gi');
                }
                result = result.replace(re, (match) => {
                    // Leave zero-length matches and existing tokens alone.
                    if (match === '' || TOKEN_RE.test(match)) {
                        return match;
                    }
                    const replacement = applyRedaction(match, { label: 'DENY_LIST', category: 'other' }, ctx.mode, vault, sessionId, ctx.dedup);
                    hits.push({
                        token: replacement, original: '***',
                        patternName: 'denyList', patternLabel: 'DENY_LIST',
                        category: 'other', field: fieldPath, itemIndex,
                        confidence: context_1.DENY_LIST_CONFIDENCE,
                    });
                    return replacement;
                });
            }
            catch { /* best effort: skip malformed entries / invalid regexes */ }
        }
    }
    // ─── BUILT-IN PATTERNS with confidence scoring ───
    const activePatterns = (0, patterns_1.getPatternsByNames)(ctx.enabledPatterns);
    for (const pattern of activePatterns) {
        // Fresh instance so no lastIndex state leaks from the shared pattern.
        const re = new RegExp(pattern.regex.source, pattern.regex.flags);
        // Match offsets refer to the string being scanned in THIS pass, which
        // differs from `text` as soon as an earlier stage replaced anything.
        const subject = result;
        result = subject.replace(re, (match, ...args) => {
            // Skip already-redacted tokens
            if (TOKEN_RE.test(match) || (match.startsWith('[') && match.endsWith(']'))) {
                return match;
            }
            // Run validator if present
            const validatorPassed = pattern.validate ? pattern.validate(match) : true;
            if (pattern.validate && !validatorPassed) {
                return match;
            }
            // Allow list check: skip if value is allow-listed
            if (isAllowListed(match, ctx.allowList)) {
                return match;
            }
            // Replacer arguments are (...captures, offset, string[, groups]).
            // Captures are strings or undefined, so the first number is the
            // offset whether or not the pattern has named groups.
            const matchOffset = args.find((arg) => typeof arg === 'number') ?? 0;
            const confidence = (0, context_1.calculateConfidence)(pattern.name, !!pattern.validate, validatorPassed, subject, matchOffset, matchOffset + match.length);
            // Skip if below confidence threshold
            if (confidence < threshold) {
                return match;
            }
            const replacement = applyRedaction(match, pattern, ctx.mode, vault, sessionId, ctx.dedup);
            hits.push({
                token: replacement, original: '***',
                patternName: pattern.name, patternLabel: pattern.label,
                category: pattern.category, field: fieldPath, itemIndex,
                confidence,
            });
            return replacement;
        });
    }
    // ─── NAME DICTIONARY detection (free-text names) ───
    if (!ctx.skipSemantic) {
        // Keep the scanned string: the start/end offsets reported by the
        // detector are relative to it, not to `text` or the evolving `result`.
        const scanned = result;
        const nameMatches = (0, names_1.detectNamesInText)(scanned);
        for (const nm of nameMatches) {
            // Skip if already redacted (contains token brackets)
            if (nm.name.includes('[') && nm.name.includes(']')) {
                continue;
            }
            // Allow list check
            if (isAllowListed(nm.name, ctx.allowList)) {
                continue;
            }
            // Base confidence, raised when name-context words surround the match.
            let nameConfidence = 0.65;
            if ((0, context_2.hasContextWords)(scanned, nm.start, nm.end, names_1.NAME_CONTEXT_WORDS)) {
                nameConfidence = 0.80;
            }
            if (nameConfidence < threshold) {
                continue;
            }
            const replacement = applyRedaction(nm.name, { label: 'PERSON_NAME', category: 'identity' }, ctx.mode, vault, sessionId, ctx.dedup);
            // Literal replacement of every occurrence of the name.
            result = result.split(nm.name).join(replacement);
            hits.push({
                token: replacement, original: '***',
                patternName: 'personNameDict', patternLabel: 'PERSON_NAME',
                category: 'identity', field: fieldPath, itemIndex,
                confidence: nameConfidence,
            });
        }
    }
    // ─── CUSTOM PATTERNS with safety validation ───
    for (const cp of ctx.customPatterns ?? []) {
        if (!cp.label || !cp.regex) {
            continue;
        }
        if (!isSafeRegex(cp.regex)) {
            continue;
        }
        try {
            const re = new RegExp(cp.regex, 'g');
            result = result.replace(re, (match) => {
                // Patterns such as "a*" also match the empty string; replacing
                // those would scatter tokens through the text.
                if (match === '' || TOKEN_RE.test(match)) {
                    return match;
                }
                if (isAllowListed(match, ctx.allowList)) {
                    return match;
                }
                if (context_1.CUSTOM_PATTERN_CONFIDENCE < threshold) {
                    return match;
                }
                const pseudoPattern = {
                    label: cp.label.toUpperCase(),
                    category: cp.category || 'identity',
                };
                const replacement = applyRedaction(match, pseudoPattern, ctx.mode, vault, sessionId, ctx.dedup);
                hits.push({
                    token: replacement, original: '***',
                    patternName: `custom_${cp.label}`, patternLabel: pseudoPattern.label,
                    category: pseudoPattern.category, field: fieldPath, itemIndex,
                    confidence: context_1.CUSTOM_PATTERN_CONFIDENCE,
                });
                return replacement;
            });
        }
        catch {
            // Skip invalid regex
        }
    }
    return result;
}
|
|
765
|
+
/**
 * Restore redacted tokens back to their original values.
 *
 * Walks strings, arrays and plain objects recursively and returns a restored
 * copy; other values are returned unchanged. When the vault has no session
 * for `sessionId`, `value` itself is returned untouched.
 *
 * Replacement is a true single pass over each string: all tokens are matched
 * with one alternation, so text inserted by a restoration is never scanned
 * again (an original value that happens to contain token-like text stays as
 * it was), and longer tokens win over tokens that are a prefix of them.
 *
 * @param {*} value - Value containing tokens.
 * @param {object} vault - Vault exposing `getSession(sessionId)`, which yields
 *   an object whose `entries` map token -> `{ original }`.
 * @param {string} sessionId - Session whose tokens should be restored.
 * @returns {*} Restored copy of `value`.
 */
function restoreValue(value, vault, sessionId) {
    const session = vault.getSession(sessionId);
    if (!session) {
        return value;
    }
    // token -> original. Empty tokens would match everywhere and entries
    // without an original cannot be restored, so both are left out.
    const originals = new Map();
    for (const [token, entry] of Object.entries(session.entries)) {
        if (token !== '' && entry?.original != null) {
            originals.set(token, String(entry.original));
        }
    }
    // Longest first, so "TOK_10" is not consumed as "TOK_1" followed by "0".
    const alternatives = [...originals.keys()]
        .sort((a, b) => b.length - a.length)
        .map((token) => token.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'));
    const matcher = alternatives.length > 0 ? new RegExp(alternatives.join('|'), 'g') : null;
    const restore = (node) => {
        if (typeof node === 'string') {
            // A replacer function keeps "$&"-style sequences in originals literal.
            return matcher ? node.replace(matcher, (token) => originals.get(token)) : node;
        }
        if (Array.isArray(node)) {
            return node.map((element) => restore(element));
        }
        if (node !== null && typeof node === 'object') {
            // Object.fromEntries defines own data properties, so a literal
            // "__proto__" key is copied as data instead of changing the prototype.
            return Object.fromEntries(Object.entries(node).map(([key, child]) => [key, restore(child)]));
        }
        return node;
    };
    return restore(value);
}
|
|
794
|
+
/**
 * Summarise collected hits into an audit report.
 *
 * SECURITY: this function adds no original values to the report; `hits` is
 * passed through as given, and the redaction functions in this file record
 * every hit with `original: '***'`.
 *
 * @param {string} sessionId - Session the report belongs to.
 * @param {object[]} hits - Hits collected during redaction; each is counted
 *   once under its `category` and once under its `patternLabel`.
 * @returns {{sessionId: string, timestamp: string, totalHits: number,
 *   hitsByCategory: Object<string, number>, hitsByPattern: Object<string, number>,
 *   hits: object[]}} Report stamped with the current time in ISO-8601 form.
 */
function buildReport(sessionId, hits) {
    // Count hits grouped by the value of one of their properties.
    const tallyBy = (property) => {
        const counts = {};
        for (const hit of hits) {
            const bucket = hit[property];
            counts[bucket] = (counts[bucket] || 0) + 1;
        }
        return counts;
    };
    const hitsByCategory = tallyBy('category');
    const hitsByPattern = tallyBy('patternLabel');
    const timestamp = new Date().toISOString();
    return {
        sessionId,
        timestamp,
        totalHits: hits.length,
        hitsByCategory,
        hitsByPattern,
        hits,
    };
}
|