n8n-nodes-redactor 3.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. package/LICENSE +42 -0
  2. package/README.dev.md +153 -0
  3. package/README.md +443 -0
  4. package/README.npm.md +443 -0
  5. package/dist/nodes/PiiRedactor/PiiRedactor.node.d.ts +5 -0
  6. package/dist/nodes/PiiRedactor/PiiRedactor.node.js +1093 -0
  7. package/dist/nodes/PiiRedactor/__tests__/encryption.test.d.ts +1 -0
  8. package/dist/nodes/PiiRedactor/__tests__/encryption.test.js +200 -0
  9. package/dist/nodes/PiiRedactor/__tests__/engine.test.d.ts +1 -0
  10. package/dist/nodes/PiiRedactor/__tests__/engine.test.js +524 -0
  11. package/dist/nodes/PiiRedactor/__tests__/operations.test.d.ts +1 -0
  12. package/dist/nodes/PiiRedactor/__tests__/operations.test.js +316 -0
  13. package/dist/nodes/PiiRedactor/__tests__/patterns-global.test.d.ts +1 -0
  14. package/dist/nodes/PiiRedactor/__tests__/patterns-global.test.js +427 -0
  15. package/dist/nodes/PiiRedactor/__tests__/patterns.test.d.ts +1 -0
  16. package/dist/nodes/PiiRedactor/__tests__/patterns.test.js +481 -0
  17. package/dist/nodes/PiiRedactor/__tests__/phase1.test.d.ts +1 -0
  18. package/dist/nodes/PiiRedactor/__tests__/phase1.test.js +343 -0
  19. package/dist/nodes/PiiRedactor/__tests__/phase3.test.d.ts +1 -0
  20. package/dist/nodes/PiiRedactor/__tests__/phase3.test.js +275 -0
  21. package/dist/nodes/PiiRedactor/__tests__/phase4.test.d.ts +1 -0
  22. package/dist/nodes/PiiRedactor/__tests__/phase4.test.js +184 -0
  23. package/dist/nodes/PiiRedactor/__tests__/presidio.test.d.ts +1 -0
  24. package/dist/nodes/PiiRedactor/__tests__/presidio.test.js +170 -0
  25. package/dist/nodes/PiiRedactor/__tests__/security.test.d.ts +1 -0
  26. package/dist/nodes/PiiRedactor/__tests__/security.test.js +178 -0
  27. package/dist/nodes/PiiRedactor/__tests__/semantic.test.d.ts +1 -0
  28. package/dist/nodes/PiiRedactor/__tests__/semantic.test.js +319 -0
  29. package/dist/nodes/PiiRedactor/__tests__/vault.test.d.ts +1 -0
  30. package/dist/nodes/PiiRedactor/__tests__/vault.test.js +247 -0
  31. package/dist/nodes/PiiRedactor/audit.d.ts +48 -0
  32. package/dist/nodes/PiiRedactor/audit.js +192 -0
  33. package/dist/nodes/PiiRedactor/classification.d.ts +33 -0
  34. package/dist/nodes/PiiRedactor/classification.js +118 -0
  35. package/dist/nodes/PiiRedactor/context.d.ts +57 -0
  36. package/dist/nodes/PiiRedactor/context.js +260 -0
  37. package/dist/nodes/PiiRedactor/encryption.d.ts +45 -0
  38. package/dist/nodes/PiiRedactor/encryption.js +158 -0
  39. package/dist/nodes/PiiRedactor/engine.d.ts +23 -0
  40. package/dist/nodes/PiiRedactor/engine.js +888 -0
  41. package/dist/nodes/PiiRedactor/injection.d.ts +46 -0
  42. package/dist/nodes/PiiRedactor/injection.js +425 -0
  43. package/dist/nodes/PiiRedactor/names.d.ts +25 -0
  44. package/dist/nodes/PiiRedactor/names.js +188 -0
  45. package/dist/nodes/PiiRedactor/patterns.d.ts +17 -0
  46. package/dist/nodes/PiiRedactor/patterns.js +1742 -0
  47. package/dist/nodes/PiiRedactor/presidio.d.ts +77 -0
  48. package/dist/nodes/PiiRedactor/presidio.js +264 -0
  49. package/dist/nodes/PiiRedactor/profiles.d.ts +47 -0
  50. package/dist/nodes/PiiRedactor/profiles.js +139 -0
  51. package/dist/nodes/PiiRedactor/pseudonymize.d.ts +20 -0
  52. package/dist/nodes/PiiRedactor/pseudonymize.js +203 -0
  53. package/dist/nodes/PiiRedactor/redact.png +0 -0
  54. package/dist/nodes/PiiRedactor/redact.svg +3 -0
  55. package/dist/nodes/PiiRedactor/ropa.d.ts +63 -0
  56. package/dist/nodes/PiiRedactor/ropa.js +70 -0
  57. package/dist/nodes/PiiRedactor/types.d.ts +82 -0
  58. package/dist/nodes/PiiRedactor/types.js +3 -0
  59. package/dist/nodes/PiiRedactor/vault.d.ts +61 -0
  60. package/dist/nodes/PiiRedactor/vault.js +352 -0
  61. package/package.json +87 -0
@@ -0,0 +1,888 @@
1
+ "use strict";
2
// Module-interop helper (reuses an existing this.__createBinding when one is present).
// Exposes property `k` of module object `m` on object `o` under the name `k2`
// (defaults to `k`). Looks like standard compiler-emitted boilerplate — NOTE(review): confirm
// it is generated output before hand-editing.
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { // no usable descriptor: fall back to a live getter
6
+ desc = { enumerable: true, get: function() { return m[k]; } }; // reads m[k] on every access, so later changes to m stay visible
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) { // fallback when Object.create is unavailable: plain one-time copy
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
// Module-interop helper (reuses an existing this.__setModuleDefault when one is present).
// Stores `v` as the "default" property of `o`: as an enumerable data property when
// Object.create exists, otherwise by plain assignment.
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
15
+ }) : function(o, v) {
16
+ o["default"] = v;
17
+ });
18
// Module-interop helper (reuses an existing this.__importStar when one is present).
// Given the result of require(): returns it untouched when it is flagged __esModule;
// otherwise builds a fresh object that re-exposes every own property except "default"
// via __createBinding and sets "default" to the original module value.
+ var __importStar = (this && this.__importStar) || (function () {
19
+ var ownKeys = function(o) { // self-replacing: the first call picks the implementation, later calls go straight to it
20
+ ownKeys = Object.getOwnPropertyNames || function (o) {
21
+ var ar = [];
22
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
+ return ar;
24
+ };
25
+ return ownKeys(o);
26
+ };
27
+ return function (mod) {
28
+ if (mod && mod.__esModule) return mod; // already flagged as an ES module: hand back unchanged
29
+ var result = {};
30
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]); // "default" is skipped here and set separately below
31
+ __setModuleDefault(result, mod); // the whole original module becomes result.default
32
+ return result;
33
+ };
34
+ })();
35
+ Object.defineProperty(exports, "__esModule", { value: true });
36
+ exports.redactValue = redactValue;
37
+ exports.restoreValue = restoreValue;
38
+ exports.enhanceWithPresidio = enhanceWithPresidio;
39
+ exports.buildReport = buildReport;
40
+ const crypto = __importStar(require("crypto"));
41
+ const patterns_1 = require("./patterns");
42
+ const context_1 = require("./context");
43
+ const names_1 = require("./names");
44
+ const context_2 = require("./context");
45
+ const pseudonymize_1 = require("./pseudonymize");
46
+ const presidio_1 = require("./presidio");
47
/**
 * Check whether a value is covered by the allow list, i.e. should be SKIPPED
 * (left as-is) instead of being redacted.
 *
 * Supported entry types:
 *  - 'exact'    : the value must be strictly equal to `entry.value`
 *  - 'contains' : the value must contain `entry.value` as a substring
 *  - 'regex'    : `entry.value` is compiled case-insensitively and tested
 *                 against the value, but only when isSafeRegex() accepts it
 *  - any other type is treated like 'exact'
 *
 * A 'contains' or 'regex' entry whose `value` is missing, not a string, or
 * empty is ignored. Without that guard such an entry matches every input
 * (`"x".includes("")` is true, `new RegExp(undefined)` matches anything) and
 * one blank row in the configuration would silently disable redaction.
 *
 * @param {string} value - The detected text that is about to be redacted.
 * @param {Array<{type: string, value: string}>} [allowList] - Allow-list entries.
 * @returns {boolean} true when at least one entry matches; false for an empty
 *   or missing list, and for any entry whose evaluation throws.
 */
function isAllowListed(value, allowList) {
    if (!allowList || allowList.length === 0)
        return false;
    return allowList.some((entry) => {
        try {
            const needle = entry.value;
            const hasNeedle = typeof needle === 'string' && needle.length > 0;
            switch (entry.type) {
                case 'contains':
                    return hasNeedle && value.includes(needle);
                case 'regex':
                    // Order matters: never compile a pattern the safety check rejected.
                    return hasNeedle && isSafeRegex(needle) && new RegExp(needle, 'i').test(value);
                case 'exact':
                default:
                    return value === needle;
            }
        }
        catch {
            // Best effort by design: a malformed entry must not abort the run,
            // it simply does not allow-list anything.
            return false;
        }
    });
}
73
/**
 * Build the placeholder token for a pattern label and a numeric index.
 * Deterministic: the same (label, index) pair always produces the same
 * string, shaped like `[LABEL_index]`.
 */
function generateToken(label, index) {
    return '['.concat(label, '_', index, ']');
}
79
+ /**
80
+ * Token format regex — used to skip already-redacted regions (the usage sites are not visible in this excerpt).
+ * Shape: "[", one upper-case letter, any run of upper-case letters / digits / underscores, "_", one or more digits, "]".
+ * Anchored with ^ and $, so it only matches a string that is exactly one token; it will not find a token embedded in longer text.
81
+ */
82
+ const TOKEN_RE = /^\[[A-Z][A-Z0-9_]*_\d+\]$/;
83
/**
 * Produce a partially masked rendering of a detected value.
 *
 * Rules, applied in this order:
 *  1. Values of length <= 2 are replaced entirely by asterisks.
 *  2. label 'EMAIL' and the value contains '@': keep the first character of
 *     the local part and of the host, e.g. "john@example.com" -> "j***@e***.com".
 *     The split happens on the LAST '@' so a local part that itself contains
 *     '@' is handled as one unit. Empty parts contribute no character, so the
 *     literal text "undefined" never appears. A dot-less domain gets no
 *     trailing dot.
 *  3. Labels starting with 'PHONE', or 'FAX': "***" plus the last four digits,
 *     provided the value has at least four digits; otherwise rule 5 applies.
 *  4. 'CREDIT_CARD' / 'AMEX': "****-****-****-" plus the last four digits.
 *  5. Anything else: first and last character kept, everything between starred.
 *
 * @param {string} value - Text to mask.
 * @param {string} label - Pattern label that selected the value.
 * @returns {string} The masked text.
 */
function maskValue(value, label) {
    if (value.length <= 2)
        return '*'.repeat(value.length);
    if (label === 'EMAIL' && value.includes('@')) {
        const atIndex = value.lastIndexOf('@');
        const local = value.slice(0, atIndex);
        const domain = value.slice(atIndex + 1);
        const dotIndex = domain.indexOf('.');
        const host = dotIndex === -1 ? domain : domain.slice(0, dotIndex);
        // Keeps its leading '.', and is empty when the domain has no dot at all.
        const suffix = dotIndex === -1 ? '' : domain.slice(dotIndex);
        // charAt() returns '' for an empty part, whereas part[0] would be
        // undefined and end up concatenated as the text "undefined".
        return local.charAt(0) + '***' + '@' + host.charAt(0) + '***' + suffix;
    }
    if (label.startsWith('PHONE') || label === 'FAX') {
        const digits = value.replace(/\D/g, '');
        if (digits.length >= 4) {
            return '***' + digits.slice(-4);
        }
    }
    if (label === 'CREDIT_CARD' || label === 'AMEX') {
        const digits = value.replace(/\D/g, '');
        return '****-****-****-' + digits.slice(-4);
    }
    return value[0] + '*'.repeat(value.length - 2) + value[value.length - 1];
}
108
/**
 * Fingerprint a value: SHA-256 over the input, rendered as hex and cut down
 * to the first 12 characters.
 */
function hashValue(value) {
    const hasher = crypto.createHash('sha256');
    hasher.update(value);
    const hexDigest = hasher.digest('hex');
    return hexDigest.slice(0, 12);
}
114
/**
 * Turn one detected match into its replacement text according to `mode`,
 * recording a vault entry (with the pattern's category) for the reversible
 * modes.
 *
 * Modes:
 *  - 'token'        : `[LABEL_n]` placeholder; stored in the vault. `n` is the
 *                     number of entries already in the session (0 when the
 *                     session does not exist), so indices are shared across
 *                     all labels of a session.
 *  - 'mask'         : partially masked text via maskValue().
 *  - 'hash'         : `[LABEL:<truncated hash>]` via hashValue().
 *  - 'redact'       : the fixed text `[REDACTED]`.
 *  - 'blackout'     : one U+2588 block character per character of the match.
 *  - 'remove'       : empty string.
 *  - 'pseudonymize' : pseudonym from generatePseudonym(); stored in the vault
 *                     under the `token` field so it can be restored later.
 *  - anything else  : the match is returned unchanged.
 *
 * For 'token' and 'pseudonymize', when `dedup` is truthy and the vault already
 * holds an entry for the same original text, that entry's token is reused and
 * nothing new is stored.
 *
 * @param {string} match - The detected text.
 * @param {{label: string, category?: string}} pattern - Pattern that produced
 *   the match; a missing `category` property is recorded as 'identity'.
 * @param {string} mode - Redaction mode (see above).
 * @param {object} vault - Must provide findByOriginal, getSession and addEntry.
 * @param {string} sessionId - Vault session the entry belongs to.
 * @param {boolean} dedup - Reuse an existing token for an identical original.
 * @returns {string} Replacement text for the match.
 */
function applyRedaction(match, pattern, mode, vault, sessionId, dedup) {
    switch (mode) {
        case 'token': {
            if (dedup) {
                const existing = vault.findByOriginal(sessionId, match);
                if (existing)
                    return existing.token;
            }
            const session = vault.getSession(sessionId);
            const index = session ? Object.keys(session.entries).length : 0;
            const token = generateToken(pattern.label, index);
            vault.addEntry(sessionId, {
                token,
                original: match,
                patternLabel: pattern.label,
                category: ('category' in pattern ? pattern.category : 'identity'),
                createdAt: new Date().toISOString(),
            });
            return token;
        }
        case 'mask':
            return maskValue(match, pattern.label);
        case 'hash':
            return `[${pattern.label}:${hashValue(match)}]`;
        case 'redact':
            return `[REDACTED]`;
        case 'blackout':
            return '\u2588'.repeat(match.length); // █████ visual black bars
        case 'remove':
            return ''; // Complete removal, no trace left
        case 'pseudonymize': {
            if (dedup) {
                const existing = vault.findByOriginal(sessionId, match);
                if (existing)
                    return existing.token;
            }
            const pseudonym = (0, pseudonymize_1.generatePseudonym)(sessionId, match, pattern.label);
            // No entry index is needed here: the pseudonym itself is the
            // lookup key, stored in the `token` field for restoration.
            vault.addEntry(sessionId, {
                token: pseudonym,
                original: match,
                patternLabel: pattern.label,
                category: ('category' in pattern ? pattern.category : 'identity'),
                createdAt: new Date().toISOString(),
            });
            return pseudonym;
        }
        default:
            return match;
    }
}
170
/**
 * Decide whether the value at a JSON path takes part in redaction.
 *
 *  - 'allowlist': only paths matched by at least one 'include' rule.
 *  - 'denylist' : every path except those matched by an 'exclude' rule.
 *  - 'all' and any other mode: every path.
 */
function shouldProcessField(fieldPath, fieldMode, fieldRules) {
    switch (fieldMode) {
        case 'allowlist':
            return fieldRules.some((rule) => rule.mode === 'include' && fieldPathMatches(fieldPath, rule.field));
        case 'denylist': {
            const isExcluded = fieldRules.some((rule) => rule.mode === 'exclude' && fieldPathMatches(fieldPath, rule.field));
            return !isExcluded;
        }
        default:
            return true;
    }
}
184
/**
 * Safe wildcard matching for JSON paths.
 *
 * The pattern is first escaped in full, so every character is literal, and
 * only then are the two supported wildcards re-enabled:
 *  - `[*]` matches one numeric array index:   "contacts[*].phone" ~ "contacts[3].phone"
 *  - `*`   matches one path segment (no '.'):  "*.email" ~ "user.email"
 * The whole path must match (the expression is anchored at both ends).
 *
 * The `[*]` rule has to match the ESCAPED form `\[\*\]`. The previous
 * expression quantified the backslash instead of matching the asterisk, so it
 * never fired for `[*]` (only for `[]`), and `[*]` fell through to the
 * generic `*` rule, which also accepts non-numeric bracket content.
 *
 * @param {string} actual - Concrete path of the field being visited.
 * @param {string} pattern - Rule pattern, optionally containing wildcards.
 * @returns {boolean} true when `actual` matches `pattern`.
 */
function fieldPathMatches(actual, pattern) {
    // Escape ALL regex special characters first
    const escaped = pattern.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    // Now re-enable our wildcards. Order matters: `[*]` must be handled before
    // the bare `*`, otherwise its asterisk would be consumed by the second rule.
    const regexStr = escaped
        .replace(/\\\[\\\*\\\]/g, '\\[\\d+\\]') // \[\*\] -> \[\d+\]
        .replace(/\\\*/g, '[^.]+'); // \* -> [^.]+
    try {
        return new RegExp(`^${regexStr}$`).test(actual);
    }
    catch {
        // Not expected after full escaping; kept so a bad rule can never throw.
        return false;
    }
}
203
+ /**
204
+ * Validate a user-supplied regex for safety (prevent ReDoS).
205
+ * Rejects nested quantifiers and other dangerous patterns.
206
+ */
207
+ function isSafeRegex(regexStr) {
208
+ // Reject nested quantifiers like (a+)+, (a*)+, (a?)+, (a{1,})+
209
+ if (/(\+|\*|\?|\{[^}]+\})\s*(\+|\*|\?|\{)/.test(regexStr))
210
+ return false;
211
+ // Reject patterns longer than 500 chars
212
+ if (regexStr.length > 500)
213
+ return false;
214
+ // Try to compile it
215
+ try {
216
+ new RegExp(regexStr);
217
+ return true;
218
+ }
219
+ catch {
220
+ return false;
221
+ }
222
+ }
223
+ /**
224
+ * Semantic field-name mapping: when the JSON key name matches,
225
+ * redact the ENTIRE value regardless of regex matches.
226
+ * This catches structured data where the field name IS the context.
227
+ */
228
+ const SEMANTIC_FIELD_MAP = [
229
+ // ═══════════════════════════════════════════
230
+ // PERSON NAMES (EN/DE/FR/ES/IT/PT/NL/SV/NO/DA/PL/RU/JA/KO/ZH/AR/TR)
231
+ // Covers: camelCase, snake_case, PascalCase, abbreviations, prefixed, suffixed
232
+ // ═══════════════════════════════════════════
233
+ {
234
+ keys: /^(?:name|first_?name|last_?name|full_?name|given_?name|family_?name|middle_?name|surname|maiden_?name|nick_?name|salutation|display_?name|contact_?name|customer_?name|client_?name|patient_?name|user_?name|member_?name|subscriber_?name|account_?name|holder_?name|card_?holder|cardholder_?name|beneficiary_?name|recipient_?name|sender_?name|author_?name|owner_?name|guardian_?name|parent_?name|spouse_?name|partner_?name|emergency_?contact|next_?of_?kin|applicant_?name|candidate_?name|student_?name|teacher_?name|employee_?name|manager_?name|supervisor_?name|guarantor|witness|signatory|cosigner|fname|lname|mname|fn|ln|mn|firstname|lastname|middlename|fullname|givenname|familyname|sur_?name|name_?first|name_?last|name_?full|legal_?name|birth_?name|married_?name|former_?name|known_?as|aka|vorname|nachname|familienname|geburtsname|anzeige_?name|kundenname|prenom|nom|nom_?de_?famille|nom_?complet|nom_?de_?jeune_?fille|nombre|apellido|apellidos|nombre_?completo|nome|cognome|nome_?completo|naam|achternaam|voornaam|volledige_?naam|fornamn|efternamn|fornavn|etternavn|imie|nazwisko|imya|familiya|sei|mei|shimei|seimei|xing|ming|xingming|isim|soyisim|soyad|ism|nasab)$/i,
235
+ label: 'PERSON_NAME',
236
+ category: 'identity',
237
+ },
238
+ // ═══════════════════════════════════════════
239
+ // EMPLOYEE / STAFF / WORKER IDs
240
+ // ═══════════════════════════════════════════
241
+ {
242
+ keys: /^(?:employee_?id|emp_?id|emp_?no|emp_?num|emp_?number|staff_?id|staff_?no|staff_?number|personnel_?(?:id|number|no|nr)|worker_?id|worker_?number|workforce_?id|badge_?(?:id|number|no)|clock_?(?:id|number)|payroll_?(?:id|number|no)|hr_?id|hr_?number|contractor_?id|temp_?id|intern_?id|associate_?id|team_?member_?id|personalnummer|mitarbeiter_?(?:id|nummer|nr|number)|arbeitnehmer_?nr|numero_?employe|matricule|numero_?dipendente|medewerkernummer|personeelsnummer|anstallningsnummer|ansattnummer|numer_?pracownika)$/i,
243
+ label: 'EMPLOYEE_ID',
244
+ category: 'identity',
245
+ },
246
+ // ═══════════════════════════════════════════
247
+ // DEPARTMENT / DIVISION / TEAM / BUSINESS UNIT
248
+ // ═══════════════════════════════════════════
249
+ {
250
+ keys: /^(?:department|dept|department_?name|dept_?name|dept_?code|division|division_?name|business_?unit|bu|cost_?center|cost_?centre|profit_?center|team|team_?name|group|group_?name|unit|unit_?name|section|section_?name|branch|branch_?name|office|office_?name|site|site_?name|location_?name|work_?location|facility|plant|factory|warehouse|abteilung|abteilungsname|bereich|referat|dienststelle|departement|service|direction|dipartimento|reparto|ufficio|afdeling|divisie|avdelning|avdeling|wydzial|dzial|otdel|bu_?name|org_?unit)$/i,
251
+ label: 'DEPARTMENT',
252
+ category: 'identity',
253
+ },
254
+ // ═══════════════════════════════════════════
255
+ // JOB TITLE / POSITION / ROLE / DESIGNATION / OCCUPATION
256
+ // ═══════════════════════════════════════════
257
+ {
258
+ keys: /^(?:position|job_?title|job_?role|job_?function|job_?type|job_?name|job_?description|job_?code|job_?level|job_?grade|job_?band|job_?family|job_?category|designation|occupation|profession|seniority|career_?level|functional_?title|working_?title|official_?title|employment_?type|contract_?type|work_?type|engagement_?type|berufsbezeichnung|stelle|stellenbezeichnung|position_?title|funktion|dienstbezeichnung|beruf|taetigkeit|poste|fonction|intitule_?du_?poste|titulo|cargo|puesto|posizione|ruolo|qualifica|mansione|functie|functietitel|befattning|stilling|stanowisko|zawod|dolzhnost|shokumei|yakushoku|zhiwei|zhicheng|gorev|unvan|pozisyon)$/i,
259
+ label: 'JOB_TITLE',
260
+ category: 'identity',
261
+ },
262
+ // ═══════════════════════════════════════════
263
+ // SALARY / COMPENSATION / PAY / INCOME / WAGES
264
+ // ═══════════════════════════════════════════
265
+ {
266
+ keys: /^(?:salary|compensation|wage|wages|income|pay|pay_?rate|hourly_?rate|daily_?rate|annual_?salary|monthly_?salary|weekly_?salary|yearly_?salary|base_?pay|base_?salary|gross_?pay|gross_?salary|net_?pay|net_?salary|take_?home|total_?comp|total_?compensation|total_?pay|total_?earnings|earnings|remuneration|stipend|allowance|bonus|bonus_?amount|commission|commission_?amount|overtime|overtime_?pay|severance|severance_?pay|pension|pension_?amount|retirement_?contribution|stock_?options|equity|rsu|vesting|deduction|deductions|tax_?withholding|withholding|benefits_?value|ctc|cost_?to_?company|package|comp_?package|offer_?amount|starting_?salary|current_?salary|previous_?salary|expected_?salary|desired_?salary|salary_?range|pay_?grade|pay_?band|pay_?scale|gehalt|brutto_?gehalt|netto_?gehalt|grundgehalt|jahresgehalt|monatsgehalt|lohn|bruttolohn|nettolohn|verguetung|entgelt|bezuege|zuschlag|praemie|provision|salaire|remuneration_?brute|remuneration_?nette|stipendio|retribuzione|salario|sueldo|remuneracao|loon|salaris|lon|lonn|wynagrodzenie|pensja|zarplata|oklad|kyuyo|nenshu|gongzi|xinshui|maas|ucret)$/i,
267
+ label: 'SALARY',
268
+ category: 'financial',
269
+ },
270
+ // ═══════════════════════════════════════════
271
+ // DATES: Hire, Start, End, Termination, Birth, Death, etc.
272
+ // ═══════════════════════════════════════════
273
+ {
274
+ keys: /^(?:hire_?date|start_?date|joining_?date|date_?of_?hire|date_?of_?joining|employment_?date|employment_?start|onboarding_?date|effective_?date|commencement_?date|appointment_?date|probation_?(?:start|end)_?date|termination_?date|end_?date|leaving_?date|resignation_?date|separation_?date|exit_?date|last_?day|last_?working_?day|notice_?date|retirement_?date|contract_?(?:start|end)_?date|trial_?(?:start|end)_?date|review_?date|appraisal_?date|promotion_?date|transfer_?date|anniversary_?date|tenure_?start|eintrittsdatum|einstellungsdatum|austrittsdatum|kuendigungsdatum|probezeitende|vertragsbeginn|vertragsende|date_?embauche|date_?de_?debut|date_?de_?fin|date_?sortie|fecha_?de_?inicio|fecha_?de_?fin|data_?assunzione|data_?cessazione|datum_?indiensttreding|datum_?uitdiensttreding|anstallningsdatum|ansettelsesdato|data_?zatrudnienia)$/i,
275
+ label: 'EMPLOYMENT_DATE',
276
+ category: 'temporal',
277
+ },
278
+ {
279
+ keys: /^(?:dob|date_?of_?birth|birth_?date|birthday|birth_?day|born|born_?on|born_?date|geburtsdatum|geburtstag|date_?de_?naissance|fecha_?de_?nacimiento|data_?di_?nascita|geboortedatum|fodelsedatum|fodselsdato|syntymapaiva|data_?de_?nascimento|data_?urodzenia|den_?rozhdeniya|seinengappi|shengri|dogum_?tarihi)$/i,
280
+ label: 'DOB',
281
+ category: 'temporal',
282
+ },
283
+ {
284
+ keys: /^(?:date_?of_?death|death_?date|deceased_?date|died_?on|died|sterbedatum|todesdatum|date_?de_?deces|fecha_?de_?defuncion|data_?di_?morte)$/i,
285
+ label: 'DATE_OF_DEATH',
286
+ category: 'temporal',
287
+ },
288
+ {
289
+ keys: /^(?:person_?age|patient_?age|customer_?age|employee_?age|age_?years|current_?age|alter|edad|eta|yaş|wiek|vozrast|nenrei|nianling)$/i,
290
+ label: 'AGE',
291
+ category: 'temporal',
292
+ },
293
+ // ═══════════════════════════════════════════
294
+ // ADDRESS / LOCATION (every variation in 15+ languages)
295
+ // ═══════════════════════════════════════════
296
+ {
297
+ keys: /^(?:address|street_?address|home_?address|mailing_?address|shipping_?address|billing_?address|postal_?address|residential_?address|permanent_?address|temporary_?address|current_?address|previous_?address|work_?address|office_?address|business_?address|delivery_?address|correspondence_?address|legal_?address|registered_?address|address_?line_?[123]|addr_?[123]|addr|street|street_?name|street_?line|street_?line_?[12]|house_?number|house_?no|building|apartment|apt|flat|suite|floor|room|unit_?number|block|lot|po_?box|post_?box|city|town|municipality|village|locality|suburb|district|borough|county|state|province|region|territory|country|country_?code|country_?name|nation|zip|zip_?code|zipcode|postal_?code|postcode|post_?code|pin_?code|area_?code|geo_?location|latitude|longitude|lat|lng|coordinates|place|place_?of_?birth|birth_?place|place_?of_?residence|domicile|residence|neighborhood|neighbourhood|quarter|canton|prefecture|commune|departement|land|bundesland|kreis|bezirk|ortsteil|stadtteil|gemeinde|anschrift|adresse|wohnadresse|wohnort|strasse|hausnummer|plz|postleitzahl|ort|stadt|wohnhaft|rue|ville|code_?postal|cedex|calle|direccion|codigo_?postal|localidad|poblacion|provincia|comunidad|via|indirizzo|cap|citta|comune|straat|huisnummer|postbus|plaats|woonplaats|gata|postnummer|sted|ulica|kod_?pocztowy|miasto|adres|gorod|pochta|jyusho|jutaku|dizhi|chengshi|youbian|adres|mahalle|ilce|il|posta_?kodu)$/i,
298
+ label: 'ADDRESS',
299
+ category: 'location',
300
+ },
301
+ // ═══════════════════════════════════════════
302
+ // PHONE / MOBILE / FAX / CONTACT NUMBERS
303
+ // ═══════════════════════════════════════════
304
+ {
305
+ keys: /^(?:phone|phone_?number|phone_?no|mobile|mobile_?number|mobile_?no|cell|cell_?phone|cell_?number|telephone|tel|tel_?number|fax|fax_?number|fax_?no|contact_?number|contact_?phone|home_?phone|work_?phone|office_?phone|business_?phone|direct_?line|direct_?dial|extension|ext|callback|callback_?number|sms_?number|whatsapp|whatsapp_?number|primary_?phone|secondary_?phone|alternate_?phone|emergency_?phone|telefon|telefonnummer|mobilnummer|handy|handynummer|festnetz|rufnummer|durchwahl|faxnummer|telephone_?portable|portable|fixe|numero_?de_?telephone|numero_?de_?portable|telefono|cellulare|numero_?di_?telefono|numero_?de_?telefone|celular|telefoon|mobiel|telefoonnummer|mobilnummer_?2|telefonnr|tlf|telefon_?komorkowy|telefon_?stacjonarny|nomer_?telefona|denwabangou|keitai|dianhua|shouji|telefon_?numarasi|cep_?telefonu)$/i,
306
+ label: 'PHONE',
307
+ category: 'contact',
308
+ },
309
+ // ═══════════════════════════════════════════
310
+ // EMAIL (every variation)
311
+ // ═══════════════════════════════════════════
312
+ {
313
+ keys: /^(?:email|email_?address|e_?mail|mail|mail_?address|primary_?email|secondary_?email|work_?email|personal_?email|business_?email|contact_?email|login_?email|notification_?email|recovery_?email|backup_?email|alternate_?email|alt_?email|reply_?to|from_?email|to_?email|cc|bcc|email_?id|e_?mail_?adresse|elektronische_?post|correo|correo_?electronico|indirizzo_?email|posta_?elettronica|emailadres|epostadress|epost|adres_?email|pochta|mail_?adresi|eposta)$/i,
314
+ label: 'EMAIL',
315
+ category: 'contact',
316
+ },
317
+ // ═══════════════════════════════════════════
318
+ // SSN / NATIONAL ID / TAX ID / GOV ID
319
+ // ═══════════════════════════════════════════
320
+ {
321
+ keys: /^(?:ssn|social_?security|social_?security_?number|social_?insurance|social_?insurance_?number|sin|national_?id|national_?id_?number|national_?identity|identity_?number|identity_?card|id_?number|id_?card|id_?no|id_?document|personal_?id|personal_?number|personal_?code|citizen_?id|resident_?id|registration_?number|civil_?id|civil_?registration|gov_?id|government_?id|tax_?id|tax_?number|tax_?identification|taxpayer_?id|tin|itin|ein|vat_?number|vat_?id|steuer_?id|steuer_?nummer|steueridentifikationsnummer|steuernummer|sozialversicherungsnummer|personalausweisnummer|ausweisnummer|nif|nie|dni|cif|nif_?numero|bsn|burger_?service_?nummer|nino|national_?insurance|national_?insurance_?number|pps|pps_?number|pesel|nip|regon|personnummer|fodselsnummer|cpr|cpr_?nummer|hetu|henkilotunnus|cpf|rg|cnpj|curp|rfc|aadhaar|aadhar|pan|pan_?number|nric|fin|my_?number|rrn|ahv|avs|codice_?fiscale|carta_?identita|carte_?identite|numero_?securite_?sociale|nir|rijksregisternummer|numero_?identite|kimlik|tc_?kimlik|emirates_?id|iqama)$/i,
322
+ label: 'NATIONAL_ID',
323
+ category: 'identity',
324
+ },
325
+ // ═══════════════════════════════════════════
326
+ // BANK ACCOUNT / FINANCIAL ACCOUNTS
327
+ // ═══════════════════════════════════════════
328
+ {
329
+ keys: /^(?:account_?number|account_?no|acct|acct_?number|acct_?no|bank_?account|bank_?account_?number|bank_?acct|checking_?account|savings_?account|current_?account|deposit_?account|iban|bic|swift|swift_?code|routing_?number|routing_?no|aba|sort_?code|bsb|bsb_?number|clabe|transit_?number|branch_?code|branch_?number|kontonummer|bankleitzahl|blz|konto|girokonto|sparkonto|numero_?de_?compte|rib|numero_?conto|numero_?de_?cuenta|rekeningnummer|bankgiro|plusgiro|numer_?konta|schet|kouza_?bangou)$/i,
330
+ label: 'ACCOUNT_NUMBER',
331
+ category: 'financial',
332
+ },
333
+ // ═══════════════════════════════════════════
334
+ // CREDIT / DEBIT CARD
335
+ // ═══════════════════════════════════════════
336
+ {
337
+ keys: /^(?:card_?number|credit_?card|credit_?card_?number|debit_?card|debit_?card_?number|cc_?number|cc_?no|pan|card_?no|card_?num|payment_?card|kartennummer|kreditkarte|kreditkartennummer|numero_?de_?carte|numero_?carta|numero_?de_?tarjeta|kaartnummer|cvv|cvc|cvv2|cvc2|cid|security_?code|card_?verification|card_?expiry|expiry_?date|expiration_?date|exp_?date|valid_?thru|valid_?through|card_?holder|cardholder_?name)$/i,
338
+ label: 'CREDIT_CARD',
339
+ category: 'financial',
340
+ },
341
+ // ═══════════════════════════════════════════
342
+ // INSURANCE (health, life, auto, property, etc.)
343
+ // ═══════════════════════════════════════════
344
+ {
345
+ keys: /^(?:insurance|insurance_?(?:id|number|no|policy|claim|type|provider|company|plan|group)|policy_?(?:number|no|id)|policy_?holder|claim_?(?:number|no|id|ref|reference)|health_?plan|health_?plan_?id|member_?id|member_?number|subscriber_?id|subscriber_?number|beneficiary_?id|beneficiary_?number|group_?number|group_?id|plan_?id|plan_?number|coverage_?id|enrollment_?id|certificate_?number|versicherungsnummer|versicherung|krankenversicherung|krankenkasse|police_?nummer|police_?nr|numero_?assurance|numero_?police|polizza|numero_?polizza|numero_?de_?seguro|verzekeringsnummer|polisnummer)$/i,
346
+ label: 'INSURANCE',
347
+ category: 'financial',
348
+ },
349
+ // ═══════════════════════════════════════════
350
+ // PASSPORT
351
+ // ═══════════════════════════════════════════
352
+ {
353
+ keys: /^(?:passport|passport_?number|passport_?no|passport_?id|passport_?num|travel_?document|travel_?doc_?number|reisepass|reisepassnummer|passnummer|numero_?passeport|passeport|passaporto|numero_?passaporto|numero_?de_?pasaporte|paspoort|paspoortnummer|passnummer_?2|pass_?nr|pasaporte|pasaporte_?numero)$/i,
354
+ label: 'PASSPORT',
355
+ category: 'identity',
356
+ },
357
+ // ═══════════════════════════════════════════
358
+ // DRIVER LICENSE
359
+ // ═══════════════════════════════════════════
360
+ {
361
+ keys: /^(?:driver_?license|drivers_?license|driver_?licence|drivers_?licence|driving_?license|driving_?licence|dl|dl_?number|dl_?no|license_?number|licence_?number|license_?no|licence_?no|permit_?number|learner_?permit|fuehrerschein|fuehrerscheinnummer|permis_?de_?conduire|numero_?permis|patente|patente_?di_?guida|permiso_?de_?conducir|carnet_?de_?conducir|rijbewijs|rijbewijsnummer|koerkort|forerkort|prawo_?jazdy|voditelskoe_?udostoverenie|unten_?menkyo|jiashi_?zheng|ehliyet|surucu_?belgesi)$/i,
362
+ label: 'DRIVER_LICENSE',
363
+ category: 'identity',
364
+ },
365
+ // ═══════════════════════════════════════════
366
+ // MEDICAL / HEALTH / PATIENT
367
+ // ═══════════════════════════════════════════
368
+ {
369
+ keys: /^(?:mrn|medical_?record|medical_?record_?number|patient_?id|patient_?number|patient_?name|health_?id|health_?card|health_?number|hospital_?id|hospital_?number|chart_?number|case_?number|encounter_?id|visit_?id|admission_?id|discharge_?id|diagnosis|diagnosis_?code|icd|icd_?code|icd_?10|procedure_?code|cpt|cpt_?code|ndc|ndc_?code|medication|medications|drug|drug_?name|prescription|prescription_?id|rx|rx_?number|dosage|blood_?type|blood_?group|allergy|allergies|condition|conditions|medical_?condition|disability|disability_?status|disability_?type|disability_?degree|impairment|mental_?health|psychiatric|immunization|vaccination|vaccine|lab_?result|test_?result|vital_?signs|bmi|weight|height|npi|npi_?number|dea|dea_?number|provider_?id|physician_?id|doctor_?name|treating_?physician|primary_?care|attending|referring|insurance_?diagnosis|pre_?existing|chronic|krankenakte|patientennummer|diagnose|medikament|rezept|blutgruppe|behinderung|grad_?der_?behinderung|gdb|schwerbehindertenausweis|dossier_?medical|numero_?patient|groupe_?sanguin|cartella_?clinica|historia_?clinica|prontuario)$/i,
370
+ label: 'MEDICAL',
371
+ category: 'medical',
372
+ },
373
+ // ═══════════════════════════════════════════
374
+ // COMPANY / ORGANIZATION / EMPLOYER / BUSINESS
375
+ // ═══════════════════════════════════════════
376
+ {
377
+ keys: /^(?:company|company_?name|organization|organisation|org|org_?name|employer|employer_?name|business|business_?name|corporation|corp|corp_?name|subsidiary|parent_?company|holding|group_?name|entity|entity_?name|legal_?entity|legal_?name|registered_?name|trading_?name|brand|brand_?name|vendor|vendor_?name|supplier|supplier_?name|partner|partner_?name|client|client_?name|agency|agency_?name|institute|institution|school|school_?name|university|university_?name|college|hospital|hospital_?name|practice|practice_?name|firm|firm_?name|llc|gmbh|ag|inc|ltd|plc|sa|srl|bv|nv|firma|unternehmen|unternehmensname|arbeitgeber|betrieb|handelsname|societe|raison_?sociale|denominazione|ragione_?sociale|empresa|nombre_?empresa|bedrijf|bedrijfsnaam|werkgever|foretag|arbetsgivare|virksomhed|arbejdsgiver|firma_?2|spolka|pracodawca|kompaniya|rabotodatel|kaisha|kigyou|gongsi|guyong_?danwei|sirket|isveren)$/i,
378
+ label: 'COMPANY',
379
+ category: 'identity',
380
+ },
381
+ // ═══════════════════════════════════════════
382
+ // EDUCATION / ACADEMIC
383
+ // ═══════════════════════════════════════════
384
+ {
385
+ keys: /^(?:student_?id|student_?number|student_?name|enrollment_?id|enrollment_?number|matriculation|matriculation_?number|matrikel|matrikelnummer|gpa|grade_?point|test_?score|sat_?score|act_?score|gre_?score|gmat_?score|degree|diploma|qualification|major|minor|field_?of_?study|program|faculty|school_?name|university|college|campus|class_?of|graduation|graduation_?date|grad_?year|transcript|academic_?record|education_?level|highest_?education|studiengang|abschluss|zeugnis|schulname|hochschule|diplome|scolarite|titolo_?di_?studio|expediente)$/i,
386
+ label: 'EDUCATION',
387
+ category: 'identity',
388
+ },
389
+ // ═══════════════════════════════════════════
390
+ // VEHICLE / TRANSPORT
391
+ // ═══════════════════════════════════════════
392
+ {
393
+ keys: /^(?:vin|vehicle_?identification|vehicle_?id|vehicle_?number|chassis_?number|license_?plate|plate_?number|plate_?no|registration|registration_?number|reg_?number|reg_?no|tag_?number|kennzeichen|fahrzeug_?id|fahrgestellnummer|immatriculation|plaque|targa|matricula|kenteken|registreringsnummer|numer_?rejestracyjny)$/i,
394
+ label: 'VEHICLE',
395
+ category: 'vehicle',
396
+ },
397
+ // ═══════════════════════════════════════════
398
+ // CREDENTIALS / AUTH / SECRETS / TOKENS
399
+ // ═══════════════════════════════════════════
400
+ {
401
+ keys: /^(?:password|passwd|pwd|pass_?word|secret|secret_?key|api_?key|api_?secret|api_?token|access_?token|refresh_?token|auth_?token|bearer_?token|session_?token|session_?key|private_?key|encryption_?key|signing_?key|master_?key|client_?secret|consumer_?key|consumer_?secret|oauth_?token|credentials|passwort|kennwort|schluessel|mot_?de_?passe|contrasena|clave|senha|wachtwoord|sleutel|losenord|haslo|parol|mima|sifre)$/i,
402
+ label: 'CREDENTIAL',
403
+ category: 'enterprise',
404
+ },
405
+ // ═══════════════════════════════════════════
406
+ // RACE / ETHNICITY / NATIONALITY / CITIZENSHIP
407
+ // ═══════════════════════════════════════════
408
+ {
409
+ keys: /^(?:race|ethnicity|ethnic|ethnic_?origin|ethnic_?group|nationality|citizenship|national_?origin|heritage|ancestry|background|rasse|herkunft|ethnische_?herkunft|nationalitaet|staatsangehoerigkeit|ethnie|origine|nationalite|citoyennete|raza|etnia|nacionalidad|ciudadania|razza|nazionalita|cittadinanza|nationaliteit|etnicitet|medborgarskap|narodowsc|rasa|natsionalnost|grazhdanstvo|kokuseki|minzu|guoji|milliyet|vatandaslik|uyruk)$/i,
410
+ label: 'ETHNICITY',
411
+ category: 'identity',
412
+ },
413
+ // ═══════════════════════════════════════════
414
+ // RELIGION / BELIEF / POLITICAL
415
+ // ═══════════════════════════════════════════
416
+ {
417
+ keys: /^(?:religion|religious|religious_?affiliation|religious_?belief|faith|creed|denomination|confession|church|mosque|synagogue|temple|spiritual|belief|philosophical_?belief|political_?(?:party|affiliation|opinion|preference|view)|party|party_?membership|union|union_?membership|trade_?union|konfession|religionszugehoerigkeit|glaubensbekenntnis|kirchenzugehoerigkeit|politische_?partei|gewerkschaft|religion_?2|culte|parti_?politique|syndicat|partido_?politico|sindicato|religie|politieke_?partij|vakbond|religion_?3|tro|parti|fagforening|religia|partia|zwiazek_?zawodowy|religiya|partiya|profsoyuz|shukyo|seitou|zongjiao|dangpai|din|mezhep|parti_?2|sendika)$/i,
418
+ label: 'BELIEF_POLITICAL',
419
+ category: 'identity',
420
+ },
421
+ // ═══════════════════════════════════════════
422
+ // GENDER / SEX / MARITAL STATUS / FAMILY
423
+ // ═══════════════════════════════════════════
424
+ {
425
+ keys: /^(?:gender|sex|gender_?identity|sexual_?orientation|sexuality|preferred_?pronouns|pronouns|marital_?status|civil_?status|relationship_?status|marriage|married|single|divorced|widowed|separated|domestic_?partner|spouse|partner|family_?status|number_?of_?(?:children|kids|dependents)|dependents|geschlecht|familienstand|sexuelle_?orientierung|sexe|etat_?civil|orientation_?sexuelle|genero|estado_?civil|orientacion_?sexual|sesso|stato_?civile|geslacht|burgerlijke_?staat|kon|civilstand|plec|stan_?cywilny|pol|semeynoe_?polozhenie|seibetsu|xingbie|hunyin|cinsiyet|medeni_?hal)$/i,
426
+ label: 'PERSONAL_STATUS',
427
+ category: 'identity',
428
+ },
429
+ // ═══════════════════════════════════════════
430
+ // LEGAL / JUDICIAL / CRIMINAL
431
+ // ═══════════════════════════════════════════
432
+ {
433
+ keys: /^(?:case_?number|case_?id|case_?ref|docket|docket_?number|court|court_?name|judge|attorney|lawyer|legal_?representative|legal_?counsel|criminal_?record|conviction|offense|offence|charge|charges|sentence|verdict|probation|parole|arrest|arrest_?date|booking|booking_?number|inmate|prisoner|offender|inmate_?(?:id|number)|mugshot|fingerprint|warrant|bail|bond|aktenzeichen|gerichtsaktenzeichen|strafregister|vorstrafe|verurteilung|anwalt|rechtsanwalt|richter|numero_?affaire|casier_?judiciaire|condamnation|avocat|juge|numero_?expediente|antecedentes_?penales|condena|abogado|juez)$/i,
434
+ label: 'LEGAL',
435
+ category: 'identity',
436
+ },
437
+ // ═══════════════════════════════════════════
438
+ // BIOMETRIC / PHYSICAL CHARACTERISTICS
439
+ // ═══════════════════════════════════════════
440
+ {
441
+ keys: /^(?:biometric|fingerprint|face_?id|facial|facial_?recognition|iris|retina|voiceprint|voice_?id|palm_?print|hand_?geometry|dna|genetic|genome|genotype|karyotype|blood_?type|blood_?group|eye_?color|hair_?color|skin_?color|height|weight|bmi|body_?mass|distinguishing_?marks|tattoo|scar|birthmark|photo|photograph|picture|image|avatar|headshot|portrait|mugshot|selfie|foto|bild|lichtbild|passfoto|passbild)$/i,
442
+ label: 'BIOMETRIC',
443
+ category: 'biometric',
444
+ },
445
+ // ═══════════════════════════════════════════
446
+ // UTILITY / SUBSCRIPTION / LOYALTY / MEMBERSHIP
447
+ // ═══════════════════════════════════════════
448
+ {
449
+ keys: /^(?:utility_?(?:account|number|id)|electricity_?(?:account|number)|gas_?(?:account|number)|water_?(?:account|number)|meter_?(?:number|id)|customer_?(?:number|id|no)|account_?(?:id|ref)|reference_?(?:number|id|no|code)|subscription_?(?:id|number)|membership_?(?:id|number|no)|member_?(?:number|no)|loyalty_?(?:id|number|card)|rewards?_?(?:id|number|card)|frequent_?flyer|frequent_?flyer_?(?:number|id)|mileage_?(?:number|id)|points_?(?:id|number)|library_?(?:card|id|number)|kundennummer|mitgliedsnummer|abonnement|vertragsnummer)$/i,
450
+ label: 'ACCOUNT_REF',
451
+ category: 'financial',
452
+ },
453
+ // ═══════════════════════════════════════════
454
+ // IP / HOSTNAME / SERVER / NETWORK
455
+ // ═══════════════════════════════════════════
456
+ {
457
+ keys: /^(?:ip|ip_?address|ipv4|ipv6|server|server_?name|server_?address|hostname|host|host_?name|domain|domain_?name|mac|mac_?address|subnet|gateway|proxy|vpn|dns|url|uri|endpoint|api_?url|webhook_?url|callback_?url|redirect_?url|origin|referrer|referer|user_?agent|device_?id|device_?name|machine_?name|computer_?name|workstation)$/i,
458
+ label: 'NETWORK',
459
+ category: 'network',
460
+ },
461
+ // ═══════════════════════════════════════════
462
+ // CONTRACTS / INVOICES / LEGAL DOCUMENTS
463
+ // ═══════════════════════════════════════════
464
+ {
465
+ keys: /^(?:contract_?(?:number|no|id|ref)|invoice_?(?:number|no|id|ref)|order_?(?:number|no|id|ref)|purchase_?order|po_?(?:number|no)|quote_?(?:number|no|id)|proposal_?(?:number|no|id)|agreement_?(?:number|no|id)|reference_?(?:number|no)|receipt_?(?:number|no|id)|transaction_?(?:id|ref|number)|payment_?(?:id|ref|reference)|billing_?(?:id|ref|number)|Rechnungsnummer|Vertragsnummer|Auftragsnummer|Bestellnummer|Angebotsnummer|numero_?(?:facture|contrat|commande)|numero_?(?:fattura|contratto|ordine))$/i,
466
+ label: 'DOCUMENT_REF',
467
+ category: 'financial',
468
+ },
469
+ // ═══════════════════════════════════════════
470
+ // FINANCIAL AMOUNTS / REVENUE / SENSITIVE NUMBERS
471
+ // ═══════════════════════════════════════════
472
+ {
473
+ keys: /^(?:amount|total|subtotal|grand_?total|tax_?amount|vat_?amount|discount|balance|outstanding|due|paid|refund|credit|debit|revenue|profit|loss|margin|cost|price|unit_?price|net_?amount|gross_?amount|fee|charge|interest|penalty|fine|deposit|withdrawal|transfer_?amount|Betrag|Gesamtbetrag|Steuerbetrag|Rabatt|Saldo|montant|somme|total_?ttc|total_?ht|importe|monto|importo)$/i,
474
+ label: 'FINANCIAL_AMOUNT',
475
+ category: 'financial',
476
+ },
477
+ // ═══════════════════════════════════════════
478
+ // SIGNATURE / AUTHORIZATION / CONSENT
479
+ // ═══════════════════════════════════════════
480
+ {
481
+ keys: /^(?:signature|signed_?by|authorized_?by|approved_?by|witnessed_?by|notarized_?by|certified_?by|consent|consent_?date|consent_?given|power_?of_?attorney|proxy|delegate|Unterschrift|unterzeichnet_?von|genehmigt_?von|Vollmacht)$/i,
482
+ label: 'SIGNATURE',
483
+ category: 'identity',
484
+ },
485
+ // ═══════════════════════════════════════════
486
+ // BANK / PAYMENT DETAILS IN DOCUMENTS
487
+ // ═══════════════════════════════════════════
488
+ {
489
+ keys: /^(?:bank_?name|bank|branch|branch_?name|beneficiary|beneficiary_?name|beneficiary_?account|payer|payer_?name|payee|payee_?name|remitter|sender_?(?:name|account|bank)|receiver_?(?:name|account|bank)|correspondent_?bank|intermediary_?bank|Bankname|Begünstigter|Zahlungsempfänger|Auftraggeber)$/i,
490
+ label: 'BANK_DETAIL',
491
+ category: 'financial',
492
+ },
493
+ // ═══════════════════════════════════════════
494
+ // TAX / FISCAL DETAILS
495
+ // ═══════════════════════════════════════════
496
+ {
497
+ keys: /^(?:tax_?(?:rate|class|bracket|status|year|period|return|filing)|fiscal_?(?:year|period|code)|withholding|exemption|deductible|taxable_?(?:income|amount)|gross_?income|net_?income|adjusted_?gross|agi|Steuerklasse|Steuerjahr|Steuererklärung|Freibetrag)$/i,
498
+ label: 'TAX_DETAIL',
499
+ category: 'financial',
500
+ },
501
+ // ═══════════════════════════════════════════
502
+ // LANGUAGE / CULTURAL IDENTITY (GDPR Art.9)
503
+ // ═══════════════════════════════════════════
504
+ {
505
+ keys: /^(?:language|mother_?tongue|native_?language|spoken_?language|preferred_?language|first_?language|primary_?language|sprache|muttersprache|langue|langue_?maternelle|idioma|lengua|lingua|taal|moedertaal|sprak|jezyk|yazyk|gengo|yuyan|dil|ana_?dil)$/i,
506
+ label: 'LANGUAGE',
507
+ category: 'identity',
508
+ },
509
+ // ═══════════════════════════════════════════
510
+ // NOTES / FREE TEXT (run regex, don't redact whole value)
511
+ // ═══════════════════════════════════════════
512
+ {
513
+ keys: /^(?:notes|note|comment|comments|description|desc|message|msg|body|text|content|remarks|remark|internal_?notes|agent_?notes|case_?notes|memo|memorandum|narrative|summary|abstract|details|detail|reason|explanation|history|log|changelog|audit|feedback|review|observation|assessment|evaluation|recommendation|instruction|instructions|notizen|bemerkung|kommentar|beschreibung|nachricht|anmerkung|vermerk|remarques|commentaire|observation_?2|nota|commento|descrizione|notas|comentario|descripcion|opmerkingen|beschrijving|anteckningar|kommentar_?2|uwagi|komentarz|opis|zametki|kommentariy|opisanie|bikou|beikou|beizhu|shuoming|not|aciklama|yorum)$/i,
514
+ label: '',
515
+ category: 'identity',
516
+ },
517
+ ];
518
+ /**
519
+ * Ambiguous field names that MIGHT be personal data depending on context.
520
+ * Only redacted when sibling fields suggest this is a person/PII record.
+ *
+ * Matching rules: the pattern is anchored (^...$), so the whole key must match,
+ * it is case-insensitive, and "_?" makes the underscore optional
+ * (e.g. "created_by" and "createdby" both match).
+ *
+ * getSemanticLabel() consults this pattern only after SEMANTIC_FIELD_MAP produced
+ * no match, and a hit is reported with the label 'IDENTIFIER'.
+ *
+ * NOTE(review): several alternatives listed here (origin, party, details,
+ * approved_by, signed_by, case_id, customer_id, account_id) are also matched by
+ * SEMANTIC_FIELD_MAP entries, so getSemanticLabel() classifies them there first
+ * and never reaches this pattern for those keys.
521
+ */
522
+ const AMBIGUOUS_FIELDS = /^(?:id|code|number|no|num|nr|ref|reference|key|identifier|value|data|info|source|origin|created_?by|updated_?by|modified_?by|assigned_?to|owned_?by|submitted_?by|requested_?by|approved_?by|reviewed_?by|reported_?by|signed_?by|verified_?by|processed_?by|handled_?by|managed_?by|contact|details|record|entry|item|subject|party|counterparty|client_?id|customer_?id|user_?id|member_?id|patient_?id|case_?id|ticket_?id|account_?id|subscriber_?id)$/i;
523
+ /**
524
+ * Fields that CONFIRM the parent object is about a person/entity with PII.
525
+ * If any sibling field matches this, ambiguous fields get redacted too.
+ *
+ * The pattern is anchored (^...$) and case-insensitive; "_?" makes the
+ * underscore optional (e.g. "first_name" and "firstName" both match).
+ * getSemanticLabel() tests it against each sibling key of the field being
+ * classified, and only when that field itself matched AMBIGUOUS_FIELDS.
526
+ */
527
+ const PII_INDICATOR_FIELDS = /^(?:name|first_?name|last_?name|full_?name|email|phone|mobile|ssn|iban|address|dob|birth|birthday|passport|salary|compensation|national_?id|tax_?id|nino|bsn|pesel|cpf|aadhaar|vorname|nachname|nom|prenom|nombre|apellido|nome|cognome|social_?security|employee_?id|patient_?name|customer_?name|contact_?name)$/i;
528
/**
 * Classify a field purely from its key name.
 *
 * Only the last dot-separated segment of the path is inspected, with array
 * indices removed (e.g. "user.contacts[0].name" -> "name"). Hyphens and
 * whitespace are treated as underscores, so "license-plate" and
 * "License Plate" are classified exactly like "license_plate".
 *
 * @param {string} fieldKey - Dotted path (or bare key) of the field.
 * @param {string[]} [siblingKeys] - Keys of the object holding the field; used
 *   to decide whether an ambiguous key (e.g. a bare "id") belongs to a PII record.
 * @returns {{label: string, category: string} | null}
 *   - a non-empty label: the entire value should be redacted;
 *   - an empty label: the field is free text and should be regex-scanned;
 *   - null: the key carries no semantic meaning.
 */
function getSemanticLabel(fieldKey, siblingKeys) {
    // Key patterns only allow an optional "_" between words, so map the other
    // common separators ("-", spaces) onto it before matching.
    const normalizeKey = (key) => String(key).trim().replace(/[\s-]+/g, '_');
    // Extract the last key segment (e.g., "user.profile.name" -> "name")
    const lastSegment = fieldKey.includes('.') ? fieldKey.split('.').pop() : fieldKey;
    // Drop array indices (e.g., "contacts[0]" -> "contacts")
    const cleanKey = normalizeKey(lastSegment.replace(/\[\d+\]/g, ''));
    // Explicit semantic mappings take precedence over the context-aware rule.
    const mapping = SEMANTIC_FIELD_MAP.find((candidate) => candidate.keys.test(cleanKey));
    if (mapping) {
        return { label: mapping.label, category: mapping.category };
    }
    // Context-aware: an ambiguous key (like bare "id") is only treated as an
    // identifier when at least one sibling key marks the object as a PII record.
    if (siblingKeys && AMBIGUOUS_FIELDS.test(cleanKey)) {
        const hasPiiSiblings = siblingKeys.some((sibling) => PII_INDICATOR_FIELDS.test(normalizeKey(sibling)));
        if (hasPiiSiblings) {
            return { label: 'IDENTIFIER', category: 'identity' };
        }
    }
    return null;
}
554
/**
 * Try to redact a whole value based on what its field name says it is.
 *
 * @param {string} text - String form of the value (numbers are stringified by the caller).
 * @returns {string | null} The replacement produced by applyRedaction(), or null
 *   when the field has no redacting semantic label, the value is allow-listed,
 *   or the semantic confidence does not reach ctx.confidenceThreshold.
 */
function trySemanticRedaction(text, ctx, vault, sessionId, hits, itemIndex, currentPath, siblingKeys) {
    const semantic = getSemanticLabel(currentPath, siblingKeys);
    // An empty label marks a free-text field: never redact it wholesale.
    if (!semantic || !semantic.label)
        return null;
    if (isAllowListed(text, ctx.allowList))
        return null;
    const confidence = semantic.label === 'IDENTIFIER'
        ? context_1.AMBIGUOUS_FIELD_CONFIDENCE
        : context_1.SEMANTIC_CONFIDENCE;
    // Written as a negated ">=" so a missing/NaN confidence never redacts.
    if (!(confidence >= (ctx.confidenceThreshold ?? 0)))
        return null;
    const replacement = applyRedaction(text, { label: semantic.label, category: semantic.category }, ctx.mode, vault, sessionId, ctx.dedup);
    hits.push({
        token: replacement,
        // The original value is deliberately never stored in a hit.
        original: '***',
        patternName: `semantic_${semantic.label}`,
        patternLabel: semantic.label,
        category: semantic.category,
        field: currentPath,
        itemIndex,
        confidence,
    });
    return replacement;
}
/**
 * Main redaction engine - recursively walks JSON and applies redaction.
 * Uses BOTH semantic field-name detection AND regex pattern matching.
 *
 * - Strings: skipped when shouldProcessField() rejects the path; otherwise the
 *   whole value is redacted if the field name is semantically sensitive, else
 *   the text is handed to redactText() for pattern-based detection.
 * - Numbers: only semantic (field-name) redaction applies; the same field
 *   rules and allow list as for strings are honoured. A redacted number is
 *   returned as the replacement string.
 * - Arrays / objects: rebuilt recursively. Array elements inherit the sibling
 *   keys of the property holding the array so that context-aware detection
 *   treats `code: ['A1']` like `code: 'A1'`.
 *
 * @param {*} value - JSON-like value to process.
 * @param {object} ctx - Redaction settings (fieldMode, fieldRules, allowList,
 *   confidenceThreshold, mode, dedup, skipSemantic, ...).
 * @param {object} vault - Passed through to applyRedaction().
 * @param {string} sessionId - Passed through to applyRedaction().
 * @param {object[]} hits - Output array; one entry is appended per redaction.
 * @param {number} itemIndex - Index of the item being processed, recorded in hits.
 * @param {string} [currentPath=''] - Dotted path of `value` within the item.
 * @param {string[]} [siblingKeys] - Keys of the object that contains `value`.
 * @returns {*} A redacted copy of `value` (primitives are returned as-is when untouched).
 */
function redactValue(value, ctx, vault, sessionId, hits, itemIndex, currentPath = '', siblingKeys) {
    if (typeof value === 'string') {
        if (!shouldProcessField(currentPath, ctx.fieldMode, ctx.fieldRules)) {
            return value;
        }
        // Semantic detection is skipped in verify mode (ctx.skipSemantic) and
        // for blank strings, which carry nothing to redact.
        if (!ctx.skipSemantic && value.trim().length > 0) {
            const replacement = trySemanticRedaction(value, ctx, vault, sessionId, hits, itemIndex, currentPath, siblingKeys);
            if (replacement !== null) {
                return replacement;
            }
        }
        // Fall through to regex-based detection
        return redactText(value, ctx, vault, sessionId, hits, itemIndex, currentPath);
    }
    if (typeof value === 'number') {
        // Numbers must respect the include/exclude field rules exactly like strings.
        if (ctx.skipSemantic || !shouldProcessField(currentPath, ctx.fieldMode, ctx.fieldRules)) {
            return value;
        }
        const replacement = trySemanticRedaction(String(value), ctx, vault, sessionId, hits, itemIndex, currentPath, siblingKeys);
        return replacement !== null ? replacement : value;
    }
    if (Array.isArray(value)) {
        return value.map((element, index) => redactValue(element, ctx, vault, sessionId, hits, itemIndex, `${currentPath}[${index}]`, siblingKeys));
    }
    if (value !== null && typeof value === 'object') {
        const entries = Object.entries(value);
        // Collect sibling keys for context-aware detection
        const ownKeys = entries.map(([key]) => key);
        // Object.fromEntries defines own properties, so a literal "__proto__"
        // key is copied as data instead of replacing the result's prototype.
        return Object.fromEntries(entries.map(([key, child]) => {
            const fieldPath = currentPath ? `${currentPath}.${key}` : key;
            return [key, redactValue(child, ctx, vault, sessionId, hits, itemIndex, fieldPath, ownKeys)];
        }));
    }
    return value;
}
628
/**
 * Redact PII in a text string.
 *
 * Detection runs in four stages, each working on the output of the previous one:
 *   1. deny list entries (regex, or literal text matched case-insensitively),
 *   2. built-in patterns selected by ctx.enabledPatterns, with confidence scoring,
 *   3. dictionary-based person-name detection (skipped when ctx.skipSemantic is set),
 *   4. user-defined custom patterns.
 *
 * Offsets reported by a stage always refer to the string that stage scanned,
 * so confidence/context checks are evaluated against that same string rather
 * than the untouched input.
 *
 * @param {string} text - Text to scan.
 * @param {object} ctx - Redaction settings (denyList, allowList, enabledPatterns,
 *   customPatterns, confidenceThreshold, mode, dedup, skipSemantic).
 * @param {object} vault - Passed through to applyRedaction().
 * @param {string} sessionId - Passed through to applyRedaction().
 * @param {object[]} hits - Output array; one entry is appended per redaction.
 * @param {number} itemIndex - Index of the item being processed, recorded in hits.
 * @param {string} fieldPath - Path of the field the text came from, recorded in hits.
 * @returns {string} The text with every accepted match replaced.
 */
function redactText(text, ctx, vault, sessionId, hits, itemIndex, fieldPath) {
    let result = text;
    const threshold = ctx.confidenceThreshold ?? 0;
    const escapeRegExp = (literal) => literal.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    // ─── DENY LIST: always redact these values first ───
    if (ctx.denyList && ctx.denyList.length > 0) {
        for (const entry of ctx.denyList) {
            // An empty value compiles to a pattern that matches at every
            // position and would insert a token between all characters.
            if (typeof entry?.value !== 'string' || entry.value.length === 0)
                continue;
            try {
                let re;
                if (entry.type === 'regex') {
                    if (!isSafeRegex(entry.value))
                        continue;
                    re = new RegExp(entry.value, 'gi');
                }
                else {
                    // 'contains' and 'exact' both match the literal text; no \b
                    // is used so values such as "#SECRET" or "@handle" match.
                    re = new RegExp(escapeRegExp(entry.value), 'gi');
                }
                result = result.replace(re, (match) => {
                    // Zero-width matches (e.g. from "a*") carry nothing to redact.
                    if (match.length === 0 || TOKEN_RE.test(match))
                        return match;
                    const replacement = applyRedaction(match, { label: 'DENY_LIST', category: 'other' }, ctx.mode, vault, sessionId, ctx.dedup);
                    hits.push({
                        token: replacement, original: '***',
                        patternName: 'denyList', patternLabel: 'DENY_LIST',
                        category: 'other', field: fieldPath, itemIndex,
                        confidence: context_1.DENY_LIST_CONFIDENCE,
                    });
                    return replacement;
                });
            }
            catch { /* best effort: an invalid deny-list regex is skipped */ }
        }
    }
    // ─── BUILT-IN PATTERNS with confidence scoring ───
    const activePatterns = (0, patterns_1.getPatternsByNames)(ctx.enabledPatterns);
    for (const pattern of activePatterns) {
        // Fresh RegExp per run so no lastIndex state leaks between calls.
        const re = new RegExp(pattern.regex.source, pattern.regex.flags);
        // Match offsets are relative to this string, not to `text`.
        const subject = result;
        result = subject.replace(re, (match, ...args) => {
            // Skip already-redacted tokens
            if (TOKEN_RE.test(match) || (match.startsWith('[') && match.endsWith(']')))
                return match;
            // Run validator if present
            const validatorPassed = pattern.validate ? pattern.validate(match) : true;
            if (pattern.validate && !validatorPassed)
                return match;
            // Allow list check: skip if value is allow-listed
            if (isAllowListed(match, ctx.allowList))
                return match;
            // Replacer arguments end with (offset, string) and, when the
            // pattern has named groups, an additional trailing groups object.
            const trailing = args[args.length - 1];
            const hasGroupsObject = trailing !== null && typeof trailing === 'object';
            const rawOffset = args[args.length - (hasGroupsObject ? 3 : 2)];
            const matchOffset = typeof rawOffset === 'number' ? rawOffset : 0;
            const confidence = (0, context_1.calculateConfidence)(pattern.name, !!pattern.validate, validatorPassed, subject, matchOffset, matchOffset + match.length);
            // Skip if below confidence threshold
            if (confidence < threshold)
                return match;
            const replacement = applyRedaction(match, pattern, ctx.mode, vault, sessionId, ctx.dedup);
            hits.push({
                token: replacement, original: '***',
                patternName: pattern.name, patternLabel: pattern.label,
                category: pattern.category, field: fieldPath, itemIndex,
                confidence,
            });
            return replacement;
        });
    }
    // ─── NAME DICTIONARY detection (free-text names) ───
    if (!ctx.skipSemantic) {
        // nm.start / nm.end index into the string that was scanned, so keep a
        // snapshot of it for the context-word lookup below.
        const scanned = result;
        const nameMatches = (0, names_1.detectNamesInText)(scanned);
        for (const nm of nameMatches) {
            // Skip if already redacted (contains token brackets)
            if (nm.name.includes('[') && nm.name.includes(']'))
                continue;
            // Allow list check
            if (isAllowListed(nm.name, ctx.allowList))
                continue;
            // Context words near the name raise the confidence.
            const nameConfidence = (0, context_2.hasContextWords)(scanned, nm.start, nm.end, names_1.NAME_CONTEXT_WORDS) ? 0.80 : 0.65;
            if (nameConfidence < threshold)
                continue;
            const replacement = applyRedaction(nm.name, { label: 'PERSON_NAME', category: 'identity' }, ctx.mode, vault, sessionId, ctx.dedup);
            // Replaces every occurrence of the name in the working string.
            result = result.split(nm.name).join(replacement);
            hits.push({
                token: replacement, original: '***',
                patternName: 'personNameDict', patternLabel: 'PERSON_NAME',
                category: 'identity', field: fieldPath, itemIndex,
                confidence: nameConfidence,
            });
        }
    }
    // ─── CUSTOM PATTERNS with safety validation ───
    // All custom patterns share one fixed confidence, so the threshold check
    // does not depend on the individual match.
    const customPatterns = context_1.CUSTOM_PATTERN_CONFIDENCE < threshold ? [] : (ctx.customPatterns ?? []);
    for (const cp of customPatterns) {
        if (!cp.label || !cp.regex)
            continue;
        if (!isSafeRegex(cp.regex))
            continue;
        try {
            const re = new RegExp(cp.regex, 'g');
            const pseudoPattern = {
                label: cp.label.toUpperCase(),
                category: cp.category || 'identity',
            };
            result = result.replace(re, (match) => {
                if (match.length === 0 || TOKEN_RE.test(match))
                    return match;
                if (isAllowListed(match, ctx.allowList))
                    return match;
                const replacement = applyRedaction(match, pseudoPattern, ctx.mode, vault, sessionId, ctx.dedup);
                hits.push({
                    token: replacement, original: '***',
                    patternName: `custom_${cp.label}`, patternLabel: pseudoPattern.label,
                    category: pseudoPattern.category, field: fieldPath, itemIndex,
                    confidence: context_1.CUSTOM_PATTERN_CONFIDENCE,
                });
                return replacement;
            });
        }
        catch {
            // best effort: an invalid custom regex is skipped
        }
    }
    return result;
}
761
/**
 * Restore redacted tokens back to original values.
 *
 * Each string is restored in a genuine single pass: all tokens of the session
 * are matched by one alternation (longest token first), so
 *   - a token that is a prefix of another token cannot corrupt the longer one,
 *   - text inserted by a restoration is never scanned again.
 * Arrays and objects are rebuilt recursively; other values are returned as-is.
 *
 * @param {*} value - JSON-like value possibly containing tokens.
 * @param {object} vault - Object exposing getSession(sessionId).
 * @param {string} sessionId - Session whose `entries` map token -> { original }.
 * @returns {*} A restored copy, or `value` itself when the session is unknown.
 */
function restoreValue(value, vault, sessionId) {
    const session = vault.getSession(sessionId);
    if (!session)
        return value;
    // Token -> original lookup; an empty token can never be matched meaningfully.
    const originals = new Map();
    for (const [token, entry] of Object.entries(session.entries)) {
        if (token.length > 0) {
            originals.set(token, entry.original);
        }
    }
    const escapeRegExp = (literal) => literal.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    // Longest first so the alternation prefers "T10" over its prefix "T1".
    const alternatives = [...originals.keys()]
        .sort((a, b) => b.length - a.length)
        .map(escapeRegExp);
    const tokenPattern = alternatives.length > 0 ? new RegExp(alternatives.join('|'), 'g') : null;
    const restoreString = (text) => {
        if (tokenPattern === null)
            return text;
        return text.replace(tokenPattern, (token) => {
            const original = originals.get(token);
            // Keep the token visible if the vault entry carries no original.
            return original == null ? token : String(original);
        });
    };
    const restoreNode = (node) => {
        if (typeof node === 'string')
            return restoreString(node);
        if (Array.isArray(node))
            return node.map(restoreNode);
        if (node !== null && typeof node === 'object') {
            // fromEntries keeps a literal "__proto__" key as plain data.
            return Object.fromEntries(Object.entries(node).map(([key, child]) => [key, restoreNode(child)]));
        }
        return node;
    };
    return restoreNode(value);
}
790
/**
 * Run Presidio NLP analysis on all string values in an item, then redact the
 * detected entities. Called AFTER regex-based redaction, to catch entities
 * that regex missed (especially PERSON names in free text).
 *
 * Strings are analysed wherever they occur: object properties, array elements
 * and nested arrays. Containers keep their shape (arrays stay arrays).
 * Entities whose spans overlap are merged into one span, labelled after the
 * highest-scoring entity, so a replacement never works on stale offsets.
 * A failed Presidio call leaves that string unchanged (best effort).
 *
 * @param {object|Array} data - JSON-like item data.
 * @param {object} ctx - Settings (presidioUrl, presidioLanguage, allowList,
 *   confidenceThreshold, mode, dedup).
 * @param {object} vault - Passed through to applyRedaction().
 * @param {string} sessionId - Passed through to applyRedaction().
 * @param {object[]} hits - Output array; one entry is appended per redaction.
 * @param {number} itemIndex - Index of the item being processed, recorded in hits.
 * @returns {Promise<object|Array>} A copy of `data` with detected entities
 *   replaced, or `data` itself when ctx.presidioUrl is not set.
 */
async function enhanceWithPresidio(data, ctx, vault, sessionId, hits, itemIndex) {
    if (!ctx.presidioUrl)
        return data;
    const threshold = ctx.confidenceThreshold ?? 0;
    const looksRedacted = (span) => TOKEN_RE.test(span) || (span.startsWith('[') && span.endsWith(']'));
    // Analyse one string and return it with all accepted entities replaced.
    const redactString = async (value, field) => {
        if (value.trim().length === 0)
            return value;
        // Skip values that are already fully redacted (single token)
        if (TOKEN_RE.test(value))
            return value;
        try {
            const entities = await (0, presidio_1.analyzeWithPresidio)(ctx.presidioUrl, value, ctx.presidioLanguage || 'en', 0.4, 10000);
            const eligible = entities.filter((entity) => {
                const span = value.slice(entity.start, entity.end);
                if (span.length === 0 || looksRedacted(span))
                    return false;
                if (isAllowListed(span, ctx.allowList))
                    return false;
                return !(entity.score < threshold);
            });
            if (eligible.length === 0)
                return value;
            // Merge overlapping spans; the best-scoring entity names the span.
            eligible.sort((a, b) => a.start - b.start);
            const spans = [];
            for (const entity of eligible) {
                const previous = spans[spans.length - 1];
                if (previous && entity.start < previous.end) {
                    previous.end = Math.max(previous.end, entity.end);
                    if (entity.score > previous.best.score) {
                        previous.best = entity;
                    }
                }
                else {
                    spans.push({ start: entity.start, end: entity.end, best: entity });
                }
            }
            // Replace from the end so earlier offsets stay valid.
            let text = value;
            const pending = [];
            for (const span of spans.reverse()) {
                const matchedText = value.slice(span.start, span.end);
                const label = (0, presidio_1.getRedactorLabel)(span.best.entity_type);
                const category = (0, presidio_1.getRedactorCategory)(span.best.entity_type);
                const replacement = applyRedaction(matchedText, { label, category }, ctx.mode, vault, sessionId, ctx.dedup);
                text = text.slice(0, span.start) + replacement + text.slice(span.end);
                pending.push({
                    token: replacement,
                    original: '***',
                    patternName: `presidio_${span.best.entity_type}`,
                    patternLabel: label,
                    category,
                    field,
                    itemIndex,
                    confidence: span.best.score,
                });
            }
            // Report hits only once the whole string was rewritten successfully.
            hits.push(...pending);
            return text;
        }
        catch {
            // Presidio call failed for this field — keep the value as it was
            return value;
        }
    };
    // `field` is the key of the nearest enclosing property (plus array index).
    const enhanceNode = async (node, field) => {
        if (typeof node === 'string')
            return redactString(node, field);
        if (Array.isArray(node)) {
            const items = [];
            // Sequential on purpose: keeps hit order deterministic.
            for (const [index, item] of node.entries()) {
                items.push(await enhanceNode(item, `${field}[${index}]`));
            }
            return items;
        }
        if (node !== null && typeof node === 'object') {
            const copy = { ...node };
            for (const [key, child] of Object.entries(copy)) {
                copy[key] = await enhanceNode(child, key);
            }
            return copy;
        }
        return node;
    };
    return enhanceNode(data, '');
}
869
/**
 * Build a redaction audit report from collected hits.
 *
 * SECURITY: the report exposes the hit objects exactly as collected; this
 * function adds no value data of its own (only counts, the session id and a
 * timestamp).
 *
 * Counts are accumulated in Maps and converted with Object.fromEntries, so a
 * category or label that happens to equal an Object.prototype member
 * (e.g. a custom pattern category named "constructor") is counted correctly.
 *
 * @param {string} sessionId - Session the hits belong to.
 * @param {object[]} hits - Collected hits; `category` and `patternLabel` are tallied.
 * @returns {{sessionId: string, timestamp: string, totalHits: number,
 *   hitsByCategory: Object<string, number>, hitsByPattern: Object<string, number>,
 *   hits: object[]}} The report; `timestamp` is the current time in ISO 8601 form.
 */
function buildReport(sessionId, hits) {
    // Count hits per distinct value of the selected property.
    const tally = (selectKey) => {
        const counts = new Map();
        for (const hit of hits) {
            // Property keys are strings; coerce up front so 1 and "1" share a bucket.
            const key = String(selectKey(hit));
            counts.set(key, (counts.get(key) ?? 0) + 1);
        }
        return Object.fromEntries(counts);
    };
    return {
        sessionId,
        timestamp: new Date().toISOString(),
        totalHits: hits.length,
        hitsByCategory: tally((hit) => hit.category),
        hitsByPattern: tally((hit) => hit.patternLabel),
        hits,
    };
}