n8n-nodes-redactor 3.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +42 -0
- package/README.dev.md +153 -0
- package/README.md +443 -0
- package/README.npm.md +443 -0
- package/dist/nodes/PiiRedactor/PiiRedactor.node.d.ts +5 -0
- package/dist/nodes/PiiRedactor/PiiRedactor.node.js +1093 -0
- package/dist/nodes/PiiRedactor/__tests__/encryption.test.d.ts +1 -0
- package/dist/nodes/PiiRedactor/__tests__/encryption.test.js +200 -0
- package/dist/nodes/PiiRedactor/__tests__/engine.test.d.ts +1 -0
- package/dist/nodes/PiiRedactor/__tests__/engine.test.js +524 -0
- package/dist/nodes/PiiRedactor/__tests__/operations.test.d.ts +1 -0
- package/dist/nodes/PiiRedactor/__tests__/operations.test.js +316 -0
- package/dist/nodes/PiiRedactor/__tests__/patterns-global.test.d.ts +1 -0
- package/dist/nodes/PiiRedactor/__tests__/patterns-global.test.js +427 -0
- package/dist/nodes/PiiRedactor/__tests__/patterns.test.d.ts +1 -0
- package/dist/nodes/PiiRedactor/__tests__/patterns.test.js +481 -0
- package/dist/nodes/PiiRedactor/__tests__/phase1.test.d.ts +1 -0
- package/dist/nodes/PiiRedactor/__tests__/phase1.test.js +343 -0
- package/dist/nodes/PiiRedactor/__tests__/phase3.test.d.ts +1 -0
- package/dist/nodes/PiiRedactor/__tests__/phase3.test.js +275 -0
- package/dist/nodes/PiiRedactor/__tests__/phase4.test.d.ts +1 -0
- package/dist/nodes/PiiRedactor/__tests__/phase4.test.js +184 -0
- package/dist/nodes/PiiRedactor/__tests__/presidio.test.d.ts +1 -0
- package/dist/nodes/PiiRedactor/__tests__/presidio.test.js +170 -0
- package/dist/nodes/PiiRedactor/__tests__/security.test.d.ts +1 -0
- package/dist/nodes/PiiRedactor/__tests__/security.test.js +178 -0
- package/dist/nodes/PiiRedactor/__tests__/semantic.test.d.ts +1 -0
- package/dist/nodes/PiiRedactor/__tests__/semantic.test.js +319 -0
- package/dist/nodes/PiiRedactor/__tests__/vault.test.d.ts +1 -0
- package/dist/nodes/PiiRedactor/__tests__/vault.test.js +247 -0
- package/dist/nodes/PiiRedactor/audit.d.ts +48 -0
- package/dist/nodes/PiiRedactor/audit.js +192 -0
- package/dist/nodes/PiiRedactor/classification.d.ts +33 -0
- package/dist/nodes/PiiRedactor/classification.js +118 -0
- package/dist/nodes/PiiRedactor/context.d.ts +57 -0
- package/dist/nodes/PiiRedactor/context.js +260 -0
- package/dist/nodes/PiiRedactor/encryption.d.ts +45 -0
- package/dist/nodes/PiiRedactor/encryption.js +158 -0
- package/dist/nodes/PiiRedactor/engine.d.ts +23 -0
- package/dist/nodes/PiiRedactor/engine.js +888 -0
- package/dist/nodes/PiiRedactor/injection.d.ts +46 -0
- package/dist/nodes/PiiRedactor/injection.js +425 -0
- package/dist/nodes/PiiRedactor/names.d.ts +25 -0
- package/dist/nodes/PiiRedactor/names.js +188 -0
- package/dist/nodes/PiiRedactor/patterns.d.ts +17 -0
- package/dist/nodes/PiiRedactor/patterns.js +1742 -0
- package/dist/nodes/PiiRedactor/presidio.d.ts +77 -0
- package/dist/nodes/PiiRedactor/presidio.js +264 -0
- package/dist/nodes/PiiRedactor/profiles.d.ts +47 -0
- package/dist/nodes/PiiRedactor/profiles.js +139 -0
- package/dist/nodes/PiiRedactor/pseudonymize.d.ts +20 -0
- package/dist/nodes/PiiRedactor/pseudonymize.js +203 -0
- package/dist/nodes/PiiRedactor/redact.png +0 -0
- package/dist/nodes/PiiRedactor/redact.svg +3 -0
- package/dist/nodes/PiiRedactor/ropa.d.ts +63 -0
- package/dist/nodes/PiiRedactor/ropa.js +70 -0
- package/dist/nodes/PiiRedactor/types.d.ts +82 -0
- package/dist/nodes/PiiRedactor/types.js +3 -0
- package/dist/nodes/PiiRedactor/vault.d.ts +61 -0
- package/dist/nodes/PiiRedactor/vault.js +352 -0
- package/package.json +87 -0
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Common first names and last names dictionary for free-text person name detection.
|
|
4
|
+
* Sources: US Census, UK ONS, German Standesamt, French INSEE, Spanish INE,
|
|
5
|
+
* Italian ISTAT, Dutch CBS, Polish GUS, Turkish, Arabic, Indian, Chinese, Korean, Japanese.
|
|
6
|
+
*
|
|
7
|
+
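* Detection logic: a run of 2-4 consecutive capitalized words is reported when at least
|
|
8
|
+
* one word is in the first- or last-name dictionary and no word is in the NOT_NAMES stop list. NAME_CONTEXT_WORDS is exported for callers; this module does not consult it.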
|
|
9
|
+
*/
|
|
10
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
11
|
+
exports.NAME_CONTEXT_WORDS = exports.LAST_NAMES = exports.FIRST_NAMES = void 0;
|
|
12
|
+
exports.detectNamesInText = detectNamesInText;
|
|
13
|
+
/**
 * Dictionary of common given names, grouped below by language/region.
 *
 * Spellings are ASCII-only (accents are dropped, e.g. 'Jurgen', 'Francois', 'Stephane').
 * The literal holds several hundred entries; a name listed under more than one
 * region (e.g. 'Thomas', 'Anna', 'Marie') collapses to a single Set member.
 * Lookups are case-sensitive: every entry is stored capitalized.
 */
exports.FIRST_NAMES = new Set([
    // English/American (top 200)
    'James', 'John', 'Robert', 'Michael', 'David', 'William', 'Richard', 'Joseph', 'Thomas', 'Charles',
    'Christopher', 'Daniel', 'Matthew', 'Anthony', 'Mark', 'Donald', 'Steven', 'Paul', 'Andrew', 'Joshua',
    'Kenneth', 'Kevin', 'Brian', 'George', 'Timothy', 'Ronald', 'Edward', 'Jason', 'Jeffrey', 'Ryan',
    'Jacob', 'Gary', 'Nicholas', 'Eric', 'Jonathan', 'Stephen', 'Larry', 'Justin', 'Scott', 'Brandon',
    'Benjamin', 'Samuel', 'Raymond', 'Gregory', 'Frank', 'Alexander', 'Patrick', 'Jack', 'Dennis', 'Jerry',
    'Tyler', 'Aaron', 'Jose', 'Nathan', 'Henry', 'Peter', 'Douglas', 'Zachary', 'Kyle', 'Noah',
    'Mary', 'Patricia', 'Jennifer', 'Linda', 'Barbara', 'Elizabeth', 'Susan', 'Jessica', 'Sarah', 'Karen',
    'Lisa', 'Nancy', 'Betty', 'Margaret', 'Sandra', 'Ashley', 'Dorothy', 'Kimberly', 'Emily', 'Donna',
    'Michelle', 'Carol', 'Amanda', 'Melissa', 'Deborah', 'Stephanie', 'Rebecca', 'Sharon', 'Laura', 'Cynthia',
    'Kathleen', 'Amy', 'Angela', 'Shirley', 'Anna', 'Brenda', 'Pamela', 'Emma', 'Nicole', 'Helen',
    'Samantha', 'Katherine', 'Christine', 'Debra', 'Rachel', 'Carolyn', 'Janet', 'Catherine', 'Maria', 'Heather',
    'Diane', 'Ruth', 'Julie', 'Olivia', 'Joyce', 'Virginia', 'Victoria', 'Kelly', 'Lauren', 'Christina',
    'Joan', 'Evelyn', 'Judith', 'Megan', 'Andrea', 'Cheryl', 'Hannah', 'Jacqueline', 'Martha', 'Gloria',
    'Teresa', 'Ann', 'Sara', 'Madison', 'Frances', 'Kathryn', 'Janice', 'Jean', 'Abigail', 'Alice',
    'Judy', 'Sophia', 'Grace', 'Denise', 'Amber', 'Doris', 'Marilyn', 'Danielle', 'Beverly', 'Isabella',
    'Theresa', 'Diana', 'Natalie', 'Brittany', 'Charlotte', 'Marie', 'Kayla', 'Alexis', 'Lori',
    // German (top 150)
    'Hans', 'Klaus', 'Wolfgang', 'Dieter', 'Jurgen', 'Uwe', 'Manfred', 'Peter', 'Thomas', 'Andreas',
    'Stefan', 'Martin', 'Frank', 'Bernd', 'Werner', 'Helmut', 'Gerhard', 'Ralf', 'Heinz', 'Herbert',
    'Karl', 'Friedrich', 'Horst', 'Gunter', 'Rainer', 'Walter', 'Michael', 'Christian', 'Markus', 'Matthias',
    'Ursula', 'Helga', 'Monika', 'Ingrid', 'Renate', 'Christa', 'Elke', 'Erika', 'Brigitte', 'Andrea',
    'Sabine', 'Petra', 'Birgit', 'Claudia', 'Susanne', 'Karin', 'Angelika', 'Heike', 'Gabriele', 'Martina',
    'Anke', 'Katrin', 'Stefanie', 'Julia', 'Lisa', 'Lena', 'Anna', 'Laura', 'Sophie', 'Marie',
    'Lukas', 'Leon', 'Maximilian', 'Felix', 'Jonas', 'Luca', 'Tim', 'Finn', 'Niklas', 'Paul',
    'Mia', 'Lea', 'Lina', 'Emilia', 'Ella', 'Amelie', 'Clara', 'Frieda', 'Greta', 'Ida',
    // French (top 100)
    'Jean', 'Pierre', 'Michel', 'Andre', 'Philippe', 'Jacques', 'Alain', 'Bernard', 'Francois', 'Robert',
    'Laurent', 'Nicolas', 'Christophe', 'Stephane', 'Frederic', 'Thierry', 'Patrick', 'Olivier', 'Julien', 'Arnaud',
    'Marie', 'Nathalie', 'Isabelle', 'Sylvie', 'Catherine', 'Monique', 'Francoise', 'Valerie', 'Sandrine', 'Veronique',
    'Aurelie', 'Camille', 'Chloe', 'Lea', 'Manon', 'Jade', 'Louise', 'Alice', 'Clemence', 'Ines',
    // Spanish (top 100)
    'Antonio', 'Manuel', 'Francisco', 'Jose', 'Carlos', 'Juan', 'Pedro', 'Luis', 'Miguel', 'Angel',
    'Alejandro', 'Pablo', 'Daniel', 'Javier', 'Fernando', 'Rafael', 'Diego', 'Adrian', 'Alvaro', 'Sergio',
    'Carmen', 'Ana', 'Isabel', 'Dolores', 'Pilar', 'Teresa', 'Rosa', 'Josefa', 'Francisca', 'Antonia',
    'Lucia', 'Sofia', 'Martina', 'Valentina', 'Daniela', 'Alba', 'Paula', 'Elena', 'Valeria', 'Noa',
    // Italian (top 80)
    'Giuseppe', 'Giovanni', 'Antonio', 'Mario', 'Luigi', 'Francesco', 'Angelo', 'Vincenzo', 'Pietro', 'Salvatore',
    'Marco', 'Luca', 'Andrea', 'Matteo', 'Alessandro', 'Davide', 'Simone', 'Lorenzo', 'Stefano', 'Roberto',
    'Giulia', 'Francesca', 'Sara', 'Valentina', 'Alessia', 'Chiara', 'Martina', 'Giorgia', 'Elisa', 'Federica',
    // Dutch (top 60)
    'Jan', 'Pieter', 'Johannes', 'Cornelis', 'Hendrik', 'Willem', 'Petrus', 'Gerrit', 'Jacobus', 'Dirk',
    'Daan', 'Sem', 'Lucas', 'Liam', 'Finn', 'Jesse', 'Milan', 'Noah', 'Luuk', 'Bram',
    'Emma', 'Sophie', 'Julia', 'Anna', 'Lotte', 'Eva', 'Sanne', 'Lisa', 'Fleur', 'Noa',
    // Polish (top 60)
    'Jan', 'Andrzej', 'Piotr', 'Krzysztof', 'Stanislaw', 'Tomasz', 'Pawel', 'Jozef', 'Marcin', 'Marek',
    'Jakub', 'Kacper', 'Filip', 'Szymon', 'Michal', 'Mateusz', 'Bartosz', 'Wojciech', 'Adam', 'Lukasz',
    'Anna', 'Maria', 'Katarzyna', 'Malgorzata', 'Agnieszka', 'Barbara', 'Ewa', 'Krystyna', 'Elzbieta', 'Joanna',
    'Zuzanna', 'Lena', 'Julia', 'Maja', 'Hanna', 'Amelia', 'Alicja', 'Zofia', 'Natalia', 'Wiktoria',
    // Turkish (top 40)
    'Mehmet', 'Mustafa', 'Ahmet', 'Ali', 'Huseyin', 'Hasan', 'Ibrahim', 'Ismail', 'Yusuf', 'Osman',
    'Fatma', 'Ayse', 'Emine', 'Hatice', 'Zeynep', 'Elif', 'Meryem', 'Merve', 'Zehra', 'Esra',
    // Arabic (top 40)
    'Mohammed', 'Ahmed', 'Ali', 'Omar', 'Ibrahim', 'Khalid', 'Hassan', 'Yousef', 'Abdulrahman', 'Fahad',
    'Fatima', 'Aisha', 'Maryam', 'Khadija', 'Zahra', 'Noor', 'Sara', 'Layla', 'Amina', 'Huda',
    // Indian (top 60)
    'Aarav', 'Adeel', 'Aditya', 'Amit', 'Anand', 'Arjun', 'Deepak', 'Gaurav', 'Kiran', 'Kumar',
    'Manoj', 'Nikhil', 'Prashant', 'Rahul', 'Rajesh', 'Ravi', 'Rohit', 'Sanjay', 'Sunil', 'Vikram',
    'Anjali', 'Anita', 'Deepa', 'Divya', 'Kavita', 'Meera', 'Neha', 'Pooja', 'Priya', 'Rekha',
    'Ritu', 'Seema', 'Shreya', 'Sita', 'Sneha', 'Sunita', 'Swati', 'Tanvi', 'Uma', 'Vanita',
    'Mirza', 'Iqbal', 'Adeel', 'Afzal', 'Farhan', 'Imran', 'Irfan', 'Junaid', 'Nadeem', 'Zahid',
    // Chinese (romanized, top 30)
    'Wei', 'Fang', 'Li', 'Na', 'Min', 'Jing', 'Lei', 'Yan', 'Hui', 'Xia',
    // NOTE(review): every entry in the next row is also listed in LAST_NAMES — confirm
    // they are meant to count as given names as well.
    'Chen', 'Wang', 'Zhang', 'Liu', 'Yang', 'Huang', 'Zhao', 'Wu', 'Zhou', 'Sun',
    // Korean (romanized, top 20)
    'Min', 'Ji', 'Soo', 'Hyun', 'Young', 'Jun', 'Hee', 'Eun', 'Sung', 'Yong',
    // Japanese (romanized, top 20)
    'Yuki', 'Haruto', 'Sota', 'Riku', 'Yuto', 'Haruki', 'Hinata', 'Sakura', 'Hana', 'Aoi',
]);
|
|
84
|
+
/**
 * Dictionary of common family names, grouped below by language/region.
 *
 * Spellings are ASCII-only ('Mueller', 'Schroeder', 'Lefevre'). The literal holds
 * roughly 250 entries; a name listed under more than one region (e.g. 'Garcia',
 * 'Martin', 'Lee') collapses to a single Set member. Lookups are case-sensitive.
 */
exports.LAST_NAMES = new Set([
    // English/American
    'Smith', 'Johnson', 'Williams', 'Brown', 'Jones', 'Garcia', 'Miller', 'Davis', 'Rodriguez', 'Martinez',
    'Hernandez', 'Lopez', 'Gonzalez', 'Wilson', 'Anderson', 'Thomas', 'Taylor', 'Moore', 'Jackson', 'Martin',
    'Lee', 'Perez', 'Thompson', 'White', 'Harris', 'Sanchez', 'Clark', 'Ramirez', 'Lewis', 'Robinson',
    'Walker', 'Young', 'Allen', 'King', 'Wright', 'Scott', 'Torres', 'Nguyen', 'Hill', 'Flores',
    'Green', 'Adams', 'Nelson', 'Baker', 'Hall', 'Rivera', 'Campbell', 'Mitchell', 'Carter', 'Roberts',
    // German
    'Mueller', 'Schmidt', 'Schneider', 'Fischer', 'Weber', 'Meyer', 'Wagner', 'Becker', 'Schulz', 'Hoffmann',
    'Koch', 'Richter', 'Wolf', 'Klein', 'Schroeder', 'Neumann', 'Schwarz', 'Braun', 'Zimmermann', 'Krause',
    'Hartmann', 'Lange', 'Schmitt', 'Werner', 'Schmid', 'Kramer', 'Meier', 'Lehmann', 'Huber', 'Kaiser',
    // French
    'Martin', 'Bernard', 'Thomas', 'Petit', 'Robert', 'Richard', 'Durand', 'Dubois', 'Moreau', 'Laurent',
    'Simon', 'Michel', 'Lefevre', 'Leroy', 'Roux', 'David', 'Bertrand', 'Morel', 'Fournier', 'Girard',
    // Spanish
    'Garcia', 'Rodriguez', 'Martinez', 'Lopez', 'Gonzalez', 'Hernandez', 'Perez', 'Sanchez', 'Ramirez', 'Torres',
    'Flores', 'Rivera', 'Gomez', 'Diaz', 'Cruz', 'Morales', 'Reyes', 'Gutierrez', 'Ortiz', 'Ramos',
    // Italian
    'Rossi', 'Russo', 'Ferrari', 'Esposito', 'Bianchi', 'Romano', 'Colombo', 'Ricci', 'Marino', 'Greco',
    'Bruno', 'Gallo', 'Conti', 'Costa', 'Giordano', 'Mancini', 'Rizzo', 'Lombardi', 'Moretti', 'Barbieri',
    // Dutch
    'Jansen', 'Visser', 'Bakker', 'Smit', 'Meijer', 'Dekker', 'Mulder', 'Bos', 'Vos', 'Peters',
    // Polish
    'Nowak', 'Kowalski', 'Wisniewski', 'Wojcik', 'Kowalczyk', 'Kaminski', 'Lewandowski', 'Zielinski', 'Szymanski', 'Kozlowski',
    // Turkish
    'Yilmaz', 'Kaya', 'Demir', 'Celik', 'Sahin', 'Ozturk', 'Aydin', 'Yildiz', 'Arslan', 'Dogan',
    // Arabic
    'Khan', 'Ahmad', 'Hassan', 'Ali', 'Hussein', 'Rahman', 'Abdullah', 'Malik', 'Hussain', 'Nasser',
    // Indian
    'Patel', 'Sharma', 'Singh', 'Kumar', 'Gupta', 'Shah', 'Joshi', 'Verma', 'Mehta', 'Rao',
    'Reddy', 'Mishra', 'Bhat', 'Nair', 'Pillai', 'Iyer', 'Das', 'Ghosh', 'Chatterjee', 'Banerjee',
    'Solangi', 'Ghaffar', 'Shaikh', 'Syed', 'Qureshi', 'Mirza',
    // Chinese
    'Wang', 'Li', 'Zhang', 'Liu', 'Chen', 'Yang', 'Huang', 'Zhao', 'Wu', 'Zhou',
    'Sun', 'Ma', 'Zhu', 'Hu', 'Guo', 'Lin', 'He', 'Gao', 'Luo', 'Zheng',
    // Korean
    'Kim', 'Lee', 'Park', 'Choi', 'Jung', 'Kang', 'Cho', 'Yoon', 'Jang', 'Lim',
    // Japanese
    'Sato', 'Suzuki', 'Takahashi', 'Tanaka', 'Watanabe', 'Ito', 'Yamamoto', 'Nakamura', 'Kobayashi', 'Kato',
]);
|
|
125
|
+
/**
 * Stop list: capitalized words that look like names but aren't (false positive prevention).
 *
 * detectNamesInText checks this set before the name dictionaries and discards the
 * whole candidate phrase as soon as one of its words is listed here. Matching is
 * case-sensitive and exact.
 *
 * NOTE(review): 'Francisco', 'Diego' and 'Antonio' are also entries of FIRST_NAMES.
 * Because this list wins, a phrase containing one of them (e.g. "Antonio Rossi") is
 * never reported — confirm that trade-off is intended.
 */
const NOT_NAMES = new Set([
    // Countries and regions
    'North', 'South', 'East', 'West', 'New', 'United', 'Great', 'Central', 'San', 'Los', 'Las', 'El',
    'Republic', 'Kingdom', 'States', 'Islands', 'Zealand',
    // Cities (common false positives)
    'York', 'Angeles', 'Francisco', 'Diego', 'Antonio', 'London', 'Berlin', 'Paris', 'Rome', 'Madrid',
    'Amsterdam', 'Dublin', 'Vienna', 'Munich', 'Hamburg', 'Frankfurt', 'Stuttgart',
    // Months
    'January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December',
    // Days
    'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday',
    // Common English words that start with capital
    'The', 'This', 'That', 'These', 'Those', 'Here', 'There', 'Where', 'When', 'What', 'Which', 'How',
    'All', 'Each', 'Every', 'Both', 'Few', 'More', 'Most', 'Some', 'Any', 'No', 'Not',
    // Tech terms
    'Data', 'Server', 'Client', 'Table', 'Field', 'Column', 'Type', 'Status', 'Error', 'Warning',
    'True', 'False', 'Null', 'None', 'Default', 'Admin', 'System', 'Test', 'Debug', 'Config',
    // Business terms
    'Company', 'Corporation', 'Inc', 'Ltd', 'GmbH', 'LLC', 'Group', 'Team', 'Department', 'Division',
    'Total', 'Amount', 'Price', 'Cost', 'Rate', 'Tax', 'Fee', 'Balance',
]);
|
|
147
|
+
/**
 * Detect person names in free text using the name dictionaries.
 *
 * A candidate is a run of 2-4 whitespace-separated capitalized words (ASCII plus
 * Latin-1 accented letters; one capital followed by 1-20 lowercase letters each).
 * A candidate is reported when none of its words is in NOT_NAMES and at least one
 * word is in FIRST_NAMES or LAST_NAMES. Dictionary lookups try the word verbatim
 * and with diacritics stripped, because the dictionaries store accent-free
 * spellings ('Jose', 'Francois').
 *
 * NOTE(review): a single NOT_NAMES word discards the whole candidate, so a name that
 * directly follows a capitalized stop word ("When John Smith ...") is not reported.
 *
 * @param text Text to scan.
 * @returns Matches in order of appearance as `{ name, start, end }`: `name` is the
 *   matched phrase, `start` the index of its first character in `text`, and `end`
 *   the index one past its last character.
 */
function detectNamesInText(text) {
    // Letter classes and boundaries are spelled out because `\b` only knows ASCII word
    // characters: it sees "é" as a non-word character, so `\b...\b` skipped candidates
    // starting with an accented capital ("Émile Zola") and cut candidates ending in an
    // accented letter short ("Maria José" -> "Maria Jos"). The explicit ranges also leave
    // out U+00D7 (×) and U+00F7 (÷) and keep lowercase letters out of the capital class.
    // For pure-ASCII input the lookarounds behave exactly like the former `\b`.
    const capitalWordSeq = /(?<![A-Za-z0-9_À-ÖØ-öø-ÿ])([A-ZÀ-ÖØ-Þ][a-zß-öø-ÿ]{1,20}(?:\s+[A-ZÀ-ÖØ-Þ][a-zß-öø-ÿ]{1,20}){1,3})(?![A-Za-z0-9_À-ÖØ-öø-ÿ])/g;
    // "José" -> "Jose": decompose, then drop the combining marks.
    const stripDiacritics = (word) => word.normalize('NFD').replace(/[\u0300-\u036f]/g, '');
    const inDictionary = (dictionary, word) => dictionary.has(word) || dictionary.has(stripDiacritics(word));
    const results = [];
    let match;
    while ((match = capitalWordSeq.exec(text)) !== null) {
        const fullMatch = match[1];
        const words = fullMatch.split(/\s+/);
        // Skip if any word is in the NOT_NAMES list
        if (words.some((w) => NOT_NAMES.has(w)))
            continue;
        // Require at least one known first-name or last-name part
        const hasKnownPart = words.some((w) => inDictionary(exports.FIRST_NAMES, w) || inDictionary(exports.LAST_NAMES, w));
        if (hasKnownPart) {
            results.push({
                name: fullMatch,
                start: match.index,
                end: match.index + fullMatch.length,
            });
        }
    }
    return results;
}
|
|
177
|
+
/**
 * Context words that boost confidence for dictionary-based name detection.
 *
 * Exported for other modules; nothing in this file reads it.
 *
 * NOTE(review): the German entries are capitalized while all others are lowercase,
 * 'client' and 'patient' appear twice (English and French rows), and 'employe' has
 * no accent. Whether that matters depends on how the consumer compares these
 * strings (case folding, accent folding) — confirm against the caller.
 */
exports.NAME_CONTEXT_WORDS = [
    // English
    'spoke with', 'contact', 'customer', 'client', 'patient', 'employee',
    'user', 'member', 'subscriber', 'applicant', 'candidate', 'tenant',
    'resident', 'citizen', 'owner', 'holder', 'beneficiary', 'recipient',
    'caller', 'visitor', 'attendee', 'participant', 'witness', 'defendant',
    'plaintiff', 'insured', 'claimant', 'debtor', 'creditor',
    // German
    'gesprochen mit', 'Kunde', 'Klient', 'Patient', 'Mitarbeiter',
    // French
    'parle avec', 'client', 'patient', 'employe',
];
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import { PiiPattern } from './types';
|
|
2
|
+
/**
 * All built-in PII patterns — 30+ types organized by category.
 *
 * Ambient declaration only; each element has the `PiiPattern` shape imported
 * from './types'.
 */
export declare const PII_PATTERNS: PiiPattern[];
|
|
6
|
+
/**
 * Returns pattern names grouped by category for the UI.
 *
 * @returns An array of option objects, each with string `name`, `value` and
 *   `description` fields.
 *   NOTE(review): presumably `name` is the display label and `value` the pattern
 *   identifier — confirm against the implementation.
 */
export declare function getPatternOptions(): Array<{
    name: string;
    value: string;
    description: string;
}>;
|
|
14
|
+
/**
 * Get patterns by name.
 *
 * @param names Names of the patterns to look up.
 * @returns The `PiiPattern` entries selected by `names`.
 *   NOTE(review): how unknown names are handled is not visible from this
 *   declaration — confirm against the implementation.
 */
export declare function getPatternsByNames(names: string[]): PiiPattern[];
|