@pharmatools/redacta 1.2.0 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/redact.js +32 -12
- package/package.json +6 -3
package/dist/redact.js
CHANGED
|
@@ -115,7 +115,10 @@ const UK_PLATE_RE = /\b[A-Z]{2}\d{2}\s?[A-Z]{3}\b/g;
|
|
|
115
115
|
// carrying a clinical title (Dr, Consultant, Nurse, ...), matching the Redacta
|
|
116
116
|
// skill's "don't redact the treating clinician" rule. Names buried in free
|
|
117
117
|
// prose are NOT caught; the UI tells users to review.
|
|
118
|
-
|
|
118
|
+
// A single name word: "Eileen", "O'Brien" (apostrophe directly after the
|
|
119
|
+
// initial capital), "Kowalski-Nowak", "O'Brien-Smith".
|
|
120
|
+
const NAME_WORD = String.raw `[A-Z](?:[a-z]+|['’][A-Z][a-z]+)(?:['’\-][A-Za-z]+)?`;
|
|
121
|
+
const NAME = String.raw `${NAME_WORD}(?:[ \t]+${NAME_WORD}){0,2}`;
|
|
119
122
|
// Case-sensitive, anchored version. Used to trim a loosely-captured name down
|
|
120
123
|
// to its leading run of properly capitalised words — necessary because the
|
|
121
124
|
// label/relative regexes carry the `i` flag (for the keyword), which would
|
|
@@ -214,17 +217,34 @@ const redactZip = (text, tok) => {
|
|
|
214
217
|
};
|
|
215
218
|
const redactIp = (text, tok) => text.replace(IP_RE, (m) => tok.tokenFor("IP_ADDRESS", m));
|
|
216
219
|
const redactPlate = (text, tok) => text.replace(UK_PLATE_RE, (m) => tok.tokenFor("VEHICLE_REG", m, m.replace(/\s/g, "").toUpperCase()));
|
|
217
|
-
const redactRelative = (text, tok) =>
|
|
218
|
-
//
|
|
219
|
-
//
|
|
220
|
-
//
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
220
|
+
const redactRelative = (text, tok) => {
|
|
221
|
+
// Manual exec loop rather than String.replace: when the strict-name trim
|
|
222
|
+
// fails (the `i` flag lets the loose capture open with lowercase words, e.g.
|
|
223
|
+
// "Next of kin: her daughter Anita" captured after "next of kin"), a replace
|
|
224
|
+
// callback would consume the whole region and swallow the inner
|
|
225
|
+
// "daughter Anita" match. Here we roll the scan back to just after the
|
|
226
|
+
// relation word instead, so nested relation phrases still match.
|
|
227
|
+
let out = "";
|
|
228
|
+
let last = 0;
|
|
229
|
+
RELATIVE_NAME_RE.lastIndex = 0;
|
|
230
|
+
let m;
|
|
231
|
+
while ((m = RELATIVE_NAME_RE.exec(text)) !== null) {
|
|
232
|
+
const [, rel, sep, name] = m;
|
|
233
|
+
// Trim to the leading capitalised run — this both rejects "daughter and
|
|
234
|
+
// two sons" and stops "Sarah is the" over-capturing.
|
|
235
|
+
const split = leadingName(name);
|
|
236
|
+
if (!split) {
|
|
237
|
+
RELATIVE_NAME_RE.lastIndex = m.index + rel.length;
|
|
238
|
+
continue;
|
|
239
|
+
}
|
|
240
|
+
out +=
|
|
241
|
+
text.slice(last, m.index) + rel + sep +
|
|
242
|
+
tok.tokenFor("RELATIVE_NAME", split.name, split.name.toLowerCase());
|
|
243
|
+
last = m.index + rel.length + sep.length + split.name.length;
|
|
244
|
+
RELATIVE_NAME_RE.lastIndex = last;
|
|
245
|
+
}
|
|
246
|
+
return out + text.slice(last);
|
|
247
|
+
};
|
|
228
248
|
const redactName = (text, tok) => {
|
|
229
249
|
const nameToken = (raw) => tok.tokenFor("PATIENT_NAME", raw.trim(), raw.trim().toLowerCase().replace(/\s+/g, " "));
|
|
230
250
|
// Courtesy-titled names first. Store the full match (title + name) as the
|
package/package.json
CHANGED
|
@@ -1,11 +1,14 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@pharmatools/redacta",
|
|
3
|
-
"version": "1.2.
|
|
3
|
+
"version": "1.2.1",
|
|
4
4
|
"description": "Pseudonymise patient identifiers and PII in text (and restore them) — a dependency-free TypeScript engine. Names, relatives, general PII, self-check, re-identification.",
|
|
5
5
|
"license": "MIT-0",
|
|
6
6
|
"author": "Nick Lamb (PharmaTools.AI)",
|
|
7
7
|
"homepage": "https://www.pharmatools.ai/redacta",
|
|
8
|
-
"repository": {
|
|
8
|
+
"repository": {
|
|
9
|
+
"type": "git",
|
|
10
|
+
"url": "https://github.com/nickjlamb/redacta"
|
|
11
|
+
},
|
|
9
12
|
"type": "module",
|
|
10
13
|
"main": "dist/index.js",
|
|
11
14
|
"types": "dist/index.d.ts",
|
|
@@ -37,6 +40,6 @@
|
|
|
37
40
|
},
|
|
38
41
|
"devDependencies": {
|
|
39
42
|
"typescript": "^5.4.0",
|
|
40
|
-
"vitest": "^1.
|
|
43
|
+
"vitest": "^4.1.8"
|
|
41
44
|
}
|
|
42
45
|
}
|