openredaction 1.0.0 → 1.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +57 -1
- package/dist/index.d.ts +112 -22
- package/dist/index.js +650 -223
- package/dist/index.mjs +592 -180
- package/package.json +12 -27
- package/dist/HealthCheck-A5OD4ATR.mjs +0 -12
- package/dist/HealthCheck-A5OD4ATR.mjs.map +0 -1
- package/dist/chunk-7OGNW2MU.mjs +0 -1701
- package/dist/chunk-7OGNW2MU.mjs.map +0 -1
- package/dist/chunk-MYYLGNXS.mjs +0 -149
- package/dist/chunk-MYYLGNXS.mjs.map +0 -1
- package/dist/chunk-WMJKH4XE.mjs +0 -34
- package/dist/chunk-WMJKH4XE.mjs.map +0 -1
- package/dist/chunk-ZRHGDEPC.mjs +0 -297
- package/dist/chunk-ZRHGDEPC.mjs.map +0 -1
- package/dist/cli/test-pattern.js +0 -430
- package/dist/document-AOMZP7UR.mjs +0 -26
- package/dist/document-AOMZP7UR.mjs.map +0 -1
- package/dist/index.cli.js +0 -15093
- package/dist/index.d.mts +0 -4111
- package/dist/index.js.map +0 -1
- package/dist/index.mjs.map +0 -1
- package/dist/workers-RMN5POM6.mjs +0 -10
- package/dist/workers-RMN5POM6.mjs.map +0 -1
package/dist/index.mjs
CHANGED
|
@@ -2,7 +2,7 @@ import {
|
|
|
2
2
|
HealthChecker,
|
|
3
3
|
createHealthChecker,
|
|
4
4
|
healthCheckMiddleware
|
|
5
|
-
} from "./chunk-
|
|
5
|
+
} from "./chunk-CXYSQPG6.mjs";
|
|
6
6
|
import {
|
|
7
7
|
CsvProcessor,
|
|
8
8
|
DocumentProcessor,
|
|
@@ -14,7 +14,7 @@ import {
|
|
|
14
14
|
createJsonProcessor,
|
|
15
15
|
createOCRProcessor,
|
|
16
16
|
createXlsxProcessor
|
|
17
|
-
} from "./chunk-
|
|
17
|
+
} from "./chunk-XG7MSXCJ.mjs";
|
|
18
18
|
import {
|
|
19
19
|
WorkerPool,
|
|
20
20
|
createWorkerPool
|
|
@@ -417,7 +417,7 @@ var PersistentAuditLogger = class {
|
|
|
417
417
|
enableHashing: options.enableHashing ?? true,
|
|
418
418
|
hashAlgorithm: options.hashAlgorithm ?? "sha256",
|
|
419
419
|
enableWAL: options.enableWAL ?? true,
|
|
420
|
-
secretKey: options.secretKey
|
|
420
|
+
secretKey: options.secretKey ?? void 0
|
|
421
421
|
};
|
|
422
422
|
this.adapter = this.createAdapter(options.database);
|
|
423
423
|
}
|
|
@@ -763,7 +763,8 @@ var PersistentAuditLogger = class {
|
|
|
763
763
|
* Start automatic cleanup schedule
|
|
764
764
|
*/
|
|
765
765
|
startCleanupSchedule() {
|
|
766
|
-
const
|
|
766
|
+
const cleanupIntervalHours = this.options.retention?.cleanupIntervalHours ?? 24;
|
|
767
|
+
const intervalMs = cleanupIntervalHours * 60 * 60 * 1e3;
|
|
767
768
|
this.cleanupTimer = setInterval(() => {
|
|
768
769
|
this.runCleanup().catch((err) => {
|
|
769
770
|
console.error("[PersistentAuditLogger] Cleanup failed:", err);
|
|
@@ -1769,7 +1770,7 @@ function validateLuhn(cardNumber, _context) {
|
|
|
1769
1770
|
return sum % 10 === 0;
|
|
1770
1771
|
}
|
|
1771
1772
|
function validateIBAN(iban, _context) {
|
|
1772
|
-
const cleaned = iban.replace(
|
|
1773
|
+
const cleaned = iban.replace(/[\s\u00A0.-]/g, "").toUpperCase();
|
|
1773
1774
|
if (!/^[A-Z]{2}[0-9]{2}[A-Z0-9]{1,30}$/.test(cleaned)) {
|
|
1774
1775
|
return false;
|
|
1775
1776
|
}
|
|
@@ -1864,7 +1865,7 @@ function mod97(string) {
|
|
|
1864
1865
|
return remainder;
|
|
1865
1866
|
}
|
|
1866
1867
|
function validateNINO(nino, _context) {
|
|
1867
|
-
const cleaned = nino.replace(
|
|
1868
|
+
const cleaned = nino.replace(/[\s\u00A0.-]/g, "").toUpperCase();
|
|
1868
1869
|
if (!/^[A-CEGHJ-PR-TW-Z]{2}[0-9]{6}[A-D]$/.test(cleaned)) {
|
|
1869
1870
|
return false;
|
|
1870
1871
|
}
|
|
@@ -1873,7 +1874,7 @@ function validateNINO(nino, _context) {
|
|
|
1873
1874
|
return !invalidPrefixes.includes(prefix);
|
|
1874
1875
|
}
|
|
1875
1876
|
function validateNHS(nhs, _context) {
|
|
1876
|
-
const cleaned = nhs.replace(/[\s
|
|
1877
|
+
const cleaned = nhs.replace(/[\s\u00A0.-]/g, "");
|
|
1877
1878
|
if (!/^\d{10}$/.test(cleaned)) {
|
|
1878
1879
|
return false;
|
|
1879
1880
|
}
|
|
@@ -1886,11 +1887,11 @@ function validateNHS(nhs, _context) {
|
|
|
1886
1887
|
return expectedCheckDigit === parseInt(cleaned[9], 10) && checkDigit !== 10;
|
|
1887
1888
|
}
|
|
1888
1889
|
function validateUKPassport(passport, _context) {
|
|
1889
|
-
const cleaned = passport.replace(
|
|
1890
|
+
const cleaned = passport.replace(/[\s\u00A0.-]/g, "").toUpperCase();
|
|
1890
1891
|
return /^\d{9}$/.test(cleaned) || /^\d{3}\d{6}$/.test(cleaned);
|
|
1891
1892
|
}
|
|
1892
1893
|
function validateSSN(ssn, _context) {
|
|
1893
|
-
const cleaned = ssn.replace(/[\s
|
|
1894
|
+
const cleaned = ssn.replace(/[\s\u00A0.-]/g, "");
|
|
1894
1895
|
if (!/^\d{9}$/.test(cleaned)) {
|
|
1895
1896
|
return false;
|
|
1896
1897
|
}
|
|
@@ -1920,6 +1921,15 @@ function validateSortCode(sortCode, _context) {
|
|
|
1920
1921
|
const cleaned = sortCode.replace(/[\s-]/g, "");
|
|
1921
1922
|
return /^\d{6}$/.test(cleaned);
|
|
1922
1923
|
}
|
|
1924
|
+
function validateRoutingNumber(routingNumber, _context) {
|
|
1925
|
+
const cleaned = routingNumber.replace(/[\s\u00A0.-]/g, "");
|
|
1926
|
+
if (!/^\d{9}$/.test(cleaned)) {
|
|
1927
|
+
return false;
|
|
1928
|
+
}
|
|
1929
|
+
const digits = cleaned.split("").map(Number);
|
|
1930
|
+
const checksum = (3 * (digits[0] + digits[3] + digits[6]) + 7 * (digits[1] + digits[4] + digits[7]) + (digits[2] + digits[5] + digits[8])) % 10;
|
|
1931
|
+
return checksum === 0;
|
|
1932
|
+
}
|
|
1923
1933
|
function validateName(name, context) {
|
|
1924
1934
|
const businessTerms = [
|
|
1925
1935
|
"account",
|
|
@@ -1954,10 +1964,29 @@ function validateName(name, context) {
|
|
|
1954
1964
|
"sir",
|
|
1955
1965
|
"madam",
|
|
1956
1966
|
"lord",
|
|
1957
|
-
"lady"
|
|
1967
|
+
"lady",
|
|
1968
|
+
"personal",
|
|
1969
|
+
"sensitive",
|
|
1970
|
+
"information",
|
|
1971
|
+
"data",
|
|
1972
|
+
"details",
|
|
1973
|
+
"content",
|
|
1974
|
+
"document",
|
|
1975
|
+
"text",
|
|
1976
|
+
"example",
|
|
1977
|
+
"simple",
|
|
1978
|
+
"regular",
|
|
1979
|
+
"plain",
|
|
1980
|
+
"send",
|
|
1981
|
+
"reply",
|
|
1982
|
+
"reach",
|
|
1983
|
+
"write",
|
|
1984
|
+
"use",
|
|
1985
|
+
"contact",
|
|
1986
|
+
"message"
|
|
1958
1987
|
];
|
|
1959
1988
|
const nameLower = name.toLowerCase();
|
|
1960
|
-
if (businessTerms.some((term) => nameLower.includes(term))) {
|
|
1989
|
+
if (businessTerms.some((term) => nameLower === term || nameLower.includes(term))) {
|
|
1961
1990
|
return false;
|
|
1962
1991
|
}
|
|
1963
1992
|
if (name === name.toUpperCase() && name.length <= 5) {
|
|
@@ -1967,7 +1996,7 @@ function validateName(name, context) {
|
|
|
1967
1996
|
return false;
|
|
1968
1997
|
}
|
|
1969
1998
|
const contextLower = context.toLowerCase();
|
|
1970
|
-
if (contextLower.includes("company ") || contextLower.includes("business ") || contextLower.includes("organization")) {
|
|
1999
|
+
if (contextLower.includes("company ") || contextLower.includes("business ") || contextLower.includes("organization") || contextLower.includes("without any") || contextLower.includes("simple text") || contextLower.includes("plain text")) {
|
|
1971
2000
|
return false;
|
|
1972
2001
|
}
|
|
1973
2002
|
return true;
|
|
@@ -2000,11 +2029,13 @@ var personalPatterns = [
|
|
|
2000
2029
|
},
|
|
2001
2030
|
{
|
|
2002
2031
|
type: "NAME",
|
|
2003
|
-
|
|
2032
|
+
// Match names with case variations - handles "John Smith", "john smith", "JOHN SMITH", "Lucy jones", etc.
|
|
2033
|
+
// First word must start with uppercase or be all uppercase; subsequent words can be any case
|
|
2034
|
+
regex: /\b(?:(?:Mr|Mrs|Ms|Miss|Dr|Prof|Professor|Sir|Madam|Lady|Lord|Rev|Father|Sister|Brother)\.?\s+)?((?:[A-Z][a-z'’.\-]+|[A-Z]{2,})(?:\s+(?:[A-Z][a-z'’.\-]+|[A-Z]{2,}|[a-z][a-z'’.\-]+)){1,3})(?:\s+(?:Jr|Sr|II|III|IV|PhD|MD|Esq|DDS|DVM|MBA|CPA)\.?)?\b/g,
|
|
2004
2035
|
priority: 50,
|
|
2005
2036
|
validator: validateName,
|
|
2006
2037
|
placeholder: "[NAME_{n}]",
|
|
2007
|
-
description: "Person name with salutations/suffixes",
|
|
2038
|
+
description: "Person name with salutations/suffixes (handles case variations)",
|
|
2008
2039
|
severity: "high"
|
|
2009
2040
|
},
|
|
2010
2041
|
{
|
|
@@ -2025,11 +2056,26 @@ var personalPatterns = [
|
|
|
2025
2056
|
},
|
|
2026
2057
|
{
|
|
2027
2058
|
type: "DATE_OF_BIRTH",
|
|
2028
|
-
regex: /\b(?:DOB|date of birth|birth ?date)[:\s]*(
|
|
2059
|
+
regex: /\b(?:DOB|date of birth|birth ?date)[:\s-]*((?:\d{1,2}[\/\-.]\d{1,2}[\/\-.]\d{2,4})|(?:\d{1,2}\s+(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\s+\d{2,4}))\b/gi,
|
|
2029
2060
|
priority: 95,
|
|
2030
2061
|
placeholder: "[DOB_{n}]",
|
|
2031
2062
|
description: "Date of birth",
|
|
2032
2063
|
severity: "high"
|
|
2064
|
+
},
|
|
2065
|
+
{
|
|
2066
|
+
type: "DATE",
|
|
2067
|
+
regex: /\b((?:\d{1,2}[\/\-.]\d{1,2}[\/\-.]\d{2,4})|(?:\d{1,2}\s+(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\s+\d{2,4}))\b/gi,
|
|
2068
|
+
priority: 60,
|
|
2069
|
+
placeholder: "[DATE_{n}]",
|
|
2070
|
+
description: "Date (standalone, without DOB context)",
|
|
2071
|
+
severity: "medium",
|
|
2072
|
+
validator: (value, context) => {
|
|
2073
|
+
const yearPattern = /^(19|20)\d{2}$/;
|
|
2074
|
+
if (yearPattern.test(value.replace(/[\/\-.\s]/g, ""))) return false;
|
|
2075
|
+
const versionContext = /\b(version|v\d+|release|build|update)\s*[:\s]*/i;
|
|
2076
|
+
if (versionContext.test(context)) return false;
|
|
2077
|
+
return true;
|
|
2078
|
+
}
|
|
2033
2079
|
}
|
|
2034
2080
|
];
|
|
2035
2081
|
|
|
@@ -2037,7 +2083,7 @@ var personalPatterns = [
|
|
|
2037
2083
|
var financialPatterns = [
|
|
2038
2084
|
{
|
|
2039
2085
|
type: "CREDIT_CARD",
|
|
2040
|
-
regex:
|
|
2086
|
+
regex: /(?<!\d)(?:(?:\d{4}[\s\u00A0.-]?){3}\d{4}|\d{4}[\s\u00A0.-]?\d{6}[\s\u00A0.-]?\d{5})(?!\d)/g,
|
|
2041
2087
|
priority: 100,
|
|
2042
2088
|
validator: (match) => validateLuhn(match),
|
|
2043
2089
|
placeholder: "[CREDIT_CARD_{n}]",
|
|
@@ -2046,7 +2092,7 @@ var financialPatterns = [
|
|
|
2046
2092
|
},
|
|
2047
2093
|
{
|
|
2048
2094
|
type: "IBAN",
|
|
2049
|
-
regex: /\b[A-Z]{2}\d{2}[A-Z0-9]{
|
|
2095
|
+
regex: /\b([A-Z]{2}\d{2}(?:[ \u00A0.-]?[A-Z0-9]){11,30})\b/gi,
|
|
2050
2096
|
priority: 95,
|
|
2051
2097
|
validator: (match) => validateIBAN(match),
|
|
2052
2098
|
placeholder: "[IBAN_{n}]",
|
|
@@ -2055,7 +2101,7 @@ var financialPatterns = [
|
|
|
2055
2101
|
},
|
|
2056
2102
|
{
|
|
2057
2103
|
type: "BANK_ACCOUNT_UK",
|
|
2058
|
-
regex: /\b(?:account|acc)[:\s
|
|
2104
|
+
regex: /\b(?:account|acc|a\/c)[:\s#-]*((?:\d{4}[\s-]?\d{4})|(?:\d{2}[\s-]?\d{2}[\s-]?\d{4}))\b/gi,
|
|
2059
2105
|
priority: 90,
|
|
2060
2106
|
placeholder: "[BANK_ACCOUNT_{n}]",
|
|
2061
2107
|
description: "UK bank account number",
|
|
@@ -2063,7 +2109,7 @@ var financialPatterns = [
|
|
|
2063
2109
|
},
|
|
2064
2110
|
{
|
|
2065
2111
|
type: "SORT_CODE_UK",
|
|
2066
|
-
regex: /\b(?:sort[
|
|
2112
|
+
regex: /\b(?:sort[\s-]*code|SC)[:\s.-]*((?:\d{2}[\s.-]?){2}\d{2})\b/gi,
|
|
2067
2113
|
priority: 90,
|
|
2068
2114
|
validator: (match) => validateSortCode(match),
|
|
2069
2115
|
placeholder: "[SORT_CODE_{n}]",
|
|
@@ -2072,8 +2118,9 @@ var financialPatterns = [
|
|
|
2072
2118
|
},
|
|
2073
2119
|
{
|
|
2074
2120
|
type: "ROUTING_NUMBER_US",
|
|
2075
|
-
regex: /\b(?:routing|RTN|ABA)[
|
|
2121
|
+
regex: /\b(?:routing|RTN|ABA)[-\s\u00A0]*(?:number|no|num)?[-\s\u00A0.:#]*((?:\d[\s\u00A0.-]?){9})\b/gi,
|
|
2076
2122
|
priority: 90,
|
|
2123
|
+
validator: (match) => validateRoutingNumber(match),
|
|
2077
2124
|
placeholder: "[ROUTING_NUMBER_{n}]",
|
|
2078
2125
|
description: "US routing number",
|
|
2079
2126
|
severity: "high"
|
|
@@ -2088,11 +2135,15 @@ var financialPatterns = [
|
|
|
2088
2135
|
},
|
|
2089
2136
|
{
|
|
2090
2137
|
type: "IFSC",
|
|
2091
|
-
regex: /\b[A-Z]{4}0[A-Z0-9]{6}\b/
|
|
2138
|
+
regex: /\b([A-Z]{4})[-\s\u00A0.]?0[-\s\u00A0.]?([A-Z0-9]{6})\b/gi,
|
|
2092
2139
|
priority: 90,
|
|
2093
2140
|
placeholder: "[IFSC_{n}]",
|
|
2094
2141
|
description: "Indian Financial System Code",
|
|
2095
|
-
severity: "high"
|
|
2142
|
+
severity: "high",
|
|
2143
|
+
validator: (value) => {
|
|
2144
|
+
const cleaned = value.replace(/[\s\u00A0.-]/g, "").toUpperCase();
|
|
2145
|
+
return /^[A-Z]{4}0[A-Z0-9]{6}$/.test(cleaned);
|
|
2146
|
+
}
|
|
2096
2147
|
},
|
|
2097
2148
|
{
|
|
2098
2149
|
type: "CLABE",
|
|
@@ -2246,7 +2297,7 @@ var financialPatterns = [
|
|
|
2246
2297
|
var governmentPatterns = [
|
|
2247
2298
|
{
|
|
2248
2299
|
type: "SSN",
|
|
2249
|
-
regex: /\b(?:SSN|social
|
|
2300
|
+
regex: /\b(?:SSN|social\s+security)\b[:\s#-]*([0-9]{3}[\s\u00A0.-]?[0-9]{2}[\s\u00A0.-]?[0-9]{4})\b/gi,
|
|
2250
2301
|
priority: 100,
|
|
2251
2302
|
validator: (match) => validateSSN(match),
|
|
2252
2303
|
placeholder: "[SSN_{n}]",
|
|
@@ -2255,7 +2306,7 @@ var governmentPatterns = [
|
|
|
2255
2306
|
},
|
|
2256
2307
|
{
|
|
2257
2308
|
type: "PASSPORT_UK",
|
|
2258
|
-
regex: /\b(?:passport|pass)[:\s
|
|
2309
|
+
regex: /\b(?:passport|pass)[:\s#-]*((?:\d{3}[\s\u00A0.-]?){2}\d{3})\b/gi,
|
|
2259
2310
|
priority: 95,
|
|
2260
2311
|
validator: (match) => validateUKPassport(match),
|
|
2261
2312
|
placeholder: "[PASSPORT_{n}]",
|
|
@@ -2264,7 +2315,7 @@ var governmentPatterns = [
|
|
|
2264
2315
|
},
|
|
2265
2316
|
{
|
|
2266
2317
|
type: "PASSPORT_US",
|
|
2267
|
-
regex: /\b(?:passport|pass)[:\s
|
|
2318
|
+
regex: /\b(?:passport|pass)[:\s#-]*(([A-Z0-9][\s\u00A0.-]?){5,8}[A-Z0-9])\b/gi,
|
|
2268
2319
|
priority: 95,
|
|
2269
2320
|
placeholder: "[PASSPORT_{n}]",
|
|
2270
2321
|
description: "US Passport number",
|
|
@@ -2272,7 +2323,7 @@ var governmentPatterns = [
|
|
|
2272
2323
|
},
|
|
2273
2324
|
{
|
|
2274
2325
|
type: "NATIONAL_INSURANCE_UK",
|
|
2275
|
-
regex: /\b(?:NI|NINO|national
|
|
2326
|
+
regex: /\b(?:NI\b|NINO|national\s+insurance)[:\s#-]*([A-CEGHJ-PR-TW-Z]{2}(?:[\s\u00A0.-]?\d{2}){3}[\s\u00A0.-]?[A-D])\b/gi,
|
|
2276
2327
|
priority: 100,
|
|
2277
2328
|
validator: (match) => validateNINO(match),
|
|
2278
2329
|
placeholder: "[NINO_{n}]",
|
|
@@ -2281,7 +2332,7 @@ var governmentPatterns = [
|
|
|
2281
2332
|
},
|
|
2282
2333
|
{
|
|
2283
2334
|
type: "NHS_NUMBER",
|
|
2284
|
-
regex: /\b(?:NHS|nhs number)[:\s
|
|
2335
|
+
regex: /\b(?:NHS|nhs number)[:\s#-]*((?:\d{3}[\s\u00A0.-]?){2}\d{4})\b/gi,
|
|
2285
2336
|
priority: 95,
|
|
2286
2337
|
validator: (match) => validateNHS(match),
|
|
2287
2338
|
placeholder: "[NHS_{n}]",
|
|
@@ -2290,15 +2341,27 @@ var governmentPatterns = [
|
|
|
2290
2341
|
},
|
|
2291
2342
|
{
|
|
2292
2343
|
type: "DRIVING_LICENSE_UK",
|
|
2293
|
-
regex: /\b([A-Z]{5}\d{
|
|
2344
|
+
regex: /\b(?:DL|DRIVING|DRIVER(?:'S)?|LICEN[SC]E)?[\s#:-]*(?:NO|NUM(?:BER)?|ID)?[\s#:-]*([A-Z]{5}[\s\u00A0.-]?\d{2}[\s\u00A0.-]?\d{2}[\s\u00A0.-]?\d{2}[\s\u00A0.-]?[A-Z]{2}[\s\u00A0.-]?\d[\s\u00A0.-]?[A-Z]{2})\b/gi,
|
|
2294
2345
|
priority: 90,
|
|
2295
2346
|
placeholder: "[DRIVING_LICENSE_{n}]",
|
|
2296
2347
|
description: "UK Driving License",
|
|
2297
|
-
severity: "high"
|
|
2348
|
+
severity: "high",
|
|
2349
|
+
validator: (value) => {
|
|
2350
|
+
const normalized = value.replace(/[\s\u00A0.-]/g, "").toUpperCase();
|
|
2351
|
+
if (!/^[A-Z]{5}\d{6}[A-Z]{2}\d[A-Z]{2}$/.test(normalized)) {
|
|
2352
|
+
return false;
|
|
2353
|
+
}
|
|
2354
|
+
const dob = normalized.slice(5, 11);
|
|
2355
|
+
const month = parseInt(dob.slice(2, 4), 10);
|
|
2356
|
+
const day = parseInt(dob.slice(4, 6), 10);
|
|
2357
|
+
const validMonth = month >= 1 && month <= 12 || month >= 51 && month <= 62;
|
|
2358
|
+
const validDay = day >= 1 && day <= 31;
|
|
2359
|
+
return validMonth && validDay;
|
|
2360
|
+
}
|
|
2298
2361
|
},
|
|
2299
2362
|
{
|
|
2300
2363
|
type: "DRIVING_LICENSE_US",
|
|
2301
|
-
regex: /\b(?:DL|driver(?:'s)?\slicense)[:\s
|
|
2364
|
+
regex: /\b(?:DL|driver(?:'s)?\slicense)[:\s#-]*([A-Z0-9](?:[A-Z0-9][\s\u00A0.-]?){3,18}[A-Z0-9])\b/gi,
|
|
2302
2365
|
priority: 90,
|
|
2303
2366
|
placeholder: "[DRIVING_LICENSE_{n}]",
|
|
2304
2367
|
description: "US Driving License",
|
|
@@ -2306,7 +2369,7 @@ var governmentPatterns = [
|
|
|
2306
2369
|
},
|
|
2307
2370
|
{
|
|
2308
2371
|
type: "TAX_ID",
|
|
2309
|
-
regex: /\b(?:TIN|tax id|EIN)[:\s
|
|
2372
|
+
regex: /\b(?:TIN|tax id|EIN)[:\s#-]*(\d{2}(?:[\s\u00A0.-]?\d){7})\b/gi,
|
|
2310
2373
|
priority: 95,
|
|
2311
2374
|
placeholder: "[TAX_ID_{n}]",
|
|
2312
2375
|
description: "Tax identification number",
|
|
@@ -2314,7 +2377,7 @@ var governmentPatterns = [
|
|
|
2314
2377
|
},
|
|
2315
2378
|
{
|
|
2316
2379
|
type: "PASSPORT_MRZ_TD3",
|
|
2317
|
-
regex: /P<[A-Z]{3}[A-Z<]{39}\n[A-Z0-9<]{9}[0-9][A-Z]{3}[0-9]{6}[0-9][MF<][0-9]{6}[0-9][A-Z0-9<]{14}[0-9]/g,
|
|
2380
|
+
regex: /P<[A-Z]{3}[A-Z<]{39}\r?\n[A-Z0-9<]{9}[0-9][A-Z]{3}[0-9]{6}[0-9][MF<][0-9]{6}[0-9][A-Z0-9<]{14}[0-9]/g,
|
|
2318
2381
|
priority: 98,
|
|
2319
2382
|
placeholder: "[PASSPORT_MRZ_{n}]",
|
|
2320
2383
|
description: "Passport Machine Readable Zone (TD3 - 2 lines x 44 chars)",
|
|
@@ -2322,7 +2385,7 @@ var governmentPatterns = [
|
|
|
2322
2385
|
},
|
|
2323
2386
|
{
|
|
2324
2387
|
type: "PASSPORT_MRZ_TD1",
|
|
2325
|
-
regex: /[A-Z]{1}[A-Z<][A-Z]{3}[A-Z0-9<]{9}[0-9][A-Z0-9<]{15}\n[0-9]{6}[0-9][MF<][0-9]{6}[0-9][A-Z]{3}[A-Z0-9<]{11}[0-9]\n[A-Z<]{30}/g,
|
|
2388
|
+
regex: /[A-Z]{1}[A-Z<][A-Z]{3}[A-Z0-9<]{9}[0-9][A-Z0-9<]{15}\r?\n[0-9]{6}[0-9][MF<][0-9]{6}[0-9][A-Z]{3}[A-Z0-9<]{11}[0-9]\r?\n[A-Z<]{30}/g,
|
|
2326
2389
|
priority: 98,
|
|
2327
2390
|
placeholder: "[ID_MRZ_{n}]",
|
|
2328
2391
|
description: "ID Card Machine Readable Zone (TD1 - 3 lines x 30 chars)",
|
|
@@ -2330,7 +2393,7 @@ var governmentPatterns = [
|
|
|
2330
2393
|
},
|
|
2331
2394
|
{
|
|
2332
2395
|
type: "VISA_MRZ",
|
|
2333
|
-
regex: /V<[A-Z]{3}[A-Z<]{39}\n[A-Z0-9<]{9}[0-9][A-Z]{3}[0-9]{6}[0-9][MF<][0-9]{6}[0-9][A-Z0-9<]{14}[0-9]/g,
|
|
2396
|
+
regex: /V<[A-Z]{3}[A-Z<]{39}\r?\n[A-Z0-9<]{9}[0-9][A-Z]{3}[0-9]{6}[0-9][MF<][0-9]{6}[0-9][A-Z0-9<]{14}[0-9]/g,
|
|
2334
2397
|
priority: 98,
|
|
2335
2398
|
placeholder: "[VISA_MRZ_{n}]",
|
|
2336
2399
|
description: "Visa Machine Readable Zone",
|
|
@@ -2338,7 +2401,7 @@ var governmentPatterns = [
|
|
|
2338
2401
|
},
|
|
2339
2402
|
{
|
|
2340
2403
|
type: "TRAVEL_DOCUMENT_NUMBER",
|
|
2341
|
-
regex: /\b(?:TRAVEL\s+DOC(?:UMENT)?|TD)[:\s
|
|
2404
|
+
regex: /\b(?:TRAVEL\s+DOC(?:UMENT)?|TD)[:\s#-]*([A-Z0-9](?:[A-Z0-9][\s\u00A0.-]?){4,13}[A-Z0-9])\b/gi,
|
|
2342
2405
|
priority: 92,
|
|
2343
2406
|
placeholder: "[TRAVEL_DOC_{n}]",
|
|
2344
2407
|
description: "Travel document numbers",
|
|
@@ -2349,7 +2412,7 @@ var governmentPatterns = [
|
|
|
2349
2412
|
},
|
|
2350
2413
|
{
|
|
2351
2414
|
type: "VISA_NUMBER",
|
|
2352
|
-
regex: /\b(?:VISA)[:\s
|
|
2415
|
+
regex: /\b(?:VISA)[:\s#-]*([A-Z0-9](?:[A-Z0-9][\s\u00A0.-]?){6,10}[A-Z0-9])\b/gi,
|
|
2353
2416
|
priority: 92,
|
|
2354
2417
|
placeholder: "[VISA_{n}]",
|
|
2355
2418
|
description: "Visa numbers",
|
|
@@ -2360,7 +2423,7 @@ var governmentPatterns = [
|
|
|
2360
2423
|
},
|
|
2361
2424
|
{
|
|
2362
2425
|
type: "IMMIGRATION_NUMBER",
|
|
2363
|
-
regex: /\b(?:IMMIGRATION|ALIEN|A-NUMBER|A#)[:\s
|
|
2426
|
+
regex: /\b(?:IMMIGRATION|ALIEN|A-NUMBER|A#)[:\s#-]*([A-Z]?(?:\d[\s\u00A0.-]?){7,9})\b/gi,
|
|
2364
2427
|
priority: 92,
|
|
2365
2428
|
placeholder: "[IMMIGRATION_{n}]",
|
|
2366
2429
|
description: "Immigration and alien registration numbers",
|
|
@@ -2368,7 +2431,7 @@ var governmentPatterns = [
|
|
|
2368
2431
|
},
|
|
2369
2432
|
{
|
|
2370
2433
|
type: "BORDER_CROSSING_CARD",
|
|
2371
|
-
regex: /\b(?:BCC|BORDER\s+CROSSING)[:\s
|
|
2434
|
+
regex: /\b(?:BCC|BORDER\s+CROSSING)[:\s#-]*([A-Z0-9](?:[A-Z0-9\s\u00A0.-]?){8,13}[A-Z0-9])\b/gi,
|
|
2372
2435
|
priority: 90,
|
|
2373
2436
|
placeholder: "[BCC_{n}]",
|
|
2374
2437
|
description: "Border crossing card numbers",
|
|
@@ -2379,7 +2442,7 @@ var governmentPatterns = [
|
|
|
2379
2442
|
},
|
|
2380
2443
|
{
|
|
2381
2444
|
type: "UTR_UK",
|
|
2382
|
-
regex: /\b(?:UTR|unique taxpayer reference)[:\s
|
|
2445
|
+
regex: /\b(?:UTR|unique taxpayer reference)[:\s#-]*((?:\d[\s\u00A0.-]?){10})\b/gi,
|
|
2383
2446
|
priority: 95,
|
|
2384
2447
|
validator: (match) => {
|
|
2385
2448
|
const digits = match.replace(/\D/g, "");
|
|
@@ -2391,10 +2454,10 @@ var governmentPatterns = [
|
|
|
2391
2454
|
},
|
|
2392
2455
|
{
|
|
2393
2456
|
type: "VAT_NUMBER",
|
|
2394
|
-
regex: /\b(?:VAT|vat number)[:\s
|
|
2457
|
+
regex: /\b(?:VAT|vat number)[:\s#-]*([A-Z]{2}(?:[\s\u00A0.-]?[A-Z0-9]){7,12})\b/gi,
|
|
2395
2458
|
priority: 90,
|
|
2396
2459
|
validator: (match) => {
|
|
2397
|
-
const cleaned = match.replace(
|
|
2460
|
+
const cleaned = match.replace(/[\s\u00A0.-]/g, "");
|
|
2398
2461
|
const countryCode = cleaned.substring(0, 2).toUpperCase();
|
|
2399
2462
|
const validCountries = ["GB", "DE", "FR", "IT", "ES", "NL", "BE", "AT", "PL", "SE", "DK", "FI", "IE", "PT", "CZ", "HU", "RO", "BG", "GR", "HR", "SK", "SI", "LT", "LV", "EE", "CY", "LU", "MT"];
|
|
2400
2463
|
if (!validCountries.includes(countryCode)) {
|
|
@@ -2473,7 +2536,7 @@ var governmentPatterns = [
|
|
|
2473
2536
|
var contactPatterns = [
|
|
2474
2537
|
{
|
|
2475
2538
|
type: "PHONE_UK_MOBILE",
|
|
2476
|
-
regex: /\
|
|
2539
|
+
regex: /\b(?:\+?44[\s.-]?7\d{3}|0?7\d{3})[\s.-]?\d{3}[\s.-]?\d{3}\b/g,
|
|
2477
2540
|
priority: 90,
|
|
2478
2541
|
placeholder: "[PHONE_UK_MOBILE_{n}]",
|
|
2479
2542
|
description: "UK mobile phone",
|
|
@@ -2481,7 +2544,7 @@ var contactPatterns = [
|
|
|
2481
2544
|
},
|
|
2482
2545
|
{
|
|
2483
2546
|
type: "PHONE_UK",
|
|
2484
|
-
regex: /\b(?:0[1-9]\d{1,
|
|
2547
|
+
regex: /\b(?:\+?44[\s.-]?(?:0)?\s*)?(?:\(?0?[1-9]\d{1,3}\)?[\s.-]?\d{3,4}[\s.-]?\d{3,4})(?:\s?(?:ext\.?|x)\s?\d{1,5})?\b/g,
|
|
2485
2548
|
priority: 85,
|
|
2486
2549
|
placeholder: "[PHONE_UK_{n}]",
|
|
2487
2550
|
description: "UK phone number",
|
|
@@ -2489,15 +2552,23 @@ var contactPatterns = [
|
|
|
2489
2552
|
},
|
|
2490
2553
|
{
|
|
2491
2554
|
type: "PHONE_US",
|
|
2492
|
-
regex:
|
|
2555
|
+
regex: /\b(?:\+1[\s.-]?)?(?:\(\d{3}\)|\d{3})[\s.-]?\d{3}[\s.-]?\d{4}(?:\s?(?:ext\.?|x)\s?\d{1,6})?\b/g,
|
|
2493
2556
|
priority: 85,
|
|
2494
2557
|
placeholder: "[PHONE_US_{n}]",
|
|
2495
2558
|
description: "US phone number",
|
|
2496
|
-
severity: "medium"
|
|
2559
|
+
severity: "medium",
|
|
2560
|
+
validator: (value, context) => {
|
|
2561
|
+
const versionContext = /\b(version|v\d+|release|build)\s*[:\s]*/i;
|
|
2562
|
+
if (versionContext.test(context)) return false;
|
|
2563
|
+
const cleaned = value.replace(/[\s()-]/g, "");
|
|
2564
|
+
const datePattern = /^\d{2}[-/]\d{2}[-/]\d{4}$/;
|
|
2565
|
+
if (datePattern.test(cleaned)) return false;
|
|
2566
|
+
return true;
|
|
2567
|
+
}
|
|
2497
2568
|
},
|
|
2498
2569
|
{
|
|
2499
2570
|
type: "PHONE_INTERNATIONAL",
|
|
2500
|
-
regex: /\b
|
|
2571
|
+
regex: /\b\+(?:\d[\s.\-()]?){6,14}\d(?:\s?(?:ext\.?|x)\s?\d{1,6})?\b/g,
|
|
2501
2572
|
priority: 80,
|
|
2502
2573
|
placeholder: "[PHONE_{n}]",
|
|
2503
2574
|
description: "International phone number",
|
|
@@ -2521,7 +2592,7 @@ var contactPatterns = [
|
|
|
2521
2592
|
},
|
|
2522
2593
|
{
|
|
2523
2594
|
type: "ADDRESS_STREET",
|
|
2524
|
-
regex: /\b
|
|
2595
|
+
regex: /\b\d{1,5}\s+[A-Za-z0-9][A-Za-z0-9'’.\-]*(?:\s+[A-Za-z0-9][A-Za-z0-9'’.\-]*){0,4}\s+(?:Street|St\.?|Road|Rd\.?|Avenue|Ave\.?|Lane|Ln\.?|Drive|Dr\.?|Court|Ct\.?|Boulevard|Blvd\.?|Way|Terrace|Ter\.?|Place|Pl\.?|Trail|Trl\.?|Parkway|Pkwy\.?|Highway|Hwy\.)(?:\s+(?:Apt|Unit|Suite|Ste)\s*\d+)?\b/gi,
|
|
2525
2596
|
priority: 70,
|
|
2526
2597
|
placeholder: "[ADDRESS_{n}]",
|
|
2527
2598
|
description: "Street address",
|
|
@@ -2872,15 +2943,20 @@ var BIOBANK_SAMPLE_ID = {
|
|
|
2872
2943
|
};
|
|
2873
2944
|
var PROVIDER_LICENSE = {
|
|
2874
2945
|
type: "PROVIDER_LICENSE",
|
|
2875
|
-
regex: /\b(?:MEDICAL|PHYSICIAN|DOCTOR|NURSE|PROVIDER)[-\s]
|
|
2946
|
+
regex: /\b(?:MEDICAL|PHYSICIAN|DOCTOR|NURSE|PROVIDER)[-\s\u00A0]*(?:LICENSE|LICENCE|LIC)[-\s\u00A0]*(?:NO|NUM(?:BER)?)?[-\s\u00A0.:#]*((?:[A-Z0-9]{2,6}[\s\u00A0./-]?){1,3}[A-Z0-9]{2,6})\b/gi,
|
|
2876
2947
|
placeholder: "[PROVIDER_LIC_{n}]",
|
|
2877
2948
|
priority: 80,
|
|
2878
2949
|
severity: "high",
|
|
2879
|
-
description: "Healthcare provider license numbers"
|
|
2950
|
+
description: "Healthcare provider license numbers",
|
|
2951
|
+
validator: (value) => {
|
|
2952
|
+
const normalized = value.replace(/[^A-Za-z0-9]/g, "");
|
|
2953
|
+
if (normalized.length < 6 || normalized.length > 18) return false;
|
|
2954
|
+
return /[A-Z]/i.test(normalized) && /\d/.test(normalized);
|
|
2955
|
+
}
|
|
2880
2956
|
};
|
|
2881
2957
|
var NPI_NUMBER = {
|
|
2882
2958
|
type: "NPI_NUMBER",
|
|
2883
|
-
regex: /\b(?:NPI[-\s]
|
|
2959
|
+
regex: /\b(?:NPI[-\s\u00A0]*(?:NO|NUM(?:BER)?)?[-\s\u00A0.:#]*)?((?:\d[\s\u00A0.-]?){10})\b/g,
|
|
2884
2960
|
placeholder: "[NPI_{n}]",
|
|
2885
2961
|
priority: 85,
|
|
2886
2962
|
severity: "high",
|
|
@@ -2889,7 +2965,8 @@ var NPI_NUMBER = {
|
|
|
2889
2965
|
if (!/provider|npi|physician|doctor|clinic|hospital|practice/i.test(context)) {
|
|
2890
2966
|
return false;
|
|
2891
2967
|
}
|
|
2892
|
-
const digits = value.split("").map(Number);
|
|
2968
|
+
const digits = value.replace(/\D/g, "").split("").map(Number);
|
|
2969
|
+
if (digits.length !== 10) return false;
|
|
2893
2970
|
let sum = 0;
|
|
2894
2971
|
for (let i = digits.length - 2; i >= 0; i--) {
|
|
2895
2972
|
let digit = digits[i];
|
|
@@ -2905,17 +2982,19 @@ var NPI_NUMBER = {
|
|
|
2905
2982
|
};
|
|
2906
2983
|
var DEA_NUMBER = {
|
|
2907
2984
|
type: "DEA_NUMBER",
|
|
2908
|
-
regex: /\b(?:DEA[-\s]
|
|
2985
|
+
regex: /\b(?:DEA[-\s\u00A0]*(?:NO|NUM(?:BER)?)?[-\s\u00A0.:#]*)?([A-Z]{2}(?:[\s\u00A0.-]?\d){7})\b/gi,
|
|
2909
2986
|
placeholder: "[DEA_{n}]",
|
|
2910
2987
|
priority: 90,
|
|
2911
2988
|
severity: "high",
|
|
2912
2989
|
description: "DEA registration number for controlled substances",
|
|
2913
2990
|
validator: (value, _context) => {
|
|
2991
|
+
const normalized = value.replace(/[^A-Za-z0-9]/g, "").toUpperCase();
|
|
2992
|
+
if (normalized.length !== 9) return false;
|
|
2914
2993
|
const validFirstLetters = ["A", "B", "C", "D", "E", "F", "G", "H", "J", "K", "L", "M", "P", "R", "S", "T", "U"];
|
|
2915
|
-
if (!validFirstLetters.includes(
|
|
2994
|
+
if (!validFirstLetters.includes(normalized[0])) {
|
|
2916
2995
|
return false;
|
|
2917
2996
|
}
|
|
2918
|
-
const digits =
|
|
2997
|
+
const digits = normalized.substring(2).split("").map(Number);
|
|
2919
2998
|
const sum1 = digits[0] + digits[2] + digits[4];
|
|
2920
2999
|
const sum2 = (digits[1] + digits[3] + digits[5]) * 2;
|
|
2921
3000
|
const checkDigit = (sum1 + sum2) % 10;
|
|
@@ -2940,11 +3019,16 @@ var EMERGENCY_CONTACT_MARKER = {
|
|
|
2940
3019
|
};
|
|
2941
3020
|
var BIOMETRIC_ID = {
|
|
2942
3021
|
type: "BIOMETRIC_ID",
|
|
2943
|
-
regex: /\b(?:FINGERPRINT|RETINAL?[-\s]?SCAN|IRIS[-\s]?SCAN|VOICE[-\s]?PRINT|FACIAL[-\s]?RECOGNITION|BIOMETRIC)[-\s]?(?:ID|DATA|TEMPLATE|HASH)?[-\s]
|
|
3022
|
+
regex: /\b(?:FINGERPRINT|RETINAL?[-\s\u00A0]?SCAN|IRIS[-\s\u00A0]?SCAN|VOICE[-\s\u00A0]?PRINT|FACIAL[-\s\u00A0]?RECOGNITION|BIOMETRIC)[-\s\u00A0]?(?:ID|DATA|TEMPLATE|HASH)?[-\s\u00A0.:#]*([A-Z0-9][A-Z0-9._-]{7,39})\b/gi,
|
|
2944
3023
|
placeholder: "[BIOMETRIC_{n}]",
|
|
2945
3024
|
priority: 95,
|
|
2946
3025
|
severity: "high",
|
|
2947
|
-
description: "Biometric identifier references"
|
|
3026
|
+
description: "Biometric identifier references",
|
|
3027
|
+
validator: (value) => {
|
|
3028
|
+
const normalized = value.replace(/[^A-Za-z0-9]/g, "");
|
|
3029
|
+
if (normalized.length < 8 || normalized.length > 40) return false;
|
|
3030
|
+
return /[A-Z]/i.test(normalized) && /\d/.test(normalized);
|
|
3031
|
+
}
|
|
2948
3032
|
};
|
|
2949
3033
|
var DNA_SEQUENCE = {
|
|
2950
3034
|
type: "DNA_SEQUENCE",
|
|
@@ -2973,7 +3057,7 @@ var DRUG_DOSAGE = {
|
|
|
2973
3057
|
};
|
|
2974
3058
|
var MEDICAL_IMAGE_REF = {
|
|
2975
3059
|
type: "MEDICAL_IMAGE_REF",
|
|
2976
|
-
regex: /\b(?:X[-\s]?RAY|MRI|CT[-\s]?SCAN|PET[-\s]?SCAN|ULTRASOUND|MAMMOGRAM)[-\s]?(?:IMAGE|FILE|ID)?[-\s]
|
|
3060
|
+
regex: /\b(?:X[-\s\u00A0]?RAY|MRI|CT[-\s\u00A0]?SCAN|PET[-\s\u00A0]?SCAN|ULTRASOUND|MAMMOGRAM)[-\s\u00A0]?(?:IMAGE|FILE|ID)?[-\s\u00A0.:#]*([A-Z0-9][A-Z0-9_.-]{5,23})\b/gi,
|
|
2977
3061
|
placeholder: "[IMAGE_{n}]",
|
|
2978
3062
|
priority: 80,
|
|
2979
3063
|
severity: "high",
|
|
@@ -3136,11 +3220,18 @@ var TRANSACTION_ID = {
|
|
|
3136
3220
|
};
|
|
3137
3221
|
var INVESTMENT_ACCOUNT = {
|
|
3138
3222
|
type: "INVESTMENT_ACCOUNT",
|
|
3139
|
-
regex: /\b(?:ISA|SIPP|INV(?:ESTMENT)?|PENSION|401K|IRA)[-\s]
|
|
3223
|
+
regex: /\b(?:ISA|SIPP|INV(?:ESTMENT)?|PENSION|401K|IRA)[-\s\u00A0]*(?:ACCOUNT|ACCT|A\/C)?[-\s\u00A0]*(?:NO|NUM(?:BER)?)?[-\s\u00A0.:#]*([A-Z0-9](?:[A-Z0-9][\s\u00A0./-]?){5,18}[A-Z0-9])\b/gi,
|
|
3140
3224
|
placeholder: "[INV_ACCT_{n}]",
|
|
3141
3225
|
priority: 85,
|
|
3142
3226
|
severity: "high",
|
|
3143
|
-
description: "Investment and pension account numbers"
|
|
3227
|
+
description: "Investment and pension account numbers",
|
|
3228
|
+
validator: (value, context) => {
|
|
3229
|
+
const normalized = value.replace(/[\s\u00A0./-]/g, "");
|
|
3230
|
+
const hasDigits = /\d{4,}/.test(normalized);
|
|
3231
|
+
const validLength = normalized.length >= 6 && normalized.length <= 15;
|
|
3232
|
+
const inContext = /isa|sipp|invest|pension|401k|ira|account|fund/i.test(context);
|
|
3233
|
+
return hasDigits && validLength && inContext;
|
|
3234
|
+
}
|
|
3144
3235
|
};
|
|
3145
3236
|
var WIRE_TRANSFER_REF = {
|
|
3146
3237
|
type: "WIRE_TRANSFER_REF",
|
|
@@ -4274,13 +4365,17 @@ var RESUME_ID = {
|
|
|
4274
4365
|
};
|
|
4275
4366
|
var BENEFITS_PLAN_NUMBER = {
|
|
4276
4367
|
type: "BENEFITS_PLAN_NUMBER",
|
|
4277
|
-
regex: /\b(?:BENEFITS?|INSURANCE|HEALTH[-\s]?PLAN)[-\s]
|
|
4368
|
+
regex: /\b(?:BENEFITS?|INSURANCE|HEALTH[-\s\u00A0]?PLAN)[-\s\u00A0]*(?:PLAN)?[-\s\u00A0]*(?:NO|NUM(?:BER)?|ID)?[-\s\u00A0.:#]*([A-Z0-9](?:[A-Z0-9][\s\u00A0./-]?){5,15}[A-Z0-9])\b/gi,
|
|
4278
4369
|
placeholder: "[BENEFITS_{n}]",
|
|
4279
4370
|
priority: 85,
|
|
4280
4371
|
severity: "high",
|
|
4281
4372
|
description: "Employee benefits and insurance plan numbers",
|
|
4282
|
-
validator: (
|
|
4283
|
-
|
|
4373
|
+
validator: (value, context) => {
|
|
4374
|
+
const normalized = value.replace(/[\s\u00A0./-]/g, "");
|
|
4375
|
+
const hasDigits = /\d{4,}/.test(normalized);
|
|
4376
|
+
const validLength = normalized.length >= 6 && normalized.length <= 14;
|
|
4377
|
+
const inContext = /benefit|insurance|health|dental|vision|plan|policy|enrollment/i.test(context);
|
|
4378
|
+
return hasDigits && validLength && inContext;
|
|
4284
4379
|
}
|
|
4285
4380
|
};
|
|
4286
4381
|
var RETIREMENT_ACCOUNT = {
|
|
@@ -4378,13 +4473,16 @@ var EXIT_INTERVIEW_ID = {
|
|
|
4378
4473
|
};
|
|
4379
4474
|
var DISCIPLINARY_ACTION_ID = {
|
|
4380
4475
|
type: "DISCIPLINARY_ACTION_ID",
|
|
4381
|
-
regex: /\b(?:DISCIPLINARY|INCIDENT|WARNING|VIOLATION)[-\s]
|
|
4476
|
+
regex: /\b(?:DISCIPLINARY|INCIDENT|WARNING|VIOLATION)[-\s\u00A0]*(?:ACTION)?[-\s\u00A0]*(?:NO|NUM(?:BER)?|ID)?[-\s\u00A0.:#]*([A-Z0-9](?:[A-Z0-9][\s\u00A0./-]?){5,15}[A-Z0-9])\b/gi,
|
|
4382
4477
|
placeholder: "[DISCIPLINE_{n}]",
|
|
4383
4478
|
priority: 85,
|
|
4384
4479
|
severity: "high",
|
|
4385
4480
|
description: "Disciplinary action and incident identifiers",
|
|
4386
|
-
validator: (
|
|
4387
|
-
|
|
4481
|
+
validator: (value, context) => {
|
|
4482
|
+
const normalized = value.replace(/[\s\u00A0./-]/g, "");
|
|
4483
|
+
const hasDigits = /\d{3,}/.test(normalized);
|
|
4484
|
+
const validLength = normalized.length >= 6 && normalized.length <= 12;
|
|
4485
|
+
return hasDigits && validLength && /disciplinary|incident|warning|violation|misconduct|investigation/i.test(context);
|
|
4388
4486
|
}
|
|
4389
4487
|
};
|
|
4390
4488
|
var EMERGENCY_CONTACT_REF = {
|
|
@@ -4712,7 +4810,7 @@ var TELECOMS_ACCOUNT_NUMBER = {
|
|
|
4712
4810
|
type: "TELECOMS_ACCOUNT_NUMBER",
|
|
4713
4811
|
regex: /\bACC(?:OUNT)?[-\s]?(?:NO|NUM(?:BER)?)?[-\s]?[:#]?\s*(\d{8,12})\b/gi,
|
|
4714
4812
|
placeholder: "[ACCOUNT_{n}]",
|
|
4715
|
-
priority:
|
|
4813
|
+
priority: 90,
|
|
4716
4814
|
severity: "high",
|
|
4717
4815
|
description: "Telecommunications customer account numbers",
|
|
4718
4816
|
validator: (_value, context) => {
|
|
@@ -5590,7 +5688,7 @@ var EMERGENCY_CALL_REF = {
|
|
|
5590
5688
|
};
|
|
5591
5689
|
var POLICE_REPORT_NUMBER = {
|
|
5592
5690
|
type: "POLICE_REPORT_NUMBER",
|
|
5593
|
-
regex: /\b(?:POLICE|PR|RPT|REPORT|CASE)[-\s]
|
|
5691
|
+
regex: /\b(?:POLICE|PR|RPT|REPORT|CASE)[-\s\u00A0]*(?:NO|NUM|NUMBER|ID)?[-\s\u00A0.:#]*((?:[A-Z]{2,4}[\s\u00A0./-]?\d{2,4}[\s\u00A0./-]?\d{4,10})|\d{4}[\s\u00A0./-]?\d{5,10})\b/gi,
|
|
5594
5692
|
placeholder: "[POLICE_RPT_{n}]",
|
|
5595
5693
|
priority: 95,
|
|
5596
5694
|
severity: "high",
|
|
@@ -5601,7 +5699,7 @@ var POLICE_REPORT_NUMBER = {
|
|
|
5601
5699
|
};
|
|
5602
5700
|
var FIRE_INCIDENT_NUMBER = {
|
|
5603
5701
|
type: "FIRE_INCIDENT_NUMBER",
|
|
5604
|
-
regex: /\b(?:FIRE|FI|FD)[-\s]
|
|
5702
|
+
regex: /\b(?:FIRE|FI|FD)[-\s\u00A0]*(?:INCIDENT|INC|NO|NUM|NUMBER|ID)?[-\s\u00A0.:#]*((?:[A-Z]{2,4}[\s\u00A0./-]?\d{2,4}[\s\u00A0./-]?\d{4,10})|\d{4}[\s\u00A0./-]?\d{4,8})\b/gi,
|
|
5605
5703
|
placeholder: "[FIRE_INC_{n}]",
|
|
5606
5704
|
priority: 95,
|
|
5607
5705
|
severity: "high",
|
|
@@ -9436,13 +9534,14 @@ var NINTENDO_FRIEND_CODE = {
|
|
|
9436
9534
|
type: "NINTENDO_FRIEND_CODE",
|
|
9437
9535
|
regex: /\bSW[-\s]?(\d{4}[-\s]?\d{4}[-\s]?\d{4})\b/gi,
|
|
9438
9536
|
placeholder: "[NINTENDO_FC_{n}]",
|
|
9439
|
-
priority:
|
|
9537
|
+
priority: 90,
|
|
9440
9538
|
severity: "medium",
|
|
9441
9539
|
description: "Nintendo Switch Friend Code",
|
|
9442
9540
|
validator: (value, context) => {
|
|
9443
9541
|
const digits = value.replace(/\D/g, "");
|
|
9444
9542
|
if (digits.length !== 12) return false;
|
|
9445
|
-
|
|
9543
|
+
const hasContext = /nintendo|switch|friend[- ]?code|gaming/i.test(context);
|
|
9544
|
+
return hasContext;
|
|
9446
9545
|
}
|
|
9447
9546
|
};
|
|
9448
9547
|
var BATTLETAG = {
|
|
@@ -9737,14 +9836,64 @@ var ccpaPreset = {
|
|
|
9737
9836
|
"USERNAME"
|
|
9738
9837
|
]
|
|
9739
9838
|
};
|
|
9839
|
+
var healthcarePreset = {
|
|
9840
|
+
includeNames: true,
|
|
9841
|
+
includeEmails: true,
|
|
9842
|
+
includePhones: true,
|
|
9843
|
+
includeAddresses: true,
|
|
9844
|
+
categories: ["personal", "contact", "healthcare", "insurance", "government"]
|
|
9845
|
+
};
|
|
9846
|
+
var healthcareResearchPreset = {
|
|
9847
|
+
includeNames: true,
|
|
9848
|
+
includeEmails: true,
|
|
9849
|
+
includePhones: true,
|
|
9850
|
+
includeAddresses: true,
|
|
9851
|
+
categories: ["personal", "contact", "healthcare", "insurance", "government"]
|
|
9852
|
+
};
|
|
9853
|
+
var financePreset = {
|
|
9854
|
+
includeNames: true,
|
|
9855
|
+
includeEmails: true,
|
|
9856
|
+
includePhones: true,
|
|
9857
|
+
includeAddresses: true,
|
|
9858
|
+
categories: ["personal", "contact", "financial", "government", "network"]
|
|
9859
|
+
};
|
|
9860
|
+
var educationPreset = {
|
|
9861
|
+
includeNames: true,
|
|
9862
|
+
includeEmails: true,
|
|
9863
|
+
includePhones: true,
|
|
9864
|
+
includeAddresses: true,
|
|
9865
|
+
categories: ["personal", "contact", "education", "government", "network"]
|
|
9866
|
+
};
|
|
9867
|
+
var transportLogisticsPreset = {
|
|
9868
|
+
includeNames: true,
|
|
9869
|
+
includeEmails: true,
|
|
9870
|
+
includePhones: true,
|
|
9871
|
+
includeAddresses: true,
|
|
9872
|
+
categories: ["personal", "contact", "transportation", "logistics", "vehicles", "network"]
|
|
9873
|
+
};
|
|
9740
9874
|
function getPreset(name) {
|
|
9741
|
-
|
|
9875
|
+
const presetName = name.toLowerCase();
|
|
9876
|
+
switch (presetName) {
|
|
9742
9877
|
case "gdpr":
|
|
9743
9878
|
return gdprPreset;
|
|
9744
9879
|
case "hipaa":
|
|
9745
9880
|
return hipaaPreset;
|
|
9746
9881
|
case "ccpa":
|
|
9747
9882
|
return ccpaPreset;
|
|
9883
|
+
case "healthcare":
|
|
9884
|
+
case "healthcare-provider":
|
|
9885
|
+
return healthcarePreset;
|
|
9886
|
+
case "healthcare-research":
|
|
9887
|
+
return healthcareResearchPreset;
|
|
9888
|
+
case "finance":
|
|
9889
|
+
case "financial-services":
|
|
9890
|
+
return financePreset;
|
|
9891
|
+
case "education":
|
|
9892
|
+
return educationPreset;
|
|
9893
|
+
case "transport-logistics":
|
|
9894
|
+
case "transportation":
|
|
9895
|
+
case "logistics":
|
|
9896
|
+
return transportLogisticsPreset;
|
|
9748
9897
|
default:
|
|
9749
9898
|
return {};
|
|
9750
9899
|
}
|
|
@@ -10267,9 +10416,23 @@ var ConfigLoader = class {
|
|
|
10267
10416
|
};
|
|
10268
10417
|
}
|
|
10269
10418
|
if (preset.startsWith("openredaction:")) {
|
|
10270
|
-
const
|
|
10271
|
-
|
|
10272
|
-
|
|
10419
|
+
const presetName = preset.replace("openredaction:", "");
|
|
10420
|
+
const supportedPresets = [
|
|
10421
|
+
"gdpr",
|
|
10422
|
+
"hipaa",
|
|
10423
|
+
"ccpa",
|
|
10424
|
+
"healthcare",
|
|
10425
|
+
"healthcare-provider",
|
|
10426
|
+
"healthcare-research",
|
|
10427
|
+
"finance",
|
|
10428
|
+
"financial-services",
|
|
10429
|
+
"education",
|
|
10430
|
+
"transport-logistics",
|
|
10431
|
+
"transportation",
|
|
10432
|
+
"logistics"
|
|
10433
|
+
];
|
|
10434
|
+
if (supportedPresets.includes(presetName)) {
|
|
10435
|
+
return { preset: presetName };
|
|
10273
10436
|
}
|
|
10274
10437
|
}
|
|
10275
10438
|
return null;
|
|
@@ -10285,7 +10448,8 @@ var ConfigLoader = class {
|
|
|
10285
10448
|
export default {
|
|
10286
10449
|
// Extend built-in presets
|
|
10287
10450
|
// Options: 'openredaction:recommended', 'openredaction:strict', 'openredaction:minimal'
|
|
10288
|
-
// Or compliance: 'openredaction:gdpr', 'openredaction:hipaa', 'openredaction:ccpa'
|
|
10451
|
+
// Or compliance/industry presets: 'openredaction:gdpr', 'openredaction:hipaa', 'openredaction:ccpa',
|
|
10452
|
+
// 'openredaction:finance', 'openredaction:education', 'openredaction:healthcare', 'openredaction:transport-logistics'
|
|
10289
10453
|
extends: ['openredaction:recommended'],
|
|
10290
10454
|
|
|
10291
10455
|
// Detection options
|
|
@@ -11962,9 +12126,8 @@ var ExplainAPI = class {
|
|
|
11962
12126
|
constructor(detector) {
|
|
11963
12127
|
this.detector = detector;
|
|
11964
12128
|
this.patterns = detector.getPatterns();
|
|
11965
|
-
const testResult = detector.detect("Contact: admin@business.co.uk");
|
|
11966
|
-
const hasConfidence = testResult.detections.length > 0 && testResult.detections[0].confidence !== void 0;
|
|
11967
12129
|
const detectorOptions = detector.options;
|
|
12130
|
+
const hasConfidence = detectorOptions?.enableContextAnalysis || false;
|
|
11968
12131
|
this.options = {
|
|
11969
12132
|
enableContextAnalysis: hasConfidence,
|
|
11970
12133
|
confidenceThreshold: detectorOptions?.confidenceThreshold || 0.5,
|
|
@@ -11976,7 +12139,7 @@ var ExplainAPI = class {
|
|
|
11976
12139
|
/**
|
|
11977
12140
|
* Explain why text was or wasn't detected as PII
|
|
11978
12141
|
*/
|
|
11979
|
-
explain(text) {
|
|
12142
|
+
async explain(text) {
|
|
11980
12143
|
const patternResults = [];
|
|
11981
12144
|
const matchedPatterns = [];
|
|
11982
12145
|
const unmatchedPatterns = [];
|
|
@@ -12066,7 +12229,8 @@ var ExplainAPI = class {
|
|
|
12066
12229
|
patternResults.push(result);
|
|
12067
12230
|
matchedPatterns.push(result);
|
|
12068
12231
|
}
|
|
12069
|
-
const
|
|
12232
|
+
const detectionResult = await this.detector.detect(text);
|
|
12233
|
+
const detections = detectionResult.detections;
|
|
12070
12234
|
return {
|
|
12071
12235
|
text,
|
|
12072
12236
|
patternResults,
|
|
@@ -12085,7 +12249,7 @@ var ExplainAPI = class {
|
|
|
12085
12249
|
/**
|
|
12086
12250
|
* Explain a specific detection
|
|
12087
12251
|
*/
|
|
12088
|
-
explainDetection(detection, text) {
|
|
12252
|
+
async explainDetection(detection, text) {
|
|
12089
12253
|
const pattern = this.patterns.find((p) => p.type === detection.type);
|
|
12090
12254
|
const reasoning = [];
|
|
12091
12255
|
reasoning.push(`Detected as ${detection.type}`);
|
|
@@ -12116,13 +12280,15 @@ var ExplainAPI = class {
|
|
|
12116
12280
|
detection,
|
|
12117
12281
|
pattern,
|
|
12118
12282
|
contextAnalysis,
|
|
12119
|
-
reasoning
|
|
12283
|
+
reasoning,
|
|
12284
|
+
suggestions: []
|
|
12285
|
+
// Will be populated if needed
|
|
12120
12286
|
};
|
|
12121
12287
|
}
|
|
12122
12288
|
/**
|
|
12123
12289
|
* Suggest why text wasn't detected
|
|
12124
12290
|
*/
|
|
12125
|
-
suggestWhy(text, expectedType) {
|
|
12291
|
+
async suggestWhy(text, expectedType) {
|
|
12126
12292
|
const suggestions = [];
|
|
12127
12293
|
const similarPatterns = [];
|
|
12128
12294
|
const typePatterns = this.patterns.filter(
|
|
@@ -12140,7 +12306,7 @@ var ExplainAPI = class {
|
|
|
12140
12306
|
similarPatterns.push(pattern);
|
|
12141
12307
|
const value = match[1] !== void 0 ? match[1] : match[0];
|
|
12142
12308
|
suggestions.push(`Pattern "${pattern.type}" matched value: "${value}"`);
|
|
12143
|
-
const explanation = this.explain(text);
|
|
12309
|
+
const explanation = await this.explain(text);
|
|
12144
12310
|
const filtered = explanation.filteredPatterns.find((r) => r.pattern.type === pattern.type);
|
|
12145
12311
|
if (filtered && filtered.reason) {
|
|
12146
12312
|
suggestions.push(`But was filtered: ${filtered.reason}`);
|
|
@@ -12170,9 +12336,9 @@ Example ${expectedType} pattern: ${examplePattern.regex.source.substring(0, 100)
|
|
|
12170
12336
|
/**
|
|
12171
12337
|
* Get debugging information for entire detection process
|
|
12172
12338
|
*/
|
|
12173
|
-
debug(text) {
|
|
12339
|
+
async debug(text) {
|
|
12174
12340
|
const start = performance.now();
|
|
12175
|
-
const explanation = this.explain(text);
|
|
12341
|
+
const explanation = await this.explain(text);
|
|
12176
12342
|
const duration = performance.now() - start;
|
|
12177
12343
|
const enabledFeatures = [];
|
|
12178
12344
|
if (this.options.enableContextAnalysis) {
|
|
@@ -13060,6 +13226,152 @@ function compileSafeRegex(pattern, flags) {
|
|
|
13060
13226
|
return new RegExp(patternStr, finalFlags);
|
|
13061
13227
|
}
|
|
13062
13228
|
|
|
13229
|
+
// src/utils/ai-assist.ts
|
|
13230
|
+
function getAIEndpoint(aiOptions) {
|
|
13231
|
+
if (!aiOptions?.enabled) {
|
|
13232
|
+
return null;
|
|
13233
|
+
}
|
|
13234
|
+
if (aiOptions.endpoint) {
|
|
13235
|
+
return aiOptions.endpoint;
|
|
13236
|
+
}
|
|
13237
|
+
if (typeof process !== "undefined" && process.env) {
|
|
13238
|
+
const envEndpoint = process.env.OPENREDACTION_AI_ENDPOINT;
|
|
13239
|
+
if (envEndpoint) {
|
|
13240
|
+
return envEndpoint;
|
|
13241
|
+
}
|
|
13242
|
+
}
|
|
13243
|
+
return null;
|
|
13244
|
+
}
|
|
13245
|
+
function isFetchAvailable() {
|
|
13246
|
+
return typeof fetch !== "undefined";
|
|
13247
|
+
}
|
|
13248
|
+
async function callAIDetect(text, endpoint, debug) {
|
|
13249
|
+
if (!isFetchAvailable()) {
|
|
13250
|
+
if (debug) {
|
|
13251
|
+
console.warn("[OpenRedaction] AI assist requires fetch API. Not available in this environment.");
|
|
13252
|
+
}
|
|
13253
|
+
return null;
|
|
13254
|
+
}
|
|
13255
|
+
try {
|
|
13256
|
+
const url = endpoint.endsWith("/ai-detect") ? endpoint : `${endpoint}/ai-detect`;
|
|
13257
|
+
if (debug) {
|
|
13258
|
+
console.log(`[OpenRedaction] Calling AI endpoint: ${url}`);
|
|
13259
|
+
}
|
|
13260
|
+
const response = await fetch(url, {
|
|
13261
|
+
method: "POST",
|
|
13262
|
+
headers: {
|
|
13263
|
+
"Content-Type": "application/json"
|
|
13264
|
+
},
|
|
13265
|
+
body: JSON.stringify({ text })
|
|
13266
|
+
});
|
|
13267
|
+
if (!response.ok) {
|
|
13268
|
+
if (debug) {
|
|
13269
|
+
const statusText = response.status === 429 ? "Rate limit exceeded (429)" : `${response.status}: ${response.statusText}`;
|
|
13270
|
+
console.warn(`[OpenRedaction] AI endpoint returned ${statusText}`);
|
|
13271
|
+
}
|
|
13272
|
+
return null;
|
|
13273
|
+
}
|
|
13274
|
+
const data = await response.json();
|
|
13275
|
+
if (!data.entities || !Array.isArray(data.entities)) {
|
|
13276
|
+
if (debug) {
|
|
13277
|
+
console.warn("[OpenRedaction] Invalid AI response format: missing entities array");
|
|
13278
|
+
}
|
|
13279
|
+
return null;
|
|
13280
|
+
}
|
|
13281
|
+
return data.entities;
|
|
13282
|
+
} catch (error) {
|
|
13283
|
+
if (debug) {
|
|
13284
|
+
console.warn(`[OpenRedaction] AI endpoint error: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
13285
|
+
}
|
|
13286
|
+
return null;
|
|
13287
|
+
}
|
|
13288
|
+
}
|
|
13289
|
+
function validateAIEntity(entity, textLength) {
|
|
13290
|
+
if (!entity.type || !entity.value || typeof entity.start !== "number" || typeof entity.end !== "number") {
|
|
13291
|
+
return false;
|
|
13292
|
+
}
|
|
13293
|
+
if (entity.start < 0 || entity.end < 0 || entity.start >= entity.end) {
|
|
13294
|
+
return false;
|
|
13295
|
+
}
|
|
13296
|
+
if (entity.start >= textLength || entity.end > textLength) {
|
|
13297
|
+
return false;
|
|
13298
|
+
}
|
|
13299
|
+
const actualValue = entity.value;
|
|
13300
|
+
if (actualValue.length !== entity.end - entity.start) {
|
|
13301
|
+
return false;
|
|
13302
|
+
}
|
|
13303
|
+
return true;
|
|
13304
|
+
}
|
|
13305
|
+
function detectionsOverlap(det1, det2) {
|
|
13306
|
+
const [start1, end1] = det1.position;
|
|
13307
|
+
const [start2, end2] = det2.position;
|
|
13308
|
+
const overlapStart = Math.max(start1, start2);
|
|
13309
|
+
const overlapEnd = Math.min(end1, end2);
|
|
13310
|
+
if (overlapStart >= overlapEnd) {
|
|
13311
|
+
return false;
|
|
13312
|
+
}
|
|
13313
|
+
const overlapLength = overlapEnd - overlapStart;
|
|
13314
|
+
const length1 = end1 - start1;
|
|
13315
|
+
const length2 = end2 - start2;
|
|
13316
|
+
const minLength = Math.min(length1, length2);
|
|
13317
|
+
return overlapLength > minLength * 0.5;
|
|
13318
|
+
}
|
|
13319
|
+
function convertAIEntityToDetection(entity, text) {
|
|
13320
|
+
if (!validateAIEntity(entity, text.length)) {
|
|
13321
|
+
return null;
|
|
13322
|
+
}
|
|
13323
|
+
const actualValue = text.substring(entity.start, entity.end);
|
|
13324
|
+
let type = entity.type.toUpperCase();
|
|
13325
|
+
if (type.includes("EMAIL") || type === "EMAIL_ADDRESS") {
|
|
13326
|
+
type = "EMAIL";
|
|
13327
|
+
} else if (type.includes("PHONE") || type === "PHONE_NUMBER") {
|
|
13328
|
+
type = "PHONE_US";
|
|
13329
|
+
} else if (type.includes("NAME") || type === "PERSON") {
|
|
13330
|
+
type = "NAME";
|
|
13331
|
+
} else if (type.includes("SSN") || type === "SOCIAL_SECURITY_NUMBER") {
|
|
13332
|
+
type = "SSN";
|
|
13333
|
+
} else if (type.includes("ADDRESS")) {
|
|
13334
|
+
type = "ADDRESS_STREET";
|
|
13335
|
+
}
|
|
13336
|
+
let severity = "medium";
|
|
13337
|
+
if (type === "SSN" || type === "CREDIT_CARD") {
|
|
13338
|
+
severity = "critical";
|
|
13339
|
+
} else if (type === "EMAIL" || type === "PHONE_US" || type === "NAME") {
|
|
13340
|
+
severity = "high";
|
|
13341
|
+
}
|
|
13342
|
+
return {
|
|
13343
|
+
type,
|
|
13344
|
+
value: actualValue,
|
|
13345
|
+
placeholder: `[${type}_${Math.random().toString(36).substring(2, 9)}]`,
|
|
13346
|
+
position: [entity.start, entity.end],
|
|
13347
|
+
severity,
|
|
13348
|
+
confidence: entity.confidence ?? 0.7
|
|
13349
|
+
// Default confidence for AI entities
|
|
13350
|
+
};
|
|
13351
|
+
}
|
|
13352
|
+
function mergeAIEntities(regexDetections, aiEntities, text) {
|
|
13353
|
+
const merged = [...regexDetections];
|
|
13354
|
+
const processedRanges = regexDetections.map((d) => d.position);
|
|
13355
|
+
for (const aiEntity of aiEntities) {
|
|
13356
|
+
const detection = convertAIEntityToDetection(aiEntity, text);
|
|
13357
|
+
if (!detection) {
|
|
13358
|
+
continue;
|
|
13359
|
+
}
|
|
13360
|
+
let hasOverlap = false;
|
|
13361
|
+
for (const regexDet of regexDetections) {
|
|
13362
|
+
if (detectionsOverlap(regexDet, detection)) {
|
|
13363
|
+
hasOverlap = true;
|
|
13364
|
+
break;
|
|
13365
|
+
}
|
|
13366
|
+
}
|
|
13367
|
+
if (!hasOverlap) {
|
|
13368
|
+
merged.push(detection);
|
|
13369
|
+
processedRanges.push(detection.position);
|
|
13370
|
+
}
|
|
13371
|
+
}
|
|
13372
|
+
return merged;
|
|
13373
|
+
}
|
|
13374
|
+
|
|
13063
13375
|
// src/detector.ts
|
|
13064
13376
|
var OpenRedaction = class _OpenRedaction {
|
|
13065
13377
|
constructor(options = {}) {
|
|
@@ -13269,6 +13581,9 @@ var OpenRedaction = class _OpenRedaction {
|
|
|
13269
13581
|
for (const pattern of this.patterns) {
|
|
13270
13582
|
const regex = new RegExp(pattern.regex.source, pattern.regex.flags);
|
|
13271
13583
|
this.compiledPatterns.set(pattern, regex);
|
|
13584
|
+
if (this.options.debug && (pattern.type === "NINTENDO_FRIEND_CODE" || pattern.type === "TELECOMS_ACCOUNT_NUMBER")) {
|
|
13585
|
+
console.log(`[OpenRedaction] Compiled pattern '${pattern.type}': ${regex}`);
|
|
13586
|
+
}
|
|
13272
13587
|
}
|
|
13273
13588
|
if (this.options.debug) {
|
|
13274
13589
|
console.log(`[OpenRedaction] Pre-compiled ${this.compiledPatterns.size} regex patterns`);
|
|
@@ -13288,12 +13603,18 @@ var OpenRedaction = class _OpenRedaction {
|
|
|
13288
13603
|
}
|
|
13289
13604
|
continue;
|
|
13290
13605
|
}
|
|
13606
|
+
if (this.options.debug && (pattern.type === "NINTENDO_FRIEND_CODE" || pattern.type === "TELECOMS_ACCOUNT_NUMBER")) {
|
|
13607
|
+
console.log(`[OpenRedaction] Processing pattern '${pattern.type}' with regex: ${regex}`);
|
|
13608
|
+
}
|
|
13291
13609
|
let match;
|
|
13292
13610
|
let matchCount = 0;
|
|
13293
13611
|
const maxMatches = 1e4;
|
|
13294
13612
|
regex.lastIndex = 0;
|
|
13295
13613
|
try {
|
|
13296
13614
|
while ((match = safeExec(regex, text, { timeout: this.options.regexTimeout })) !== null) {
|
|
13615
|
+
if (this.options.debug && (pattern.type === "NINTENDO_FRIEND_CODE" || pattern.type === "TELECOMS_ACCOUNT_NUMBER")) {
|
|
13616
|
+
console.log(`[OpenRedaction] Pattern '${pattern.type}' regex match found: '${match[0]}' at position ${match.index}`);
|
|
13617
|
+
}
|
|
13297
13618
|
matchCount++;
|
|
13298
13619
|
if (matchCount >= maxMatches) {
|
|
13299
13620
|
if (this.options.debug) {
|
|
@@ -13314,12 +13635,18 @@ var OpenRedaction = class _OpenRedaction {
|
|
|
13314
13635
|
endPos = startPos + value.length;
|
|
13315
13636
|
}
|
|
13316
13637
|
if (this.overlapsWithExisting(startPos, endPos, processedRanges)) {
|
|
13638
|
+
if (this.options.debug) {
|
|
13639
|
+
console.log(`[OpenRedaction] Pattern '${pattern.type}' skipped due to overlap at ${startPos}-${endPos}`);
|
|
13640
|
+
}
|
|
13317
13641
|
continue;
|
|
13318
13642
|
}
|
|
13319
13643
|
const contextStart = Math.max(0, startPos - 50);
|
|
13320
13644
|
const contextEnd = Math.min(text.length, endPos + 50);
|
|
13321
13645
|
const context = text.substring(contextStart, contextEnd);
|
|
13322
13646
|
if (pattern.validator && !pattern.validator(value, context)) {
|
|
13647
|
+
if (this.options.debug) {
|
|
13648
|
+
console.log(`[OpenRedaction] Pattern '${pattern.type}' validation failed for value: '${value}' with context: '${context.substring(0, 100)}...'`);
|
|
13649
|
+
}
|
|
13323
13650
|
continue;
|
|
13324
13651
|
}
|
|
13325
13652
|
if (this.options.enableFalsePositiveFilter) {
|
|
@@ -13338,6 +13665,9 @@ var OpenRedaction = class _OpenRedaction {
|
|
|
13338
13665
|
endPos
|
|
13339
13666
|
);
|
|
13340
13667
|
confidence = contextAnalysis.confidence;
|
|
13668
|
+
if (this.options.debug && confidence < this.options.confidenceThreshold) {
|
|
13669
|
+
console.log(`[OpenRedaction] Pattern '${pattern.type}' failed context analysis. Value: '${value}', Confidence: ${confidence} < ${this.options.confidenceThreshold}`);
|
|
13670
|
+
}
|
|
13341
13671
|
}
|
|
13342
13672
|
if (this.contextRulesEngine) {
|
|
13343
13673
|
const piiMatch = {
|
|
@@ -13363,6 +13693,9 @@ var OpenRedaction = class _OpenRedaction {
|
|
|
13363
13693
|
continue;
|
|
13364
13694
|
}
|
|
13365
13695
|
const placeholder = this.generatePlaceholder(value, pattern);
|
|
13696
|
+
if (this.options.debug) {
|
|
13697
|
+
console.log(`[OpenRedaction] Pattern '${pattern.type}' detected: '${value}' at position ${startPos}-${endPos}, confidence: ${confidence}`);
|
|
13698
|
+
}
|
|
13366
13699
|
detections.push({
|
|
13367
13700
|
type: pattern.type,
|
|
13368
13701
|
value,
|
|
@@ -13423,8 +13756,9 @@ var OpenRedaction = class _OpenRedaction {
|
|
|
13423
13756
|
}
|
|
13424
13757
|
/**
|
|
13425
13758
|
* Detect PII in text
|
|
13759
|
+
* Now async to support optional AI assist
|
|
13426
13760
|
*/
|
|
13427
|
-
detect(text) {
|
|
13761
|
+
async detect(text) {
|
|
13428
13762
|
if (this.rbacManager && !this.rbacManager.hasPermission("detection:detect")) {
|
|
13429
13763
|
throw new Error("[OpenRedaction] Permission denied: detection:detect required");
|
|
13430
13764
|
}
|
|
@@ -13478,12 +13812,42 @@ var OpenRedaction = class _OpenRedaction {
|
|
|
13478
13812
|
} else {
|
|
13479
13813
|
detections = this.processPatterns(text, this.patterns, processedRanges);
|
|
13480
13814
|
}
|
|
13815
|
+
if (this.options.ai?.enabled) {
|
|
13816
|
+
const aiEndpoint = getAIEndpoint(this.options.ai);
|
|
13817
|
+
if (aiEndpoint) {
|
|
13818
|
+
try {
|
|
13819
|
+
if (this.options.debug) {
|
|
13820
|
+
console.log("[OpenRedaction] AI assist enabled, calling AI endpoint...");
|
|
13821
|
+
}
|
|
13822
|
+
const aiEntities = await callAIDetect(text, aiEndpoint, this.options.debug);
|
|
13823
|
+
if (aiEntities && aiEntities.length > 0) {
|
|
13824
|
+
if (this.options.debug) {
|
|
13825
|
+
console.log(`[OpenRedaction] AI returned ${aiEntities.length} additional entities`);
|
|
13826
|
+
}
|
|
13827
|
+
detections = mergeAIEntities(detections, aiEntities, text);
|
|
13828
|
+
if (this.options.debug) {
|
|
13829
|
+
console.log(`[OpenRedaction] After AI merge: ${detections.length} total detections`);
|
|
13830
|
+
}
|
|
13831
|
+
} else if (this.options.debug) {
|
|
13832
|
+
console.log("[OpenRedaction] AI endpoint returned no additional entities");
|
|
13833
|
+
}
|
|
13834
|
+
} catch (error) {
|
|
13835
|
+
if (this.options.debug) {
|
|
13836
|
+
console.warn(`[OpenRedaction] AI assist failed, using regex-only: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
13837
|
+
}
|
|
13838
|
+
}
|
|
13839
|
+
} else if (this.options.debug) {
|
|
13840
|
+
console.warn("[OpenRedaction] AI assist enabled but no endpoint configured. Set ai.endpoint or OPENREDACTION_AI_ENDPOINT env var.");
|
|
13841
|
+
}
|
|
13842
|
+
}
|
|
13481
13843
|
detections.sort((a, b) => b.position[0] - a.position[0]);
|
|
13482
13844
|
let redacted = text;
|
|
13483
13845
|
const redactionMap = {};
|
|
13484
13846
|
for (const detection of detections) {
|
|
13485
|
-
|
|
13486
|
-
|
|
13847
|
+
if (!detection.value) continue;
|
|
13848
|
+
const escapedValue = this.escapeRegex(detection.value);
|
|
13849
|
+
const pattern = new RegExp(escapedValue, "gi");
|
|
13850
|
+
redacted = redacted.replace(pattern, detection.placeholder);
|
|
13487
13851
|
redactionMap[detection.placeholder] = detection.value;
|
|
13488
13852
|
}
|
|
13489
13853
|
const endTime = performance.now();
|
|
@@ -13636,8 +14000,8 @@ var OpenRedaction = class _OpenRedaction {
|
|
|
13636
14000
|
/**
|
|
13637
14001
|
* Get severity-based scan results
|
|
13638
14002
|
*/
|
|
13639
|
-
scan(text) {
|
|
13640
|
-
const result = this.detect(text);
|
|
14003
|
+
async scan(text) {
|
|
14004
|
+
const result = await this.detect(text);
|
|
13641
14005
|
return {
|
|
13642
14006
|
high: result.detections.filter((d) => d.severity === "high"),
|
|
13643
14007
|
medium: result.detections.filter((d) => d.severity === "medium"),
|
|
@@ -13845,7 +14209,7 @@ var OpenRedaction = class _OpenRedaction {
|
|
|
13845
14209
|
* Run health check
|
|
13846
14210
|
*/
|
|
13847
14211
|
async healthCheck(options) {
|
|
13848
|
-
const { HealthChecker: HealthChecker2 } = await import("./HealthCheck-
|
|
14212
|
+
const { HealthChecker: HealthChecker2 } = await import("./HealthCheck-77BBYEEO.mjs");
|
|
13849
14213
|
const checker = new HealthChecker2(this);
|
|
13850
14214
|
return checker.check(options);
|
|
13851
14215
|
}
|
|
@@ -13853,7 +14217,7 @@ var OpenRedaction = class _OpenRedaction {
|
|
|
13853
14217
|
* Quick health check (minimal overhead)
|
|
13854
14218
|
*/
|
|
13855
14219
|
async quickHealthCheck() {
|
|
13856
|
-
const { HealthChecker: HealthChecker2 } = await import("./HealthCheck-
|
|
14220
|
+
const { HealthChecker: HealthChecker2 } = await import("./HealthCheck-77BBYEEO.mjs");
|
|
13857
14221
|
const checker = new HealthChecker2(this);
|
|
13858
14222
|
return checker.quickCheck();
|
|
13859
14223
|
}
|
|
@@ -13867,14 +14231,14 @@ var OpenRedaction = class _OpenRedaction {
|
|
|
13867
14231
|
if (this.rbacManager && !this.rbacManager.hasPermission("detection:detect")) {
|
|
13868
14232
|
throw new Error("[OpenRedaction] Permission denied: detection:detect required");
|
|
13869
14233
|
}
|
|
13870
|
-
const { createDocumentProcessor: createDocumentProcessor2 } = await import("./document-
|
|
14234
|
+
const { createDocumentProcessor: createDocumentProcessor2 } = await import("./document-NNFKTUEV.mjs");
|
|
13871
14235
|
const processor = createDocumentProcessor2();
|
|
13872
14236
|
const extractionStart = performance.now();
|
|
13873
14237
|
const text = await processor.extractText(buffer, options);
|
|
13874
14238
|
const metadata = await processor.getMetadata(buffer, options);
|
|
13875
14239
|
const extractionEnd = performance.now();
|
|
13876
14240
|
const extractionTime = Math.round((extractionEnd - extractionStart) * 100) / 100;
|
|
13877
|
-
const detection = this.detect(text);
|
|
14241
|
+
const detection = await this.detect(text);
|
|
13878
14242
|
return {
|
|
13879
14243
|
text,
|
|
13880
14244
|
metadata,
|
|
@@ -13968,7 +14332,7 @@ var StreamingDetector = class {
|
|
|
13968
14332
|
const end = Math.min(textLength, position + chunkSize);
|
|
13969
14333
|
const chunk = text.substring(start, end);
|
|
13970
14334
|
const byteOffset = start;
|
|
13971
|
-
const result = this.detector.detect(chunk);
|
|
14335
|
+
const result = await this.detector.detect(chunk);
|
|
13972
14336
|
const newDetections = result.detections.filter((detection) => {
|
|
13973
14337
|
const absoluteStart = byteOffset + detection.position[0];
|
|
13974
14338
|
const absoluteEnd = byteOffset + detection.position[1];
|
|
@@ -13998,8 +14362,10 @@ var StreamingDetector = class {
|
|
|
13998
14362
|
(a, b) => b.position[0] - a.position[0]
|
|
13999
14363
|
);
|
|
14000
14364
|
for (const detection of sortedDetections) {
|
|
14001
|
-
|
|
14002
|
-
|
|
14365
|
+
if (!detection.value) continue;
|
|
14366
|
+
const escapedValue = this.escapeRegex(detection.value);
|
|
14367
|
+
const pattern = new RegExp(escapedValue, "gi");
|
|
14368
|
+
redactedChunk = redactedChunk.replace(pattern, detection.placeholder);
|
|
14003
14369
|
}
|
|
14004
14370
|
}
|
|
14005
14371
|
yield {
|
|
@@ -14025,8 +14391,10 @@ var StreamingDetector = class {
|
|
|
14025
14391
|
allDetections.sort((a, b) => b.position[0] - a.position[0]);
|
|
14026
14392
|
const redactionMap = {};
|
|
14027
14393
|
for (const detection of allDetections) {
|
|
14028
|
-
|
|
14029
|
-
|
|
14394
|
+
if (!detection.value) continue;
|
|
14395
|
+
const escapedValue = this.escapeRegex(detection.value);
|
|
14396
|
+
const pattern = new RegExp(escapedValue, "gi");
|
|
14397
|
+
redactedText = redactedText.replace(pattern, detection.placeholder);
|
|
14030
14398
|
redactionMap[detection.placeholder] = detection.value;
|
|
14031
14399
|
}
|
|
14032
14400
|
return {
|
|
@@ -14101,6 +14469,9 @@ var StreamingDetector = class {
|
|
|
14101
14469
|
estimatedMemory
|
|
14102
14470
|
};
|
|
14103
14471
|
}
|
|
14472
|
+
escapeRegex(str) {
|
|
14473
|
+
return str.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
|
14474
|
+
}
|
|
14104
14475
|
};
|
|
14105
14476
|
function createStreamingDetector(detector, options) {
|
|
14106
14477
|
return new StreamingDetector(detector, options);
|
|
@@ -14114,11 +14485,11 @@ var BatchProcessor = class {
|
|
|
14114
14485
|
/**
|
|
14115
14486
|
* Process multiple documents sequentially
|
|
14116
14487
|
*/
|
|
14117
|
-
processSequential(documents, options = {}) {
|
|
14488
|
+
async processSequential(documents, options = {}) {
|
|
14118
14489
|
const startTime = performance.now();
|
|
14119
14490
|
const results = [];
|
|
14120
14491
|
for (let i = 0; i < documents.length; i++) {
|
|
14121
|
-
const result = this.detector.detect(documents[i]);
|
|
14492
|
+
const result = await this.detector.detect(documents[i]);
|
|
14122
14493
|
results.push(result);
|
|
14123
14494
|
if (options.onProgress) {
|
|
14124
14495
|
options.onProgress(i + 1, documents.length);
|
|
@@ -14132,7 +14503,7 @@ var BatchProcessor = class {
|
|
|
14132
14503
|
totalDocuments: documents.length,
|
|
14133
14504
|
totalDetections: results.reduce((sum, r) => sum + r.detections.length, 0),
|
|
14134
14505
|
totalTime,
|
|
14135
|
-
avgTimePerDocument: totalTime / documents.length
|
|
14506
|
+
avgTimePerDocument: documents.length > 0 ? totalTime / documents.length : 0
|
|
14136
14507
|
}
|
|
14137
14508
|
};
|
|
14138
14509
|
}
|
|
@@ -14146,16 +14517,14 @@ var BatchProcessor = class {
|
|
|
14146
14517
|
let completed = 0;
|
|
14147
14518
|
for (let i = 0; i < documents.length; i += maxConcurrency) {
|
|
14148
14519
|
const batch = documents.slice(i, i + maxConcurrency);
|
|
14149
|
-
const batchPromises = batch.map((doc, batchIndex) => {
|
|
14150
|
-
|
|
14151
|
-
|
|
14152
|
-
|
|
14153
|
-
|
|
14154
|
-
|
|
14155
|
-
|
|
14156
|
-
|
|
14157
|
-
return result;
|
|
14158
|
-
});
|
|
14520
|
+
const batchPromises = batch.map(async (doc, batchIndex) => {
|
|
14521
|
+
const result = await this.detector.detect(doc);
|
|
14522
|
+
results[i + batchIndex] = result;
|
|
14523
|
+
completed++;
|
|
14524
|
+
if (options.onProgress) {
|
|
14525
|
+
options.onProgress(completed, documents.length);
|
|
14526
|
+
}
|
|
14527
|
+
return result;
|
|
14159
14528
|
});
|
|
14160
14529
|
await Promise.all(batchPromises);
|
|
14161
14530
|
}
|
|
@@ -14167,7 +14536,7 @@ var BatchProcessor = class {
|
|
|
14167
14536
|
totalDocuments: documents.length,
|
|
14168
14537
|
totalDetections: results.reduce((sum, r) => sum + r.detections.length, 0),
|
|
14169
14538
|
totalTime,
|
|
14170
|
-
avgTimePerDocument: totalTime / documents.length
|
|
14539
|
+
avgTimePerDocument: documents.length > 0 ? totalTime / documents.length : 0
|
|
14171
14540
|
}
|
|
14172
14541
|
};
|
|
14173
14542
|
}
|
|
@@ -14178,7 +14547,7 @@ var BatchProcessor = class {
|
|
|
14178
14547
|
if (options.parallel) {
|
|
14179
14548
|
return this.processParallel(documents, options);
|
|
14180
14549
|
} else {
|
|
14181
|
-
return
|
|
14550
|
+
return this.processSequential(documents, options);
|
|
14182
14551
|
}
|
|
14183
14552
|
}
|
|
14184
14553
|
/**
|
|
@@ -14189,7 +14558,7 @@ var BatchProcessor = class {
|
|
|
14189
14558
|
for (let i = 0; i < documents.length; i += batchSize) {
|
|
14190
14559
|
const batch = documents.slice(i, i + batchSize);
|
|
14191
14560
|
for (const doc of batch) {
|
|
14192
|
-
const result = this.detector.detect(doc);
|
|
14561
|
+
const result = await this.detector.detect(doc);
|
|
14193
14562
|
yield result;
|
|
14194
14563
|
}
|
|
14195
14564
|
}
|
|
@@ -14237,7 +14606,7 @@ function openredactionMiddleware(options = {}) {
|
|
|
14237
14606
|
...detectorOptions
|
|
14238
14607
|
} = options;
|
|
14239
14608
|
const detector = new OpenRedaction(detectorOptions);
|
|
14240
|
-
return (req, res, next) => {
|
|
14609
|
+
return async (req, res, next) => {
|
|
14241
14610
|
if (skipRoutes.some((pattern) => pattern.test(req.path))) {
|
|
14242
14611
|
return next();
|
|
14243
14612
|
}
|
|
@@ -14257,7 +14626,7 @@ function openredactionMiddleware(options = {}) {
|
|
|
14257
14626
|
const results = {};
|
|
14258
14627
|
const redactedBody = { ...req.body };
|
|
14259
14628
|
for (const { field, value } of textsToCheck) {
|
|
14260
|
-
const result = detector.detect(value);
|
|
14629
|
+
const result = await detector.detect(value);
|
|
14261
14630
|
if (result.detections.length > 0) {
|
|
14262
14631
|
totalDetections += result.detections.length;
|
|
14263
14632
|
results[field] = result;
|
|
@@ -14307,7 +14676,7 @@ function openredactionMiddleware(options = {}) {
|
|
|
14307
14676
|
}
|
|
14308
14677
|
function detectPII(options = {}) {
|
|
14309
14678
|
const detector = new OpenRedaction(options);
|
|
14310
|
-
return (req, res) => {
|
|
14679
|
+
return async (req, res) => {
|
|
14311
14680
|
const text = req.body?.text || req.query.text;
|
|
14312
14681
|
if (!text) {
|
|
14313
14682
|
res.status(400).json({
|
|
@@ -14316,19 +14685,26 @@ function detectPII(options = {}) {
|
|
|
14316
14685
|
});
|
|
14317
14686
|
return;
|
|
14318
14687
|
}
|
|
14319
|
-
|
|
14320
|
-
|
|
14321
|
-
|
|
14322
|
-
|
|
14323
|
-
|
|
14324
|
-
|
|
14325
|
-
|
|
14326
|
-
|
|
14688
|
+
try {
|
|
14689
|
+
const result = await detector.detect(text);
|
|
14690
|
+
res.json({
|
|
14691
|
+
detected: result.detections.length > 0,
|
|
14692
|
+
count: result.detections.length,
|
|
14693
|
+
detections: result.detections,
|
|
14694
|
+
redacted: result.redacted,
|
|
14695
|
+
stats: result.stats
|
|
14696
|
+
});
|
|
14697
|
+
} catch (error) {
|
|
14698
|
+
res.status(500).json({
|
|
14699
|
+
error: "Detection failed",
|
|
14700
|
+
message: error instanceof Error ? error.message : "Unknown error"
|
|
14701
|
+
});
|
|
14702
|
+
}
|
|
14327
14703
|
};
|
|
14328
14704
|
}
|
|
14329
14705
|
function generateReport(options = {}) {
|
|
14330
14706
|
const detector = new OpenRedaction(options);
|
|
14331
|
-
return (req, res) => {
|
|
14707
|
+
return async (req, res) => {
|
|
14332
14708
|
const text = req.body?.text;
|
|
14333
14709
|
const format = req.body?.format || req.query.format || "json";
|
|
14334
14710
|
if (!text) {
|
|
@@ -14337,28 +14713,35 @@ function generateReport(options = {}) {
|
|
|
14337
14713
|
});
|
|
14338
14714
|
return;
|
|
14339
14715
|
}
|
|
14340
|
-
|
|
14341
|
-
|
|
14342
|
-
|
|
14343
|
-
|
|
14344
|
-
|
|
14345
|
-
|
|
14346
|
-
|
|
14347
|
-
|
|
14348
|
-
|
|
14349
|
-
|
|
14350
|
-
|
|
14351
|
-
|
|
14352
|
-
|
|
14353
|
-
|
|
14354
|
-
|
|
14355
|
-
|
|
14356
|
-
|
|
14357
|
-
|
|
14358
|
-
|
|
14359
|
-
|
|
14360
|
-
|
|
14361
|
-
|
|
14716
|
+
try {
|
|
14717
|
+
const result = await detector.detect(text);
|
|
14718
|
+
if (format === "html") {
|
|
14719
|
+
const html = detector.generateReport(result, {
|
|
14720
|
+
format: "html",
|
|
14721
|
+
title: req.body?.title || "PII Detection Report"
|
|
14722
|
+
});
|
|
14723
|
+
res.setHeader("Content-Type", "text/html");
|
|
14724
|
+
res.send(html);
|
|
14725
|
+
} else if (format === "markdown") {
|
|
14726
|
+
const md = detector.generateReport(result, {
|
|
14727
|
+
format: "markdown",
|
|
14728
|
+
title: req.body?.title || "PII Detection Report"
|
|
14729
|
+
});
|
|
14730
|
+
res.setHeader("Content-Type", "text/markdown");
|
|
14731
|
+
res.send(md);
|
|
14732
|
+
} else {
|
|
14733
|
+
res.json({
|
|
14734
|
+
detected: result.detections.length > 0,
|
|
14735
|
+
count: result.detections.length,
|
|
14736
|
+
detections: result.detections,
|
|
14737
|
+
redacted: result.redacted,
|
|
14738
|
+
stats: result.stats
|
|
14739
|
+
});
|
|
14740
|
+
}
|
|
14741
|
+
} catch (error) {
|
|
14742
|
+
res.status(500).json({
|
|
14743
|
+
error: "Report generation failed",
|
|
14744
|
+
message: error instanceof Error ? error.message : "Unknown error"
|
|
14362
14745
|
});
|
|
14363
14746
|
}
|
|
14364
14747
|
};
|
|
@@ -14370,12 +14753,17 @@ function useOpenRedaction(options) {
|
|
|
14370
14753
|
const detector = useMemo(() => new OpenRedaction(options), [options]);
|
|
14371
14754
|
const [result, setResult] = useState(null);
|
|
14372
14755
|
const [isDetecting, setIsDetecting] = useState(false);
|
|
14373
|
-
const detect = useCallback((text) => {
|
|
14756
|
+
const detect = useCallback(async (text) => {
|
|
14374
14757
|
setIsDetecting(true);
|
|
14375
|
-
|
|
14376
|
-
|
|
14377
|
-
|
|
14378
|
-
|
|
14758
|
+
try {
|
|
14759
|
+
const detection = await detector.detect(text);
|
|
14760
|
+
setResult(detection);
|
|
14761
|
+
setIsDetecting(false);
|
|
14762
|
+
return detection;
|
|
14763
|
+
} catch (error) {
|
|
14764
|
+
setIsDetecting(false);
|
|
14765
|
+
throw error;
|
|
14766
|
+
}
|
|
14379
14767
|
}, [detector]);
|
|
14380
14768
|
const clear = useCallback(() => {
|
|
14381
14769
|
setResult(null);
|
|
@@ -14401,10 +14789,14 @@ function usePIIDetector(text, options) {
|
|
|
14401
14789
|
return;
|
|
14402
14790
|
}
|
|
14403
14791
|
setIsDetecting(true);
|
|
14404
|
-
const timer = setTimeout(() => {
|
|
14405
|
-
|
|
14406
|
-
|
|
14407
|
-
|
|
14792
|
+
const timer = setTimeout(async () => {
|
|
14793
|
+
try {
|
|
14794
|
+
const detection = await detector.detect(text);
|
|
14795
|
+
setResult(detection);
|
|
14796
|
+
setIsDetecting(false);
|
|
14797
|
+
} catch (error) {
|
|
14798
|
+
setIsDetecting(false);
|
|
14799
|
+
}
|
|
14408
14800
|
}, debounce);
|
|
14409
14801
|
return () => {
|
|
14410
14802
|
clearTimeout(timer);
|
|
@@ -14425,27 +14817,32 @@ function useFormFieldValidator(options) {
|
|
|
14425
14817
|
const [value, setValue] = useState("");
|
|
14426
14818
|
const [error, setError] = useState(null);
|
|
14427
14819
|
const [result, setResult] = useState(null);
|
|
14428
|
-
const validate = useCallback((inputValue) => {
|
|
14820
|
+
const validate = useCallback(async (inputValue) => {
|
|
14429
14821
|
setValue(inputValue);
|
|
14430
14822
|
if (!inputValue) {
|
|
14431
14823
|
setError(null);
|
|
14432
14824
|
setResult(null);
|
|
14433
14825
|
return true;
|
|
14434
14826
|
}
|
|
14435
|
-
|
|
14436
|
-
|
|
14437
|
-
|
|
14438
|
-
|
|
14439
|
-
if (
|
|
14440
|
-
|
|
14441
|
-
|
|
14442
|
-
|
|
14443
|
-
onPIIDetected
|
|
14827
|
+
try {
|
|
14828
|
+
const detection = await detector.detect(inputValue);
|
|
14829
|
+
setResult(detection);
|
|
14830
|
+
const relevantDetections = types.length > 0 ? detection.detections.filter((d) => types.includes(d.type)) : detection.detections;
|
|
14831
|
+
if (relevantDetections.length > 0) {
|
|
14832
|
+
if (failOnPII) {
|
|
14833
|
+
setError(`Sensitive information detected: ${relevantDetections[0].type}`);
|
|
14834
|
+
}
|
|
14835
|
+
if (onPIIDetected) {
|
|
14836
|
+
onPIIDetected(detection);
|
|
14837
|
+
}
|
|
14838
|
+
return false;
|
|
14444
14839
|
}
|
|
14840
|
+
setError(null);
|
|
14841
|
+
return true;
|
|
14842
|
+
} catch (error2) {
|
|
14843
|
+
setError("Validation failed");
|
|
14445
14844
|
return false;
|
|
14446
14845
|
}
|
|
14447
|
-
setError(null);
|
|
14448
|
-
return true;
|
|
14449
14846
|
}, [detector, failOnPII, types, onPIIDetected]);
|
|
14450
14847
|
const getFieldProps = useCallback(() => ({
|
|
14451
14848
|
value,
|
|
@@ -14472,7 +14869,7 @@ function useBatchDetector(options) {
|
|
|
14472
14869
|
setProgress(0);
|
|
14473
14870
|
const detections = [];
|
|
14474
14871
|
for (let i = 0; i < texts.length; i++) {
|
|
14475
|
-
const result = detector.detect(texts[i]);
|
|
14872
|
+
const result = await detector.detect(texts[i]);
|
|
14476
14873
|
detections.push(result);
|
|
14477
14874
|
setProgress((i + 1) / texts.length * 100);
|
|
14478
14875
|
await new Promise((resolve) => setTimeout(resolve, 0));
|
|
@@ -14509,9 +14906,12 @@ function useAutoRedact(options) {
|
|
|
14509
14906
|
setResult(null);
|
|
14510
14907
|
return;
|
|
14511
14908
|
}
|
|
14512
|
-
const timer = setTimeout(() => {
|
|
14513
|
-
|
|
14514
|
-
|
|
14909
|
+
const timer = setTimeout(async () => {
|
|
14910
|
+
try {
|
|
14911
|
+
const detection = await detector.detect(text);
|
|
14912
|
+
setResult(detection);
|
|
14913
|
+
} catch (error) {
|
|
14914
|
+
}
|
|
14515
14915
|
}, debounce);
|
|
14516
14916
|
return () => clearTimeout(timer);
|
|
14517
14917
|
}, [text, detector, debounce]);
|
|
@@ -14640,7 +15040,7 @@ var TenantManager = class {
|
|
|
14640
15040
|
await this.checkQuotas(tenantId, text);
|
|
14641
15041
|
this.trackRequest(tenantId, text);
|
|
14642
15042
|
const detector = this.getDetector(tenantId);
|
|
14643
|
-
const result = detector.detect(text);
|
|
15043
|
+
const result = await detector.detect(text);
|
|
14644
15044
|
const usage = this.usage.get(tenantId);
|
|
14645
15045
|
usage.piiDetectedThisMonth += result.detections.length;
|
|
14646
15046
|
usage.lastRequestAt = /* @__PURE__ */ new Date();
|
|
@@ -14927,6 +15327,7 @@ var DEFAULT_TIER_QUOTAS = {
|
|
|
14927
15327
|
// src/webhooks/WebhookManager.ts
|
|
14928
15328
|
var WebhookManager = class {
|
|
14929
15329
|
// 1 minute
|
|
15330
|
+
// private readonly HALF_OPEN_MAX_REQUESTS = 1; // Reserved for future use
|
|
14930
15331
|
constructor(options) {
|
|
14931
15332
|
this.webhooks = /* @__PURE__ */ new Map();
|
|
14932
15333
|
this.deliveryHistory = [];
|
|
@@ -15198,9 +15599,9 @@ var WebhookManager = class {
|
|
|
15198
15599
|
*/
|
|
15199
15600
|
async makeHttpRequest(webhook, event) {
|
|
15200
15601
|
try {
|
|
15201
|
-
let
|
|
15602
|
+
let fetch2;
|
|
15202
15603
|
try {
|
|
15203
|
-
|
|
15604
|
+
fetch2 = globalThis.fetch;
|
|
15204
15605
|
} catch {
|
|
15205
15606
|
throw new Error("[WebhookManager] HTTP client not available. Requires Node 18+ with fetch support.");
|
|
15206
15607
|
}
|
|
@@ -15220,7 +15621,7 @@ var WebhookManager = class {
|
|
|
15220
15621
|
const controller = new AbortController();
|
|
15221
15622
|
const timeoutId = setTimeout(() => controller.abort(), webhook.timeout);
|
|
15222
15623
|
try {
|
|
15223
|
-
const response = await
|
|
15624
|
+
const response = await fetch2(webhook.url, {
|
|
15224
15625
|
method: "POST",
|
|
15225
15626
|
headers,
|
|
15226
15627
|
body: JSON.stringify(event),
|
|
@@ -15591,7 +15992,7 @@ var APIServer = class {
|
|
|
15591
15992
|
if (req.tenantId && this.config.tenantManager) {
|
|
15592
15993
|
result = await this.config.tenantManager.detect(req.tenantId, text);
|
|
15593
15994
|
} else if (this.detector) {
|
|
15594
|
-
result = this.detector.detect(text);
|
|
15995
|
+
result = await this.detector.detect(text);
|
|
15595
15996
|
} else {
|
|
15596
15997
|
throw new Error("No detector available");
|
|
15597
15998
|
}
|
|
@@ -15632,7 +16033,7 @@ var APIServer = class {
|
|
|
15632
16033
|
if (req.tenantId && this.config.tenantManager) {
|
|
15633
16034
|
result = await this.config.tenantManager.detect(req.tenantId, text);
|
|
15634
16035
|
} else if (this.detector) {
|
|
15635
|
-
result = this.detector.detect(text);
|
|
16036
|
+
result = await this.detector.detect(text);
|
|
15636
16037
|
} else {
|
|
15637
16038
|
throw new Error("No detector available");
|
|
15638
16039
|
}
|
|
@@ -16053,10 +16454,12 @@ export {
|
|
|
16053
16454
|
analyzeFullContext,
|
|
16054
16455
|
calculateContextConfidence,
|
|
16055
16456
|
calculateRisk,
|
|
16457
|
+
callAIDetect,
|
|
16056
16458
|
ccpaPreset,
|
|
16057
16459
|
commonFalsePositives,
|
|
16058
16460
|
compileSafeRegex,
|
|
16059
16461
|
contactPatterns,
|
|
16462
|
+
convertAIEntityToDetection,
|
|
16060
16463
|
createAPIServer,
|
|
16061
16464
|
createBatchProcessor,
|
|
16062
16465
|
createCacheDisabledError,
|
|
@@ -16091,12 +16494,16 @@ export {
|
|
|
16091
16494
|
createXlsxProcessor,
|
|
16092
16495
|
defaultPasses,
|
|
16093
16496
|
detectPII,
|
|
16497
|
+
detectionsOverlap,
|
|
16498
|
+
educationPreset,
|
|
16094
16499
|
exportForVersionControl,
|
|
16095
16500
|
extractContext,
|
|
16096
16501
|
filterFalsePositives,
|
|
16502
|
+
financePreset,
|
|
16097
16503
|
financialPatterns,
|
|
16098
16504
|
gdprPreset,
|
|
16099
16505
|
generateReport,
|
|
16506
|
+
getAIEndpoint,
|
|
16100
16507
|
getPatternsByCategory,
|
|
16101
16508
|
getPredefinedRole,
|
|
16102
16509
|
getPreset,
|
|
@@ -16104,21 +16511,26 @@ export {
|
|
|
16104
16511
|
governmentPatterns,
|
|
16105
16512
|
groupPatternsByPass,
|
|
16106
16513
|
healthCheckMiddleware,
|
|
16514
|
+
healthcarePreset,
|
|
16515
|
+
healthcareResearchPreset,
|
|
16107
16516
|
hipaaPreset,
|
|
16108
16517
|
inferDocumentType,
|
|
16109
16518
|
isFalsePositive,
|
|
16110
16519
|
isUnsafePattern,
|
|
16520
|
+
mergeAIEntities,
|
|
16111
16521
|
mergePassDetections,
|
|
16112
16522
|
networkPatterns,
|
|
16113
16523
|
openredactionMiddleware,
|
|
16114
16524
|
personalPatterns,
|
|
16115
16525
|
safeExec,
|
|
16116
16526
|
safeExecAll,
|
|
16527
|
+
transportLogisticsPreset,
|
|
16117
16528
|
useAutoRedact,
|
|
16118
16529
|
useBatchDetector,
|
|
16119
16530
|
useFormFieldValidator,
|
|
16120
16531
|
useOpenRedaction,
|
|
16121
16532
|
usePIIDetector,
|
|
16533
|
+
validateAIEntity,
|
|
16122
16534
|
validateEmail,
|
|
16123
16535
|
validateIBAN,
|
|
16124
16536
|
validateLuhn,
|