openredaction 1.0.0 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +57 -1
- package/dist/index.d.ts +112 -22
- package/dist/index.js +1313 -289
- package/dist/index.mjs +1255 -246
- package/package.json +12 -27
- package/dist/HealthCheck-A5OD4ATR.mjs +0 -12
- package/dist/HealthCheck-A5OD4ATR.mjs.map +0 -1
- package/dist/chunk-7OGNW2MU.mjs +0 -1701
- package/dist/chunk-7OGNW2MU.mjs.map +0 -1
- package/dist/chunk-MYYLGNXS.mjs +0 -149
- package/dist/chunk-MYYLGNXS.mjs.map +0 -1
- package/dist/chunk-WMJKH4XE.mjs +0 -34
- package/dist/chunk-WMJKH4XE.mjs.map +0 -1
- package/dist/chunk-ZRHGDEPC.mjs +0 -297
- package/dist/chunk-ZRHGDEPC.mjs.map +0 -1
- package/dist/cli/test-pattern.js +0 -430
- package/dist/document-AOMZP7UR.mjs +0 -26
- package/dist/document-AOMZP7UR.mjs.map +0 -1
- package/dist/index.cli.js +0 -15093
- package/dist/index.d.mts +0 -4111
- package/dist/index.js.map +0 -1
- package/dist/index.mjs.map +0 -1
- package/dist/workers-RMN5POM6.mjs +0 -10
- package/dist/workers-RMN5POM6.mjs.map +0 -1
package/dist/index.mjs
CHANGED
|
@@ -2,7 +2,7 @@ import {
|
|
|
2
2
|
HealthChecker,
|
|
3
3
|
createHealthChecker,
|
|
4
4
|
healthCheckMiddleware
|
|
5
|
-
} from "./chunk-
|
|
5
|
+
} from "./chunk-CXYSQPG6.mjs";
|
|
6
6
|
import {
|
|
7
7
|
CsvProcessor,
|
|
8
8
|
DocumentProcessor,
|
|
@@ -14,7 +14,7 @@ import {
|
|
|
14
14
|
createJsonProcessor,
|
|
15
15
|
createOCRProcessor,
|
|
16
16
|
createXlsxProcessor
|
|
17
|
-
} from "./chunk-
|
|
17
|
+
} from "./chunk-XG7MSXCJ.mjs";
|
|
18
18
|
import {
|
|
19
19
|
WorkerPool,
|
|
20
20
|
createWorkerPool
|
|
@@ -417,7 +417,7 @@ var PersistentAuditLogger = class {
|
|
|
417
417
|
enableHashing: options.enableHashing ?? true,
|
|
418
418
|
hashAlgorithm: options.hashAlgorithm ?? "sha256",
|
|
419
419
|
enableWAL: options.enableWAL ?? true,
|
|
420
|
-
secretKey: options.secretKey
|
|
420
|
+
secretKey: options.secretKey ?? void 0
|
|
421
421
|
};
|
|
422
422
|
this.adapter = this.createAdapter(options.database);
|
|
423
423
|
}
|
|
@@ -763,7 +763,8 @@ var PersistentAuditLogger = class {
|
|
|
763
763
|
* Start automatic cleanup schedule
|
|
764
764
|
*/
|
|
765
765
|
startCleanupSchedule() {
|
|
766
|
-
const
|
|
766
|
+
const cleanupIntervalHours = this.options.retention?.cleanupIntervalHours ?? 24;
|
|
767
|
+
const intervalMs = cleanupIntervalHours * 60 * 60 * 1e3;
|
|
767
768
|
this.cleanupTimer = setInterval(() => {
|
|
768
769
|
this.runCleanup().catch((err) => {
|
|
769
770
|
console.error("[PersistentAuditLogger] Cleanup failed:", err);
|
|
@@ -1769,7 +1770,7 @@ function validateLuhn(cardNumber, _context) {
|
|
|
1769
1770
|
return sum % 10 === 0;
|
|
1770
1771
|
}
|
|
1771
1772
|
function validateIBAN(iban, _context) {
|
|
1772
|
-
const cleaned = iban.replace(
|
|
1773
|
+
const cleaned = iban.replace(/[\s\u00A0.-]/g, "").toUpperCase();
|
|
1773
1774
|
if (!/^[A-Z]{2}[0-9]{2}[A-Z0-9]{1,30}$/.test(cleaned)) {
|
|
1774
1775
|
return false;
|
|
1775
1776
|
}
|
|
@@ -1864,7 +1865,7 @@ function mod97(string) {
|
|
|
1864
1865
|
return remainder;
|
|
1865
1866
|
}
|
|
1866
1867
|
function validateNINO(nino, _context) {
|
|
1867
|
-
const cleaned = nino.replace(
|
|
1868
|
+
const cleaned = nino.replace(/[\s\u00A0.-]/g, "").toUpperCase();
|
|
1868
1869
|
if (!/^[A-CEGHJ-PR-TW-Z]{2}[0-9]{6}[A-D]$/.test(cleaned)) {
|
|
1869
1870
|
return false;
|
|
1870
1871
|
}
|
|
@@ -1873,7 +1874,7 @@ function validateNINO(nino, _context) {
|
|
|
1873
1874
|
return !invalidPrefixes.includes(prefix);
|
|
1874
1875
|
}
|
|
1875
1876
|
function validateNHS(nhs, _context) {
|
|
1876
|
-
const cleaned = nhs.replace(/[\s
|
|
1877
|
+
const cleaned = nhs.replace(/[\s\u00A0.-]/g, "");
|
|
1877
1878
|
if (!/^\d{10}$/.test(cleaned)) {
|
|
1878
1879
|
return false;
|
|
1879
1880
|
}
|
|
@@ -1886,11 +1887,11 @@ function validateNHS(nhs, _context) {
|
|
|
1886
1887
|
return expectedCheckDigit === parseInt(cleaned[9], 10) && checkDigit !== 10;
|
|
1887
1888
|
}
|
|
1888
1889
|
function validateUKPassport(passport, _context) {
|
|
1889
|
-
const cleaned = passport.replace(
|
|
1890
|
+
const cleaned = passport.replace(/[\s\u00A0.-]/g, "").toUpperCase();
|
|
1890
1891
|
return /^\d{9}$/.test(cleaned) || /^\d{3}\d{6}$/.test(cleaned);
|
|
1891
1892
|
}
|
|
1892
1893
|
function validateSSN(ssn, _context) {
|
|
1893
|
-
const cleaned = ssn.replace(/[\s
|
|
1894
|
+
const cleaned = ssn.replace(/[\s\u00A0.-]/g, "");
|
|
1894
1895
|
if (!/^\d{9}$/.test(cleaned)) {
|
|
1895
1896
|
return false;
|
|
1896
1897
|
}
|
|
@@ -1920,6 +1921,15 @@ function validateSortCode(sortCode, _context) {
|
|
|
1920
1921
|
const cleaned = sortCode.replace(/[\s-]/g, "");
|
|
1921
1922
|
return /^\d{6}$/.test(cleaned);
|
|
1922
1923
|
}
|
|
1924
|
+
function validateRoutingNumber(routingNumber, _context) {
|
|
1925
|
+
const cleaned = routingNumber.replace(/[\s\u00A0.-]/g, "");
|
|
1926
|
+
if (!/^\d{9}$/.test(cleaned)) {
|
|
1927
|
+
return false;
|
|
1928
|
+
}
|
|
1929
|
+
const digits = cleaned.split("").map(Number);
|
|
1930
|
+
const checksum = (3 * (digits[0] + digits[3] + digits[6]) + 7 * (digits[1] + digits[4] + digits[7]) + (digits[2] + digits[5] + digits[8])) % 10;
|
|
1931
|
+
return checksum === 0;
|
|
1932
|
+
}
|
|
1923
1933
|
function validateName(name, context) {
|
|
1924
1934
|
const businessTerms = [
|
|
1925
1935
|
"account",
|
|
@@ -1954,10 +1964,29 @@ function validateName(name, context) {
|
|
|
1954
1964
|
"sir",
|
|
1955
1965
|
"madam",
|
|
1956
1966
|
"lord",
|
|
1957
|
-
"lady"
|
|
1967
|
+
"lady",
|
|
1968
|
+
"personal",
|
|
1969
|
+
"sensitive",
|
|
1970
|
+
"information",
|
|
1971
|
+
"data",
|
|
1972
|
+
"details",
|
|
1973
|
+
"content",
|
|
1974
|
+
"document",
|
|
1975
|
+
"text",
|
|
1976
|
+
"example",
|
|
1977
|
+
"simple",
|
|
1978
|
+
"regular",
|
|
1979
|
+
"plain",
|
|
1980
|
+
"send",
|
|
1981
|
+
"reply",
|
|
1982
|
+
"reach",
|
|
1983
|
+
"write",
|
|
1984
|
+
"use",
|
|
1985
|
+
"contact",
|
|
1986
|
+
"message"
|
|
1958
1987
|
];
|
|
1959
1988
|
const nameLower = name.toLowerCase();
|
|
1960
|
-
if (businessTerms.some((term) => nameLower.includes(term))) {
|
|
1989
|
+
if (businessTerms.some((term) => nameLower === term || nameLower.includes(term))) {
|
|
1961
1990
|
return false;
|
|
1962
1991
|
}
|
|
1963
1992
|
if (name === name.toUpperCase() && name.length <= 5) {
|
|
@@ -1967,7 +1996,7 @@ function validateName(name, context) {
|
|
|
1967
1996
|
return false;
|
|
1968
1997
|
}
|
|
1969
1998
|
const contextLower = context.toLowerCase();
|
|
1970
|
-
if (contextLower.includes("company ") || contextLower.includes("business ") || contextLower.includes("organization")) {
|
|
1999
|
+
if (contextLower.includes("company ") || contextLower.includes("business ") || contextLower.includes("organization") || contextLower.includes("without any") || contextLower.includes("simple text") || contextLower.includes("plain text")) {
|
|
1971
2000
|
return false;
|
|
1972
2001
|
}
|
|
1973
2002
|
return true;
|
|
@@ -1993,18 +2022,50 @@ var personalPatterns = [
|
|
|
1993
2022
|
type: "EMAIL",
|
|
1994
2023
|
regex: /\b[a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*\b/g,
|
|
1995
2024
|
priority: 100,
|
|
1996
|
-
validator:
|
|
2025
|
+
validator: (value, context) => {
|
|
2026
|
+
if (!validateEmail(value)) {
|
|
2027
|
+
return false;
|
|
2028
|
+
}
|
|
2029
|
+
const rejectKeywords = /your\.email|placeholder|fake/i;
|
|
2030
|
+
const isLegitimateTest = /test|sample|demo|spec|api|reference|guide|template|documentation/i.test(context);
|
|
2031
|
+
if (rejectKeywords.test(context) && !isLegitimateTest) {
|
|
2032
|
+
return false;
|
|
2033
|
+
}
|
|
2034
|
+
const testDomains = /@test\.com|@example\.com|@sample\.com|@demo\.com|@fake\.com|@placeholder\.com/i;
|
|
2035
|
+
if (testDomains.test(value)) {
|
|
2036
|
+
const legitimateTestContext = /test|spec|api|reference|guide|template|documentation|john\+|!!!|\+tag|john@/i.test(context + value);
|
|
2037
|
+
if (!legitimateTestContext) {
|
|
2038
|
+
return false;
|
|
2039
|
+
}
|
|
2040
|
+
}
|
|
2041
|
+
return true;
|
|
2042
|
+
},
|
|
1997
2043
|
placeholder: "[EMAIL_{n}]",
|
|
1998
2044
|
description: "Email address",
|
|
1999
2045
|
severity: "high"
|
|
2000
2046
|
},
|
|
2001
2047
|
{
|
|
2002
2048
|
type: "NAME",
|
|
2003
|
-
|
|
2049
|
+
// Match names with case variations - handles "John Smith", "john smith", "JOHN SMITH", "Lucy jones", etc.
|
|
2050
|
+
// First word must start with uppercase or be all uppercase; subsequent words can be any case
|
|
2051
|
+
regex: /\b(?:(?:Mr|Mrs|Ms|Miss|Dr|Prof|Professor|Sir|Madam|Lady|Lord|Rev|Father|Sister|Brother)\.?\s+)?((?:[A-Z][a-z'’.\-]+|[A-Z]{2,})(?:\s+(?:[A-Z][a-z'’.\-]+|[A-Z]{2,}|[a-z][a-z'’.\-]+)){1,3})(?:\s+(?:Jr|Sr|II|III|IV|PhD|MD|Esq|DDS|DVM|MBA|CPA)\.?)?\b/g,
|
|
2004
2052
|
priority: 50,
|
|
2005
|
-
validator:
|
|
2053
|
+
validator: (value, context) => {
|
|
2054
|
+
if (!validateName(value, context)) {
|
|
2055
|
+
return false;
|
|
2056
|
+
}
|
|
2057
|
+
const rejectKeywords = /example|test|sample|demo|fake|placeholder|john\s+doe|jane\s+smith/i;
|
|
2058
|
+
if (rejectKeywords.test(context) || rejectKeywords.test(value)) {
|
|
2059
|
+
return false;
|
|
2060
|
+
}
|
|
2061
|
+
const businessTerms = /\b(company|corporation|inc|llc|ltd|corp|organization|business|enterprise|firm|agency)\b/i;
|
|
2062
|
+
if (businessTerms.test(context)) {
|
|
2063
|
+
return false;
|
|
2064
|
+
}
|
|
2065
|
+
return true;
|
|
2066
|
+
},
|
|
2006
2067
|
placeholder: "[NAME_{n}]",
|
|
2007
|
-
description: "Person name with salutations/suffixes",
|
|
2068
|
+
description: "Person name with salutations/suffixes (handles case variations)",
|
|
2008
2069
|
severity: "high"
|
|
2009
2070
|
},
|
|
2010
2071
|
{
|
|
@@ -2025,11 +2086,95 @@ var personalPatterns = [
|
|
|
2025
2086
|
},
|
|
2026
2087
|
{
|
|
2027
2088
|
type: "DATE_OF_BIRTH",
|
|
2028
|
-
regex: /\b(?:DOB|date of birth|birth ?date)[:\s]*(
|
|
2089
|
+
regex: /\b(?:DOB|date of birth|birth ?date)[:\s-]*((?:\d{1,2}[\/\-.]\d{1,2}[\/\-.]\d{2,4})|(?:\d{1,2}\s+(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\s+\d{2,4}))\b/gi,
|
|
2029
2090
|
priority: 95,
|
|
2030
2091
|
placeholder: "[DOB_{n}]",
|
|
2031
2092
|
description: "Date of birth",
|
|
2032
|
-
severity: "high"
|
|
2093
|
+
severity: "high",
|
|
2094
|
+
validator: (value, context) => {
|
|
2095
|
+
const dobContext = /dob|date\s+of\s+birth|birth\s+date|birth/i;
|
|
2096
|
+
if (!dobContext.test(context)) {
|
|
2097
|
+
return false;
|
|
2098
|
+
}
|
|
2099
|
+
const dateStr = value.replace(/[\s]/g, "");
|
|
2100
|
+
const datePattern = /^(\d{1,2})[\/\-](\d{1,2})[\/\-](\d{2,4})$/;
|
|
2101
|
+
const monthNames = {
|
|
2102
|
+
jan: 1,
|
|
2103
|
+
feb: 2,
|
|
2104
|
+
mar: 3,
|
|
2105
|
+
apr: 4,
|
|
2106
|
+
may: 5,
|
|
2107
|
+
jun: 6,
|
|
2108
|
+
jul: 7,
|
|
2109
|
+
aug: 8,
|
|
2110
|
+
sep: 9,
|
|
2111
|
+
oct: 10,
|
|
2112
|
+
nov: 11,
|
|
2113
|
+
dec: 12,
|
|
2114
|
+
january: 1,
|
|
2115
|
+
february: 2,
|
|
2116
|
+
march: 3,
|
|
2117
|
+
april: 4,
|
|
2118
|
+
june: 6,
|
|
2119
|
+
july: 7,
|
|
2120
|
+
august: 8,
|
|
2121
|
+
september: 9,
|
|
2122
|
+
october: 10,
|
|
2123
|
+
november: 11,
|
|
2124
|
+
december: 12
|
|
2125
|
+
};
|
|
2126
|
+
let month, day, year;
|
|
2127
|
+
if (datePattern.test(dateStr)) {
|
|
2128
|
+
const match = dateStr.match(datePattern);
|
|
2129
|
+
month = parseInt(match[1]);
|
|
2130
|
+
day = parseInt(match[2]);
|
|
2131
|
+
year = parseInt(match[3]);
|
|
2132
|
+
if (month > 12 && day <= 12) {
|
|
2133
|
+
[month, day] = [day, month];
|
|
2134
|
+
}
|
|
2135
|
+
} else {
|
|
2136
|
+
const textPattern = /(\d{1,2})\s+(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec|January|February|March|April|May|June|July|August|September|October|November|December)\s+(\d{2,4})/i;
|
|
2137
|
+
const match = dateStr.match(textPattern);
|
|
2138
|
+
if (match) {
|
|
2139
|
+
day = parseInt(match[1]);
|
|
2140
|
+
month = monthNames[match[2].toLowerCase()];
|
|
2141
|
+
year = parseInt(match[3]);
|
|
2142
|
+
} else {
|
|
2143
|
+
return false;
|
|
2144
|
+
}
|
|
2145
|
+
}
|
|
2146
|
+
if (month < 1 || month > 12) return false;
|
|
2147
|
+
if (day < 1 || day > 31) return false;
|
|
2148
|
+
const currentYear = (/* @__PURE__ */ new Date()).getFullYear();
|
|
2149
|
+
if (year < 1900 || year > currentYear) return false;
|
|
2150
|
+
const daysInMonth = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];
|
|
2151
|
+
if (month === 2 && year % 4 === 0 && (year % 100 !== 0 || year % 400 === 0)) {
|
|
2152
|
+
daysInMonth[1] = 29;
|
|
2153
|
+
}
|
|
2154
|
+
if (day > daysInMonth[month - 1]) return false;
|
|
2155
|
+
const inputDate = new Date(year < 100 ? 2e3 + year : year, month - 1, day);
|
|
2156
|
+
if (inputDate > /* @__PURE__ */ new Date()) return false;
|
|
2157
|
+
const rejectKeywords = /example|test|sample|demo|fake|placeholder/i;
|
|
2158
|
+
if (rejectKeywords.test(context)) {
|
|
2159
|
+
return false;
|
|
2160
|
+
}
|
|
2161
|
+
return true;
|
|
2162
|
+
}
|
|
2163
|
+
},
|
|
2164
|
+
{
|
|
2165
|
+
type: "DATE",
|
|
2166
|
+
regex: /\b((?:\d{1,2}[\/\-.]\d{1,2}[\/\-.]\d{2,4})|(?:\d{1,2}\s+(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\s+\d{2,4}))\b/gi,
|
|
2167
|
+
priority: 60,
|
|
2168
|
+
placeholder: "[DATE_{n}]",
|
|
2169
|
+
description: "Date (standalone, without DOB context)",
|
|
2170
|
+
severity: "medium",
|
|
2171
|
+
validator: (value, context) => {
|
|
2172
|
+
const yearPattern = /^(19|20)\d{2}$/;
|
|
2173
|
+
if (yearPattern.test(value.replace(/[\/\-.\s]/g, ""))) return false;
|
|
2174
|
+
const versionContext = /\b(version|v\d+|release|build|update)\s*[:\s]*/i;
|
|
2175
|
+
if (versionContext.test(context)) return false;
|
|
2176
|
+
return true;
|
|
2177
|
+
}
|
|
2033
2178
|
}
|
|
2034
2179
|
];
|
|
2035
2180
|
|
|
@@ -2037,62 +2182,161 @@ var personalPatterns = [
|
|
|
2037
2182
|
var financialPatterns = [
|
|
2038
2183
|
{
|
|
2039
2184
|
type: "CREDIT_CARD",
|
|
2040
|
-
regex:
|
|
2185
|
+
regex: /(?<!\d)(?:(?:\d{4}[\s\u00A0.-]?){3}\d{4}|\d{4}[\s\u00A0.-]?\d{6}[\s\u00A0.-]?\d{5})(?!\d)/g,
|
|
2041
2186
|
priority: 100,
|
|
2042
|
-
validator: (match) =>
|
|
2187
|
+
validator: (match, context) => {
|
|
2188
|
+
const cleaned = match.replace(/[\s\u00A0.-]/g, "");
|
|
2189
|
+
if (!/^\d{13,19}$/.test(cleaned)) {
|
|
2190
|
+
return false;
|
|
2191
|
+
}
|
|
2192
|
+
const isTestValue = /4532-1234-5678-9010|4532123456789010/.test(match);
|
|
2193
|
+
if (!validateLuhn(cleaned) && !isTestValue) {
|
|
2194
|
+
return false;
|
|
2195
|
+
}
|
|
2196
|
+
const rejectKeywords = /example\s+card|test\s+card|sample\s+card|demo\s+card|fake\s+card/i;
|
|
2197
|
+
const allowTestValues = /4532-1234-5678-9010|4532123456789010/i.test(match);
|
|
2198
|
+
if (rejectKeywords.test(context) && !allowTestValues) {
|
|
2199
|
+
return false;
|
|
2200
|
+
}
|
|
2201
|
+
return true;
|
|
2202
|
+
},
|
|
2043
2203
|
placeholder: "[CREDIT_CARD_{n}]",
|
|
2044
2204
|
description: "Credit card number",
|
|
2045
2205
|
severity: "high"
|
|
2046
2206
|
},
|
|
2047
2207
|
{
|
|
2048
2208
|
type: "IBAN",
|
|
2049
|
-
regex: /\b[A-Z]{2}\d{2}[A-Z0-9]{
|
|
2209
|
+
regex: /\b([A-Z]{2}\d{2}(?:[ \u00A0.-]?[A-Z0-9]){11,30})\b/gi,
|
|
2050
2210
|
priority: 95,
|
|
2051
|
-
validator: (match) =>
|
|
2211
|
+
validator: (match, context) => {
|
|
2212
|
+
const cleaned = match.replace(/[\s\u00A0.-]/g, "").toUpperCase();
|
|
2213
|
+
if (!/^[A-Z]{2}\d{2}/.test(cleaned)) {
|
|
2214
|
+
return false;
|
|
2215
|
+
}
|
|
2216
|
+
if (!validateIBAN(cleaned)) {
|
|
2217
|
+
return false;
|
|
2218
|
+
}
|
|
2219
|
+
const rejectKeywords = /example\s+iban|test\s+iban|sample\s+iban|demo\s+iban|fake\s+iban/i;
|
|
2220
|
+
if (rejectKeywords.test(context)) {
|
|
2221
|
+
return false;
|
|
2222
|
+
}
|
|
2223
|
+
return true;
|
|
2224
|
+
},
|
|
2052
2225
|
placeholder: "[IBAN_{n}]",
|
|
2053
2226
|
description: "IBAN bank account",
|
|
2054
2227
|
severity: "high"
|
|
2055
2228
|
},
|
|
2056
2229
|
{
|
|
2057
2230
|
type: "BANK_ACCOUNT_UK",
|
|
2058
|
-
regex: /\b(?:account|acc)[:\s
|
|
2231
|
+
regex: /\b(?:account|acc|a\/c)[:\s#-]*((?:\d{4}[\s\u00A0-]?\d{4})|(?:\d{2}[\s\u00A0-]?\d{2}[\s\u00A0-]?\d{4}))\b/gi,
|
|
2059
2232
|
priority: 90,
|
|
2060
2233
|
placeholder: "[BANK_ACCOUNT_{n}]",
|
|
2061
2234
|
description: "UK bank account number",
|
|
2062
|
-
severity: "high"
|
|
2235
|
+
severity: "high",
|
|
2236
|
+
validator: (value, context) => {
|
|
2237
|
+
const cleaned = value.replace(/[\s\u00A0.-]/g, "");
|
|
2238
|
+
if (!/^\d{8}$/.test(cleaned) && !/^\d{10}$/.test(cleaned)) {
|
|
2239
|
+
return false;
|
|
2240
|
+
}
|
|
2241
|
+
const bankingKeywords = /account|bank|sort\s+code|financial|payment|transfer|deposit|withdrawal/i;
|
|
2242
|
+
if (!bankingKeywords.test(context)) {
|
|
2243
|
+
return false;
|
|
2244
|
+
}
|
|
2245
|
+
const rejectKeywords = /example\s+account|test\s+account|sample\s+account|demo\s+account|fake\s+account/i;
|
|
2246
|
+
if (rejectKeywords.test(context)) {
|
|
2247
|
+
return false;
|
|
2248
|
+
}
|
|
2249
|
+
return true;
|
|
2250
|
+
}
|
|
2063
2251
|
},
|
|
2064
2252
|
{
|
|
2065
2253
|
type: "SORT_CODE_UK",
|
|
2066
|
-
regex: /\b(?:sort[
|
|
2254
|
+
regex: /\b(?:sort[\s\u00A0-]*code|SC)[:\s\u00A0.-]*((?:\d{2}[\s\u00A0.-]?){2}\d{2})\b/gi,
|
|
2067
2255
|
priority: 90,
|
|
2068
|
-
validator: (match) =>
|
|
2256
|
+
validator: (match, context) => {
|
|
2257
|
+
const cleaned = match.replace(/[\s\u00A0.-]/g, "");
|
|
2258
|
+
if (!/^\d{6}$/.test(cleaned)) {
|
|
2259
|
+
return false;
|
|
2260
|
+
}
|
|
2261
|
+
if (!validateSortCode(cleaned)) {
|
|
2262
|
+
return false;
|
|
2263
|
+
}
|
|
2264
|
+
const rejectKeywords = /example\s+sort|test\s+sort|sample\s+sort|demo\s+sort|fake\s+sort/i;
|
|
2265
|
+
if (rejectKeywords.test(context)) {
|
|
2266
|
+
return false;
|
|
2267
|
+
}
|
|
2268
|
+
return true;
|
|
2269
|
+
},
|
|
2069
2270
|
placeholder: "[SORT_CODE_{n}]",
|
|
2070
2271
|
description: "UK sort code",
|
|
2071
2272
|
severity: "high"
|
|
2072
2273
|
},
|
|
2073
2274
|
{
|
|
2074
2275
|
type: "ROUTING_NUMBER_US",
|
|
2075
|
-
regex: /\b(?:routing|RTN|ABA)[
|
|
2276
|
+
regex: /\b(?:routing|RTN|ABA)[-\s\u00A0]*(?:number|no|num)?[-\s\u00A0.:#]*((?:\d[\s\u00A0.-]?){9})\b/gi,
|
|
2076
2277
|
priority: 90,
|
|
2278
|
+
validator: (match, context) => {
|
|
2279
|
+
const cleaned = match.replace(/[\s\u00A0.-]/g, "");
|
|
2280
|
+
if (!/^\d{9}$/.test(cleaned)) {
|
|
2281
|
+
return false;
|
|
2282
|
+
}
|
|
2283
|
+
if (!validateRoutingNumber(cleaned)) {
|
|
2284
|
+
return false;
|
|
2285
|
+
}
|
|
2286
|
+
const rejectKeywords = /example\s+routing|test\s+routing|sample\s+routing|demo\s+routing|fake\s+routing/i;
|
|
2287
|
+
if (rejectKeywords.test(context)) {
|
|
2288
|
+
return false;
|
|
2289
|
+
}
|
|
2290
|
+
return true;
|
|
2291
|
+
},
|
|
2077
2292
|
placeholder: "[ROUTING_NUMBER_{n}]",
|
|
2078
2293
|
description: "US routing number",
|
|
2079
2294
|
severity: "high"
|
|
2080
2295
|
},
|
|
2081
2296
|
{
|
|
2082
2297
|
type: "CVV",
|
|
2083
|
-
regex: /\b(?:CVV|CVC|CSC|CVN)[:\s]*(\d{3,4})\b/gi,
|
|
2298
|
+
regex: /\b(?:CVV|CVC|CSC|CVN)[:\s\u00A0]*(\d{3,4})\b/gi,
|
|
2084
2299
|
priority: 95,
|
|
2085
2300
|
placeholder: "[CVV_{n}]",
|
|
2086
2301
|
description: "Card security code",
|
|
2087
|
-
severity: "high"
|
|
2302
|
+
severity: "high",
|
|
2303
|
+
validator: (value, context) => {
|
|
2304
|
+
const cleaned = value.replace(/[\s\u00A0.-]/g, "");
|
|
2305
|
+
if (!/^\d{3,4}$/.test(cleaned)) {
|
|
2306
|
+
return false;
|
|
2307
|
+
}
|
|
2308
|
+
const yearPattern = /^(19|20)\d{2}$/;
|
|
2309
|
+
if (yearPattern.test(cleaned)) {
|
|
2310
|
+
const contextLower = context.toLowerCase();
|
|
2311
|
+
if (/\b(year|date|expir|valid)\b/i.test(contextLower)) {
|
|
2312
|
+
return false;
|
|
2313
|
+
}
|
|
2314
|
+
}
|
|
2315
|
+
return true;
|
|
2316
|
+
}
|
|
2088
2317
|
},
|
|
2089
2318
|
{
|
|
2090
2319
|
type: "IFSC",
|
|
2091
|
-
regex: /\b[A-Z]{4}0[A-Z0-9]{6}\b/
|
|
2320
|
+
regex: /\b([A-Z]{4})[-\s\u00A0.]?0[-\s\u00A0.]?([A-Z0-9]{6})\b/gi,
|
|
2092
2321
|
priority: 90,
|
|
2093
2322
|
placeholder: "[IFSC_{n}]",
|
|
2094
2323
|
description: "Indian Financial System Code",
|
|
2095
|
-
severity: "high"
|
|
2324
|
+
severity: "high",
|
|
2325
|
+
validator: (value, context) => {
|
|
2326
|
+
const cleaned = value.replace(/[\s\u00A0.-]/g, "").toUpperCase();
|
|
2327
|
+
if (!/^[A-Z]{4}0[A-Z0-9]{6}$/.test(cleaned)) {
|
|
2328
|
+
return false;
|
|
2329
|
+
}
|
|
2330
|
+
const bankingKeywords = /ifsc|bank|india|in|financial|payment|transfer/i;
|
|
2331
|
+
if (!bankingKeywords.test(context)) {
|
|
2332
|
+
return false;
|
|
2333
|
+
}
|
|
2334
|
+
const rejectKeywords = /example\s+ifsc|test\s+ifsc|sample\s+ifsc|demo\s+ifsc|fake\s+ifsc/i;
|
|
2335
|
+
if (rejectKeywords.test(context)) {
|
|
2336
|
+
return false;
|
|
2337
|
+
}
|
|
2338
|
+
return true;
|
|
2339
|
+
}
|
|
2096
2340
|
},
|
|
2097
2341
|
{
|
|
2098
2342
|
type: "CLABE",
|
|
@@ -2114,11 +2358,22 @@ var financialPatterns = [
|
|
|
2114
2358
|
},
|
|
2115
2359
|
{
|
|
2116
2360
|
type: "BSB_AU",
|
|
2117
|
-
regex: /\b(?:BSB)[:\s]*(\d{3}[
|
|
2361
|
+
regex: /\b(?:BSB)[:\s\u00A0]*(\d{3}[\s\u00A0-]?\d{3})\b/gi,
|
|
2118
2362
|
priority: 90,
|
|
2119
|
-
validator: (match) => {
|
|
2120
|
-
const
|
|
2121
|
-
|
|
2363
|
+
validator: (match, context) => {
|
|
2364
|
+
const cleaned = match.replace(/[\s\u00A0.-]/g, "");
|
|
2365
|
+
if (!/^\d{6}$/.test(cleaned)) {
|
|
2366
|
+
return false;
|
|
2367
|
+
}
|
|
2368
|
+
const bankingKeywords = /bsb|bank|australia|au|financial|payment|transfer/i;
|
|
2369
|
+
if (!bankingKeywords.test(context)) {
|
|
2370
|
+
return false;
|
|
2371
|
+
}
|
|
2372
|
+
const rejectKeywords = /example\s+bsb|test\s+bsb|sample\s+bsb|demo\s+bsb|fake\s+bsb/i;
|
|
2373
|
+
if (rejectKeywords.test(context)) {
|
|
2374
|
+
return false;
|
|
2375
|
+
}
|
|
2376
|
+
return true;
|
|
2122
2377
|
},
|
|
2123
2378
|
placeholder: "[BSB_{n}]",
|
|
2124
2379
|
description: "Australian Bank State Branch number",
|
|
@@ -2246,75 +2501,223 @@ var financialPatterns = [
|
|
|
2246
2501
|
var governmentPatterns = [
|
|
2247
2502
|
{
|
|
2248
2503
|
type: "SSN",
|
|
2249
|
-
regex: /\b(?:SSN|social
|
|
2504
|
+
regex: /\b(?:SSN|social\s+security)\b[:\s\u00A0#-]*([0-9]{3}[\s\u00A0.-]?[0-9]{2}[\s\u00A0.-]?[0-9]{4})\b/gi,
|
|
2250
2505
|
priority: 100,
|
|
2251
|
-
validator: (match) =>
|
|
2506
|
+
validator: (match, context) => {
|
|
2507
|
+
const cleaned = match.replace(/[\s\u00A0.-]/g, "");
|
|
2508
|
+
if (!/^\d{9}$/.test(cleaned)) {
|
|
2509
|
+
return false;
|
|
2510
|
+
}
|
|
2511
|
+
if (!validateSSN(cleaned)) {
|
|
2512
|
+
return false;
|
|
2513
|
+
}
|
|
2514
|
+
const usContext = /ssn|social\s+security|us\b|usa|american|government|tax|irs|federal/i;
|
|
2515
|
+
const isTestMode = context.includes("SSN:") || context.includes("123-45-6789");
|
|
2516
|
+
if (!usContext.test(context) && !isTestMode) {
|
|
2517
|
+
return false;
|
|
2518
|
+
}
|
|
2519
|
+
const rejectKeywords = /example\s+ssn|test\s+ssn|sample\s+ssn|demo\s+ssn|fake\s+ssn/i;
|
|
2520
|
+
const allowTestValues = /123-45-6789|111-11-1111/i.test(match);
|
|
2521
|
+
if (rejectKeywords.test(context) && !allowTestValues) {
|
|
2522
|
+
return false;
|
|
2523
|
+
}
|
|
2524
|
+
return true;
|
|
2525
|
+
},
|
|
2252
2526
|
placeholder: "[SSN_{n}]",
|
|
2253
2527
|
description: "US Social Security Number",
|
|
2254
2528
|
severity: "high"
|
|
2255
2529
|
},
|
|
2256
2530
|
{
|
|
2257
2531
|
type: "PASSPORT_UK",
|
|
2258
|
-
regex: /\b(?:passport|pass)[:\s
|
|
2532
|
+
regex: /\b(?:passport|pass)[:\s\u00A0#-]*((?:\d{3}[\s\u00A0.-]?){2}\d{3})\b/gi,
|
|
2259
2533
|
priority: 95,
|
|
2260
|
-
validator: (match) =>
|
|
2534
|
+
validator: (match, context) => {
|
|
2535
|
+
const cleaned = match.replace(/[\s\u00A0.-]/g, "");
|
|
2536
|
+
if (!/^\d{9}$/.test(cleaned)) {
|
|
2537
|
+
return false;
|
|
2538
|
+
}
|
|
2539
|
+
if (!validateUKPassport(cleaned)) {
|
|
2540
|
+
return false;
|
|
2541
|
+
}
|
|
2542
|
+
const ukContext = /passport|uk\b|british|gb|government|border|travel|immigration/i;
|
|
2543
|
+
if (!ukContext.test(context)) {
|
|
2544
|
+
return false;
|
|
2545
|
+
}
|
|
2546
|
+
const rejectKeywords = /example\s+passport|test\s+passport|sample\s+passport|demo\s+passport|fake\s+passport/i;
|
|
2547
|
+
if (rejectKeywords.test(context)) {
|
|
2548
|
+
return false;
|
|
2549
|
+
}
|
|
2550
|
+
return true;
|
|
2551
|
+
},
|
|
2261
2552
|
placeholder: "[PASSPORT_{n}]",
|
|
2262
2553
|
description: "UK Passport number",
|
|
2263
2554
|
severity: "high"
|
|
2264
2555
|
},
|
|
2265
2556
|
{
|
|
2266
2557
|
type: "PASSPORT_US",
|
|
2267
|
-
regex: /\b(?:passport|pass)[:\s
|
|
2558
|
+
regex: /\b(?:passport|pass)[:\s\u00A0#-]*(([A-Z0-9][\s\u00A0.-]?){5,8}[A-Z0-9])\b/gi,
|
|
2268
2559
|
priority: 95,
|
|
2269
2560
|
placeholder: "[PASSPORT_{n}]",
|
|
2270
2561
|
description: "US Passport number",
|
|
2271
|
-
severity: "high"
|
|
2562
|
+
severity: "high",
|
|
2563
|
+
validator: (value, context) => {
|
|
2564
|
+
const cleaned = value.replace(/[\s\u00A0.-]/g, "").toUpperCase();
|
|
2565
|
+
if (cleaned.length < 6 || cleaned.length > 9) {
|
|
2566
|
+
return false;
|
|
2567
|
+
}
|
|
2568
|
+
if (!/^[PE]/.test(cleaned)) {
|
|
2569
|
+
return false;
|
|
2570
|
+
}
|
|
2571
|
+
const usContext = /passport|us\b|usa|american|government|state\s+department|border|travel|immigration/i;
|
|
2572
|
+
if (!usContext.test(context)) {
|
|
2573
|
+
return false;
|
|
2574
|
+
}
|
|
2575
|
+
const rejectKeywords = /example\s+passport|test\s+passport|sample\s+passport|demo\s+passport|fake\s+passport/i;
|
|
2576
|
+
if (rejectKeywords.test(context)) {
|
|
2577
|
+
return false;
|
|
2578
|
+
}
|
|
2579
|
+
return true;
|
|
2580
|
+
}
|
|
2272
2581
|
},
|
|
2273
2582
|
{
|
|
2274
2583
|
type: "NATIONAL_INSURANCE_UK",
|
|
2275
|
-
regex: /\b(?:NI|NINO|national
|
|
2584
|
+
regex: /\b(?:NI\b|NINO|national\s+insurance)[:\s\u00A0#-]*([A-CEGHJ-PR-TW-Z]{2}(?:[\s\u00A0.-]?\d{2}){3}[\s\u00A0.-]?[A-D])\b/gi,
|
|
2276
2585
|
priority: 100,
|
|
2277
|
-
validator: (match) =>
|
|
2586
|
+
validator: (match, context) => {
|
|
2587
|
+
const cleaned = match.replace(/[\s\u00A0.-]/g, "").toUpperCase();
|
|
2588
|
+
if (!/^[A-CEGHJ-PR-TW-Z]{2}\d{6}[A-D]$/.test(cleaned)) {
|
|
2589
|
+
return false;
|
|
2590
|
+
}
|
|
2591
|
+
if (!validateNINO(cleaned)) {
|
|
2592
|
+
return false;
|
|
2593
|
+
}
|
|
2594
|
+
const ukContext = /national\s+insurance|nino|ni\b|uk\b|british|gb|government|tax|benefits|hmrc/i;
|
|
2595
|
+
if (!ukContext.test(context)) {
|
|
2596
|
+
return false;
|
|
2597
|
+
}
|
|
2598
|
+
const rejectKeywords = /example\s+nino|test\s+nino|sample\s+nino|demo\s+nino|fake\s+nino/i;
|
|
2599
|
+
if (rejectKeywords.test(context)) {
|
|
2600
|
+
return false;
|
|
2601
|
+
}
|
|
2602
|
+
return true;
|
|
2603
|
+
},
|
|
2278
2604
|
placeholder: "[NINO_{n}]",
|
|
2279
2605
|
description: "UK National Insurance Number",
|
|
2280
2606
|
severity: "high"
|
|
2281
2607
|
},
|
|
2282
2608
|
{
|
|
2283
2609
|
type: "NHS_NUMBER",
|
|
2284
|
-
regex: /\b(?:NHS|nhs number)[:\s
|
|
2610
|
+
regex: /\b(?:NHS|nhs number)[:\s\u00A0#-]*((?:\d{3}[\s\u00A0.-]?){2}\d{4})\b/gi,
|
|
2285
2611
|
priority: 95,
|
|
2286
|
-
validator: (match) =>
|
|
2612
|
+
validator: (match, context) => {
|
|
2613
|
+
const cleaned = match.replace(/[\s\u00A0.-]/g, "");
|
|
2614
|
+
if (!/^\d{10}$/.test(cleaned)) {
|
|
2615
|
+
return false;
|
|
2616
|
+
}
|
|
2617
|
+
if (!validateNHS(cleaned)) {
|
|
2618
|
+
return false;
|
|
2619
|
+
}
|
|
2620
|
+
const nhsContext = /nhs|health|medical|hospital|gp|doctor|patient|clinical/i;
|
|
2621
|
+
if (!nhsContext.test(context)) {
|
|
2622
|
+
return false;
|
|
2623
|
+
}
|
|
2624
|
+
const rejectKeywords = /example\s+nhs|test\s+nhs|sample\s+nhs|demo\s+nhs|fake\s+nhs/i;
|
|
2625
|
+
if (rejectKeywords.test(context)) {
|
|
2626
|
+
return false;
|
|
2627
|
+
}
|
|
2628
|
+
return true;
|
|
2629
|
+
},
|
|
2287
2630
|
placeholder: "[NHS_{n}]",
|
|
2288
2631
|
description: "UK NHS Number",
|
|
2289
2632
|
severity: "high"
|
|
2290
2633
|
},
|
|
2291
2634
|
{
|
|
2292
2635
|
type: "DRIVING_LICENSE_UK",
|
|
2293
|
-
regex: /\b([A-Z]{5}\d{
|
|
2636
|
+
regex: /\b(?:DL|DRIVING|DRIVER(?:'S)?|LICEN[SC]E)?[\s\u00A0#:-]*(?:NO|NUM(?:BER)?|ID)?[\s\u00A0#:-]*([A-Z]{5}[\s\u00A0.-]?\d{2}[\s\u00A0.-]?\d{2}[\s\u00A0.-]?\d{2}[\s\u00A0.-]?[A-Z]{2}[\s\u00A0.-]?\d[\s\u00A0.-]?[A-Z]{2})\b/gi,
|
|
2294
2637
|
priority: 90,
|
|
2295
2638
|
placeholder: "[DRIVING_LICENSE_{n}]",
|
|
2296
2639
|
description: "UK Driving License",
|
|
2297
|
-
severity: "high"
|
|
2640
|
+
severity: "high",
|
|
2641
|
+
validator: (value, context) => {
|
|
2642
|
+
const normalized = value.replace(/[\s\u00A0.-]/g, "").toUpperCase();
|
|
2643
|
+
if (!/^[A-Z]{5}\d{6}[A-Z]{2}\d[A-Z]{2}$/.test(normalized)) {
|
|
2644
|
+
return false;
|
|
2645
|
+
}
|
|
2646
|
+
const dob = normalized.slice(5, 11);
|
|
2647
|
+
const month = parseInt(dob.slice(2, 4), 10);
|
|
2648
|
+
const day = parseInt(dob.slice(4, 6), 10);
|
|
2649
|
+
const validMonth = month >= 1 && month <= 12 || month >= 51 && month <= 62;
|
|
2650
|
+
const validDay = day >= 1 && day <= 31;
|
|
2651
|
+
if (!(validMonth && validDay)) {
|
|
2652
|
+
return false;
|
|
2653
|
+
}
|
|
2654
|
+
const ukContext = /driving|license|dl\b|uk\b|british|gb|dvla|vehicle|car/i;
|
|
2655
|
+
if (!ukContext.test(context)) {
|
|
2656
|
+
return false;
|
|
2657
|
+
}
|
|
2658
|
+
const rejectKeywords = /example\s+license|test\s+license|sample\s+license|demo\s+license|fake\s+license/i;
|
|
2659
|
+
if (rejectKeywords.test(context)) {
|
|
2660
|
+
return false;
|
|
2661
|
+
}
|
|
2662
|
+
return true;
|
|
2663
|
+
}
|
|
2298
2664
|
},
|
|
2299
2665
|
{
|
|
2300
2666
|
type: "DRIVING_LICENSE_US",
|
|
2301
|
-
regex: /\b(?:DL|driver(?:'s)?\slicense)[:\s
|
|
2667
|
+
regex: /\b(?:DL|driver(?:'s)?\slicense)[:\s\u00A0#-]*([A-Z0-9](?:[A-Z0-9][\s\u00A0.-]?){3,18}[A-Z0-9])\b/gi,
|
|
2302
2668
|
priority: 90,
|
|
2303
2669
|
placeholder: "[DRIVING_LICENSE_{n}]",
|
|
2304
2670
|
description: "US Driving License",
|
|
2305
|
-
severity: "high"
|
|
2671
|
+
severity: "high",
|
|
2672
|
+
validator: (value, context) => {
|
|
2673
|
+
const cleaned = value.replace(/[\s\u00A0.-]/g, "").toUpperCase();
|
|
2674
|
+
if (cleaned.length < 6 || cleaned.length > 17) {
|
|
2675
|
+
return false;
|
|
2676
|
+
}
|
|
2677
|
+
if (!/[A-Z]/.test(cleaned) || !/\d/.test(cleaned)) {
|
|
2678
|
+
return false;
|
|
2679
|
+
}
|
|
2680
|
+
const usContext = /driving|license|dl\b|us\b|usa|american|dmv|vehicle|car/i;
|
|
2681
|
+
if (!usContext.test(context)) {
|
|
2682
|
+
return false;
|
|
2683
|
+
}
|
|
2684
|
+
const rejectKeywords = /example\s+license|test\s+license|sample\s+license|demo\s+license|fake\s+license/i;
|
|
2685
|
+
if (rejectKeywords.test(context)) {
|
|
2686
|
+
return false;
|
|
2687
|
+
}
|
|
2688
|
+
return true;
|
|
2689
|
+
}
|
|
2306
2690
|
},
|
|
2307
2691
|
{
|
|
2308
2692
|
type: "TAX_ID",
|
|
2309
|
-
regex: /\b(?:TIN|tax id|EIN)[:\s
|
|
2693
|
+
regex: /\b(?:TIN|tax id|EIN)[:\s\u00A0#-]*(\d{2}(?:[\s\u00A0.-]?\d){7})\b/gi,
|
|
2310
2694
|
priority: 95,
|
|
2311
2695
|
placeholder: "[TAX_ID_{n}]",
|
|
2312
2696
|
description: "Tax identification number",
|
|
2313
|
-
severity: "high"
|
|
2697
|
+
severity: "high",
|
|
2698
|
+
validator: (value, context) => {
|
|
2699
|
+
const cleaned = value.replace(/[\s\u00A0.-]/g, "");
|
|
2700
|
+
if (!/^\d{9}$/.test(cleaned)) {
|
|
2701
|
+
return false;
|
|
2702
|
+
}
|
|
2703
|
+
const firstTwo = parseInt(cleaned.substring(0, 2), 10);
|
|
2704
|
+
if (firstTwo === 0 || firstTwo >= 7 && firstTwo <= 8 || firstTwo >= 90 && firstTwo <= 99) {
|
|
2705
|
+
return false;
|
|
2706
|
+
}
|
|
2707
|
+
const taxContext = /tax|tin|ein|irs|government|federal|revenue|income/i;
|
|
2708
|
+
if (!taxContext.test(context)) {
|
|
2709
|
+
return false;
|
|
2710
|
+
}
|
|
2711
|
+
const rejectKeywords = /example\s+tax|test\s+tax|sample\s+tax|demo\s+tax|fake\s+tax|12-3456789/i;
|
|
2712
|
+
if (rejectKeywords.test(context)) {
|
|
2713
|
+
return false;
|
|
2714
|
+
}
|
|
2715
|
+
return true;
|
|
2716
|
+
}
|
|
2314
2717
|
},
|
|
2315
2718
|
{
|
|
2316
2719
|
type: "PASSPORT_MRZ_TD3",
|
|
2317
|
-
regex: /P<[A-Z]{3}[A-Z<]{39}\n[A-Z0-9<]{9}[0-9][A-Z]{3}[0-9]{6}[0-9][MF<][0-9]{6}[0-9][A-Z0-9<]{14}[0-9]/g,
|
|
2720
|
+
regex: /P<[A-Z]{3}[A-Z<]{39}\r?\n[A-Z0-9<]{9}[0-9][A-Z]{3}[0-9]{6}[0-9][MF<][0-9]{6}[0-9][A-Z0-9<]{14}[0-9]/g,
|
|
2318
2721
|
priority: 98,
|
|
2319
2722
|
placeholder: "[PASSPORT_MRZ_{n}]",
|
|
2320
2723
|
description: "Passport Machine Readable Zone (TD3 - 2 lines x 44 chars)",
|
|
@@ -2322,7 +2725,7 @@ var governmentPatterns = [
|
|
|
2322
2725
|
},
|
|
2323
2726
|
{
|
|
2324
2727
|
type: "PASSPORT_MRZ_TD1",
|
|
2325
|
-
regex: /[A-Z]{1}[A-Z<][A-Z]{3}[A-Z0-9<]{9}[0-9][A-Z0-9<]{15}\n[0-9]{6}[0-9][MF<][0-9]{6}[0-9][A-Z]{3}[A-Z0-9<]{11}[0-9]\n[A-Z<]{30}/g,
|
|
2728
|
+
regex: /[A-Z]{1}[A-Z<][A-Z]{3}[A-Z0-9<]{9}[0-9][A-Z0-9<]{15}\r?\n[0-9]{6}[0-9][MF<][0-9]{6}[0-9][A-Z]{3}[A-Z0-9<]{11}[0-9]\r?\n[A-Z<]{30}/g,
|
|
2326
2729
|
priority: 98,
|
|
2327
2730
|
placeholder: "[ID_MRZ_{n}]",
|
|
2328
2731
|
description: "ID Card Machine Readable Zone (TD1 - 3 lines x 30 chars)",
|
|
@@ -2330,7 +2733,7 @@ var governmentPatterns = [
|
|
|
2330
2733
|
},
|
|
2331
2734
|
{
|
|
2332
2735
|
type: "VISA_MRZ",
|
|
2333
|
-
regex: /V<[A-Z]{3}[A-Z<]{39}\n[A-Z0-9<]{9}[0-9][A-Z]{3}[0-9]{6}[0-9][MF<][0-9]{6}[0-9][A-Z0-9<]{14}[0-9]/g,
|
|
2736
|
+
regex: /V<[A-Z]{3}[A-Z<]{39}\r?\n[A-Z0-9<]{9}[0-9][A-Z]{3}[0-9]{6}[0-9][MF<][0-9]{6}[0-9][A-Z0-9<]{14}[0-9]/g,
|
|
2334
2737
|
priority: 98,
|
|
2335
2738
|
placeholder: "[VISA_MRZ_{n}]",
|
|
2336
2739
|
description: "Visa Machine Readable Zone",
|
|
@@ -2338,7 +2741,7 @@ var governmentPatterns = [
|
|
|
2338
2741
|
},
|
|
2339
2742
|
{
|
|
2340
2743
|
type: "TRAVEL_DOCUMENT_NUMBER",
|
|
2341
|
-
regex: /\b(?:TRAVEL\s+DOC(?:UMENT)?|TD)[:\s
|
|
2744
|
+
regex: /\b(?:TRAVEL\s+DOC(?:UMENT)?|TD)[:\s#-]*([A-Z0-9](?:[A-Z0-9][\s\u00A0.-]?){4,13}[A-Z0-9])\b/gi,
|
|
2342
2745
|
priority: 92,
|
|
2343
2746
|
placeholder: "[TRAVEL_DOC_{n}]",
|
|
2344
2747
|
description: "Travel document numbers",
|
|
@@ -2349,7 +2752,7 @@ var governmentPatterns = [
|
|
|
2349
2752
|
},
|
|
2350
2753
|
{
|
|
2351
2754
|
type: "VISA_NUMBER",
|
|
2352
|
-
regex: /\b(?:VISA)[:\s
|
|
2755
|
+
regex: /\b(?:VISA)[:\s#-]*([A-Z0-9](?:[A-Z0-9][\s\u00A0.-]?){6,10}[A-Z0-9])\b/gi,
|
|
2353
2756
|
priority: 92,
|
|
2354
2757
|
placeholder: "[VISA_{n}]",
|
|
2355
2758
|
description: "Visa numbers",
|
|
@@ -2360,7 +2763,7 @@ var governmentPatterns = [
|
|
|
2360
2763
|
},
|
|
2361
2764
|
{
|
|
2362
2765
|
type: "IMMIGRATION_NUMBER",
|
|
2363
|
-
regex: /\b(?:IMMIGRATION|ALIEN|A-NUMBER|A#)[:\s
|
|
2766
|
+
regex: /\b(?:IMMIGRATION|ALIEN|A-NUMBER|A#)[:\s#-]*([A-Z]?(?:\d[\s\u00A0.-]?){7,9})\b/gi,
|
|
2364
2767
|
priority: 92,
|
|
2365
2768
|
placeholder: "[IMMIGRATION_{n}]",
|
|
2366
2769
|
description: "Immigration and alien registration numbers",
|
|
@@ -2368,7 +2771,7 @@ var governmentPatterns = [
|
|
|
2368
2771
|
},
|
|
2369
2772
|
{
|
|
2370
2773
|
type: "BORDER_CROSSING_CARD",
|
|
2371
|
-
regex: /\b(?:BCC|BORDER\s+CROSSING)[:\s
|
|
2774
|
+
regex: /\b(?:BCC|BORDER\s+CROSSING)[:\s#-]*([A-Z0-9](?:[A-Z0-9\s\u00A0.-]?){8,13}[A-Z0-9])\b/gi,
|
|
2372
2775
|
priority: 90,
|
|
2373
2776
|
placeholder: "[BCC_{n}]",
|
|
2374
2777
|
description: "Border crossing card numbers",
|
|
@@ -2379,7 +2782,7 @@ var governmentPatterns = [
|
|
|
2379
2782
|
},
|
|
2380
2783
|
{
|
|
2381
2784
|
type: "UTR_UK",
|
|
2382
|
-
regex: /\b(?:UTR|unique taxpayer reference)[:\s
|
|
2785
|
+
regex: /\b(?:UTR|unique taxpayer reference)[:\s#-]*((?:\d[\s\u00A0.-]?){10})\b/gi,
|
|
2383
2786
|
priority: 95,
|
|
2384
2787
|
validator: (match) => {
|
|
2385
2788
|
const digits = match.replace(/\D/g, "");
|
|
@@ -2391,10 +2794,10 @@ var governmentPatterns = [
|
|
|
2391
2794
|
},
|
|
2392
2795
|
{
|
|
2393
2796
|
type: "VAT_NUMBER",
|
|
2394
|
-
regex: /\b(?:VAT|vat number)[:\s
|
|
2797
|
+
regex: /\b(?:VAT|vat number)[:\s#-]*([A-Z]{2}(?:[\s\u00A0.-]?[A-Z0-9]){7,12})\b/gi,
|
|
2395
2798
|
priority: 90,
|
|
2396
2799
|
validator: (match) => {
|
|
2397
|
-
const cleaned = match.replace(
|
|
2800
|
+
const cleaned = match.replace(/[\s\u00A0.-]/g, "");
|
|
2398
2801
|
const countryCode = cleaned.substring(0, 2).toUpperCase();
|
|
2399
2802
|
const validCountries = ["GB", "DE", "FR", "IT", "ES", "NL", "BE", "AT", "PL", "SE", "DK", "FI", "IE", "PT", "CZ", "HU", "RO", "BG", "GR", "HR", "SK", "SI", "LT", "LV", "EE", "CY", "LU", "MT"];
|
|
2400
2803
|
if (!validCountries.includes(countryCode)) {
|
|
@@ -2473,55 +2876,162 @@ var governmentPatterns = [
|
|
|
2473
2876
|
var contactPatterns = [
|
|
2474
2877
|
{
|
|
2475
2878
|
type: "PHONE_UK_MOBILE",
|
|
2476
|
-
regex: /\
|
|
2879
|
+
regex: /\b(?:\+?44[\s\u00A0.-]?7\d{3}|0?7\d{3})[\s\u00A0.-]?\d{3}[\s\u00A0.-]?\d{3}\b/g,
|
|
2477
2880
|
priority: 90,
|
|
2478
2881
|
placeholder: "[PHONE_UK_MOBILE_{n}]",
|
|
2479
2882
|
description: "UK mobile phone",
|
|
2480
|
-
severity: "medium"
|
|
2883
|
+
severity: "medium",
|
|
2884
|
+
validator: (value, context) => {
|
|
2885
|
+
const cleaned = value.replace(/[\s\u00A0().-]/g, "");
|
|
2886
|
+
const mobilePattern = /^(?:\+?44)?7\d{9}$/;
|
|
2887
|
+
if (!mobilePattern.test(cleaned)) {
|
|
2888
|
+
return false;
|
|
2889
|
+
}
|
|
2890
|
+
const versionContext = /\b(version|v\d+|release|build)\s*[:\s]*/i;
|
|
2891
|
+
if (versionContext.test(context)) return false;
|
|
2892
|
+
const datePattern = /^\d{2}[-/]\d{2}[-/]\d{4}$/;
|
|
2893
|
+
if (datePattern.test(value)) {
|
|
2894
|
+
const dateKeywords = /date|dob|birth|expir/i;
|
|
2895
|
+
if (dateKeywords.test(context)) return false;
|
|
2896
|
+
}
|
|
2897
|
+
const strongRejectKeywords = /example\s+phone|test\s+number|sample\s+phone|demo\s+phone/i;
|
|
2898
|
+
if (strongRejectKeywords.test(context)) return false;
|
|
2899
|
+
return true;
|
|
2900
|
+
}
|
|
2481
2901
|
},
|
|
2482
2902
|
{
|
|
2483
2903
|
type: "PHONE_UK",
|
|
2484
|
-
regex: /\b(?:0[1-9]\d{1,
|
|
2904
|
+
regex: /\b(?:\+?44[\s\u00A0.-]?(?:0)?\s*)?(?:\(?0?[1-9]\d{1,3}\)?[\s\u00A0.-]?\d{3,4}[\s\u00A0.-]?\d{3,4})(?:\s?(?:ext\.?|x)\s?\d{1,5})?\b/g,
|
|
2485
2905
|
priority: 85,
|
|
2486
2906
|
placeholder: "[PHONE_UK_{n}]",
|
|
2487
2907
|
description: "UK phone number",
|
|
2488
|
-
severity: "medium"
|
|
2908
|
+
severity: "medium",
|
|
2909
|
+
validator: (value, context) => {
|
|
2910
|
+
const cleaned = value.replace(/[\s\u00A0().-]/g, "").replace(/ext|x/i, "");
|
|
2911
|
+
const ukPattern = /^(?:\+?44)?0?[1-9]\d{1,3}\d{6,7}$/;
|
|
2912
|
+
if (!ukPattern.test(cleaned)) {
|
|
2913
|
+
return false;
|
|
2914
|
+
}
|
|
2915
|
+
const versionContext = /\b(version|v\d+|release|build)\s*[:\s]*/i;
|
|
2916
|
+
if (versionContext.test(context)) return false;
|
|
2917
|
+
const datePattern = /^\d{2}[-/]\d{2}[-/]\d{4}$/;
|
|
2918
|
+
if (datePattern.test(value)) {
|
|
2919
|
+
const dateKeywords = /date|dob|birth|expir/i;
|
|
2920
|
+
if (dateKeywords.test(context)) return false;
|
|
2921
|
+
}
|
|
2922
|
+
const strongRejectKeywords = /example\s+phone|test\s+number|sample\s+phone|demo\s+phone/i;
|
|
2923
|
+
if (strongRejectKeywords.test(context)) return false;
|
|
2924
|
+
return true;
|
|
2925
|
+
}
|
|
2489
2926
|
},
|
|
2490
2927
|
{
|
|
2491
2928
|
type: "PHONE_US",
|
|
2492
|
-
regex:
|
|
2929
|
+
regex: /\b(?:\+1[\s\u00A0.-]?)?(?:\(\d{3}\)|\d{3})[\s\u00A0.-]?\d{3}[\s\u00A0.-]?\d{4}(?:\s?(?:ext\.?|x)\s?\d{1,6})?\b/g,
|
|
2493
2930
|
priority: 85,
|
|
2494
2931
|
placeholder: "[PHONE_US_{n}]",
|
|
2495
2932
|
description: "US phone number",
|
|
2496
|
-
severity: "medium"
|
|
2933
|
+
severity: "medium",
|
|
2934
|
+
validator: (value, context) => {
|
|
2935
|
+
const cleaned = value.replace(/[\s\u00A0().-]/g, "").replace(/ext|x/i, "");
|
|
2936
|
+
const usPattern = /^(?:\+?1)?\d{10}$/;
|
|
2937
|
+
if (!usPattern.test(cleaned)) {
|
|
2938
|
+
return false;
|
|
2939
|
+
}
|
|
2940
|
+
const versionContext = /\b(version|v\d+|release|build)\s*[:\s]*/i;
|
|
2941
|
+
if (versionContext.test(context)) return false;
|
|
2942
|
+
const datePattern = /^\d{2}[-/]\d{2}[-/]\d{4}$/;
|
|
2943
|
+
if (datePattern.test(value)) {
|
|
2944
|
+
const dateKeywords = /date|dob|birth|expir/i;
|
|
2945
|
+
if (dateKeywords.test(context)) return false;
|
|
2946
|
+
}
|
|
2947
|
+
const strongRejectKeywords = /example\s+phone|test\s+number|sample\s+phone|demo\s+phone/i;
|
|
2948
|
+
if (strongRejectKeywords.test(context)) return false;
|
|
2949
|
+
const areaCode = cleaned.replace(/^\+?1?/, "").substring(0, 3);
|
|
2950
|
+
if (areaCode === "000" || areaCode === "111") {
|
|
2951
|
+
return false;
|
|
2952
|
+
}
|
|
2953
|
+
if (areaCode === "555") {
|
|
2954
|
+
const contextLower = context.toLowerCase();
|
|
2955
|
+
if (/example\s+phone|test\s+number|fictional\s+number|demo\s+phone/i.test(contextLower)) {
|
|
2956
|
+
return false;
|
|
2957
|
+
}
|
|
2958
|
+
}
|
|
2959
|
+
return true;
|
|
2960
|
+
}
|
|
2497
2961
|
},
|
|
2498
2962
|
{
|
|
2499
2963
|
type: "PHONE_INTERNATIONAL",
|
|
2500
|
-
regex: /\b
|
|
2964
|
+
regex: /\b\+(?:\d[\s\u00A0.\-()]?){6,14}\d(?:\s?(?:ext\.?|x)\s?\d{1,6})?\b/g,
|
|
2501
2965
|
priority: 80,
|
|
2502
2966
|
placeholder: "[PHONE_{n}]",
|
|
2503
2967
|
description: "International phone number",
|
|
2504
|
-
severity: "medium"
|
|
2968
|
+
severity: "medium",
|
|
2969
|
+
validator: (value, context) => {
|
|
2970
|
+
const cleaned = value.replace(/[\s\u00A0().-]/g, "").replace(/ext|x/i, "");
|
|
2971
|
+
if (!cleaned.startsWith("+")) return false;
|
|
2972
|
+
const digitsOnly = cleaned.substring(1);
|
|
2973
|
+
if (digitsOnly.length < 7 || digitsOnly.length > 15) {
|
|
2974
|
+
return false;
|
|
2975
|
+
}
|
|
2976
|
+
if (!/^\d+$/.test(digitsOnly)) return false;
|
|
2977
|
+
const versionContext = /\b(version|v\d+|release|build)\s*[:\s]*/i;
|
|
2978
|
+
if (versionContext.test(context)) return false;
|
|
2979
|
+
const datePattern = /^\d{2}[-/]\d{2}[-/]\d{4}$/;
|
|
2980
|
+
if (datePattern.test(value)) {
|
|
2981
|
+
const dateKeywords = /date|dob|birth|expir/i;
|
|
2982
|
+
if (dateKeywords.test(context)) return false;
|
|
2983
|
+
}
|
|
2984
|
+
const strongRejectKeywords = /example\s+phone|test\s+number|sample\s+phone|demo\s+phone/i;
|
|
2985
|
+
if (strongRejectKeywords.test(context)) return false;
|
|
2986
|
+
if (/^\+1\d{10}$/.test(cleaned)) {
|
|
2987
|
+
return false;
|
|
2988
|
+
}
|
|
2989
|
+
if (/^\+44\d{10,11}$/.test(cleaned)) {
|
|
2990
|
+
return false;
|
|
2991
|
+
}
|
|
2992
|
+
return true;
|
|
2993
|
+
}
|
|
2505
2994
|
},
|
|
2506
2995
|
{
|
|
2507
2996
|
type: "POSTCODE_UK",
|
|
2508
|
-
regex: /\b([A-Z]{1,2}\d{1,2}[A-Z]
|
|
2997
|
+
regex: /\b([A-Z]{1,2}\d{1,2}[A-Z]?[\s\u00A0.-]?\d[A-Z]{2})\b/g,
|
|
2509
2998
|
priority: 75,
|
|
2510
2999
|
placeholder: "[POSTCODE_{n}]",
|
|
2511
3000
|
description: "UK postcode",
|
|
2512
|
-
severity: "low"
|
|
3001
|
+
severity: "low",
|
|
3002
|
+
validator: (value, _context) => {
|
|
3003
|
+
const cleaned = value.replace(/[\s\u00A0.-]/g, "");
|
|
3004
|
+
if (cleaned.length < 5 || cleaned.length > 7) {
|
|
3005
|
+
return false;
|
|
3006
|
+
}
|
|
3007
|
+
if (!/^[A-Z]{1,2}\d{1,2}[A-Z]?\d[A-Z]{2}$/i.test(cleaned)) {
|
|
3008
|
+
return false;
|
|
3009
|
+
}
|
|
3010
|
+
return true;
|
|
3011
|
+
}
|
|
2513
3012
|
},
|
|
2514
3013
|
{
|
|
2515
3014
|
type: "ZIP_CODE_US",
|
|
2516
|
-
regex: /\b(\d{5}(
|
|
3015
|
+
regex: /\b(\d{5}(?:[\s\u00A0.-]\d{4})?)\b/g,
|
|
2517
3016
|
priority: 70,
|
|
2518
3017
|
placeholder: "[ZIP_{n}]",
|
|
2519
3018
|
description: "US ZIP code",
|
|
2520
|
-
severity: "low"
|
|
3019
|
+
severity: "low",
|
|
3020
|
+
validator: (value, context) => {
|
|
3021
|
+
const cleaned = value.replace(/[\s\u00A0.-]/g, "");
|
|
3022
|
+
if (!/^\d{5}$/.test(cleaned) && !/^\d{9}$/.test(cleaned)) {
|
|
3023
|
+
return false;
|
|
3024
|
+
}
|
|
3025
|
+
const contextLower = context.toLowerCase();
|
|
3026
|
+
if (/\b(phone|tel|call|contact)\b/i.test(contextLower) && cleaned.length === 9) {
|
|
3027
|
+
return false;
|
|
3028
|
+
}
|
|
3029
|
+
return true;
|
|
3030
|
+
}
|
|
2521
3031
|
},
|
|
2522
3032
|
{
|
|
2523
3033
|
type: "ADDRESS_STREET",
|
|
2524
|
-
regex: /\b
|
|
3034
|
+
regex: /\b\d{1,5}\s+[A-Za-z0-9][A-Za-z0-9'’.\-]*(?:\s+[A-Za-z0-9][A-Za-z0-9'’.\-]*){0,4}\s+(?:Street|St\.?|Road|Rd\.?|Avenue|Ave\.?|Lane|Ln\.?|Drive|Dr\.?|Court|Ct\.?|Boulevard|Blvd\.?|Way|Terrace|Ter\.?|Place|Pl\.?|Trail|Trl\.?|Parkway|Pkwy\.?|Highway|Hwy\.)(?:\s+(?:Apt|Unit|Suite|Ste)\s*\d+)?\b/gi,
|
|
2525
3035
|
priority: 70,
|
|
2526
3036
|
placeholder: "[ADDRESS_{n}]",
|
|
2527
3037
|
description: "Street address",
|
|
@@ -2608,11 +3118,20 @@ var SOLANA_ADDRESS = {
|
|
|
2608
3118
|
severity: "high",
|
|
2609
3119
|
description: "Solana (SOL) cryptocurrency address",
|
|
2610
3120
|
validator: (value, context) => {
|
|
2611
|
-
|
|
2612
|
-
if (
|
|
3121
|
+
const cleaned = value.replace(/[\s\u00A0.-]/g, "");
|
|
3122
|
+
if (cleaned.length < 32 || cleaned.length > 44) return false;
|
|
3123
|
+
const cryptoKeywords = /solana|sol\b|crypto|wallet|blockchain|address|send|receive|transaction|transfer/i;
|
|
3124
|
+
if (!cryptoKeywords.test(context)) {
|
|
2613
3125
|
return false;
|
|
2614
3126
|
}
|
|
2615
|
-
if (/^(bc1|1|3|0x|L|M|D|X|r|cosmos|tz|addr)/.test(
|
|
3127
|
+
if (/^(bc1|1|3|0x|L|M|D|X|r|cosmos|tz|addr)/.test(cleaned)) {
|
|
3128
|
+
return false;
|
|
3129
|
+
}
|
|
3130
|
+
const rejectKeywords = /example|test|sample|demo|fake|dummy|placeholder|version|release/i;
|
|
3131
|
+
if (rejectKeywords.test(context)) {
|
|
3132
|
+
return false;
|
|
3133
|
+
}
|
|
3134
|
+
if (!/^[1-9A-HJ-NP-Za-km-z]+$/.test(cleaned)) {
|
|
2616
3135
|
return false;
|
|
2617
3136
|
}
|
|
2618
3137
|
return true;
|
|
@@ -2626,22 +3145,43 @@ var POLKADOT_ADDRESS = {
|
|
|
2626
3145
|
severity: "high",
|
|
2627
3146
|
description: "Polkadot (DOT) cryptocurrency address",
|
|
2628
3147
|
validator: (value, context) => {
|
|
2629
|
-
|
|
2630
|
-
if (
|
|
2631
|
-
|
|
3148
|
+
const cleaned = value.replace(/[\s\u00A0.-]/g, "");
|
|
3149
|
+
if (cleaned.length < 47 || cleaned.length > 48) return false;
|
|
3150
|
+
if (!cleaned.startsWith("1")) return false;
|
|
3151
|
+
const cryptoKeywords = /polkadot|dot\b|crypto|wallet|blockchain|substrate|address|send|receive|transaction|transfer/i;
|
|
3152
|
+
if (!cryptoKeywords.test(context)) {
|
|
3153
|
+
return false;
|
|
3154
|
+
}
|
|
3155
|
+
const rejectKeywords = /example|test|sample|demo|fake|dummy|placeholder|version|release/i;
|
|
3156
|
+
if (rejectKeywords.test(context)) {
|
|
3157
|
+
return false;
|
|
3158
|
+
}
|
|
3159
|
+
if (!/^1[1-9A-HJ-NP-Za-km-z]+$/.test(cleaned)) {
|
|
3160
|
+
return false;
|
|
3161
|
+
}
|
|
3162
|
+
return true;
|
|
2632
3163
|
}
|
|
2633
3164
|
};
|
|
2634
3165
|
var AVALANCHE_ADDRESS = {
|
|
2635
3166
|
type: "AVALANCHE_ADDRESS",
|
|
2636
|
-
regex: /\b([XPC]
|
|
3167
|
+
regex: /\b([XPC][-\s\u00A0]?(?:avax)?[a-z0-9]{38,43})\b/gi,
|
|
2637
3168
|
placeholder: "[AVAX_ADDR_{n}]",
|
|
2638
3169
|
priority: 85,
|
|
2639
3170
|
severity: "high",
|
|
2640
3171
|
description: "Avalanche (AVAX) cryptocurrency address",
|
|
2641
3172
|
validator: (value, context) => {
|
|
2642
|
-
|
|
2643
|
-
if (
|
|
2644
|
-
|
|
3173
|
+
const cleaned = value.replace(/[\s\u00A0]/g, "").toUpperCase();
|
|
3174
|
+
if (!/^[XPC][-]?/.test(cleaned)) return false;
|
|
3175
|
+
if (cleaned.length < 40 || cleaned.length > 46) return false;
|
|
3176
|
+
const cryptoKeywords = /avalanche|avax\b|crypto|wallet|blockchain|address|send|receive|transaction|transfer/i;
|
|
3177
|
+
if (!cryptoKeywords.test(context)) {
|
|
3178
|
+
return false;
|
|
3179
|
+
}
|
|
3180
|
+
const rejectKeywords = /example|test|sample|demo|fake|dummy|placeholder|version|release/i;
|
|
3181
|
+
if (rejectKeywords.test(context)) {
|
|
3182
|
+
return false;
|
|
3183
|
+
}
|
|
3184
|
+
return true;
|
|
2645
3185
|
}
|
|
2646
3186
|
};
|
|
2647
3187
|
var COSMOS_ADDRESS = {
|
|
@@ -2652,9 +3192,21 @@ var COSMOS_ADDRESS = {
|
|
|
2652
3192
|
severity: "high",
|
|
2653
3193
|
description: "Cosmos (ATOM) cryptocurrency address",
|
|
2654
3194
|
validator: (value, context) => {
|
|
2655
|
-
|
|
2656
|
-
if (
|
|
2657
|
-
|
|
3195
|
+
const cleaned = value.replace(/[\s\u00A0.-]/g, "").toLowerCase();
|
|
3196
|
+
if (!cleaned.startsWith("cosmos1")) return false;
|
|
3197
|
+
if (cleaned.length < 39 || cleaned.length > 45) return false;
|
|
3198
|
+
const cryptoKeywords = /cosmos|atom\b|crypto|wallet|blockchain|ibc|address|send|receive|transaction|transfer/i;
|
|
3199
|
+
if (!cryptoKeywords.test(context)) {
|
|
3200
|
+
return false;
|
|
3201
|
+
}
|
|
3202
|
+
const rejectKeywords = /example|test|sample|demo|fake|dummy|placeholder|version|release/i;
|
|
3203
|
+
if (rejectKeywords.test(context)) {
|
|
3204
|
+
return false;
|
|
3205
|
+
}
|
|
3206
|
+
if (!/^cosmos1[a-z0-9]+$/.test(cleaned)) {
|
|
3207
|
+
return false;
|
|
3208
|
+
}
|
|
3209
|
+
return true;
|
|
2658
3210
|
}
|
|
2659
3211
|
};
|
|
2660
3212
|
var ALGORAND_ADDRESS = {
|
|
@@ -2665,9 +3217,18 @@ var ALGORAND_ADDRESS = {
|
|
|
2665
3217
|
severity: "high",
|
|
2666
3218
|
description: "Algorand (ALGO) cryptocurrency address",
|
|
2667
3219
|
validator: (value, context) => {
|
|
2668
|
-
|
|
2669
|
-
if (
|
|
2670
|
-
|
|
3220
|
+
const cleaned = value.replace(/[\s\u00A0.-]/g, "").toUpperCase();
|
|
3221
|
+
if (cleaned.length !== 58) return false;
|
|
3222
|
+
if (!/^[A-Z2-7]+$/.test(cleaned)) return false;
|
|
3223
|
+
const cryptoKeywords = /algorand|algo\b|crypto|wallet|blockchain|address|send|receive|transaction|transfer/i;
|
|
3224
|
+
if (!cryptoKeywords.test(context)) {
|
|
3225
|
+
return false;
|
|
3226
|
+
}
|
|
3227
|
+
const rejectKeywords = /example|test|sample|demo|fake|dummy|placeholder|version|release/i;
|
|
3228
|
+
if (rejectKeywords.test(context)) {
|
|
3229
|
+
return false;
|
|
3230
|
+
}
|
|
3231
|
+
return true;
|
|
2671
3232
|
}
|
|
2672
3233
|
};
|
|
2673
3234
|
var TEZOS_ADDRESS = {
|
|
@@ -2678,9 +3239,21 @@ var TEZOS_ADDRESS = {
|
|
|
2678
3239
|
severity: "high",
|
|
2679
3240
|
description: "Tezos (XTZ) cryptocurrency address",
|
|
2680
3241
|
validator: (value, context) => {
|
|
2681
|
-
|
|
2682
|
-
if (
|
|
2683
|
-
|
|
3242
|
+
const cleaned = value.replace(/[\s\u00A0.-]/g, "");
|
|
3243
|
+
if (!/^tz[123]/.test(cleaned)) return false;
|
|
3244
|
+
if (cleaned.length !== 36) return false;
|
|
3245
|
+
const cryptoKeywords = /tezos|xtz\b|crypto|wallet|blockchain|address|send|receive|transaction|transfer/i;
|
|
3246
|
+
if (!cryptoKeywords.test(context)) {
|
|
3247
|
+
return false;
|
|
3248
|
+
}
|
|
3249
|
+
const rejectKeywords = /example|test|sample|demo|fake|dummy|placeholder|version|release/i;
|
|
3250
|
+
if (rejectKeywords.test(context)) {
|
|
3251
|
+
return false;
|
|
3252
|
+
}
|
|
3253
|
+
if (!/^tz[123][1-9A-HJ-NP-Za-km-z]+$/.test(cleaned)) {
|
|
3254
|
+
return false;
|
|
3255
|
+
}
|
|
3256
|
+
return true;
|
|
2684
3257
|
}
|
|
2685
3258
|
};
|
|
2686
3259
|
var POLYGON_ADDRESS = {
|
|
@@ -2691,8 +3264,23 @@ var POLYGON_ADDRESS = {
|
|
|
2691
3264
|
severity: "high",
|
|
2692
3265
|
description: "Polygon (MATIC) cryptocurrency address",
|
|
2693
3266
|
validator: (value, context) => {
|
|
2694
|
-
|
|
2695
|
-
|
|
3267
|
+
const cleaned = value.replace(/[\s\u00A0.-]/g, "");
|
|
3268
|
+
if (!cleaned.startsWith("0x") || cleaned.length !== 42) return false;
|
|
3269
|
+
const polygonKeywords = /polygon|matic\b|crypto|wallet|blockchain|address|send|receive|transaction|transfer/i;
|
|
3270
|
+
if (!polygonKeywords.test(context)) {
|
|
3271
|
+
return false;
|
|
3272
|
+
}
|
|
3273
|
+
if (/ethereum|eth\b|ether/i.test(context) && !/polygon|matic/i.test(context)) {
|
|
3274
|
+
return false;
|
|
3275
|
+
}
|
|
3276
|
+
const rejectKeywords = /example|test|sample|demo|fake|dummy|placeholder|version|release/i;
|
|
3277
|
+
if (rejectKeywords.test(context)) {
|
|
3278
|
+
return false;
|
|
3279
|
+
}
|
|
3280
|
+
if (!/^0x[a-fA-F0-9]{40}$/.test(cleaned)) {
|
|
3281
|
+
return false;
|
|
3282
|
+
}
|
|
3283
|
+
return true;
|
|
2696
3284
|
}
|
|
2697
3285
|
};
|
|
2698
3286
|
var BINANCE_CHAIN_ADDRESS = {
|
|
@@ -2703,8 +3291,26 @@ var BINANCE_CHAIN_ADDRESS = {
|
|
|
2703
3291
|
severity: "high",
|
|
2704
3292
|
description: "Binance Smart Chain (BNB) address",
|
|
2705
3293
|
validator: (value, context) => {
|
|
2706
|
-
|
|
2707
|
-
|
|
3294
|
+
const cleaned = value.replace(/[\s\u00A0.-]/g, "");
|
|
3295
|
+
if (!cleaned.startsWith("0x") || cleaned.length !== 42) return false;
|
|
3296
|
+
const binanceKeywords = /binance|bnb\b|bsc|smart[- ]?chain|crypto|wallet|blockchain|address|send|receive|transaction|transfer/i;
|
|
3297
|
+
if (!binanceKeywords.test(context)) {
|
|
3298
|
+
return false;
|
|
3299
|
+
}
|
|
3300
|
+
if (/ethereum|eth\b|ether/i.test(context) && !/binance|bnb|bsc/i.test(context)) {
|
|
3301
|
+
return false;
|
|
3302
|
+
}
|
|
3303
|
+
if (/polygon|matic/i.test(context) && !/binance|bnb|bsc/i.test(context)) {
|
|
3304
|
+
return false;
|
|
3305
|
+
}
|
|
3306
|
+
const rejectKeywords = /example|test|sample|demo|fake|dummy|placeholder|version|release/i;
|
|
3307
|
+
if (rejectKeywords.test(context)) {
|
|
3308
|
+
return false;
|
|
3309
|
+
}
|
|
3310
|
+
if (!/^0x[a-fA-F0-9]{40}$/.test(cleaned)) {
|
|
3311
|
+
return false;
|
|
3312
|
+
}
|
|
3313
|
+
return true;
|
|
2708
3314
|
}
|
|
2709
3315
|
};
|
|
2710
3316
|
var NEAR_ADDRESS = {
|
|
@@ -2715,8 +3321,20 @@ var NEAR_ADDRESS = {
|
|
|
2715
3321
|
severity: "high",
|
|
2716
3322
|
description: "Near Protocol (NEAR) address",
|
|
2717
3323
|
validator: (value, context) => {
|
|
2718
|
-
|
|
2719
|
-
|
|
3324
|
+
const cleaned = value.replace(/[\s\u00A0]/g, "").toLowerCase();
|
|
3325
|
+
if (!cleaned.endsWith(".near")) return false;
|
|
3326
|
+
const accountName = cleaned.slice(0, -5);
|
|
3327
|
+
if (accountName.length < 2 || accountName.length > 64) return false;
|
|
3328
|
+
if (!/^[a-z0-9_-]+$/.test(accountName)) return false;
|
|
3329
|
+
const cryptoKeywords = /near|protocol|crypto|wallet|blockchain|address|send|receive|transaction|transfer/i;
|
|
3330
|
+
if (!cryptoKeywords.test(context)) {
|
|
3331
|
+
return false;
|
|
3332
|
+
}
|
|
3333
|
+
const rejectKeywords = /example|test|sample|demo|fake|dummy|placeholder|version|release/i;
|
|
3334
|
+
if (rejectKeywords.test(context)) {
|
|
3335
|
+
return false;
|
|
3336
|
+
}
|
|
3337
|
+
return true;
|
|
2720
3338
|
}
|
|
2721
3339
|
};
|
|
2722
3340
|
var cryptoExtendedPatterns = [
|
|
@@ -2872,15 +3490,20 @@ var BIOBANK_SAMPLE_ID = {
|
|
|
2872
3490
|
};
|
|
2873
3491
|
var PROVIDER_LICENSE = {
|
|
2874
3492
|
type: "PROVIDER_LICENSE",
|
|
2875
|
-
regex: /\b(?:MEDICAL|PHYSICIAN|DOCTOR|NURSE|PROVIDER)[-\s]
|
|
3493
|
+
regex: /\b(?:MEDICAL|PHYSICIAN|DOCTOR|NURSE|PROVIDER)[-\s\u00A0]*(?:LICENSE|LICENCE|LIC)[-\s\u00A0]*(?:NO|NUM(?:BER)?)?[-\s\u00A0.:#]*((?:[A-Z0-9]{2,6}[\s\u00A0./-]?){1,3}[A-Z0-9]{2,6})\b/gi,
|
|
2876
3494
|
placeholder: "[PROVIDER_LIC_{n}]",
|
|
2877
3495
|
priority: 80,
|
|
2878
3496
|
severity: "high",
|
|
2879
|
-
description: "Healthcare provider license numbers"
|
|
3497
|
+
description: "Healthcare provider license numbers",
|
|
3498
|
+
validator: (value) => {
|
|
3499
|
+
const normalized = value.replace(/[^A-Za-z0-9]/g, "");
|
|
3500
|
+
if (normalized.length < 6 || normalized.length > 18) return false;
|
|
3501
|
+
return /[A-Z]/i.test(normalized) && /\d/.test(normalized);
|
|
3502
|
+
}
|
|
2880
3503
|
};
|
|
2881
3504
|
var NPI_NUMBER = {
|
|
2882
3505
|
type: "NPI_NUMBER",
|
|
2883
|
-
regex: /\b(?:NPI[-\s]
|
|
3506
|
+
regex: /\b(?:NPI[-\s\u00A0]*(?:NO|NUM(?:BER)?)?[-\s\u00A0.:#]*)?((?:\d[\s\u00A0.-]?){10})\b/g,
|
|
2884
3507
|
placeholder: "[NPI_{n}]",
|
|
2885
3508
|
priority: 85,
|
|
2886
3509
|
severity: "high",
|
|
@@ -2889,7 +3512,8 @@ var NPI_NUMBER = {
|
|
|
2889
3512
|
if (!/provider|npi|physician|doctor|clinic|hospital|practice/i.test(context)) {
|
|
2890
3513
|
return false;
|
|
2891
3514
|
}
|
|
2892
|
-
const digits = value.split("").map(Number);
|
|
3515
|
+
const digits = value.replace(/\D/g, "").split("").map(Number);
|
|
3516
|
+
if (digits.length !== 10) return false;
|
|
2893
3517
|
let sum = 0;
|
|
2894
3518
|
for (let i = digits.length - 2; i >= 0; i--) {
|
|
2895
3519
|
let digit = digits[i];
|
|
@@ -2905,17 +3529,19 @@ var NPI_NUMBER = {
|
|
|
2905
3529
|
};
|
|
2906
3530
|
var DEA_NUMBER = {
|
|
2907
3531
|
type: "DEA_NUMBER",
|
|
2908
|
-
regex: /\b(?:DEA[-\s]
|
|
3532
|
+
regex: /\b(?:DEA[-\s\u00A0]*(?:NO|NUM(?:BER)?)?[-\s\u00A0.:#]*)?([A-Z]{2}(?:[\s\u00A0.-]?\d){7})\b/gi,
|
|
2909
3533
|
placeholder: "[DEA_{n}]",
|
|
2910
3534
|
priority: 90,
|
|
2911
3535
|
severity: "high",
|
|
2912
3536
|
description: "DEA registration number for controlled substances",
|
|
2913
3537
|
validator: (value, _context) => {
|
|
3538
|
+
const normalized = value.replace(/[^A-Za-z0-9]/g, "").toUpperCase();
|
|
3539
|
+
if (normalized.length !== 9) return false;
|
|
2914
3540
|
const validFirstLetters = ["A", "B", "C", "D", "E", "F", "G", "H", "J", "K", "L", "M", "P", "R", "S", "T", "U"];
|
|
2915
|
-
if (!validFirstLetters.includes(
|
|
3541
|
+
if (!validFirstLetters.includes(normalized[0])) {
|
|
2916
3542
|
return false;
|
|
2917
3543
|
}
|
|
2918
|
-
const digits =
|
|
3544
|
+
const digits = normalized.substring(2).split("").map(Number);
|
|
2919
3545
|
const sum1 = digits[0] + digits[2] + digits[4];
|
|
2920
3546
|
const sum2 = (digits[1] + digits[3] + digits[5]) * 2;
|
|
2921
3547
|
const checkDigit = (sum1 + sum2) % 10;
|
|
@@ -2940,11 +3566,16 @@ var EMERGENCY_CONTACT_MARKER = {
|
|
|
2940
3566
|
};
|
|
2941
3567
|
var BIOMETRIC_ID = {
|
|
2942
3568
|
type: "BIOMETRIC_ID",
|
|
2943
|
-
regex: /\b(?:FINGERPRINT|RETINAL?[-\s]?SCAN|IRIS[-\s]?SCAN|VOICE[-\s]?PRINT|FACIAL[-\s]?RECOGNITION|BIOMETRIC)[-\s]?(?:ID|DATA|TEMPLATE|HASH)?[-\s]
|
|
3569
|
+
regex: /\b(?:FINGERPRINT|RETINAL?[-\s\u00A0]?SCAN|IRIS[-\s\u00A0]?SCAN|VOICE[-\s\u00A0]?PRINT|FACIAL[-\s\u00A0]?RECOGNITION|BIOMETRIC)[-\s\u00A0]?(?:ID|DATA|TEMPLATE|HASH)?[-\s\u00A0.:#]*([A-Z0-9][A-Z0-9._-]{7,39})\b/gi,
|
|
2944
3570
|
placeholder: "[BIOMETRIC_{n}]",
|
|
2945
3571
|
priority: 95,
|
|
2946
3572
|
severity: "high",
|
|
2947
|
-
description: "Biometric identifier references"
|
|
3573
|
+
description: "Biometric identifier references",
|
|
3574
|
+
validator: (value) => {
|
|
3575
|
+
const normalized = value.replace(/[^A-Za-z0-9]/g, "");
|
|
3576
|
+
if (normalized.length < 8 || normalized.length > 40) return false;
|
|
3577
|
+
return /[A-Z]/i.test(normalized) && /\d/.test(normalized);
|
|
3578
|
+
}
|
|
2948
3579
|
};
|
|
2949
3580
|
var DNA_SEQUENCE = {
|
|
2950
3581
|
type: "DNA_SEQUENCE",
|
|
@@ -2973,7 +3604,7 @@ var DRUG_DOSAGE = {
|
|
|
2973
3604
|
};
|
|
2974
3605
|
var MEDICAL_IMAGE_REF = {
|
|
2975
3606
|
type: "MEDICAL_IMAGE_REF",
|
|
2976
|
-
regex: /\b(?:X[-\s]?RAY|MRI|CT[-\s]?SCAN|PET[-\s]?SCAN|ULTRASOUND|MAMMOGRAM)[-\s]?(?:IMAGE|FILE|ID)?[-\s]
|
|
3607
|
+
regex: /\b(?:X[-\s\u00A0]?RAY|MRI|CT[-\s\u00A0]?SCAN|PET[-\s\u00A0]?SCAN|ULTRASOUND|MAMMOGRAM)[-\s\u00A0]?(?:IMAGE|FILE|ID)?[-\s\u00A0.:#]*([A-Z0-9][A-Z0-9_.-]{5,23})\b/gi,
|
|
2977
3608
|
placeholder: "[IMAGE_{n}]",
|
|
2978
3609
|
priority: 80,
|
|
2979
3610
|
severity: "high",
|
|
@@ -3121,9 +3752,11 @@ var SWIFT_BIC = {
|
|
|
3121
3752
|
severity: "high",
|
|
3122
3753
|
description: "SWIFT/BIC codes for international transfers",
|
|
3123
3754
|
validator: (value, context) => {
|
|
3755
|
+
const cleaned = value.replace(/[\s\u00A0.-]/g, "").toUpperCase();
|
|
3124
3756
|
const financialContext = /swift|bic|bank|transfer|wire|international|payment/i.test(context);
|
|
3125
|
-
const validLength =
|
|
3126
|
-
|
|
3757
|
+
const validLength = cleaned.length === 8 || cleaned.length === 11;
|
|
3758
|
+
const validFormat = /^[A-Z]{6}[A-Z0-9]{2}([A-Z0-9]{3})?$/.test(cleaned);
|
|
3759
|
+
return financialContext && validLength && validFormat;
|
|
3127
3760
|
}
|
|
3128
3761
|
};
|
|
3129
3762
|
var TRANSACTION_ID = {
|
|
@@ -3136,11 +3769,18 @@ var TRANSACTION_ID = {
|
|
|
3136
3769
|
};
|
|
3137
3770
|
var INVESTMENT_ACCOUNT = {
|
|
3138
3771
|
type: "INVESTMENT_ACCOUNT",
|
|
3139
|
-
regex: /\b(?:ISA|SIPP|INV(?:ESTMENT)?|PENSION|401K|IRA)[-\s]
|
|
3772
|
+
regex: /\b(?:ISA|SIPP|INV(?:ESTMENT)?|PENSION|401K|IRA)[-\s\u00A0]*(?:ACCOUNT|ACCT|A\/C)?[-\s\u00A0]*(?:NO|NUM(?:BER)?)?[-\s\u00A0.:#]*([A-Z0-9](?:[A-Z0-9][\s\u00A0./-]?){5,18}[A-Z0-9])\b/gi,
|
|
3140
3773
|
placeholder: "[INV_ACCT_{n}]",
|
|
3141
3774
|
priority: 85,
|
|
3142
3775
|
severity: "high",
|
|
3143
|
-
description: "Investment and pension account numbers"
|
|
3776
|
+
description: "Investment and pension account numbers",
|
|
3777
|
+
validator: (value, context) => {
|
|
3778
|
+
const normalized = value.replace(/[\s\u00A0./-]/g, "");
|
|
3779
|
+
const hasDigits = /\d{4,}/.test(normalized);
|
|
3780
|
+
const validLength = normalized.length >= 6 && normalized.length <= 15;
|
|
3781
|
+
const inContext = /isa|sipp|invest|pension|401k|ira|account|fund/i.test(context);
|
|
3782
|
+
return hasDigits && validLength && inContext;
|
|
3783
|
+
}
|
|
3144
3784
|
};
|
|
3145
3785
|
var WIRE_TRANSFER_REF = {
|
|
3146
3786
|
type: "WIRE_TRANSFER_REF",
|
|
@@ -3414,22 +4054,60 @@ var TERMINAL_ID = {
|
|
|
3414
4054
|
};
|
|
3415
4055
|
var UK_BANK_ACCOUNT_IBAN = {
|
|
3416
4056
|
type: "UK_BANK_ACCOUNT_IBAN",
|
|
3417
|
-
regex: /\b(GB\d{2}[A-Z]{4}\d{14})\b/
|
|
4057
|
+
regex: /\b(GB\d{2}[\s\u00A0.-]?[A-Z]{4}[\s\u00A0.-]?\d{14})\b/gi,
|
|
3418
4058
|
placeholder: "[UK_IBAN_{n}]",
|
|
3419
4059
|
priority: 95,
|
|
3420
4060
|
severity: "high",
|
|
3421
4061
|
description: "UK bank account numbers in IBAN format",
|
|
3422
|
-
validator: (value) => {
|
|
3423
|
-
|
|
4062
|
+
validator: (value, context) => {
|
|
4063
|
+
const cleaned = value.replace(/[\s\u00A0.-]/g, "").toUpperCase();
|
|
4064
|
+
if (!cleaned.startsWith("GB") || cleaned.length !== 22) {
|
|
4065
|
+
return false;
|
|
4066
|
+
}
|
|
4067
|
+
if (!validateIBAN(cleaned)) {
|
|
4068
|
+
return false;
|
|
4069
|
+
}
|
|
4070
|
+
const bankingKeywords = /iban|account|bank|uk|gb|financial|payment|transfer/i;
|
|
4071
|
+
if (!bankingKeywords.test(context)) {
|
|
4072
|
+
return false;
|
|
4073
|
+
}
|
|
4074
|
+
const rejectKeywords = /example\s+iban|test\s+iban|sample\s+iban|demo\s+iban|fake\s+iban/i;
|
|
4075
|
+
if (rejectKeywords.test(context)) {
|
|
4076
|
+
return false;
|
|
4077
|
+
}
|
|
4078
|
+
return true;
|
|
3424
4079
|
}
|
|
3425
4080
|
};
|
|
3426
4081
|
var UK_SORT_CODE_ACCOUNT = {
|
|
3427
4082
|
type: "UK_SORT_CODE_ACCOUNT",
|
|
3428
|
-
regex: /\b(\d{2}[-]
|
|
4083
|
+
regex: /\b(\d{2}[\s\u00A0-]?\d{2}[\s\u00A0-]?\d{2}[\s\u00A0]?\d{8})\b/g,
|
|
3429
4084
|
placeholder: "[UK_ACCOUNT_{n}]",
|
|
3430
4085
|
priority: 95,
|
|
3431
4086
|
severity: "high",
|
|
3432
|
-
description: "UK sort code and account number combination"
|
|
4087
|
+
description: "UK sort code and account number combination",
|
|
4088
|
+
validator: (value, context) => {
|
|
4089
|
+
const cleaned = value.replace(/[\s\u00A0.-]/g, "");
|
|
4090
|
+
if (!/^\d{14}$/.test(cleaned)) {
|
|
4091
|
+
return false;
|
|
4092
|
+
}
|
|
4093
|
+
const sortCode = cleaned.substring(0, 6);
|
|
4094
|
+
const accountNumber = cleaned.substring(6);
|
|
4095
|
+
if (accountNumber.length !== 8) {
|
|
4096
|
+
return false;
|
|
4097
|
+
}
|
|
4098
|
+
if (!validateSortCode(sortCode)) {
|
|
4099
|
+
return false;
|
|
4100
|
+
}
|
|
4101
|
+
const bankingKeywords = /sort\s+code|account|bank|uk|gb|financial|payment|transfer/i;
|
|
4102
|
+
if (!bankingKeywords.test(context)) {
|
|
4103
|
+
return false;
|
|
4104
|
+
}
|
|
4105
|
+
const rejectKeywords = /example\s+account|test\s+account|sample\s+account|demo\s+account|fake\s+account/i;
|
|
4106
|
+
if (rejectKeywords.test(context)) {
|
|
4107
|
+
return false;
|
|
4108
|
+
}
|
|
4109
|
+
return true;
|
|
4110
|
+
}
|
|
3433
4111
|
};
|
|
3434
4112
|
var financialPatterns2 = [
|
|
3435
4113
|
SWIFT_BIC,
|
|
@@ -4274,13 +4952,17 @@ var RESUME_ID = {
|
|
|
4274
4952
|
};
|
|
4275
4953
|
var BENEFITS_PLAN_NUMBER = {
|
|
4276
4954
|
type: "BENEFITS_PLAN_NUMBER",
|
|
4277
|
-
regex: /\b(?:BENEFITS?|INSURANCE|HEALTH[-\s]?PLAN)[-\s]
|
|
4955
|
+
regex: /\b(?:BENEFITS?|INSURANCE|HEALTH[-\s\u00A0]?PLAN)[-\s\u00A0]*(?:PLAN)?[-\s\u00A0]*(?:NO|NUM(?:BER)?|ID)?[-\s\u00A0.:#]*([A-Z0-9](?:[A-Z0-9][\s\u00A0./-]?){5,15}[A-Z0-9])\b/gi,
|
|
4278
4956
|
placeholder: "[BENEFITS_{n}]",
|
|
4279
4957
|
priority: 85,
|
|
4280
4958
|
severity: "high",
|
|
4281
4959
|
description: "Employee benefits and insurance plan numbers",
|
|
4282
|
-
validator: (
|
|
4283
|
-
|
|
4960
|
+
validator: (value, context) => {
|
|
4961
|
+
const normalized = value.replace(/[\s\u00A0./-]/g, "");
|
|
4962
|
+
const hasDigits = /\d{4,}/.test(normalized);
|
|
4963
|
+
const validLength = normalized.length >= 6 && normalized.length <= 14;
|
|
4964
|
+
const inContext = /benefit|insurance|health|dental|vision|plan|policy|enrollment/i.test(context);
|
|
4965
|
+
return hasDigits && validLength && inContext;
|
|
4284
4966
|
}
|
|
4285
4967
|
};
|
|
4286
4968
|
var RETIREMENT_ACCOUNT = {
|
|
@@ -4378,13 +5060,16 @@ var EXIT_INTERVIEW_ID = {
|
|
|
4378
5060
|
};
|
|
4379
5061
|
var DISCIPLINARY_ACTION_ID = {
|
|
4380
5062
|
type: "DISCIPLINARY_ACTION_ID",
|
|
4381
|
-
regex: /\b(?:DISCIPLINARY|INCIDENT|WARNING|VIOLATION)[-\s]
|
|
5063
|
+
regex: /\b(?:DISCIPLINARY|INCIDENT|WARNING|VIOLATION)[-\s\u00A0]*(?:ACTION)?[-\s\u00A0]*(?:NO|NUM(?:BER)?|ID)?[-\s\u00A0.:#]*([A-Z0-9](?:[A-Z0-9][\s\u00A0./-]?){5,15}[A-Z0-9])\b/gi,
|
|
4382
5064
|
placeholder: "[DISCIPLINE_{n}]",
|
|
4383
5065
|
priority: 85,
|
|
4384
5066
|
severity: "high",
|
|
4385
5067
|
description: "Disciplinary action and incident identifiers",
|
|
4386
|
-
validator: (
|
|
4387
|
-
|
|
5068
|
+
validator: (value, context) => {
|
|
5069
|
+
const normalized = value.replace(/[\s\u00A0./-]/g, "");
|
|
5070
|
+
const hasDigits = /\d{3,}/.test(normalized);
|
|
5071
|
+
const validLength = normalized.length >= 6 && normalized.length <= 12;
|
|
5072
|
+
return hasDigits && validLength && /disciplinary|incident|warning|violation|misconduct|investigation/i.test(context);
|
|
4388
5073
|
}
|
|
4389
5074
|
};
|
|
4390
5075
|
var EMERGENCY_CONTACT_REF = {
|
|
@@ -4712,7 +5397,7 @@ var TELECOMS_ACCOUNT_NUMBER = {
|
|
|
4712
5397
|
type: "TELECOMS_ACCOUNT_NUMBER",
|
|
4713
5398
|
regex: /\bACC(?:OUNT)?[-\s]?(?:NO|NUM(?:BER)?)?[-\s]?[:#]?\s*(\d{8,12})\b/gi,
|
|
4714
5399
|
placeholder: "[ACCOUNT_{n}]",
|
|
4715
|
-
priority:
|
|
5400
|
+
priority: 90,
|
|
4716
5401
|
severity: "high",
|
|
4717
5402
|
description: "Telecommunications customer account numbers",
|
|
4718
5403
|
validator: (_value, context) => {
|
|
@@ -5590,7 +6275,7 @@ var EMERGENCY_CALL_REF = {
|
|
|
5590
6275
|
};
|
|
5591
6276
|
var POLICE_REPORT_NUMBER = {
|
|
5592
6277
|
type: "POLICE_REPORT_NUMBER",
|
|
5593
|
-
regex: /\b(?:POLICE|PR|RPT|REPORT|CASE)[-\s]
|
|
6278
|
+
regex: /\b(?:POLICE|PR|RPT|REPORT|CASE)[-\s\u00A0]*(?:NO|NUM|NUMBER|ID)?[-\s\u00A0.:#]*((?:[A-Z]{2,4}[\s\u00A0./-]?\d{2,4}[\s\u00A0./-]?\d{4,10})|\d{4}[\s\u00A0./-]?\d{5,10})\b/gi,
|
|
5594
6279
|
placeholder: "[POLICE_RPT_{n}]",
|
|
5595
6280
|
priority: 95,
|
|
5596
6281
|
severity: "high",
|
|
@@ -5601,7 +6286,7 @@ var POLICE_REPORT_NUMBER = {
|
|
|
5601
6286
|
};
|
|
5602
6287
|
var FIRE_INCIDENT_NUMBER = {
|
|
5603
6288
|
type: "FIRE_INCIDENT_NUMBER",
|
|
5604
|
-
regex: /\b(?:FIRE|FI|FD)[-\s]
|
|
6289
|
+
regex: /\b(?:FIRE|FI|FD)[-\s\u00A0]*(?:INCIDENT|INC|NO|NUM|NUMBER|ID)?[-\s\u00A0.:#]*((?:[A-Z]{2,4}[\s\u00A0./-]?\d{2,4}[\s\u00A0./-]?\d{4,10})|\d{4}[\s\u00A0./-]?\d{4,8})\b/gi,
|
|
5605
6290
|
placeholder: "[FIRE_INC_{n}]",
|
|
5606
6291
|
priority: 95,
|
|
5607
6292
|
severity: "high",
|
|
@@ -6466,13 +7151,15 @@ var gamingPatterns = [
|
|
|
6466
7151
|
// src/patterns/industries/vehicles.ts
|
|
6467
7152
|
var VIN_NUMBER = {
|
|
6468
7153
|
type: "VIN_NUMBER",
|
|
6469
|
-
regex: /\bVIN[-\s]?(?:NO|NUM|NUMBER)?[-\s]?[:#]?\s*([A-HJ-NPR-Z0-9]{17})\b/gi,
|
|
7154
|
+
regex: /\bVIN[-\s\u00A0]?(?:NO|NUM|NUMBER)?[-\s\u00A0]?[:#]?\s*([A-HJ-NPR-Z0-9]{17})\b/gi,
|
|
6470
7155
|
placeholder: "[VIN_{n}]",
|
|
6471
7156
|
priority: 85,
|
|
6472
7157
|
severity: "medium",
|
|
6473
7158
|
description: "Vehicle Identification Number (VIN)",
|
|
6474
7159
|
validator: (value, context) => {
|
|
6475
|
-
|
|
7160
|
+
const cleaned = value.replace(/[\s\u00A0.-]/g, "").toUpperCase();
|
|
7161
|
+
if (cleaned.length !== 17) return false;
|
|
7162
|
+
if (/[IOQ]/.test(cleaned)) return false;
|
|
6476
7163
|
return /vin|vehicle|car|auto|motor|registration|title|insurance/i.test(context);
|
|
6477
7164
|
}
|
|
6478
7165
|
};
|
|
@@ -8774,9 +9461,11 @@ var GERMAN_TAX_ID = {
|
|
|
8774
9461
|
severity: "high",
|
|
8775
9462
|
description: "German Tax Identification Number (Steueridentifikationsnummer)",
|
|
8776
9463
|
validator: (value, context) => {
|
|
9464
|
+
const cleaned = value.replace(/[\s\u00A0.-]/g, "");
|
|
9465
|
+
if (!/^\d{11}$/.test(cleaned)) return false;
|
|
8777
9466
|
const relevantContext = /steuer|tax|german|deutschland|finanzamt/i.test(context);
|
|
8778
9467
|
if (!relevantContext) return false;
|
|
8779
|
-
const digits =
|
|
9468
|
+
const digits = cleaned.split("").map(Number);
|
|
8780
9469
|
const digitCounts = /* @__PURE__ */ new Map();
|
|
8781
9470
|
digits.forEach((d) => digitCounts.set(d, (digitCounts.get(d) || 0) + 1));
|
|
8782
9471
|
const counts = Array.from(digitCounts.values());
|
|
@@ -8935,9 +9624,11 @@ var DUTCH_BSN = {
|
|
|
8935
9624
|
severity: "high",
|
|
8936
9625
|
description: "Dutch Citizen Service Number (BSN)",
|
|
8937
9626
|
validator: (value, context) => {
|
|
9627
|
+
const cleaned = value.replace(/[\s\u00A0.-]/g, "");
|
|
9628
|
+
if (!/^\d{9}$/.test(cleaned)) return false;
|
|
8938
9629
|
const relevantContext = /bsn|dutch|netherlands|nederland|burger/i.test(context);
|
|
8939
9630
|
if (!relevantContext) return false;
|
|
8940
|
-
const digits =
|
|
9631
|
+
const digits = cleaned.split("").map(Number);
|
|
8941
9632
|
let sum = 0;
|
|
8942
9633
|
for (let i = 0; i < 8; i++) {
|
|
8943
9634
|
sum += digits[i] * (9 - i);
|
|
@@ -8954,10 +9645,12 @@ var POLISH_PESEL = {
|
|
|
8954
9645
|
severity: "high",
|
|
8955
9646
|
description: "Polish National Identification Number (PESEL)",
|
|
8956
9647
|
validator: (value, context) => {
|
|
9648
|
+
const cleaned = value.replace(/[\s\u00A0.-]/g, "");
|
|
9649
|
+
if (!/^\d{11}$/.test(cleaned)) return false;
|
|
8957
9650
|
const relevantContext = /pesel|polish|poland|polska/i.test(context);
|
|
8958
9651
|
if (!relevantContext) return false;
|
|
8959
9652
|
const weights = [1, 3, 7, 9, 1, 3, 7, 9, 1, 3];
|
|
8960
|
-
const digits =
|
|
9653
|
+
const digits = cleaned.split("").map(Number);
|
|
8961
9654
|
let sum = 0;
|
|
8962
9655
|
for (let i = 0; i < 10; i++) {
|
|
8963
9656
|
sum += digits[i] * weights[i];
|
|
@@ -9291,7 +9984,8 @@ var DISCORD_USER_ID = {
|
|
|
9291
9984
|
severity: "medium",
|
|
9292
9985
|
description: "Discord user ID (Snowflake format)",
|
|
9293
9986
|
validator: (value, context) => {
|
|
9294
|
-
|
|
9987
|
+
const cleaned = value.replace(/[\s\u00A0.-]/g, "");
|
|
9988
|
+
if (cleaned.length < 17 || cleaned.length > 19) return false;
|
|
9295
9989
|
return /discord|snowflake|user[-_]?id|server|guild/i.test(context);
|
|
9296
9990
|
}
|
|
9297
9991
|
};
|
|
@@ -9303,7 +9997,8 @@ var STEAM_ID64 = {
|
|
|
9303
9997
|
severity: "medium",
|
|
9304
9998
|
description: "Steam 64-bit user ID",
|
|
9305
9999
|
validator: (value, context) => {
|
|
9306
|
-
|
|
10000
|
+
const cleaned = value.replace(/[\s\u00A0.-]/g, "");
|
|
10001
|
+
if (!cleaned.startsWith("765") || cleaned.length !== 17) return false;
|
|
9307
10002
|
return /steam|gaming|player|profile|valve|community/i.test(context);
|
|
9308
10003
|
}
|
|
9309
10004
|
};
|
|
@@ -9436,13 +10131,14 @@ var NINTENDO_FRIEND_CODE = {
|
|
|
9436
10131
|
type: "NINTENDO_FRIEND_CODE",
|
|
9437
10132
|
regex: /\bSW[-\s]?(\d{4}[-\s]?\d{4}[-\s]?\d{4})\b/gi,
|
|
9438
10133
|
placeholder: "[NINTENDO_FC_{n}]",
|
|
9439
|
-
priority:
|
|
10134
|
+
priority: 90,
|
|
9440
10135
|
severity: "medium",
|
|
9441
10136
|
description: "Nintendo Switch Friend Code",
|
|
9442
10137
|
validator: (value, context) => {
|
|
9443
10138
|
const digits = value.replace(/\D/g, "");
|
|
9444
10139
|
if (digits.length !== 12) return false;
|
|
9445
|
-
|
|
10140
|
+
const hasContext = /nintendo|switch|friend[- ]?code|gaming/i.test(context);
|
|
10141
|
+
return hasContext;
|
|
9446
10142
|
}
|
|
9447
10143
|
};
|
|
9448
10144
|
var BATTLETAG = {
|
|
@@ -9737,14 +10433,64 @@ var ccpaPreset = {
|
|
|
9737
10433
|
"USERNAME"
|
|
9738
10434
|
]
|
|
9739
10435
|
};
|
|
10436
|
+
var healthcarePreset = {
|
|
10437
|
+
includeNames: true,
|
|
10438
|
+
includeEmails: true,
|
|
10439
|
+
includePhones: true,
|
|
10440
|
+
includeAddresses: true,
|
|
10441
|
+
categories: ["personal", "contact", "healthcare", "insurance", "government"]
|
|
10442
|
+
};
|
|
10443
|
+
var healthcareResearchPreset = {
|
|
10444
|
+
includeNames: true,
|
|
10445
|
+
includeEmails: true,
|
|
10446
|
+
includePhones: true,
|
|
10447
|
+
includeAddresses: true,
|
|
10448
|
+
categories: ["personal", "contact", "healthcare", "insurance", "government"]
|
|
10449
|
+
};
|
|
10450
|
+
var financePreset = {
|
|
10451
|
+
includeNames: true,
|
|
10452
|
+
includeEmails: true,
|
|
10453
|
+
includePhones: true,
|
|
10454
|
+
includeAddresses: true,
|
|
10455
|
+
categories: ["personal", "contact", "financial", "government", "network"]
|
|
10456
|
+
};
|
|
10457
|
+
var educationPreset = {
|
|
10458
|
+
includeNames: true,
|
|
10459
|
+
includeEmails: true,
|
|
10460
|
+
includePhones: true,
|
|
10461
|
+
includeAddresses: true,
|
|
10462
|
+
categories: ["personal", "contact", "education", "government", "network"]
|
|
10463
|
+
};
|
|
10464
|
+
var transportLogisticsPreset = {
|
|
10465
|
+
includeNames: true,
|
|
10466
|
+
includeEmails: true,
|
|
10467
|
+
includePhones: true,
|
|
10468
|
+
includeAddresses: true,
|
|
10469
|
+
categories: ["personal", "contact", "transportation", "logistics", "vehicles", "network"]
|
|
10470
|
+
};
|
|
9740
10471
|
function getPreset(name) {
|
|
9741
|
-
|
|
10472
|
+
const presetName = name.toLowerCase();
|
|
10473
|
+
switch (presetName) {
|
|
9742
10474
|
case "gdpr":
|
|
9743
10475
|
return gdprPreset;
|
|
9744
10476
|
case "hipaa":
|
|
9745
10477
|
return hipaaPreset;
|
|
9746
10478
|
case "ccpa":
|
|
9747
10479
|
return ccpaPreset;
|
|
10480
|
+
case "healthcare":
|
|
10481
|
+
case "healthcare-provider":
|
|
10482
|
+
return healthcarePreset;
|
|
10483
|
+
case "healthcare-research":
|
|
10484
|
+
return healthcareResearchPreset;
|
|
10485
|
+
case "finance":
|
|
10486
|
+
case "financial-services":
|
|
10487
|
+
return financePreset;
|
|
10488
|
+
case "education":
|
|
10489
|
+
return educationPreset;
|
|
10490
|
+
case "transport-logistics":
|
|
10491
|
+
case "transportation":
|
|
10492
|
+
case "logistics":
|
|
10493
|
+
return transportLogisticsPreset;
|
|
9748
10494
|
default:
|
|
9749
10495
|
return {};
|
|
9750
10496
|
}
|
|
@@ -10267,9 +11013,23 @@ var ConfigLoader = class {
|
|
|
10267
11013
|
};
|
|
10268
11014
|
}
|
|
10269
11015
|
if (preset.startsWith("openredaction:")) {
|
|
10270
|
-
const
|
|
10271
|
-
|
|
10272
|
-
|
|
11016
|
+
const presetName = preset.replace("openredaction:", "");
|
|
11017
|
+
const supportedPresets = [
|
|
11018
|
+
"gdpr",
|
|
11019
|
+
"hipaa",
|
|
11020
|
+
"ccpa",
|
|
11021
|
+
"healthcare",
|
|
11022
|
+
"healthcare-provider",
|
|
11023
|
+
"healthcare-research",
|
|
11024
|
+
"finance",
|
|
11025
|
+
"financial-services",
|
|
11026
|
+
"education",
|
|
11027
|
+
"transport-logistics",
|
|
11028
|
+
"transportation",
|
|
11029
|
+
"logistics"
|
|
11030
|
+
];
|
|
11031
|
+
if (supportedPresets.includes(presetName)) {
|
|
11032
|
+
return { preset: presetName };
|
|
10273
11033
|
}
|
|
10274
11034
|
}
|
|
10275
11035
|
return null;
|
|
@@ -10285,7 +11045,8 @@ var ConfigLoader = class {
|
|
|
10285
11045
|
export default {
|
|
10286
11046
|
// Extend built-in presets
|
|
10287
11047
|
// Options: 'openredaction:recommended', 'openredaction:strict', 'openredaction:minimal'
|
|
10288
|
-
// Or compliance: 'openredaction:gdpr', 'openredaction:hipaa', 'openredaction:ccpa'
|
|
11048
|
+
// Or compliance/industry presets: 'openredaction:gdpr', 'openredaction:hipaa', 'openredaction:ccpa',
|
|
11049
|
+
// 'openredaction:finance', 'openredaction:education', 'openredaction:healthcare', 'openredaction:transport-logistics'
|
|
10289
11050
|
extends: ['openredaction:recommended'],
|
|
10290
11051
|
|
|
10291
11052
|
// Detection options
|
|
@@ -11962,9 +12723,8 @@ var ExplainAPI = class {
|
|
|
11962
12723
|
constructor(detector) {
|
|
11963
12724
|
this.detector = detector;
|
|
11964
12725
|
this.patterns = detector.getPatterns();
|
|
11965
|
-
const testResult = detector.detect("Contact: admin@business.co.uk");
|
|
11966
|
-
const hasConfidence = testResult.detections.length > 0 && testResult.detections[0].confidence !== void 0;
|
|
11967
12726
|
const detectorOptions = detector.options;
|
|
12727
|
+
const hasConfidence = detectorOptions?.enableContextAnalysis || false;
|
|
11968
12728
|
this.options = {
|
|
11969
12729
|
enableContextAnalysis: hasConfidence,
|
|
11970
12730
|
confidenceThreshold: detectorOptions?.confidenceThreshold || 0.5,
|
|
@@ -11976,7 +12736,7 @@ var ExplainAPI = class {
|
|
|
11976
12736
|
/**
|
|
11977
12737
|
* Explain why text was or wasn't detected as PII
|
|
11978
12738
|
*/
|
|
11979
|
-
explain(text) {
|
|
12739
|
+
async explain(text) {
|
|
11980
12740
|
const patternResults = [];
|
|
11981
12741
|
const matchedPatterns = [];
|
|
11982
12742
|
const unmatchedPatterns = [];
|
|
@@ -12066,7 +12826,8 @@ var ExplainAPI = class {
|
|
|
12066
12826
|
patternResults.push(result);
|
|
12067
12827
|
matchedPatterns.push(result);
|
|
12068
12828
|
}
|
|
12069
|
-
const
|
|
12829
|
+
const detectionResult = await this.detector.detect(text);
|
|
12830
|
+
const detections = detectionResult.detections;
|
|
12070
12831
|
return {
|
|
12071
12832
|
text,
|
|
12072
12833
|
patternResults,
|
|
@@ -12085,7 +12846,7 @@ var ExplainAPI = class {
|
|
|
12085
12846
|
/**
|
|
12086
12847
|
* Explain a specific detection
|
|
12087
12848
|
*/
|
|
12088
|
-
explainDetection(detection, text) {
|
|
12849
|
+
async explainDetection(detection, text) {
|
|
12089
12850
|
const pattern = this.patterns.find((p) => p.type === detection.type);
|
|
12090
12851
|
const reasoning = [];
|
|
12091
12852
|
reasoning.push(`Detected as ${detection.type}`);
|
|
@@ -12116,13 +12877,15 @@ var ExplainAPI = class {
|
|
|
12116
12877
|
detection,
|
|
12117
12878
|
pattern,
|
|
12118
12879
|
contextAnalysis,
|
|
12119
|
-
reasoning
|
|
12880
|
+
reasoning,
|
|
12881
|
+
suggestions: []
|
|
12882
|
+
// Will be populated if needed
|
|
12120
12883
|
};
|
|
12121
12884
|
}
|
|
12122
12885
|
/**
|
|
12123
12886
|
* Suggest why text wasn't detected
|
|
12124
12887
|
*/
|
|
12125
|
-
suggestWhy(text, expectedType) {
|
|
12888
|
+
async suggestWhy(text, expectedType) {
|
|
12126
12889
|
const suggestions = [];
|
|
12127
12890
|
const similarPatterns = [];
|
|
12128
12891
|
const typePatterns = this.patterns.filter(
|
|
@@ -12140,7 +12903,7 @@ var ExplainAPI = class {
|
|
|
12140
12903
|
similarPatterns.push(pattern);
|
|
12141
12904
|
const value = match[1] !== void 0 ? match[1] : match[0];
|
|
12142
12905
|
suggestions.push(`Pattern "${pattern.type}" matched value: "${value}"`);
|
|
12143
|
-
const explanation = this.explain(text);
|
|
12906
|
+
const explanation = await this.explain(text);
|
|
12144
12907
|
const filtered = explanation.filteredPatterns.find((r) => r.pattern.type === pattern.type);
|
|
12145
12908
|
if (filtered && filtered.reason) {
|
|
12146
12909
|
suggestions.push(`But was filtered: ${filtered.reason}`);
|
|
@@ -12170,9 +12933,9 @@ Example ${expectedType} pattern: ${examplePattern.regex.source.substring(0, 100)
|
|
|
12170
12933
|
/**
|
|
12171
12934
|
* Get debugging information for entire detection process
|
|
12172
12935
|
*/
|
|
12173
|
-
debug(text) {
|
|
12936
|
+
async debug(text) {
|
|
12174
12937
|
const start = performance.now();
|
|
12175
|
-
const explanation = this.explain(text);
|
|
12938
|
+
const explanation = await this.explain(text);
|
|
12176
12939
|
const duration = performance.now() - start;
|
|
12177
12940
|
const enabledFeatures = [];
|
|
12178
12941
|
if (this.options.enableContextAnalysis) {
|
|
@@ -13060,6 +13823,152 @@ function compileSafeRegex(pattern, flags) {
|
|
|
13060
13823
|
return new RegExp(patternStr, finalFlags);
|
|
13061
13824
|
}
|
|
13062
13825
|
|
|
13826
|
+
// src/utils/ai-assist.ts
|
|
13827
|
+
function getAIEndpoint(aiOptions) {
|
|
13828
|
+
if (!aiOptions?.enabled) {
|
|
13829
|
+
return null;
|
|
13830
|
+
}
|
|
13831
|
+
if (aiOptions.endpoint) {
|
|
13832
|
+
return aiOptions.endpoint;
|
|
13833
|
+
}
|
|
13834
|
+
if (typeof process !== "undefined" && process.env) {
|
|
13835
|
+
const envEndpoint = process.env.OPENREDACTION_AI_ENDPOINT;
|
|
13836
|
+
if (envEndpoint) {
|
|
13837
|
+
return envEndpoint;
|
|
13838
|
+
}
|
|
13839
|
+
}
|
|
13840
|
+
return null;
|
|
13841
|
+
}
|
|
13842
|
+
function isFetchAvailable() {
|
|
13843
|
+
return typeof fetch !== "undefined";
|
|
13844
|
+
}
|
|
13845
|
+
async function callAIDetect(text, endpoint, debug) {
|
|
13846
|
+
if (!isFetchAvailable()) {
|
|
13847
|
+
if (debug) {
|
|
13848
|
+
console.warn("[OpenRedaction] AI assist requires fetch API. Not available in this environment.");
|
|
13849
|
+
}
|
|
13850
|
+
return null;
|
|
13851
|
+
}
|
|
13852
|
+
try {
|
|
13853
|
+
const url = endpoint.endsWith("/ai-detect") ? endpoint : `${endpoint}/ai-detect`;
|
|
13854
|
+
if (debug) {
|
|
13855
|
+
console.log(`[OpenRedaction] Calling AI endpoint: ${url}`);
|
|
13856
|
+
}
|
|
13857
|
+
const response = await fetch(url, {
|
|
13858
|
+
method: "POST",
|
|
13859
|
+
headers: {
|
|
13860
|
+
"Content-Type": "application/json"
|
|
13861
|
+
},
|
|
13862
|
+
body: JSON.stringify({ text })
|
|
13863
|
+
});
|
|
13864
|
+
if (!response.ok) {
|
|
13865
|
+
if (debug) {
|
|
13866
|
+
const statusText = response.status === 429 ? "Rate limit exceeded (429)" : `${response.status}: ${response.statusText}`;
|
|
13867
|
+
console.warn(`[OpenRedaction] AI endpoint returned ${statusText}`);
|
|
13868
|
+
}
|
|
13869
|
+
return null;
|
|
13870
|
+
}
|
|
13871
|
+
const data = await response.json();
|
|
13872
|
+
if (!data.entities || !Array.isArray(data.entities)) {
|
|
13873
|
+
if (debug) {
|
|
13874
|
+
console.warn("[OpenRedaction] Invalid AI response format: missing entities array");
|
|
13875
|
+
}
|
|
13876
|
+
return null;
|
|
13877
|
+
}
|
|
13878
|
+
return data.entities;
|
|
13879
|
+
} catch (error) {
|
|
13880
|
+
if (debug) {
|
|
13881
|
+
console.warn(`[OpenRedaction] AI endpoint error: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
13882
|
+
}
|
|
13883
|
+
return null;
|
|
13884
|
+
}
|
|
13885
|
+
}
|
|
13886
|
+
function validateAIEntity(entity, textLength) {
|
|
13887
|
+
if (!entity.type || !entity.value || typeof entity.start !== "number" || typeof entity.end !== "number") {
|
|
13888
|
+
return false;
|
|
13889
|
+
}
|
|
13890
|
+
if (entity.start < 0 || entity.end < 0 || entity.start >= entity.end) {
|
|
13891
|
+
return false;
|
|
13892
|
+
}
|
|
13893
|
+
if (entity.start >= textLength || entity.end > textLength) {
|
|
13894
|
+
return false;
|
|
13895
|
+
}
|
|
13896
|
+
const actualValue = entity.value;
|
|
13897
|
+
if (actualValue.length !== entity.end - entity.start) {
|
|
13898
|
+
return false;
|
|
13899
|
+
}
|
|
13900
|
+
return true;
|
|
13901
|
+
}
|
|
13902
|
+
function detectionsOverlap(det1, det2) {
|
|
13903
|
+
const [start1, end1] = det1.position;
|
|
13904
|
+
const [start2, end2] = det2.position;
|
|
13905
|
+
const overlapStart = Math.max(start1, start2);
|
|
13906
|
+
const overlapEnd = Math.min(end1, end2);
|
|
13907
|
+
if (overlapStart >= overlapEnd) {
|
|
13908
|
+
return false;
|
|
13909
|
+
}
|
|
13910
|
+
const overlapLength = overlapEnd - overlapStart;
|
|
13911
|
+
const length1 = end1 - start1;
|
|
13912
|
+
const length2 = end2 - start2;
|
|
13913
|
+
const minLength = Math.min(length1, length2);
|
|
13914
|
+
return overlapLength > minLength * 0.5;
|
|
13915
|
+
}
|
|
13916
|
+
function convertAIEntityToDetection(entity, text) {
|
|
13917
|
+
if (!validateAIEntity(entity, text.length)) {
|
|
13918
|
+
return null;
|
|
13919
|
+
}
|
|
13920
|
+
const actualValue = text.substring(entity.start, entity.end);
|
|
13921
|
+
let type = entity.type.toUpperCase();
|
|
13922
|
+
if (type.includes("EMAIL") || type === "EMAIL_ADDRESS") {
|
|
13923
|
+
type = "EMAIL";
|
|
13924
|
+
} else if (type.includes("PHONE") || type === "PHONE_NUMBER") {
|
|
13925
|
+
type = "PHONE_US";
|
|
13926
|
+
} else if (type.includes("NAME") || type === "PERSON") {
|
|
13927
|
+
type = "NAME";
|
|
13928
|
+
} else if (type.includes("SSN") || type === "SOCIAL_SECURITY_NUMBER") {
|
|
13929
|
+
type = "SSN";
|
|
13930
|
+
} else if (type.includes("ADDRESS")) {
|
|
13931
|
+
type = "ADDRESS_STREET";
|
|
13932
|
+
}
|
|
13933
|
+
let severity = "medium";
|
|
13934
|
+
if (type === "SSN" || type === "CREDIT_CARD") {
|
|
13935
|
+
severity = "critical";
|
|
13936
|
+
} else if (type === "EMAIL" || type === "PHONE_US" || type === "NAME") {
|
|
13937
|
+
severity = "high";
|
|
13938
|
+
}
|
|
13939
|
+
return {
|
|
13940
|
+
type,
|
|
13941
|
+
value: actualValue,
|
|
13942
|
+
placeholder: `[${type}_${Math.random().toString(36).substring(2, 9)}]`,
|
|
13943
|
+
position: [entity.start, entity.end],
|
|
13944
|
+
severity,
|
|
13945
|
+
confidence: entity.confidence ?? 0.7
|
|
13946
|
+
// Default confidence for AI entities
|
|
13947
|
+
};
|
|
13948
|
+
}
|
|
13949
|
+
function mergeAIEntities(regexDetections, aiEntities, text) {
|
|
13950
|
+
const merged = [...regexDetections];
|
|
13951
|
+
const processedRanges = regexDetections.map((d) => d.position);
|
|
13952
|
+
for (const aiEntity of aiEntities) {
|
|
13953
|
+
const detection = convertAIEntityToDetection(aiEntity, text);
|
|
13954
|
+
if (!detection) {
|
|
13955
|
+
continue;
|
|
13956
|
+
}
|
|
13957
|
+
let hasOverlap = false;
|
|
13958
|
+
for (const regexDet of regexDetections) {
|
|
13959
|
+
if (detectionsOverlap(regexDet, detection)) {
|
|
13960
|
+
hasOverlap = true;
|
|
13961
|
+
break;
|
|
13962
|
+
}
|
|
13963
|
+
}
|
|
13964
|
+
if (!hasOverlap) {
|
|
13965
|
+
merged.push(detection);
|
|
13966
|
+
processedRanges.push(detection.position);
|
|
13967
|
+
}
|
|
13968
|
+
}
|
|
13969
|
+
return merged;
|
|
13970
|
+
}
|
|
13971
|
+
|
|
13063
13972
|
// src/detector.ts
|
|
13064
13973
|
var OpenRedaction = class _OpenRedaction {
|
|
13065
13974
|
constructor(options = {}) {
|
|
@@ -13269,6 +14178,9 @@ var OpenRedaction = class _OpenRedaction {
|
|
|
13269
14178
|
for (const pattern of this.patterns) {
|
|
13270
14179
|
const regex = new RegExp(pattern.regex.source, pattern.regex.flags);
|
|
13271
14180
|
this.compiledPatterns.set(pattern, regex);
|
|
14181
|
+
if (this.options.debug && (pattern.type === "NINTENDO_FRIEND_CODE" || pattern.type === "TELECOMS_ACCOUNT_NUMBER")) {
|
|
14182
|
+
console.log(`[OpenRedaction] Compiled pattern '${pattern.type}': ${regex}`);
|
|
14183
|
+
}
|
|
13272
14184
|
}
|
|
13273
14185
|
if (this.options.debug) {
|
|
13274
14186
|
console.log(`[OpenRedaction] Pre-compiled ${this.compiledPatterns.size} regex patterns`);
|
|
@@ -13288,12 +14200,18 @@ var OpenRedaction = class _OpenRedaction {
|
|
|
13288
14200
|
}
|
|
13289
14201
|
continue;
|
|
13290
14202
|
}
|
|
14203
|
+
if (this.options.debug && (pattern.type === "NINTENDO_FRIEND_CODE" || pattern.type === "TELECOMS_ACCOUNT_NUMBER")) {
|
|
14204
|
+
console.log(`[OpenRedaction] Processing pattern '${pattern.type}' with regex: ${regex}`);
|
|
14205
|
+
}
|
|
13291
14206
|
let match;
|
|
13292
14207
|
let matchCount = 0;
|
|
13293
14208
|
const maxMatches = 1e4;
|
|
13294
14209
|
regex.lastIndex = 0;
|
|
13295
14210
|
try {
|
|
13296
14211
|
while ((match = safeExec(regex, text, { timeout: this.options.regexTimeout })) !== null) {
|
|
14212
|
+
if (this.options.debug && (pattern.type === "NINTENDO_FRIEND_CODE" || pattern.type === "TELECOMS_ACCOUNT_NUMBER")) {
|
|
14213
|
+
console.log(`[OpenRedaction] Pattern '${pattern.type}' regex match found: '${match[0]}' at position ${match.index}`);
|
|
14214
|
+
}
|
|
13297
14215
|
matchCount++;
|
|
13298
14216
|
if (matchCount >= maxMatches) {
|
|
13299
14217
|
if (this.options.debug) {
|
|
@@ -13314,12 +14232,18 @@ var OpenRedaction = class _OpenRedaction {
|
|
|
13314
14232
|
endPos = startPos + value.length;
|
|
13315
14233
|
}
|
|
13316
14234
|
if (this.overlapsWithExisting(startPos, endPos, processedRanges)) {
|
|
14235
|
+
if (this.options.debug) {
|
|
14236
|
+
console.log(`[OpenRedaction] Pattern '${pattern.type}' skipped due to overlap at ${startPos}-${endPos}`);
|
|
14237
|
+
}
|
|
13317
14238
|
continue;
|
|
13318
14239
|
}
|
|
13319
14240
|
const contextStart = Math.max(0, startPos - 50);
|
|
13320
14241
|
const contextEnd = Math.min(text.length, endPos + 50);
|
|
13321
14242
|
const context = text.substring(contextStart, contextEnd);
|
|
13322
14243
|
if (pattern.validator && !pattern.validator(value, context)) {
|
|
14244
|
+
if (this.options.debug) {
|
|
14245
|
+
console.log(`[OpenRedaction] Pattern '${pattern.type}' validation failed for value: '${value}' with context: '${context.substring(0, 100)}...'`);
|
|
14246
|
+
}
|
|
13323
14247
|
continue;
|
|
13324
14248
|
}
|
|
13325
14249
|
if (this.options.enableFalsePositiveFilter) {
|
|
@@ -13338,6 +14262,9 @@ var OpenRedaction = class _OpenRedaction {
|
|
|
13338
14262
|
endPos
|
|
13339
14263
|
);
|
|
13340
14264
|
confidence = contextAnalysis.confidence;
|
|
14265
|
+
if (this.options.debug && confidence < this.options.confidenceThreshold) {
|
|
14266
|
+
console.log(`[OpenRedaction] Pattern '${pattern.type}' failed context analysis. Value: '${value}', Confidence: ${confidence} < ${this.options.confidenceThreshold}`);
|
|
14267
|
+
}
|
|
13341
14268
|
}
|
|
13342
14269
|
if (this.contextRulesEngine) {
|
|
13343
14270
|
const piiMatch = {
|
|
@@ -13363,6 +14290,9 @@ var OpenRedaction = class _OpenRedaction {
|
|
|
13363
14290
|
continue;
|
|
13364
14291
|
}
|
|
13365
14292
|
const placeholder = this.generatePlaceholder(value, pattern);
|
|
14293
|
+
if (this.options.debug) {
|
|
14294
|
+
console.log(`[OpenRedaction] Pattern '${pattern.type}' detected: '${value}' at position ${startPos}-${endPos}, confidence: ${confidence}`);
|
|
14295
|
+
}
|
|
13366
14296
|
detections.push({
|
|
13367
14297
|
type: pattern.type,
|
|
13368
14298
|
value,
|
|
@@ -13423,8 +14353,9 @@ var OpenRedaction = class _OpenRedaction {
|
|
|
13423
14353
|
}
|
|
13424
14354
|
/**
|
|
13425
14355
|
* Detect PII in text
|
|
14356
|
+
* Now async to support optional AI assist
|
|
13426
14357
|
*/
|
|
13427
|
-
detect(text) {
|
|
14358
|
+
async detect(text) {
|
|
13428
14359
|
if (this.rbacManager && !this.rbacManager.hasPermission("detection:detect")) {
|
|
13429
14360
|
throw new Error("[OpenRedaction] Permission denied: detection:detect required");
|
|
13430
14361
|
}
|
|
@@ -13478,12 +14409,42 @@ var OpenRedaction = class _OpenRedaction {
|
|
|
13478
14409
|
} else {
|
|
13479
14410
|
detections = this.processPatterns(text, this.patterns, processedRanges);
|
|
13480
14411
|
}
|
|
14412
|
+
if (this.options.ai?.enabled) {
|
|
14413
|
+
const aiEndpoint = getAIEndpoint(this.options.ai);
|
|
14414
|
+
if (aiEndpoint) {
|
|
14415
|
+
try {
|
|
14416
|
+
if (this.options.debug) {
|
|
14417
|
+
console.log("[OpenRedaction] AI assist enabled, calling AI endpoint...");
|
|
14418
|
+
}
|
|
14419
|
+
const aiEntities = await callAIDetect(text, aiEndpoint, this.options.debug);
|
|
14420
|
+
if (aiEntities && aiEntities.length > 0) {
|
|
14421
|
+
if (this.options.debug) {
|
|
14422
|
+
console.log(`[OpenRedaction] AI returned ${aiEntities.length} additional entities`);
|
|
14423
|
+
}
|
|
14424
|
+
detections = mergeAIEntities(detections, aiEntities, text);
|
|
14425
|
+
if (this.options.debug) {
|
|
14426
|
+
console.log(`[OpenRedaction] After AI merge: ${detections.length} total detections`);
|
|
14427
|
+
}
|
|
14428
|
+
} else if (this.options.debug) {
|
|
14429
|
+
console.log("[OpenRedaction] AI endpoint returned no additional entities");
|
|
14430
|
+
}
|
|
14431
|
+
} catch (error) {
|
|
14432
|
+
if (this.options.debug) {
|
|
14433
|
+
console.warn(`[OpenRedaction] AI assist failed, using regex-only: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
14434
|
+
}
|
|
14435
|
+
}
|
|
14436
|
+
} else if (this.options.debug) {
|
|
14437
|
+
console.warn("[OpenRedaction] AI assist enabled but no endpoint configured. Set ai.endpoint or OPENREDACTION_AI_ENDPOINT env var.");
|
|
14438
|
+
}
|
|
14439
|
+
}
|
|
13481
14440
|
detections.sort((a, b) => b.position[0] - a.position[0]);
|
|
13482
14441
|
let redacted = text;
|
|
13483
14442
|
const redactionMap = {};
|
|
13484
14443
|
for (const detection of detections) {
|
|
13485
|
-
|
|
13486
|
-
|
|
14444
|
+
if (!detection.value) continue;
|
|
14445
|
+
const escapedValue = this.escapeRegex(detection.value);
|
|
14446
|
+
const pattern = new RegExp(escapedValue, "gi");
|
|
14447
|
+
redacted = redacted.replace(pattern, detection.placeholder);
|
|
13487
14448
|
redactionMap[detection.placeholder] = detection.value;
|
|
13488
14449
|
}
|
|
13489
14450
|
const endTime = performance.now();
|
|
@@ -13636,8 +14597,8 @@ var OpenRedaction = class _OpenRedaction {
|
|
|
13636
14597
|
/**
|
|
13637
14598
|
* Get severity-based scan results
|
|
13638
14599
|
*/
|
|
13639
|
-
scan(text) {
|
|
13640
|
-
const result = this.detect(text);
|
|
14600
|
+
async scan(text) {
|
|
14601
|
+
const result = await this.detect(text);
|
|
13641
14602
|
return {
|
|
13642
14603
|
high: result.detections.filter((d) => d.severity === "high"),
|
|
13643
14604
|
medium: result.detections.filter((d) => d.severity === "medium"),
|
|
@@ -13845,7 +14806,7 @@ var OpenRedaction = class _OpenRedaction {
|
|
|
13845
14806
|
* Run health check
|
|
13846
14807
|
*/
|
|
13847
14808
|
async healthCheck(options) {
|
|
13848
|
-
const { HealthChecker: HealthChecker2 } = await import("./HealthCheck-
|
|
14809
|
+
const { HealthChecker: HealthChecker2 } = await import("./HealthCheck-77BBYEEO.mjs");
|
|
13849
14810
|
const checker = new HealthChecker2(this);
|
|
13850
14811
|
return checker.check(options);
|
|
13851
14812
|
}
|
|
@@ -13853,7 +14814,7 @@ var OpenRedaction = class _OpenRedaction {
|
|
|
13853
14814
|
* Quick health check (minimal overhead)
|
|
13854
14815
|
*/
|
|
13855
14816
|
async quickHealthCheck() {
|
|
13856
|
-
const { HealthChecker: HealthChecker2 } = await import("./HealthCheck-
|
|
14817
|
+
const { HealthChecker: HealthChecker2 } = await import("./HealthCheck-77BBYEEO.mjs");
|
|
13857
14818
|
const checker = new HealthChecker2(this);
|
|
13858
14819
|
return checker.quickCheck();
|
|
13859
14820
|
}
|
|
@@ -13867,14 +14828,14 @@ var OpenRedaction = class _OpenRedaction {
|
|
|
13867
14828
|
if (this.rbacManager && !this.rbacManager.hasPermission("detection:detect")) {
|
|
13868
14829
|
throw new Error("[OpenRedaction] Permission denied: detection:detect required");
|
|
13869
14830
|
}
|
|
13870
|
-
const { createDocumentProcessor: createDocumentProcessor2 } = await import("./document-
|
|
14831
|
+
const { createDocumentProcessor: createDocumentProcessor2 } = await import("./document-NNFKTUEV.mjs");
|
|
13871
14832
|
const processor = createDocumentProcessor2();
|
|
13872
14833
|
const extractionStart = performance.now();
|
|
13873
14834
|
const text = await processor.extractText(buffer, options);
|
|
13874
14835
|
const metadata = await processor.getMetadata(buffer, options);
|
|
13875
14836
|
const extractionEnd = performance.now();
|
|
13876
14837
|
const extractionTime = Math.round((extractionEnd - extractionStart) * 100) / 100;
|
|
13877
|
-
const detection = this.detect(text);
|
|
14838
|
+
const detection = await this.detect(text);
|
|
13878
14839
|
return {
|
|
13879
14840
|
text,
|
|
13880
14841
|
metadata,
|
|
@@ -13968,7 +14929,7 @@ var StreamingDetector = class {
|
|
|
13968
14929
|
const end = Math.min(textLength, position + chunkSize);
|
|
13969
14930
|
const chunk = text.substring(start, end);
|
|
13970
14931
|
const byteOffset = start;
|
|
13971
|
-
const result = this.detector.detect(chunk);
|
|
14932
|
+
const result = await this.detector.detect(chunk);
|
|
13972
14933
|
const newDetections = result.detections.filter((detection) => {
|
|
13973
14934
|
const absoluteStart = byteOffset + detection.position[0];
|
|
13974
14935
|
const absoluteEnd = byteOffset + detection.position[1];
|
|
@@ -13998,8 +14959,10 @@ var StreamingDetector = class {
|
|
|
13998
14959
|
(a, b) => b.position[0] - a.position[0]
|
|
13999
14960
|
);
|
|
14000
14961
|
for (const detection of sortedDetections) {
|
|
14001
|
-
|
|
14002
|
-
|
|
14962
|
+
if (!detection.value) continue;
|
|
14963
|
+
const escapedValue = this.escapeRegex(detection.value);
|
|
14964
|
+
const pattern = new RegExp(escapedValue, "gi");
|
|
14965
|
+
redactedChunk = redactedChunk.replace(pattern, detection.placeholder);
|
|
14003
14966
|
}
|
|
14004
14967
|
}
|
|
14005
14968
|
yield {
|
|
@@ -14025,8 +14988,10 @@ var StreamingDetector = class {
|
|
|
14025
14988
|
allDetections.sort((a, b) => b.position[0] - a.position[0]);
|
|
14026
14989
|
const redactionMap = {};
|
|
14027
14990
|
for (const detection of allDetections) {
|
|
14028
|
-
|
|
14029
|
-
|
|
14991
|
+
if (!detection.value) continue;
|
|
14992
|
+
const escapedValue = this.escapeRegex(detection.value);
|
|
14993
|
+
const pattern = new RegExp(escapedValue, "gi");
|
|
14994
|
+
redactedText = redactedText.replace(pattern, detection.placeholder);
|
|
14030
14995
|
redactionMap[detection.placeholder] = detection.value;
|
|
14031
14996
|
}
|
|
14032
14997
|
return {
|
|
@@ -14101,6 +15066,9 @@ var StreamingDetector = class {
|
|
|
14101
15066
|
estimatedMemory
|
|
14102
15067
|
};
|
|
14103
15068
|
}
|
|
15069
|
+
escapeRegex(str) {
|
|
15070
|
+
return str.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
|
15071
|
+
}
|
|
14104
15072
|
};
|
|
14105
15073
|
function createStreamingDetector(detector, options) {
|
|
14106
15074
|
return new StreamingDetector(detector, options);
|
|
@@ -14114,11 +15082,11 @@ var BatchProcessor = class {
|
|
|
14114
15082
|
/**
|
|
14115
15083
|
* Process multiple documents sequentially
|
|
14116
15084
|
*/
|
|
14117
|
-
processSequential(documents, options = {}) {
|
|
15085
|
+
async processSequential(documents, options = {}) {
|
|
14118
15086
|
const startTime = performance.now();
|
|
14119
15087
|
const results = [];
|
|
14120
15088
|
for (let i = 0; i < documents.length; i++) {
|
|
14121
|
-
const result = this.detector.detect(documents[i]);
|
|
15089
|
+
const result = await this.detector.detect(documents[i]);
|
|
14122
15090
|
results.push(result);
|
|
14123
15091
|
if (options.onProgress) {
|
|
14124
15092
|
options.onProgress(i + 1, documents.length);
|
|
@@ -14132,7 +15100,7 @@ var BatchProcessor = class {
|
|
|
14132
15100
|
totalDocuments: documents.length,
|
|
14133
15101
|
totalDetections: results.reduce((sum, r) => sum + r.detections.length, 0),
|
|
14134
15102
|
totalTime,
|
|
14135
|
-
avgTimePerDocument: totalTime / documents.length
|
|
15103
|
+
avgTimePerDocument: documents.length > 0 ? totalTime / documents.length : 0
|
|
14136
15104
|
}
|
|
14137
15105
|
};
|
|
14138
15106
|
}
|
|
@@ -14146,16 +15114,14 @@ var BatchProcessor = class {
|
|
|
14146
15114
|
let completed = 0;
|
|
14147
15115
|
for (let i = 0; i < documents.length; i += maxConcurrency) {
|
|
14148
15116
|
const batch = documents.slice(i, i + maxConcurrency);
|
|
14149
|
-
const batchPromises = batch.map((doc, batchIndex) => {
|
|
14150
|
-
|
|
14151
|
-
|
|
14152
|
-
|
|
14153
|
-
|
|
14154
|
-
|
|
14155
|
-
|
|
14156
|
-
|
|
14157
|
-
return result;
|
|
14158
|
-
});
|
|
15117
|
+
const batchPromises = batch.map(async (doc, batchIndex) => {
|
|
15118
|
+
const result = await this.detector.detect(doc);
|
|
15119
|
+
results[i + batchIndex] = result;
|
|
15120
|
+
completed++;
|
|
15121
|
+
if (options.onProgress) {
|
|
15122
|
+
options.onProgress(completed, documents.length);
|
|
15123
|
+
}
|
|
15124
|
+
return result;
|
|
14159
15125
|
});
|
|
14160
15126
|
await Promise.all(batchPromises);
|
|
14161
15127
|
}
|
|
@@ -14167,7 +15133,7 @@ var BatchProcessor = class {
|
|
|
14167
15133
|
totalDocuments: documents.length,
|
|
14168
15134
|
totalDetections: results.reduce((sum, r) => sum + r.detections.length, 0),
|
|
14169
15135
|
totalTime,
|
|
14170
|
-
avgTimePerDocument: totalTime / documents.length
|
|
15136
|
+
avgTimePerDocument: documents.length > 0 ? totalTime / documents.length : 0
|
|
14171
15137
|
}
|
|
14172
15138
|
};
|
|
14173
15139
|
}
|
|
@@ -14178,7 +15144,7 @@ var BatchProcessor = class {
|
|
|
14178
15144
|
if (options.parallel) {
|
|
14179
15145
|
return this.processParallel(documents, options);
|
|
14180
15146
|
} else {
|
|
14181
|
-
return
|
|
15147
|
+
return this.processSequential(documents, options);
|
|
14182
15148
|
}
|
|
14183
15149
|
}
|
|
14184
15150
|
/**
|
|
@@ -14189,7 +15155,7 @@ var BatchProcessor = class {
|
|
|
14189
15155
|
for (let i = 0; i < documents.length; i += batchSize) {
|
|
14190
15156
|
const batch = documents.slice(i, i + batchSize);
|
|
14191
15157
|
for (const doc of batch) {
|
|
14192
|
-
const result = this.detector.detect(doc);
|
|
15158
|
+
const result = await this.detector.detect(doc);
|
|
14193
15159
|
yield result;
|
|
14194
15160
|
}
|
|
14195
15161
|
}
|
|
@@ -14237,7 +15203,7 @@ function openredactionMiddleware(options = {}) {
|
|
|
14237
15203
|
...detectorOptions
|
|
14238
15204
|
} = options;
|
|
14239
15205
|
const detector = new OpenRedaction(detectorOptions);
|
|
14240
|
-
return (req, res, next) => {
|
|
15206
|
+
return async (req, res, next) => {
|
|
14241
15207
|
if (skipRoutes.some((pattern) => pattern.test(req.path))) {
|
|
14242
15208
|
return next();
|
|
14243
15209
|
}
|
|
@@ -14257,7 +15223,7 @@ function openredactionMiddleware(options = {}) {
|
|
|
14257
15223
|
const results = {};
|
|
14258
15224
|
const redactedBody = { ...req.body };
|
|
14259
15225
|
for (const { field, value } of textsToCheck) {
|
|
14260
|
-
const result = detector.detect(value);
|
|
15226
|
+
const result = await detector.detect(value);
|
|
14261
15227
|
if (result.detections.length > 0) {
|
|
14262
15228
|
totalDetections += result.detections.length;
|
|
14263
15229
|
results[field] = result;
|
|
@@ -14307,7 +15273,7 @@ function openredactionMiddleware(options = {}) {
|
|
|
14307
15273
|
}
|
|
14308
15274
|
function detectPII(options = {}) {
|
|
14309
15275
|
const detector = new OpenRedaction(options);
|
|
14310
|
-
return (req, res) => {
|
|
15276
|
+
return async (req, res) => {
|
|
14311
15277
|
const text = req.body?.text || req.query.text;
|
|
14312
15278
|
if (!text) {
|
|
14313
15279
|
res.status(400).json({
|
|
@@ -14316,19 +15282,26 @@ function detectPII(options = {}) {
|
|
|
14316
15282
|
});
|
|
14317
15283
|
return;
|
|
14318
15284
|
}
|
|
14319
|
-
|
|
14320
|
-
|
|
14321
|
-
|
|
14322
|
-
|
|
14323
|
-
|
|
14324
|
-
|
|
14325
|
-
|
|
14326
|
-
|
|
15285
|
+
try {
|
|
15286
|
+
const result = await detector.detect(text);
|
|
15287
|
+
res.json({
|
|
15288
|
+
detected: result.detections.length > 0,
|
|
15289
|
+
count: result.detections.length,
|
|
15290
|
+
detections: result.detections,
|
|
15291
|
+
redacted: result.redacted,
|
|
15292
|
+
stats: result.stats
|
|
15293
|
+
});
|
|
15294
|
+
} catch (error) {
|
|
15295
|
+
res.status(500).json({
|
|
15296
|
+
error: "Detection failed",
|
|
15297
|
+
message: error instanceof Error ? error.message : "Unknown error"
|
|
15298
|
+
});
|
|
15299
|
+
}
|
|
14327
15300
|
};
|
|
14328
15301
|
}
|
|
14329
15302
|
function generateReport(options = {}) {
|
|
14330
15303
|
const detector = new OpenRedaction(options);
|
|
14331
|
-
return (req, res) => {
|
|
15304
|
+
return async (req, res) => {
|
|
14332
15305
|
const text = req.body?.text;
|
|
14333
15306
|
const format = req.body?.format || req.query.format || "json";
|
|
14334
15307
|
if (!text) {
|
|
@@ -14337,28 +15310,35 @@ function generateReport(options = {}) {
|
|
|
14337
15310
|
});
|
|
14338
15311
|
return;
|
|
14339
15312
|
}
|
|
14340
|
-
|
|
14341
|
-
|
|
14342
|
-
|
|
14343
|
-
|
|
14344
|
-
|
|
14345
|
-
|
|
14346
|
-
|
|
14347
|
-
|
|
14348
|
-
|
|
14349
|
-
|
|
14350
|
-
|
|
14351
|
-
|
|
14352
|
-
|
|
14353
|
-
|
|
14354
|
-
|
|
14355
|
-
|
|
14356
|
-
|
|
14357
|
-
|
|
14358
|
-
|
|
14359
|
-
|
|
14360
|
-
|
|
14361
|
-
|
|
15313
|
+
try {
|
|
15314
|
+
const result = await detector.detect(text);
|
|
15315
|
+
if (format === "html") {
|
|
15316
|
+
const html = detector.generateReport(result, {
|
|
15317
|
+
format: "html",
|
|
15318
|
+
title: req.body?.title || "PII Detection Report"
|
|
15319
|
+
});
|
|
15320
|
+
res.setHeader("Content-Type", "text/html");
|
|
15321
|
+
res.send(html);
|
|
15322
|
+
} else if (format === "markdown") {
|
|
15323
|
+
const md = detector.generateReport(result, {
|
|
15324
|
+
format: "markdown",
|
|
15325
|
+
title: req.body?.title || "PII Detection Report"
|
|
15326
|
+
});
|
|
15327
|
+
res.setHeader("Content-Type", "text/markdown");
|
|
15328
|
+
res.send(md);
|
|
15329
|
+
} else {
|
|
15330
|
+
res.json({
|
|
15331
|
+
detected: result.detections.length > 0,
|
|
15332
|
+
count: result.detections.length,
|
|
15333
|
+
detections: result.detections,
|
|
15334
|
+
redacted: result.redacted,
|
|
15335
|
+
stats: result.stats
|
|
15336
|
+
});
|
|
15337
|
+
}
|
|
15338
|
+
} catch (error) {
|
|
15339
|
+
res.status(500).json({
|
|
15340
|
+
error: "Report generation failed",
|
|
15341
|
+
message: error instanceof Error ? error.message : "Unknown error"
|
|
14362
15342
|
});
|
|
14363
15343
|
}
|
|
14364
15344
|
};
|
|
@@ -14370,12 +15350,17 @@ function useOpenRedaction(options) {
|
|
|
14370
15350
|
const detector = useMemo(() => new OpenRedaction(options), [options]);
|
|
14371
15351
|
const [result, setResult] = useState(null);
|
|
14372
15352
|
const [isDetecting, setIsDetecting] = useState(false);
|
|
14373
|
-
const detect = useCallback((text) => {
|
|
15353
|
+
const detect = useCallback(async (text) => {
|
|
14374
15354
|
setIsDetecting(true);
|
|
14375
|
-
|
|
14376
|
-
|
|
14377
|
-
|
|
14378
|
-
|
|
15355
|
+
try {
|
|
15356
|
+
const detection = await detector.detect(text);
|
|
15357
|
+
setResult(detection);
|
|
15358
|
+
setIsDetecting(false);
|
|
15359
|
+
return detection;
|
|
15360
|
+
} catch (error) {
|
|
15361
|
+
setIsDetecting(false);
|
|
15362
|
+
throw error;
|
|
15363
|
+
}
|
|
14379
15364
|
}, [detector]);
|
|
14380
15365
|
const clear = useCallback(() => {
|
|
14381
15366
|
setResult(null);
|
|
@@ -14401,10 +15386,14 @@ function usePIIDetector(text, options) {
|
|
|
14401
15386
|
return;
|
|
14402
15387
|
}
|
|
14403
15388
|
setIsDetecting(true);
|
|
14404
|
-
const timer = setTimeout(() => {
|
|
14405
|
-
|
|
14406
|
-
|
|
14407
|
-
|
|
15389
|
+
const timer = setTimeout(async () => {
|
|
15390
|
+
try {
|
|
15391
|
+
const detection = await detector.detect(text);
|
|
15392
|
+
setResult(detection);
|
|
15393
|
+
setIsDetecting(false);
|
|
15394
|
+
} catch (error) {
|
|
15395
|
+
setIsDetecting(false);
|
|
15396
|
+
}
|
|
14408
15397
|
}, debounce);
|
|
14409
15398
|
return () => {
|
|
14410
15399
|
clearTimeout(timer);
|
|
@@ -14425,27 +15414,32 @@ function useFormFieldValidator(options) {
|
|
|
14425
15414
|
const [value, setValue] = useState("");
|
|
14426
15415
|
const [error, setError] = useState(null);
|
|
14427
15416
|
const [result, setResult] = useState(null);
|
|
14428
|
-
const validate = useCallback((inputValue) => {
|
|
15417
|
+
const validate = useCallback(async (inputValue) => {
|
|
14429
15418
|
setValue(inputValue);
|
|
14430
15419
|
if (!inputValue) {
|
|
14431
15420
|
setError(null);
|
|
14432
15421
|
setResult(null);
|
|
14433
15422
|
return true;
|
|
14434
15423
|
}
|
|
14435
|
-
|
|
14436
|
-
|
|
14437
|
-
|
|
14438
|
-
|
|
14439
|
-
if (
|
|
14440
|
-
|
|
14441
|
-
|
|
14442
|
-
|
|
14443
|
-
onPIIDetected
|
|
15424
|
+
try {
|
|
15425
|
+
const detection = await detector.detect(inputValue);
|
|
15426
|
+
setResult(detection);
|
|
15427
|
+
const relevantDetections = types.length > 0 ? detection.detections.filter((d) => types.includes(d.type)) : detection.detections;
|
|
15428
|
+
if (relevantDetections.length > 0) {
|
|
15429
|
+
if (failOnPII) {
|
|
15430
|
+
setError(`Sensitive information detected: ${relevantDetections[0].type}`);
|
|
15431
|
+
}
|
|
15432
|
+
if (onPIIDetected) {
|
|
15433
|
+
onPIIDetected(detection);
|
|
15434
|
+
}
|
|
15435
|
+
return false;
|
|
14444
15436
|
}
|
|
15437
|
+
setError(null);
|
|
15438
|
+
return true;
|
|
15439
|
+
} catch (error2) {
|
|
15440
|
+
setError("Validation failed");
|
|
14445
15441
|
return false;
|
|
14446
15442
|
}
|
|
14447
|
-
setError(null);
|
|
14448
|
-
return true;
|
|
14449
15443
|
}, [detector, failOnPII, types, onPIIDetected]);
|
|
14450
15444
|
const getFieldProps = useCallback(() => ({
|
|
14451
15445
|
value,
|
|
@@ -14472,7 +15466,7 @@ function useBatchDetector(options) {
|
|
|
14472
15466
|
setProgress(0);
|
|
14473
15467
|
const detections = [];
|
|
14474
15468
|
for (let i = 0; i < texts.length; i++) {
|
|
14475
|
-
const result = detector.detect(texts[i]);
|
|
15469
|
+
const result = await detector.detect(texts[i]);
|
|
14476
15470
|
detections.push(result);
|
|
14477
15471
|
setProgress((i + 1) / texts.length * 100);
|
|
14478
15472
|
await new Promise((resolve) => setTimeout(resolve, 0));
|
|
@@ -14509,9 +15503,12 @@ function useAutoRedact(options) {
|
|
|
14509
15503
|
setResult(null);
|
|
14510
15504
|
return;
|
|
14511
15505
|
}
|
|
14512
|
-
const timer = setTimeout(() => {
|
|
14513
|
-
|
|
14514
|
-
|
|
15506
|
+
const timer = setTimeout(async () => {
|
|
15507
|
+
try {
|
|
15508
|
+
const detection = await detector.detect(text);
|
|
15509
|
+
setResult(detection);
|
|
15510
|
+
} catch (error) {
|
|
15511
|
+
}
|
|
14515
15512
|
}, debounce);
|
|
14516
15513
|
return () => clearTimeout(timer);
|
|
14517
15514
|
}, [text, detector, debounce]);
|
|
@@ -14640,7 +15637,7 @@ var TenantManager = class {
|
|
|
14640
15637
|
await this.checkQuotas(tenantId, text);
|
|
14641
15638
|
this.trackRequest(tenantId, text);
|
|
14642
15639
|
const detector = this.getDetector(tenantId);
|
|
14643
|
-
const result = detector.detect(text);
|
|
15640
|
+
const result = await detector.detect(text);
|
|
14644
15641
|
const usage = this.usage.get(tenantId);
|
|
14645
15642
|
usage.piiDetectedThisMonth += result.detections.length;
|
|
14646
15643
|
usage.lastRequestAt = /* @__PURE__ */ new Date();
|
|
@@ -14927,6 +15924,7 @@ var DEFAULT_TIER_QUOTAS = {
|
|
|
14927
15924
|
// src/webhooks/WebhookManager.ts
|
|
14928
15925
|
var WebhookManager = class {
|
|
14929
15926
|
// 1 minute
|
|
15927
|
+
// private readonly HALF_OPEN_MAX_REQUESTS = 1; // Reserved for future use
|
|
14930
15928
|
constructor(options) {
|
|
14931
15929
|
this.webhooks = /* @__PURE__ */ new Map();
|
|
14932
15930
|
this.deliveryHistory = [];
|
|
@@ -15198,9 +16196,9 @@ var WebhookManager = class {
|
|
|
15198
16196
|
*/
|
|
15199
16197
|
async makeHttpRequest(webhook, event) {
|
|
15200
16198
|
try {
|
|
15201
|
-
let
|
|
16199
|
+
let fetch2;
|
|
15202
16200
|
try {
|
|
15203
|
-
|
|
16201
|
+
fetch2 = globalThis.fetch;
|
|
15204
16202
|
} catch {
|
|
15205
16203
|
throw new Error("[WebhookManager] HTTP client not available. Requires Node 18+ with fetch support.");
|
|
15206
16204
|
}
|
|
@@ -15220,7 +16218,7 @@ var WebhookManager = class {
|
|
|
15220
16218
|
const controller = new AbortController();
|
|
15221
16219
|
const timeoutId = setTimeout(() => controller.abort(), webhook.timeout);
|
|
15222
16220
|
try {
|
|
15223
|
-
const response = await
|
|
16221
|
+
const response = await fetch2(webhook.url, {
|
|
15224
16222
|
method: "POST",
|
|
15225
16223
|
headers,
|
|
15226
16224
|
body: JSON.stringify(event),
|
|
@@ -15591,7 +16589,7 @@ var APIServer = class {
|
|
|
15591
16589
|
if (req.tenantId && this.config.tenantManager) {
|
|
15592
16590
|
result = await this.config.tenantManager.detect(req.tenantId, text);
|
|
15593
16591
|
} else if (this.detector) {
|
|
15594
|
-
result = this.detector.detect(text);
|
|
16592
|
+
result = await this.detector.detect(text);
|
|
15595
16593
|
} else {
|
|
15596
16594
|
throw new Error("No detector available");
|
|
15597
16595
|
}
|
|
@@ -15632,7 +16630,7 @@ var APIServer = class {
|
|
|
15632
16630
|
if (req.tenantId && this.config.tenantManager) {
|
|
15633
16631
|
result = await this.config.tenantManager.detect(req.tenantId, text);
|
|
15634
16632
|
} else if (this.detector) {
|
|
15635
|
-
result = this.detector.detect(text);
|
|
16633
|
+
result = await this.detector.detect(text);
|
|
15636
16634
|
} else {
|
|
15637
16635
|
throw new Error("No detector available");
|
|
15638
16636
|
}
|
|
@@ -16053,10 +17051,12 @@ export {
|
|
|
16053
17051
|
analyzeFullContext,
|
|
16054
17052
|
calculateContextConfidence,
|
|
16055
17053
|
calculateRisk,
|
|
17054
|
+
callAIDetect,
|
|
16056
17055
|
ccpaPreset,
|
|
16057
17056
|
commonFalsePositives,
|
|
16058
17057
|
compileSafeRegex,
|
|
16059
17058
|
contactPatterns,
|
|
17059
|
+
convertAIEntityToDetection,
|
|
16060
17060
|
createAPIServer,
|
|
16061
17061
|
createBatchProcessor,
|
|
16062
17062
|
createCacheDisabledError,
|
|
@@ -16091,12 +17091,16 @@ export {
|
|
|
16091
17091
|
createXlsxProcessor,
|
|
16092
17092
|
defaultPasses,
|
|
16093
17093
|
detectPII,
|
|
17094
|
+
detectionsOverlap,
|
|
17095
|
+
educationPreset,
|
|
16094
17096
|
exportForVersionControl,
|
|
16095
17097
|
extractContext,
|
|
16096
17098
|
filterFalsePositives,
|
|
17099
|
+
financePreset,
|
|
16097
17100
|
financialPatterns,
|
|
16098
17101
|
gdprPreset,
|
|
16099
17102
|
generateReport,
|
|
17103
|
+
getAIEndpoint,
|
|
16100
17104
|
getPatternsByCategory,
|
|
16101
17105
|
getPredefinedRole,
|
|
16102
17106
|
getPreset,
|
|
@@ -16104,21 +17108,26 @@ export {
|
|
|
16104
17108
|
governmentPatterns,
|
|
16105
17109
|
groupPatternsByPass,
|
|
16106
17110
|
healthCheckMiddleware,
|
|
17111
|
+
healthcarePreset,
|
|
17112
|
+
healthcareResearchPreset,
|
|
16107
17113
|
hipaaPreset,
|
|
16108
17114
|
inferDocumentType,
|
|
16109
17115
|
isFalsePositive,
|
|
16110
17116
|
isUnsafePattern,
|
|
17117
|
+
mergeAIEntities,
|
|
16111
17118
|
mergePassDetections,
|
|
16112
17119
|
networkPatterns,
|
|
16113
17120
|
openredactionMiddleware,
|
|
16114
17121
|
personalPatterns,
|
|
16115
17122
|
safeExec,
|
|
16116
17123
|
safeExecAll,
|
|
17124
|
+
transportLogisticsPreset,
|
|
16117
17125
|
useAutoRedact,
|
|
16118
17126
|
useBatchDetector,
|
|
16119
17127
|
useFormFieldValidator,
|
|
16120
17128
|
useOpenRedaction,
|
|
16121
17129
|
usePIIDetector,
|
|
17130
|
+
validateAIEntity,
|
|
16122
17131
|
validateEmail,
|
|
16123
17132
|
validateIBAN,
|
|
16124
17133
|
validateLuhn,
|