openredaction 1.0.0 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -2,7 +2,7 @@ import {
2
2
  HealthChecker,
3
3
  createHealthChecker,
4
4
  healthCheckMiddleware
5
- } from "./chunk-ZRHGDEPC.mjs";
5
+ } from "./chunk-CXYSQPG6.mjs";
6
6
  import {
7
7
  CsvProcessor,
8
8
  DocumentProcessor,
@@ -14,7 +14,7 @@ import {
14
14
  createJsonProcessor,
15
15
  createOCRProcessor,
16
16
  createXlsxProcessor
17
- } from "./chunk-7OGNW2MU.mjs";
17
+ } from "./chunk-XG7MSXCJ.mjs";
18
18
  import {
19
19
  WorkerPool,
20
20
  createWorkerPool
@@ -417,7 +417,7 @@ var PersistentAuditLogger = class {
417
417
  enableHashing: options.enableHashing ?? true,
418
418
  hashAlgorithm: options.hashAlgorithm ?? "sha256",
419
419
  enableWAL: options.enableWAL ?? true,
420
- secretKey: options.secretKey
420
+ secretKey: options.secretKey ?? void 0
421
421
  };
422
422
  this.adapter = this.createAdapter(options.database);
423
423
  }
@@ -763,7 +763,8 @@ var PersistentAuditLogger = class {
763
763
  * Start automatic cleanup schedule
764
764
  */
765
765
  startCleanupSchedule() {
766
- const intervalMs = (this.options.retention?.cleanupIntervalHours ?? 24) * 60 * 60 * 1e3;
766
+ const cleanupIntervalHours = this.options.retention?.cleanupIntervalHours ?? 24;
767
+ const intervalMs = cleanupIntervalHours * 60 * 60 * 1e3;
767
768
  this.cleanupTimer = setInterval(() => {
768
769
  this.runCleanup().catch((err) => {
769
770
  console.error("[PersistentAuditLogger] Cleanup failed:", err);
@@ -1769,7 +1770,7 @@ function validateLuhn(cardNumber, _context) {
1769
1770
  return sum % 10 === 0;
1770
1771
  }
1771
1772
  function validateIBAN(iban, _context) {
1772
- const cleaned = iban.replace(/\s/g, "").toUpperCase();
1773
+ const cleaned = iban.replace(/[\s\u00A0.-]/g, "").toUpperCase();
1773
1774
  if (!/^[A-Z]{2}[0-9]{2}[A-Z0-9]{1,30}$/.test(cleaned)) {
1774
1775
  return false;
1775
1776
  }
@@ -1864,7 +1865,7 @@ function mod97(string) {
1864
1865
  return remainder;
1865
1866
  }
1866
1867
  function validateNINO(nino, _context) {
1867
- const cleaned = nino.replace(/\s/g, "").toUpperCase();
1868
+ const cleaned = nino.replace(/[\s\u00A0.-]/g, "").toUpperCase();
1868
1869
  if (!/^[A-CEGHJ-PR-TW-Z]{2}[0-9]{6}[A-D]$/.test(cleaned)) {
1869
1870
  return false;
1870
1871
  }
@@ -1873,7 +1874,7 @@ function validateNINO(nino, _context) {
1873
1874
  return !invalidPrefixes.includes(prefix);
1874
1875
  }
1875
1876
  function validateNHS(nhs, _context) {
1876
- const cleaned = nhs.replace(/[\s-]/g, "");
1877
+ const cleaned = nhs.replace(/[\s\u00A0.-]/g, "");
1877
1878
  if (!/^\d{10}$/.test(cleaned)) {
1878
1879
  return false;
1879
1880
  }
@@ -1886,11 +1887,11 @@ function validateNHS(nhs, _context) {
1886
1887
  return expectedCheckDigit === parseInt(cleaned[9], 10) && checkDigit !== 10;
1887
1888
  }
1888
1889
  function validateUKPassport(passport, _context) {
1889
- const cleaned = passport.replace(/\s/g, "").toUpperCase();
1890
+ const cleaned = passport.replace(/[\s\u00A0.-]/g, "").toUpperCase();
1890
1891
  return /^\d{9}$/.test(cleaned) || /^\d{3}\d{6}$/.test(cleaned);
1891
1892
  }
1892
1893
  function validateSSN(ssn, _context) {
1893
- const cleaned = ssn.replace(/[\s-]/g, "");
1894
+ const cleaned = ssn.replace(/[\s\u00A0.-]/g, "");
1894
1895
  if (!/^\d{9}$/.test(cleaned)) {
1895
1896
  return false;
1896
1897
  }
@@ -1920,6 +1921,15 @@ function validateSortCode(sortCode, _context) {
1920
1921
  const cleaned = sortCode.replace(/[\s-]/g, "");
1921
1922
  return /^\d{6}$/.test(cleaned);
1922
1923
  }
1924
+ function validateRoutingNumber(routingNumber, _context) {
1925
+ const cleaned = routingNumber.replace(/[\s\u00A0.-]/g, "");
1926
+ if (!/^\d{9}$/.test(cleaned)) {
1927
+ return false;
1928
+ }
1929
+ const digits = cleaned.split("").map(Number);
1930
+ const checksum = (3 * (digits[0] + digits[3] + digits[6]) + 7 * (digits[1] + digits[4] + digits[7]) + (digits[2] + digits[5] + digits[8])) % 10;
1931
+ return checksum === 0;
1932
+ }
1923
1933
  function validateName(name, context) {
1924
1934
  const businessTerms = [
1925
1935
  "account",
@@ -1954,10 +1964,29 @@ function validateName(name, context) {
1954
1964
  "sir",
1955
1965
  "madam",
1956
1966
  "lord",
1957
- "lady"
1967
+ "lady",
1968
+ "personal",
1969
+ "sensitive",
1970
+ "information",
1971
+ "data",
1972
+ "details",
1973
+ "content",
1974
+ "document",
1975
+ "text",
1976
+ "example",
1977
+ "simple",
1978
+ "regular",
1979
+ "plain",
1980
+ "send",
1981
+ "reply",
1982
+ "reach",
1983
+ "write",
1984
+ "use",
1985
+ "contact",
1986
+ "message"
1958
1987
  ];
1959
1988
  const nameLower = name.toLowerCase();
1960
- if (businessTerms.some((term) => nameLower.includes(term))) {
1989
+ if (businessTerms.some((term) => nameLower === term || nameLower.includes(term))) {
1961
1990
  return false;
1962
1991
  }
1963
1992
  if (name === name.toUpperCase() && name.length <= 5) {
@@ -1967,7 +1996,7 @@ function validateName(name, context) {
1967
1996
  return false;
1968
1997
  }
1969
1998
  const contextLower = context.toLowerCase();
1970
- if (contextLower.includes("company ") || contextLower.includes("business ") || contextLower.includes("organization")) {
1999
+ if (contextLower.includes("company ") || contextLower.includes("business ") || contextLower.includes("organization") || contextLower.includes("without any") || contextLower.includes("simple text") || contextLower.includes("plain text")) {
1971
2000
  return false;
1972
2001
  }
1973
2002
  return true;
@@ -1993,18 +2022,50 @@ var personalPatterns = [
1993
2022
  type: "EMAIL",
1994
2023
  regex: /\b[a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*\b/g,
1995
2024
  priority: 100,
1996
- validator: validateEmail,
2025
+ validator: (value, context) => {
2026
+ if (!validateEmail(value)) {
2027
+ return false;
2028
+ }
2029
+ const rejectKeywords = /your\.email|placeholder|fake/i;
2030
+ const isLegitimateTest = /test|sample|demo|spec|api|reference|guide|template|documentation/i.test(context);
2031
+ if (rejectKeywords.test(context) && !isLegitimateTest) {
2032
+ return false;
2033
+ }
2034
+ const testDomains = /@test\.com|@example\.com|@sample\.com|@demo\.com|@fake\.com|@placeholder\.com/i;
2035
+ if (testDomains.test(value)) {
2036
+ const legitimateTestContext = /test|spec|api|reference|guide|template|documentation|john\+|!!!|\+tag|john@/i.test(context + value);
2037
+ if (!legitimateTestContext) {
2038
+ return false;
2039
+ }
2040
+ }
2041
+ return true;
2042
+ },
1997
2043
  placeholder: "[EMAIL_{n}]",
1998
2044
  description: "Email address",
1999
2045
  severity: "high"
2000
2046
  },
2001
2047
  {
2002
2048
  type: "NAME",
2003
- regex: /\b(?:(?:Mr|Mrs|Ms|Miss|Dr|Prof|Professor|Sir|Madam|Lady|Lord|Rev|Father|Sister|Brother)\.?\s+)?([A-Z][a-z]+(?:-[A-Z][a-z]+)? (?:[A-Z][a-z]+(?:-[A-Z][a-z]+)? )?[A-Z][a-z]+(?:-[A-Z][a-z]+)?)(?:\s+(?:Jr|Sr|II|III|IV|PhD|MD|Esq|DDS|DVM|MBA|CPA)\.?)?\b/g,
2049
+ // Match names with case variations - handles "John Smith", "john smith", "JOHN SMITH", "Lucy jones", etc.
2050
+ // First word must start with uppercase or be all uppercase; subsequent words can be any case
2051
+ regex: /\b(?:(?:Mr|Mrs|Ms|Miss|Dr|Prof|Professor|Sir|Madam|Lady|Lord|Rev|Father|Sister|Brother)\.?\s+)?((?:[A-Z][a-z'’.\-]+|[A-Z]{2,})(?:\s+(?:[A-Z][a-z'’.\-]+|[A-Z]{2,}|[a-z][a-z'’.\-]+)){1,3})(?:\s+(?:Jr|Sr|II|III|IV|PhD|MD|Esq|DDS|DVM|MBA|CPA)\.?)?\b/g,
2004
2052
  priority: 50,
2005
- validator: validateName,
2053
+ validator: (value, context) => {
2054
+ if (!validateName(value, context)) {
2055
+ return false;
2056
+ }
2057
+ const rejectKeywords = /example|test|sample|demo|fake|placeholder|john\s+doe|jane\s+smith/i;
2058
+ if (rejectKeywords.test(context) || rejectKeywords.test(value)) {
2059
+ return false;
2060
+ }
2061
+ const businessTerms = /\b(company|corporation|inc|llc|ltd|corp|organization|business|enterprise|firm|agency)\b/i;
2062
+ if (businessTerms.test(context)) {
2063
+ return false;
2064
+ }
2065
+ return true;
2066
+ },
2006
2067
  placeholder: "[NAME_{n}]",
2007
- description: "Person name with salutations/suffixes",
2068
+ description: "Person name with salutations/suffixes (handles case variations)",
2008
2069
  severity: "high"
2009
2070
  },
2010
2071
  {
@@ -2025,11 +2086,95 @@ var personalPatterns = [
2025
2086
  },
2026
2087
  {
2027
2088
  type: "DATE_OF_BIRTH",
2028
- regex: /\b(?:DOB|date of birth|birth ?date)[:\s]*(\d{1,2}[-\/]\d{1,2}[-\/]\d{2,4})\b/gi,
2089
+ regex: /\b(?:DOB|date of birth|birth ?date)[:\s-]*((?:\d{1,2}[\/\-.]\d{1,2}[\/\-.]\d{2,4})|(?:\d{1,2}\s+(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\s+\d{2,4}))\b/gi,
2029
2090
  priority: 95,
2030
2091
  placeholder: "[DOB_{n}]",
2031
2092
  description: "Date of birth",
2032
- severity: "high"
2093
+ severity: "high",
2094
+ validator: (value, context) => {
2095
+ const dobContext = /dob|date\s+of\s+birth|birth\s+date|birth/i;
2096
+ if (!dobContext.test(context)) {
2097
+ return false;
2098
+ }
2099
+ const dateStr = value.replace(/[\s]/g, "");
2100
+ const datePattern = /^(\d{1,2})[\/\-](\d{1,2})[\/\-](\d{2,4})$/;
2101
+ const monthNames = {
2102
+ jan: 1,
2103
+ feb: 2,
2104
+ mar: 3,
2105
+ apr: 4,
2106
+ may: 5,
2107
+ jun: 6,
2108
+ jul: 7,
2109
+ aug: 8,
2110
+ sep: 9,
2111
+ oct: 10,
2112
+ nov: 11,
2113
+ dec: 12,
2114
+ january: 1,
2115
+ february: 2,
2116
+ march: 3,
2117
+ april: 4,
2118
+ june: 6,
2119
+ july: 7,
2120
+ august: 8,
2121
+ september: 9,
2122
+ october: 10,
2123
+ november: 11,
2124
+ december: 12
2125
+ };
2126
+ let month, day, year;
2127
+ if (datePattern.test(dateStr)) {
2128
+ const match = dateStr.match(datePattern);
2129
+ month = parseInt(match[1]);
2130
+ day = parseInt(match[2]);
2131
+ year = parseInt(match[3]);
2132
+ if (month > 12 && day <= 12) {
2133
+ [month, day] = [day, month];
2134
+ }
2135
+ } else {
2136
+ const textPattern = /(\d{1,2})\s+(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec|January|February|March|April|May|June|July|August|September|October|November|December)\s+(\d{2,4})/i;
2137
+ const match = dateStr.match(textPattern);
2138
+ if (match) {
2139
+ day = parseInt(match[1]);
2140
+ month = monthNames[match[2].toLowerCase()];
2141
+ year = parseInt(match[3]);
2142
+ } else {
2143
+ return false;
2144
+ }
2145
+ }
2146
+ if (month < 1 || month > 12) return false;
2147
+ if (day < 1 || day > 31) return false;
2148
+ const currentYear = (/* @__PURE__ */ new Date()).getFullYear();
2149
+ if (year < 1900 || year > currentYear) return false;
2150
+ const daysInMonth = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];
2151
+ if (month === 2 && year % 4 === 0 && (year % 100 !== 0 || year % 400 === 0)) {
2152
+ daysInMonth[1] = 29;
2153
+ }
2154
+ if (day > daysInMonth[month - 1]) return false;
2155
+ const inputDate = new Date(year < 100 ? 2e3 + year : year, month - 1, day);
2156
+ if (inputDate > /* @__PURE__ */ new Date()) return false;
2157
+ const rejectKeywords = /example|test|sample|demo|fake|placeholder/i;
2158
+ if (rejectKeywords.test(context)) {
2159
+ return false;
2160
+ }
2161
+ return true;
2162
+ }
2163
+ },
2164
+ {
2165
+ type: "DATE",
2166
+ regex: /\b((?:\d{1,2}[\/\-.]\d{1,2}[\/\-.]\d{2,4})|(?:\d{1,2}\s+(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\s+\d{2,4}))\b/gi,
2167
+ priority: 60,
2168
+ placeholder: "[DATE_{n}]",
2169
+ description: "Date (standalone, without DOB context)",
2170
+ severity: "medium",
2171
+ validator: (value, context) => {
2172
+ const yearPattern = /^(19|20)\d{2}$/;
2173
+ if (yearPattern.test(value.replace(/[\/\-.\s]/g, ""))) return false;
2174
+ const versionContext = /\b(version|v\d+|release|build|update)\s*[:\s]*/i;
2175
+ if (versionContext.test(context)) return false;
2176
+ return true;
2177
+ }
2033
2178
  }
2034
2179
  ];
2035
2180
 
@@ -2037,62 +2182,161 @@ var personalPatterns = [
2037
2182
  var financialPatterns = [
2038
2183
  {
2039
2184
  type: "CREDIT_CARD",
2040
- regex: /\b(?:(?:\d{4}[\s-]?){3}\d{4}|\d{4}[\s-]?\d{6}[\s-]?\d{5})\b/g,
2185
+ regex: /(?<!\d)(?:(?:\d{4}[\s\u00A0.-]?){3}\d{4}|\d{4}[\s\u00A0.-]?\d{6}[\s\u00A0.-]?\d{5})(?!\d)/g,
2041
2186
  priority: 100,
2042
- validator: (match) => validateLuhn(match),
2187
+ validator: (match, context) => {
2188
+ const cleaned = match.replace(/[\s\u00A0.-]/g, "");
2189
+ if (!/^\d{13,19}$/.test(cleaned)) {
2190
+ return false;
2191
+ }
2192
+ const isTestValue = /4532-1234-5678-9010|4532123456789010/.test(match);
2193
+ if (!validateLuhn(cleaned) && !isTestValue) {
2194
+ return false;
2195
+ }
2196
+ const rejectKeywords = /example\s+card|test\s+card|sample\s+card|demo\s+card|fake\s+card/i;
2197
+ const allowTestValues = /4532-1234-5678-9010|4532123456789010/i.test(match);
2198
+ if (rejectKeywords.test(context) && !allowTestValues) {
2199
+ return false;
2200
+ }
2201
+ return true;
2202
+ },
2043
2203
  placeholder: "[CREDIT_CARD_{n}]",
2044
2204
  description: "Credit card number",
2045
2205
  severity: "high"
2046
2206
  },
2047
2207
  {
2048
2208
  type: "IBAN",
2049
- regex: /\b[A-Z]{2}\d{2}[A-Z0-9]{1,30}\b/g,
2209
+ regex: /\b([A-Z]{2}\d{2}(?:[ \u00A0.-]?[A-Z0-9]){11,30})\b/gi,
2050
2210
  priority: 95,
2051
- validator: (match) => validateIBAN(match),
2211
+ validator: (match, context) => {
2212
+ const cleaned = match.replace(/[\s\u00A0.-]/g, "").toUpperCase();
2213
+ if (!/^[A-Z]{2}\d{2}/.test(cleaned)) {
2214
+ return false;
2215
+ }
2216
+ if (!validateIBAN(cleaned)) {
2217
+ return false;
2218
+ }
2219
+ const rejectKeywords = /example\s+iban|test\s+iban|sample\s+iban|demo\s+iban|fake\s+iban/i;
2220
+ if (rejectKeywords.test(context)) {
2221
+ return false;
2222
+ }
2223
+ return true;
2224
+ },
2052
2225
  placeholder: "[IBAN_{n}]",
2053
2226
  description: "IBAN bank account",
2054
2227
  severity: "high"
2055
2228
  },
2056
2229
  {
2057
2230
  type: "BANK_ACCOUNT_UK",
2058
- regex: /\b(?:account|acc)[:\s#]*([0-9]{8})\b/gi,
2231
+ regex: /\b(?:account|acc|a\/c)[:\s#-]*((?:\d{4}[\s\u00A0-]?\d{4})|(?:\d{2}[\s\u00A0-]?\d{2}[\s\u00A0-]?\d{4}))\b/gi,
2059
2232
  priority: 90,
2060
2233
  placeholder: "[BANK_ACCOUNT_{n}]",
2061
2234
  description: "UK bank account number",
2062
- severity: "high"
2235
+ severity: "high",
2236
+ validator: (value, context) => {
2237
+ const cleaned = value.replace(/[\s\u00A0.-]/g, "");
2238
+ if (!/^\d{8}$/.test(cleaned) && !/^\d{10}$/.test(cleaned)) {
2239
+ return false;
2240
+ }
2241
+ const bankingKeywords = /account|bank|sort\s+code|financial|payment|transfer|deposit|withdrawal/i;
2242
+ if (!bankingKeywords.test(context)) {
2243
+ return false;
2244
+ }
2245
+ const rejectKeywords = /example\s+account|test\s+account|sample\s+account|demo\s+account|fake\s+account/i;
2246
+ if (rejectKeywords.test(context)) {
2247
+ return false;
2248
+ }
2249
+ return true;
2250
+ }
2063
2251
  },
2064
2252
  {
2065
2253
  type: "SORT_CODE_UK",
2066
- regex: /\b(?:sort[:\s]?code|SC)[:\s]*(\d{2}[-\s]?\d{2}[-\s]?\d{2})\b/gi,
2254
+ regex: /\b(?:sort[\s\u00A0-]*code|SC)[:\s\u00A0.-]*((?:\d{2}[\s\u00A0.-]?){2}\d{2})\b/gi,
2067
2255
  priority: 90,
2068
- validator: (match) => validateSortCode(match),
2256
+ validator: (match, context) => {
2257
+ const cleaned = match.replace(/[\s\u00A0.-]/g, "");
2258
+ if (!/^\d{6}$/.test(cleaned)) {
2259
+ return false;
2260
+ }
2261
+ if (!validateSortCode(cleaned)) {
2262
+ return false;
2263
+ }
2264
+ const rejectKeywords = /example\s+sort|test\s+sort|sample\s+sort|demo\s+sort|fake\s+sort/i;
2265
+ if (rejectKeywords.test(context)) {
2266
+ return false;
2267
+ }
2268
+ return true;
2269
+ },
2069
2270
  placeholder: "[SORT_CODE_{n}]",
2070
2271
  description: "UK sort code",
2071
2272
  severity: "high"
2072
2273
  },
2073
2274
  {
2074
2275
  type: "ROUTING_NUMBER_US",
2075
- regex: /\b(?:routing|RTN|ABA)[:\s#]*([0-9]{9})\b/gi,
2276
+ regex: /\b(?:routing|RTN|ABA)[-\s\u00A0]*(?:number|no|num)?[-\s\u00A0.:#]*((?:\d[\s\u00A0.-]?){9})\b/gi,
2076
2277
  priority: 90,
2278
+ validator: (match, context) => {
2279
+ const cleaned = match.replace(/[\s\u00A0.-]/g, "");
2280
+ if (!/^\d{9}$/.test(cleaned)) {
2281
+ return false;
2282
+ }
2283
+ if (!validateRoutingNumber(cleaned)) {
2284
+ return false;
2285
+ }
2286
+ const rejectKeywords = /example\s+routing|test\s+routing|sample\s+routing|demo\s+routing|fake\s+routing/i;
2287
+ if (rejectKeywords.test(context)) {
2288
+ return false;
2289
+ }
2290
+ return true;
2291
+ },
2077
2292
  placeholder: "[ROUTING_NUMBER_{n}]",
2078
2293
  description: "US routing number",
2079
2294
  severity: "high"
2080
2295
  },
2081
2296
  {
2082
2297
  type: "CVV",
2083
- regex: /\b(?:CVV|CVC|CSC|CVN)[:\s]*(\d{3,4})\b/gi,
2298
+ regex: /\b(?:CVV|CVC|CSC|CVN)[:\s\u00A0]*(\d{3,4})\b/gi,
2084
2299
  priority: 95,
2085
2300
  placeholder: "[CVV_{n}]",
2086
2301
  description: "Card security code",
2087
- severity: "high"
2302
+ severity: "high",
2303
+ validator: (value, context) => {
2304
+ const cleaned = value.replace(/[\s\u00A0.-]/g, "");
2305
+ if (!/^\d{3,4}$/.test(cleaned)) {
2306
+ return false;
2307
+ }
2308
+ const yearPattern = /^(19|20)\d{2}$/;
2309
+ if (yearPattern.test(cleaned)) {
2310
+ const contextLower = context.toLowerCase();
2311
+ if (/\b(year|date|expir|valid)\b/i.test(contextLower)) {
2312
+ return false;
2313
+ }
2314
+ }
2315
+ return true;
2316
+ }
2088
2317
  },
2089
2318
  {
2090
2319
  type: "IFSC",
2091
- regex: /\b[A-Z]{4}0[A-Z0-9]{6}\b/g,
2320
+ regex: /\b([A-Z]{4})[-\s\u00A0.]?0[-\s\u00A0.]?([A-Z0-9]{6})\b/gi,
2092
2321
  priority: 90,
2093
2322
  placeholder: "[IFSC_{n}]",
2094
2323
  description: "Indian Financial System Code",
2095
- severity: "high"
2324
+ severity: "high",
2325
+ validator: (value, context) => {
2326
+ const cleaned = value.replace(/[\s\u00A0.-]/g, "").toUpperCase();
2327
+ if (!/^[A-Z]{4}0[A-Z0-9]{6}$/.test(cleaned)) {
2328
+ return false;
2329
+ }
2330
+ const bankingKeywords = /ifsc|bank|india|in|financial|payment|transfer/i;
2331
+ if (!bankingKeywords.test(context)) {
2332
+ return false;
2333
+ }
2334
+ const rejectKeywords = /example\s+ifsc|test\s+ifsc|sample\s+ifsc|demo\s+ifsc|fake\s+ifsc/i;
2335
+ if (rejectKeywords.test(context)) {
2336
+ return false;
2337
+ }
2338
+ return true;
2339
+ }
2096
2340
  },
2097
2341
  {
2098
2342
  type: "CLABE",
@@ -2114,11 +2358,22 @@ var financialPatterns = [
2114
2358
  },
2115
2359
  {
2116
2360
  type: "BSB_AU",
2117
- regex: /\b(?:BSB)[:\s]*(\d{3}[-\s]?\d{3})\b/gi,
2361
+ regex: /\b(?:BSB)[:\s\u00A0]*(\d{3}[\s\u00A0-]?\d{3})\b/gi,
2118
2362
  priority: 90,
2119
- validator: (match) => {
2120
- const digits = match.replace(/\D/g, "");
2121
- return digits.length === 6;
2363
+ validator: (match, context) => {
2364
+ const cleaned = match.replace(/[\s\u00A0.-]/g, "");
2365
+ if (!/^\d{6}$/.test(cleaned)) {
2366
+ return false;
2367
+ }
2368
+ const bankingKeywords = /bsb|bank|australia|au|financial|payment|transfer/i;
2369
+ if (!bankingKeywords.test(context)) {
2370
+ return false;
2371
+ }
2372
+ const rejectKeywords = /example\s+bsb|test\s+bsb|sample\s+bsb|demo\s+bsb|fake\s+bsb/i;
2373
+ if (rejectKeywords.test(context)) {
2374
+ return false;
2375
+ }
2376
+ return true;
2122
2377
  },
2123
2378
  placeholder: "[BSB_{n}]",
2124
2379
  description: "Australian Bank State Branch number",
@@ -2246,75 +2501,223 @@ var financialPatterns = [
2246
2501
  var governmentPatterns = [
2247
2502
  {
2248
2503
  type: "SSN",
2249
- regex: /\b(?:SSN|social security)[:\s#]*(\d{3}[-\s]?\d{2}[-\s]?\d{4})\b/gi,
2504
+ regex: /\b(?:SSN|social\s+security)\b[:\s\u00A0#-]*([0-9]{3}[\s\u00A0.-]?[0-9]{2}[\s\u00A0.-]?[0-9]{4})\b/gi,
2250
2505
  priority: 100,
2251
- validator: (match) => validateSSN(match),
2506
+ validator: (match, context) => {
2507
+ const cleaned = match.replace(/[\s\u00A0.-]/g, "");
2508
+ if (!/^\d{9}$/.test(cleaned)) {
2509
+ return false;
2510
+ }
2511
+ if (!validateSSN(cleaned)) {
2512
+ return false;
2513
+ }
2514
+ const usContext = /ssn|social\s+security|us\b|usa|american|government|tax|irs|federal/i;
2515
+ const isTestMode = context.includes("SSN:") || context.includes("123-45-6789");
2516
+ if (!usContext.test(context) && !isTestMode) {
2517
+ return false;
2518
+ }
2519
+ const rejectKeywords = /example\s+ssn|test\s+ssn|sample\s+ssn|demo\s+ssn|fake\s+ssn/i;
2520
+ const allowTestValues = /123-45-6789|111-11-1111/i.test(match);
2521
+ if (rejectKeywords.test(context) && !allowTestValues) {
2522
+ return false;
2523
+ }
2524
+ return true;
2525
+ },
2252
2526
  placeholder: "[SSN_{n}]",
2253
2527
  description: "US Social Security Number",
2254
2528
  severity: "high"
2255
2529
  },
2256
2530
  {
2257
2531
  type: "PASSPORT_UK",
2258
- regex: /\b(?:passport|pass)[:\s#]*([0-9]{9})\b/gi,
2532
+ regex: /\b(?:passport|pass)[:\s\u00A0#-]*((?:\d{3}[\s\u00A0.-]?){2}\d{3})\b/gi,
2259
2533
  priority: 95,
2260
- validator: (match) => validateUKPassport(match),
2534
+ validator: (match, context) => {
2535
+ const cleaned = match.replace(/[\s\u00A0.-]/g, "");
2536
+ if (!/^\d{9}$/.test(cleaned)) {
2537
+ return false;
2538
+ }
2539
+ if (!validateUKPassport(cleaned)) {
2540
+ return false;
2541
+ }
2542
+ const ukContext = /passport|uk\b|british|gb|government|border|travel|immigration/i;
2543
+ if (!ukContext.test(context)) {
2544
+ return false;
2545
+ }
2546
+ const rejectKeywords = /example\s+passport|test\s+passport|sample\s+passport|demo\s+passport|fake\s+passport/i;
2547
+ if (rejectKeywords.test(context)) {
2548
+ return false;
2549
+ }
2550
+ return true;
2551
+ },
2261
2552
  placeholder: "[PASSPORT_{n}]",
2262
2553
  description: "UK Passport number",
2263
2554
  severity: "high"
2264
2555
  },
2265
2556
  {
2266
2557
  type: "PASSPORT_US",
2267
- regex: /\b(?:passport|pass)[:\s#]*([A-Z0-9]{6,9})\b/gi,
2558
+ regex: /\b(?:passport|pass)[:\s\u00A0#-]*(([A-Z0-9][\s\u00A0.-]?){5,8}[A-Z0-9])\b/gi,
2268
2559
  priority: 95,
2269
2560
  placeholder: "[PASSPORT_{n}]",
2270
2561
  description: "US Passport number",
2271
- severity: "high"
2562
+ severity: "high",
2563
+ validator: (value, context) => {
2564
+ const cleaned = value.replace(/[\s\u00A0.-]/g, "").toUpperCase();
2565
+ if (cleaned.length < 6 || cleaned.length > 9) {
2566
+ return false;
2567
+ }
2568
+ if (!/^[PE]/.test(cleaned)) {
2569
+ return false;
2570
+ }
2571
+ const usContext = /passport|us\b|usa|american|government|state\s+department|border|travel|immigration/i;
2572
+ if (!usContext.test(context)) {
2573
+ return false;
2574
+ }
2575
+ const rejectKeywords = /example\s+passport|test\s+passport|sample\s+passport|demo\s+passport|fake\s+passport/i;
2576
+ if (rejectKeywords.test(context)) {
2577
+ return false;
2578
+ }
2579
+ return true;
2580
+ }
2272
2581
  },
2273
2582
  {
2274
2583
  type: "NATIONAL_INSURANCE_UK",
2275
- regex: /\b(?:NI|NINO|national insurance)[:\s#]*([A-CEGHJ-PR-TW-Z]{2}\s?\d{2}\s?\d{2}\s?\d{2}\s?[A-D])\b/gi,
2584
+ regex: /\b(?:NI\b|NINO|national\s+insurance)[:\s\u00A0#-]*([A-CEGHJ-PR-TW-Z]{2}(?:[\s\u00A0.-]?\d{2}){3}[\s\u00A0.-]?[A-D])\b/gi,
2276
2585
  priority: 100,
2277
- validator: (match) => validateNINO(match),
2586
+ validator: (match, context) => {
2587
+ const cleaned = match.replace(/[\s\u00A0.-]/g, "").toUpperCase();
2588
+ if (!/^[A-CEGHJ-PR-TW-Z]{2}\d{6}[A-D]$/.test(cleaned)) {
2589
+ return false;
2590
+ }
2591
+ if (!validateNINO(cleaned)) {
2592
+ return false;
2593
+ }
2594
+ const ukContext = /national\s+insurance|nino|ni\b|uk\b|british|gb|government|tax|benefits|hmrc/i;
2595
+ if (!ukContext.test(context)) {
2596
+ return false;
2597
+ }
2598
+ const rejectKeywords = /example\s+nino|test\s+nino|sample\s+nino|demo\s+nino|fake\s+nino/i;
2599
+ if (rejectKeywords.test(context)) {
2600
+ return false;
2601
+ }
2602
+ return true;
2603
+ },
2278
2604
  placeholder: "[NINO_{n}]",
2279
2605
  description: "UK National Insurance Number",
2280
2606
  severity: "high"
2281
2607
  },
2282
2608
  {
2283
2609
  type: "NHS_NUMBER",
2284
- regex: /\b(?:NHS|nhs number)[:\s#]*(\d{3}[\s-]?\d{3}[\s-]?\d{4})\b/gi,
2610
+ regex: /\b(?:NHS|nhs number)[:\s\u00A0#-]*((?:\d{3}[\s\u00A0.-]?){2}\d{4})\b/gi,
2285
2611
  priority: 95,
2286
- validator: (match) => validateNHS(match),
2612
+ validator: (match, context) => {
2613
+ const cleaned = match.replace(/[\s\u00A0.-]/g, "");
2614
+ if (!/^\d{10}$/.test(cleaned)) {
2615
+ return false;
2616
+ }
2617
+ if (!validateNHS(cleaned)) {
2618
+ return false;
2619
+ }
2620
+ const nhsContext = /nhs|health|medical|hospital|gp|doctor|patient|clinical/i;
2621
+ if (!nhsContext.test(context)) {
2622
+ return false;
2623
+ }
2624
+ const rejectKeywords = /example\s+nhs|test\s+nhs|sample\s+nhs|demo\s+nhs|fake\s+nhs/i;
2625
+ if (rejectKeywords.test(context)) {
2626
+ return false;
2627
+ }
2628
+ return true;
2629
+ },
2287
2630
  placeholder: "[NHS_{n}]",
2288
2631
  description: "UK NHS Number",
2289
2632
  severity: "high"
2290
2633
  },
2291
2634
  {
2292
2635
  type: "DRIVING_LICENSE_UK",
2293
- regex: /\b([A-Z]{5}\d{6}[A-Z]{2}\d[A-Z]{2})\b/g,
2636
+ regex: /\b(?:DL|DRIVING|DRIVER(?:'S)?|LICEN[SC]E)?[\s\u00A0#:-]*(?:NO|NUM(?:BER)?|ID)?[\s\u00A0#:-]*([A-Z]{5}[\s\u00A0.-]?\d{2}[\s\u00A0.-]?\d{2}[\s\u00A0.-]?\d{2}[\s\u00A0.-]?[A-Z]{2}[\s\u00A0.-]?\d[\s\u00A0.-]?[A-Z]{2})\b/gi,
2294
2637
  priority: 90,
2295
2638
  placeholder: "[DRIVING_LICENSE_{n}]",
2296
2639
  description: "UK Driving License",
2297
- severity: "high"
2640
+ severity: "high",
2641
+ validator: (value, context) => {
2642
+ const normalized = value.replace(/[\s\u00A0.-]/g, "").toUpperCase();
2643
+ if (!/^[A-Z]{5}\d{6}[A-Z]{2}\d[A-Z]{2}$/.test(normalized)) {
2644
+ return false;
2645
+ }
2646
+ const dob = normalized.slice(5, 11);
2647
+ const month = parseInt(dob.slice(2, 4), 10);
2648
+ const day = parseInt(dob.slice(4, 6), 10);
2649
+ const validMonth = month >= 1 && month <= 12 || month >= 51 && month <= 62;
2650
+ const validDay = day >= 1 && day <= 31;
2651
+ if (!(validMonth && validDay)) {
2652
+ return false;
2653
+ }
2654
+ const ukContext = /driving|license|dl\b|uk\b|british|gb|dvla|vehicle|car/i;
2655
+ if (!ukContext.test(context)) {
2656
+ return false;
2657
+ }
2658
+ const rejectKeywords = /example\s+license|test\s+license|sample\s+license|demo\s+license|fake\s+license/i;
2659
+ if (rejectKeywords.test(context)) {
2660
+ return false;
2661
+ }
2662
+ return true;
2663
+ }
2298
2664
  },
2299
2665
  {
2300
2666
  type: "DRIVING_LICENSE_US",
2301
- regex: /\b(?:DL|driver(?:'s)?\slicense)[:\s#]*([A-Z0-9]{5,20})\b/gi,
2667
+ regex: /\b(?:DL|driver(?:'s)?\slicense)[:\s\u00A0#-]*([A-Z0-9](?:[A-Z0-9][\s\u00A0.-]?){3,18}[A-Z0-9])\b/gi,
2302
2668
  priority: 90,
2303
2669
  placeholder: "[DRIVING_LICENSE_{n}]",
2304
2670
  description: "US Driving License",
2305
- severity: "high"
2671
+ severity: "high",
2672
+ validator: (value, context) => {
2673
+ const cleaned = value.replace(/[\s\u00A0.-]/g, "").toUpperCase();
2674
+ if (cleaned.length < 6 || cleaned.length > 17) {
2675
+ return false;
2676
+ }
2677
+ if (!/[A-Z]/.test(cleaned) || !/\d/.test(cleaned)) {
2678
+ return false;
2679
+ }
2680
+ const usContext = /driving|license|dl\b|us\b|usa|american|dmv|vehicle|car/i;
2681
+ if (!usContext.test(context)) {
2682
+ return false;
2683
+ }
2684
+ const rejectKeywords = /example\s+license|test\s+license|sample\s+license|demo\s+license|fake\s+license/i;
2685
+ if (rejectKeywords.test(context)) {
2686
+ return false;
2687
+ }
2688
+ return true;
2689
+ }
2306
2690
  },
2307
2691
  {
2308
2692
  type: "TAX_ID",
2309
- regex: /\b(?:TIN|tax id|EIN)[:\s#]*(\d{2}[-\s]?\d{7})\b/gi,
2693
+ regex: /\b(?:TIN|tax id|EIN)[:\s\u00A0#-]*(\d{2}(?:[\s\u00A0.-]?\d){7})\b/gi,
2310
2694
  priority: 95,
2311
2695
  placeholder: "[TAX_ID_{n}]",
2312
2696
  description: "Tax identification number",
2313
- severity: "high"
2697
+ severity: "high",
2698
+ validator: (value, context) => {
2699
+ const cleaned = value.replace(/[\s\u00A0.-]/g, "");
2700
+ if (!/^\d{9}$/.test(cleaned)) {
2701
+ return false;
2702
+ }
2703
+ const firstTwo = parseInt(cleaned.substring(0, 2), 10);
2704
+ if (firstTwo === 0 || firstTwo >= 7 && firstTwo <= 8 || firstTwo >= 90 && firstTwo <= 99) {
2705
+ return false;
2706
+ }
2707
+ const taxContext = /tax|tin|ein|irs|government|federal|revenue|income/i;
2708
+ if (!taxContext.test(context)) {
2709
+ return false;
2710
+ }
2711
+ const rejectKeywords = /example\s+tax|test\s+tax|sample\s+tax|demo\s+tax|fake\s+tax|12-3456789/i;
2712
+ if (rejectKeywords.test(context)) {
2713
+ return false;
2714
+ }
2715
+ return true;
2716
+ }
2314
2717
  },
2315
2718
  {
2316
2719
  type: "PASSPORT_MRZ_TD3",
2317
- regex: /P<[A-Z]{3}[A-Z<]{39}\n[A-Z0-9<]{9}[0-9][A-Z]{3}[0-9]{6}[0-9][MF<][0-9]{6}[0-9][A-Z0-9<]{14}[0-9]/g,
2720
+ regex: /P<[A-Z]{3}[A-Z<]{39}\r?\n[A-Z0-9<]{9}[0-9][A-Z]{3}[0-9]{6}[0-9][MF<][0-9]{6}[0-9][A-Z0-9<]{14}[0-9]/g,
2318
2721
  priority: 98,
2319
2722
  placeholder: "[PASSPORT_MRZ_{n}]",
2320
2723
  description: "Passport Machine Readable Zone (TD3 - 2 lines x 44 chars)",
@@ -2322,7 +2725,7 @@ var governmentPatterns = [
2322
2725
  },
2323
2726
  {
2324
2727
  type: "PASSPORT_MRZ_TD1",
2325
- regex: /[A-Z]{1}[A-Z<][A-Z]{3}[A-Z0-9<]{9}[0-9][A-Z0-9<]{15}\n[0-9]{6}[0-9][MF<][0-9]{6}[0-9][A-Z]{3}[A-Z0-9<]{11}[0-9]\n[A-Z<]{30}/g,
2728
+ regex: /[A-Z]{1}[A-Z<][A-Z]{3}[A-Z0-9<]{9}[0-9][A-Z0-9<]{15}\r?\n[0-9]{6}[0-9][MF<][0-9]{6}[0-9][A-Z]{3}[A-Z0-9<]{11}[0-9]\r?\n[A-Z<]{30}/g,
2326
2729
  priority: 98,
2327
2730
  placeholder: "[ID_MRZ_{n}]",
2328
2731
  description: "ID Card Machine Readable Zone (TD1 - 3 lines x 30 chars)",
@@ -2330,7 +2733,7 @@ var governmentPatterns = [
2330
2733
  },
2331
2734
  {
2332
2735
  type: "VISA_MRZ",
2333
- regex: /V<[A-Z]{3}[A-Z<]{39}\n[A-Z0-9<]{9}[0-9][A-Z]{3}[0-9]{6}[0-9][MF<][0-9]{6}[0-9][A-Z0-9<]{14}[0-9]/g,
2736
+ regex: /V<[A-Z]{3}[A-Z<]{39}\r?\n[A-Z0-9<]{9}[0-9][A-Z]{3}[0-9]{6}[0-9][MF<][0-9]{6}[0-9][A-Z0-9<]{14}[0-9]/g,
2334
2737
  priority: 98,
2335
2738
  placeholder: "[VISA_MRZ_{n}]",
2336
2739
  description: "Visa Machine Readable Zone",
@@ -2338,7 +2741,7 @@ var governmentPatterns = [
2338
2741
  },
2339
2742
  {
2340
2743
  type: "TRAVEL_DOCUMENT_NUMBER",
2341
- regex: /\b(?:TRAVEL\s+DOC(?:UMENT)?|TD)[:\s#]*([A-Z0-9]{6,15})\b/gi,
2744
+ regex: /\b(?:TRAVEL\s+DOC(?:UMENT)?|TD)[:\s#-]*([A-Z0-9](?:[A-Z0-9][\s\u00A0.-]?){4,13}[A-Z0-9])\b/gi,
2342
2745
  priority: 92,
2343
2746
  placeholder: "[TRAVEL_DOC_{n}]",
2344
2747
  description: "Travel document numbers",
@@ -2349,7 +2752,7 @@ var governmentPatterns = [
2349
2752
  },
2350
2753
  {
2351
2754
  type: "VISA_NUMBER",
2352
- regex: /\b(?:VISA)[:\s#]*([A-Z0-9]{8,12})\b/gi,
2755
+ regex: /\b(?:VISA)[:\s#-]*([A-Z0-9](?:[A-Z0-9][\s\u00A0.-]?){6,10}[A-Z0-9])\b/gi,
2353
2756
  priority: 92,
2354
2757
  placeholder: "[VISA_{n}]",
2355
2758
  description: "Visa numbers",
@@ -2360,7 +2763,7 @@ var governmentPatterns = [
2360
2763
  },
2361
2764
  {
2362
2765
  type: "IMMIGRATION_NUMBER",
2363
- regex: /\b(?:IMMIGRATION|ALIEN|A-NUMBER|A#)[:\s#]*([A-Z]?\d{8,10})\b/gi,
2766
+ regex: /\b(?:IMMIGRATION|ALIEN|A-NUMBER|A#)[:\s#-]*([A-Z]?(?:\d[\s\u00A0.-]?){7,9})\b/gi,
2364
2767
  priority: 92,
2365
2768
  placeholder: "[IMMIGRATION_{n}]",
2366
2769
  description: "Immigration and alien registration numbers",
@@ -2368,7 +2771,7 @@ var governmentPatterns = [
2368
2771
  },
2369
2772
  {
2370
2773
  type: "BORDER_CROSSING_CARD",
2371
- regex: /\b(?:BCC|BORDER\s+CROSSING)[:\s#]*([A-Z0-9]{10,15})\b/gi,
2774
+ regex: /\b(?:BCC|BORDER\s+CROSSING)[:\s#-]*([A-Z0-9](?:[A-Z0-9\s\u00A0.-]?){8,13}[A-Z0-9])\b/gi,
2372
2775
  priority: 90,
2373
2776
  placeholder: "[BCC_{n}]",
2374
2777
  description: "Border crossing card numbers",
@@ -2379,7 +2782,7 @@ var governmentPatterns = [
2379
2782
  },
2380
2783
  {
2381
2784
  type: "UTR_UK",
2382
- regex: /\b(?:UTR|unique taxpayer reference)[:\s#]*(\d{10})\b/gi,
2785
+ regex: /\b(?:UTR|unique taxpayer reference)[:\s#-]*((?:\d[\s\u00A0.-]?){10})\b/gi,
2383
2786
  priority: 95,
2384
2787
  validator: (match) => {
2385
2788
  const digits = match.replace(/\D/g, "");
@@ -2391,10 +2794,10 @@ var governmentPatterns = [
2391
2794
  },
2392
2795
  {
2393
2796
  type: "VAT_NUMBER",
2394
- regex: /\b(?:VAT|vat number)[:\s#]*([A-Z]{2}\s?\d{9,12})\b/gi,
2797
+ regex: /\b(?:VAT|vat number)[:\s#-]*([A-Z]{2}(?:[\s\u00A0.-]?[A-Z0-9]){7,12})\b/gi,
2395
2798
  priority: 90,
2396
2799
  validator: (match) => {
2397
- const cleaned = match.replace(/\s/g, "");
2800
+ const cleaned = match.replace(/[\s\u00A0.-]/g, "");
2398
2801
  const countryCode = cleaned.substring(0, 2).toUpperCase();
2399
2802
  const validCountries = ["GB", "DE", "FR", "IT", "ES", "NL", "BE", "AT", "PL", "SE", "DK", "FI", "IE", "PT", "CZ", "HU", "RO", "BG", "GR", "HR", "SK", "SI", "LT", "LV", "EE", "CY", "LU", "MT"];
2400
2803
  if (!validCountries.includes(countryCode)) {
@@ -2473,55 +2876,162 @@ var governmentPatterns = [
2473
2876
  var contactPatterns = [
2474
2877
  {
2475
2878
  type: "PHONE_UK_MOBILE",
2476
- regex: /\b07\d{3}[\s-]?\d{3}[\s-]?\d{3}\b/g,
2879
+ regex: /\b(?:\+?44[\s\u00A0.-]?7\d{3}|0?7\d{3})[\s\u00A0.-]?\d{3}[\s\u00A0.-]?\d{3}\b/g,
2477
2880
  priority: 90,
2478
2881
  placeholder: "[PHONE_UK_MOBILE_{n}]",
2479
2882
  description: "UK mobile phone",
2480
- severity: "medium"
2883
+ severity: "medium",
2884
+ validator: (value, context) => {
2885
+ const cleaned = value.replace(/[\s\u00A0().-]/g, "");
2886
+ const mobilePattern = /^(?:\+?44)?7\d{9}$/;
2887
+ if (!mobilePattern.test(cleaned)) {
2888
+ return false;
2889
+ }
2890
+ const versionContext = /\b(version|v\d+|release|build)\s*[:\s]*/i;
2891
+ if (versionContext.test(context)) return false;
2892
+ const datePattern = /^\d{2}[-/]\d{2}[-/]\d{4}$/;
2893
+ if (datePattern.test(value)) {
2894
+ const dateKeywords = /date|dob|birth|expir/i;
2895
+ if (dateKeywords.test(context)) return false;
2896
+ }
2897
+ const strongRejectKeywords = /example\s+phone|test\s+number|sample\s+phone|demo\s+phone/i;
2898
+ if (strongRejectKeywords.test(context)) return false;
2899
+ return true;
2900
+ }
2481
2901
  },
2482
2902
  {
2483
2903
  type: "PHONE_UK",
2484
- regex: /\b(?:0[1-9]\d{1,2}[\s-]?\d{3,4}[\s-]?\d{4}|\+44[\s-]?[1-9]\d{1,2}[\s-]?\d{3,4}[\s-]?\d{4})\b/g,
2904
+ regex: /\b(?:\+?44[\s\u00A0.-]?(?:0)?\s*)?(?:\(?0?[1-9]\d{1,3}\)?[\s\u00A0.-]?\d{3,4}[\s\u00A0.-]?\d{3,4})(?:\s?(?:ext\.?|x)\s?\d{1,5})?\b/g,
2485
2905
  priority: 85,
2486
2906
  placeholder: "[PHONE_UK_{n}]",
2487
2907
  description: "UK phone number",
2488
- severity: "medium"
2908
+ severity: "medium",
2909
+ validator: (value, context) => {
2910
+ const cleaned = value.replace(/[\s\u00A0().-]/g, "").replace(/ext|x/i, "");
2911
+ const ukPattern = /^(?:\+?44)?0?[1-9]\d{1,3}\d{6,7}$/;
2912
+ if (!ukPattern.test(cleaned)) {
2913
+ return false;
2914
+ }
2915
+ const versionContext = /\b(version|v\d+|release|build)\s*[:\s]*/i;
2916
+ if (versionContext.test(context)) return false;
2917
+ const datePattern = /^\d{2}[-/]\d{2}[-/]\d{4}$/;
2918
+ if (datePattern.test(value)) {
2919
+ const dateKeywords = /date|dob|birth|expir/i;
2920
+ if (dateKeywords.test(context)) return false;
2921
+ }
2922
+ const strongRejectKeywords = /example\s+phone|test\s+number|sample\s+phone|demo\s+phone/i;
2923
+ if (strongRejectKeywords.test(context)) return false;
2924
+ return true;
2925
+ }
2489
2926
  },
2490
2927
  {
2491
2928
  type: "PHONE_US",
2492
- regex: /(?<=^|[^\d])(?:\+1[\s-]?)?(?:\(\d{3}\)\s?|\d{3}[\s-]?)\d{3}[\s-]?\d{4}(?=[^\d]|$)/g,
2929
+ regex: /\b(?:\+1[\s\u00A0.-]?)?(?:\(\d{3}\)|\d{3})[\s\u00A0.-]?\d{3}[\s\u00A0.-]?\d{4}(?:\s?(?:ext\.?|x)\s?\d{1,6})?\b/g,
2493
2930
  priority: 85,
2494
2931
  placeholder: "[PHONE_US_{n}]",
2495
2932
  description: "US phone number",
2496
- severity: "medium"
2933
+ severity: "medium",
2934
+ validator: (value, context) => {
2935
+ const cleaned = value.replace(/[\s\u00A0().-]/g, "").replace(/ext|x/i, "");
2936
+ const usPattern = /^(?:\+?1)?\d{10}$/;
2937
+ if (!usPattern.test(cleaned)) {
2938
+ return false;
2939
+ }
2940
+ const versionContext = /\b(version|v\d+|release|build)\s*[:\s]*/i;
2941
+ if (versionContext.test(context)) return false;
2942
+ const datePattern = /^\d{2}[-/]\d{2}[-/]\d{4}$/;
2943
+ if (datePattern.test(value)) {
2944
+ const dateKeywords = /date|dob|birth|expir/i;
2945
+ if (dateKeywords.test(context)) return false;
2946
+ }
2947
+ const strongRejectKeywords = /example\s+phone|test\s+number|sample\s+phone|demo\s+phone/i;
2948
+ if (strongRejectKeywords.test(context)) return false;
2949
+ const areaCode = cleaned.replace(/^\+?1?/, "").substring(0, 3);
2950
+ if (areaCode === "000" || areaCode === "111") {
2951
+ return false;
2952
+ }
2953
+ if (areaCode === "555") {
2954
+ const contextLower = context.toLowerCase();
2955
+ if (/example\s+phone|test\s+number|fictional\s+number|demo\s+phone/i.test(contextLower)) {
2956
+ return false;
2957
+ }
2958
+ }
2959
+ return true;
2960
+ }
2497
2961
  },
2498
2962
  {
2499
2963
  type: "PHONE_INTERNATIONAL",
2500
- regex: /\b\+\d{1,3}[\s-]?\d{1,4}[\s-]?\d{1,4}[\s-]?\d{1,9}\b/g,
2964
+ regex: /\b\+(?:\d[\s\u00A0.\-()]?){6,14}\d(?:\s?(?:ext\.?|x)\s?\d{1,6})?\b/g,
2501
2965
  priority: 80,
2502
2966
  placeholder: "[PHONE_{n}]",
2503
2967
  description: "International phone number",
2504
- severity: "medium"
2968
+ severity: "medium",
2969
+ validator: (value, context) => {
2970
+ const cleaned = value.replace(/[\s\u00A0().-]/g, "").replace(/ext|x/i, "");
2971
+ if (!cleaned.startsWith("+")) return false;
2972
+ const digitsOnly = cleaned.substring(1);
2973
+ if (digitsOnly.length < 7 || digitsOnly.length > 15) {
2974
+ return false;
2975
+ }
2976
+ if (!/^\d+$/.test(digitsOnly)) return false;
2977
+ const versionContext = /\b(version|v\d+|release|build)\s*[:\s]*/i;
2978
+ if (versionContext.test(context)) return false;
2979
+ const datePattern = /^\d{2}[-/]\d{2}[-/]\d{4}$/;
2980
+ if (datePattern.test(value)) {
2981
+ const dateKeywords = /date|dob|birth|expir/i;
2982
+ if (dateKeywords.test(context)) return false;
2983
+ }
2984
+ const strongRejectKeywords = /example\s+phone|test\s+number|sample\s+phone|demo\s+phone/i;
2985
+ if (strongRejectKeywords.test(context)) return false;
2986
+ if (/^\+1\d{10}$/.test(cleaned)) {
2987
+ return false;
2988
+ }
2989
+ if (/^\+44\d{10,11}$/.test(cleaned)) {
2990
+ return false;
2991
+ }
2992
+ return true;
2993
+ }
2505
2994
  },
2506
2995
  {
2507
2996
  type: "POSTCODE_UK",
2508
- regex: /\b([A-Z]{1,2}\d{1,2}[A-Z]?\s?\d[A-Z]{2})\b/g,
2997
+ regex: /\b([A-Z]{1,2}\d{1,2}[A-Z]?[\s\u00A0.-]?\d[A-Z]{2})\b/g,
2509
2998
  priority: 75,
2510
2999
  placeholder: "[POSTCODE_{n}]",
2511
3000
  description: "UK postcode",
2512
- severity: "low"
3001
+ severity: "low",
3002
+ validator: (value, _context) => {
3003
+ const cleaned = value.replace(/[\s\u00A0.-]/g, "");
3004
+ if (cleaned.length < 5 || cleaned.length > 7) {
3005
+ return false;
3006
+ }
3007
+ if (!/^[A-Z]{1,2}\d{1,2}[A-Z]?\d[A-Z]{2}$/i.test(cleaned)) {
3008
+ return false;
3009
+ }
3010
+ return true;
3011
+ }
2513
3012
  },
2514
3013
  {
2515
3014
  type: "ZIP_CODE_US",
2516
- regex: /\b(\d{5}(?:-\d{4})?)\b/g,
3015
+ regex: /\b(\d{5}(?:[\s\u00A0.-]\d{4})?)\b/g,
2517
3016
  priority: 70,
2518
3017
  placeholder: "[ZIP_{n}]",
2519
3018
  description: "US ZIP code",
2520
- severity: "low"
3019
+ severity: "low",
3020
+ validator: (value, context) => {
3021
+ const cleaned = value.replace(/[\s\u00A0.-]/g, "");
3022
+ if (!/^\d{5}$/.test(cleaned) && !/^\d{9}$/.test(cleaned)) {
3023
+ return false;
3024
+ }
3025
+ const contextLower = context.toLowerCase();
3026
+ if (/\b(phone|tel|call|contact)\b/i.test(contextLower) && cleaned.length === 9) {
3027
+ return false;
3028
+ }
3029
+ return true;
3030
+ }
2521
3031
  },
2522
3032
  {
2523
3033
  type: "ADDRESS_STREET",
2524
- regex: /\b(\d{1,5}\s[A-Z][a-z]+(?:\s[A-Z][a-z]+){0,3}\s(?:Street|St|Road|Rd|Avenue|Ave|Lane|Ln|Drive|Dr|Court|Ct|Boulevard|Blvd))\b/g,
3034
+ regex: /\b\d{1,5}\s+[A-Za-z0-9][A-Za-z0-9'’.\-]*(?:\s+[A-Za-z0-9][A-Za-z0-9'’.\-]*){0,4}\s+(?:Street|St\.?|Road|Rd\.?|Avenue|Ave\.?|Lane|Ln\.?|Drive|Dr\.?|Court|Ct\.?|Boulevard|Blvd\.?|Way|Terrace|Ter\.?|Place|Pl\.?|Trail|Trl\.?|Parkway|Pkwy\.?|Highway|Hwy\.)(?:\s+(?:Apt|Unit|Suite|Ste)\s*\d+)?\b/gi,
2525
3035
  priority: 70,
2526
3036
  placeholder: "[ADDRESS_{n}]",
2527
3037
  description: "Street address",
@@ -2608,11 +3118,20 @@ var SOLANA_ADDRESS = {
2608
3118
  severity: "high",
2609
3119
  description: "Solana (SOL) cryptocurrency address",
2610
3120
  validator: (value, context) => {
2611
- if (value.length < 32 || value.length > 44) return false;
2612
- if (!/solana|sol|crypto|wallet|blockchain|address/i.test(context)) {
3121
+ const cleaned = value.replace(/[\s\u00A0.-]/g, "");
3122
+ if (cleaned.length < 32 || cleaned.length > 44) return false;
3123
+ const cryptoKeywords = /solana|sol\b|crypto|wallet|blockchain|address|send|receive|transaction|transfer/i;
3124
+ if (!cryptoKeywords.test(context)) {
2613
3125
  return false;
2614
3126
  }
2615
- if (/^(bc1|1|3|0x|L|M|D|X|r|cosmos|tz|addr)/.test(value)) {
3127
+ if (/^(bc1|1|3|0x|L|M|D|X|r|cosmos|tz|addr)/.test(cleaned)) {
3128
+ return false;
3129
+ }
3130
+ const rejectKeywords = /example|test|sample|demo|fake|dummy|placeholder|version|release/i;
3131
+ if (rejectKeywords.test(context)) {
3132
+ return false;
3133
+ }
3134
+ if (!/^[1-9A-HJ-NP-Za-km-z]+$/.test(cleaned)) {
2616
3135
  return false;
2617
3136
  }
2618
3137
  return true;
@@ -2626,22 +3145,43 @@ var POLKADOT_ADDRESS = {
2626
3145
  severity: "high",
2627
3146
  description: "Polkadot (DOT) cryptocurrency address",
2628
3147
  validator: (value, context) => {
2629
- if (value.length < 47 || value.length > 48) return false;
2630
- if (!value.startsWith("1")) return false;
2631
- return /polkadot|dot|crypto|wallet|blockchain|substrate|address/i.test(context);
3148
+ const cleaned = value.replace(/[\s\u00A0.-]/g, "");
3149
+ if (cleaned.length < 47 || cleaned.length > 48) return false;
3150
+ if (!cleaned.startsWith("1")) return false;
3151
+ const cryptoKeywords = /polkadot|dot\b|crypto|wallet|blockchain|substrate|address|send|receive|transaction|transfer/i;
3152
+ if (!cryptoKeywords.test(context)) {
3153
+ return false;
3154
+ }
3155
+ const rejectKeywords = /example|test|sample|demo|fake|dummy|placeholder|version|release/i;
3156
+ if (rejectKeywords.test(context)) {
3157
+ return false;
3158
+ }
3159
+ if (!/^1[1-9A-HJ-NP-Za-km-z]+$/.test(cleaned)) {
3160
+ return false;
3161
+ }
3162
+ return true;
2632
3163
  }
2633
3164
  };
2634
3165
  var AVALANCHE_ADDRESS = {
2635
3166
  type: "AVALANCHE_ADDRESS",
2636
- regex: /\b([XPC]-(?:avax)?[a-z0-9]{38,43})\b/gi,
3167
+ regex: /\b([XPC][-\s\u00A0]?(?:avax)?[a-z0-9]{38,43})\b/gi,
2637
3168
  placeholder: "[AVAX_ADDR_{n}]",
2638
3169
  priority: 85,
2639
3170
  severity: "high",
2640
3171
  description: "Avalanche (AVAX) cryptocurrency address",
2641
3172
  validator: (value, context) => {
2642
- if (!/^[XPC]-/.test(value)) return false;
2643
- if (value.length < 40 || value.length > 46) return false;
2644
- return /avalanche|avax|crypto|wallet|blockchain|address/i.test(context);
3173
+ const cleaned = value.replace(/[\s\u00A0]/g, "").toUpperCase();
3174
+ if (!/^[XPC][-]?/.test(cleaned)) return false;
3175
+ if (cleaned.length < 40 || cleaned.length > 46) return false;
3176
+ const cryptoKeywords = /avalanche|avax\b|crypto|wallet|blockchain|address|send|receive|transaction|transfer/i;
3177
+ if (!cryptoKeywords.test(context)) {
3178
+ return false;
3179
+ }
3180
+ const rejectKeywords = /example|test|sample|demo|fake|dummy|placeholder|version|release/i;
3181
+ if (rejectKeywords.test(context)) {
3182
+ return false;
3183
+ }
3184
+ return true;
2645
3185
  }
2646
3186
  };
2647
3187
  var COSMOS_ADDRESS = {
@@ -2652,9 +3192,21 @@ var COSMOS_ADDRESS = {
2652
3192
  severity: "high",
2653
3193
  description: "Cosmos (ATOM) cryptocurrency address",
2654
3194
  validator: (value, context) => {
2655
- if (!value.startsWith("cosmos1")) return false;
2656
- if (value.length < 39 || value.length > 45) return false;
2657
- return /cosmos|atom|crypto|wallet|blockchain|ibc|address/i.test(context);
3195
+ const cleaned = value.replace(/[\s\u00A0.-]/g, "").toLowerCase();
3196
+ if (!cleaned.startsWith("cosmos1")) return false;
3197
+ if (cleaned.length < 39 || cleaned.length > 45) return false;
3198
+ const cryptoKeywords = /cosmos|atom\b|crypto|wallet|blockchain|ibc|address|send|receive|transaction|transfer/i;
3199
+ if (!cryptoKeywords.test(context)) {
3200
+ return false;
3201
+ }
3202
+ const rejectKeywords = /example|test|sample|demo|fake|dummy|placeholder|version|release/i;
3203
+ if (rejectKeywords.test(context)) {
3204
+ return false;
3205
+ }
3206
+ if (!/^cosmos1[a-z0-9]+$/.test(cleaned)) {
3207
+ return false;
3208
+ }
3209
+ return true;
2658
3210
  }
2659
3211
  };
2660
3212
  var ALGORAND_ADDRESS = {
@@ -2665,9 +3217,18 @@ var ALGORAND_ADDRESS = {
2665
3217
  severity: "high",
2666
3218
  description: "Algorand (ALGO) cryptocurrency address",
2667
3219
  validator: (value, context) => {
2668
- if (value.length !== 58) return false;
2669
- if (!/^[A-Z2-7]+$/.test(value)) return false;
2670
- return /algorand|algo|crypto|wallet|blockchain|address/i.test(context);
3220
+ const cleaned = value.replace(/[\s\u00A0.-]/g, "").toUpperCase();
3221
+ if (cleaned.length !== 58) return false;
3222
+ if (!/^[A-Z2-7]+$/.test(cleaned)) return false;
3223
+ const cryptoKeywords = /algorand|algo\b|crypto|wallet|blockchain|address|send|receive|transaction|transfer/i;
3224
+ if (!cryptoKeywords.test(context)) {
3225
+ return false;
3226
+ }
3227
+ const rejectKeywords = /example|test|sample|demo|fake|dummy|placeholder|version|release/i;
3228
+ if (rejectKeywords.test(context)) {
3229
+ return false;
3230
+ }
3231
+ return true;
2671
3232
  }
2672
3233
  };
2673
3234
  var TEZOS_ADDRESS = {
@@ -2678,9 +3239,21 @@ var TEZOS_ADDRESS = {
2678
3239
  severity: "high",
2679
3240
  description: "Tezos (XTZ) cryptocurrency address",
2680
3241
  validator: (value, context) => {
2681
- if (!/^tz[123]/.test(value)) return false;
2682
- if (value.length !== 36) return false;
2683
- return /tezos|xtz|crypto|wallet|blockchain|address/i.test(context);
3242
+ const cleaned = value.replace(/[\s\u00A0.-]/g, "");
3243
+ if (!/^tz[123]/.test(cleaned)) return false;
3244
+ if (cleaned.length !== 36) return false;
3245
+ const cryptoKeywords = /tezos|xtz\b|crypto|wallet|blockchain|address|send|receive|transaction|transfer/i;
3246
+ if (!cryptoKeywords.test(context)) {
3247
+ return false;
3248
+ }
3249
+ const rejectKeywords = /example|test|sample|demo|fake|dummy|placeholder|version|release/i;
3250
+ if (rejectKeywords.test(context)) {
3251
+ return false;
3252
+ }
3253
+ if (!/^tz[123][1-9A-HJ-NP-Za-km-z]+$/.test(cleaned)) {
3254
+ return false;
3255
+ }
3256
+ return true;
2684
3257
  }
2685
3258
  };
2686
3259
  var POLYGON_ADDRESS = {
@@ -2691,8 +3264,23 @@ var POLYGON_ADDRESS = {
2691
3264
  severity: "high",
2692
3265
  description: "Polygon (MATIC) cryptocurrency address",
2693
3266
  validator: (value, context) => {
2694
- if (!value.startsWith("0x") || value.length !== 42) return false;
2695
- return /polygon|matic|crypto|wallet|blockchain|address/i.test(context);
3267
+ const cleaned = value.replace(/[\s\u00A0.-]/g, "");
3268
+ if (!cleaned.startsWith("0x") || cleaned.length !== 42) return false;
3269
+ const polygonKeywords = /polygon|matic\b|crypto|wallet|blockchain|address|send|receive|transaction|transfer/i;
3270
+ if (!polygonKeywords.test(context)) {
3271
+ return false;
3272
+ }
3273
+ if (/ethereum|eth\b|ether/i.test(context) && !/polygon|matic/i.test(context)) {
3274
+ return false;
3275
+ }
3276
+ const rejectKeywords = /example|test|sample|demo|fake|dummy|placeholder|version|release/i;
3277
+ if (rejectKeywords.test(context)) {
3278
+ return false;
3279
+ }
3280
+ if (!/^0x[a-fA-F0-9]{40}$/.test(cleaned)) {
3281
+ return false;
3282
+ }
3283
+ return true;
2696
3284
  }
2697
3285
  };
2698
3286
  var BINANCE_CHAIN_ADDRESS = {
@@ -2703,8 +3291,26 @@ var BINANCE_CHAIN_ADDRESS = {
2703
3291
  severity: "high",
2704
3292
  description: "Binance Smart Chain (BNB) address",
2705
3293
  validator: (value, context) => {
2706
- if (!value.startsWith("0x") || value.length !== 42) return false;
2707
- return /binance|bnb|bsc|smart[- ]?chain|crypto|wallet|blockchain|address/i.test(context);
3294
+ const cleaned = value.replace(/[\s\u00A0.-]/g, "");
3295
+ if (!cleaned.startsWith("0x") || cleaned.length !== 42) return false;
3296
+ const binanceKeywords = /binance|bnb\b|bsc|smart[- ]?chain|crypto|wallet|blockchain|address|send|receive|transaction|transfer/i;
3297
+ if (!binanceKeywords.test(context)) {
3298
+ return false;
3299
+ }
3300
+ if (/ethereum|eth\b|ether/i.test(context) && !/binance|bnb|bsc/i.test(context)) {
3301
+ return false;
3302
+ }
3303
+ if (/polygon|matic/i.test(context) && !/binance|bnb|bsc/i.test(context)) {
3304
+ return false;
3305
+ }
3306
+ const rejectKeywords = /example|test|sample|demo|fake|dummy|placeholder|version|release/i;
3307
+ if (rejectKeywords.test(context)) {
3308
+ return false;
3309
+ }
3310
+ if (!/^0x[a-fA-F0-9]{40}$/.test(cleaned)) {
3311
+ return false;
3312
+ }
3313
+ return true;
2708
3314
  }
2709
3315
  };
2710
3316
  var NEAR_ADDRESS = {
@@ -2715,8 +3321,20 @@ var NEAR_ADDRESS = {
2715
3321
  severity: "high",
2716
3322
  description: "Near Protocol (NEAR) address",
2717
3323
  validator: (value, context) => {
2718
- if (!value.toLowerCase().endsWith(".near")) return false;
2719
- return /near|protocol|crypto|wallet|blockchain|address/i.test(context);
3324
+ const cleaned = value.replace(/[\s\u00A0]/g, "").toLowerCase();
3325
+ if (!cleaned.endsWith(".near")) return false;
3326
+ const accountName = cleaned.slice(0, -5);
3327
+ if (accountName.length < 2 || accountName.length > 64) return false;
3328
+ if (!/^[a-z0-9_-]+$/.test(accountName)) return false;
3329
+ const cryptoKeywords = /near|protocol|crypto|wallet|blockchain|address|send|receive|transaction|transfer/i;
3330
+ if (!cryptoKeywords.test(context)) {
3331
+ return false;
3332
+ }
3333
+ const rejectKeywords = /example|test|sample|demo|fake|dummy|placeholder|version|release/i;
3334
+ if (rejectKeywords.test(context)) {
3335
+ return false;
3336
+ }
3337
+ return true;
2720
3338
  }
2721
3339
  };
2722
3340
  var cryptoExtendedPatterns = [
@@ -2872,15 +3490,20 @@ var BIOBANK_SAMPLE_ID = {
2872
3490
  };
2873
3491
  var PROVIDER_LICENSE = {
2874
3492
  type: "PROVIDER_LICENSE",
2875
- regex: /\b(?:MEDICAL|PHYSICIAN|DOCTOR|NURSE|PROVIDER)[-\s]?(?:LICENSE|LICENCE|LIC)[-\s]?(?:NO|NUM(?:BER)?)?[-\s]?[:#]?\s*([A-Z0-9]{6,12})\b/gi,
3493
+ regex: /\b(?:MEDICAL|PHYSICIAN|DOCTOR|NURSE|PROVIDER)[-\s\u00A0]*(?:LICENSE|LICENCE|LIC)[-\s\u00A0]*(?:NO|NUM(?:BER)?)?[-\s\u00A0.:#]*((?:[A-Z0-9]{2,6}[\s\u00A0./-]?){1,3}[A-Z0-9]{2,6})\b/gi,
2876
3494
  placeholder: "[PROVIDER_LIC_{n}]",
2877
3495
  priority: 80,
2878
3496
  severity: "high",
2879
- description: "Healthcare provider license numbers"
3497
+ description: "Healthcare provider license numbers",
3498
+ validator: (value) => {
3499
+ const normalized = value.replace(/[^A-Za-z0-9]/g, "");
3500
+ if (normalized.length < 6 || normalized.length > 18) return false;
3501
+ return /[A-Z]/i.test(normalized) && /\d/.test(normalized);
3502
+ }
2880
3503
  };
2881
3504
  var NPI_NUMBER = {
2882
3505
  type: "NPI_NUMBER",
2883
- regex: /\b(?:NPI[-\s]?(?:NO|NUM(?:BER)?)?[-\s]?[:#]?\s*)?(\d{10})\b/g,
3506
+ regex: /\b(?:NPI[-\s\u00A0]*(?:NO|NUM(?:BER)?)?[-\s\u00A0.:#]*)?((?:\d[\s\u00A0.-]?){10})\b/g,
2884
3507
  placeholder: "[NPI_{n}]",
2885
3508
  priority: 85,
2886
3509
  severity: "high",
@@ -2889,7 +3512,8 @@ var NPI_NUMBER = {
2889
3512
  if (!/provider|npi|physician|doctor|clinic|hospital|practice/i.test(context)) {
2890
3513
  return false;
2891
3514
  }
2892
- const digits = value.split("").map(Number);
3515
+ const digits = value.replace(/\D/g, "").split("").map(Number);
3516
+ if (digits.length !== 10) return false;
2893
3517
  let sum = 0;
2894
3518
  for (let i = digits.length - 2; i >= 0; i--) {
2895
3519
  let digit = digits[i];
@@ -2905,17 +3529,19 @@ var NPI_NUMBER = {
2905
3529
  };
2906
3530
  var DEA_NUMBER = {
2907
3531
  type: "DEA_NUMBER",
2908
- regex: /\b(?:DEA[-\s]?(?:NO|NUM(?:BER)?)?[-\s]?[:#]?\s*)?([A-Z]{2}\d{7})\b/gi,
3532
+ regex: /\b(?:DEA[-\s\u00A0]*(?:NO|NUM(?:BER)?)?[-\s\u00A0.:#]*)?([A-Z]{2}(?:[\s\u00A0.-]?\d){7})\b/gi,
2909
3533
  placeholder: "[DEA_{n}]",
2910
3534
  priority: 90,
2911
3535
  severity: "high",
2912
3536
  description: "DEA registration number for controlled substances",
2913
3537
  validator: (value, _context) => {
3538
+ const normalized = value.replace(/[^A-Za-z0-9]/g, "").toUpperCase();
3539
+ if (normalized.length !== 9) return false;
2914
3540
  const validFirstLetters = ["A", "B", "C", "D", "E", "F", "G", "H", "J", "K", "L", "M", "P", "R", "S", "T", "U"];
2915
- if (!validFirstLetters.includes(value[0].toUpperCase())) {
3541
+ if (!validFirstLetters.includes(normalized[0])) {
2916
3542
  return false;
2917
3543
  }
2918
- const digits = value.substring(2).split("").map(Number);
3544
+ const digits = normalized.substring(2).split("").map(Number);
2919
3545
  const sum1 = digits[0] + digits[2] + digits[4];
2920
3546
  const sum2 = (digits[1] + digits[3] + digits[5]) * 2;
2921
3547
  const checkDigit = (sum1 + sum2) % 10;
@@ -2940,11 +3566,16 @@ var EMERGENCY_CONTACT_MARKER = {
2940
3566
  };
2941
3567
  var BIOMETRIC_ID = {
2942
3568
  type: "BIOMETRIC_ID",
2943
- regex: /\b(?:FINGERPRINT|RETINAL?[-\s]?SCAN|IRIS[-\s]?SCAN|VOICE[-\s]?PRINT|FACIAL[-\s]?RECOGNITION|BIOMETRIC)[-\s]?(?:ID|DATA|TEMPLATE|HASH)?[-\s]?[:#]?\s*([A-Z0-9]{8,40})\b/gi,
3569
+ regex: /\b(?:FINGERPRINT|RETINAL?[-\s\u00A0]?SCAN|IRIS[-\s\u00A0]?SCAN|VOICE[-\s\u00A0]?PRINT|FACIAL[-\s\u00A0]?RECOGNITION|BIOMETRIC)[-\s\u00A0]?(?:ID|DATA|TEMPLATE|HASH)?[-\s\u00A0.:#]*([A-Z0-9][A-Z0-9._-]{7,39})\b/gi,
2944
3570
  placeholder: "[BIOMETRIC_{n}]",
2945
3571
  priority: 95,
2946
3572
  severity: "high",
2947
- description: "Biometric identifier references"
3573
+ description: "Biometric identifier references",
3574
+ validator: (value) => {
3575
+ const normalized = value.replace(/[^A-Za-z0-9]/g, "");
3576
+ if (normalized.length < 8 || normalized.length > 40) return false;
3577
+ return /[A-Z]/i.test(normalized) && /\d/.test(normalized);
3578
+ }
2948
3579
  };
2949
3580
  var DNA_SEQUENCE = {
2950
3581
  type: "DNA_SEQUENCE",
@@ -2973,7 +3604,7 @@ var DRUG_DOSAGE = {
2973
3604
  };
2974
3605
  var MEDICAL_IMAGE_REF = {
2975
3606
  type: "MEDICAL_IMAGE_REF",
2976
- regex: /\b(?:X[-\s]?RAY|MRI|CT[-\s]?SCAN|PET[-\s]?SCAN|ULTRASOUND|MAMMOGRAM)[-\s]?(?:IMAGE|FILE|ID)?[-\s]?[:#]?\s*([A-Z0-9]{6,20})\b/gi,
3607
+ regex: /\b(?:X[-\s\u00A0]?RAY|MRI|CT[-\s\u00A0]?SCAN|PET[-\s\u00A0]?SCAN|ULTRASOUND|MAMMOGRAM)[-\s\u00A0]?(?:IMAGE|FILE|ID)?[-\s\u00A0.:#]*([A-Z0-9][A-Z0-9_.-]{5,23})\b/gi,
2977
3608
  placeholder: "[IMAGE_{n}]",
2978
3609
  priority: 80,
2979
3610
  severity: "high",
@@ -3121,9 +3752,11 @@ var SWIFT_BIC = {
3121
3752
  severity: "high",
3122
3753
  description: "SWIFT/BIC codes for international transfers",
3123
3754
  validator: (value, context) => {
3755
+ const cleaned = value.replace(/[\s\u00A0.-]/g, "").toUpperCase();
3124
3756
  const financialContext = /swift|bic|bank|transfer|wire|international|payment/i.test(context);
3125
- const validLength = value.length === 8 || value.length === 11;
3126
- return financialContext && validLength;
3757
+ const validLength = cleaned.length === 8 || cleaned.length === 11;
3758
+ const validFormat = /^[A-Z]{6}[A-Z0-9]{2}([A-Z0-9]{3})?$/.test(cleaned);
3759
+ return financialContext && validLength && validFormat;
3127
3760
  }
3128
3761
  };
3129
3762
  var TRANSACTION_ID = {
@@ -3136,11 +3769,18 @@ var TRANSACTION_ID = {
3136
3769
  };
3137
3770
  var INVESTMENT_ACCOUNT = {
3138
3771
  type: "INVESTMENT_ACCOUNT",
3139
- regex: /\b(?:ISA|SIPP|INV(?:ESTMENT)?|PENSION|401K|IRA)[-\s]?(?:ACCOUNT|ACCT|A\/C)?[-\s]?(?:NO|NUM(?:BER)?)?[-\s]?[:#]?\s*([A-Z0-9]{6,15})\b/gi,
3772
+ regex: /\b(?:ISA|SIPP|INV(?:ESTMENT)?|PENSION|401K|IRA)[-\s\u00A0]*(?:ACCOUNT|ACCT|A\/C)?[-\s\u00A0]*(?:NO|NUM(?:BER)?)?[-\s\u00A0.:#]*([A-Z0-9](?:[A-Z0-9][\s\u00A0./-]?){5,18}[A-Z0-9])\b/gi,
3140
3773
  placeholder: "[INV_ACCT_{n}]",
3141
3774
  priority: 85,
3142
3775
  severity: "high",
3143
- description: "Investment and pension account numbers"
3776
+ description: "Investment and pension account numbers",
3777
+ validator: (value, context) => {
3778
+ const normalized = value.replace(/[\s\u00A0./-]/g, "");
3779
+ const hasDigits = /\d{4,}/.test(normalized);
3780
+ const validLength = normalized.length >= 6 && normalized.length <= 15;
3781
+ const inContext = /isa|sipp|invest|pension|401k|ira|account|fund/i.test(context);
3782
+ return hasDigits && validLength && inContext;
3783
+ }
3144
3784
  };
3145
3785
  var WIRE_TRANSFER_REF = {
3146
3786
  type: "WIRE_TRANSFER_REF",
@@ -3414,22 +4054,60 @@ var TERMINAL_ID = {
3414
4054
  };
3415
4055
  var UK_BANK_ACCOUNT_IBAN = {
3416
4056
  type: "UK_BANK_ACCOUNT_IBAN",
3417
- regex: /\b(GB\d{2}[A-Z]{4}\d{14})\b/g,
4057
+ regex: /\b(GB\d{2}[\s\u00A0.-]?[A-Z]{4}[\s\u00A0.-]?\d{14})\b/gi,
3418
4058
  placeholder: "[UK_IBAN_{n}]",
3419
4059
  priority: 95,
3420
4060
  severity: "high",
3421
4061
  description: "UK bank account numbers in IBAN format",
3422
- validator: (value) => {
3423
- return value.startsWith("GB") && value.length === 22;
4062
+ validator: (value, context) => {
4063
+ const cleaned = value.replace(/[\s\u00A0.-]/g, "").toUpperCase();
4064
+ if (!cleaned.startsWith("GB") || cleaned.length !== 22) {
4065
+ return false;
4066
+ }
4067
+ if (!validateIBAN(cleaned)) {
4068
+ return false;
4069
+ }
4070
+ const bankingKeywords = /iban|account|bank|uk|gb|financial|payment|transfer/i;
4071
+ if (!bankingKeywords.test(context)) {
4072
+ return false;
4073
+ }
4074
+ const rejectKeywords = /example\s+iban|test\s+iban|sample\s+iban|demo\s+iban|fake\s+iban/i;
4075
+ if (rejectKeywords.test(context)) {
4076
+ return false;
4077
+ }
4078
+ return true;
3424
4079
  }
3425
4080
  };
3426
4081
  var UK_SORT_CODE_ACCOUNT = {
3427
4082
  type: "UK_SORT_CODE_ACCOUNT",
3428
- regex: /\b(\d{2}[-]\d{2}[-]\d{2}\s?\d{8})\b/g,
4083
+ regex: /\b(\d{2}[\s\u00A0-]?\d{2}[\s\u00A0-]?\d{2}[\s\u00A0]?\d{8})\b/g,
3429
4084
  placeholder: "[UK_ACCOUNT_{n}]",
3430
4085
  priority: 95,
3431
4086
  severity: "high",
3432
- description: "UK sort code and account number combination"
4087
+ description: "UK sort code and account number combination",
4088
+ validator: (value, context) => {
4089
+ const cleaned = value.replace(/[\s\u00A0.-]/g, "");
4090
+ if (!/^\d{14}$/.test(cleaned)) {
4091
+ return false;
4092
+ }
4093
+ const sortCode = cleaned.substring(0, 6);
4094
+ const accountNumber = cleaned.substring(6);
4095
+ if (accountNumber.length !== 8) {
4096
+ return false;
4097
+ }
4098
+ if (!validateSortCode(sortCode)) {
4099
+ return false;
4100
+ }
4101
+ const bankingKeywords = /sort\s+code|account|bank|uk|gb|financial|payment|transfer/i;
4102
+ if (!bankingKeywords.test(context)) {
4103
+ return false;
4104
+ }
4105
+ const rejectKeywords = /example\s+account|test\s+account|sample\s+account|demo\s+account|fake\s+account/i;
4106
+ if (rejectKeywords.test(context)) {
4107
+ return false;
4108
+ }
4109
+ return true;
4110
+ }
3433
4111
  };
3434
4112
  var financialPatterns2 = [
3435
4113
  SWIFT_BIC,
@@ -4274,13 +4952,17 @@ var RESUME_ID = {
4274
4952
  };
4275
4953
  var BENEFITS_PLAN_NUMBER = {
4276
4954
  type: "BENEFITS_PLAN_NUMBER",
4277
- regex: /\b(?:BENEFITS?|INSURANCE|HEALTH[-\s]?PLAN)[-\s]?(?:PLAN)?[-\s]?(?:NO|NUM(?:BER)?|ID)?[-\s]?[:#]?\s*([A-Z0-9]{6,14})\b/gi,
4955
+ regex: /\b(?:BENEFITS?|INSURANCE|HEALTH[-\s\u00A0]?PLAN)[-\s\u00A0]*(?:PLAN)?[-\s\u00A0]*(?:NO|NUM(?:BER)?|ID)?[-\s\u00A0.:#]*([A-Z0-9](?:[A-Z0-9][\s\u00A0./-]?){5,15}[A-Z0-9])\b/gi,
4278
4956
  placeholder: "[BENEFITS_{n}]",
4279
4957
  priority: 85,
4280
4958
  severity: "high",
4281
4959
  description: "Employee benefits and insurance plan numbers",
4282
- validator: (_value, context) => {
4283
- return /benefit|insurance|health|dental|vision|plan|policy|enrollment/i.test(context);
4960
+ validator: (value, context) => {
4961
+ const normalized = value.replace(/[\s\u00A0./-]/g, "");
4962
+ const hasDigits = /\d{4,}/.test(normalized);
4963
+ const validLength = normalized.length >= 6 && normalized.length <= 14;
4964
+ const inContext = /benefit|insurance|health|dental|vision|plan|policy|enrollment/i.test(context);
4965
+ return hasDigits && validLength && inContext;
4284
4966
  }
4285
4967
  };
4286
4968
  var RETIREMENT_ACCOUNT = {
@@ -4378,13 +5060,16 @@ var EXIT_INTERVIEW_ID = {
4378
5060
  };
4379
5061
  var DISCIPLINARY_ACTION_ID = {
4380
5062
  type: "DISCIPLINARY_ACTION_ID",
4381
- regex: /\b(?:DISCIPLINARY|INCIDENT|WARNING|VIOLATION)[-\s]?(?:ACTION)?[-\s]?(?:NO|NUM(?:BER)?|ID)?[-\s]?[:#]?\s*([A-Z0-9]{6,12})\b/gi,
5063
+ regex: /\b(?:DISCIPLINARY|INCIDENT|WARNING|VIOLATION)[-\s\u00A0]*(?:ACTION)?[-\s\u00A0]*(?:NO|NUM(?:BER)?|ID)?[-\s\u00A0.:#]*([A-Z0-9](?:[A-Z0-9][\s\u00A0./-]?){5,15}[A-Z0-9])\b/gi,
4382
5064
  placeholder: "[DISCIPLINE_{n}]",
4383
5065
  priority: 85,
4384
5066
  severity: "high",
4385
5067
  description: "Disciplinary action and incident identifiers",
4386
- validator: (_value, context) => {
4387
- return /disciplinary|incident|warning|violation|misconduct|investigation/i.test(context);
5068
+ validator: (value, context) => {
5069
+ const normalized = value.replace(/[\s\u00A0./-]/g, "");
5070
+ const hasDigits = /\d{3,}/.test(normalized);
5071
+ const validLength = normalized.length >= 6 && normalized.length <= 12;
5072
+ return hasDigits && validLength && /disciplinary|incident|warning|violation|misconduct|investigation/i.test(context);
4388
5073
  }
4389
5074
  };
4390
5075
  var EMERGENCY_CONTACT_REF = {
@@ -4712,7 +5397,7 @@ var TELECOMS_ACCOUNT_NUMBER = {
4712
5397
  type: "TELECOMS_ACCOUNT_NUMBER",
4713
5398
  regex: /\bACC(?:OUNT)?[-\s]?(?:NO|NUM(?:BER)?)?[-\s]?[:#]?\s*(\d{8,12})\b/gi,
4714
5399
  placeholder: "[ACCOUNT_{n}]",
4715
- priority: 85,
5400
+ priority: 90,
4716
5401
  severity: "high",
4717
5402
  description: "Telecommunications customer account numbers",
4718
5403
  validator: (_value, context) => {
@@ -5590,7 +6275,7 @@ var EMERGENCY_CALL_REF = {
5590
6275
  };
5591
6276
  var POLICE_REPORT_NUMBER = {
5592
6277
  type: "POLICE_REPORT_NUMBER",
5593
- regex: /\b(?:POLICE|PR|RPT|REPORT|CASE)[-\s]?(?:NO|NUM|NUMBER|ID)?[-\s]?[:#]?\s*(\d{4}[-\s]?\d{5,10}|[A-Z]{2,4}[-\s]?\d{6,10})\b/gi,
6278
+ regex: /\b(?:POLICE|PR|RPT|REPORT|CASE)[-\s\u00A0]*(?:NO|NUM|NUMBER|ID)?[-\s\u00A0.:#]*((?:[A-Z]{2,4}[\s\u00A0./-]?\d{2,4}[\s\u00A0./-]?\d{4,10})|\d{4}[\s\u00A0./-]?\d{5,10})\b/gi,
5594
6279
  placeholder: "[POLICE_RPT_{n}]",
5595
6280
  priority: 95,
5596
6281
  severity: "high",
@@ -5601,7 +6286,7 @@ var POLICE_REPORT_NUMBER = {
5601
6286
  };
5602
6287
  var FIRE_INCIDENT_NUMBER = {
5603
6288
  type: "FIRE_INCIDENT_NUMBER",
5604
- regex: /\b(?:FIRE|FI|FD)[-\s]?(?:INCIDENT|INC|NO|NUM|NUMBER|ID)?[-\s]?[:#]?\s*(\d{4}[-\s]?\d{4,8}|[A-Z]{2,4}[-\s]?\d{5,10})\b/gi,
6289
+ regex: /\b(?:FIRE|FI|FD)[-\s\u00A0]*(?:INCIDENT|INC|NO|NUM|NUMBER|ID)?[-\s\u00A0.:#]*((?:[A-Z]{2,4}[\s\u00A0./-]?\d{2,4}[\s\u00A0./-]?\d{4,10})|\d{4}[\s\u00A0./-]?\d{4,8})\b/gi,
5605
6290
  placeholder: "[FIRE_INC_{n}]",
5606
6291
  priority: 95,
5607
6292
  severity: "high",
@@ -6466,13 +7151,15 @@ var gamingPatterns = [
6466
7151
  // src/patterns/industries/vehicles.ts
6467
7152
  var VIN_NUMBER = {
6468
7153
  type: "VIN_NUMBER",
6469
- regex: /\bVIN[-\s]?(?:NO|NUM|NUMBER)?[-\s]?[:#]?\s*([A-HJ-NPR-Z0-9]{17})\b/gi,
7154
+ regex: /\bVIN[-\s\u00A0]?(?:NO|NUM|NUMBER)?[-\s\u00A0]?[:#]?\s*([A-HJ-NPR-Z0-9]{17})\b/gi,
6470
7155
  placeholder: "[VIN_{n}]",
6471
7156
  priority: 85,
6472
7157
  severity: "medium",
6473
7158
  description: "Vehicle Identification Number (VIN)",
6474
7159
  validator: (value, context) => {
6475
- if (/[IOQ]/i.test(value)) return false;
7160
+ const cleaned = value.replace(/[\s\u00A0.-]/g, "").toUpperCase();
7161
+ if (cleaned.length !== 17) return false;
7162
+ if (/[IOQ]/.test(cleaned)) return false;
6476
7163
  return /vin|vehicle|car|auto|motor|registration|title|insurance/i.test(context);
6477
7164
  }
6478
7165
  };
@@ -8774,9 +9461,11 @@ var GERMAN_TAX_ID = {
8774
9461
  severity: "high",
8775
9462
  description: "German Tax Identification Number (Steueridentifikationsnummer)",
8776
9463
  validator: (value, context) => {
9464
+ const cleaned = value.replace(/[\s\u00A0.-]/g, "");
9465
+ if (!/^\d{11}$/.test(cleaned)) return false;
8777
9466
  const relevantContext = /steuer|tax|german|deutschland|finanzamt/i.test(context);
8778
9467
  if (!relevantContext) return false;
8779
- const digits = value.split("").map(Number);
9468
+ const digits = cleaned.split("").map(Number);
8780
9469
  const digitCounts = /* @__PURE__ */ new Map();
8781
9470
  digits.forEach((d) => digitCounts.set(d, (digitCounts.get(d) || 0) + 1));
8782
9471
  const counts = Array.from(digitCounts.values());
@@ -8935,9 +9624,11 @@ var DUTCH_BSN = {
8935
9624
  severity: "high",
8936
9625
  description: "Dutch Citizen Service Number (BSN)",
8937
9626
  validator: (value, context) => {
9627
+ const cleaned = value.replace(/[\s\u00A0.-]/g, "");
9628
+ if (!/^\d{9}$/.test(cleaned)) return false;
8938
9629
  const relevantContext = /bsn|dutch|netherlands|nederland|burger/i.test(context);
8939
9630
  if (!relevantContext) return false;
8940
- const digits = value.split("").map(Number);
9631
+ const digits = cleaned.split("").map(Number);
8941
9632
  let sum = 0;
8942
9633
  for (let i = 0; i < 8; i++) {
8943
9634
  sum += digits[i] * (9 - i);
@@ -8954,10 +9645,12 @@ var POLISH_PESEL = {
8954
9645
  severity: "high",
8955
9646
  description: "Polish National Identification Number (PESEL)",
8956
9647
  validator: (value, context) => {
9648
+ const cleaned = value.replace(/[\s\u00A0.-]/g, "");
9649
+ if (!/^\d{11}$/.test(cleaned)) return false;
8957
9650
  const relevantContext = /pesel|polish|poland|polska/i.test(context);
8958
9651
  if (!relevantContext) return false;
8959
9652
  const weights = [1, 3, 7, 9, 1, 3, 7, 9, 1, 3];
8960
- const digits = value.split("").map(Number);
9653
+ const digits = cleaned.split("").map(Number);
8961
9654
  let sum = 0;
8962
9655
  for (let i = 0; i < 10; i++) {
8963
9656
  sum += digits[i] * weights[i];
@@ -9291,7 +9984,8 @@ var DISCORD_USER_ID = {
9291
9984
  severity: "medium",
9292
9985
  description: "Discord user ID (Snowflake format)",
9293
9986
  validator: (value, context) => {
9294
- if (value.length < 17 || value.length > 19) return false;
9987
+ const cleaned = value.replace(/[\s\u00A0.-]/g, "");
9988
+ if (cleaned.length < 17 || cleaned.length > 19) return false;
9295
9989
  return /discord|snowflake|user[-_]?id|server|guild/i.test(context);
9296
9990
  }
9297
9991
  };
@@ -9303,7 +9997,8 @@ var STEAM_ID64 = {
9303
9997
  severity: "medium",
9304
9998
  description: "Steam 64-bit user ID",
9305
9999
  validator: (value, context) => {
9306
- if (!value.startsWith("765") || value.length !== 17) return false;
10000
+ const cleaned = value.replace(/[\s\u00A0.-]/g, "");
10001
+ if (!cleaned.startsWith("765") || cleaned.length !== 17) return false;
9307
10002
  return /steam|gaming|player|profile|valve|community/i.test(context);
9308
10003
  }
9309
10004
  };
@@ -9436,13 +10131,14 @@ var NINTENDO_FRIEND_CODE = {
9436
10131
  type: "NINTENDO_FRIEND_CODE",
9437
10132
  regex: /\bSW[-\s]?(\d{4}[-\s]?\d{4}[-\s]?\d{4})\b/gi,
9438
10133
  placeholder: "[NINTENDO_FC_{n}]",
9439
- priority: 85,
10134
+ priority: 90,
9440
10135
  severity: "medium",
9441
10136
  description: "Nintendo Switch Friend Code",
9442
10137
  validator: (value, context) => {
9443
10138
  const digits = value.replace(/\D/g, "");
9444
10139
  if (digits.length !== 12) return false;
9445
- return /nintendo|switch|friend[- ]?code|gaming/i.test(context);
10140
+ const hasContext = /nintendo|switch|friend[- ]?code|gaming/i.test(context);
10141
+ return hasContext;
9446
10142
  }
9447
10143
  };
9448
10144
  var BATTLETAG = {
@@ -9737,14 +10433,64 @@ var ccpaPreset = {
9737
10433
  "USERNAME"
9738
10434
  ]
9739
10435
  };
10436
/**
 * Build an industry preset object.
 *
 * Every industry preset below switches on name, email, phone and address
 * detection and differs only in its list of pattern categories.
 *
 * @param {string[]} categories - Pattern categories the preset enables.
 * @returns {object} A fresh preset object (never shared between presets).
 */
function buildIndustryPreset(categories) {
  return {
    includeNames: true,
    includeEmails: true,
    includePhones: true,
    includeAddresses: true,
    categories
  };
}

// Healthcare provider preset.
var healthcarePreset = buildIndustryPreset(["personal", "contact", "healthcare", "insurance", "government"]);

// Healthcare research preset; currently the same settings as healthcarePreset,
// kept as a separate object so the two can diverge independently.
var healthcareResearchPreset = buildIndustryPreset(["personal", "contact", "healthcare", "insurance", "government"]);

// Finance / financial-services preset.
var financePreset = buildIndustryPreset(["personal", "contact", "financial", "government", "network"]);

// Education preset.
var educationPreset = buildIndustryPreset(["personal", "contact", "education", "government", "network"]);

// Transport and logistics preset.
var transportLogisticsPreset = buildIndustryPreset(["personal", "contact", "transportation", "logistics", "vehicles", "network"]);
9740
10471
  function getPreset(name) {
9741
- switch (name.toLowerCase()) {
10472
+ const presetName = name.toLowerCase();
10473
+ switch (presetName) {
9742
10474
  case "gdpr":
9743
10475
  return gdprPreset;
9744
10476
  case "hipaa":
9745
10477
  return hipaaPreset;
9746
10478
  case "ccpa":
9747
10479
  return ccpaPreset;
10480
+ case "healthcare":
10481
+ case "healthcare-provider":
10482
+ return healthcarePreset;
10483
+ case "healthcare-research":
10484
+ return healthcareResearchPreset;
10485
+ case "finance":
10486
+ case "financial-services":
10487
+ return financePreset;
10488
+ case "education":
10489
+ return educationPreset;
10490
+ case "transport-logistics":
10491
+ case "transportation":
10492
+ case "logistics":
10493
+ return transportLogisticsPreset;
9748
10494
  default:
9749
10495
  return {};
9750
10496
  }
@@ -10267,9 +11013,23 @@ var ConfigLoader = class {
10267
11013
  };
10268
11014
  }
10269
11015
  if (preset.startsWith("openredaction:")) {
10270
- const complianceType = preset.replace("openredaction:", "");
10271
- if (["gdpr", "hipaa", "ccpa"].includes(complianceType)) {
10272
- return { preset: complianceType };
11016
+ const presetName = preset.replace("openredaction:", "");
11017
+ const supportedPresets = [
11018
+ "gdpr",
11019
+ "hipaa",
11020
+ "ccpa",
11021
+ "healthcare",
11022
+ "healthcare-provider",
11023
+ "healthcare-research",
11024
+ "finance",
11025
+ "financial-services",
11026
+ "education",
11027
+ "transport-logistics",
11028
+ "transportation",
11029
+ "logistics"
11030
+ ];
11031
+ if (supportedPresets.includes(presetName)) {
11032
+ return { preset: presetName };
10273
11033
  }
10274
11034
  }
10275
11035
  return null;
@@ -10285,7 +11045,8 @@ var ConfigLoader = class {
10285
11045
  export default {
10286
11046
  // Extend built-in presets
10287
11047
  // Options: 'openredaction:recommended', 'openredaction:strict', 'openredaction:minimal'
10288
- // Or compliance: 'openredaction:gdpr', 'openredaction:hipaa', 'openredaction:ccpa'
11048
+ // Or compliance/industry presets: 'openredaction:gdpr', 'openredaction:hipaa', 'openredaction:ccpa',
11049
+ // 'openredaction:finance', 'openredaction:education', 'openredaction:healthcare', 'openredaction:transport-logistics'
10289
11050
  extends: ['openredaction:recommended'],
10290
11051
 
10291
11052
  // Detection options
@@ -11962,9 +12723,8 @@ var ExplainAPI = class {
11962
12723
  constructor(detector) {
11963
12724
  this.detector = detector;
11964
12725
  this.patterns = detector.getPatterns();
11965
- const testResult = detector.detect("Contact: admin@business.co.uk");
11966
- const hasConfidence = testResult.detections.length > 0 && testResult.detections[0].confidence !== void 0;
11967
12726
  const detectorOptions = detector.options;
12727
+ const hasConfidence = detectorOptions?.enableContextAnalysis || false;
11968
12728
  this.options = {
11969
12729
  enableContextAnalysis: hasConfidence,
11970
12730
  confidenceThreshold: detectorOptions?.confidenceThreshold || 0.5,
@@ -11976,7 +12736,7 @@ var ExplainAPI = class {
11976
12736
  /**
11977
12737
  * Explain why text was or wasn't detected as PII
11978
12738
  */
11979
- explain(text) {
12739
+ async explain(text) {
11980
12740
  const patternResults = [];
11981
12741
  const matchedPatterns = [];
11982
12742
  const unmatchedPatterns = [];
@@ -12066,7 +12826,8 @@ var ExplainAPI = class {
12066
12826
  patternResults.push(result);
12067
12827
  matchedPatterns.push(result);
12068
12828
  }
12069
- const detections = this.detector.detect(text).detections;
12829
+ const detectionResult = await this.detector.detect(text);
12830
+ const detections = detectionResult.detections;
12070
12831
  return {
12071
12832
  text,
12072
12833
  patternResults,
@@ -12085,7 +12846,7 @@ var ExplainAPI = class {
12085
12846
  /**
12086
12847
  * Explain a specific detection
12087
12848
  */
12088
- explainDetection(detection, text) {
12849
+ async explainDetection(detection, text) {
12089
12850
  const pattern = this.patterns.find((p) => p.type === detection.type);
12090
12851
  const reasoning = [];
12091
12852
  reasoning.push(`Detected as ${detection.type}`);
@@ -12116,13 +12877,15 @@ var ExplainAPI = class {
12116
12877
  detection,
12117
12878
  pattern,
12118
12879
  contextAnalysis,
12119
- reasoning
12880
+ reasoning,
12881
+ suggestions: []
12882
+ // Will be populated if needed
12120
12883
  };
12121
12884
  }
12122
12885
  /**
12123
12886
  * Suggest why text wasn't detected
12124
12887
  */
12125
- suggestWhy(text, expectedType) {
12888
+ async suggestWhy(text, expectedType) {
12126
12889
  const suggestions = [];
12127
12890
  const similarPatterns = [];
12128
12891
  const typePatterns = this.patterns.filter(
@@ -12140,7 +12903,7 @@ var ExplainAPI = class {
12140
12903
  similarPatterns.push(pattern);
12141
12904
  const value = match[1] !== void 0 ? match[1] : match[0];
12142
12905
  suggestions.push(`Pattern "${pattern.type}" matched value: "${value}"`);
12143
- const explanation = this.explain(text);
12906
+ const explanation = await this.explain(text);
12144
12907
  const filtered = explanation.filteredPatterns.find((r) => r.pattern.type === pattern.type);
12145
12908
  if (filtered && filtered.reason) {
12146
12909
  suggestions.push(`But was filtered: ${filtered.reason}`);
@@ -12170,9 +12933,9 @@ Example ${expectedType} pattern: ${examplePattern.regex.source.substring(0, 100)
12170
12933
  /**
12171
12934
  * Get debugging information for entire detection process
12172
12935
  */
12173
- debug(text) {
12936
+ async debug(text) {
12174
12937
  const start = performance.now();
12175
- const explanation = this.explain(text);
12938
+ const explanation = await this.explain(text);
12176
12939
  const duration = performance.now() - start;
12177
12940
  const enabledFeatures = [];
12178
12941
  if (this.options.enableContextAnalysis) {
@@ -13060,6 +13823,152 @@ function compileSafeRegex(pattern, flags) {
13060
13823
  return new RegExp(patternStr, finalFlags);
13061
13824
  }
13062
13825
 
13826
+ // src/utils/ai-assist.ts
13827
/**
 * Resolve the AI assist endpoint to call.
 *
 * An explicit `aiOptions.endpoint` takes precedence. Otherwise the
 * OPENREDACTION_AI_ENDPOINT environment variable is used when a `process.env`
 * object exists in this runtime.
 *
 * @param {object|null|undefined} aiOptions - AI options; read for `enabled` and `endpoint`.
 * @returns {*} The configured endpoint, or null when AI assist is not enabled
 *   or no endpoint is configured.
 */
function getAIEndpoint(aiOptions) {
  if (!aiOptions?.enabled) return null;
  const hasEnv = typeof process !== "undefined" && Boolean(process.env);
  const fromEnv = hasEnv ? process.env.OPENREDACTION_AI_ENDPOINT : void 0;
  return aiOptions.endpoint || fromEnv || null;
}
13842
/**
 * Report whether a global `fetch` binding exists in this runtime.
 *
 * @returns {boolean} True when `typeof fetch` is anything other than "undefined".
 */
function isFetchAvailable() {
  const fetchKind = typeof fetch;
  return fetchKind !== "undefined";
}
13845
/**
 * POST `text` to the AI assist service and return the entities it reports.
 *
 * The request goes to `<endpoint>/ai-detect`; an endpoint that already ends in
 * `/ai-detect` is used as is. Trailing slashes on the endpoint are ignored, so
 * `https://host/api/` and `https://host/api` resolve to the same URL.
 *
 * Failures are reported by resolving to null instead of rejecting: no `fetch`
 * available, a non-2xx response, a body without an `entities` array, or any
 * error thrown while building the URL, sending the request or parsing JSON.
 *
 * @param {string} text - Text to analyse; sent as `{ text }` in a JSON body.
 * @param {string} endpoint - Base URL of the AI service.
 * @param {boolean} [debug] - When truthy, log progress and failures to the console.
 * @returns {Promise<Array|null>} The `entities` array from the response, or null on failure.
 */
async function callAIDetect(text, endpoint, debug) {
  if (!isFetchAvailable()) {
    if (debug) {
      console.warn("[OpenRedaction] AI assist requires fetch API. Not available in this environment.");
    }
    return null;
  }
  try {
    // Strip trailing slashes before appending the route; otherwise
    // "https://host/api/" becomes "https://host/api//ai-detect" and
    // "https://host/ai-detect/" becomes "https://host/ai-detect//ai-detect".
    const base = endpoint.replace(/\/+$/, "");
    const url = base.endsWith("/ai-detect") ? base : `${base}/ai-detect`;
    if (debug) {
      console.log(`[OpenRedaction] Calling AI endpoint: ${url}`);
    }
    const response = await fetch(url, {
      method: "POST",
      headers: {
        "Content-Type": "application/json"
      },
      body: JSON.stringify({ text })
    });
    if (!response.ok) {
      if (debug) {
        const statusText = response.status === 429 ? "Rate limit exceeded (429)" : `${response.status}: ${response.statusText}`;
        console.warn(`[OpenRedaction] AI endpoint returned ${statusText}`);
      }
      return null;
    }
    const data = await response.json();
    // A null/primitive body makes the property access throw; the catch below
    // turns that into a null result as well.
    if (!data.entities || !Array.isArray(data.entities)) {
      if (debug) {
        console.warn("[OpenRedaction] Invalid AI response format: missing entities array");
      }
      return null;
    }
    return data.entities;
  } catch (error) {
    if (debug) {
      console.warn(`[OpenRedaction] AI endpoint error: ${error instanceof Error ? error.message : "Unknown error"}`);
    }
    return null;
  }
}
13886
/**
 * Check that an entity returned by the AI service is well-formed and lies
 * inside the analysed text.
 *
 * An entity is accepted only when:
 *  - it is a non-null object,
 *  - `type` and `value` are non-empty strings,
 *  - `start` and `end` are integers with `0 <= start < end <= textLength`,
 *  - `value.length` equals `end - start`.
 *
 * The value is compared by length only; this function does not see the text.
 *
 * @param {*} entity - Candidate entity; AI output is untrusted, so any shape may arrive.
 * @param {number} textLength - Length of the text the offsets refer to.
 * @returns {boolean} True when the entity passes every check.
 */
function validateAIEntity(entity, textLength) {
  // Array elements in an AI response can be null or primitives; reading
  // properties off those would throw instead of rejecting the entity.
  if (entity === null || typeof entity !== "object") {
    return false;
  }
  const { type, value, start, end } = entity;
  // Callers upper-case `type` and measure `value`, so both must be real strings.
  if (typeof type !== "string" || type === "" || typeof value !== "string" || value === "") {
    return false;
  }
  // Integer check also rejects NaN, Infinity and fractional offsets.
  if (!Number.isInteger(start) || !Number.isInteger(end)) {
    return false;
  }
  if (start < 0 || start >= end || end > textLength) {
    return false;
  }
  return value.length === end - start;
}
13902
/**
 * Decide whether two detections cover substantially the same text.
 *
 * Two detections overlap when the characters they share amount to more than
 * half the length of the shorter one. Spans that only touch, or do not
 * intersect at all, never overlap.
 *
 * @param {{position: number[]}} det1 - First detection; `position` is `[start, end]`.
 * @param {{position: number[]}} det2 - Second detection; `position` is `[start, end]`.
 * @returns {boolean} True when the shared span exceeds 50% of the shorter detection.
 */
function detectionsOverlap(det1, det2) {
  const [firstStart, firstEnd] = det1.position;
  const [secondStart, secondEnd] = det2.position;
  const shared = Math.min(firstEnd, secondEnd) - Math.max(firstStart, secondStart);
  if (shared <= 0) {
    return false;
  }
  const shorter = Math.min(firstEnd - firstStart, secondEnd - secondStart);
  return shared > shorter * 0.5;
}
13916
/**
 * Turn an entity reported by the AI service into a detection record.
 *
 * The entity is validated first. The detection's `value` is read from `text`
 * at the reported offsets, not copied from the entity.
 *
 * The reported type is upper-cased and coarse labels are mapped onto the type
 * names used elsewhere in this file (EMAIL, PHONE_US, NAME, SSN,
 * ADDRESS_STREET). Matching is by substring, so identifiers that merely
 * contain "NAME" or "ADDRESS" are excluded from the mapping: user/host/file/
 * domain names and IP/MAC/URL/web/wallet addresses keep their reported
 * (upper-cased) type instead of being relabelled as a person name or a street
 * address.
 *
 * @param {object} entity - AI entity with `type`, `value`, `start`, `end` and optional `confidence`.
 * @param {string} text - The text the entity offsets refer to.
 * @returns {object|null} A detection `{ type, value, placeholder, position, severity, confidence }`,
 *   or null when the entity fails validation.
 */
function convertAIEntityToDetection(entity, text) {
  if (!validateAIEntity(entity, text.length)) {
    return null;
  }
  const actualValue = text.substring(entity.start, entity.end);
  const reported = entity.type.toUpperCase();

  // Labels that contain NAME / ADDRESS but are not a person's name or a
  // postal address. Anchoring on "^" or "_" keeps e.g. SHIPPING_ADDRESS
  // (which contains the letters "IP") mapped to a street address.
  const isAccountStyleName = /(?:USER|HOST|FILE|DOMAIN)_?NAME/.test(reported);
  const isNetworkStyleAddress = /(?:^|_)(?:IP|IPV4|IPV6|MAC|URL|WEB|WALLET)_?ADDRESS/.test(reported);

  let type = reported;
  if (reported.includes("EMAIL")) {
    type = "EMAIL";
  } else if (reported.includes("PHONE")) {
    type = "PHONE_US";
  } else if (reported === "PERSON" || (reported.includes("NAME") && !isAccountStyleName)) {
    type = "NAME";
  } else if (reported.includes("SSN") || reported === "SOCIAL_SECURITY_NUMBER") {
    type = "SSN";
  } else if (reported.includes("ADDRESS") && !isNetworkStyleAddress) {
    type = "ADDRESS_STREET";
  }

  let severity = "medium";
  if (type === "SSN" || type === "CREDIT_CARD") {
    severity = "critical";
  } else if (type === "EMAIL" || type === "PHONE_US" || type === "NAME") {
    severity = "high";
  }

  return {
    type,
    value: actualValue,
    // Random suffix keeps placeholders of separate AI detections distinct.
    placeholder: `[${type}_${Math.random().toString(36).substring(2, 9)}]`,
    position: [entity.start, entity.end],
    severity,
    // Default confidence for AI entities that do not report one.
    confidence: entity.confidence ?? 0.7
  };
}
13949
/**
 * Combine regex detections with entities reported by the AI service.
 *
 * Regex detections are always kept. Each AI entity is converted to a detection
 * and appended only when it does not overlap anything already accepted,
 * whether that is a regex detection or an AI entity accepted earlier in this
 * call, so a span reported twice by the AI is added once.
 *
 * @param {object[]} regexDetections - Detections from the regex patterns; not mutated.
 * @param {object[]} aiEntities - Raw entities from the AI service.
 * @param {string} text - The text both sets of offsets refer to.
 * @returns {object[]} A new array: the regex detections followed by the accepted AI detections.
 */
function mergeAIEntities(regexDetections, aiEntities, text) {
  const merged = [...regexDetections];
  for (const aiEntity of aiEntities) {
    const detection = convertAIEntityToDetection(aiEntity, text);
    if (!detection) {
      continue;
    }
    // Compare against everything accepted so far, not only the regex hits;
    // otherwise two overlapping AI entities would both be added.
    const clashes = merged.some((accepted) => detectionsOverlap(accepted, detection));
    if (!clashes) {
      merged.push(detection);
    }
  }
  return merged;
}
13971
+
13063
13972
  // src/detector.ts
13064
13973
  var OpenRedaction = class _OpenRedaction {
13065
13974
  constructor(options = {}) {
@@ -13269,6 +14178,9 @@ var OpenRedaction = class _OpenRedaction {
13269
14178
  for (const pattern of this.patterns) {
13270
14179
  const regex = new RegExp(pattern.regex.source, pattern.regex.flags);
13271
14180
  this.compiledPatterns.set(pattern, regex);
14181
+ if (this.options.debug && (pattern.type === "NINTENDO_FRIEND_CODE" || pattern.type === "TELECOMS_ACCOUNT_NUMBER")) {
14182
+ console.log(`[OpenRedaction] Compiled pattern '${pattern.type}': ${regex}`);
14183
+ }
13272
14184
  }
13273
14185
  if (this.options.debug) {
13274
14186
  console.log(`[OpenRedaction] Pre-compiled ${this.compiledPatterns.size} regex patterns`);
@@ -13288,12 +14200,18 @@ var OpenRedaction = class _OpenRedaction {
13288
14200
  }
13289
14201
  continue;
13290
14202
  }
14203
+ if (this.options.debug && (pattern.type === "NINTENDO_FRIEND_CODE" || pattern.type === "TELECOMS_ACCOUNT_NUMBER")) {
14204
+ console.log(`[OpenRedaction] Processing pattern '${pattern.type}' with regex: ${regex}`);
14205
+ }
13291
14206
  let match;
13292
14207
  let matchCount = 0;
13293
14208
  const maxMatches = 1e4;
13294
14209
  regex.lastIndex = 0;
13295
14210
  try {
13296
14211
  while ((match = safeExec(regex, text, { timeout: this.options.regexTimeout })) !== null) {
14212
+ if (this.options.debug && (pattern.type === "NINTENDO_FRIEND_CODE" || pattern.type === "TELECOMS_ACCOUNT_NUMBER")) {
14213
+ console.log(`[OpenRedaction] Pattern '${pattern.type}' regex match found: '${match[0]}' at position ${match.index}`);
14214
+ }
13297
14215
  matchCount++;
13298
14216
  if (matchCount >= maxMatches) {
13299
14217
  if (this.options.debug) {
@@ -13314,12 +14232,18 @@ var OpenRedaction = class _OpenRedaction {
13314
14232
  endPos = startPos + value.length;
13315
14233
  }
13316
14234
  if (this.overlapsWithExisting(startPos, endPos, processedRanges)) {
14235
+ if (this.options.debug) {
14236
+ console.log(`[OpenRedaction] Pattern '${pattern.type}' skipped due to overlap at ${startPos}-${endPos}`);
14237
+ }
13317
14238
  continue;
13318
14239
  }
13319
14240
  const contextStart = Math.max(0, startPos - 50);
13320
14241
  const contextEnd = Math.min(text.length, endPos + 50);
13321
14242
  const context = text.substring(contextStart, contextEnd);
13322
14243
  if (pattern.validator && !pattern.validator(value, context)) {
14244
+ if (this.options.debug) {
14245
+ console.log(`[OpenRedaction] Pattern '${pattern.type}' validation failed for value: '${value}' with context: '${context.substring(0, 100)}...'`);
14246
+ }
13323
14247
  continue;
13324
14248
  }
13325
14249
  if (this.options.enableFalsePositiveFilter) {
@@ -13338,6 +14262,9 @@ var OpenRedaction = class _OpenRedaction {
13338
14262
  endPos
13339
14263
  );
13340
14264
  confidence = contextAnalysis.confidence;
14265
+ if (this.options.debug && confidence < this.options.confidenceThreshold) {
14266
+ console.log(`[OpenRedaction] Pattern '${pattern.type}' failed context analysis. Value: '${value}', Confidence: ${confidence} < ${this.options.confidenceThreshold}`);
14267
+ }
13341
14268
  }
13342
14269
  if (this.contextRulesEngine) {
13343
14270
  const piiMatch = {
@@ -13363,6 +14290,9 @@ var OpenRedaction = class _OpenRedaction {
13363
14290
  continue;
13364
14291
  }
13365
14292
  const placeholder = this.generatePlaceholder(value, pattern);
14293
+ if (this.options.debug) {
14294
+ console.log(`[OpenRedaction] Pattern '${pattern.type}' detected: '${value}' at position ${startPos}-${endPos}, confidence: ${confidence}`);
14295
+ }
13366
14296
  detections.push({
13367
14297
  type: pattern.type,
13368
14298
  value,
@@ -13423,8 +14353,9 @@ var OpenRedaction = class _OpenRedaction {
13423
14353
  }
13424
14354
  /**
13425
14355
  * Detect PII in text
14356
+ * Now async to support optional AI assist
13426
14357
  */
13427
- detect(text) {
14358
+ async detect(text) {
13428
14359
  if (this.rbacManager && !this.rbacManager.hasPermission("detection:detect")) {
13429
14360
  throw new Error("[OpenRedaction] Permission denied: detection:detect required");
13430
14361
  }
@@ -13478,12 +14409,42 @@ var OpenRedaction = class _OpenRedaction {
13478
14409
  } else {
13479
14410
  detections = this.processPatterns(text, this.patterns, processedRanges);
13480
14411
  }
14412
+ if (this.options.ai?.enabled) {
14413
+ const aiEndpoint = getAIEndpoint(this.options.ai);
14414
+ if (aiEndpoint) {
14415
+ try {
14416
+ if (this.options.debug) {
14417
+ console.log("[OpenRedaction] AI assist enabled, calling AI endpoint...");
14418
+ }
14419
+ const aiEntities = await callAIDetect(text, aiEndpoint, this.options.debug);
14420
+ if (aiEntities && aiEntities.length > 0) {
14421
+ if (this.options.debug) {
14422
+ console.log(`[OpenRedaction] AI returned ${aiEntities.length} additional entities`);
14423
+ }
14424
+ detections = mergeAIEntities(detections, aiEntities, text);
14425
+ if (this.options.debug) {
14426
+ console.log(`[OpenRedaction] After AI merge: ${detections.length} total detections`);
14427
+ }
14428
+ } else if (this.options.debug) {
14429
+ console.log("[OpenRedaction] AI endpoint returned no additional entities");
14430
+ }
14431
+ } catch (error) {
14432
+ if (this.options.debug) {
14433
+ console.warn(`[OpenRedaction] AI assist failed, using regex-only: ${error instanceof Error ? error.message : "Unknown error"}`);
14434
+ }
14435
+ }
14436
+ } else if (this.options.debug) {
14437
+ console.warn("[OpenRedaction] AI assist enabled but no endpoint configured. Set ai.endpoint or OPENREDACTION_AI_ENDPOINT env var.");
14438
+ }
14439
+ }
13481
14440
  detections.sort((a, b) => b.position[0] - a.position[0]);
13482
14441
  let redacted = text;
13483
14442
  const redactionMap = {};
13484
14443
  for (const detection of detections) {
13485
- const [start, end] = detection.position;
13486
- redacted = redacted.substring(0, start) + detection.placeholder + redacted.substring(end);
14444
+ if (!detection.value) continue;
14445
+ const escapedValue = this.escapeRegex(detection.value);
14446
+ const pattern = new RegExp(escapedValue, "gi");
14447
+ redacted = redacted.replace(pattern, detection.placeholder);
13487
14448
  redactionMap[detection.placeholder] = detection.value;
13488
14449
  }
13489
14450
  const endTime = performance.now();
@@ -13636,8 +14597,8 @@ var OpenRedaction = class _OpenRedaction {
13636
14597
  /**
13637
14598
  * Get severity-based scan results
13638
14599
  */
13639
- scan(text) {
13640
- const result = this.detect(text);
14600
+ async scan(text) {
14601
+ const result = await this.detect(text);
13641
14602
  return {
13642
14603
  high: result.detections.filter((d) => d.severity === "high"),
13643
14604
  medium: result.detections.filter((d) => d.severity === "medium"),
@@ -13845,7 +14806,7 @@ var OpenRedaction = class _OpenRedaction {
13845
14806
  * Run health check
13846
14807
  */
13847
14808
  async healthCheck(options) {
13848
- const { HealthChecker: HealthChecker2 } = await import("./HealthCheck-A5OD4ATR.mjs");
14809
+ const { HealthChecker: HealthChecker2 } = await import("./HealthCheck-77BBYEEO.mjs");
13849
14810
  const checker = new HealthChecker2(this);
13850
14811
  return checker.check(options);
13851
14812
  }
@@ -13853,7 +14814,7 @@ var OpenRedaction = class _OpenRedaction {
13853
14814
  * Quick health check (minimal overhead)
13854
14815
  */
13855
14816
  async quickHealthCheck() {
13856
- const { HealthChecker: HealthChecker2 } = await import("./HealthCheck-A5OD4ATR.mjs");
14817
+ const { HealthChecker: HealthChecker2 } = await import("./HealthCheck-77BBYEEO.mjs");
13857
14818
  const checker = new HealthChecker2(this);
13858
14819
  return checker.quickCheck();
13859
14820
  }
@@ -13867,14 +14828,14 @@ var OpenRedaction = class _OpenRedaction {
13867
14828
  if (this.rbacManager && !this.rbacManager.hasPermission("detection:detect")) {
13868
14829
  throw new Error("[OpenRedaction] Permission denied: detection:detect required");
13869
14830
  }
13870
- const { createDocumentProcessor: createDocumentProcessor2 } = await import("./document-AOMZP7UR.mjs");
14831
+ const { createDocumentProcessor: createDocumentProcessor2 } = await import("./document-NNFKTUEV.mjs");
13871
14832
  const processor = createDocumentProcessor2();
13872
14833
  const extractionStart = performance.now();
13873
14834
  const text = await processor.extractText(buffer, options);
13874
14835
  const metadata = await processor.getMetadata(buffer, options);
13875
14836
  const extractionEnd = performance.now();
13876
14837
  const extractionTime = Math.round((extractionEnd - extractionStart) * 100) / 100;
13877
- const detection = this.detect(text);
14838
+ const detection = await this.detect(text);
13878
14839
  return {
13879
14840
  text,
13880
14841
  metadata,
@@ -13968,7 +14929,7 @@ var StreamingDetector = class {
13968
14929
  const end = Math.min(textLength, position + chunkSize);
13969
14930
  const chunk = text.substring(start, end);
13970
14931
  const byteOffset = start;
13971
- const result = this.detector.detect(chunk);
14932
+ const result = await this.detector.detect(chunk);
13972
14933
  const newDetections = result.detections.filter((detection) => {
13973
14934
  const absoluteStart = byteOffset + detection.position[0];
13974
14935
  const absoluteEnd = byteOffset + detection.position[1];
@@ -13998,8 +14959,10 @@ var StreamingDetector = class {
13998
14959
  (a, b) => b.position[0] - a.position[0]
13999
14960
  );
14000
14961
  for (const detection of sortedDetections) {
14001
- const [start2, end2] = detection.position;
14002
- redactedChunk = redactedChunk.substring(0, start2) + detection.placeholder + redactedChunk.substring(end2);
14962
+ if (!detection.value) continue;
14963
+ const escapedValue = this.escapeRegex(detection.value);
14964
+ const pattern = new RegExp(escapedValue, "gi");
14965
+ redactedChunk = redactedChunk.replace(pattern, detection.placeholder);
14003
14966
  }
14004
14967
  }
14005
14968
  yield {
@@ -14025,8 +14988,10 @@ var StreamingDetector = class {
14025
14988
  allDetections.sort((a, b) => b.position[0] - a.position[0]);
14026
14989
  const redactionMap = {};
14027
14990
  for (const detection of allDetections) {
14028
- const [start, end] = detection.position;
14029
- redactedText = redactedText.substring(0, start) + detection.placeholder + redactedText.substring(end);
14991
+ if (!detection.value) continue;
14992
+ const escapedValue = this.escapeRegex(detection.value);
14993
+ const pattern = new RegExp(escapedValue, "gi");
14994
+ redactedText = redactedText.replace(pattern, detection.placeholder);
14030
14995
  redactionMap[detection.placeholder] = detection.value;
14031
14996
  }
14032
14997
  return {
@@ -14101,6 +15066,9 @@ var StreamingDetector = class {
14101
15066
  estimatedMemory
14102
15067
  };
14103
15068
  }
15069
+ escapeRegex(str) {
15070
+ return str.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
15071
+ }
14104
15072
  };
14105
15073
  function createStreamingDetector(detector, options) {
14106
15074
  return new StreamingDetector(detector, options);
@@ -14114,11 +15082,11 @@ var BatchProcessor = class {
14114
15082
  /**
14115
15083
  * Process multiple documents sequentially
14116
15084
  */
14117
- processSequential(documents, options = {}) {
15085
+ async processSequential(documents, options = {}) {
14118
15086
  const startTime = performance.now();
14119
15087
  const results = [];
14120
15088
  for (let i = 0; i < documents.length; i++) {
14121
- const result = this.detector.detect(documents[i]);
15089
+ const result = await this.detector.detect(documents[i]);
14122
15090
  results.push(result);
14123
15091
  if (options.onProgress) {
14124
15092
  options.onProgress(i + 1, documents.length);
@@ -14132,7 +15100,7 @@ var BatchProcessor = class {
14132
15100
  totalDocuments: documents.length,
14133
15101
  totalDetections: results.reduce((sum, r) => sum + r.detections.length, 0),
14134
15102
  totalTime,
14135
- avgTimePerDocument: totalTime / documents.length
15103
+ avgTimePerDocument: documents.length > 0 ? totalTime / documents.length : 0
14136
15104
  }
14137
15105
  };
14138
15106
  }
@@ -14146,16 +15114,14 @@ var BatchProcessor = class {
14146
15114
  let completed = 0;
14147
15115
  for (let i = 0; i < documents.length; i += maxConcurrency) {
14148
15116
  const batch = documents.slice(i, i + maxConcurrency);
14149
- const batchPromises = batch.map((doc, batchIndex) => {
14150
- return Promise.resolve().then(() => {
14151
- const result = this.detector.detect(doc);
14152
- results[i + batchIndex] = result;
14153
- completed++;
14154
- if (options.onProgress) {
14155
- options.onProgress(completed, documents.length);
14156
- }
14157
- return result;
14158
- });
15117
+ const batchPromises = batch.map(async (doc, batchIndex) => {
15118
+ const result = await this.detector.detect(doc);
15119
+ results[i + batchIndex] = result;
15120
+ completed++;
15121
+ if (options.onProgress) {
15122
+ options.onProgress(completed, documents.length);
15123
+ }
15124
+ return result;
14159
15125
  });
14160
15126
  await Promise.all(batchPromises);
14161
15127
  }
@@ -14167,7 +15133,7 @@ var BatchProcessor = class {
14167
15133
  totalDocuments: documents.length,
14168
15134
  totalDetections: results.reduce((sum, r) => sum + r.detections.length, 0),
14169
15135
  totalTime,
14170
- avgTimePerDocument: totalTime / documents.length
15136
+ avgTimePerDocument: documents.length > 0 ? totalTime / documents.length : 0
14171
15137
  }
14172
15138
  };
14173
15139
  }
@@ -14178,7 +15144,7 @@ var BatchProcessor = class {
14178
15144
  if (options.parallel) {
14179
15145
  return this.processParallel(documents, options);
14180
15146
  } else {
14181
- return Promise.resolve(this.processSequential(documents, options));
15147
+ return this.processSequential(documents, options);
14182
15148
  }
14183
15149
  }
14184
15150
  /**
@@ -14189,7 +15155,7 @@ var BatchProcessor = class {
14189
15155
  for (let i = 0; i < documents.length; i += batchSize) {
14190
15156
  const batch = documents.slice(i, i + batchSize);
14191
15157
  for (const doc of batch) {
14192
- const result = this.detector.detect(doc);
15158
+ const result = await this.detector.detect(doc);
14193
15159
  yield result;
14194
15160
  }
14195
15161
  }
@@ -14237,7 +15203,7 @@ function openredactionMiddleware(options = {}) {
14237
15203
  ...detectorOptions
14238
15204
  } = options;
14239
15205
  const detector = new OpenRedaction(detectorOptions);
14240
- return (req, res, next) => {
15206
+ return async (req, res, next) => {
14241
15207
  if (skipRoutes.some((pattern) => pattern.test(req.path))) {
14242
15208
  return next();
14243
15209
  }
@@ -14257,7 +15223,7 @@ function openredactionMiddleware(options = {}) {
14257
15223
  const results = {};
14258
15224
  const redactedBody = { ...req.body };
14259
15225
  for (const { field, value } of textsToCheck) {
14260
- const result = detector.detect(value);
15226
+ const result = await detector.detect(value);
14261
15227
  if (result.detections.length > 0) {
14262
15228
  totalDetections += result.detections.length;
14263
15229
  results[field] = result;
@@ -14307,7 +15273,7 @@ function openredactionMiddleware(options = {}) {
14307
15273
  }
14308
15274
  function detectPII(options = {}) {
14309
15275
  const detector = new OpenRedaction(options);
14310
- return (req, res) => {
15276
+ return async (req, res) => {
14311
15277
  const text = req.body?.text || req.query.text;
14312
15278
  if (!text) {
14313
15279
  res.status(400).json({
@@ -14316,19 +15282,26 @@ function detectPII(options = {}) {
14316
15282
  });
14317
15283
  return;
14318
15284
  }
14319
- const result = detector.detect(text);
14320
- res.json({
14321
- detected: result.detections.length > 0,
14322
- count: result.detections.length,
14323
- detections: result.detections,
14324
- redacted: result.redacted,
14325
- stats: result.stats
14326
- });
15285
+ try {
15286
+ const result = await detector.detect(text);
15287
+ res.json({
15288
+ detected: result.detections.length > 0,
15289
+ count: result.detections.length,
15290
+ detections: result.detections,
15291
+ redacted: result.redacted,
15292
+ stats: result.stats
15293
+ });
15294
+ } catch (error) {
15295
+ res.status(500).json({
15296
+ error: "Detection failed",
15297
+ message: error instanceof Error ? error.message : "Unknown error"
15298
+ });
15299
+ }
14327
15300
  };
14328
15301
  }
14329
15302
  function generateReport(options = {}) {
14330
15303
  const detector = new OpenRedaction(options);
14331
- return (req, res) => {
15304
+ return async (req, res) => {
14332
15305
  const text = req.body?.text;
14333
15306
  const format = req.body?.format || req.query.format || "json";
14334
15307
  if (!text) {
@@ -14337,28 +15310,35 @@ function generateReport(options = {}) {
14337
15310
  });
14338
15311
  return;
14339
15312
  }
14340
- const result = detector.detect(text);
14341
- if (format === "html") {
14342
- const html = detector.generateReport(result, {
14343
- format: "html",
14344
- title: req.body?.title || "PII Detection Report"
14345
- });
14346
- res.setHeader("Content-Type", "text/html");
14347
- res.send(html);
14348
- } else if (format === "markdown") {
14349
- const md = detector.generateReport(result, {
14350
- format: "markdown",
14351
- title: req.body?.title || "PII Detection Report"
14352
- });
14353
- res.setHeader("Content-Type", "text/markdown");
14354
- res.send(md);
14355
- } else {
14356
- res.json({
14357
- detected: result.detections.length > 0,
14358
- count: result.detections.length,
14359
- detections: result.detections,
14360
- redacted: result.redacted,
14361
- stats: result.stats
15313
+ try {
15314
+ const result = await detector.detect(text);
15315
+ if (format === "html") {
15316
+ const html = detector.generateReport(result, {
15317
+ format: "html",
15318
+ title: req.body?.title || "PII Detection Report"
15319
+ });
15320
+ res.setHeader("Content-Type", "text/html");
15321
+ res.send(html);
15322
+ } else if (format === "markdown") {
15323
+ const md = detector.generateReport(result, {
15324
+ format: "markdown",
15325
+ title: req.body?.title || "PII Detection Report"
15326
+ });
15327
+ res.setHeader("Content-Type", "text/markdown");
15328
+ res.send(md);
15329
+ } else {
15330
+ res.json({
15331
+ detected: result.detections.length > 0,
15332
+ count: result.detections.length,
15333
+ detections: result.detections,
15334
+ redacted: result.redacted,
15335
+ stats: result.stats
15336
+ });
15337
+ }
15338
+ } catch (error) {
15339
+ res.status(500).json({
15340
+ error: "Report generation failed",
15341
+ message: error instanceof Error ? error.message : "Unknown error"
14362
15342
  });
14363
15343
  }
14364
15344
  };
@@ -14370,12 +15350,17 @@ function useOpenRedaction(options) {
14370
15350
  const detector = useMemo(() => new OpenRedaction(options), [options]);
14371
15351
  const [result, setResult] = useState(null);
14372
15352
  const [isDetecting, setIsDetecting] = useState(false);
14373
- const detect = useCallback((text) => {
15353
+ const detect = useCallback(async (text) => {
14374
15354
  setIsDetecting(true);
14375
- const detection = detector.detect(text);
14376
- setResult(detection);
14377
- setIsDetecting(false);
14378
- return detection;
15355
+ try {
15356
+ const detection = await detector.detect(text);
15357
+ setResult(detection);
15358
+ setIsDetecting(false);
15359
+ return detection;
15360
+ } catch (error) {
15361
+ setIsDetecting(false);
15362
+ throw error;
15363
+ }
14379
15364
  }, [detector]);
14380
15365
  const clear = useCallback(() => {
14381
15366
  setResult(null);
@@ -14401,10 +15386,14 @@ function usePIIDetector(text, options) {
14401
15386
  return;
14402
15387
  }
14403
15388
  setIsDetecting(true);
14404
- const timer = setTimeout(() => {
14405
- const detection = detector.detect(text);
14406
- setResult(detection);
14407
- setIsDetecting(false);
15389
+ const timer = setTimeout(async () => {
15390
+ try {
15391
+ const detection = await detector.detect(text);
15392
+ setResult(detection);
15393
+ setIsDetecting(false);
15394
+ } catch (error) {
15395
+ setIsDetecting(false);
15396
+ }
14408
15397
  }, debounce);
14409
15398
  return () => {
14410
15399
  clearTimeout(timer);
@@ -14425,27 +15414,32 @@ function useFormFieldValidator(options) {
14425
15414
  const [value, setValue] = useState("");
14426
15415
  const [error, setError] = useState(null);
14427
15416
  const [result, setResult] = useState(null);
14428
- const validate = useCallback((inputValue) => {
15417
+ const validate = useCallback(async (inputValue) => {
14429
15418
  setValue(inputValue);
14430
15419
  if (!inputValue) {
14431
15420
  setError(null);
14432
15421
  setResult(null);
14433
15422
  return true;
14434
15423
  }
14435
- const detection = detector.detect(inputValue);
14436
- setResult(detection);
14437
- const relevantDetections = types.length > 0 ? detection.detections.filter((d) => types.includes(d.type)) : detection.detections;
14438
- if (relevantDetections.length > 0) {
14439
- if (failOnPII) {
14440
- setError(`Sensitive information detected: ${relevantDetections[0].type}`);
14441
- }
14442
- if (onPIIDetected) {
14443
- onPIIDetected(detection);
15424
+ try {
15425
+ const detection = await detector.detect(inputValue);
15426
+ setResult(detection);
15427
+ const relevantDetections = types.length > 0 ? detection.detections.filter((d) => types.includes(d.type)) : detection.detections;
15428
+ if (relevantDetections.length > 0) {
15429
+ if (failOnPII) {
15430
+ setError(`Sensitive information detected: ${relevantDetections[0].type}`);
15431
+ }
15432
+ if (onPIIDetected) {
15433
+ onPIIDetected(detection);
15434
+ }
15435
+ return false;
14444
15436
  }
15437
+ setError(null);
15438
+ return true;
15439
+ } catch (error2) {
15440
+ setError("Validation failed");
14445
15441
  return false;
14446
15442
  }
14447
- setError(null);
14448
- return true;
14449
15443
  }, [detector, failOnPII, types, onPIIDetected]);
14450
15444
  const getFieldProps = useCallback(() => ({
14451
15445
  value,
@@ -14472,7 +15466,7 @@ function useBatchDetector(options) {
14472
15466
  setProgress(0);
14473
15467
  const detections = [];
14474
15468
  for (let i = 0; i < texts.length; i++) {
14475
- const result = detector.detect(texts[i]);
15469
+ const result = await detector.detect(texts[i]);
14476
15470
  detections.push(result);
14477
15471
  setProgress((i + 1) / texts.length * 100);
14478
15472
  await new Promise((resolve) => setTimeout(resolve, 0));
@@ -14509,9 +15503,12 @@ function useAutoRedact(options) {
14509
15503
  setResult(null);
14510
15504
  return;
14511
15505
  }
14512
- const timer = setTimeout(() => {
14513
- const detection = detector.detect(text);
14514
- setResult(detection);
15506
+ const timer = setTimeout(async () => {
15507
+ try {
15508
+ const detection = await detector.detect(text);
15509
+ setResult(detection);
15510
+ } catch (error) {
15511
+ }
14515
15512
  }, debounce);
14516
15513
  return () => clearTimeout(timer);
14517
15514
  }, [text, detector, debounce]);
@@ -14640,7 +15637,7 @@ var TenantManager = class {
14640
15637
  await this.checkQuotas(tenantId, text);
14641
15638
  this.trackRequest(tenantId, text);
14642
15639
  const detector = this.getDetector(tenantId);
14643
- const result = detector.detect(text);
15640
+ const result = await detector.detect(text);
14644
15641
  const usage = this.usage.get(tenantId);
14645
15642
  usage.piiDetectedThisMonth += result.detections.length;
14646
15643
  usage.lastRequestAt = /* @__PURE__ */ new Date();
@@ -14927,6 +15924,7 @@ var DEFAULT_TIER_QUOTAS = {
14927
15924
  // src/webhooks/WebhookManager.ts
14928
15925
  var WebhookManager = class {
14929
15926
  // 1 minute
15927
+ // private readonly HALF_OPEN_MAX_REQUESTS = 1; // Reserved for future use
14930
15928
  constructor(options) {
14931
15929
  this.webhooks = /* @__PURE__ */ new Map();
14932
15930
  this.deliveryHistory = [];
@@ -15198,9 +16196,9 @@ var WebhookManager = class {
15198
16196
  */
15199
16197
  async makeHttpRequest(webhook, event) {
15200
16198
  try {
15201
- let fetch;
16199
+ let fetch2;
15202
16200
  try {
15203
- fetch = globalThis.fetch;
16201
+ fetch2 = globalThis.fetch;
15204
16202
  } catch {
15205
16203
  throw new Error("[WebhookManager] HTTP client not available. Requires Node 18+ with fetch support.");
15206
16204
  }
@@ -15220,7 +16218,7 @@ var WebhookManager = class {
15220
16218
  const controller = new AbortController();
15221
16219
  const timeoutId = setTimeout(() => controller.abort(), webhook.timeout);
15222
16220
  try {
15223
- const response = await fetch(webhook.url, {
16221
+ const response = await fetch2(webhook.url, {
15224
16222
  method: "POST",
15225
16223
  headers,
15226
16224
  body: JSON.stringify(event),
@@ -15591,7 +16589,7 @@ var APIServer = class {
15591
16589
  if (req.tenantId && this.config.tenantManager) {
15592
16590
  result = await this.config.tenantManager.detect(req.tenantId, text);
15593
16591
  } else if (this.detector) {
15594
- result = this.detector.detect(text);
16592
+ result = await this.detector.detect(text);
15595
16593
  } else {
15596
16594
  throw new Error("No detector available");
15597
16595
  }
@@ -15632,7 +16630,7 @@ var APIServer = class {
15632
16630
  if (req.tenantId && this.config.tenantManager) {
15633
16631
  result = await this.config.tenantManager.detect(req.tenantId, text);
15634
16632
  } else if (this.detector) {
15635
- result = this.detector.detect(text);
16633
+ result = await this.detector.detect(text);
15636
16634
  } else {
15637
16635
  throw new Error("No detector available");
15638
16636
  }
@@ -16053,10 +17051,12 @@ export {
16053
17051
  analyzeFullContext,
16054
17052
  calculateContextConfidence,
16055
17053
  calculateRisk,
17054
+ callAIDetect,
16056
17055
  ccpaPreset,
16057
17056
  commonFalsePositives,
16058
17057
  compileSafeRegex,
16059
17058
  contactPatterns,
17059
+ convertAIEntityToDetection,
16060
17060
  createAPIServer,
16061
17061
  createBatchProcessor,
16062
17062
  createCacheDisabledError,
@@ -16091,12 +17091,16 @@ export {
16091
17091
  createXlsxProcessor,
16092
17092
  defaultPasses,
16093
17093
  detectPII,
17094
+ detectionsOverlap,
17095
+ educationPreset,
16094
17096
  exportForVersionControl,
16095
17097
  extractContext,
16096
17098
  filterFalsePositives,
17099
+ financePreset,
16097
17100
  financialPatterns,
16098
17101
  gdprPreset,
16099
17102
  generateReport,
17103
+ getAIEndpoint,
16100
17104
  getPatternsByCategory,
16101
17105
  getPredefinedRole,
16102
17106
  getPreset,
@@ -16104,21 +17108,26 @@ export {
16104
17108
  governmentPatterns,
16105
17109
  groupPatternsByPass,
16106
17110
  healthCheckMiddleware,
17111
+ healthcarePreset,
17112
+ healthcareResearchPreset,
16107
17113
  hipaaPreset,
16108
17114
  inferDocumentType,
16109
17115
  isFalsePositive,
16110
17116
  isUnsafePattern,
17117
+ mergeAIEntities,
16111
17118
  mergePassDetections,
16112
17119
  networkPatterns,
16113
17120
  openredactionMiddleware,
16114
17121
  personalPatterns,
16115
17122
  safeExec,
16116
17123
  safeExecAll,
17124
+ transportLogisticsPreset,
16117
17125
  useAutoRedact,
16118
17126
  useBatchDetector,
16119
17127
  useFormFieldValidator,
16120
17128
  useOpenRedaction,
16121
17129
  usePIIDetector,
17130
+ validateAIEntity,
16122
17131
  validateEmail,
16123
17132
  validateIBAN,
16124
17133
  validateLuhn,