openredaction 1.0.0 → 1.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -2,7 +2,7 @@ import {
2
2
  HealthChecker,
3
3
  createHealthChecker,
4
4
  healthCheckMiddleware
5
- } from "./chunk-ZRHGDEPC.mjs";
5
+ } from "./chunk-CXYSQPG6.mjs";
6
6
  import {
7
7
  CsvProcessor,
8
8
  DocumentProcessor,
@@ -14,7 +14,7 @@ import {
14
14
  createJsonProcessor,
15
15
  createOCRProcessor,
16
16
  createXlsxProcessor
17
- } from "./chunk-7OGNW2MU.mjs";
17
+ } from "./chunk-XG7MSXCJ.mjs";
18
18
  import {
19
19
  WorkerPool,
20
20
  createWorkerPool
@@ -417,7 +417,7 @@ var PersistentAuditLogger = class {
417
417
  enableHashing: options.enableHashing ?? true,
418
418
  hashAlgorithm: options.hashAlgorithm ?? "sha256",
419
419
  enableWAL: options.enableWAL ?? true,
420
- secretKey: options.secretKey
420
+ secretKey: options.secretKey ?? void 0
421
421
  };
422
422
  this.adapter = this.createAdapter(options.database);
423
423
  }
@@ -763,7 +763,8 @@ var PersistentAuditLogger = class {
763
763
  * Start automatic cleanup schedule
764
764
  */
765
765
  startCleanupSchedule() {
766
- const intervalMs = (this.options.retention?.cleanupIntervalHours ?? 24) * 60 * 60 * 1e3;
766
+ const cleanupIntervalHours = this.options.retention?.cleanupIntervalHours ?? 24;
767
+ const intervalMs = cleanupIntervalHours * 60 * 60 * 1e3;
767
768
  this.cleanupTimer = setInterval(() => {
768
769
  this.runCleanup().catch((err) => {
769
770
  console.error("[PersistentAuditLogger] Cleanup failed:", err);
@@ -1769,7 +1770,7 @@ function validateLuhn(cardNumber, _context) {
1769
1770
  return sum % 10 === 0;
1770
1771
  }
1771
1772
  function validateIBAN(iban, _context) {
1772
- const cleaned = iban.replace(/\s/g, "").toUpperCase();
1773
+ const cleaned = iban.replace(/[\s\u00A0.-]/g, "").toUpperCase();
1773
1774
  if (!/^[A-Z]{2}[0-9]{2}[A-Z0-9]{1,30}$/.test(cleaned)) {
1774
1775
  return false;
1775
1776
  }
@@ -1864,7 +1865,7 @@ function mod97(string) {
1864
1865
  return remainder;
1865
1866
  }
1866
1867
  function validateNINO(nino, _context) {
1867
- const cleaned = nino.replace(/\s/g, "").toUpperCase();
1868
+ const cleaned = nino.replace(/[\s\u00A0.-]/g, "").toUpperCase();
1868
1869
  if (!/^[A-CEGHJ-PR-TW-Z]{2}[0-9]{6}[A-D]$/.test(cleaned)) {
1869
1870
  return false;
1870
1871
  }
@@ -1873,7 +1874,7 @@ function validateNINO(nino, _context) {
1873
1874
  return !invalidPrefixes.includes(prefix);
1874
1875
  }
1875
1876
  function validateNHS(nhs, _context) {
1876
- const cleaned = nhs.replace(/[\s-]/g, "");
1877
+ const cleaned = nhs.replace(/[\s\u00A0.-]/g, "");
1877
1878
  if (!/^\d{10}$/.test(cleaned)) {
1878
1879
  return false;
1879
1880
  }
@@ -1886,11 +1887,11 @@ function validateNHS(nhs, _context) {
1886
1887
  return expectedCheckDigit === parseInt(cleaned[9], 10) && checkDigit !== 10;
1887
1888
  }
1888
1889
  function validateUKPassport(passport, _context) {
1889
- const cleaned = passport.replace(/\s/g, "").toUpperCase();
1890
+ const cleaned = passport.replace(/[\s\u00A0.-]/g, "").toUpperCase();
1890
1891
  return /^\d{9}$/.test(cleaned) || /^\d{3}\d{6}$/.test(cleaned);
1891
1892
  }
1892
1893
  function validateSSN(ssn, _context) {
1893
- const cleaned = ssn.replace(/[\s-]/g, "");
1894
+ const cleaned = ssn.replace(/[\s\u00A0.-]/g, "");
1894
1895
  if (!/^\d{9}$/.test(cleaned)) {
1895
1896
  return false;
1896
1897
  }
@@ -1920,6 +1921,15 @@ function validateSortCode(sortCode, _context) {
1920
1921
  const cleaned = sortCode.replace(/[\s-]/g, "");
1921
1922
  return /^\d{6}$/.test(cleaned);
1922
1923
  }
1924
+ function validateRoutingNumber(routingNumber, _context) {
1925
+ const cleaned = routingNumber.replace(/[\s\u00A0.-]/g, "");
1926
+ if (!/^\d{9}$/.test(cleaned)) {
1927
+ return false;
1928
+ }
1929
+ const digits = cleaned.split("").map(Number);
1930
+ const checksum = (3 * (digits[0] + digits[3] + digits[6]) + 7 * (digits[1] + digits[4] + digits[7]) + (digits[2] + digits[5] + digits[8])) % 10;
1931
+ return checksum === 0;
1932
+ }
1923
1933
  function validateName(name, context) {
1924
1934
  const businessTerms = [
1925
1935
  "account",
@@ -1954,10 +1964,29 @@ function validateName(name, context) {
1954
1964
  "sir",
1955
1965
  "madam",
1956
1966
  "lord",
1957
- "lady"
1967
+ "lady",
1968
+ "personal",
1969
+ "sensitive",
1970
+ "information",
1971
+ "data",
1972
+ "details",
1973
+ "content",
1974
+ "document",
1975
+ "text",
1976
+ "example",
1977
+ "simple",
1978
+ "regular",
1979
+ "plain",
1980
+ "send",
1981
+ "reply",
1982
+ "reach",
1983
+ "write",
1984
+ "use",
1985
+ "contact",
1986
+ "message"
1958
1987
  ];
1959
1988
  const nameLower = name.toLowerCase();
1960
- if (businessTerms.some((term) => nameLower.includes(term))) {
1989
+ if (businessTerms.some((term) => nameLower === term || nameLower.includes(term))) {
1961
1990
  return false;
1962
1991
  }
1963
1992
  if (name === name.toUpperCase() && name.length <= 5) {
@@ -1967,7 +1996,7 @@ function validateName(name, context) {
1967
1996
  return false;
1968
1997
  }
1969
1998
  const contextLower = context.toLowerCase();
1970
- if (contextLower.includes("company ") || contextLower.includes("business ") || contextLower.includes("organization")) {
1999
+ if (contextLower.includes("company ") || contextLower.includes("business ") || contextLower.includes("organization") || contextLower.includes("without any") || contextLower.includes("simple text") || contextLower.includes("plain text")) {
1971
2000
  return false;
1972
2001
  }
1973
2002
  return true;
@@ -2000,11 +2029,13 @@ var personalPatterns = [
2000
2029
  },
2001
2030
  {
2002
2031
  type: "NAME",
2003
- regex: /\b(?:(?:Mr|Mrs|Ms|Miss|Dr|Prof|Professor|Sir|Madam|Lady|Lord|Rev|Father|Sister|Brother)\.?\s+)?([A-Z][a-z]+(?:-[A-Z][a-z]+)? (?:[A-Z][a-z]+(?:-[A-Z][a-z]+)? )?[A-Z][a-z]+(?:-[A-Z][a-z]+)?)(?:\s+(?:Jr|Sr|II|III|IV|PhD|MD|Esq|DDS|DVM|MBA|CPA)\.?)?\b/g,
2032
+ // Match names with case variations - handles "John Smith", "john smith", "JOHN SMITH", "Lucy jones", etc.
2033
+ // First word must start with uppercase or be all uppercase; subsequent words can be any case
2034
+ regex: /\b(?:(?:Mr|Mrs|Ms|Miss|Dr|Prof|Professor|Sir|Madam|Lady|Lord|Rev|Father|Sister|Brother)\.?\s+)?((?:[A-Z][a-z'’.\-]+|[A-Z]{2,})(?:\s+(?:[A-Z][a-z'’.\-]+|[A-Z]{2,}|[a-z][a-z'’.\-]+)){1,3})(?:\s+(?:Jr|Sr|II|III|IV|PhD|MD|Esq|DDS|DVM|MBA|CPA)\.?)?\b/g,
2004
2035
  priority: 50,
2005
2036
  validator: validateName,
2006
2037
  placeholder: "[NAME_{n}]",
2007
- description: "Person name with salutations/suffixes",
2038
+ description: "Person name with salutations/suffixes (handles case variations)",
2008
2039
  severity: "high"
2009
2040
  },
2010
2041
  {
@@ -2025,11 +2056,26 @@ var personalPatterns = [
2025
2056
  },
2026
2057
  {
2027
2058
  type: "DATE_OF_BIRTH",
2028
- regex: /\b(?:DOB|date of birth|birth ?date)[:\s]*(\d{1,2}[-\/]\d{1,2}[-\/]\d{2,4})\b/gi,
2059
+ regex: /\b(?:DOB|date of birth|birth ?date)[:\s-]*((?:\d{1,2}[\/\-.]\d{1,2}[\/\-.]\d{2,4})|(?:\d{1,2}\s+(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\s+\d{2,4}))\b/gi,
2029
2060
  priority: 95,
2030
2061
  placeholder: "[DOB_{n}]",
2031
2062
  description: "Date of birth",
2032
2063
  severity: "high"
2064
+ },
2065
+ {
2066
+ type: "DATE",
2067
+ regex: /\b((?:\d{1,2}[\/\-.]\d{1,2}[\/\-.]\d{2,4})|(?:\d{1,2}\s+(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\s+\d{2,4}))\b/gi,
2068
+ priority: 60,
2069
+ placeholder: "[DATE_{n}]",
2070
+ description: "Date (standalone, without DOB context)",
2071
+ severity: "medium",
2072
+ validator: (value, context) => {
2073
+ const yearPattern = /^(19|20)\d{2}$/;
2074
+ if (yearPattern.test(value.replace(/[\/\-.\s]/g, ""))) return false;
2075
+ const versionContext = /\b(version|v\d+|release|build|update)\s*[:\s]*/i;
2076
+ if (versionContext.test(context)) return false;
2077
+ return true;
2078
+ }
2033
2079
  }
2034
2080
  ];
2035
2081
 
@@ -2037,7 +2083,7 @@ var personalPatterns = [
2037
2083
  var financialPatterns = [
2038
2084
  {
2039
2085
  type: "CREDIT_CARD",
2040
- regex: /\b(?:(?:\d{4}[\s-]?){3}\d{4}|\d{4}[\s-]?\d{6}[\s-]?\d{5})\b/g,
2086
+ regex: /(?<!\d)(?:(?:\d{4}[\s\u00A0.-]?){3}\d{4}|\d{4}[\s\u00A0.-]?\d{6}[\s\u00A0.-]?\d{5})(?!\d)/g,
2041
2087
  priority: 100,
2042
2088
  validator: (match) => validateLuhn(match),
2043
2089
  placeholder: "[CREDIT_CARD_{n}]",
@@ -2046,7 +2092,7 @@ var financialPatterns = [
2046
2092
  },
2047
2093
  {
2048
2094
  type: "IBAN",
2049
- regex: /\b[A-Z]{2}\d{2}[A-Z0-9]{1,30}\b/g,
2095
+ regex: /\b([A-Z]{2}\d{2}(?:[ \u00A0.-]?[A-Z0-9]){11,30})\b/gi,
2050
2096
  priority: 95,
2051
2097
  validator: (match) => validateIBAN(match),
2052
2098
  placeholder: "[IBAN_{n}]",
@@ -2055,7 +2101,7 @@ var financialPatterns = [
2055
2101
  },
2056
2102
  {
2057
2103
  type: "BANK_ACCOUNT_UK",
2058
- regex: /\b(?:account|acc)[:\s#]*([0-9]{8})\b/gi,
2104
+ regex: /\b(?:account|acc|a\/c)[:\s#-]*((?:\d{4}[\s-]?\d{4})|(?:\d{2}[\s-]?\d{2}[\s-]?\d{4}))\b/gi,
2059
2105
  priority: 90,
2060
2106
  placeholder: "[BANK_ACCOUNT_{n}]",
2061
2107
  description: "UK bank account number",
@@ -2063,7 +2109,7 @@ var financialPatterns = [
2063
2109
  },
2064
2110
  {
2065
2111
  type: "SORT_CODE_UK",
2066
- regex: /\b(?:sort[:\s]?code|SC)[:\s]*(\d{2}[-\s]?\d{2}[-\s]?\d{2})\b/gi,
2112
+ regex: /\b(?:sort[\s-]*code|SC)[:\s.-]*((?:\d{2}[\s.-]?){2}\d{2})\b/gi,
2067
2113
  priority: 90,
2068
2114
  validator: (match) => validateSortCode(match),
2069
2115
  placeholder: "[SORT_CODE_{n}]",
@@ -2072,8 +2118,9 @@ var financialPatterns = [
2072
2118
  },
2073
2119
  {
2074
2120
  type: "ROUTING_NUMBER_US",
2075
- regex: /\b(?:routing|RTN|ABA)[:\s#]*([0-9]{9})\b/gi,
2121
+ regex: /\b(?:routing|RTN|ABA)[-\s\u00A0]*(?:number|no|num)?[-\s\u00A0.:#]*((?:\d[\s\u00A0.-]?){9})\b/gi,
2076
2122
  priority: 90,
2123
+ validator: (match) => validateRoutingNumber(match),
2077
2124
  placeholder: "[ROUTING_NUMBER_{n}]",
2078
2125
  description: "US routing number",
2079
2126
  severity: "high"
@@ -2088,11 +2135,15 @@ var financialPatterns = [
2088
2135
  },
2089
2136
  {
2090
2137
  type: "IFSC",
2091
- regex: /\b[A-Z]{4}0[A-Z0-9]{6}\b/g,
2138
+ regex: /\b([A-Z]{4})[-\s\u00A0.]?0[-\s\u00A0.]?([A-Z0-9]{6})\b/gi,
2092
2139
  priority: 90,
2093
2140
  placeholder: "[IFSC_{n}]",
2094
2141
  description: "Indian Financial System Code",
2095
- severity: "high"
2142
+ severity: "high",
2143
+ validator: (value) => {
2144
+ const cleaned = value.replace(/[\s\u00A0.-]/g, "").toUpperCase();
2145
+ return /^[A-Z]{4}0[A-Z0-9]{6}$/.test(cleaned);
2146
+ }
2096
2147
  },
2097
2148
  {
2098
2149
  type: "CLABE",
@@ -2246,7 +2297,7 @@ var financialPatterns = [
2246
2297
  var governmentPatterns = [
2247
2298
  {
2248
2299
  type: "SSN",
2249
- regex: /\b(?:SSN|social security)[:\s#]*(\d{3}[-\s]?\d{2}[-\s]?\d{4})\b/gi,
2300
+ regex: /\b(?:SSN|social\s+security)\b[:\s#-]*([0-9]{3}[\s\u00A0.-]?[0-9]{2}[\s\u00A0.-]?[0-9]{4})\b/gi,
2250
2301
  priority: 100,
2251
2302
  validator: (match) => validateSSN(match),
2252
2303
  placeholder: "[SSN_{n}]",
@@ -2255,7 +2306,7 @@ var governmentPatterns = [
2255
2306
  },
2256
2307
  {
2257
2308
  type: "PASSPORT_UK",
2258
- regex: /\b(?:passport|pass)[:\s#]*([0-9]{9})\b/gi,
2309
+ regex: /\b(?:passport|pass)[:\s#-]*((?:\d{3}[\s\u00A0.-]?){2}\d{3})\b/gi,
2259
2310
  priority: 95,
2260
2311
  validator: (match) => validateUKPassport(match),
2261
2312
  placeholder: "[PASSPORT_{n}]",
@@ -2264,7 +2315,7 @@ var governmentPatterns = [
2264
2315
  },
2265
2316
  {
2266
2317
  type: "PASSPORT_US",
2267
- regex: /\b(?:passport|pass)[:\s#]*([A-Z0-9]{6,9})\b/gi,
2318
+ regex: /\b(?:passport|pass)[:\s#-]*(([A-Z0-9][\s\u00A0.-]?){5,8}[A-Z0-9])\b/gi,
2268
2319
  priority: 95,
2269
2320
  placeholder: "[PASSPORT_{n}]",
2270
2321
  description: "US Passport number",
@@ -2272,7 +2323,7 @@ var governmentPatterns = [
2272
2323
  },
2273
2324
  {
2274
2325
  type: "NATIONAL_INSURANCE_UK",
2275
- regex: /\b(?:NI|NINO|national insurance)[:\s#]*([A-CEGHJ-PR-TW-Z]{2}\s?\d{2}\s?\d{2}\s?\d{2}\s?[A-D])\b/gi,
2326
+ regex: /\b(?:NI\b|NINO|national\s+insurance)[:\s#-]*([A-CEGHJ-PR-TW-Z]{2}(?:[\s\u00A0.-]?\d{2}){3}[\s\u00A0.-]?[A-D])\b/gi,
2276
2327
  priority: 100,
2277
2328
  validator: (match) => validateNINO(match),
2278
2329
  placeholder: "[NINO_{n}]",
@@ -2281,7 +2332,7 @@ var governmentPatterns = [
2281
2332
  },
2282
2333
  {
2283
2334
  type: "NHS_NUMBER",
2284
- regex: /\b(?:NHS|nhs number)[:\s#]*(\d{3}[\s-]?\d{3}[\s-]?\d{4})\b/gi,
2335
+ regex: /\b(?:NHS|nhs number)[:\s#-]*((?:\d{3}[\s\u00A0.-]?){2}\d{4})\b/gi,
2285
2336
  priority: 95,
2286
2337
  validator: (match) => validateNHS(match),
2287
2338
  placeholder: "[NHS_{n}]",
@@ -2290,15 +2341,27 @@ var governmentPatterns = [
2290
2341
  },
2291
2342
  {
2292
2343
  type: "DRIVING_LICENSE_UK",
2293
- regex: /\b([A-Z]{5}\d{6}[A-Z]{2}\d[A-Z]{2})\b/g,
2344
+ regex: /\b(?:DL|DRIVING|DRIVER(?:'S)?|LICEN[SC]E)?[\s#:-]*(?:NO|NUM(?:BER)?|ID)?[\s#:-]*([A-Z]{5}[\s\u00A0.-]?\d{2}[\s\u00A0.-]?\d{2}[\s\u00A0.-]?\d{2}[\s\u00A0.-]?[A-Z]{2}[\s\u00A0.-]?\d[\s\u00A0.-]?[A-Z]{2})\b/gi,
2294
2345
  priority: 90,
2295
2346
  placeholder: "[DRIVING_LICENSE_{n}]",
2296
2347
  description: "UK Driving License",
2297
- severity: "high"
2348
+ severity: "high",
2349
+ validator: (value) => {
2350
+ const normalized = value.replace(/[\s\u00A0.-]/g, "").toUpperCase();
2351
+ if (!/^[A-Z]{5}\d{6}[A-Z]{2}\d[A-Z]{2}$/.test(normalized)) {
2352
+ return false;
2353
+ }
2354
+ const dob = normalized.slice(5, 11);
2355
+ const month = parseInt(dob.slice(2, 4), 10);
2356
+ const day = parseInt(dob.slice(4, 6), 10);
2357
+ const validMonth = month >= 1 && month <= 12 || month >= 51 && month <= 62;
2358
+ const validDay = day >= 1 && day <= 31;
2359
+ return validMonth && validDay;
2360
+ }
2298
2361
  },
2299
2362
  {
2300
2363
  type: "DRIVING_LICENSE_US",
2301
- regex: /\b(?:DL|driver(?:'s)?\slicense)[:\s#]*([A-Z0-9]{5,20})\b/gi,
2364
+ regex: /\b(?:DL|driver(?:'s)?\slicense)[:\s#-]*([A-Z0-9](?:[A-Z0-9][\s\u00A0.-]?){3,18}[A-Z0-9])\b/gi,
2302
2365
  priority: 90,
2303
2366
  placeholder: "[DRIVING_LICENSE_{n}]",
2304
2367
  description: "US Driving License",
@@ -2306,7 +2369,7 @@ var governmentPatterns = [
2306
2369
  },
2307
2370
  {
2308
2371
  type: "TAX_ID",
2309
- regex: /\b(?:TIN|tax id|EIN)[:\s#]*(\d{2}[-\s]?\d{7})\b/gi,
2372
+ regex: /\b(?:TIN|tax id|EIN)[:\s#-]*(\d{2}(?:[\s\u00A0.-]?\d){7})\b/gi,
2310
2373
  priority: 95,
2311
2374
  placeholder: "[TAX_ID_{n}]",
2312
2375
  description: "Tax identification number",
@@ -2314,7 +2377,7 @@ var governmentPatterns = [
2314
2377
  },
2315
2378
  {
2316
2379
  type: "PASSPORT_MRZ_TD3",
2317
- regex: /P<[A-Z]{3}[A-Z<]{39}\n[A-Z0-9<]{9}[0-9][A-Z]{3}[0-9]{6}[0-9][MF<][0-9]{6}[0-9][A-Z0-9<]{14}[0-9]/g,
2380
+ regex: /P<[A-Z]{3}[A-Z<]{39}\r?\n[A-Z0-9<]{9}[0-9][A-Z]{3}[0-9]{6}[0-9][MF<][0-9]{6}[0-9][A-Z0-9<]{14}[0-9]/g,
2318
2381
  priority: 98,
2319
2382
  placeholder: "[PASSPORT_MRZ_{n}]",
2320
2383
  description: "Passport Machine Readable Zone (TD3 - 2 lines x 44 chars)",
@@ -2322,7 +2385,7 @@ var governmentPatterns = [
2322
2385
  },
2323
2386
  {
2324
2387
  type: "PASSPORT_MRZ_TD1",
2325
- regex: /[A-Z]{1}[A-Z<][A-Z]{3}[A-Z0-9<]{9}[0-9][A-Z0-9<]{15}\n[0-9]{6}[0-9][MF<][0-9]{6}[0-9][A-Z]{3}[A-Z0-9<]{11}[0-9]\n[A-Z<]{30}/g,
2388
+ regex: /[A-Z]{1}[A-Z<][A-Z]{3}[A-Z0-9<]{9}[0-9][A-Z0-9<]{15}\r?\n[0-9]{6}[0-9][MF<][0-9]{6}[0-9][A-Z]{3}[A-Z0-9<]{11}[0-9]\r?\n[A-Z<]{30}/g,
2326
2389
  priority: 98,
2327
2390
  placeholder: "[ID_MRZ_{n}]",
2328
2391
  description: "ID Card Machine Readable Zone (TD1 - 3 lines x 30 chars)",
@@ -2330,7 +2393,7 @@ var governmentPatterns = [
2330
2393
  },
2331
2394
  {
2332
2395
  type: "VISA_MRZ",
2333
- regex: /V<[A-Z]{3}[A-Z<]{39}\n[A-Z0-9<]{9}[0-9][A-Z]{3}[0-9]{6}[0-9][MF<][0-9]{6}[0-9][A-Z0-9<]{14}[0-9]/g,
2396
+ regex: /V<[A-Z]{3}[A-Z<]{39}\r?\n[A-Z0-9<]{9}[0-9][A-Z]{3}[0-9]{6}[0-9][MF<][0-9]{6}[0-9][A-Z0-9<]{14}[0-9]/g,
2334
2397
  priority: 98,
2335
2398
  placeholder: "[VISA_MRZ_{n}]",
2336
2399
  description: "Visa Machine Readable Zone",
@@ -2338,7 +2401,7 @@ var governmentPatterns = [
2338
2401
  },
2339
2402
  {
2340
2403
  type: "TRAVEL_DOCUMENT_NUMBER",
2341
- regex: /\b(?:TRAVEL\s+DOC(?:UMENT)?|TD)[:\s#]*([A-Z0-9]{6,15})\b/gi,
2404
+ regex: /\b(?:TRAVEL\s+DOC(?:UMENT)?|TD)[:\s#-]*([A-Z0-9](?:[A-Z0-9][\s\u00A0.-]?){4,13}[A-Z0-9])\b/gi,
2342
2405
  priority: 92,
2343
2406
  placeholder: "[TRAVEL_DOC_{n}]",
2344
2407
  description: "Travel document numbers",
@@ -2349,7 +2412,7 @@ var governmentPatterns = [
2349
2412
  },
2350
2413
  {
2351
2414
  type: "VISA_NUMBER",
2352
- regex: /\b(?:VISA)[:\s#]*([A-Z0-9]{8,12})\b/gi,
2415
+ regex: /\b(?:VISA)[:\s#-]*([A-Z0-9](?:[A-Z0-9][\s\u00A0.-]?){6,10}[A-Z0-9])\b/gi,
2353
2416
  priority: 92,
2354
2417
  placeholder: "[VISA_{n}]",
2355
2418
  description: "Visa numbers",
@@ -2360,7 +2423,7 @@ var governmentPatterns = [
2360
2423
  },
2361
2424
  {
2362
2425
  type: "IMMIGRATION_NUMBER",
2363
- regex: /\b(?:IMMIGRATION|ALIEN|A-NUMBER|A#)[:\s#]*([A-Z]?\d{8,10})\b/gi,
2426
+ regex: /\b(?:IMMIGRATION|ALIEN|A-NUMBER|A#)[:\s#-]*([A-Z]?(?:\d[\s\u00A0.-]?){7,9})\b/gi,
2364
2427
  priority: 92,
2365
2428
  placeholder: "[IMMIGRATION_{n}]",
2366
2429
  description: "Immigration and alien registration numbers",
@@ -2368,7 +2431,7 @@ var governmentPatterns = [
2368
2431
  },
2369
2432
  {
2370
2433
  type: "BORDER_CROSSING_CARD",
2371
- regex: /\b(?:BCC|BORDER\s+CROSSING)[:\s#]*([A-Z0-9]{10,15})\b/gi,
2434
+ regex: /\b(?:BCC|BORDER\s+CROSSING)[:\s#-]*([A-Z0-9](?:[A-Z0-9\s\u00A0.-]?){8,13}[A-Z0-9])\b/gi,
2372
2435
  priority: 90,
2373
2436
  placeholder: "[BCC_{n}]",
2374
2437
  description: "Border crossing card numbers",
@@ -2379,7 +2442,7 @@ var governmentPatterns = [
2379
2442
  },
2380
2443
  {
2381
2444
  type: "UTR_UK",
2382
- regex: /\b(?:UTR|unique taxpayer reference)[:\s#]*(\d{10})\b/gi,
2445
+ regex: /\b(?:UTR|unique taxpayer reference)[:\s#-]*((?:\d[\s\u00A0.-]?){10})\b/gi,
2383
2446
  priority: 95,
2384
2447
  validator: (match) => {
2385
2448
  const digits = match.replace(/\D/g, "");
@@ -2391,10 +2454,10 @@ var governmentPatterns = [
2391
2454
  },
2392
2455
  {
2393
2456
  type: "VAT_NUMBER",
2394
- regex: /\b(?:VAT|vat number)[:\s#]*([A-Z]{2}\s?\d{9,12})\b/gi,
2457
+ regex: /\b(?:VAT|vat number)[:\s#-]*([A-Z]{2}(?:[\s\u00A0.-]?[A-Z0-9]){7,12})\b/gi,
2395
2458
  priority: 90,
2396
2459
  validator: (match) => {
2397
- const cleaned = match.replace(/\s/g, "");
2460
+ const cleaned = match.replace(/[\s\u00A0.-]/g, "");
2398
2461
  const countryCode = cleaned.substring(0, 2).toUpperCase();
2399
2462
  const validCountries = ["GB", "DE", "FR", "IT", "ES", "NL", "BE", "AT", "PL", "SE", "DK", "FI", "IE", "PT", "CZ", "HU", "RO", "BG", "GR", "HR", "SK", "SI", "LT", "LV", "EE", "CY", "LU", "MT"];
2400
2463
  if (!validCountries.includes(countryCode)) {
@@ -2473,7 +2536,7 @@ var governmentPatterns = [
2473
2536
  var contactPatterns = [
2474
2537
  {
2475
2538
  type: "PHONE_UK_MOBILE",
2476
- regex: /\b07\d{3}[\s-]?\d{3}[\s-]?\d{3}\b/g,
2539
+ regex: /\b(?:\+?44[\s.-]?7\d{3}|0?7\d{3})[\s.-]?\d{3}[\s.-]?\d{3}\b/g,
2477
2540
  priority: 90,
2478
2541
  placeholder: "[PHONE_UK_MOBILE_{n}]",
2479
2542
  description: "UK mobile phone",
@@ -2481,7 +2544,7 @@ var contactPatterns = [
2481
2544
  },
2482
2545
  {
2483
2546
  type: "PHONE_UK",
2484
- regex: /\b(?:0[1-9]\d{1,2}[\s-]?\d{3,4}[\s-]?\d{4}|\+44[\s-]?[1-9]\d{1,2}[\s-]?\d{3,4}[\s-]?\d{4})\b/g,
2547
+ regex: /\b(?:\+?44[\s.-]?(?:0)?\s*)?(?:\(?0?[1-9]\d{1,3}\)?[\s.-]?\d{3,4}[\s.-]?\d{3,4})(?:\s?(?:ext\.?|x)\s?\d{1,5})?\b/g,
2485
2548
  priority: 85,
2486
2549
  placeholder: "[PHONE_UK_{n}]",
2487
2550
  description: "UK phone number",
@@ -2489,15 +2552,23 @@ var contactPatterns = [
2489
2552
  },
2490
2553
  {
2491
2554
  type: "PHONE_US",
2492
- regex: /(?<=^|[^\d])(?:\+1[\s-]?)?(?:\(\d{3}\)\s?|\d{3}[\s-]?)\d{3}[\s-]?\d{4}(?=[^\d]|$)/g,
2555
+ regex: /\b(?:\+1[\s.-]?)?(?:\(\d{3}\)|\d{3})[\s.-]?\d{3}[\s.-]?\d{4}(?:\s?(?:ext\.?|x)\s?\d{1,6})?\b/g,
2493
2556
  priority: 85,
2494
2557
  placeholder: "[PHONE_US_{n}]",
2495
2558
  description: "US phone number",
2496
- severity: "medium"
2559
+ severity: "medium",
2560
+ validator: (value, context) => {
2561
+ const versionContext = /\b(version|v\d+|release|build)\s*[:\s]*/i;
2562
+ if (versionContext.test(context)) return false;
2563
+ const cleaned = value.replace(/[\s()-]/g, "");
2564
+ const datePattern = /^\d{2}[-/]\d{2}[-/]\d{4}$/;
2565
+ if (datePattern.test(cleaned)) return false;
2566
+ return true;
2567
+ }
2497
2568
  },
2498
2569
  {
2499
2570
  type: "PHONE_INTERNATIONAL",
2500
- regex: /\b\+\d{1,3}[\s-]?\d{1,4}[\s-]?\d{1,4}[\s-]?\d{1,9}\b/g,
2571
+ regex: /\b\+(?:\d[\s.\-()]?){6,14}\d(?:\s?(?:ext\.?|x)\s?\d{1,6})?\b/g,
2501
2572
  priority: 80,
2502
2573
  placeholder: "[PHONE_{n}]",
2503
2574
  description: "International phone number",
@@ -2521,7 +2592,7 @@ var contactPatterns = [
2521
2592
  },
2522
2593
  {
2523
2594
  type: "ADDRESS_STREET",
2524
- regex: /\b(\d{1,5}\s[A-Z][a-z]+(?:\s[A-Z][a-z]+){0,3}\s(?:Street|St|Road|Rd|Avenue|Ave|Lane|Ln|Drive|Dr|Court|Ct|Boulevard|Blvd))\b/g,
2595
+ regex: /\b\d{1,5}\s+[A-Za-z0-9][A-Za-z0-9'’.\-]*(?:\s+[A-Za-z0-9][A-Za-z0-9'’.\-]*){0,4}\s+(?:Street|St\.?|Road|Rd\.?|Avenue|Ave\.?|Lane|Ln\.?|Drive|Dr\.?|Court|Ct\.?|Boulevard|Blvd\.?|Way|Terrace|Ter\.?|Place|Pl\.?|Trail|Trl\.?|Parkway|Pkwy\.?|Highway|Hwy\.)(?:\s+(?:Apt|Unit|Suite|Ste)\s*\d+)?\b/gi,
2525
2596
  priority: 70,
2526
2597
  placeholder: "[ADDRESS_{n}]",
2527
2598
  description: "Street address",
@@ -2872,15 +2943,20 @@ var BIOBANK_SAMPLE_ID = {
2872
2943
  };
2873
2944
  var PROVIDER_LICENSE = {
2874
2945
  type: "PROVIDER_LICENSE",
2875
- regex: /\b(?:MEDICAL|PHYSICIAN|DOCTOR|NURSE|PROVIDER)[-\s]?(?:LICENSE|LICENCE|LIC)[-\s]?(?:NO|NUM(?:BER)?)?[-\s]?[:#]?\s*([A-Z0-9]{6,12})\b/gi,
2946
+ regex: /\b(?:MEDICAL|PHYSICIAN|DOCTOR|NURSE|PROVIDER)[-\s\u00A0]*(?:LICENSE|LICENCE|LIC)[-\s\u00A0]*(?:NO|NUM(?:BER)?)?[-\s\u00A0.:#]*((?:[A-Z0-9]{2,6}[\s\u00A0./-]?){1,3}[A-Z0-9]{2,6})\b/gi,
2876
2947
  placeholder: "[PROVIDER_LIC_{n}]",
2877
2948
  priority: 80,
2878
2949
  severity: "high",
2879
- description: "Healthcare provider license numbers"
2950
+ description: "Healthcare provider license numbers",
2951
+ validator: (value) => {
2952
+ const normalized = value.replace(/[^A-Za-z0-9]/g, "");
2953
+ if (normalized.length < 6 || normalized.length > 18) return false;
2954
+ return /[A-Z]/i.test(normalized) && /\d/.test(normalized);
2955
+ }
2880
2956
  };
2881
2957
  var NPI_NUMBER = {
2882
2958
  type: "NPI_NUMBER",
2883
- regex: /\b(?:NPI[-\s]?(?:NO|NUM(?:BER)?)?[-\s]?[:#]?\s*)?(\d{10})\b/g,
2959
+ regex: /\b(?:NPI[-\s\u00A0]*(?:NO|NUM(?:BER)?)?[-\s\u00A0.:#]*)?((?:\d[\s\u00A0.-]?){10})\b/g,
2884
2960
  placeholder: "[NPI_{n}]",
2885
2961
  priority: 85,
2886
2962
  severity: "high",
@@ -2889,7 +2965,8 @@ var NPI_NUMBER = {
2889
2965
  if (!/provider|npi|physician|doctor|clinic|hospital|practice/i.test(context)) {
2890
2966
  return false;
2891
2967
  }
2892
- const digits = value.split("").map(Number);
2968
+ const digits = value.replace(/\D/g, "").split("").map(Number);
2969
+ if (digits.length !== 10) return false;
2893
2970
  let sum = 0;
2894
2971
  for (let i = digits.length - 2; i >= 0; i--) {
2895
2972
  let digit = digits[i];
@@ -2905,17 +2982,19 @@ var NPI_NUMBER = {
2905
2982
  };
2906
2983
  var DEA_NUMBER = {
2907
2984
  type: "DEA_NUMBER",
2908
- regex: /\b(?:DEA[-\s]?(?:NO|NUM(?:BER)?)?[-\s]?[:#]?\s*)?([A-Z]{2}\d{7})\b/gi,
2985
+ regex: /\b(?:DEA[-\s\u00A0]*(?:NO|NUM(?:BER)?)?[-\s\u00A0.:#]*)?([A-Z]{2}(?:[\s\u00A0.-]?\d){7})\b/gi,
2909
2986
  placeholder: "[DEA_{n}]",
2910
2987
  priority: 90,
2911
2988
  severity: "high",
2912
2989
  description: "DEA registration number for controlled substances",
2913
2990
  validator: (value, _context) => {
2991
+ const normalized = value.replace(/[^A-Za-z0-9]/g, "").toUpperCase();
2992
+ if (normalized.length !== 9) return false;
2914
2993
  const validFirstLetters = ["A", "B", "C", "D", "E", "F", "G", "H", "J", "K", "L", "M", "P", "R", "S", "T", "U"];
2915
- if (!validFirstLetters.includes(value[0].toUpperCase())) {
2994
+ if (!validFirstLetters.includes(normalized[0])) {
2916
2995
  return false;
2917
2996
  }
2918
- const digits = value.substring(2).split("").map(Number);
2997
+ const digits = normalized.substring(2).split("").map(Number);
2919
2998
  const sum1 = digits[0] + digits[2] + digits[4];
2920
2999
  const sum2 = (digits[1] + digits[3] + digits[5]) * 2;
2921
3000
  const checkDigit = (sum1 + sum2) % 10;
@@ -2940,11 +3019,16 @@ var EMERGENCY_CONTACT_MARKER = {
2940
3019
  };
2941
3020
  var BIOMETRIC_ID = {
2942
3021
  type: "BIOMETRIC_ID",
2943
- regex: /\b(?:FINGERPRINT|RETINAL?[-\s]?SCAN|IRIS[-\s]?SCAN|VOICE[-\s]?PRINT|FACIAL[-\s]?RECOGNITION|BIOMETRIC)[-\s]?(?:ID|DATA|TEMPLATE|HASH)?[-\s]?[:#]?\s*([A-Z0-9]{8,40})\b/gi,
3022
+ regex: /\b(?:FINGERPRINT|RETINAL?[-\s\u00A0]?SCAN|IRIS[-\s\u00A0]?SCAN|VOICE[-\s\u00A0]?PRINT|FACIAL[-\s\u00A0]?RECOGNITION|BIOMETRIC)[-\s\u00A0]?(?:ID|DATA|TEMPLATE|HASH)?[-\s\u00A0.:#]*([A-Z0-9][A-Z0-9._-]{7,39})\b/gi,
2944
3023
  placeholder: "[BIOMETRIC_{n}]",
2945
3024
  priority: 95,
2946
3025
  severity: "high",
2947
- description: "Biometric identifier references"
3026
+ description: "Biometric identifier references",
3027
+ validator: (value) => {
3028
+ const normalized = value.replace(/[^A-Za-z0-9]/g, "");
3029
+ if (normalized.length < 8 || normalized.length > 40) return false;
3030
+ return /[A-Z]/i.test(normalized) && /\d/.test(normalized);
3031
+ }
2948
3032
  };
2949
3033
  var DNA_SEQUENCE = {
2950
3034
  type: "DNA_SEQUENCE",
@@ -2973,7 +3057,7 @@ var DRUG_DOSAGE = {
2973
3057
  };
2974
3058
  var MEDICAL_IMAGE_REF = {
2975
3059
  type: "MEDICAL_IMAGE_REF",
2976
- regex: /\b(?:X[-\s]?RAY|MRI|CT[-\s]?SCAN|PET[-\s]?SCAN|ULTRASOUND|MAMMOGRAM)[-\s]?(?:IMAGE|FILE|ID)?[-\s]?[:#]?\s*([A-Z0-9]{6,20})\b/gi,
3060
+ regex: /\b(?:X[-\s\u00A0]?RAY|MRI|CT[-\s\u00A0]?SCAN|PET[-\s\u00A0]?SCAN|ULTRASOUND|MAMMOGRAM)[-\s\u00A0]?(?:IMAGE|FILE|ID)?[-\s\u00A0.:#]*([A-Z0-9][A-Z0-9_.-]{5,23})\b/gi,
2977
3061
  placeholder: "[IMAGE_{n}]",
2978
3062
  priority: 80,
2979
3063
  severity: "high",
@@ -3136,11 +3220,18 @@ var TRANSACTION_ID = {
3136
3220
  };
3137
3221
  var INVESTMENT_ACCOUNT = {
3138
3222
  type: "INVESTMENT_ACCOUNT",
3139
- regex: /\b(?:ISA|SIPP|INV(?:ESTMENT)?|PENSION|401K|IRA)[-\s]?(?:ACCOUNT|ACCT|A\/C)?[-\s]?(?:NO|NUM(?:BER)?)?[-\s]?[:#]?\s*([A-Z0-9]{6,15})\b/gi,
3223
+ regex: /\b(?:ISA|SIPP|INV(?:ESTMENT)?|PENSION|401K|IRA)[-\s\u00A0]*(?:ACCOUNT|ACCT|A\/C)?[-\s\u00A0]*(?:NO|NUM(?:BER)?)?[-\s\u00A0.:#]*([A-Z0-9](?:[A-Z0-9][\s\u00A0./-]?){5,18}[A-Z0-9])\b/gi,
3140
3224
  placeholder: "[INV_ACCT_{n}]",
3141
3225
  priority: 85,
3142
3226
  severity: "high",
3143
- description: "Investment and pension account numbers"
3227
+ description: "Investment and pension account numbers",
3228
+ validator: (value, context) => {
3229
+ const normalized = value.replace(/[\s\u00A0./-]/g, "");
3230
+ const hasDigits = /\d{4,}/.test(normalized);
3231
+ const validLength = normalized.length >= 6 && normalized.length <= 15;
3232
+ const inContext = /isa|sipp|invest|pension|401k|ira|account|fund/i.test(context);
3233
+ return hasDigits && validLength && inContext;
3234
+ }
3144
3235
  };
3145
3236
  var WIRE_TRANSFER_REF = {
3146
3237
  type: "WIRE_TRANSFER_REF",
@@ -4274,13 +4365,17 @@ var RESUME_ID = {
4274
4365
  };
4275
4366
  var BENEFITS_PLAN_NUMBER = {
4276
4367
  type: "BENEFITS_PLAN_NUMBER",
4277
- regex: /\b(?:BENEFITS?|INSURANCE|HEALTH[-\s]?PLAN)[-\s]?(?:PLAN)?[-\s]?(?:NO|NUM(?:BER)?|ID)?[-\s]?[:#]?\s*([A-Z0-9]{6,14})\b/gi,
4368
+ regex: /\b(?:BENEFITS?|INSURANCE|HEALTH[-\s\u00A0]?PLAN)[-\s\u00A0]*(?:PLAN)?[-\s\u00A0]*(?:NO|NUM(?:BER)?|ID)?[-\s\u00A0.:#]*([A-Z0-9](?:[A-Z0-9][\s\u00A0./-]?){5,15}[A-Z0-9])\b/gi,
4278
4369
  placeholder: "[BENEFITS_{n}]",
4279
4370
  priority: 85,
4280
4371
  severity: "high",
4281
4372
  description: "Employee benefits and insurance plan numbers",
4282
- validator: (_value, context) => {
4283
- return /benefit|insurance|health|dental|vision|plan|policy|enrollment/i.test(context);
4373
+ validator: (value, context) => {
4374
+ const normalized = value.replace(/[\s\u00A0./-]/g, "");
4375
+ const hasDigits = /\d{4,}/.test(normalized);
4376
+ const validLength = normalized.length >= 6 && normalized.length <= 14;
4377
+ const inContext = /benefit|insurance|health|dental|vision|plan|policy|enrollment/i.test(context);
4378
+ return hasDigits && validLength && inContext;
4284
4379
  }
4285
4380
  };
4286
4381
  var RETIREMENT_ACCOUNT = {
@@ -4378,13 +4473,16 @@ var EXIT_INTERVIEW_ID = {
4378
4473
  };
4379
4474
  var DISCIPLINARY_ACTION_ID = {
4380
4475
  type: "DISCIPLINARY_ACTION_ID",
4381
- regex: /\b(?:DISCIPLINARY|INCIDENT|WARNING|VIOLATION)[-\s]?(?:ACTION)?[-\s]?(?:NO|NUM(?:BER)?|ID)?[-\s]?[:#]?\s*([A-Z0-9]{6,12})\b/gi,
4476
+ regex: /\b(?:DISCIPLINARY|INCIDENT|WARNING|VIOLATION)[-\s\u00A0]*(?:ACTION)?[-\s\u00A0]*(?:NO|NUM(?:BER)?|ID)?[-\s\u00A0.:#]*([A-Z0-9](?:[A-Z0-9][\s\u00A0./-]?){5,15}[A-Z0-9])\b/gi,
4382
4477
  placeholder: "[DISCIPLINE_{n}]",
4383
4478
  priority: 85,
4384
4479
  severity: "high",
4385
4480
  description: "Disciplinary action and incident identifiers",
4386
- validator: (_value, context) => {
4387
- return /disciplinary|incident|warning|violation|misconduct|investigation/i.test(context);
4481
+ validator: (value, context) => {
4482
+ const normalized = value.replace(/[\s\u00A0./-]/g, "");
4483
+ const hasDigits = /\d{3,}/.test(normalized);
4484
+ const validLength = normalized.length >= 6 && normalized.length <= 12;
4485
+ return hasDigits && validLength && /disciplinary|incident|warning|violation|misconduct|investigation/i.test(context);
4388
4486
  }
4389
4487
  };
4390
4488
  var EMERGENCY_CONTACT_REF = {
@@ -4712,7 +4810,7 @@ var TELECOMS_ACCOUNT_NUMBER = {
4712
4810
  type: "TELECOMS_ACCOUNT_NUMBER",
4713
4811
  regex: /\bACC(?:OUNT)?[-\s]?(?:NO|NUM(?:BER)?)?[-\s]?[:#]?\s*(\d{8,12})\b/gi,
4714
4812
  placeholder: "[ACCOUNT_{n}]",
4715
- priority: 85,
4813
+ priority: 90,
4716
4814
  severity: "high",
4717
4815
  description: "Telecommunications customer account numbers",
4718
4816
  validator: (_value, context) => {
@@ -5590,7 +5688,7 @@ var EMERGENCY_CALL_REF = {
5590
5688
  };
5591
5689
  var POLICE_REPORT_NUMBER = {
5592
5690
  type: "POLICE_REPORT_NUMBER",
5593
- regex: /\b(?:POLICE|PR|RPT|REPORT|CASE)[-\s]?(?:NO|NUM|NUMBER|ID)?[-\s]?[:#]?\s*(\d{4}[-\s]?\d{5,10}|[A-Z]{2,4}[-\s]?\d{6,10})\b/gi,
5691
+ regex: /\b(?:POLICE|PR|RPT|REPORT|CASE)[-\s\u00A0]*(?:NO|NUM|NUMBER|ID)?[-\s\u00A0.:#]*((?:[A-Z]{2,4}[\s\u00A0./-]?\d{2,4}[\s\u00A0./-]?\d{4,10})|\d{4}[\s\u00A0./-]?\d{5,10})\b/gi,
5594
5692
  placeholder: "[POLICE_RPT_{n}]",
5595
5693
  priority: 95,
5596
5694
  severity: "high",
@@ -5601,7 +5699,7 @@ var POLICE_REPORT_NUMBER = {
5601
5699
  };
5602
5700
  var FIRE_INCIDENT_NUMBER = {
5603
5701
  type: "FIRE_INCIDENT_NUMBER",
5604
- regex: /\b(?:FIRE|FI|FD)[-\s]?(?:INCIDENT|INC|NO|NUM|NUMBER|ID)?[-\s]?[:#]?\s*(\d{4}[-\s]?\d{4,8}|[A-Z]{2,4}[-\s]?\d{5,10})\b/gi,
5702
+ regex: /\b(?:FIRE|FI|FD)[-\s\u00A0]*(?:INCIDENT|INC|NO|NUM|NUMBER|ID)?[-\s\u00A0.:#]*((?:[A-Z]{2,4}[\s\u00A0./-]?\d{2,4}[\s\u00A0./-]?\d{4,10})|\d{4}[\s\u00A0./-]?\d{4,8})\b/gi,
5605
5703
  placeholder: "[FIRE_INC_{n}]",
5606
5704
  priority: 95,
5607
5705
  severity: "high",
@@ -9436,13 +9534,14 @@ var NINTENDO_FRIEND_CODE = {
9436
9534
  type: "NINTENDO_FRIEND_CODE",
9437
9535
  regex: /\bSW[-\s]?(\d{4}[-\s]?\d{4}[-\s]?\d{4})\b/gi,
9438
9536
  placeholder: "[NINTENDO_FC_{n}]",
9439
- priority: 85,
9537
+ priority: 90,
9440
9538
  severity: "medium",
9441
9539
  description: "Nintendo Switch Friend Code",
9442
9540
  validator: (value, context) => {
9443
9541
  const digits = value.replace(/\D/g, "");
9444
9542
  if (digits.length !== 12) return false;
9445
- return /nintendo|switch|friend[- ]?code|gaming/i.test(context);
9543
+ const hasContext = /nintendo|switch|friend[- ]?code|gaming/i.test(context);
9544
+ return hasContext;
9446
9545
  }
9447
9546
  };
9448
9547
  var BATTLETAG = {
@@ -9737,14 +9836,64 @@ var ccpaPreset = {
9737
9836
  "USERNAME"
9738
9837
  ]
9739
9838
  };
9839
/**
 * Industry presets returned by getPreset().
 *
 * Every preset switches on the four include* flags and carries its own
 * `categories` list; the flags are identical across presets, so they are
 * declared once and copied into each preset object.
 */
var industryPresetFlags = {
  includeNames: true,
  includeEmails: true,
  includePhones: true,
  includeAddresses: true
};
/** Returned for the preset names "healthcare" and "healthcare-provider". */
var healthcarePreset = {
  ...industryPresetFlags,
  categories: ["personal", "contact", "healthcare", "insurance", "government"]
};
/** Returned for "healthcare-research"; currently the same settings as healthcarePreset. */
var healthcareResearchPreset = {
  ...industryPresetFlags,
  categories: ["personal", "contact", "healthcare", "insurance", "government"]
};
/** Returned for "finance" and "financial-services". */
var financePreset = {
  ...industryPresetFlags,
  categories: ["personal", "contact", "financial", "government", "network"]
};
/** Returned for "education". */
var educationPreset = {
  ...industryPresetFlags,
  categories: ["personal", "contact", "education", "government", "network"]
};
/** Returned for "transport-logistics", "transportation" and "logistics". */
var transportLogisticsPreset = {
  ...industryPresetFlags,
  categories: ["personal", "contact", "transportation", "logistics", "vehicles", "network"]
};
9740
9874
  function getPreset(name) {
9741
- switch (name.toLowerCase()) {
9875
+ const presetName = name.toLowerCase();
9876
+ switch (presetName) {
9742
9877
  case "gdpr":
9743
9878
  return gdprPreset;
9744
9879
  case "hipaa":
9745
9880
  return hipaaPreset;
9746
9881
  case "ccpa":
9747
9882
  return ccpaPreset;
9883
+ case "healthcare":
9884
+ case "healthcare-provider":
9885
+ return healthcarePreset;
9886
+ case "healthcare-research":
9887
+ return healthcareResearchPreset;
9888
+ case "finance":
9889
+ case "financial-services":
9890
+ return financePreset;
9891
+ case "education":
9892
+ return educationPreset;
9893
+ case "transport-logistics":
9894
+ case "transportation":
9895
+ case "logistics":
9896
+ return transportLogisticsPreset;
9748
9897
  default:
9749
9898
  return {};
9750
9899
  }
@@ -10267,9 +10416,23 @@ var ConfigLoader = class {
10267
10416
  };
10268
10417
  }
10269
10418
  if (preset.startsWith("openredaction:")) {
10270
- const complianceType = preset.replace("openredaction:", "");
10271
- if (["gdpr", "hipaa", "ccpa"].includes(complianceType)) {
10272
- return { preset: complianceType };
10419
+ const presetName = preset.replace("openredaction:", "");
10420
+ const supportedPresets = [
10421
+ "gdpr",
10422
+ "hipaa",
10423
+ "ccpa",
10424
+ "healthcare",
10425
+ "healthcare-provider",
10426
+ "healthcare-research",
10427
+ "finance",
10428
+ "financial-services",
10429
+ "education",
10430
+ "transport-logistics",
10431
+ "transportation",
10432
+ "logistics"
10433
+ ];
10434
+ if (supportedPresets.includes(presetName)) {
10435
+ return { preset: presetName };
10273
10436
  }
10274
10437
  }
10275
10438
  return null;
@@ -10285,7 +10448,8 @@ var ConfigLoader = class {
10285
10448
  export default {
10286
10449
  // Extend built-in presets
10287
10450
  // Options: 'openredaction:recommended', 'openredaction:strict', 'openredaction:minimal'
10288
- // Or compliance: 'openredaction:gdpr', 'openredaction:hipaa', 'openredaction:ccpa'
10451
+ // Or compliance/industry presets: 'openredaction:gdpr', 'openredaction:hipaa', 'openredaction:ccpa',
10452
+ // 'openredaction:finance', 'openredaction:education', 'openredaction:healthcare', 'openredaction:transport-logistics'
10289
10453
  extends: ['openredaction:recommended'],
10290
10454
 
10291
10455
  // Detection options
@@ -11962,9 +12126,8 @@ var ExplainAPI = class {
11962
12126
  constructor(detector) {
11963
12127
  this.detector = detector;
11964
12128
  this.patterns = detector.getPatterns();
11965
- const testResult = detector.detect("Contact: admin@business.co.uk");
11966
- const hasConfidence = testResult.detections.length > 0 && testResult.detections[0].confidence !== void 0;
11967
12129
  const detectorOptions = detector.options;
12130
+ const hasConfidence = detectorOptions?.enableContextAnalysis || false;
11968
12131
  this.options = {
11969
12132
  enableContextAnalysis: hasConfidence,
11970
12133
  confidenceThreshold: detectorOptions?.confidenceThreshold || 0.5,
@@ -11976,7 +12139,7 @@ var ExplainAPI = class {
11976
12139
  /**
11977
12140
  * Explain why text was or wasn't detected as PII
11978
12141
  */
11979
- explain(text) {
12142
+ async explain(text) {
11980
12143
  const patternResults = [];
11981
12144
  const matchedPatterns = [];
11982
12145
  const unmatchedPatterns = [];
@@ -12066,7 +12229,8 @@ var ExplainAPI = class {
12066
12229
  patternResults.push(result);
12067
12230
  matchedPatterns.push(result);
12068
12231
  }
12069
- const detections = this.detector.detect(text).detections;
12232
+ const detectionResult = await this.detector.detect(text);
12233
+ const detections = detectionResult.detections;
12070
12234
  return {
12071
12235
  text,
12072
12236
  patternResults,
@@ -12085,7 +12249,7 @@ var ExplainAPI = class {
12085
12249
  /**
12086
12250
  * Explain a specific detection
12087
12251
  */
12088
- explainDetection(detection, text) {
12252
+ async explainDetection(detection, text) {
12089
12253
  const pattern = this.patterns.find((p) => p.type === detection.type);
12090
12254
  const reasoning = [];
12091
12255
  reasoning.push(`Detected as ${detection.type}`);
@@ -12116,13 +12280,15 @@ var ExplainAPI = class {
12116
12280
  detection,
12117
12281
  pattern,
12118
12282
  contextAnalysis,
12119
- reasoning
12283
+ reasoning,
12284
+ suggestions: []
12285
+ // Will be populated if needed
12120
12286
  };
12121
12287
  }
12122
12288
  /**
12123
12289
  * Suggest why text wasn't detected
12124
12290
  */
12125
- suggestWhy(text, expectedType) {
12291
+ async suggestWhy(text, expectedType) {
12126
12292
  const suggestions = [];
12127
12293
  const similarPatterns = [];
12128
12294
  const typePatterns = this.patterns.filter(
@@ -12140,7 +12306,7 @@ var ExplainAPI = class {
12140
12306
  similarPatterns.push(pattern);
12141
12307
  const value = match[1] !== void 0 ? match[1] : match[0];
12142
12308
  suggestions.push(`Pattern "${pattern.type}" matched value: "${value}"`);
12143
- const explanation = this.explain(text);
12309
+ const explanation = await this.explain(text);
12144
12310
  const filtered = explanation.filteredPatterns.find((r) => r.pattern.type === pattern.type);
12145
12311
  if (filtered && filtered.reason) {
12146
12312
  suggestions.push(`But was filtered: ${filtered.reason}`);
@@ -12170,9 +12336,9 @@ Example ${expectedType} pattern: ${examplePattern.regex.source.substring(0, 100)
12170
12336
  /**
12171
12337
  * Get debugging information for entire detection process
12172
12338
  */
12173
- debug(text) {
12339
+ async debug(text) {
12174
12340
  const start = performance.now();
12175
- const explanation = this.explain(text);
12341
+ const explanation = await this.explain(text);
12176
12342
  const duration = performance.now() - start;
12177
12343
  const enabledFeatures = [];
12178
12344
  if (this.options.enableContextAnalysis) {
@@ -13060,6 +13226,152 @@ function compileSafeRegex(pattern, flags) {
13060
13226
  return new RegExp(patternStr, finalFlags);
13061
13227
  }
13062
13228
 
13229
+ // src/utils/ai-assist.ts
13230
/**
 * Resolve the URL of the AI detection service.
 *
 * Nothing is resolved unless `aiOptions.enabled` is truthy. When it is, an
 * explicit `aiOptions.endpoint` wins; otherwise the OPENREDACTION_AI_ENDPOINT
 * environment variable is used when a `process.env` object exists.
 *
 * @param {{enabled?: boolean, endpoint?: string}} [aiOptions] - AI assist options.
 * @returns {string|null} The endpoint to call, or `null` when AI assist is off
 *   or no endpoint is configured.
 */
function getAIEndpoint(aiOptions) {
  const isEnabled = Boolean(aiOptions?.enabled);
  if (!isEnabled) return null;
  // `process` is absent in browsers, so probe for it before touching env vars.
  const hasProcessEnv = typeof process !== "undefined" && Boolean(process.env);
  const envEndpoint = hasProcessEnv ? process.env.OPENREDACTION_AI_ENDPOINT : undefined;
  return aiOptions.endpoint || envEndpoint || null;
}
13245
/**
 * Report whether a global `fetch` binding exists in this runtime.
 *
 * @returns {boolean} `true` when `typeof fetch` is anything but "undefined".
 */
function isFetchAvailable() {
  const fetchKind = typeof fetch;
  return fetchKind !== "undefined";
}
13248
/**
 * POST `text` to the AI detection service and return the entities it reports.
 *
 * The request is sent to `<endpoint>/ai-detect`, unless `endpoint` already
 * ends with that route. Every failure (no fetch API, network or parsing
 * error, non-2xx status, response without an `entities` array) resolves to
 * `null` instead of throwing; details are logged only when `debug` is truthy.
 *
 * @param {string} text - Text to analyse.
 * @param {string} endpoint - Base URL of the AI service, or the full `/ai-detect` URL.
 * @param {boolean} [debug] - Log diagnostics to the console.
 * @returns {Promise<Array|null>} The `entities` array from the response body, or `null`.
 */
async function callAIDetect(text, endpoint, debug) {
  if (!isFetchAvailable()) {
    if (debug) {
      console.warn("[OpenRedaction] AI assist requires fetch API. Not available in this environment.");
    }
    return null;
  }
  try {
    // Strip trailing slashes before building the URL: otherwise
    // "https://host/api/" becomes "https://host/api//ai-detect" and
    // "https://host/ai-detect/" gets the route appended a second time.
    const base = endpoint.replace(/\/+$/, "");
    const url = base.endsWith("/ai-detect") ? base : `${base}/ai-detect`;
    if (debug) {
      console.log(`[OpenRedaction] Calling AI endpoint: ${url}`);
    }
    const response = await fetch(url, {
      method: "POST",
      headers: {
        "Content-Type": "application/json"
      },
      body: JSON.stringify({ text })
    });
    if (!response.ok) {
      if (debug) {
        const statusText = response.status === 429 ? "Rate limit exceeded (429)" : `${response.status}: ${response.statusText}`;
        console.warn(`[OpenRedaction] AI endpoint returned ${statusText}`);
      }
      return null;
    }
    const data = await response.json();
    if (!data.entities || !Array.isArray(data.entities)) {
      if (debug) {
        console.warn("[OpenRedaction] Invalid AI response format: missing entities array");
      }
      return null;
    }
    return data.entities;
  } catch (error) {
    // Also reached when the body is not JSON or `endpoint` is not a string.
    if (debug) {
      console.warn(`[OpenRedaction] AI endpoint error: ${error instanceof Error ? error.message : "Unknown error"}`);
    }
    return null;
  }
}
13289
/**
 * Check that an entity returned by the AI service is well-formed and fits
 * inside the analysed text.
 *
 * An entity is accepted only when `type` and `value` are non-empty strings,
 * `start`/`end` are integers with `0 <= start < end <= textLength`, and the
 * length of `value` equals `end - start`. Anything else (including `null`
 * or a non-object) is rejected rather than raising, because the payload
 * comes from an external service.
 *
 * @param {*} entity - Candidate entity from the AI response.
 * @param {number} textLength - Length of the text the offsets refer to.
 * @returns {boolean} `true` when the entity can be used safely.
 */
function validateAIEntity(entity, textLength) {
  if (entity === null || typeof entity !== "object") {
    return false;
  }
  const { type, value, start, end } = entity;
  // Truthiness alone would let numbers, objects or arrays through; a
  // non-string type later breaks on toUpperCase().
  if (typeof type !== "string" || type === "" || typeof value !== "string" || value === "") {
    return false;
  }
  // Offsets are used as string indices, so fractional or NaN values are invalid.
  if (!Number.isInteger(start) || !Number.isInteger(end)) {
    return false;
  }
  if (start < 0 || start >= end || end > textLength) {
    return false;
  }
  return value.length === end - start;
}
13305
/**
 * Decide whether two detections cover substantially the same text.
 *
 * The spans are read from each detection's `position` pair ([start, end]).
 * They count as overlapping when the shared range is longer than half of the
 * shorter span; merely touching or slightly intersecting spans do not.
 *
 * @param {{position: [number, number]}} det1 - First detection.
 * @param {{position: [number, number]}} det2 - Second detection.
 * @returns {boolean} `true` when the shared range exceeds 50% of the shorter span.
 */
function detectionsOverlap(det1, det2) {
  const [firstStart, firstEnd] = det1.position;
  const [secondStart, secondEnd] = det2.position;
  const sharedFrom = Math.max(firstStart, secondStart);
  const sharedTo = Math.min(firstEnd, secondEnd);
  if (sharedFrom >= sharedTo) return false;
  const shorterSpan = Math.min(firstEnd - firstStart, secondEnd - secondStart);
  return sharedTo - sharedFrom > shorterSpan * 0.5;
}
13319
/**
 * Map the type label reported by the AI service onto the type names used for
 * detections. Labels that match no rule are returned upper-cased and unchanged.
 *
 * @param {string} rawType - Type label from the AI response.
 * @returns {string} Normalised type name.
 */
function normalizeAIEntityType(rawType) {
  // Account or machine identifiers such as USERNAME or HOST_NAME contain
  // "NAME" but are not personal names.
  const nonPersonName = /(?:^|[^A-Z])(?:USER|HOST|FILE|DOMAIN)[^A-Z]?NAME/;
  // Network and wallet addresses contain "ADDRESS" but are not street addresses.
  const nonPostalAddress = /(?:^|[^A-Z])(?:IP|IPV4|IPV6|MAC|BITCOIN|ETHEREUM|CRYPTO|WALLET)[^A-Z]?ADDRESS/;
  const type = rawType.toUpperCase();
  if (type.includes("EMAIL")) {
    return "EMAIL";
  }
  if (type.includes("PHONE")) {
    return "PHONE_US";
  }
  if (type === "PERSON" || (type.includes("NAME") && !nonPersonName.test(type))) {
    return "NAME";
  }
  if (type.includes("SSN") || type === "SOCIAL_SECURITY_NUMBER") {
    return "SSN";
  }
  if (type.includes("ADDRESS") && !nonPostalAddress.test(type)) {
    return "ADDRESS_STREET";
  }
  return type;
}
/**
 * Turn an entity reported by the AI service into a detection object.
 *
 * The entity is first checked with validateAIEntity(); malformed entities
 * (including `null` or a non-string `type`) yield `null` instead of throwing.
 * The detection's `value` is read from `text` at the reported offsets rather
 * than copied from the entity.
 *
 * @param {{type: string, value: string, start: number, end: number, confidence?: number}} entity
 *   Entity from the AI response.
 * @param {string} text - The text the entity offsets refer to.
 * @returns {{type: string, value: string, placeholder: string, position: number[], severity: string, confidence: number}|null}
 *   The detection, or `null` when the entity is unusable.
 */
function convertAIEntityToDetection(entity, text) {
  // Guard before toUpperCase(): the response body is external input.
  if (entity == null || typeof entity.type !== "string") {
    return null;
  }
  if (!validateAIEntity(entity, text.length)) {
    return null;
  }
  const actualValue = text.substring(entity.start, entity.end);
  const type = normalizeAIEntityType(entity.type);
  let severity = "medium";
  if (type === "SSN" || type === "CREDIT_CARD") {
    severity = "critical";
  } else if (type === "EMAIL" || type === "PHONE_US" || type === "NAME") {
    severity = "high";
  }
  return {
    type,
    value: actualValue,
    // Random suffix keeps placeholders of separate AI detections distinct.
    placeholder: `[${type}_${Math.random().toString(36).substring(2, 9)}]`,
    position: [entity.start, entity.end],
    severity,
    // Default confidence for AI entities that do not report one.
    confidence: entity.confidence ?? 0.7
  };
}
13352
/**
 * Combine regex detections with entities reported by the AI service.
 *
 * Regex detections are always kept. Each AI entity is converted with
 * convertAIEntityToDetection() and appended only when it does not overlap
 * (per detectionsOverlap()) any detection accepted so far — regex detections
 * and previously accepted AI detections alike — so the same span reported
 * twice by the AI service is added once.
 *
 * @param {Array} regexDetections - Detections from the regex patterns; not modified.
 * @param {Iterable|null|undefined} aiEntities - Raw entities from the AI response.
 * @param {string} text - The analysed text the entity offsets refer to.
 * @returns {Array} A new array: the regex detections followed by the accepted AI detections.
 */
function mergeAIEntities(regexDetections, aiEntities, text) {
  const merged = [...regexDetections];
  for (const aiEntity of aiEntities ?? []) {
    const detection = convertAIEntityToDetection(aiEntity, text);
    if (!detection) {
      continue;
    }
    // Check against `merged`, not only the regex detections, so overlapping
    // AI entities do not produce duplicate detections for one span.
    const alreadyCovered = merged.some((existing) => detectionsOverlap(existing, detection));
    if (!alreadyCovered) {
      merged.push(detection);
    }
  }
  return merged;
}
13374
+
13063
13375
  // src/detector.ts
13064
13376
  var OpenRedaction = class _OpenRedaction {
13065
13377
  constructor(options = {}) {
@@ -13269,6 +13581,9 @@ var OpenRedaction = class _OpenRedaction {
13269
13581
  for (const pattern of this.patterns) {
13270
13582
  const regex = new RegExp(pattern.regex.source, pattern.regex.flags);
13271
13583
  this.compiledPatterns.set(pattern, regex);
13584
+ if (this.options.debug && (pattern.type === "NINTENDO_FRIEND_CODE" || pattern.type === "TELECOMS_ACCOUNT_NUMBER")) {
13585
+ console.log(`[OpenRedaction] Compiled pattern '${pattern.type}': ${regex}`);
13586
+ }
13272
13587
  }
13273
13588
  if (this.options.debug) {
13274
13589
  console.log(`[OpenRedaction] Pre-compiled ${this.compiledPatterns.size} regex patterns`);
@@ -13288,12 +13603,18 @@ var OpenRedaction = class _OpenRedaction {
13288
13603
  }
13289
13604
  continue;
13290
13605
  }
13606
+ if (this.options.debug && (pattern.type === "NINTENDO_FRIEND_CODE" || pattern.type === "TELECOMS_ACCOUNT_NUMBER")) {
13607
+ console.log(`[OpenRedaction] Processing pattern '${pattern.type}' with regex: ${regex}`);
13608
+ }
13291
13609
  let match;
13292
13610
  let matchCount = 0;
13293
13611
  const maxMatches = 1e4;
13294
13612
  regex.lastIndex = 0;
13295
13613
  try {
13296
13614
  while ((match = safeExec(regex, text, { timeout: this.options.regexTimeout })) !== null) {
13615
+ if (this.options.debug && (pattern.type === "NINTENDO_FRIEND_CODE" || pattern.type === "TELECOMS_ACCOUNT_NUMBER")) {
13616
+ console.log(`[OpenRedaction] Pattern '${pattern.type}' regex match found: '${match[0]}' at position ${match.index}`);
13617
+ }
13297
13618
  matchCount++;
13298
13619
  if (matchCount >= maxMatches) {
13299
13620
  if (this.options.debug) {
@@ -13314,12 +13635,18 @@ var OpenRedaction = class _OpenRedaction {
13314
13635
  endPos = startPos + value.length;
13315
13636
  }
13316
13637
  if (this.overlapsWithExisting(startPos, endPos, processedRanges)) {
13638
+ if (this.options.debug) {
13639
+ console.log(`[OpenRedaction] Pattern '${pattern.type}' skipped due to overlap at ${startPos}-${endPos}`);
13640
+ }
13317
13641
  continue;
13318
13642
  }
13319
13643
  const contextStart = Math.max(0, startPos - 50);
13320
13644
  const contextEnd = Math.min(text.length, endPos + 50);
13321
13645
  const context = text.substring(contextStart, contextEnd);
13322
13646
  if (pattern.validator && !pattern.validator(value, context)) {
13647
+ if (this.options.debug) {
13648
+ console.log(`[OpenRedaction] Pattern '${pattern.type}' validation failed for value: '${value}' with context: '${context.substring(0, 100)}...'`);
13649
+ }
13323
13650
  continue;
13324
13651
  }
13325
13652
  if (this.options.enableFalsePositiveFilter) {
@@ -13338,6 +13665,9 @@ var OpenRedaction = class _OpenRedaction {
13338
13665
  endPos
13339
13666
  );
13340
13667
  confidence = contextAnalysis.confidence;
13668
+ if (this.options.debug && confidence < this.options.confidenceThreshold) {
13669
+ console.log(`[OpenRedaction] Pattern '${pattern.type}' failed context analysis. Value: '${value}', Confidence: ${confidence} < ${this.options.confidenceThreshold}`);
13670
+ }
13341
13671
  }
13342
13672
  if (this.contextRulesEngine) {
13343
13673
  const piiMatch = {
@@ -13363,6 +13693,9 @@ var OpenRedaction = class _OpenRedaction {
13363
13693
  continue;
13364
13694
  }
13365
13695
  const placeholder = this.generatePlaceholder(value, pattern);
13696
+ if (this.options.debug) {
13697
+ console.log(`[OpenRedaction] Pattern '${pattern.type}' detected: '${value}' at position ${startPos}-${endPos}, confidence: ${confidence}`);
13698
+ }
13366
13699
  detections.push({
13367
13700
  type: pattern.type,
13368
13701
  value,
@@ -13423,8 +13756,9 @@ var OpenRedaction = class _OpenRedaction {
13423
13756
  }
13424
13757
  /**
13425
13758
  * Detect PII in text
13759
+ * Now async to support optional AI assist
13426
13760
  */
13427
- detect(text) {
13761
+ async detect(text) {
13428
13762
  if (this.rbacManager && !this.rbacManager.hasPermission("detection:detect")) {
13429
13763
  throw new Error("[OpenRedaction] Permission denied: detection:detect required");
13430
13764
  }
@@ -13478,12 +13812,42 @@ var OpenRedaction = class _OpenRedaction {
13478
13812
  } else {
13479
13813
  detections = this.processPatterns(text, this.patterns, processedRanges);
13480
13814
  }
13815
+ if (this.options.ai?.enabled) {
13816
+ const aiEndpoint = getAIEndpoint(this.options.ai);
13817
+ if (aiEndpoint) {
13818
+ try {
13819
+ if (this.options.debug) {
13820
+ console.log("[OpenRedaction] AI assist enabled, calling AI endpoint...");
13821
+ }
13822
+ const aiEntities = await callAIDetect(text, aiEndpoint, this.options.debug);
13823
+ if (aiEntities && aiEntities.length > 0) {
13824
+ if (this.options.debug) {
13825
+ console.log(`[OpenRedaction] AI returned ${aiEntities.length} additional entities`);
13826
+ }
13827
+ detections = mergeAIEntities(detections, aiEntities, text);
13828
+ if (this.options.debug) {
13829
+ console.log(`[OpenRedaction] After AI merge: ${detections.length} total detections`);
13830
+ }
13831
+ } else if (this.options.debug) {
13832
+ console.log("[OpenRedaction] AI endpoint returned no additional entities");
13833
+ }
13834
+ } catch (error) {
13835
+ if (this.options.debug) {
13836
+ console.warn(`[OpenRedaction] AI assist failed, using regex-only: ${error instanceof Error ? error.message : "Unknown error"}`);
13837
+ }
13838
+ }
13839
+ } else if (this.options.debug) {
13840
+ console.warn("[OpenRedaction] AI assist enabled but no endpoint configured. Set ai.endpoint or OPENREDACTION_AI_ENDPOINT env var.");
13841
+ }
13842
+ }
13481
13843
  detections.sort((a, b) => b.position[0] - a.position[0]);
13482
13844
  let redacted = text;
13483
13845
  const redactionMap = {};
13484
13846
  for (const detection of detections) {
13485
- const [start, end] = detection.position;
13486
- redacted = redacted.substring(0, start) + detection.placeholder + redacted.substring(end);
13847
+ if (!detection.value) continue;
13848
+ const escapedValue = this.escapeRegex(detection.value);
13849
+ const pattern = new RegExp(escapedValue, "gi");
13850
+ redacted = redacted.replace(pattern, detection.placeholder);
13487
13851
  redactionMap[detection.placeholder] = detection.value;
13488
13852
  }
13489
13853
  const endTime = performance.now();
@@ -13636,8 +14000,8 @@ var OpenRedaction = class _OpenRedaction {
13636
14000
  /**
13637
14001
  * Get severity-based scan results
13638
14002
  */
13639
- scan(text) {
13640
- const result = this.detect(text);
14003
+ async scan(text) {
14004
+ const result = await this.detect(text);
13641
14005
  return {
13642
14006
  high: result.detections.filter((d) => d.severity === "high"),
13643
14007
  medium: result.detections.filter((d) => d.severity === "medium"),
@@ -13845,7 +14209,7 @@ var OpenRedaction = class _OpenRedaction {
13845
14209
  * Run health check
13846
14210
  */
13847
14211
  async healthCheck(options) {
13848
- const { HealthChecker: HealthChecker2 } = await import("./HealthCheck-A5OD4ATR.mjs");
14212
+ const { HealthChecker: HealthChecker2 } = await import("./HealthCheck-77BBYEEO.mjs");
13849
14213
  const checker = new HealthChecker2(this);
13850
14214
  return checker.check(options);
13851
14215
  }
@@ -13853,7 +14217,7 @@ var OpenRedaction = class _OpenRedaction {
13853
14217
  * Quick health check (minimal overhead)
13854
14218
  */
13855
14219
  async quickHealthCheck() {
13856
- const { HealthChecker: HealthChecker2 } = await import("./HealthCheck-A5OD4ATR.mjs");
14220
+ const { HealthChecker: HealthChecker2 } = await import("./HealthCheck-77BBYEEO.mjs");
13857
14221
  const checker = new HealthChecker2(this);
13858
14222
  return checker.quickCheck();
13859
14223
  }
@@ -13867,14 +14231,14 @@ var OpenRedaction = class _OpenRedaction {
13867
14231
  if (this.rbacManager && !this.rbacManager.hasPermission("detection:detect")) {
13868
14232
  throw new Error("[OpenRedaction] Permission denied: detection:detect required");
13869
14233
  }
13870
- const { createDocumentProcessor: createDocumentProcessor2 } = await import("./document-AOMZP7UR.mjs");
14234
+ const { createDocumentProcessor: createDocumentProcessor2 } = await import("./document-NNFKTUEV.mjs");
13871
14235
  const processor = createDocumentProcessor2();
13872
14236
  const extractionStart = performance.now();
13873
14237
  const text = await processor.extractText(buffer, options);
13874
14238
  const metadata = await processor.getMetadata(buffer, options);
13875
14239
  const extractionEnd = performance.now();
13876
14240
  const extractionTime = Math.round((extractionEnd - extractionStart) * 100) / 100;
13877
- const detection = this.detect(text);
14241
+ const detection = await this.detect(text);
13878
14242
  return {
13879
14243
  text,
13880
14244
  metadata,
@@ -13968,7 +14332,7 @@ var StreamingDetector = class {
13968
14332
  const end = Math.min(textLength, position + chunkSize);
13969
14333
  const chunk = text.substring(start, end);
13970
14334
  const byteOffset = start;
13971
- const result = this.detector.detect(chunk);
14335
+ const result = await this.detector.detect(chunk);
13972
14336
  const newDetections = result.detections.filter((detection) => {
13973
14337
  const absoluteStart = byteOffset + detection.position[0];
13974
14338
  const absoluteEnd = byteOffset + detection.position[1];
@@ -13998,8 +14362,10 @@ var StreamingDetector = class {
13998
14362
  (a, b) => b.position[0] - a.position[0]
13999
14363
  );
14000
14364
  for (const detection of sortedDetections) {
14001
- const [start2, end2] = detection.position;
14002
- redactedChunk = redactedChunk.substring(0, start2) + detection.placeholder + redactedChunk.substring(end2);
14365
+ if (!detection.value) continue;
14366
+ const escapedValue = this.escapeRegex(detection.value);
14367
+ const pattern = new RegExp(escapedValue, "gi");
14368
+ redactedChunk = redactedChunk.replace(pattern, detection.placeholder);
14003
14369
  }
14004
14370
  }
14005
14371
  yield {
@@ -14025,8 +14391,10 @@ var StreamingDetector = class {
14025
14391
  allDetections.sort((a, b) => b.position[0] - a.position[0]);
14026
14392
  const redactionMap = {};
14027
14393
  for (const detection of allDetections) {
14028
- const [start, end] = detection.position;
14029
- redactedText = redactedText.substring(0, start) + detection.placeholder + redactedText.substring(end);
14394
+ if (!detection.value) continue;
14395
+ const escapedValue = this.escapeRegex(detection.value);
14396
+ const pattern = new RegExp(escapedValue, "gi");
14397
+ redactedText = redactedText.replace(pattern, detection.placeholder);
14030
14398
  redactionMap[detection.placeholder] = detection.value;
14031
14399
  }
14032
14400
  return {
@@ -14101,6 +14469,9 @@ var StreamingDetector = class {
14101
14469
  estimatedMemory
14102
14470
  };
14103
14471
  }
14472
+ escapeRegex(str) {
14473
+ return str.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
14474
+ }
14104
14475
  };
14105
14476
  function createStreamingDetector(detector, options) {
14106
14477
  return new StreamingDetector(detector, options);
@@ -14114,11 +14485,11 @@ var BatchProcessor = class {
14114
14485
  /**
14115
14486
  * Process multiple documents sequentially
14116
14487
  */
14117
- processSequential(documents, options = {}) {
14488
+ async processSequential(documents, options = {}) {
14118
14489
  const startTime = performance.now();
14119
14490
  const results = [];
14120
14491
  for (let i = 0; i < documents.length; i++) {
14121
- const result = this.detector.detect(documents[i]);
14492
+ const result = await this.detector.detect(documents[i]);
14122
14493
  results.push(result);
14123
14494
  if (options.onProgress) {
14124
14495
  options.onProgress(i + 1, documents.length);
@@ -14132,7 +14503,7 @@ var BatchProcessor = class {
14132
14503
  totalDocuments: documents.length,
14133
14504
  totalDetections: results.reduce((sum, r) => sum + r.detections.length, 0),
14134
14505
  totalTime,
14135
- avgTimePerDocument: totalTime / documents.length
14506
+ avgTimePerDocument: documents.length > 0 ? totalTime / documents.length : 0
14136
14507
  }
14137
14508
  };
14138
14509
  }
@@ -14146,16 +14517,14 @@ var BatchProcessor = class {
14146
14517
  let completed = 0;
14147
14518
  for (let i = 0; i < documents.length; i += maxConcurrency) {
14148
14519
  const batch = documents.slice(i, i + maxConcurrency);
14149
- const batchPromises = batch.map((doc, batchIndex) => {
14150
- return Promise.resolve().then(() => {
14151
- const result = this.detector.detect(doc);
14152
- results[i + batchIndex] = result;
14153
- completed++;
14154
- if (options.onProgress) {
14155
- options.onProgress(completed, documents.length);
14156
- }
14157
- return result;
14158
- });
14520
+ const batchPromises = batch.map(async (doc, batchIndex) => {
14521
+ const result = await this.detector.detect(doc);
14522
+ results[i + batchIndex] = result;
14523
+ completed++;
14524
+ if (options.onProgress) {
14525
+ options.onProgress(completed, documents.length);
14526
+ }
14527
+ return result;
14159
14528
  });
14160
14529
  await Promise.all(batchPromises);
14161
14530
  }
@@ -14167,7 +14536,7 @@ var BatchProcessor = class {
14167
14536
  totalDocuments: documents.length,
14168
14537
  totalDetections: results.reduce((sum, r) => sum + r.detections.length, 0),
14169
14538
  totalTime,
14170
- avgTimePerDocument: totalTime / documents.length
14539
+ avgTimePerDocument: documents.length > 0 ? totalTime / documents.length : 0
14171
14540
  }
14172
14541
  };
14173
14542
  }
@@ -14178,7 +14547,7 @@ var BatchProcessor = class {
14178
14547
  if (options.parallel) {
14179
14548
  return this.processParallel(documents, options);
14180
14549
  } else {
14181
- return Promise.resolve(this.processSequential(documents, options));
14550
+ return this.processSequential(documents, options);
14182
14551
  }
14183
14552
  }
14184
14553
  /**
@@ -14189,7 +14558,7 @@ var BatchProcessor = class {
14189
14558
  for (let i = 0; i < documents.length; i += batchSize) {
14190
14559
  const batch = documents.slice(i, i + batchSize);
14191
14560
  for (const doc of batch) {
14192
- const result = this.detector.detect(doc);
14561
+ const result = await this.detector.detect(doc);
14193
14562
  yield result;
14194
14563
  }
14195
14564
  }
@@ -14237,7 +14606,7 @@ function openredactionMiddleware(options = {}) {
14237
14606
  ...detectorOptions
14238
14607
  } = options;
14239
14608
  const detector = new OpenRedaction(detectorOptions);
14240
- return (req, res, next) => {
14609
+ return async (req, res, next) => {
14241
14610
  if (skipRoutes.some((pattern) => pattern.test(req.path))) {
14242
14611
  return next();
14243
14612
  }
@@ -14257,7 +14626,7 @@ function openredactionMiddleware(options = {}) {
14257
14626
  const results = {};
14258
14627
  const redactedBody = { ...req.body };
14259
14628
  for (const { field, value } of textsToCheck) {
14260
- const result = detector.detect(value);
14629
+ const result = await detector.detect(value);
14261
14630
  if (result.detections.length > 0) {
14262
14631
  totalDetections += result.detections.length;
14263
14632
  results[field] = result;
@@ -14307,7 +14676,7 @@ function openredactionMiddleware(options = {}) {
14307
14676
  }
14308
14677
  function detectPII(options = {}) {
14309
14678
  const detector = new OpenRedaction(options);
14310
- return (req, res) => {
14679
+ return async (req, res) => {
14311
14680
  const text = req.body?.text || req.query.text;
14312
14681
  if (!text) {
14313
14682
  res.status(400).json({
@@ -14316,19 +14685,26 @@ function detectPII(options = {}) {
14316
14685
  });
14317
14686
  return;
14318
14687
  }
14319
- const result = detector.detect(text);
14320
- res.json({
14321
- detected: result.detections.length > 0,
14322
- count: result.detections.length,
14323
- detections: result.detections,
14324
- redacted: result.redacted,
14325
- stats: result.stats
14326
- });
14688
+ try {
14689
+ const result = await detector.detect(text);
14690
+ res.json({
14691
+ detected: result.detections.length > 0,
14692
+ count: result.detections.length,
14693
+ detections: result.detections,
14694
+ redacted: result.redacted,
14695
+ stats: result.stats
14696
+ });
14697
+ } catch (error) {
14698
+ res.status(500).json({
14699
+ error: "Detection failed",
14700
+ message: error instanceof Error ? error.message : "Unknown error"
14701
+ });
14702
+ }
14327
14703
  };
14328
14704
  }
14329
14705
  function generateReport(options = {}) {
14330
14706
  const detector = new OpenRedaction(options);
14331
- return (req, res) => {
14707
+ return async (req, res) => {
14332
14708
  const text = req.body?.text;
14333
14709
  const format = req.body?.format || req.query.format || "json";
14334
14710
  if (!text) {
@@ -14337,28 +14713,35 @@ function generateReport(options = {}) {
14337
14713
  });
14338
14714
  return;
14339
14715
  }
14340
- const result = detector.detect(text);
14341
- if (format === "html") {
14342
- const html = detector.generateReport(result, {
14343
- format: "html",
14344
- title: req.body?.title || "PII Detection Report"
14345
- });
14346
- res.setHeader("Content-Type", "text/html");
14347
- res.send(html);
14348
- } else if (format === "markdown") {
14349
- const md = detector.generateReport(result, {
14350
- format: "markdown",
14351
- title: req.body?.title || "PII Detection Report"
14352
- });
14353
- res.setHeader("Content-Type", "text/markdown");
14354
- res.send(md);
14355
- } else {
14356
- res.json({
14357
- detected: result.detections.length > 0,
14358
- count: result.detections.length,
14359
- detections: result.detections,
14360
- redacted: result.redacted,
14361
- stats: result.stats
14716
+ try {
14717
+ const result = await detector.detect(text);
14718
+ if (format === "html") {
14719
+ const html = detector.generateReport(result, {
14720
+ format: "html",
14721
+ title: req.body?.title || "PII Detection Report"
14722
+ });
14723
+ res.setHeader("Content-Type", "text/html");
14724
+ res.send(html);
14725
+ } else if (format === "markdown") {
14726
+ const md = detector.generateReport(result, {
14727
+ format: "markdown",
14728
+ title: req.body?.title || "PII Detection Report"
14729
+ });
14730
+ res.setHeader("Content-Type", "text/markdown");
14731
+ res.send(md);
14732
+ } else {
14733
+ res.json({
14734
+ detected: result.detections.length > 0,
14735
+ count: result.detections.length,
14736
+ detections: result.detections,
14737
+ redacted: result.redacted,
14738
+ stats: result.stats
14739
+ });
14740
+ }
14741
+ } catch (error) {
14742
+ res.status(500).json({
14743
+ error: "Report generation failed",
14744
+ message: error instanceof Error ? error.message : "Unknown error"
14362
14745
  });
14363
14746
  }
14364
14747
  };
@@ -14370,12 +14753,17 @@ function useOpenRedaction(options) {
14370
14753
  const detector = useMemo(() => new OpenRedaction(options), [options]);
14371
14754
  const [result, setResult] = useState(null);
14372
14755
  const [isDetecting, setIsDetecting] = useState(false);
14373
- const detect = useCallback((text) => {
14756
+ const detect = useCallback(async (text) => {
14374
14757
  setIsDetecting(true);
14375
- const detection = detector.detect(text);
14376
- setResult(detection);
14377
- setIsDetecting(false);
14378
- return detection;
14758
+ try {
14759
+ const detection = await detector.detect(text);
14760
+ setResult(detection);
14761
+ setIsDetecting(false);
14762
+ return detection;
14763
+ } catch (error) {
14764
+ setIsDetecting(false);
14765
+ throw error;
14766
+ }
14379
14767
  }, [detector]);
14380
14768
  const clear = useCallback(() => {
14381
14769
  setResult(null);
@@ -14401,10 +14789,14 @@ function usePIIDetector(text, options) {
14401
14789
  return;
14402
14790
  }
14403
14791
  setIsDetecting(true);
14404
- const timer = setTimeout(() => {
14405
- const detection = detector.detect(text);
14406
- setResult(detection);
14407
- setIsDetecting(false);
14792
+ const timer = setTimeout(async () => {
14793
+ try {
14794
+ const detection = await detector.detect(text);
14795
+ setResult(detection);
14796
+ setIsDetecting(false);
14797
+ } catch (error) {
14798
+ setIsDetecting(false);
14799
+ }
14408
14800
  }, debounce);
14409
14801
  return () => {
14410
14802
  clearTimeout(timer);
@@ -14425,27 +14817,32 @@ function useFormFieldValidator(options) {
14425
14817
  const [value, setValue] = useState("");
14426
14818
  const [error, setError] = useState(null);
14427
14819
  const [result, setResult] = useState(null);
14428
- const validate = useCallback((inputValue) => {
14820
+ const validate = useCallback(async (inputValue) => {
14429
14821
  setValue(inputValue);
14430
14822
  if (!inputValue) {
14431
14823
  setError(null);
14432
14824
  setResult(null);
14433
14825
  return true;
14434
14826
  }
14435
- const detection = detector.detect(inputValue);
14436
- setResult(detection);
14437
- const relevantDetections = types.length > 0 ? detection.detections.filter((d) => types.includes(d.type)) : detection.detections;
14438
- if (relevantDetections.length > 0) {
14439
- if (failOnPII) {
14440
- setError(`Sensitive information detected: ${relevantDetections[0].type}`);
14441
- }
14442
- if (onPIIDetected) {
14443
- onPIIDetected(detection);
14827
+ try {
14828
+ const detection = await detector.detect(inputValue);
14829
+ setResult(detection);
14830
+ const relevantDetections = types.length > 0 ? detection.detections.filter((d) => types.includes(d.type)) : detection.detections;
14831
+ if (relevantDetections.length > 0) {
14832
+ if (failOnPII) {
14833
+ setError(`Sensitive information detected: ${relevantDetections[0].type}`);
14834
+ }
14835
+ if (onPIIDetected) {
14836
+ onPIIDetected(detection);
14837
+ }
14838
+ return false;
14444
14839
  }
14840
+ setError(null);
14841
+ return true;
14842
+ } catch (error2) {
14843
+ setError("Validation failed");
14445
14844
  return false;
14446
14845
  }
14447
- setError(null);
14448
- return true;
14449
14846
  }, [detector, failOnPII, types, onPIIDetected]);
14450
14847
  const getFieldProps = useCallback(() => ({
14451
14848
  value,
@@ -14472,7 +14869,7 @@ function useBatchDetector(options) {
14472
14869
  setProgress(0);
14473
14870
  const detections = [];
14474
14871
  for (let i = 0; i < texts.length; i++) {
14475
- const result = detector.detect(texts[i]);
14872
+ const result = await detector.detect(texts[i]);
14476
14873
  detections.push(result);
14477
14874
  setProgress((i + 1) / texts.length * 100);
14478
14875
  await new Promise((resolve) => setTimeout(resolve, 0));
@@ -14509,9 +14906,12 @@ function useAutoRedact(options) {
14509
14906
  setResult(null);
14510
14907
  return;
14511
14908
  }
14512
- const timer = setTimeout(() => {
14513
- const detection = detector.detect(text);
14514
- setResult(detection);
14909
+ const timer = setTimeout(async () => {
14910
+ try {
14911
+ const detection = await detector.detect(text);
14912
+ setResult(detection);
14913
+ } catch (error) {
14914
+ }
14515
14915
  }, debounce);
14516
14916
  return () => clearTimeout(timer);
14517
14917
  }, [text, detector, debounce]);
@@ -14640,7 +15040,7 @@ var TenantManager = class {
14640
15040
  await this.checkQuotas(tenantId, text);
14641
15041
  this.trackRequest(tenantId, text);
14642
15042
  const detector = this.getDetector(tenantId);
14643
- const result = detector.detect(text);
15043
+ const result = await detector.detect(text);
14644
15044
  const usage = this.usage.get(tenantId);
14645
15045
  usage.piiDetectedThisMonth += result.detections.length;
14646
15046
  usage.lastRequestAt = /* @__PURE__ */ new Date();
@@ -14927,6 +15327,7 @@ var DEFAULT_TIER_QUOTAS = {
14927
15327
  // src/webhooks/WebhookManager.ts
14928
15328
  var WebhookManager = class {
14929
15329
  // 1 minute
15330
+ // private readonly HALF_OPEN_MAX_REQUESTS = 1; // Reserved for future use
14930
15331
  constructor(options) {
14931
15332
  this.webhooks = /* @__PURE__ */ new Map();
14932
15333
  this.deliveryHistory = [];
@@ -15198,9 +15599,9 @@ var WebhookManager = class {
15198
15599
  */
15199
15600
  async makeHttpRequest(webhook, event) {
15200
15601
  try {
15201
- let fetch;
15602
+ let fetch2;
15202
15603
  try {
15203
- fetch = globalThis.fetch;
15604
+ fetch2 = globalThis.fetch;
15204
15605
  } catch {
15205
15606
  throw new Error("[WebhookManager] HTTP client not available. Requires Node 18+ with fetch support.");
15206
15607
  }
@@ -15220,7 +15621,7 @@ var WebhookManager = class {
15220
15621
  const controller = new AbortController();
15221
15622
  const timeoutId = setTimeout(() => controller.abort(), webhook.timeout);
15222
15623
  try {
15223
- const response = await fetch(webhook.url, {
15624
+ const response = await fetch2(webhook.url, {
15224
15625
  method: "POST",
15225
15626
  headers,
15226
15627
  body: JSON.stringify(event),
@@ -15591,7 +15992,7 @@ var APIServer = class {
15591
15992
  if (req.tenantId && this.config.tenantManager) {
15592
15993
  result = await this.config.tenantManager.detect(req.tenantId, text);
15593
15994
  } else if (this.detector) {
15594
- result = this.detector.detect(text);
15995
+ result = await this.detector.detect(text);
15595
15996
  } else {
15596
15997
  throw new Error("No detector available");
15597
15998
  }
@@ -15632,7 +16033,7 @@ var APIServer = class {
15632
16033
  if (req.tenantId && this.config.tenantManager) {
15633
16034
  result = await this.config.tenantManager.detect(req.tenantId, text);
15634
16035
  } else if (this.detector) {
15635
- result = this.detector.detect(text);
16036
+ result = await this.detector.detect(text);
15636
16037
  } else {
15637
16038
  throw new Error("No detector available");
15638
16039
  }
@@ -16053,10 +16454,12 @@ export {
16053
16454
  analyzeFullContext,
16054
16455
  calculateContextConfidence,
16055
16456
  calculateRisk,
16457
+ callAIDetect,
16056
16458
  ccpaPreset,
16057
16459
  commonFalsePositives,
16058
16460
  compileSafeRegex,
16059
16461
  contactPatterns,
16462
+ convertAIEntityToDetection,
16060
16463
  createAPIServer,
16061
16464
  createBatchProcessor,
16062
16465
  createCacheDisabledError,
@@ -16091,12 +16494,16 @@ export {
16091
16494
  createXlsxProcessor,
16092
16495
  defaultPasses,
16093
16496
  detectPII,
16497
+ detectionsOverlap,
16498
+ educationPreset,
16094
16499
  exportForVersionControl,
16095
16500
  extractContext,
16096
16501
  filterFalsePositives,
16502
+ financePreset,
16097
16503
  financialPatterns,
16098
16504
  gdprPreset,
16099
16505
  generateReport,
16506
+ getAIEndpoint,
16100
16507
  getPatternsByCategory,
16101
16508
  getPredefinedRole,
16102
16509
  getPreset,
@@ -16104,21 +16511,26 @@ export {
16104
16511
  governmentPatterns,
16105
16512
  groupPatternsByPass,
16106
16513
  healthCheckMiddleware,
16514
+ healthcarePreset,
16515
+ healthcareResearchPreset,
16107
16516
  hipaaPreset,
16108
16517
  inferDocumentType,
16109
16518
  isFalsePositive,
16110
16519
  isUnsafePattern,
16520
+ mergeAIEntities,
16111
16521
  mergePassDetections,
16112
16522
  networkPatterns,
16113
16523
  openredactionMiddleware,
16114
16524
  personalPatterns,
16115
16525
  safeExec,
16116
16526
  safeExecAll,
16527
+ transportLogisticsPreset,
16117
16528
  useAutoRedact,
16118
16529
  useBatchDetector,
16119
16530
  useFormFieldValidator,
16120
16531
  useOpenRedaction,
16121
16532
  usePIIDetector,
16533
+ validateAIEntity,
16122
16534
  validateEmail,
16123
16535
  validateIBAN,
16124
16536
  validateLuhn,