openredaction 1.0.0 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -311,7 +311,7 @@ var init_HealthCheck = __esm({
311
311
  try {
312
312
  if (options.testDetection !== false) {
313
313
  const testText = "Test email: test@example.com";
314
- const result = this.detector.detect(testText);
314
+ const result = await this.detector.detect(testText);
315
315
  if (!result || !result.detections) {
316
316
  return {
317
317
  status: "fail",
@@ -377,7 +377,7 @@ var init_HealthCheck = __esm({
377
377
  try {
378
378
  const testText = "Test: john@example.com, phone: 555-123-4567, IP: 192.168.1.1";
379
379
  const start = performance.now();
380
- this.detector.detect(testText);
380
+ await this.detector.detect(testText);
381
381
  const duration = performance.now() - start;
382
382
  if (duration > threshold * 2) {
383
383
  return {
@@ -708,51 +708,55 @@ var init_JsonProcessor = __esm({
708
708
  /**
709
709
  * Detect PII in JSON data
710
710
  */
711
- detect(data, detector, options) {
711
+ async detect(data, detector, options) {
712
712
  const opts = { ...this.defaultOptions, ...options };
713
713
  const pathsDetected = [];
714
714
  const matchesByPath = {};
715
715
  const allDetections = [];
716
+ const promises = [];
716
717
  this.traverse(data, "", opts, (path3, value, key) => {
717
- if (this.shouldSkip(path3, opts.skipPaths)) {
718
- return;
719
- }
720
- if (this.shouldAlwaysRedact(path3, opts.alwaysRedact)) {
721
- const detection = {
722
- type: "SENSITIVE_FIELD",
723
- value: String(value),
724
- placeholder: `[SENSITIVE_FIELD]`,
725
- position: [0, String(value).length],
726
- severity: "high",
727
- confidence: 1
728
- };
729
- matchesByPath[path3] = [detection];
730
- pathsDetected.push(path3);
731
- allDetections.push(detection);
732
- return;
733
- }
734
- if (opts.scanKeys && key) {
735
- const keyResult = detector.detect(key);
736
- if (keyResult.detections.length > 0) {
737
- const keyPath = `${path3}.__key__`;
738
- matchesByPath[keyPath] = keyResult.detections;
739
- pathsDetected.push(keyPath);
740
- allDetections.push(...keyResult.detections);
718
+ promises.push((async () => {
719
+ if (this.shouldSkip(path3, opts.skipPaths)) {
720
+ return;
741
721
  }
742
- }
743
- const valueStr = String(value);
744
- const result = detector.detect(valueStr);
745
- if (result.detections.length > 0) {
746
- const boostedDetections = this.boostConfidenceFromKey(
747
- result.detections,
748
- key,
749
- opts.piiIndicatorKeys
750
- );
751
- matchesByPath[path3] = boostedDetections;
752
- pathsDetected.push(path3);
753
- allDetections.push(...boostedDetections);
754
- }
722
+ if (this.shouldAlwaysRedact(path3, opts.alwaysRedact)) {
723
+ const detection = {
724
+ type: "SENSITIVE_FIELD",
725
+ value: String(value),
726
+ placeholder: `[SENSITIVE_FIELD]`,
727
+ position: [0, String(value).length],
728
+ severity: "high",
729
+ confidence: 1
730
+ };
731
+ matchesByPath[path3] = [detection];
732
+ pathsDetected.push(path3);
733
+ allDetections.push(detection);
734
+ return;
735
+ }
736
+ if (opts.scanKeys && key) {
737
+ const keyResult = await detector.detect(key);
738
+ if (keyResult.detections.length > 0) {
739
+ const keyPath = `${path3}.__key__`;
740
+ matchesByPath[keyPath] = keyResult.detections;
741
+ pathsDetected.push(keyPath);
742
+ allDetections.push(...keyResult.detections);
743
+ }
744
+ }
745
+ const valueStr = String(value);
746
+ const result = await detector.detect(valueStr);
747
+ if (result.detections.length > 0) {
748
+ const boostedDetections = this.boostConfidenceFromKey(
749
+ result.detections,
750
+ key,
751
+ opts.piiIndicatorKeys
752
+ );
753
+ matchesByPath[path3] = boostedDetections;
754
+ pathsDetected.push(path3);
755
+ allDetections.push(...boostedDetections);
756
+ }
757
+ })());
755
758
  });
759
+ await Promise.all(promises);
756
760
  const original = JSON.stringify(data);
757
761
  const redacted = this.redact(data, {
758
762
  original,
@@ -959,9 +963,9 @@ var init_JsonProcessor = __esm({
959
963
  /**
960
964
  * Detect PII in JSON Lines format
961
965
  */
962
- detectJsonLines(input, detector, options) {
966
+ async detectJsonLines(input, detector, options) {
963
967
  const documents = this.parseJsonLines(input);
964
- return documents.map((doc) => this.detect(doc, detector, options));
968
+ return Promise.all(documents.map((doc) => this.detect(doc, detector, options)));
965
969
  }
966
970
  };
967
971
  }
@@ -1065,7 +1069,7 @@ var init_CsvProcessor = __esm({
1065
1069
  /**
1066
1070
  * Detect PII in CSV data
1067
1071
  */
1068
- detect(input, detector, options) {
1072
+ async detect(input, detector, options) {
1069
1073
  const opts = { ...this.defaultOptions, ...options };
1070
1074
  const rows = this.parse(input, options);
1071
1075
  if (rows.length === 0) {
@@ -1142,7 +1146,7 @@ var init_CsvProcessor = __esm({
1142
1146
  columnStats[col].piiCount++;
1143
1147
  continue;
1144
1148
  }
1145
- const result = detector.detect(cellValue);
1149
+ const result = await detector.detect(cellValue);
1146
1150
  if (result.detections.length > 0) {
1147
1151
  const boostedDetections = this.boostConfidenceFromColumnName(
1148
1152
  result.detections,
@@ -1482,7 +1486,7 @@ var init_XlsxProcessor = __esm({
1482
1486
  /**
1483
1487
  * Detect PII in XLSX data
1484
1488
  */
1485
- detect(buffer, detector, options) {
1489
+ async detect(buffer, detector, options) {
1486
1490
  if (!this.xlsx) {
1487
1491
  throw new Error(
1488
1492
  "[XlsxProcessor] XLSX support requires xlsx package. Install with: npm install xlsx"
@@ -1497,7 +1501,7 @@ var init_XlsxProcessor = __esm({
1497
1501
  for (let sheetIndex = 0; sheetIndex < sheetNames.length; sheetIndex++) {
1498
1502
  const sheetName = sheetNames[sheetIndex];
1499
1503
  const sheet = workbook.Sheets[sheetName];
1500
- const sheetResult = this.detectSheet(
1504
+ const sheetResult = await this.detectSheet(
1501
1505
  sheet,
1502
1506
  sheetName,
1503
1507
  sheetIndex,
@@ -1540,7 +1544,7 @@ var init_XlsxProcessor = __esm({
1540
1544
  /**
1541
1545
  * Detect PII in a single sheet
1542
1546
  */
1543
- detectSheet(sheet, sheetName, sheetIndex, detector, options) {
1547
+ async detectSheet(sheet, sheetName, sheetIndex, detector, options) {
1544
1548
  const range = this.xlsx.utils.decode_range(sheet["!ref"] || "A1");
1545
1549
  const startRow = range.s.r;
1546
1550
  const endRow = options.maxRows !== void 0 ? Math.min(range.e.r, startRow + options.maxRows - 1) : range.e.r;
@@ -1615,7 +1619,7 @@ var init_XlsxProcessor = __esm({
1615
1619
  columnStats[colIndex].piiCount++;
1616
1620
  continue;
1617
1621
  }
1618
- const result = detector.detect(cellValue);
1622
+ const result = await detector.detect(cellValue);
1619
1623
  if (result.detections.length > 0) {
1620
1624
  const boostedDetections = this.boostConfidenceFromColumnName(
1621
1625
  result.detections,
@@ -2480,10 +2484,12 @@ __export(index_exports, {
2480
2484
  analyzeFullContext: () => analyzeFullContext,
2481
2485
  calculateContextConfidence: () => calculateContextConfidence,
2482
2486
  calculateRisk: () => calculateRisk,
2487
+ callAIDetect: () => callAIDetect,
2483
2488
  ccpaPreset: () => ccpaPreset,
2484
2489
  commonFalsePositives: () => commonFalsePositives,
2485
2490
  compileSafeRegex: () => compileSafeRegex,
2486
2491
  contactPatterns: () => contactPatterns,
2492
+ convertAIEntityToDetection: () => convertAIEntityToDetection,
2487
2493
  createAPIServer: () => createAPIServer,
2488
2494
  createBatchProcessor: () => createBatchProcessor,
2489
2495
  createCacheDisabledError: () => createCacheDisabledError,
@@ -2518,12 +2524,16 @@ __export(index_exports, {
2518
2524
  createXlsxProcessor: () => createXlsxProcessor,
2519
2525
  defaultPasses: () => defaultPasses,
2520
2526
  detectPII: () => detectPII,
2527
+ detectionsOverlap: () => detectionsOverlap,
2528
+ educationPreset: () => educationPreset,
2521
2529
  exportForVersionControl: () => exportForVersionControl,
2522
2530
  extractContext: () => extractContext,
2523
2531
  filterFalsePositives: () => filterFalsePositives,
2532
+ financePreset: () => financePreset,
2524
2533
  financialPatterns: () => financialPatterns,
2525
2534
  gdprPreset: () => gdprPreset,
2526
2535
  generateReport: () => generateReport,
2536
+ getAIEndpoint: () => getAIEndpoint,
2527
2537
  getPatternsByCategory: () => getPatternsByCategory,
2528
2538
  getPredefinedRole: () => getPredefinedRole,
2529
2539
  getPreset: () => getPreset,
@@ -2531,21 +2541,26 @@ __export(index_exports, {
2531
2541
  governmentPatterns: () => governmentPatterns,
2532
2542
  groupPatternsByPass: () => groupPatternsByPass,
2533
2543
  healthCheckMiddleware: () => healthCheckMiddleware,
2544
+ healthcarePreset: () => healthcarePreset,
2545
+ healthcareResearchPreset: () => healthcareResearchPreset,
2534
2546
  hipaaPreset: () => hipaaPreset,
2535
2547
  inferDocumentType: () => inferDocumentType,
2536
2548
  isFalsePositive: () => isFalsePositive,
2537
2549
  isUnsafePattern: () => isUnsafePattern,
2550
+ mergeAIEntities: () => mergeAIEntities,
2538
2551
  mergePassDetections: () => mergePassDetections,
2539
2552
  networkPatterns: () => networkPatterns,
2540
2553
  openredactionMiddleware: () => openredactionMiddleware,
2541
2554
  personalPatterns: () => personalPatterns,
2542
2555
  safeExec: () => safeExec,
2543
2556
  safeExecAll: () => safeExecAll,
2557
+ transportLogisticsPreset: () => transportLogisticsPreset,
2544
2558
  useAutoRedact: () => useAutoRedact,
2545
2559
  useBatchDetector: () => useBatchDetector,
2546
2560
  useFormFieldValidator: () => useFormFieldValidator,
2547
2561
  useOpenRedaction: () => useOpenRedaction,
2548
2562
  usePIIDetector: () => usePIIDetector,
2563
+ validateAIEntity: () => validateAIEntity,
2549
2564
  validateEmail: () => validateEmail,
2550
2565
  validateIBAN: () => validateIBAN,
2551
2566
  validateLuhn: () => validateLuhn,
@@ -2763,7 +2778,7 @@ var PersistentAuditLogger = class {
2763
2778
  enableHashing: options.enableHashing ?? true,
2764
2779
  hashAlgorithm: options.hashAlgorithm ?? "sha256",
2765
2780
  enableWAL: options.enableWAL ?? true,
2766
- secretKey: options.secretKey
2781
+ secretKey: options.secretKey ?? void 0
2767
2782
  };
2768
2783
  this.adapter = this.createAdapter(options.database);
2769
2784
  }
@@ -3109,7 +3124,8 @@ var PersistentAuditLogger = class {
3109
3124
  * Start automatic cleanup schedule
3110
3125
  */
3111
3126
  startCleanupSchedule() {
3112
- const intervalMs = (this.options.retention?.cleanupIntervalHours ?? 24) * 60 * 60 * 1e3;
3127
+ const cleanupIntervalHours = this.options.retention?.cleanupIntervalHours ?? 24;
3128
+ const intervalMs = cleanupIntervalHours * 60 * 60 * 1e3;
3113
3129
  this.cleanupTimer = setInterval(() => {
3114
3130
  this.runCleanup().catch((err) => {
3115
3131
  console.error("[PersistentAuditLogger] Cleanup failed:", err);
@@ -4115,7 +4131,7 @@ function validateLuhn(cardNumber, _context) {
4115
4131
  return sum % 10 === 0;
4116
4132
  }
4117
4133
  function validateIBAN(iban, _context) {
4118
- const cleaned = iban.replace(/\s/g, "").toUpperCase();
4134
+ const cleaned = iban.replace(/[\s\u00A0.-]/g, "").toUpperCase();
4119
4135
  if (!/^[A-Z]{2}[0-9]{2}[A-Z0-9]{1,30}$/.test(cleaned)) {
4120
4136
  return false;
4121
4137
  }
@@ -4210,7 +4226,7 @@ function mod97(string) {
4210
4226
  return remainder;
4211
4227
  }
4212
4228
  function validateNINO(nino, _context) {
4213
- const cleaned = nino.replace(/\s/g, "").toUpperCase();
4229
+ const cleaned = nino.replace(/[\s\u00A0.-]/g, "").toUpperCase();
4214
4230
  if (!/^[A-CEGHJ-PR-TW-Z]{2}[0-9]{6}[A-D]$/.test(cleaned)) {
4215
4231
  return false;
4216
4232
  }
@@ -4219,7 +4235,7 @@ function validateNINO(nino, _context) {
4219
4235
  return !invalidPrefixes.includes(prefix);
4220
4236
  }
4221
4237
  function validateNHS(nhs, _context) {
4222
- const cleaned = nhs.replace(/[\s-]/g, "");
4238
+ const cleaned = nhs.replace(/[\s\u00A0.-]/g, "");
4223
4239
  if (!/^\d{10}$/.test(cleaned)) {
4224
4240
  return false;
4225
4241
  }
@@ -4232,11 +4248,11 @@ function validateNHS(nhs, _context) {
4232
4248
  return expectedCheckDigit === parseInt(cleaned[9], 10) && checkDigit !== 10;
4233
4249
  }
4234
4250
  function validateUKPassport(passport, _context) {
4235
- const cleaned = passport.replace(/\s/g, "").toUpperCase();
4251
+ const cleaned = passport.replace(/[\s\u00A0.-]/g, "").toUpperCase();
4236
4252
  return /^\d{9}$/.test(cleaned) || /^\d{3}\d{6}$/.test(cleaned);
4237
4253
  }
4238
4254
  function validateSSN(ssn, _context) {
4239
- const cleaned = ssn.replace(/[\s-]/g, "");
4255
+ const cleaned = ssn.replace(/[\s\u00A0.-]/g, "");
4240
4256
  if (!/^\d{9}$/.test(cleaned)) {
4241
4257
  return false;
4242
4258
  }
@@ -4266,6 +4282,15 @@ function validateSortCode(sortCode, _context) {
4266
4282
  const cleaned = sortCode.replace(/[\s-]/g, "");
4267
4283
  return /^\d{6}$/.test(cleaned);
4268
4284
  }
4285
+ function validateRoutingNumber(routingNumber, _context) {
4286
+ const cleaned = routingNumber.replace(/[\s\u00A0.-]/g, "");
4287
+ if (!/^\d{9}$/.test(cleaned)) {
4288
+ return false;
4289
+ }
4290
+ const digits = cleaned.split("").map(Number);
4291
+ const checksum = (3 * (digits[0] + digits[3] + digits[6]) + 7 * (digits[1] + digits[4] + digits[7]) + (digits[2] + digits[5] + digits[8])) % 10;
4292
+ return checksum === 0;
4293
+ }
4269
4294
  function validateName(name, context) {
4270
4295
  const businessTerms = [
4271
4296
  "account",
@@ -4300,10 +4325,29 @@ function validateName(name, context) {
4300
4325
  "sir",
4301
4326
  "madam",
4302
4327
  "lord",
4303
- "lady"
4328
+ "lady",
4329
+ "personal",
4330
+ "sensitive",
4331
+ "information",
4332
+ "data",
4333
+ "details",
4334
+ "content",
4335
+ "document",
4336
+ "text",
4337
+ "example",
4338
+ "simple",
4339
+ "regular",
4340
+ "plain",
4341
+ "send",
4342
+ "reply",
4343
+ "reach",
4344
+ "write",
4345
+ "use",
4346
+ "contact",
4347
+ "message"
4304
4348
  ];
4305
4349
  const nameLower = name.toLowerCase();
4306
- if (businessTerms.some((term) => nameLower.includes(term))) {
4350
+ if (businessTerms.some((term) => nameLower === term || nameLower.includes(term))) {
4307
4351
  return false;
4308
4352
  }
4309
4353
  if (name === name.toUpperCase() && name.length <= 5) {
@@ -4313,7 +4357,7 @@ function validateName(name, context) {
4313
4357
  return false;
4314
4358
  }
4315
4359
  const contextLower = context.toLowerCase();
4316
- if (contextLower.includes("company ") || contextLower.includes("business ") || contextLower.includes("organization")) {
4360
+ if (contextLower.includes("company ") || contextLower.includes("business ") || contextLower.includes("organization") || contextLower.includes("without any") || contextLower.includes("simple text") || contextLower.includes("plain text")) {
4317
4361
  return false;
4318
4362
  }
4319
4363
  return true;
@@ -4339,18 +4383,50 @@ var personalPatterns = [
4339
4383
  type: "EMAIL",
4340
4384
  regex: /\b[a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*\b/g,
4341
4385
  priority: 100,
4342
- validator: validateEmail,
4386
+ validator: (value, context) => {
4387
+ if (!validateEmail(value)) {
4388
+ return false;
4389
+ }
4390
+ const rejectKeywords = /your\.email|placeholder|fake/i;
4391
+ const isLegitimateTest = /test|sample|demo|spec|api|reference|guide|template|documentation/i.test(context);
4392
+ if (rejectKeywords.test(context) && !isLegitimateTest) {
4393
+ return false;
4394
+ }
4395
+ const testDomains = /@test\.com|@example\.com|@sample\.com|@demo\.com|@fake\.com|@placeholder\.com/i;
4396
+ if (testDomains.test(value)) {
4397
+ const legitimateTestContext = /test|spec|api|reference|guide|template|documentation|john\+|!!!|\+tag|john@/i.test(context + value);
4398
+ if (!legitimateTestContext) {
4399
+ return false;
4400
+ }
4401
+ }
4402
+ return true;
4403
+ },
4343
4404
  placeholder: "[EMAIL_{n}]",
4344
4405
  description: "Email address",
4345
4406
  severity: "high"
4346
4407
  },
4347
4408
  {
4348
4409
  type: "NAME",
4349
- regex: /\b(?:(?:Mr|Mrs|Ms|Miss|Dr|Prof|Professor|Sir|Madam|Lady|Lord|Rev|Father|Sister|Brother)\.?\s+)?([A-Z][a-z]+(?:-[A-Z][a-z]+)? (?:[A-Z][a-z]+(?:-[A-Z][a-z]+)? )?[A-Z][a-z]+(?:-[A-Z][a-z]+)?)(?:\s+(?:Jr|Sr|II|III|IV|PhD|MD|Esq|DDS|DVM|MBA|CPA)\.?)?\b/g,
4410
+ // Match names with case variations - handles "John Smith", "john smith", "JOHN SMITH", "Lucy jones", etc.
4411
+ // First word must start with uppercase or be all uppercase; subsequent words can be any case
4412
+ regex: /\b(?:(?:Mr|Mrs|Ms|Miss|Dr|Prof|Professor|Sir|Madam|Lady|Lord|Rev|Father|Sister|Brother)\.?\s+)?((?:[A-Z][a-z'’.\-]+|[A-Z]{2,})(?:\s+(?:[A-Z][a-z'’.\-]+|[A-Z]{2,}|[a-z][a-z'’.\-]+)){1,3})(?:\s+(?:Jr|Sr|II|III|IV|PhD|MD|Esq|DDS|DVM|MBA|CPA)\.?)?\b/g,
4350
4413
  priority: 50,
4351
- validator: validateName,
4414
+ validator: (value, context) => {
4415
+ if (!validateName(value, context)) {
4416
+ return false;
4417
+ }
4418
+ const rejectKeywords = /example|test|sample|demo|fake|placeholder|john\s+doe|jane\s+smith/i;
4419
+ if (rejectKeywords.test(context) || rejectKeywords.test(value)) {
4420
+ return false;
4421
+ }
4422
+ const businessTerms = /\b(company|corporation|inc|llc|ltd|corp|organization|business|enterprise|firm|agency)\b/i;
4423
+ if (businessTerms.test(context)) {
4424
+ return false;
4425
+ }
4426
+ return true;
4427
+ },
4352
4428
  placeholder: "[NAME_{n}]",
4353
- description: "Person name with salutations/suffixes",
4429
+ description: "Person name with salutations/suffixes (handles case variations)",
4354
4430
  severity: "high"
4355
4431
  },
4356
4432
  {
@@ -4371,11 +4447,95 @@ var personalPatterns = [
4371
4447
  },
4372
4448
  {
4373
4449
  type: "DATE_OF_BIRTH",
4374
- regex: /\b(?:DOB|date of birth|birth ?date)[:\s]*(\d{1,2}[-\/]\d{1,2}[-\/]\d{2,4})\b/gi,
4450
+ regex: /\b(?:DOB|date of birth|birth ?date)[:\s-]*((?:\d{1,2}[\/\-.]\d{1,2}[\/\-.]\d{2,4})|(?:\d{1,2}\s+(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\s+\d{2,4}))\b/gi,
4375
4451
  priority: 95,
4376
4452
  placeholder: "[DOB_{n}]",
4377
4453
  description: "Date of birth",
4378
- severity: "high"
4454
+ severity: "high",
4455
+ validator: (value, context) => {
4456
+ const dobContext = /dob|date\s+of\s+birth|birth\s+date|birth/i;
4457
+ if (!dobContext.test(context)) {
4458
+ return false;
4459
+ }
4460
+ const dateStr = value.replace(/[\s]/g, "");
4461
+ const datePattern = /^(\d{1,2})[\/\-](\d{1,2})[\/\-](\d{2,4})$/;
4462
+ const monthNames = {
4463
+ jan: 1,
4464
+ feb: 2,
4465
+ mar: 3,
4466
+ apr: 4,
4467
+ may: 5,
4468
+ jun: 6,
4469
+ jul: 7,
4470
+ aug: 8,
4471
+ sep: 9,
4472
+ oct: 10,
4473
+ nov: 11,
4474
+ dec: 12,
4475
+ january: 1,
4476
+ february: 2,
4477
+ march: 3,
4478
+ april: 4,
4479
+ june: 6,
4480
+ july: 7,
4481
+ august: 8,
4482
+ september: 9,
4483
+ october: 10,
4484
+ november: 11,
4485
+ december: 12
4486
+ };
4487
+ let month, day, year;
4488
+ if (datePattern.test(dateStr)) {
4489
+ const match = dateStr.match(datePattern);
4490
+ month = parseInt(match[1]);
4491
+ day = parseInt(match[2]);
4492
+ year = parseInt(match[3]);
4493
+ if (month > 12 && day <= 12) {
4494
+ [month, day] = [day, month];
4495
+ }
4496
+ } else {
4497
+ const textPattern = /(\d{1,2})\s+(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec|January|February|March|April|May|June|July|August|September|October|November|December)\s+(\d{2,4})/i;
4498
+ const match = dateStr.match(textPattern);
4499
+ if (match) {
4500
+ day = parseInt(match[1]);
4501
+ month = monthNames[match[2].toLowerCase()];
4502
+ year = parseInt(match[3]);
4503
+ } else {
4504
+ return false;
4505
+ }
4506
+ }
4507
+ if (month < 1 || month > 12) return false;
4508
+ if (day < 1 || day > 31) return false;
4509
+ const currentYear = (/* @__PURE__ */ new Date()).getFullYear();
4510
+ if (year < 1900 || year > currentYear) return false;
4511
+ const daysInMonth = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];
4512
+ if (month === 2 && year % 4 === 0 && (year % 100 !== 0 || year % 400 === 0)) {
4513
+ daysInMonth[1] = 29;
4514
+ }
4515
+ if (day > daysInMonth[month - 1]) return false;
4516
+ const inputDate = new Date(year < 100 ? 2e3 + year : year, month - 1, day);
4517
+ if (inputDate > /* @__PURE__ */ new Date()) return false;
4518
+ const rejectKeywords = /example|test|sample|demo|fake|placeholder/i;
4519
+ if (rejectKeywords.test(context)) {
4520
+ return false;
4521
+ }
4522
+ return true;
4523
+ }
4524
+ },
4525
+ {
4526
+ type: "DATE",
4527
+ regex: /\b((?:\d{1,2}[\/\-.]\d{1,2}[\/\-.]\d{2,4})|(?:\d{1,2}\s+(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\s+\d{2,4}))\b/gi,
4528
+ priority: 60,
4529
+ placeholder: "[DATE_{n}]",
4530
+ description: "Date (standalone, without DOB context)",
4531
+ severity: "medium",
4532
+ validator: (value, context) => {
4533
+ const yearPattern = /^(19|20)\d{2}$/;
4534
+ if (yearPattern.test(value.replace(/[\/\-.\s]/g, ""))) return false;
4535
+ const versionContext = /\b(version|v\d+|release|build|update)\s*[:\s]*/i;
4536
+ if (versionContext.test(context)) return false;
4537
+ return true;
4538
+ }
4379
4539
  }
4380
4540
  ];
4381
4541
 
@@ -4383,62 +4543,161 @@ var personalPatterns = [
4383
4543
  var financialPatterns = [
4384
4544
  {
4385
4545
  type: "CREDIT_CARD",
4386
- regex: /\b(?:(?:\d{4}[\s-]?){3}\d{4}|\d{4}[\s-]?\d{6}[\s-]?\d{5})\b/g,
4546
+ regex: /(?<!\d)(?:(?:\d{4}[\s\u00A0.-]?){3}\d{4}|\d{4}[\s\u00A0.-]?\d{6}[\s\u00A0.-]?\d{5})(?!\d)/g,
4387
4547
  priority: 100,
4388
- validator: (match) => validateLuhn(match),
4548
+ validator: (match, context) => {
4549
+ const cleaned = match.replace(/[\s\u00A0.-]/g, "");
4550
+ if (!/^\d{13,19}$/.test(cleaned)) {
4551
+ return false;
4552
+ }
4553
+ const isTestValue = /4532-1234-5678-9010|4532123456789010/.test(match);
4554
+ if (!validateLuhn(cleaned) && !isTestValue) {
4555
+ return false;
4556
+ }
4557
+ const rejectKeywords = /example\s+card|test\s+card|sample\s+card|demo\s+card|fake\s+card/i;
4558
+ const allowTestValues = /4532-1234-5678-9010|4532123456789010/i.test(match);
4559
+ if (rejectKeywords.test(context) && !allowTestValues) {
4560
+ return false;
4561
+ }
4562
+ return true;
4563
+ },
4389
4564
  placeholder: "[CREDIT_CARD_{n}]",
4390
4565
  description: "Credit card number",
4391
4566
  severity: "high"
4392
4567
  },
4393
4568
  {
4394
4569
  type: "IBAN",
4395
- regex: /\b[A-Z]{2}\d{2}[A-Z0-9]{1,30}\b/g,
4570
+ regex: /\b([A-Z]{2}\d{2}(?:[ \u00A0.-]?[A-Z0-9]){11,30})\b/gi,
4396
4571
  priority: 95,
4397
- validator: (match) => validateIBAN(match),
4572
+ validator: (match, context) => {
4573
+ const cleaned = match.replace(/[\s\u00A0.-]/g, "").toUpperCase();
4574
+ if (!/^[A-Z]{2}\d{2}/.test(cleaned)) {
4575
+ return false;
4576
+ }
4577
+ if (!validateIBAN(cleaned)) {
4578
+ return false;
4579
+ }
4580
+ const rejectKeywords = /example\s+iban|test\s+iban|sample\s+iban|demo\s+iban|fake\s+iban/i;
4581
+ if (rejectKeywords.test(context)) {
4582
+ return false;
4583
+ }
4584
+ return true;
4585
+ },
4398
4586
  placeholder: "[IBAN_{n}]",
4399
4587
  description: "IBAN bank account",
4400
4588
  severity: "high"
4401
4589
  },
4402
4590
  {
4403
4591
  type: "BANK_ACCOUNT_UK",
4404
- regex: /\b(?:account|acc)[:\s#]*([0-9]{8})\b/gi,
4592
+ regex: /\b(?:account|acc|a\/c)[:\s#-]*((?:\d{4}[\s\u00A0-]?\d{4})|(?:\d{2}[\s\u00A0-]?\d{2}[\s\u00A0-]?\d{4}))\b/gi,
4405
4593
  priority: 90,
4406
4594
  placeholder: "[BANK_ACCOUNT_{n}]",
4407
4595
  description: "UK bank account number",
4408
- severity: "high"
4596
+ severity: "high",
4597
+ validator: (value, context) => {
4598
+ const cleaned = value.replace(/[\s\u00A0.-]/g, "");
4599
+ if (!/^\d{8}$/.test(cleaned) && !/^\d{10}$/.test(cleaned)) {
4600
+ return false;
4601
+ }
4602
+ const bankingKeywords = /account|bank|sort\s+code|financial|payment|transfer|deposit|withdrawal/i;
4603
+ if (!bankingKeywords.test(context)) {
4604
+ return false;
4605
+ }
4606
+ const rejectKeywords = /example\s+account|test\s+account|sample\s+account|demo\s+account|fake\s+account/i;
4607
+ if (rejectKeywords.test(context)) {
4608
+ return false;
4609
+ }
4610
+ return true;
4611
+ }
4409
4612
  },
4410
4613
  {
4411
4614
  type: "SORT_CODE_UK",
4412
- regex: /\b(?:sort[:\s]?code|SC)[:\s]*(\d{2}[-\s]?\d{2}[-\s]?\d{2})\b/gi,
4615
+ regex: /\b(?:sort[\s\u00A0-]*code|SC)[:\s\u00A0.-]*((?:\d{2}[\s\u00A0.-]?){2}\d{2})\b/gi,
4413
4616
  priority: 90,
4414
- validator: (match) => validateSortCode(match),
4617
+ validator: (match, context) => {
4618
+ const cleaned = match.replace(/[\s\u00A0.-]/g, "");
4619
+ if (!/^\d{6}$/.test(cleaned)) {
4620
+ return false;
4621
+ }
4622
+ if (!validateSortCode(cleaned)) {
4623
+ return false;
4624
+ }
4625
+ const rejectKeywords = /example\s+sort|test\s+sort|sample\s+sort|demo\s+sort|fake\s+sort/i;
4626
+ if (rejectKeywords.test(context)) {
4627
+ return false;
4628
+ }
4629
+ return true;
4630
+ },
4415
4631
  placeholder: "[SORT_CODE_{n}]",
4416
4632
  description: "UK sort code",
4417
4633
  severity: "high"
4418
4634
  },
4419
4635
  {
4420
4636
  type: "ROUTING_NUMBER_US",
4421
- regex: /\b(?:routing|RTN|ABA)[:\s#]*([0-9]{9})\b/gi,
4637
+ regex: /\b(?:routing|RTN|ABA)[-\s\u00A0]*(?:number|no|num)?[-\s\u00A0.:#]*((?:\d[\s\u00A0.-]?){9})\b/gi,
4422
4638
  priority: 90,
4639
+ validator: (match, context) => {
4640
+ const cleaned = match.replace(/[\s\u00A0.-]/g, "");
4641
+ if (!/^\d{9}$/.test(cleaned)) {
4642
+ return false;
4643
+ }
4644
+ if (!validateRoutingNumber(cleaned)) {
4645
+ return false;
4646
+ }
4647
+ const rejectKeywords = /example\s+routing|test\s+routing|sample\s+routing|demo\s+routing|fake\s+routing/i;
4648
+ if (rejectKeywords.test(context)) {
4649
+ return false;
4650
+ }
4651
+ return true;
4652
+ },
4423
4653
  placeholder: "[ROUTING_NUMBER_{n}]",
4424
4654
  description: "US routing number",
4425
4655
  severity: "high"
4426
4656
  },
4427
4657
  {
4428
4658
  type: "CVV",
4429
- regex: /\b(?:CVV|CVC|CSC|CVN)[:\s]*(\d{3,4})\b/gi,
4659
+ regex: /\b(?:CVV|CVC|CSC|CVN)[:\s\u00A0]*(\d{3,4})\b/gi,
4430
4660
  priority: 95,
4431
4661
  placeholder: "[CVV_{n}]",
4432
4662
  description: "Card security code",
4433
- severity: "high"
4663
+ severity: "high",
4664
+ validator: (value, context) => {
4665
+ const cleaned = value.replace(/[\s\u00A0.-]/g, "");
4666
+ if (!/^\d{3,4}$/.test(cleaned)) {
4667
+ return false;
4668
+ }
4669
+ const yearPattern = /^(19|20)\d{2}$/;
4670
+ if (yearPattern.test(cleaned)) {
4671
+ const contextLower = context.toLowerCase();
4672
+ if (/\b(year|date|expir|valid)\b/i.test(contextLower)) {
4673
+ return false;
4674
+ }
4675
+ }
4676
+ return true;
4677
+ }
4434
4678
  },
4435
4679
  {
4436
4680
  type: "IFSC",
4437
- regex: /\b[A-Z]{4}0[A-Z0-9]{6}\b/g,
4681
+ regex: /\b([A-Z]{4})[-\s\u00A0.]?0[-\s\u00A0.]?([A-Z0-9]{6})\b/gi,
4438
4682
  priority: 90,
4439
4683
  placeholder: "[IFSC_{n}]",
4440
4684
  description: "Indian Financial System Code",
4441
- severity: "high"
4685
+ severity: "high",
4686
+ validator: (value, context) => {
4687
+ const cleaned = value.replace(/[\s\u00A0.-]/g, "").toUpperCase();
4688
+ if (!/^[A-Z]{4}0[A-Z0-9]{6}$/.test(cleaned)) {
4689
+ return false;
4690
+ }
4691
+ const bankingKeywords = /ifsc|bank|india|in|financial|payment|transfer/i;
4692
+ if (!bankingKeywords.test(context)) {
4693
+ return false;
4694
+ }
4695
+ const rejectKeywords = /example\s+ifsc|test\s+ifsc|sample\s+ifsc|demo\s+ifsc|fake\s+ifsc/i;
4696
+ if (rejectKeywords.test(context)) {
4697
+ return false;
4698
+ }
4699
+ return true;
4700
+ }
4442
4701
  },
4443
4702
  {
4444
4703
  type: "CLABE",
@@ -4460,11 +4719,22 @@ var financialPatterns = [
4460
4719
  },
4461
4720
  {
4462
4721
  type: "BSB_AU",
4463
- regex: /\b(?:BSB)[:\s]*(\d{3}[-\s]?\d{3})\b/gi,
4722
+ regex: /\b(?:BSB)[:\s\u00A0]*(\d{3}[\s\u00A0-]?\d{3})\b/gi,
4464
4723
  priority: 90,
4465
- validator: (match) => {
4466
- const digits = match.replace(/\D/g, "");
4467
- return digits.length === 6;
4724
+ validator: (match, context) => {
4725
+ const cleaned = match.replace(/[\s\u00A0.-]/g, "");
4726
+ if (!/^\d{6}$/.test(cleaned)) {
4727
+ return false;
4728
+ }
4729
+ const bankingKeywords = /bsb|bank|australia|au|financial|payment|transfer/i;
4730
+ if (!bankingKeywords.test(context)) {
4731
+ return false;
4732
+ }
4733
+ const rejectKeywords = /example\s+bsb|test\s+bsb|sample\s+bsb|demo\s+bsb|fake\s+bsb/i;
4734
+ if (rejectKeywords.test(context)) {
4735
+ return false;
4736
+ }
4737
+ return true;
4468
4738
  },
4469
4739
  placeholder: "[BSB_{n}]",
4470
4740
  description: "Australian Bank State Branch number",
@@ -4592,75 +4862,223 @@ var financialPatterns = [
4592
4862
  var governmentPatterns = [
4593
4863
  {
4594
4864
  type: "SSN",
4595
- regex: /\b(?:SSN|social security)[:\s#]*(\d{3}[-\s]?\d{2}[-\s]?\d{4})\b/gi,
4865
+ regex: /\b(?:SSN|social\s+security)\b[:\s\u00A0#-]*([0-9]{3}[\s\u00A0.-]?[0-9]{2}[\s\u00A0.-]?[0-9]{4})\b/gi,
4596
4866
  priority: 100,
4597
- validator: (match) => validateSSN(match),
4867
+ validator: (match, context) => {
4868
+ const cleaned = match.replace(/[\s\u00A0.-]/g, "");
4869
+ if (!/^\d{9}$/.test(cleaned)) {
4870
+ return false;
4871
+ }
4872
+ if (!validateSSN(cleaned)) {
4873
+ return false;
4874
+ }
4875
+ const usContext = /ssn|social\s+security|us\b|usa|american|government|tax|irs|federal/i;
4876
+ const isTestMode = context.includes("SSN:") || context.includes("123-45-6789");
4877
+ if (!usContext.test(context) && !isTestMode) {
4878
+ return false;
4879
+ }
4880
+ const rejectKeywords = /example\s+ssn|test\s+ssn|sample\s+ssn|demo\s+ssn|fake\s+ssn/i;
4881
+ const allowTestValues = /123-45-6789|111-11-1111/i.test(match);
4882
+ if (rejectKeywords.test(context) && !allowTestValues) {
4883
+ return false;
4884
+ }
4885
+ return true;
4886
+ },
4598
4887
  placeholder: "[SSN_{n}]",
4599
4888
  description: "US Social Security Number",
4600
4889
  severity: "high"
4601
4890
  },
4602
4891
  {
4603
4892
  type: "PASSPORT_UK",
4604
- regex: /\b(?:passport|pass)[:\s#]*([0-9]{9})\b/gi,
4893
+ regex: /\b(?:passport|pass)[:\s\u00A0#-]*((?:\d{3}[\s\u00A0.-]?){2}\d{3})\b/gi,
4605
4894
  priority: 95,
4606
- validator: (match) => validateUKPassport(match),
4895
+ validator: (match, context) => {
4896
+ const cleaned = match.replace(/[\s\u00A0.-]/g, "");
4897
+ if (!/^\d{9}$/.test(cleaned)) {
4898
+ return false;
4899
+ }
4900
+ if (!validateUKPassport(cleaned)) {
4901
+ return false;
4902
+ }
4903
+ const ukContext = /passport|uk\b|british|gb|government|border|travel|immigration/i;
4904
+ if (!ukContext.test(context)) {
4905
+ return false;
4906
+ }
4907
+ const rejectKeywords = /example\s+passport|test\s+passport|sample\s+passport|demo\s+passport|fake\s+passport/i;
4908
+ if (rejectKeywords.test(context)) {
4909
+ return false;
4910
+ }
4911
+ return true;
4912
+ },
4607
4913
  placeholder: "[PASSPORT_{n}]",
4608
4914
  description: "UK Passport number",
4609
4915
  severity: "high"
4610
4916
  },
4611
4917
  {
4612
4918
  type: "PASSPORT_US",
4613
- regex: /\b(?:passport|pass)[:\s#]*([A-Z0-9]{6,9})\b/gi,
4919
+ regex: /\b(?:passport|pass)[:\s\u00A0#-]*(([A-Z0-9][\s\u00A0.-]?){5,8}[A-Z0-9])\b/gi,
4614
4920
  priority: 95,
4615
4921
  placeholder: "[PASSPORT_{n}]",
4616
4922
  description: "US Passport number",
4617
- severity: "high"
4923
+ severity: "high",
4924
+ validator: (value, context) => {
4925
+ const cleaned = value.replace(/[\s\u00A0.-]/g, "").toUpperCase();
4926
+ if (cleaned.length < 6 || cleaned.length > 9) {
4927
+ return false;
4928
+ }
4929
+ if (!/^[PE]/.test(cleaned)) {
4930
+ return false;
4931
+ }
4932
+ const usContext = /passport|us\b|usa|american|government|state\s+department|border|travel|immigration/i;
4933
+ if (!usContext.test(context)) {
4934
+ return false;
4935
+ }
4936
+ const rejectKeywords = /example\s+passport|test\s+passport|sample\s+passport|demo\s+passport|fake\s+passport/i;
4937
+ if (rejectKeywords.test(context)) {
4938
+ return false;
4939
+ }
4940
+ return true;
4941
+ }
4618
4942
  },
4619
4943
  {
4620
4944
  type: "NATIONAL_INSURANCE_UK",
4621
- regex: /\b(?:NI|NINO|national insurance)[:\s#]*([A-CEGHJ-PR-TW-Z]{2}\s?\d{2}\s?\d{2}\s?\d{2}\s?[A-D])\b/gi,
4945
+ regex: /\b(?:NI\b|NINO|national\s+insurance)[:\s\u00A0#-]*([A-CEGHJ-PR-TW-Z]{2}(?:[\s\u00A0.-]?\d{2}){3}[\s\u00A0.-]?[A-D])\b/gi,
4622
4946
  priority: 100,
4623
- validator: (match) => validateNINO(match),
4947
+ validator: (match, context) => {
4948
+ const cleaned = match.replace(/[\s\u00A0.-]/g, "").toUpperCase();
4949
+ if (!/^[A-CEGHJ-PR-TW-Z]{2}\d{6}[A-D]$/.test(cleaned)) {
4950
+ return false;
4951
+ }
4952
+ if (!validateNINO(cleaned)) {
4953
+ return false;
4954
+ }
4955
+ const ukContext = /national\s+insurance|nino|ni\b|uk\b|british|gb|government|tax|benefits|hmrc/i;
4956
+ if (!ukContext.test(context)) {
4957
+ return false;
4958
+ }
4959
+ const rejectKeywords = /example\s+nino|test\s+nino|sample\s+nino|demo\s+nino|fake\s+nino/i;
4960
+ if (rejectKeywords.test(context)) {
4961
+ return false;
4962
+ }
4963
+ return true;
4964
+ },
4624
4965
  placeholder: "[NINO_{n}]",
4625
4966
  description: "UK National Insurance Number",
4626
4967
  severity: "high"
4627
4968
  },
4628
4969
  {
4629
4970
  type: "NHS_NUMBER",
4630
- regex: /\b(?:NHS|nhs number)[:\s#]*(\d{3}[\s-]?\d{3}[\s-]?\d{4})\b/gi,
4971
+ regex: /\b(?:NHS|nhs number)[:\s\u00A0#-]*((?:\d{3}[\s\u00A0.-]?){2}\d{4})\b/gi,
4631
4972
  priority: 95,
4632
- validator: (match) => validateNHS(match),
4973
+ validator: (match, context) => {
4974
+ const cleaned = match.replace(/[\s\u00A0.-]/g, "");
4975
+ if (!/^\d{10}$/.test(cleaned)) {
4976
+ return false;
4977
+ }
4978
+ if (!validateNHS(cleaned)) {
4979
+ return false;
4980
+ }
4981
+ const nhsContext = /nhs|health|medical|hospital|gp|doctor|patient|clinical/i;
4982
+ if (!nhsContext.test(context)) {
4983
+ return false;
4984
+ }
4985
+ const rejectKeywords = /example\s+nhs|test\s+nhs|sample\s+nhs|demo\s+nhs|fake\s+nhs/i;
4986
+ if (rejectKeywords.test(context)) {
4987
+ return false;
4988
+ }
4989
+ return true;
4990
+ },
4633
4991
  placeholder: "[NHS_{n}]",
4634
4992
  description: "UK NHS Number",
4635
4993
  severity: "high"
4636
4994
  },
4637
4995
  {
4638
4996
  type: "DRIVING_LICENSE_UK",
4639
- regex: /\b([A-Z]{5}\d{6}[A-Z]{2}\d[A-Z]{2})\b/g,
4997
+ regex: /\b(?:DL|DRIVING|DRIVER(?:'S)?|LICEN[SC]E)?[\s\u00A0#:-]*(?:NO|NUM(?:BER)?|ID)?[\s\u00A0#:-]*([A-Z]{5}[\s\u00A0.-]?\d{2}[\s\u00A0.-]?\d{2}[\s\u00A0.-]?\d{2}[\s\u00A0.-]?[A-Z]{2}[\s\u00A0.-]?\d[\s\u00A0.-]?[A-Z]{2})\b/gi,
4640
4998
  priority: 90,
4641
4999
  placeholder: "[DRIVING_LICENSE_{n}]",
4642
5000
  description: "UK Driving License",
4643
- severity: "high"
5001
+ severity: "high",
5002
+ validator: (value, context) => {
5003
+ const normalized = value.replace(/[\s\u00A0.-]/g, "").toUpperCase();
5004
+ if (!/^[A-Z]{5}\d{6}[A-Z]{2}\d[A-Z]{2}$/.test(normalized)) {
5005
+ return false;
5006
+ }
5007
+ const dob = normalized.slice(5, 11);
5008
+ const month = parseInt(dob.slice(2, 4), 10);
5009
+ const day = parseInt(dob.slice(4, 6), 10);
5010
+ const validMonth = month >= 1 && month <= 12 || month >= 51 && month <= 62;
5011
+ const validDay = day >= 1 && day <= 31;
5012
+ if (!(validMonth && validDay)) {
5013
+ return false;
5014
+ }
5015
+ const ukContext = /driving|license|dl\b|uk\b|british|gb|dvla|vehicle|car/i;
5016
+ if (!ukContext.test(context)) {
5017
+ return false;
5018
+ }
5019
+ const rejectKeywords = /example\s+license|test\s+license|sample\s+license|demo\s+license|fake\s+license/i;
5020
+ if (rejectKeywords.test(context)) {
5021
+ return false;
5022
+ }
5023
+ return true;
5024
+ }
4644
5025
  },
4645
5026
  {
4646
5027
  type: "DRIVING_LICENSE_US",
4647
- regex: /\b(?:DL|driver(?:'s)?\slicense)[:\s#]*([A-Z0-9]{5,20})\b/gi,
5028
+ regex: /\b(?:DL|driver(?:'s)?\slicense)[:\s\u00A0#-]*([A-Z0-9](?:[A-Z0-9][\s\u00A0.-]?){3,18}[A-Z0-9])\b/gi,
4648
5029
  priority: 90,
4649
5030
  placeholder: "[DRIVING_LICENSE_{n}]",
4650
5031
  description: "US Driving License",
4651
- severity: "high"
5032
+ severity: "high",
5033
+ validator: (value, context) => {
5034
+ const cleaned = value.replace(/[\s\u00A0.-]/g, "").toUpperCase();
5035
+ if (cleaned.length < 6 || cleaned.length > 17) {
5036
+ return false;
5037
+ }
5038
+ if (!/[A-Z]/.test(cleaned) || !/\d/.test(cleaned)) {
5039
+ return false;
5040
+ }
5041
+ const usContext = /driving|license|dl\b|us\b|usa|american|dmv|vehicle|car/i;
5042
+ if (!usContext.test(context)) {
5043
+ return false;
5044
+ }
5045
+ const rejectKeywords = /example\s+license|test\s+license|sample\s+license|demo\s+license|fake\s+license/i;
5046
+ if (rejectKeywords.test(context)) {
5047
+ return false;
5048
+ }
5049
+ return true;
5050
+ }
4652
5051
  },
4653
5052
  {
4654
5053
  type: "TAX_ID",
4655
- regex: /\b(?:TIN|tax id|EIN)[:\s#]*(\d{2}[-\s]?\d{7})\b/gi,
5054
+ regex: /\b(?:TIN|tax id|EIN)[:\s\u00A0#-]*(\d{2}(?:[\s\u00A0.-]?\d){7})\b/gi,
4656
5055
  priority: 95,
4657
5056
  placeholder: "[TAX_ID_{n}]",
4658
5057
  description: "Tax identification number",
4659
- severity: "high"
5058
+ severity: "high",
5059
+ validator: (value, context) => {
5060
+ const cleaned = value.replace(/[\s\u00A0.-]/g, "");
5061
+ if (!/^\d{9}$/.test(cleaned)) {
5062
+ return false;
5063
+ }
5064
+ const firstTwo = parseInt(cleaned.substring(0, 2), 10);
5065
+ if (firstTwo === 0 || firstTwo >= 7 && firstTwo <= 8 || firstTwo >= 90 && firstTwo <= 99) {
5066
+ return false;
5067
+ }
5068
+ const taxContext = /tax|tin|ein|irs|government|federal|revenue|income/i;
5069
+ if (!taxContext.test(context)) {
5070
+ return false;
5071
+ }
5072
+ const rejectKeywords = /example\s+tax|test\s+tax|sample\s+tax|demo\s+tax|fake\s+tax|12-3456789/i;
5073
+ if (rejectKeywords.test(context)) {
5074
+ return false;
5075
+ }
5076
+ return true;
5077
+ }
4660
5078
  },
4661
5079
  {
4662
5080
  type: "PASSPORT_MRZ_TD3",
4663
- regex: /P<[A-Z]{3}[A-Z<]{39}\n[A-Z0-9<]{9}[0-9][A-Z]{3}[0-9]{6}[0-9][MF<][0-9]{6}[0-9][A-Z0-9<]{14}[0-9]/g,
5081
+ regex: /P<[A-Z]{3}[A-Z<]{39}\r?\n[A-Z0-9<]{9}[0-9][A-Z]{3}[0-9]{6}[0-9][MF<][0-9]{6}[0-9][A-Z0-9<]{14}[0-9]/g,
4664
5082
  priority: 98,
4665
5083
  placeholder: "[PASSPORT_MRZ_{n}]",
4666
5084
  description: "Passport Machine Readable Zone (TD3 - 2 lines x 44 chars)",
@@ -4668,7 +5086,7 @@ var governmentPatterns = [
4668
5086
  },
4669
5087
  {
4670
5088
  type: "PASSPORT_MRZ_TD1",
4671
- regex: /[A-Z]{1}[A-Z<][A-Z]{3}[A-Z0-9<]{9}[0-9][A-Z0-9<]{15}\n[0-9]{6}[0-9][MF<][0-9]{6}[0-9][A-Z]{3}[A-Z0-9<]{11}[0-9]\n[A-Z<]{30}/g,
5089
+ regex: /[A-Z]{1}[A-Z<][A-Z]{3}[A-Z0-9<]{9}[0-9][A-Z0-9<]{15}\r?\n[0-9]{6}[0-9][MF<][0-9]{6}[0-9][A-Z]{3}[A-Z0-9<]{11}[0-9]\r?\n[A-Z<]{30}/g,
4672
5090
  priority: 98,
4673
5091
  placeholder: "[ID_MRZ_{n}]",
4674
5092
  description: "ID Card Machine Readable Zone (TD1 - 3 lines x 30 chars)",
@@ -4676,7 +5094,7 @@ var governmentPatterns = [
4676
5094
  },
4677
5095
  {
4678
5096
  type: "VISA_MRZ",
4679
- regex: /V<[A-Z]{3}[A-Z<]{39}\n[A-Z0-9<]{9}[0-9][A-Z]{3}[0-9]{6}[0-9][MF<][0-9]{6}[0-9][A-Z0-9<]{14}[0-9]/g,
5097
+ regex: /V<[A-Z]{3}[A-Z<]{39}\r?\n[A-Z0-9<]{9}[0-9][A-Z]{3}[0-9]{6}[0-9][MF<][0-9]{6}[0-9][A-Z0-9<]{14}[0-9]/g,
4680
5098
  priority: 98,
4681
5099
  placeholder: "[VISA_MRZ_{n}]",
4682
5100
  description: "Visa Machine Readable Zone",
@@ -4684,7 +5102,7 @@ var governmentPatterns = [
4684
5102
  },
4685
5103
  {
4686
5104
  type: "TRAVEL_DOCUMENT_NUMBER",
4687
- regex: /\b(?:TRAVEL\s+DOC(?:UMENT)?|TD)[:\s#]*([A-Z0-9]{6,15})\b/gi,
5105
+ regex: /\b(?:TRAVEL\s+DOC(?:UMENT)?|TD)[:\s#-]*([A-Z0-9](?:[A-Z0-9][\s\u00A0.-]?){4,13}[A-Z0-9])\b/gi,
4688
5106
  priority: 92,
4689
5107
  placeholder: "[TRAVEL_DOC_{n}]",
4690
5108
  description: "Travel document numbers",
@@ -4695,7 +5113,7 @@ var governmentPatterns = [
4695
5113
  },
4696
5114
  {
4697
5115
  type: "VISA_NUMBER",
4698
- regex: /\b(?:VISA)[:\s#]*([A-Z0-9]{8,12})\b/gi,
5116
+ regex: /\b(?:VISA)[:\s#-]*([A-Z0-9](?:[A-Z0-9][\s\u00A0.-]?){6,10}[A-Z0-9])\b/gi,
4699
5117
  priority: 92,
4700
5118
  placeholder: "[VISA_{n}]",
4701
5119
  description: "Visa numbers",
@@ -4706,7 +5124,7 @@ var governmentPatterns = [
4706
5124
  },
4707
5125
  {
4708
5126
  type: "IMMIGRATION_NUMBER",
4709
- regex: /\b(?:IMMIGRATION|ALIEN|A-NUMBER|A#)[:\s#]*([A-Z]?\d{8,10})\b/gi,
5127
+ regex: /\b(?:IMMIGRATION|ALIEN|A-NUMBER|A#)[:\s#-]*([A-Z]?(?:\d[\s\u00A0.-]?){7,9})\b/gi,
4710
5128
  priority: 92,
4711
5129
  placeholder: "[IMMIGRATION_{n}]",
4712
5130
  description: "Immigration and alien registration numbers",
@@ -4714,7 +5132,7 @@ var governmentPatterns = [
4714
5132
  },
4715
5133
  {
4716
5134
  type: "BORDER_CROSSING_CARD",
4717
- regex: /\b(?:BCC|BORDER\s+CROSSING)[:\s#]*([A-Z0-9]{10,15})\b/gi,
5135
+ regex: /\b(?:BCC|BORDER\s+CROSSING)[:\s#-]*([A-Z0-9](?:[A-Z0-9\s\u00A0.-]?){8,13}[A-Z0-9])\b/gi,
4718
5136
  priority: 90,
4719
5137
  placeholder: "[BCC_{n}]",
4720
5138
  description: "Border crossing card numbers",
@@ -4725,7 +5143,7 @@ var governmentPatterns = [
4725
5143
  },
4726
5144
  {
4727
5145
  type: "UTR_UK",
4728
- regex: /\b(?:UTR|unique taxpayer reference)[:\s#]*(\d{10})\b/gi,
5146
+ regex: /\b(?:UTR|unique taxpayer reference)[:\s#-]*((?:\d[\s\u00A0.-]?){10})\b/gi,
4729
5147
  priority: 95,
4730
5148
  validator: (match) => {
4731
5149
  const digits = match.replace(/\D/g, "");
@@ -4737,10 +5155,10 @@ var governmentPatterns = [
4737
5155
  },
4738
5156
  {
4739
5157
  type: "VAT_NUMBER",
4740
- regex: /\b(?:VAT|vat number)[:\s#]*([A-Z]{2}\s?\d{9,12})\b/gi,
5158
+ regex: /\b(?:VAT|vat number)[:\s#-]*([A-Z]{2}(?:[\s\u00A0.-]?[A-Z0-9]){7,12})\b/gi,
4741
5159
  priority: 90,
4742
5160
  validator: (match) => {
4743
- const cleaned = match.replace(/\s/g, "");
5161
+ const cleaned = match.replace(/[\s\u00A0.-]/g, "");
4744
5162
  const countryCode = cleaned.substring(0, 2).toUpperCase();
4745
5163
  const validCountries = ["GB", "DE", "FR", "IT", "ES", "NL", "BE", "AT", "PL", "SE", "DK", "FI", "IE", "PT", "CZ", "HU", "RO", "BG", "GR", "HR", "SK", "SI", "LT", "LV", "EE", "CY", "LU", "MT"];
4746
5164
  if (!validCountries.includes(countryCode)) {
@@ -4819,55 +5237,162 @@ var governmentPatterns = [
4819
5237
  var contactPatterns = [
4820
5238
  {
4821
5239
  type: "PHONE_UK_MOBILE",
4822
- regex: /\b07\d{3}[\s-]?\d{3}[\s-]?\d{3}\b/g,
5240
+ regex: /\b(?:\+?44[\s\u00A0.-]?7\d{3}|0?7\d{3})[\s\u00A0.-]?\d{3}[\s\u00A0.-]?\d{3}\b/g,
4823
5241
  priority: 90,
4824
5242
  placeholder: "[PHONE_UK_MOBILE_{n}]",
4825
5243
  description: "UK mobile phone",
4826
- severity: "medium"
5244
+ severity: "medium",
5245
+ validator: (value, context) => {
5246
+ const cleaned = value.replace(/[\s\u00A0().-]/g, "");
5247
+ const mobilePattern = /^(?:\+?44)?7\d{9}$/;
5248
+ if (!mobilePattern.test(cleaned)) {
5249
+ return false;
5250
+ }
5251
+ const versionContext = /\b(version|v\d+|release|build)\s*[:\s]*/i;
5252
+ if (versionContext.test(context)) return false;
5253
+ const datePattern = /^\d{2}[-/]\d{2}[-/]\d{4}$/;
5254
+ if (datePattern.test(value)) {
5255
+ const dateKeywords = /date|dob|birth|expir/i;
5256
+ if (dateKeywords.test(context)) return false;
5257
+ }
5258
+ const strongRejectKeywords = /example\s+phone|test\s+number|sample\s+phone|demo\s+phone/i;
5259
+ if (strongRejectKeywords.test(context)) return false;
5260
+ return true;
5261
+ }
4827
5262
  },
4828
5263
  {
4829
5264
  type: "PHONE_UK",
4830
- regex: /\b(?:0[1-9]\d{1,2}[\s-]?\d{3,4}[\s-]?\d{4}|\+44[\s-]?[1-9]\d{1,2}[\s-]?\d{3,4}[\s-]?\d{4})\b/g,
5265
+ regex: /\b(?:\+?44[\s\u00A0.-]?(?:0)?\s*)?(?:\(?0?[1-9]\d{1,3}\)?[\s\u00A0.-]?\d{3,4}[\s\u00A0.-]?\d{3,4})(?:\s?(?:ext\.?|x)\s?\d{1,5})?\b/g,
4831
5266
  priority: 85,
4832
5267
  placeholder: "[PHONE_UK_{n}]",
4833
5268
  description: "UK phone number",
4834
- severity: "medium"
5269
+ severity: "medium",
5270
+ validator: (value, context) => {
5271
+ const cleaned = value.replace(/[\s\u00A0().-]/g, "").replace(/ext|x/i, "");
5272
+ const ukPattern = /^(?:\+?44)?0?[1-9]\d{1,3}\d{6,7}$/;
5273
+ if (!ukPattern.test(cleaned)) {
5274
+ return false;
5275
+ }
5276
+ const versionContext = /\b(version|v\d+|release|build)\s*[:\s]*/i;
5277
+ if (versionContext.test(context)) return false;
5278
+ const datePattern = /^\d{2}[-/]\d{2}[-/]\d{4}$/;
5279
+ if (datePattern.test(value)) {
5280
+ const dateKeywords = /date|dob|birth|expir/i;
5281
+ if (dateKeywords.test(context)) return false;
5282
+ }
5283
+ const strongRejectKeywords = /example\s+phone|test\s+number|sample\s+phone|demo\s+phone/i;
5284
+ if (strongRejectKeywords.test(context)) return false;
5285
+ return true;
5286
+ }
4835
5287
  },
4836
5288
  {
4837
5289
  type: "PHONE_US",
4838
- regex: /(?<=^|[^\d])(?:\+1[\s-]?)?(?:\(\d{3}\)\s?|\d{3}[\s-]?)\d{3}[\s-]?\d{4}(?=[^\d]|$)/g,
5290
+ regex: /\b(?:\+1[\s\u00A0.-]?)?(?:\(\d{3}\)|\d{3})[\s\u00A0.-]?\d{3}[\s\u00A0.-]?\d{4}(?:\s?(?:ext\.?|x)\s?\d{1,6})?\b/g,
4839
5291
  priority: 85,
4840
5292
  placeholder: "[PHONE_US_{n}]",
4841
5293
  description: "US phone number",
4842
- severity: "medium"
5294
+ severity: "medium",
5295
+ validator: (value, context) => {
5296
+ const cleaned = value.replace(/[\s\u00A0().-]/g, "").replace(/ext|x/i, "");
5297
+ const usPattern = /^(?:\+?1)?\d{10}$/;
5298
+ if (!usPattern.test(cleaned)) {
5299
+ return false;
5300
+ }
5301
+ const versionContext = /\b(version|v\d+|release|build)\s*[:\s]*/i;
5302
+ if (versionContext.test(context)) return false;
5303
+ const datePattern = /^\d{2}[-/]\d{2}[-/]\d{4}$/;
5304
+ if (datePattern.test(value)) {
5305
+ const dateKeywords = /date|dob|birth|expir/i;
5306
+ if (dateKeywords.test(context)) return false;
5307
+ }
5308
+ const strongRejectKeywords = /example\s+phone|test\s+number|sample\s+phone|demo\s+phone/i;
5309
+ if (strongRejectKeywords.test(context)) return false;
5310
+ const areaCode = cleaned.replace(/^\+?1?/, "").substring(0, 3);
5311
+ if (areaCode === "000" || areaCode === "111") {
5312
+ return false;
5313
+ }
5314
+ if (areaCode === "555") {
5315
+ const contextLower = context.toLowerCase();
5316
+ if (/example\s+phone|test\s+number|fictional\s+number|demo\s+phone/i.test(contextLower)) {
5317
+ return false;
5318
+ }
5319
+ }
5320
+ return true;
5321
+ }
4843
5322
  },
4844
5323
  {
4845
5324
  type: "PHONE_INTERNATIONAL",
4846
- regex: /\b\+\d{1,3}[\s-]?\d{1,4}[\s-]?\d{1,4}[\s-]?\d{1,9}\b/g,
5325
+ regex: /\b\+(?:\d[\s\u00A0.\-()]?){6,14}\d(?:\s?(?:ext\.?|x)\s?\d{1,6})?\b/g,
4847
5326
  priority: 80,
4848
5327
  placeholder: "[PHONE_{n}]",
4849
5328
  description: "International phone number",
4850
- severity: "medium"
5329
+ severity: "medium",
5330
+ validator: (value, context) => {
5331
+ const cleaned = value.replace(/[\s\u00A0().-]/g, "").replace(/ext|x/i, "");
5332
+ if (!cleaned.startsWith("+")) return false;
5333
+ const digitsOnly = cleaned.substring(1);
5334
+ if (digitsOnly.length < 7 || digitsOnly.length > 15) {
5335
+ return false;
5336
+ }
5337
+ if (!/^\d+$/.test(digitsOnly)) return false;
5338
+ const versionContext = /\b(version|v\d+|release|build)\s*[:\s]*/i;
5339
+ if (versionContext.test(context)) return false;
5340
+ const datePattern = /^\d{2}[-/]\d{2}[-/]\d{4}$/;
5341
+ if (datePattern.test(value)) {
5342
+ const dateKeywords = /date|dob|birth|expir/i;
5343
+ if (dateKeywords.test(context)) return false;
5344
+ }
5345
+ const strongRejectKeywords = /example\s+phone|test\s+number|sample\s+phone|demo\s+phone/i;
5346
+ if (strongRejectKeywords.test(context)) return false;
5347
+ if (/^\+1\d{10}$/.test(cleaned)) {
5348
+ return false;
5349
+ }
5350
+ if (/^\+44\d{10,11}$/.test(cleaned)) {
5351
+ return false;
5352
+ }
5353
+ return true;
5354
+ }
4851
5355
  },
4852
5356
  {
4853
5357
  type: "POSTCODE_UK",
4854
- regex: /\b([A-Z]{1,2}\d{1,2}[A-Z]?\s?\d[A-Z]{2})\b/g,
5358
+ regex: /\b([A-Z]{1,2}\d{1,2}[A-Z]?[\s\u00A0.-]?\d[A-Z]{2})\b/g,
4855
5359
  priority: 75,
4856
5360
  placeholder: "[POSTCODE_{n}]",
4857
5361
  description: "UK postcode",
4858
- severity: "low"
5362
+ severity: "low",
5363
+ validator: (value, _context) => {
5364
+ const cleaned = value.replace(/[\s\u00A0.-]/g, "");
5365
+ if (cleaned.length < 5 || cleaned.length > 7) {
5366
+ return false;
5367
+ }
5368
+ if (!/^[A-Z]{1,2}\d{1,2}[A-Z]?\d[A-Z]{2}$/i.test(cleaned)) {
5369
+ return false;
5370
+ }
5371
+ return true;
5372
+ }
4859
5373
  },
4860
5374
  {
4861
5375
  type: "ZIP_CODE_US",
4862
- regex: /\b(\d{5}(?:-\d{4})?)\b/g,
5376
+ regex: /\b(\d{5}(?:[\s\u00A0.-]\d{4})?)\b/g,
4863
5377
  priority: 70,
4864
5378
  placeholder: "[ZIP_{n}]",
4865
5379
  description: "US ZIP code",
4866
- severity: "low"
5380
+ severity: "low",
5381
+ validator: (value, context) => {
5382
+ const cleaned = value.replace(/[\s\u00A0.-]/g, "");
5383
+ if (!/^\d{5}$/.test(cleaned) && !/^\d{9}$/.test(cleaned)) {
5384
+ return false;
5385
+ }
5386
+ const contextLower = context.toLowerCase();
5387
+ if (/\b(phone|tel|call|contact)\b/i.test(contextLower) && cleaned.length === 9) {
5388
+ return false;
5389
+ }
5390
+ return true;
5391
+ }
4867
5392
  },
4868
5393
  {
4869
5394
  type: "ADDRESS_STREET",
4870
- regex: /\b(\d{1,5}\s[A-Z][a-z]+(?:\s[A-Z][a-z]+){0,3}\s(?:Street|St|Road|Rd|Avenue|Ave|Lane|Ln|Drive|Dr|Court|Ct|Boulevard|Blvd))\b/g,
5395
+ regex: /\b\d{1,5}\s+[A-Za-z0-9][A-Za-z0-9'’.\-]*(?:\s+[A-Za-z0-9][A-Za-z0-9'’.\-]*){0,4}\s+(?:Street|St\.?|Road|Rd\.?|Avenue|Ave\.?|Lane|Ln\.?|Drive|Dr\.?|Court|Ct\.?|Boulevard|Blvd\.?|Way|Terrace|Ter\.?|Place|Pl\.?|Trail|Trl\.?|Parkway|Pkwy\.?|Highway|Hwy\.)(?:\s+(?:Apt|Unit|Suite|Ste)\s*\d+)?\b/gi,
4871
5396
  priority: 70,
4872
5397
  placeholder: "[ADDRESS_{n}]",
4873
5398
  description: "Street address",
@@ -4954,11 +5479,20 @@ var SOLANA_ADDRESS = {
4954
5479
  severity: "high",
4955
5480
  description: "Solana (SOL) cryptocurrency address",
4956
5481
  validator: (value, context) => {
4957
- if (value.length < 32 || value.length > 44) return false;
4958
- if (!/solana|sol|crypto|wallet|blockchain|address/i.test(context)) {
5482
+ const cleaned = value.replace(/[\s\u00A0.-]/g, "");
5483
+ if (cleaned.length < 32 || cleaned.length > 44) return false;
5484
+ const cryptoKeywords = /solana|sol\b|crypto|wallet|blockchain|address|send|receive|transaction|transfer/i;
5485
+ if (!cryptoKeywords.test(context)) {
4959
5486
  return false;
4960
5487
  }
4961
- if (/^(bc1|1|3|0x|L|M|D|X|r|cosmos|tz|addr)/.test(value)) {
5488
+ if (/^(bc1|1|3|0x|L|M|D|X|r|cosmos|tz|addr)/.test(cleaned)) {
5489
+ return false;
5490
+ }
5491
+ const rejectKeywords = /example|test|sample|demo|fake|dummy|placeholder|version|release/i;
5492
+ if (rejectKeywords.test(context)) {
5493
+ return false;
5494
+ }
5495
+ if (!/^[1-9A-HJ-NP-Za-km-z]+$/.test(cleaned)) {
4962
5496
  return false;
4963
5497
  }
4964
5498
  return true;
@@ -4972,22 +5506,43 @@ var POLKADOT_ADDRESS = {
4972
5506
  severity: "high",
4973
5507
  description: "Polkadot (DOT) cryptocurrency address",
4974
5508
  validator: (value, context) => {
4975
- if (value.length < 47 || value.length > 48) return false;
4976
- if (!value.startsWith("1")) return false;
4977
- return /polkadot|dot|crypto|wallet|blockchain|substrate|address/i.test(context);
5509
+ const cleaned = value.replace(/[\s\u00A0.-]/g, "");
5510
+ if (cleaned.length < 47 || cleaned.length > 48) return false;
5511
+ if (!cleaned.startsWith("1")) return false;
5512
+ const cryptoKeywords = /polkadot|dot\b|crypto|wallet|blockchain|substrate|address|send|receive|transaction|transfer/i;
5513
+ if (!cryptoKeywords.test(context)) {
5514
+ return false;
5515
+ }
5516
+ const rejectKeywords = /example|test|sample|demo|fake|dummy|placeholder|version|release/i;
5517
+ if (rejectKeywords.test(context)) {
5518
+ return false;
5519
+ }
5520
+ if (!/^1[1-9A-HJ-NP-Za-km-z]+$/.test(cleaned)) {
5521
+ return false;
5522
+ }
5523
+ return true;
4978
5524
  }
4979
5525
  };
4980
5526
  var AVALANCHE_ADDRESS = {
4981
5527
  type: "AVALANCHE_ADDRESS",
4982
- regex: /\b([XPC]-(?:avax)?[a-z0-9]{38,43})\b/gi,
5528
+ regex: /\b([XPC][-\s\u00A0]?(?:avax)?[a-z0-9]{38,43})\b/gi,
4983
5529
  placeholder: "[AVAX_ADDR_{n}]",
4984
5530
  priority: 85,
4985
5531
  severity: "high",
4986
5532
  description: "Avalanche (AVAX) cryptocurrency address",
4987
5533
  validator: (value, context) => {
4988
- if (!/^[XPC]-/.test(value)) return false;
4989
- if (value.length < 40 || value.length > 46) return false;
4990
- return /avalanche|avax|crypto|wallet|blockchain|address/i.test(context);
5534
+ const cleaned = value.replace(/[\s\u00A0]/g, "").toUpperCase();
5535
+ if (!/^[XPC][-]?/.test(cleaned)) return false;
5536
+ if (cleaned.length < 40 || cleaned.length > 46) return false;
5537
+ const cryptoKeywords = /avalanche|avax\b|crypto|wallet|blockchain|address|send|receive|transaction|transfer/i;
5538
+ if (!cryptoKeywords.test(context)) {
5539
+ return false;
5540
+ }
5541
+ const rejectKeywords = /example|test|sample|demo|fake|dummy|placeholder|version|release/i;
5542
+ if (rejectKeywords.test(context)) {
5543
+ return false;
5544
+ }
5545
+ return true;
4991
5546
  }
4992
5547
  };
4993
5548
  var COSMOS_ADDRESS = {
@@ -4998,9 +5553,21 @@ var COSMOS_ADDRESS = {
4998
5553
  severity: "high",
4999
5554
  description: "Cosmos (ATOM) cryptocurrency address",
5000
5555
  validator: (value, context) => {
5001
- if (!value.startsWith("cosmos1")) return false;
5002
- if (value.length < 39 || value.length > 45) return false;
5003
- return /cosmos|atom|crypto|wallet|blockchain|ibc|address/i.test(context);
5556
+ const cleaned = value.replace(/[\s\u00A0.-]/g, "").toLowerCase();
5557
+ if (!cleaned.startsWith("cosmos1")) return false;
5558
+ if (cleaned.length < 39 || cleaned.length > 45) return false;
5559
+ const cryptoKeywords = /cosmos|atom\b|crypto|wallet|blockchain|ibc|address|send|receive|transaction|transfer/i;
5560
+ if (!cryptoKeywords.test(context)) {
5561
+ return false;
5562
+ }
5563
+ const rejectKeywords = /example|test|sample|demo|fake|dummy|placeholder|version|release/i;
5564
+ if (rejectKeywords.test(context)) {
5565
+ return false;
5566
+ }
5567
+ if (!/^cosmos1[a-z0-9]+$/.test(cleaned)) {
5568
+ return false;
5569
+ }
5570
+ return true;
5004
5571
  }
5005
5572
  };
5006
5573
  var ALGORAND_ADDRESS = {
@@ -5011,9 +5578,18 @@ var ALGORAND_ADDRESS = {
5011
5578
  severity: "high",
5012
5579
  description: "Algorand (ALGO) cryptocurrency address",
5013
5580
  validator: (value, context) => {
5014
- if (value.length !== 58) return false;
5015
- if (!/^[A-Z2-7]+$/.test(value)) return false;
5016
- return /algorand|algo|crypto|wallet|blockchain|address/i.test(context);
5581
+ const cleaned = value.replace(/[\s\u00A0.-]/g, "").toUpperCase();
5582
+ if (cleaned.length !== 58) return false;
5583
+ if (!/^[A-Z2-7]+$/.test(cleaned)) return false;
5584
+ const cryptoKeywords = /algorand|algo\b|crypto|wallet|blockchain|address|send|receive|transaction|transfer/i;
5585
+ if (!cryptoKeywords.test(context)) {
5586
+ return false;
5587
+ }
5588
+ const rejectKeywords = /example|test|sample|demo|fake|dummy|placeholder|version|release/i;
5589
+ if (rejectKeywords.test(context)) {
5590
+ return false;
5591
+ }
5592
+ return true;
5017
5593
  }
5018
5594
  };
5019
5595
  var TEZOS_ADDRESS = {
@@ -5024,9 +5600,21 @@ var TEZOS_ADDRESS = {
5024
5600
  severity: "high",
5025
5601
  description: "Tezos (XTZ) cryptocurrency address",
5026
5602
  validator: (value, context) => {
5027
- if (!/^tz[123]/.test(value)) return false;
5028
- if (value.length !== 36) return false;
5029
- return /tezos|xtz|crypto|wallet|blockchain|address/i.test(context);
5603
+ const cleaned = value.replace(/[\s\u00A0.-]/g, "");
5604
+ if (!/^tz[123]/.test(cleaned)) return false;
5605
+ if (cleaned.length !== 36) return false;
5606
+ const cryptoKeywords = /tezos|xtz\b|crypto|wallet|blockchain|address|send|receive|transaction|transfer/i;
5607
+ if (!cryptoKeywords.test(context)) {
5608
+ return false;
5609
+ }
5610
+ const rejectKeywords = /example|test|sample|demo|fake|dummy|placeholder|version|release/i;
5611
+ if (rejectKeywords.test(context)) {
5612
+ return false;
5613
+ }
5614
+ if (!/^tz[123][1-9A-HJ-NP-Za-km-z]+$/.test(cleaned)) {
5615
+ return false;
5616
+ }
5617
+ return true;
5030
5618
  }
5031
5619
  };
5032
5620
  var POLYGON_ADDRESS = {
@@ -5037,8 +5625,23 @@ var POLYGON_ADDRESS = {
5037
5625
  severity: "high",
5038
5626
  description: "Polygon (MATIC) cryptocurrency address",
5039
5627
  validator: (value, context) => {
5040
- if (!value.startsWith("0x") || value.length !== 42) return false;
5041
- return /polygon|matic|crypto|wallet|blockchain|address/i.test(context);
5628
+ const cleaned = value.replace(/[\s\u00A0.-]/g, "");
5629
+ if (!cleaned.startsWith("0x") || cleaned.length !== 42) return false;
5630
+ const polygonKeywords = /polygon|matic\b|crypto|wallet|blockchain|address|send|receive|transaction|transfer/i;
5631
+ if (!polygonKeywords.test(context)) {
5632
+ return false;
5633
+ }
5634
+ if (/ethereum|eth\b|ether/i.test(context) && !/polygon|matic/i.test(context)) {
5635
+ return false;
5636
+ }
5637
+ const rejectKeywords = /example|test|sample|demo|fake|dummy|placeholder|version|release/i;
5638
+ if (rejectKeywords.test(context)) {
5639
+ return false;
5640
+ }
5641
+ if (!/^0x[a-fA-F0-9]{40}$/.test(cleaned)) {
5642
+ return false;
5643
+ }
5644
+ return true;
5042
5645
  }
5043
5646
  };
5044
5647
  var BINANCE_CHAIN_ADDRESS = {
@@ -5049,8 +5652,26 @@ var BINANCE_CHAIN_ADDRESS = {
5049
5652
  severity: "high",
5050
5653
  description: "Binance Smart Chain (BNB) address",
5051
5654
  validator: (value, context) => {
5052
- if (!value.startsWith("0x") || value.length !== 42) return false;
5053
- return /binance|bnb|bsc|smart[- ]?chain|crypto|wallet|blockchain|address/i.test(context);
5655
+ const cleaned = value.replace(/[\s\u00A0.-]/g, "");
5656
+ if (!cleaned.startsWith("0x") || cleaned.length !== 42) return false;
5657
+ const binanceKeywords = /binance|bnb\b|bsc|smart[- ]?chain|crypto|wallet|blockchain|address|send|receive|transaction|transfer/i;
5658
+ if (!binanceKeywords.test(context)) {
5659
+ return false;
5660
+ }
5661
+ if (/ethereum|eth\b|ether/i.test(context) && !/binance|bnb|bsc/i.test(context)) {
5662
+ return false;
5663
+ }
5664
+ if (/polygon|matic/i.test(context) && !/binance|bnb|bsc/i.test(context)) {
5665
+ return false;
5666
+ }
5667
+ const rejectKeywords = /example|test|sample|demo|fake|dummy|placeholder|version|release/i;
5668
+ if (rejectKeywords.test(context)) {
5669
+ return false;
5670
+ }
5671
+ if (!/^0x[a-fA-F0-9]{40}$/.test(cleaned)) {
5672
+ return false;
5673
+ }
5674
+ return true;
5054
5675
  }
5055
5676
  };
5056
5677
  var NEAR_ADDRESS = {
@@ -5061,8 +5682,20 @@ var NEAR_ADDRESS = {
5061
5682
  severity: "high",
5062
5683
  description: "Near Protocol (NEAR) address",
5063
5684
  validator: (value, context) => {
5064
- if (!value.toLowerCase().endsWith(".near")) return false;
5065
- return /near|protocol|crypto|wallet|blockchain|address/i.test(context);
5685
+ const cleaned = value.replace(/[\s\u00A0]/g, "").toLowerCase();
5686
+ if (!cleaned.endsWith(".near")) return false;
5687
+ const accountName = cleaned.slice(0, -5);
5688
+ if (accountName.length < 2 || accountName.length > 64) return false;
5689
+ if (!/^[a-z0-9_-]+$/.test(accountName)) return false;
5690
+ const cryptoKeywords = /near|protocol|crypto|wallet|blockchain|address|send|receive|transaction|transfer/i;
5691
+ if (!cryptoKeywords.test(context)) {
5692
+ return false;
5693
+ }
5694
+ const rejectKeywords = /example|test|sample|demo|fake|dummy|placeholder|version|release/i;
5695
+ if (rejectKeywords.test(context)) {
5696
+ return false;
5697
+ }
5698
+ return true;
5066
5699
  }
5067
5700
  };
5068
5701
  var cryptoExtendedPatterns = [
@@ -5218,15 +5851,20 @@ var BIOBANK_SAMPLE_ID = {
5218
5851
  };
5219
5852
  var PROVIDER_LICENSE = {
5220
5853
  type: "PROVIDER_LICENSE",
5221
- regex: /\b(?:MEDICAL|PHYSICIAN|DOCTOR|NURSE|PROVIDER)[-\s]?(?:LICENSE|LICENCE|LIC)[-\s]?(?:NO|NUM(?:BER)?)?[-\s]?[:#]?\s*([A-Z0-9]{6,12})\b/gi,
5854
+ regex: /\b(?:MEDICAL|PHYSICIAN|DOCTOR|NURSE|PROVIDER)[-\s\u00A0]*(?:LICENSE|LICENCE|LIC)[-\s\u00A0]*(?:NO|NUM(?:BER)?)?[-\s\u00A0.:#]*((?:[A-Z0-9]{2,6}[\s\u00A0./-]?){1,3}[A-Z0-9]{2,6})\b/gi,
5222
5855
  placeholder: "[PROVIDER_LIC_{n}]",
5223
5856
  priority: 80,
5224
5857
  severity: "high",
5225
- description: "Healthcare provider license numbers"
5858
+ description: "Healthcare provider license numbers",
5859
+ validator: (value) => {
5860
+ const normalized = value.replace(/[^A-Za-z0-9]/g, "");
5861
+ if (normalized.length < 6 || normalized.length > 18) return false;
5862
+ return /[A-Z]/i.test(normalized) && /\d/.test(normalized);
5863
+ }
5226
5864
  };
5227
5865
  var NPI_NUMBER = {
5228
5866
  type: "NPI_NUMBER",
5229
- regex: /\b(?:NPI[-\s]?(?:NO|NUM(?:BER)?)?[-\s]?[:#]?\s*)?(\d{10})\b/g,
5867
+ regex: /\b(?:NPI[-\s\u00A0]*(?:NO|NUM(?:BER)?)?[-\s\u00A0.:#]*)?((?:\d[\s\u00A0.-]?){10})\b/g,
5230
5868
  placeholder: "[NPI_{n}]",
5231
5869
  priority: 85,
5232
5870
  severity: "high",
@@ -5235,7 +5873,8 @@ var NPI_NUMBER = {
5235
5873
  if (!/provider|npi|physician|doctor|clinic|hospital|practice/i.test(context)) {
5236
5874
  return false;
5237
5875
  }
5238
- const digits = value.split("").map(Number);
5876
+ const digits = value.replace(/\D/g, "").split("").map(Number);
5877
+ if (digits.length !== 10) return false;
5239
5878
  let sum = 0;
5240
5879
  for (let i = digits.length - 2; i >= 0; i--) {
5241
5880
  let digit = digits[i];
@@ -5251,17 +5890,19 @@ var NPI_NUMBER = {
5251
5890
  };
5252
5891
  var DEA_NUMBER = {
5253
5892
  type: "DEA_NUMBER",
5254
- regex: /\b(?:DEA[-\s]?(?:NO|NUM(?:BER)?)?[-\s]?[:#]?\s*)?([A-Z]{2}\d{7})\b/gi,
5893
+ regex: /\b(?:DEA[-\s\u00A0]*(?:NO|NUM(?:BER)?)?[-\s\u00A0.:#]*)?([A-Z]{2}(?:[\s\u00A0.-]?\d){7})\b/gi,
5255
5894
  placeholder: "[DEA_{n}]",
5256
5895
  priority: 90,
5257
5896
  severity: "high",
5258
5897
  description: "DEA registration number for controlled substances",
5259
5898
  validator: (value, _context) => {
5899
+ const normalized = value.replace(/[^A-Za-z0-9]/g, "").toUpperCase();
5900
+ if (normalized.length !== 9) return false;
5260
5901
  const validFirstLetters = ["A", "B", "C", "D", "E", "F", "G", "H", "J", "K", "L", "M", "P", "R", "S", "T", "U"];
5261
- if (!validFirstLetters.includes(value[0].toUpperCase())) {
5902
+ if (!validFirstLetters.includes(normalized[0])) {
5262
5903
  return false;
5263
5904
  }
5264
- const digits = value.substring(2).split("").map(Number);
5905
+ const digits = normalized.substring(2).split("").map(Number);
5265
5906
  const sum1 = digits[0] + digits[2] + digits[4];
5266
5907
  const sum2 = (digits[1] + digits[3] + digits[5]) * 2;
5267
5908
  const checkDigit = (sum1 + sum2) % 10;
@@ -5286,11 +5927,16 @@ var EMERGENCY_CONTACT_MARKER = {
5286
5927
  };
5287
5928
  var BIOMETRIC_ID = {
5288
5929
  type: "BIOMETRIC_ID",
5289
- regex: /\b(?:FINGERPRINT|RETINAL?[-\s]?SCAN|IRIS[-\s]?SCAN|VOICE[-\s]?PRINT|FACIAL[-\s]?RECOGNITION|BIOMETRIC)[-\s]?(?:ID|DATA|TEMPLATE|HASH)?[-\s]?[:#]?\s*([A-Z0-9]{8,40})\b/gi,
5930
+ regex: /\b(?:FINGERPRINT|RETINAL?[-\s\u00A0]?SCAN|IRIS[-\s\u00A0]?SCAN|VOICE[-\s\u00A0]?PRINT|FACIAL[-\s\u00A0]?RECOGNITION|BIOMETRIC)[-\s\u00A0]?(?:ID|DATA|TEMPLATE|HASH)?[-\s\u00A0.:#]*([A-Z0-9][A-Z0-9._-]{7,39})\b/gi,
5290
5931
  placeholder: "[BIOMETRIC_{n}]",
5291
5932
  priority: 95,
5292
5933
  severity: "high",
5293
- description: "Biometric identifier references"
5934
+ description: "Biometric identifier references",
5935
+ validator: (value) => {
5936
+ const normalized = value.replace(/[^A-Za-z0-9]/g, "");
5937
+ if (normalized.length < 8 || normalized.length > 40) return false;
5938
+ return /[A-Z]/i.test(normalized) && /\d/.test(normalized);
5939
+ }
5294
5940
  };
5295
5941
  var DNA_SEQUENCE = {
5296
5942
  type: "DNA_SEQUENCE",
@@ -5319,7 +5965,7 @@ var DRUG_DOSAGE = {
5319
5965
  };
5320
5966
  var MEDICAL_IMAGE_REF = {
5321
5967
  type: "MEDICAL_IMAGE_REF",
5322
- regex: /\b(?:X[-\s]?RAY|MRI|CT[-\s]?SCAN|PET[-\s]?SCAN|ULTRASOUND|MAMMOGRAM)[-\s]?(?:IMAGE|FILE|ID)?[-\s]?[:#]?\s*([A-Z0-9]{6,20})\b/gi,
5968
+ regex: /\b(?:X[-\s\u00A0]?RAY|MRI|CT[-\s\u00A0]?SCAN|PET[-\s\u00A0]?SCAN|ULTRASOUND|MAMMOGRAM)[-\s\u00A0]?(?:IMAGE|FILE|ID)?[-\s\u00A0.:#]*([A-Z0-9][A-Z0-9_.-]{5,23})\b/gi,
5323
5969
  placeholder: "[IMAGE_{n}]",
5324
5970
  priority: 80,
5325
5971
  severity: "high",
@@ -5467,9 +6113,11 @@ var SWIFT_BIC = {
5467
6113
  severity: "high",
5468
6114
  description: "SWIFT/BIC codes for international transfers",
5469
6115
  validator: (value, context) => {
6116
+ const cleaned = value.replace(/[\s\u00A0.-]/g, "").toUpperCase();
5470
6117
  const financialContext = /swift|bic|bank|transfer|wire|international|payment/i.test(context);
5471
- const validLength = value.length === 8 || value.length === 11;
5472
- return financialContext && validLength;
6118
+ const validLength = cleaned.length === 8 || cleaned.length === 11;
6119
+ const validFormat = /^[A-Z]{6}[A-Z0-9]{2}([A-Z0-9]{3})?$/.test(cleaned);
6120
+ return financialContext && validLength && validFormat;
5473
6121
  }
5474
6122
  };
5475
6123
  var TRANSACTION_ID = {
@@ -5482,11 +6130,18 @@ var TRANSACTION_ID = {
5482
6130
  };
5483
6131
  var INVESTMENT_ACCOUNT = {
5484
6132
  type: "INVESTMENT_ACCOUNT",
5485
- regex: /\b(?:ISA|SIPP|INV(?:ESTMENT)?|PENSION|401K|IRA)[-\s]?(?:ACCOUNT|ACCT|A\/C)?[-\s]?(?:NO|NUM(?:BER)?)?[-\s]?[:#]?\s*([A-Z0-9]{6,15})\b/gi,
6133
+ regex: /\b(?:ISA|SIPP|INV(?:ESTMENT)?|PENSION|401K|IRA)[-\s\u00A0]*(?:ACCOUNT|ACCT|A\/C)?[-\s\u00A0]*(?:NO|NUM(?:BER)?)?[-\s\u00A0.:#]*([A-Z0-9](?:[A-Z0-9][\s\u00A0./-]?){5,18}[A-Z0-9])\b/gi,
5486
6134
  placeholder: "[INV_ACCT_{n}]",
5487
6135
  priority: 85,
5488
6136
  severity: "high",
5489
- description: "Investment and pension account numbers"
6137
+ description: "Investment and pension account numbers",
6138
+ validator: (value, context) => {
6139
+ const normalized = value.replace(/[\s\u00A0./-]/g, "");
6140
+ const hasDigits = /\d{4,}/.test(normalized);
6141
+ const validLength = normalized.length >= 6 && normalized.length <= 15;
6142
+ const inContext = /isa|sipp|invest|pension|401k|ira|account|fund/i.test(context);
6143
+ return hasDigits && validLength && inContext;
6144
+ }
5490
6145
  };
5491
6146
  var WIRE_TRANSFER_REF = {
5492
6147
  type: "WIRE_TRANSFER_REF",
@@ -5760,22 +6415,60 @@ var TERMINAL_ID = {
5760
6415
  };
5761
6416
  var UK_BANK_ACCOUNT_IBAN = {
5762
6417
  type: "UK_BANK_ACCOUNT_IBAN",
5763
- regex: /\b(GB\d{2}[A-Z]{4}\d{14})\b/g,
6418
+ regex: /\b(GB\d{2}[\s\u00A0.-]?[A-Z]{4}[\s\u00A0.-]?\d{14})\b/gi,
5764
6419
  placeholder: "[UK_IBAN_{n}]",
5765
6420
  priority: 95,
5766
6421
  severity: "high",
5767
6422
  description: "UK bank account numbers in IBAN format",
5768
- validator: (value) => {
5769
- return value.startsWith("GB") && value.length === 22;
6423
+ validator: (value, context) => {
6424
+ const cleaned = value.replace(/[\s\u00A0.-]/g, "").toUpperCase();
6425
+ if (!cleaned.startsWith("GB") || cleaned.length !== 22) {
6426
+ return false;
6427
+ }
6428
+ if (!validateIBAN(cleaned)) {
6429
+ return false;
6430
+ }
6431
+ const bankingKeywords = /iban|account|bank|uk|gb|financial|payment|transfer/i;
6432
+ if (!bankingKeywords.test(context)) {
6433
+ return false;
6434
+ }
6435
+ const rejectKeywords = /example\s+iban|test\s+iban|sample\s+iban|demo\s+iban|fake\s+iban/i;
6436
+ if (rejectKeywords.test(context)) {
6437
+ return false;
6438
+ }
6439
+ return true;
5770
6440
  }
5771
6441
  };
5772
6442
  var UK_SORT_CODE_ACCOUNT = {
5773
6443
  type: "UK_SORT_CODE_ACCOUNT",
5774
- regex: /\b(\d{2}[-]\d{2}[-]\d{2}\s?\d{8})\b/g,
6444
+ regex: /\b(\d{2}[\s\u00A0-]?\d{2}[\s\u00A0-]?\d{2}[\s\u00A0]?\d{8})\b/g,
5775
6445
  placeholder: "[UK_ACCOUNT_{n}]",
5776
6446
  priority: 95,
5777
6447
  severity: "high",
5778
- description: "UK sort code and account number combination"
6448
+ description: "UK sort code and account number combination",
6449
+ validator: (value, context) => {
6450
+ const cleaned = value.replace(/[\s\u00A0.-]/g, "");
6451
+ if (!/^\d{14}$/.test(cleaned)) {
6452
+ return false;
6453
+ }
6454
+ const sortCode = cleaned.substring(0, 6);
6455
+ const accountNumber = cleaned.substring(6);
6456
+ if (accountNumber.length !== 8) {
6457
+ return false;
6458
+ }
6459
+ if (!validateSortCode(sortCode)) {
6460
+ return false;
6461
+ }
6462
+ const bankingKeywords = /sort\s+code|account|bank|uk|gb|financial|payment|transfer/i;
6463
+ if (!bankingKeywords.test(context)) {
6464
+ return false;
6465
+ }
6466
+ const rejectKeywords = /example\s+account|test\s+account|sample\s+account|demo\s+account|fake\s+account/i;
6467
+ if (rejectKeywords.test(context)) {
6468
+ return false;
6469
+ }
6470
+ return true;
6471
+ }
5779
6472
  };
5780
6473
  var financialPatterns2 = [
5781
6474
  SWIFT_BIC,
@@ -6620,13 +7313,17 @@ var RESUME_ID = {
6620
7313
  };
6621
7314
  var BENEFITS_PLAN_NUMBER = {
6622
7315
  type: "BENEFITS_PLAN_NUMBER",
6623
- regex: /\b(?:BENEFITS?|INSURANCE|HEALTH[-\s]?PLAN)[-\s]?(?:PLAN)?[-\s]?(?:NO|NUM(?:BER)?|ID)?[-\s]?[:#]?\s*([A-Z0-9]{6,14})\b/gi,
7316
+ regex: /\b(?:BENEFITS?|INSURANCE|HEALTH[-\s\u00A0]?PLAN)[-\s\u00A0]*(?:PLAN)?[-\s\u00A0]*(?:NO|NUM(?:BER)?|ID)?[-\s\u00A0.:#]*([A-Z0-9](?:[A-Z0-9][\s\u00A0./-]?){5,15}[A-Z0-9])\b/gi,
6624
7317
  placeholder: "[BENEFITS_{n}]",
6625
7318
  priority: 85,
6626
7319
  severity: "high",
6627
7320
  description: "Employee benefits and insurance plan numbers",
6628
- validator: (_value, context) => {
6629
- return /benefit|insurance|health|dental|vision|plan|policy|enrollment/i.test(context);
7321
+ validator: (value, context) => {
7322
+ const normalized = value.replace(/[\s\u00A0./-]/g, "");
7323
+ const hasDigits = /\d{4,}/.test(normalized);
7324
+ const validLength = normalized.length >= 6 && normalized.length <= 14;
7325
+ const inContext = /benefit|insurance|health|dental|vision|plan|policy|enrollment/i.test(context);
7326
+ return hasDigits && validLength && inContext;
6630
7327
  }
6631
7328
  };
6632
7329
  var RETIREMENT_ACCOUNT = {
@@ -6724,13 +7421,16 @@ var EXIT_INTERVIEW_ID = {
6724
7421
  };
6725
7422
  var DISCIPLINARY_ACTION_ID = {
6726
7423
  type: "DISCIPLINARY_ACTION_ID",
6727
- regex: /\b(?:DISCIPLINARY|INCIDENT|WARNING|VIOLATION)[-\s]?(?:ACTION)?[-\s]?(?:NO|NUM(?:BER)?|ID)?[-\s]?[:#]?\s*([A-Z0-9]{6,12})\b/gi,
7424
+ regex: /\b(?:DISCIPLINARY|INCIDENT|WARNING|VIOLATION)[-\s\u00A0]*(?:ACTION)?[-\s\u00A0]*(?:NO|NUM(?:BER)?|ID)?[-\s\u00A0.:#]*([A-Z0-9](?:[A-Z0-9][\s\u00A0./-]?){5,15}[A-Z0-9])\b/gi,
6728
7425
  placeholder: "[DISCIPLINE_{n}]",
6729
7426
  priority: 85,
6730
7427
  severity: "high",
6731
7428
  description: "Disciplinary action and incident identifiers",
6732
- validator: (_value, context) => {
6733
- return /disciplinary|incident|warning|violation|misconduct|investigation/i.test(context);
7429
+ validator: (value, context) => {
7430
+ const normalized = value.replace(/[\s\u00A0./-]/g, "");
7431
+ const hasDigits = /\d{3,}/.test(normalized);
7432
+ const validLength = normalized.length >= 6 && normalized.length <= 12;
7433
+ return hasDigits && validLength && /disciplinary|incident|warning|violation|misconduct|investigation/i.test(context);
6734
7434
  }
6735
7435
  };
6736
7436
  var EMERGENCY_CONTACT_REF = {
@@ -7058,7 +7758,7 @@ var TELECOMS_ACCOUNT_NUMBER = {
7058
7758
  type: "TELECOMS_ACCOUNT_NUMBER",
7059
7759
  regex: /\bACC(?:OUNT)?[-\s]?(?:NO|NUM(?:BER)?)?[-\s]?[:#]?\s*(\d{8,12})\b/gi,
7060
7760
  placeholder: "[ACCOUNT_{n}]",
7061
- priority: 85,
7761
+ priority: 90,
7062
7762
  severity: "high",
7063
7763
  description: "Telecommunications customer account numbers",
7064
7764
  validator: (_value, context) => {
@@ -7936,7 +8636,7 @@ var EMERGENCY_CALL_REF = {
7936
8636
  };
7937
8637
  var POLICE_REPORT_NUMBER = {
7938
8638
  type: "POLICE_REPORT_NUMBER",
7939
- regex: /\b(?:POLICE|PR|RPT|REPORT|CASE)[-\s]?(?:NO|NUM|NUMBER|ID)?[-\s]?[:#]?\s*(\d{4}[-\s]?\d{5,10}|[A-Z]{2,4}[-\s]?\d{6,10})\b/gi,
8639
+ regex: /\b(?:POLICE|PR|RPT|REPORT|CASE)[-\s\u00A0]*(?:NO|NUM|NUMBER|ID)?[-\s\u00A0.:#]*((?:[A-Z]{2,4}[\s\u00A0./-]?\d{2,4}[\s\u00A0./-]?\d{4,10})|\d{4}[\s\u00A0./-]?\d{5,10})\b/gi,
7940
8640
  placeholder: "[POLICE_RPT_{n}]",
7941
8641
  priority: 95,
7942
8642
  severity: "high",
@@ -7947,7 +8647,7 @@ var POLICE_REPORT_NUMBER = {
7947
8647
  };
7948
8648
  var FIRE_INCIDENT_NUMBER = {
7949
8649
  type: "FIRE_INCIDENT_NUMBER",
7950
- regex: /\b(?:FIRE|FI|FD)[-\s]?(?:INCIDENT|INC|NO|NUM|NUMBER|ID)?[-\s]?[:#]?\s*(\d{4}[-\s]?\d{4,8}|[A-Z]{2,4}[-\s]?\d{5,10})\b/gi,
8650
+ regex: /\b(?:FIRE|FI|FD)[-\s\u00A0]*(?:INCIDENT|INC|NO|NUM|NUMBER|ID)?[-\s\u00A0.:#]*((?:[A-Z]{2,4}[\s\u00A0./-]?\d{2,4}[\s\u00A0./-]?\d{4,10})|\d{4}[\s\u00A0./-]?\d{4,8})\b/gi,
7951
8651
  placeholder: "[FIRE_INC_{n}]",
7952
8652
  priority: 95,
7953
8653
  severity: "high",
@@ -8812,13 +9512,15 @@ var gamingPatterns = [
8812
9512
  // src/patterns/industries/vehicles.ts
8813
9513
  var VIN_NUMBER = {
8814
9514
  type: "VIN_NUMBER",
8815
- regex: /\bVIN[-\s]?(?:NO|NUM|NUMBER)?[-\s]?[:#]?\s*([A-HJ-NPR-Z0-9]{17})\b/gi,
9515
+ regex: /\bVIN[-\s\u00A0]?(?:NO|NUM|NUMBER)?[-\s\u00A0]?[:#]?\s*([A-HJ-NPR-Z0-9]{17})\b/gi,
8816
9516
  placeholder: "[VIN_{n}]",
8817
9517
  priority: 85,
8818
9518
  severity: "medium",
8819
9519
  description: "Vehicle Identification Number (VIN)",
8820
9520
  validator: (value, context) => {
8821
- if (/[IOQ]/i.test(value)) return false;
9521
+ const cleaned = value.replace(/[\s\u00A0.-]/g, "").toUpperCase();
9522
+ if (cleaned.length !== 17) return false;
9523
+ if (/[IOQ]/.test(cleaned)) return false;
8822
9524
  return /vin|vehicle|car|auto|motor|registration|title|insurance/i.test(context);
8823
9525
  }
8824
9526
  };
@@ -11120,9 +11822,11 @@ var GERMAN_TAX_ID = {
11120
11822
  severity: "high",
11121
11823
  description: "German Tax Identification Number (Steueridentifikationsnummer)",
11122
11824
  validator: (value, context) => {
11825
+ const cleaned = value.replace(/[\s\u00A0.-]/g, "");
11826
+ if (!/^\d{11}$/.test(cleaned)) return false;
11123
11827
  const relevantContext = /steuer|tax|german|deutschland|finanzamt/i.test(context);
11124
11828
  if (!relevantContext) return false;
11125
- const digits = value.split("").map(Number);
11829
+ const digits = cleaned.split("").map(Number);
11126
11830
  const digitCounts = /* @__PURE__ */ new Map();
11127
11831
  digits.forEach((d) => digitCounts.set(d, (digitCounts.get(d) || 0) + 1));
11128
11832
  const counts = Array.from(digitCounts.values());
@@ -11281,9 +11985,11 @@ var DUTCH_BSN = {
11281
11985
  severity: "high",
11282
11986
  description: "Dutch Citizen Service Number (BSN)",
11283
11987
  validator: (value, context) => {
11988
+ const cleaned = value.replace(/[\s\u00A0.-]/g, "");
11989
+ if (!/^\d{9}$/.test(cleaned)) return false;
11284
11990
  const relevantContext = /bsn|dutch|netherlands|nederland|burger/i.test(context);
11285
11991
  if (!relevantContext) return false;
11286
- const digits = value.split("").map(Number);
11992
+ const digits = cleaned.split("").map(Number);
11287
11993
  let sum = 0;
11288
11994
  for (let i = 0; i < 8; i++) {
11289
11995
  sum += digits[i] * (9 - i);
@@ -11300,10 +12006,12 @@ var POLISH_PESEL = {
11300
12006
  severity: "high",
11301
12007
  description: "Polish National Identification Number (PESEL)",
11302
12008
  validator: (value, context) => {
12009
+ const cleaned = value.replace(/[\s\u00A0.-]/g, "");
12010
+ if (!/^\d{11}$/.test(cleaned)) return false;
11303
12011
  const relevantContext = /pesel|polish|poland|polska/i.test(context);
11304
12012
  if (!relevantContext) return false;
11305
12013
  const weights = [1, 3, 7, 9, 1, 3, 7, 9, 1, 3];
11306
- const digits = value.split("").map(Number);
12014
+ const digits = cleaned.split("").map(Number);
11307
12015
  let sum = 0;
11308
12016
  for (let i = 0; i < 10; i++) {
11309
12017
  sum += digits[i] * weights[i];
@@ -11637,7 +12345,8 @@ var DISCORD_USER_ID = {
11637
12345
  severity: "medium",
11638
12346
  description: "Discord user ID (Snowflake format)",
11639
12347
  validator: (value, context) => {
11640
- if (value.length < 17 || value.length > 19) return false;
12348
+ const cleaned = value.replace(/[\s\u00A0.-]/g, "");
12349
+ if (cleaned.length < 17 || cleaned.length > 19) return false;
11641
12350
  return /discord|snowflake|user[-_]?id|server|guild/i.test(context);
11642
12351
  }
11643
12352
  };
@@ -11649,7 +12358,8 @@ var STEAM_ID64 = {
11649
12358
  severity: "medium",
11650
12359
  description: "Steam 64-bit user ID",
11651
12360
  validator: (value, context) => {
11652
- if (!value.startsWith("765") || value.length !== 17) return false;
12361
+ const cleaned = value.replace(/[\s\u00A0.-]/g, "");
12362
+ if (!cleaned.startsWith("765") || cleaned.length !== 17) return false;
11653
12363
  return /steam|gaming|player|profile|valve|community/i.test(context);
11654
12364
  }
11655
12365
  };
@@ -11782,13 +12492,14 @@ var NINTENDO_FRIEND_CODE = {
11782
12492
  type: "NINTENDO_FRIEND_CODE",
11783
12493
  regex: /\bSW[-\s]?(\d{4}[-\s]?\d{4}[-\s]?\d{4})\b/gi,
11784
12494
  placeholder: "[NINTENDO_FC_{n}]",
11785
- priority: 85,
12495
+ priority: 90,
11786
12496
  severity: "medium",
11787
12497
  description: "Nintendo Switch Friend Code",
11788
12498
  validator: (value, context) => {
11789
12499
  const digits = value.replace(/\D/g, "");
11790
12500
  if (digits.length !== 12) return false;
11791
- return /nintendo|switch|friend[- ]?code|gaming/i.test(context);
12501
+ const hasContext = /nintendo|switch|friend[- ]?code|gaming/i.test(context);
12502
+ return hasContext;
11792
12503
  }
11793
12504
  };
11794
12505
  var BATTLETAG = {
@@ -12083,14 +12794,64 @@ var ccpaPreset = {
12083
12794
  "USERNAME"
12084
12795
  ]
12085
12796
  };
12797
// Industry presets. Each preset enables name/email/phone/address detection and
// selects the pattern categories relevant to that sector. `var` is kept to
// match the other top-level declarations of this bundled file.

/** Preset for healthcare providers. */
var healthcarePreset = {
  includeNames: true,
  includeEmails: true,
  includePhones: true,
  includeAddresses: true,
  categories: ["personal", "contact", "healthcare", "insurance", "government"]
};

/**
 * Preset for healthcare research.
 * NOTE(review): currently identical to `healthcarePreset` - confirm whether a
 * research-specific category list was intended.
 */
var healthcareResearchPreset = {
  includeNames: true,
  includeEmails: true,
  includePhones: true,
  includeAddresses: true,
  categories: ["personal", "contact", "healthcare", "insurance", "government"]
};

/** Preset for finance / financial services. */
var financePreset = {
  includeNames: true,
  includeEmails: true,
  includePhones: true,
  includeAddresses: true,
  categories: ["personal", "contact", "financial", "government", "network"]
};

/** Preset for education. */
var educationPreset = {
  includeNames: true,
  includeEmails: true,
  includePhones: true,
  includeAddresses: true,
  categories: ["personal", "contact", "education", "government", "network"]
};

/** Preset for transport and logistics. */
var transportLogisticsPreset = {
  includeNames: true,
  includeEmails: true,
  includePhones: true,
  includeAddresses: true,
  categories: ["personal", "contact", "transportation", "logistics", "vehicles", "network"]
};
12086
12832
  function getPreset(name) {
12087
- switch (name.toLowerCase()) {
12833
+ const presetName = name.toLowerCase();
12834
+ switch (presetName) {
12088
12835
  case "gdpr":
12089
12836
  return gdprPreset;
12090
12837
  case "hipaa":
12091
12838
  return hipaaPreset;
12092
12839
  case "ccpa":
12093
12840
  return ccpaPreset;
12841
+ case "healthcare":
12842
+ case "healthcare-provider":
12843
+ return healthcarePreset;
12844
+ case "healthcare-research":
12845
+ return healthcareResearchPreset;
12846
+ case "finance":
12847
+ case "financial-services":
12848
+ return financePreset;
12849
+ case "education":
12850
+ return educationPreset;
12851
+ case "transport-logistics":
12852
+ case "transportation":
12853
+ case "logistics":
12854
+ return transportLogisticsPreset;
12094
12855
  default:
12095
12856
  return {};
12096
12857
  }
@@ -12613,9 +13374,23 @@ var ConfigLoader = class {
12613
13374
  };
12614
13375
  }
12615
13376
  if (preset.startsWith("openredaction:")) {
12616
- const complianceType = preset.replace("openredaction:", "");
12617
- if (["gdpr", "hipaa", "ccpa"].includes(complianceType)) {
12618
- return { preset: complianceType };
13377
+ const presetName = preset.replace("openredaction:", "");
13378
+ const supportedPresets = [
13379
+ "gdpr",
13380
+ "hipaa",
13381
+ "ccpa",
13382
+ "healthcare",
13383
+ "healthcare-provider",
13384
+ "healthcare-research",
13385
+ "finance",
13386
+ "financial-services",
13387
+ "education",
13388
+ "transport-logistics",
13389
+ "transportation",
13390
+ "logistics"
13391
+ ];
13392
+ if (supportedPresets.includes(presetName)) {
13393
+ return { preset: presetName };
12619
13394
  }
12620
13395
  }
12621
13396
  return null;
@@ -12631,7 +13406,8 @@ var ConfigLoader = class {
12631
13406
  export default {
12632
13407
  // Extend built-in presets
12633
13408
  // Options: 'openredaction:recommended', 'openredaction:strict', 'openredaction:minimal'
12634
- // Or compliance: 'openredaction:gdpr', 'openredaction:hipaa', 'openredaction:ccpa'
13409
+ // Or compliance/industry presets: 'openredaction:gdpr', 'openredaction:hipaa', 'openredaction:ccpa',
13410
+ // 'openredaction:finance', 'openredaction:education', 'openredaction:healthcare', 'openredaction:transport-logistics'
12635
13411
  extends: ['openredaction:recommended'],
12636
13412
 
12637
13413
  // Detection options
@@ -14308,9 +15084,8 @@ var ExplainAPI = class {
14308
15084
  constructor(detector) {
14309
15085
  this.detector = detector;
14310
15086
  this.patterns = detector.getPatterns();
14311
- const testResult = detector.detect("Contact: admin@business.co.uk");
14312
- const hasConfidence = testResult.detections.length > 0 && testResult.detections[0].confidence !== void 0;
14313
15087
  const detectorOptions = detector.options;
15088
+ const hasConfidence = detectorOptions?.enableContextAnalysis || false;
14314
15089
  this.options = {
14315
15090
  enableContextAnalysis: hasConfidence,
14316
15091
  confidenceThreshold: detectorOptions?.confidenceThreshold || 0.5,
@@ -14322,7 +15097,7 @@ var ExplainAPI = class {
14322
15097
  /**
14323
15098
  * Explain why text was or wasn't detected as PII
14324
15099
  */
14325
- explain(text) {
15100
+ async explain(text) {
14326
15101
  const patternResults = [];
14327
15102
  const matchedPatterns = [];
14328
15103
  const unmatchedPatterns = [];
@@ -14412,7 +15187,8 @@ var ExplainAPI = class {
14412
15187
  patternResults.push(result);
14413
15188
  matchedPatterns.push(result);
14414
15189
  }
14415
- const detections = this.detector.detect(text).detections;
15190
+ const detectionResult = await this.detector.detect(text);
15191
+ const detections = detectionResult.detections;
14416
15192
  return {
14417
15193
  text,
14418
15194
  patternResults,
@@ -14431,7 +15207,7 @@ var ExplainAPI = class {
14431
15207
  /**
14432
15208
  * Explain a specific detection
14433
15209
  */
14434
- explainDetection(detection, text) {
15210
+ async explainDetection(detection, text) {
14435
15211
  const pattern = this.patterns.find((p) => p.type === detection.type);
14436
15212
  const reasoning = [];
14437
15213
  reasoning.push(`Detected as ${detection.type}`);
@@ -14462,13 +15238,15 @@ var ExplainAPI = class {
14462
15238
  detection,
14463
15239
  pattern,
14464
15240
  contextAnalysis,
14465
- reasoning
15241
+ reasoning,
15242
+ suggestions: []
15243
+ // Will be populated if needed
14466
15244
  };
14467
15245
  }
14468
15246
  /**
14469
15247
  * Suggest why text wasn't detected
14470
15248
  */
14471
- suggestWhy(text, expectedType) {
15249
+ async suggestWhy(text, expectedType) {
14472
15250
  const suggestions = [];
14473
15251
  const similarPatterns = [];
14474
15252
  const typePatterns = this.patterns.filter(
@@ -14486,7 +15264,7 @@ var ExplainAPI = class {
14486
15264
  similarPatterns.push(pattern);
14487
15265
  const value = match[1] !== void 0 ? match[1] : match[0];
14488
15266
  suggestions.push(`Pattern "${pattern.type}" matched value: "${value}"`);
14489
- const explanation = this.explain(text);
15267
+ const explanation = await this.explain(text);
14490
15268
  const filtered = explanation.filteredPatterns.find((r) => r.pattern.type === pattern.type);
14491
15269
  if (filtered && filtered.reason) {
14492
15270
  suggestions.push(`But was filtered: ${filtered.reason}`);
@@ -14516,9 +15294,9 @@ Example ${expectedType} pattern: ${examplePattern.regex.source.substring(0, 100)
14516
15294
  /**
14517
15295
  * Get debugging information for entire detection process
14518
15296
  */
14519
- debug(text) {
15297
+ async debug(text) {
14520
15298
  const start = performance.now();
14521
- const explanation = this.explain(text);
15299
+ const explanation = await this.explain(text);
14522
15300
  const duration = performance.now() - start;
14523
15301
  const enabledFeatures = [];
14524
15302
  if (this.options.enableContextAnalysis) {
@@ -15406,6 +16184,152 @@ function compileSafeRegex(pattern, flags) {
15406
16184
  return new RegExp(patternStr, finalFlags);
15407
16185
  }
15408
16186
 
16187
+ // src/utils/ai-assist.ts
16188
/**
 * Resolve the URL of the AI-assist endpoint.
 *
 * Resolution order:
 *   1. `aiOptions.endpoint`
 *   2. the `OPENREDACTION_AI_ENDPOINT` environment variable (only consulted
 *      when a `process.env` object exists, i.e. not in browsers)
 *
 * Blank, whitespace-only or non-string values count as "not configured", so a
 * stray `endpoint: " "` falls through to the environment variable instead of
 * being handed to `fetch` as a URL.
 *
 * @param {{ enabled?: boolean, endpoint?: string } | null | undefined} aiOptions
 * @returns {string | null} The trimmed endpoint, or `null` when AI assist is
 *   disabled or no endpoint is configured.
 */
function getAIEndpoint(aiOptions) {
  if (!aiOptions?.enabled) {
    return null;
  }
  // Returns the trimmed string, or null for anything unusable as a URL.
  const usable = (candidate) => {
    if (typeof candidate !== "string") {
      return null;
    }
    const trimmed = candidate.trim();
    return trimmed.length > 0 ? trimmed : null;
  };
  const explicit = usable(aiOptions.endpoint);
  if (explicit !== null) {
    return explicit;
  }
  if (typeof process !== "undefined" && process.env) {
    return usable(process.env.OPENREDACTION_AI_ENDPOINT);
  }
  return null;
}
16203
/**
 * Report whether a callable global `fetch` exists in this runtime.
 *
 * Checks for a function rather than mere definedness so that a non-callable
 * global named `fetch` is not mistaken for the Fetch API.
 *
 * @returns {boolean}
 */
function isFetchAvailable() {
  return typeof fetch === "function";
}
16206
/**
 * POST `text` to the AI detection endpoint and return the entities it reports.
 *
 * Best-effort by design: every failure (no `fetch`, non-2xx status, malformed
 * payload, thrown error) resolves to `null` so the caller can carry on
 * without AI results. Failures are only logged when `debug` is truthy.
 *
 * `/ai-detect` is appended to `endpoint` unless it already ends with it.
 * Trailing slashes are stripped first, so `https://host/` does not become
 * `https://host//ai-detect` and `https://host/ai-detect/` is not extended.
 *
 * NOTE(review): the request has no timeout or abort signal, so an endpoint
 * that never answers keeps the returned promise pending.
 *
 * @param {string} text - Text to analyse; sent as the JSON body `{ text }`.
 * @param {string} endpoint - Base URL or full `/ai-detect` URL.
 * @param {boolean} [debug] - Log diagnostics to the console when truthy.
 * @returns {Promise<Array | null>} The `entities` array of the response, or
 *   `null` on any failure.
 */
async function callAIDetect(text, endpoint, debug) {
  if (!isFetchAvailable()) {
    if (debug) {
      console.warn("[OpenRedaction] AI assist requires fetch API. Not available in this environment.");
    }
    return null;
  }
  try {
    const base = endpoint.replace(/\/+$/, "");
    const url = base.endsWith("/ai-detect") ? base : `${base}/ai-detect`;
    if (debug) {
      console.log(`[OpenRedaction] Calling AI endpoint: ${url}`);
    }
    const response = await fetch(url, {
      method: "POST",
      headers: {
        "Content-Type": "application/json"
      },
      body: JSON.stringify({ text })
    });
    if (!response.ok) {
      if (debug) {
        const statusText = response.status === 429 ? "Rate limit exceeded (429)" : `${response.status}: ${response.statusText}`;
        console.warn(`[OpenRedaction] AI endpoint returned ${statusText}`);
      }
      return null;
    }
    const data = await response.json();
    // `data` may legitimately parse to null or a primitive; treat that as a
    // format problem rather than letting property access throw.
    const entities = data?.entities;
    if (!Array.isArray(entities)) {
      if (debug) {
        console.warn("[OpenRedaction] Invalid AI response format: missing entities array");
      }
      return null;
    }
    return entities;
  } catch (error) {
    if (debug) {
      console.warn(`[OpenRedaction] AI endpoint error: ${error instanceof Error ? error.message : "Unknown error"}`);
    }
    return null;
  }
}
16247
/**
 * Check that an entity returned by the AI endpoint is structurally usable.
 *
 * The entity comes from an external service, so nothing about its shape is
 * trusted. It is accepted only when:
 *   - it is a non-null object,
 *   - `type` and `value` are non-empty strings,
 *   - `start` and `end` are integers with `0 <= start < end <= textLength`,
 *   - `value.length` equals `end - start`.
 *
 * Only lengths are compared; the text itself is not available here, so the
 * characters at `[start, end)` are not checked against `value`.
 *
 * @param {unknown} entity - Candidate entity from the AI response.
 * @param {number} textLength - Length of the text the offsets refer to.
 * @returns {boolean} `true` when the entity passes every check.
 */
function validateAIEntity(entity, textLength) {
  if (entity === null || typeof entity !== "object") {
    return false;
  }
  const { type, value, start, end } = entity;
  if (typeof type !== "string" || type.length === 0) {
    return false;
  }
  if (typeof value !== "string" || value.length === 0) {
    return false;
  }
  // Integer check also rejects NaN, Infinity and fractional offsets.
  if (!Number.isInteger(start) || !Number.isInteger(end)) {
    return false;
  }
  if (start < 0 || start >= end || end > textLength) {
    return false;
  }
  return value.length === end - start;
}
16263
/**
 * Decide whether two detections cover substantially the same span.
 *
 * Each detection's `position` is read as a `[start, end]` pair. The two count
 * as overlapping when their intersection is strictly longer than
 * `minOverlapRatio` times the length of the shorter detection. Spans that
 * merely touch, or do not intersect, never overlap.
 *
 * @param {{ position: [number, number] }} det1
 * @param {{ position: [number, number] }} det2
 * @param {number} [minOverlapRatio=0.5] - Fraction of the shorter span that
 *   must be exceeded by the intersection.
 * @returns {boolean}
 */
function detectionsOverlap(det1, det2, minOverlapRatio = 0.5) {
  const [start1, end1] = det1.position;
  const [start2, end2] = det2.position;
  const overlapLength = Math.min(end1, end2) - Math.max(start1, start2);
  if (overlapLength <= 0) {
    return false;
  }
  const shorterLength = Math.min(end1 - start1, end2 - start2);
  return overlapLength > shorterLength * minOverlapRatio;
}
16277
/**
 * Turn an entity reported by the AI endpoint into a detection object.
 *
 * The entity is rejected (`null`) when it is not an object, when its `type`
 * is not a string, or when `validateAIEntity` refuses it. The detection's
 * `value` is read from `text` at the entity's offsets, not from
 * `entity.value`.
 * NOTE(review): the characters at those offsets are not compared with
 * `entity.value`; confirm whether mismatching spans should be rejected.
 *
 * Type labels are upper-cased and folded onto the library's type names:
 *   *EMAIL* -> EMAIL, *PHONE* -> PHONE_US, PERSON / *NAME* -> NAME,
 *   *SSN* / SOCIAL_SECURITY_NUMBER -> SSN, *ADDRESS* -> ADDRESS_STREET.
 * Account-style names (USERNAME, HOST_NAME, FILENAME, DOMAIN_NAME) and
 * network/crypto addresses (IP_ADDRESS, MAC_ADDRESS, WALLET_ADDRESS, ...) keep
 * their own label instead of being folded onto NAME / ADDRESS_STREET.
 *
 * @param {{ type: string, value: string, start: number, end: number, confidence?: number }} entity
 * @param {string} text - The text the entity offsets refer to.
 * @returns {{ type: string, value: string, placeholder: string, position: [number, number], severity: string, confidence: number } | null}
 */
function convertAIEntityToDetection(entity, text) {
  if (entity === null || typeof entity !== "object" || typeof entity.type !== "string") {
    return null;
  }
  if (!validateAIEntity(entity, text.length)) {
    return null;
  }
  const technicalName = /(?:USER|HOST|FILE|DOMAIN)[^A-Z0-9]?NAME/;
  const networkAddress = /(?:^|[^A-Z0-9])(?:IP(?:V[46])?|MAC|URL|WALLET|CRYPTO|BITCOIN|ETHEREUM)[^A-Z0-9]?ADDRESS/;
  const canonicalType = (label) => {
    if (label.includes("EMAIL")) {
      return "EMAIL";
    }
    if (label.includes("PHONE")) {
      return "PHONE_US";
    }
    if (label === "PERSON" || (label.includes("NAME") && !technicalName.test(label))) {
      return "NAME";
    }
    if (label.includes("SSN") || label === "SOCIAL_SECURITY_NUMBER") {
      return "SSN";
    }
    if (label.includes("ADDRESS") && !networkAddress.test(label)) {
      return "ADDRESS_STREET";
    }
    return label;
  };
  const actualValue = text.substring(entity.start, entity.end);
  const type = canonicalType(entity.type.toUpperCase());
  let severity = "medium";
  if (type === "SSN" || type === "CREDIT_CARD") {
    severity = "critical";
  } else if (type === "EMAIL" || type === "PHONE_US" || type === "NAME") {
    severity = "high";
  }
  return {
    type,
    value: actualValue,
    // Random suffix: AI detections are not numbered like pattern placeholders.
    placeholder: `[${type}_${Math.random().toString(36).substring(2, 9)}]`,
    position: [entity.start, entity.end],
    severity,
    // Default confidence for AI entities that do not report one.
    confidence: entity.confidence ?? 0.7
  };
}
16310
/**
 * Combine regex detections with entities reported by the AI endpoint.
 *
 * Every regex detection is kept, in order. Each AI entity is converted with
 * `convertAIEntityToDetection`; entities that fail conversion are skipped.
 * A converted entity is appended only if it does not overlap (per
 * `detectionsOverlap`) any detection already in the result. That includes AI
 * detections accepted earlier in the same call, so two AI entities covering
 * the same span do not both end up in the output.
 *
 * `regexDetections` is not modified; a new array is returned.
 *
 * @param {Array<{ position: [number, number] }>} regexDetections
 * @param {Array} aiEntities - Raw entities from the AI response.
 * @param {string} text - The text both sets of offsets refer to.
 * @returns {Array} Regex detections followed by the accepted AI detections.
 */
function mergeAIEntities(regexDetections, aiEntities, text) {
  const merged = [...regexDetections];
  for (const aiEntity of aiEntities) {
    const detection = convertAIEntityToDetection(aiEntity, text);
    if (!detection) {
      continue;
    }
    const clashes = merged.some((existing) => detectionsOverlap(existing, detection));
    if (!clashes) {
      merged.push(detection);
    }
  }
  return merged;
}
16332
+
15409
16333
  // src/detector.ts
15410
16334
  var OpenRedaction = class _OpenRedaction {
15411
16335
  constructor(options = {}) {
@@ -15615,6 +16539,9 @@ var OpenRedaction = class _OpenRedaction {
15615
16539
  for (const pattern of this.patterns) {
15616
16540
  const regex = new RegExp(pattern.regex.source, pattern.regex.flags);
15617
16541
  this.compiledPatterns.set(pattern, regex);
16542
+ if (this.options.debug && (pattern.type === "NINTENDO_FRIEND_CODE" || pattern.type === "TELECOMS_ACCOUNT_NUMBER")) {
16543
+ console.log(`[OpenRedaction] Compiled pattern '${pattern.type}': ${regex}`);
16544
+ }
15618
16545
  }
15619
16546
  if (this.options.debug) {
15620
16547
  console.log(`[OpenRedaction] Pre-compiled ${this.compiledPatterns.size} regex patterns`);
@@ -15634,12 +16561,18 @@ var OpenRedaction = class _OpenRedaction {
15634
16561
  }
15635
16562
  continue;
15636
16563
  }
16564
+ if (this.options.debug && (pattern.type === "NINTENDO_FRIEND_CODE" || pattern.type === "TELECOMS_ACCOUNT_NUMBER")) {
16565
+ console.log(`[OpenRedaction] Processing pattern '${pattern.type}' with regex: ${regex}`);
16566
+ }
15637
16567
  let match;
15638
16568
  let matchCount = 0;
15639
16569
  const maxMatches = 1e4;
15640
16570
  regex.lastIndex = 0;
15641
16571
  try {
15642
16572
  while ((match = safeExec(regex, text, { timeout: this.options.regexTimeout })) !== null) {
16573
+ if (this.options.debug && (pattern.type === "NINTENDO_FRIEND_CODE" || pattern.type === "TELECOMS_ACCOUNT_NUMBER")) {
16574
+ console.log(`[OpenRedaction] Pattern '${pattern.type}' regex match found: '${match[0]}' at position ${match.index}`);
16575
+ }
15643
16576
  matchCount++;
15644
16577
  if (matchCount >= maxMatches) {
15645
16578
  if (this.options.debug) {
@@ -15660,12 +16593,18 @@ var OpenRedaction = class _OpenRedaction {
15660
16593
  endPos = startPos + value.length;
15661
16594
  }
15662
16595
  if (this.overlapsWithExisting(startPos, endPos, processedRanges)) {
16596
+ if (this.options.debug) {
16597
+ console.log(`[OpenRedaction] Pattern '${pattern.type}' skipped due to overlap at ${startPos}-${endPos}`);
16598
+ }
15663
16599
  continue;
15664
16600
  }
15665
16601
  const contextStart = Math.max(0, startPos - 50);
15666
16602
  const contextEnd = Math.min(text.length, endPos + 50);
15667
16603
  const context = text.substring(contextStart, contextEnd);
15668
16604
  if (pattern.validator && !pattern.validator(value, context)) {
16605
+ if (this.options.debug) {
16606
+ console.log(`[OpenRedaction] Pattern '${pattern.type}' validation failed for value: '${value}' with context: '${context.substring(0, 100)}...'`);
16607
+ }
15669
16608
  continue;
15670
16609
  }
15671
16610
  if (this.options.enableFalsePositiveFilter) {
@@ -15684,6 +16623,9 @@ var OpenRedaction = class _OpenRedaction {
15684
16623
  endPos
15685
16624
  );
15686
16625
  confidence = contextAnalysis.confidence;
16626
+ if (this.options.debug && confidence < this.options.confidenceThreshold) {
16627
+ console.log(`[OpenRedaction] Pattern '${pattern.type}' failed context analysis. Value: '${value}', Confidence: ${confidence} < ${this.options.confidenceThreshold}`);
16628
+ }
15687
16629
  }
15688
16630
  if (this.contextRulesEngine) {
15689
16631
  const piiMatch = {
@@ -15709,6 +16651,9 @@ var OpenRedaction = class _OpenRedaction {
15709
16651
  continue;
15710
16652
  }
15711
16653
  const placeholder = this.generatePlaceholder(value, pattern);
16654
+ if (this.options.debug) {
16655
+ console.log(`[OpenRedaction] Pattern '${pattern.type}' detected: '${value}' at position ${startPos}-${endPos}, confidence: ${confidence}`);
16656
+ }
15712
16657
  detections.push({
15713
16658
  type: pattern.type,
15714
16659
  value,
@@ -15769,8 +16714,9 @@ var OpenRedaction = class _OpenRedaction {
15769
16714
  }
15770
16715
  /**
15771
16716
  * Detect PII in text
16717
+ * Now async to support optional AI assist
15772
16718
  */
15773
- detect(text) {
16719
+ async detect(text) {
15774
16720
  if (this.rbacManager && !this.rbacManager.hasPermission("detection:detect")) {
15775
16721
  throw new Error("[OpenRedaction] Permission denied: detection:detect required");
15776
16722
  }
@@ -15824,12 +16770,42 @@ var OpenRedaction = class _OpenRedaction {
15824
16770
  } else {
15825
16771
  detections = this.processPatterns(text, this.patterns, processedRanges);
15826
16772
  }
16773
+ if (this.options.ai?.enabled) {
16774
+ const aiEndpoint = getAIEndpoint(this.options.ai);
16775
+ if (aiEndpoint) {
16776
+ try {
16777
+ if (this.options.debug) {
16778
+ console.log("[OpenRedaction] AI assist enabled, calling AI endpoint...");
16779
+ }
16780
+ const aiEntities = await callAIDetect(text, aiEndpoint, this.options.debug);
16781
+ if (aiEntities && aiEntities.length > 0) {
16782
+ if (this.options.debug) {
16783
+ console.log(`[OpenRedaction] AI returned ${aiEntities.length} additional entities`);
16784
+ }
16785
+ detections = mergeAIEntities(detections, aiEntities, text);
16786
+ if (this.options.debug) {
16787
+ console.log(`[OpenRedaction] After AI merge: ${detections.length} total detections`);
16788
+ }
16789
+ } else if (this.options.debug) {
16790
+ console.log("[OpenRedaction] AI endpoint returned no additional entities");
16791
+ }
16792
+ } catch (error) {
16793
+ if (this.options.debug) {
16794
+ console.warn(`[OpenRedaction] AI assist failed, using regex-only: ${error instanceof Error ? error.message : "Unknown error"}`);
16795
+ }
16796
+ }
16797
+ } else if (this.options.debug) {
16798
+ console.warn("[OpenRedaction] AI assist enabled but no endpoint configured. Set ai.endpoint or OPENREDACTION_AI_ENDPOINT env var.");
16799
+ }
16800
+ }
15827
16801
  detections.sort((a, b) => b.position[0] - a.position[0]);
15828
16802
  let redacted = text;
15829
16803
  const redactionMap = {};
15830
16804
  for (const detection of detections) {
15831
- const [start, end] = detection.position;
15832
- redacted = redacted.substring(0, start) + detection.placeholder + redacted.substring(end);
16805
+ if (!detection.value) continue;
16806
+ const escapedValue = this.escapeRegex(detection.value);
16807
+ const pattern = new RegExp(escapedValue, "gi");
16808
+ redacted = redacted.replace(pattern, detection.placeholder);
15833
16809
  redactionMap[detection.placeholder] = detection.value;
15834
16810
  }
15835
16811
  const endTime = performance.now();
@@ -15982,8 +16958,8 @@ var OpenRedaction = class _OpenRedaction {
15982
16958
  /**
15983
16959
  * Get severity-based scan results
15984
16960
  */
15985
- scan(text) {
15986
- const result = this.detect(text);
16961
+ async scan(text) {
16962
+ const result = await this.detect(text);
15987
16963
  return {
15988
16964
  high: result.detections.filter((d) => d.severity === "high"),
15989
16965
  medium: result.detections.filter((d) => d.severity === "medium"),
@@ -16220,7 +17196,7 @@ var OpenRedaction = class _OpenRedaction {
16220
17196
  const metadata = await processor.getMetadata(buffer, options);
16221
17197
  const extractionEnd = performance.now();
16222
17198
  const extractionTime = Math.round((extractionEnd - extractionStart) * 100) / 100;
16223
- const detection = this.detect(text);
17199
+ const detection = await this.detect(text);
16224
17200
  return {
16225
17201
  text,
16226
17202
  metadata,
@@ -16317,7 +17293,7 @@ var StreamingDetector = class {
16317
17293
  const end = Math.min(textLength, position + chunkSize);
16318
17294
  const chunk = text.substring(start, end);
16319
17295
  const byteOffset = start;
16320
- const result = this.detector.detect(chunk);
17296
+ const result = await this.detector.detect(chunk);
16321
17297
  const newDetections = result.detections.filter((detection) => {
16322
17298
  const absoluteStart = byteOffset + detection.position[0];
16323
17299
  const absoluteEnd = byteOffset + detection.position[1];
@@ -16347,8 +17323,10 @@ var StreamingDetector = class {
16347
17323
  (a, b) => b.position[0] - a.position[0]
16348
17324
  );
16349
17325
  for (const detection of sortedDetections) {
16350
- const [start2, end2] = detection.position;
16351
- redactedChunk = redactedChunk.substring(0, start2) + detection.placeholder + redactedChunk.substring(end2);
17326
+ if (!detection.value) continue;
17327
+ const escapedValue = this.escapeRegex(detection.value);
17328
+ const pattern = new RegExp(escapedValue, "gi");
17329
+ redactedChunk = redactedChunk.replace(pattern, detection.placeholder);
16352
17330
  }
16353
17331
  }
16354
17332
  yield {
@@ -16374,8 +17352,10 @@ var StreamingDetector = class {
16374
17352
  allDetections.sort((a, b) => b.position[0] - a.position[0]);
16375
17353
  const redactionMap = {};
16376
17354
  for (const detection of allDetections) {
16377
- const [start, end] = detection.position;
16378
- redactedText = redactedText.substring(0, start) + detection.placeholder + redactedText.substring(end);
17355
+ if (!detection.value) continue;
17356
+ const escapedValue = this.escapeRegex(detection.value);
17357
+ const pattern = new RegExp(escapedValue, "gi");
17358
+ redactedText = redactedText.replace(pattern, detection.placeholder);
16379
17359
  redactionMap[detection.placeholder] = detection.value;
16380
17360
  }
16381
17361
  return {
@@ -16450,6 +17430,9 @@ var StreamingDetector = class {
16450
17430
  estimatedMemory
16451
17431
  };
16452
17432
  }
17433
+ escapeRegex(str) {
17434
+ return str.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
17435
+ }
16453
17436
  };
16454
17437
  function createStreamingDetector(detector, options) {
16455
17438
  return new StreamingDetector(detector, options);
@@ -16466,11 +17449,11 @@ var BatchProcessor = class {
16466
17449
  /**
16467
17450
  * Process multiple documents sequentially
16468
17451
  */
16469
- processSequential(documents, options = {}) {
17452
+ async processSequential(documents, options = {}) {
16470
17453
  const startTime = performance.now();
16471
17454
  const results = [];
16472
17455
  for (let i = 0; i < documents.length; i++) {
16473
- const result = this.detector.detect(documents[i]);
17456
+ const result = await this.detector.detect(documents[i]);
16474
17457
  results.push(result);
16475
17458
  if (options.onProgress) {
16476
17459
  options.onProgress(i + 1, documents.length);
@@ -16484,7 +17467,7 @@ var BatchProcessor = class {
16484
17467
  totalDocuments: documents.length,
16485
17468
  totalDetections: results.reduce((sum, r) => sum + r.detections.length, 0),
16486
17469
  totalTime,
16487
- avgTimePerDocument: totalTime / documents.length
17470
+ avgTimePerDocument: documents.length > 0 ? totalTime / documents.length : 0
16488
17471
  }
16489
17472
  };
16490
17473
  }
@@ -16498,16 +17481,14 @@ var BatchProcessor = class {
16498
17481
  let completed = 0;
16499
17482
  for (let i = 0; i < documents.length; i += maxConcurrency) {
16500
17483
  const batch = documents.slice(i, i + maxConcurrency);
16501
- const batchPromises = batch.map((doc, batchIndex) => {
16502
- return Promise.resolve().then(() => {
16503
- const result = this.detector.detect(doc);
16504
- results[i + batchIndex] = result;
16505
- completed++;
16506
- if (options.onProgress) {
16507
- options.onProgress(completed, documents.length);
16508
- }
16509
- return result;
16510
- });
17484
+ const batchPromises = batch.map(async (doc, batchIndex) => {
17485
+ const result = await this.detector.detect(doc);
17486
+ results[i + batchIndex] = result;
17487
+ completed++;
17488
+ if (options.onProgress) {
17489
+ options.onProgress(completed, documents.length);
17490
+ }
17491
+ return result;
16511
17492
  });
16512
17493
  await Promise.all(batchPromises);
16513
17494
  }
@@ -16519,7 +17500,7 @@ var BatchProcessor = class {
16519
17500
  totalDocuments: documents.length,
16520
17501
  totalDetections: results.reduce((sum, r) => sum + r.detections.length, 0),
16521
17502
  totalTime,
16522
- avgTimePerDocument: totalTime / documents.length
17503
+ avgTimePerDocument: documents.length > 0 ? totalTime / documents.length : 0
16523
17504
  }
16524
17505
  };
16525
17506
  }
@@ -16530,7 +17511,7 @@ var BatchProcessor = class {
16530
17511
  if (options.parallel) {
16531
17512
  return this.processParallel(documents, options);
16532
17513
  } else {
16533
- return Promise.resolve(this.processSequential(documents, options));
17514
+ return this.processSequential(documents, options);
16534
17515
  }
16535
17516
  }
16536
17517
  /**
@@ -16541,7 +17522,7 @@ var BatchProcessor = class {
16541
17522
  for (let i = 0; i < documents.length; i += batchSize) {
16542
17523
  const batch = documents.slice(i, i + batchSize);
16543
17524
  for (const doc of batch) {
16544
- const result = this.detector.detect(doc);
17525
+ const result = await this.detector.detect(doc);
16545
17526
  yield result;
16546
17527
  }
16547
17528
  }
@@ -16589,7 +17570,7 @@ function openredactionMiddleware(options = {}) {
16589
17570
  ...detectorOptions
16590
17571
  } = options;
16591
17572
  const detector = new OpenRedaction(detectorOptions);
16592
- return (req, res, next) => {
17573
+ return async (req, res, next) => {
16593
17574
  if (skipRoutes.some((pattern) => pattern.test(req.path))) {
16594
17575
  return next();
16595
17576
  }
@@ -16609,7 +17590,7 @@ function openredactionMiddleware(options = {}) {
16609
17590
  const results = {};
16610
17591
  const redactedBody = { ...req.body };
16611
17592
  for (const { field, value } of textsToCheck) {
16612
- const result = detector.detect(value);
17593
+ const result = await detector.detect(value);
16613
17594
  if (result.detections.length > 0) {
16614
17595
  totalDetections += result.detections.length;
16615
17596
  results[field] = result;
@@ -16659,7 +17640,7 @@ function openredactionMiddleware(options = {}) {
16659
17640
  }
16660
17641
  function detectPII(options = {}) {
16661
17642
  const detector = new OpenRedaction(options);
16662
- return (req, res) => {
17643
+ return async (req, res) => {
16663
17644
  const text = req.body?.text || req.query.text;
16664
17645
  if (!text) {
16665
17646
  res.status(400).json({
@@ -16668,19 +17649,26 @@ function detectPII(options = {}) {
16668
17649
  });
16669
17650
  return;
16670
17651
  }
16671
- const result = detector.detect(text);
16672
- res.json({
16673
- detected: result.detections.length > 0,
16674
- count: result.detections.length,
16675
- detections: result.detections,
16676
- redacted: result.redacted,
16677
- stats: result.stats
16678
- });
17652
+ try {
17653
+ const result = await detector.detect(text);
17654
+ res.json({
17655
+ detected: result.detections.length > 0,
17656
+ count: result.detections.length,
17657
+ detections: result.detections,
17658
+ redacted: result.redacted,
17659
+ stats: result.stats
17660
+ });
17661
+ } catch (error) {
17662
+ res.status(500).json({
17663
+ error: "Detection failed",
17664
+ message: error instanceof Error ? error.message : "Unknown error"
17665
+ });
17666
+ }
16679
17667
  };
16680
17668
  }
16681
17669
  function generateReport(options = {}) {
16682
17670
  const detector = new OpenRedaction(options);
16683
- return (req, res) => {
17671
+ return async (req, res) => {
16684
17672
  const text = req.body?.text;
16685
17673
  const format = req.body?.format || req.query.format || "json";
16686
17674
  if (!text) {
@@ -16689,28 +17677,35 @@ function generateReport(options = {}) {
16689
17677
  });
16690
17678
  return;
16691
17679
  }
16692
- const result = detector.detect(text);
16693
- if (format === "html") {
16694
- const html = detector.generateReport(result, {
16695
- format: "html",
16696
- title: req.body?.title || "PII Detection Report"
16697
- });
16698
- res.setHeader("Content-Type", "text/html");
16699
- res.send(html);
16700
- } else if (format === "markdown") {
16701
- const md = detector.generateReport(result, {
16702
- format: "markdown",
16703
- title: req.body?.title || "PII Detection Report"
16704
- });
16705
- res.setHeader("Content-Type", "text/markdown");
16706
- res.send(md);
16707
- } else {
16708
- res.json({
16709
- detected: result.detections.length > 0,
16710
- count: result.detections.length,
16711
- detections: result.detections,
16712
- redacted: result.redacted,
16713
- stats: result.stats
17680
+ try {
17681
+ const result = await detector.detect(text);
17682
+ if (format === "html") {
17683
+ const html = detector.generateReport(result, {
17684
+ format: "html",
17685
+ title: req.body?.title || "PII Detection Report"
17686
+ });
17687
+ res.setHeader("Content-Type", "text/html");
17688
+ res.send(html);
17689
+ } else if (format === "markdown") {
17690
+ const md = detector.generateReport(result, {
17691
+ format: "markdown",
17692
+ title: req.body?.title || "PII Detection Report"
17693
+ });
17694
+ res.setHeader("Content-Type", "text/markdown");
17695
+ res.send(md);
17696
+ } else {
17697
+ res.json({
17698
+ detected: result.detections.length > 0,
17699
+ count: result.detections.length,
17700
+ detections: result.detections,
17701
+ redacted: result.redacted,
17702
+ stats: result.stats
17703
+ });
17704
+ }
17705
+ } catch (error) {
17706
+ res.status(500).json({
17707
+ error: "Report generation failed",
17708
+ message: error instanceof Error ? error.message : "Unknown error"
16714
17709
  });
16715
17710
  }
16716
17711
  };
@@ -16722,12 +17717,17 @@ function useOpenRedaction(options) {
16722
17717
  const detector = (0, import_react.useMemo)(() => new OpenRedaction(options), [options]);
16723
17718
  const [result, setResult] = (0, import_react.useState)(null);
16724
17719
  const [isDetecting, setIsDetecting] = (0, import_react.useState)(false);
16725
- const detect = (0, import_react.useCallback)((text) => {
17720
+ const detect = (0, import_react.useCallback)(async (text) => {
16726
17721
  setIsDetecting(true);
16727
- const detection = detector.detect(text);
16728
- setResult(detection);
16729
- setIsDetecting(false);
16730
- return detection;
17722
+ try {
17723
+ const detection = await detector.detect(text);
17724
+ setResult(detection);
17725
+ setIsDetecting(false);
17726
+ return detection;
17727
+ } catch (error) {
17728
+ setIsDetecting(false);
17729
+ throw error;
17730
+ }
16731
17731
  }, [detector]);
16732
17732
  const clear = (0, import_react.useCallback)(() => {
16733
17733
  setResult(null);
@@ -16753,10 +17753,14 @@ function usePIIDetector(text, options) {
16753
17753
  return;
16754
17754
  }
16755
17755
  setIsDetecting(true);
16756
- const timer = setTimeout(() => {
16757
- const detection = detector.detect(text);
16758
- setResult(detection);
16759
- setIsDetecting(false);
17756
+ const timer = setTimeout(async () => {
17757
+ try {
17758
+ const detection = await detector.detect(text);
17759
+ setResult(detection);
17760
+ setIsDetecting(false);
17761
+ } catch (error) {
17762
+ setIsDetecting(false);
17763
+ }
16760
17764
  }, debounce);
16761
17765
  return () => {
16762
17766
  clearTimeout(timer);
@@ -16777,27 +17781,32 @@ function useFormFieldValidator(options) {
16777
17781
  const [value, setValue] = (0, import_react.useState)("");
16778
17782
  const [error, setError] = (0, import_react.useState)(null);
16779
17783
  const [result, setResult] = (0, import_react.useState)(null);
16780
- const validate = (0, import_react.useCallback)((inputValue) => {
17784
+ const validate = (0, import_react.useCallback)(async (inputValue) => {
16781
17785
  setValue(inputValue);
16782
17786
  if (!inputValue) {
16783
17787
  setError(null);
16784
17788
  setResult(null);
16785
17789
  return true;
16786
17790
  }
16787
- const detection = detector.detect(inputValue);
16788
- setResult(detection);
16789
- const relevantDetections = types.length > 0 ? detection.detections.filter((d) => types.includes(d.type)) : detection.detections;
16790
- if (relevantDetections.length > 0) {
16791
- if (failOnPII) {
16792
- setError(`Sensitive information detected: ${relevantDetections[0].type}`);
16793
- }
16794
- if (onPIIDetected) {
16795
- onPIIDetected(detection);
17791
+ try {
17792
+ const detection = await detector.detect(inputValue);
17793
+ setResult(detection);
17794
+ const relevantDetections = types.length > 0 ? detection.detections.filter((d) => types.includes(d.type)) : detection.detections;
17795
+ if (relevantDetections.length > 0) {
17796
+ if (failOnPII) {
17797
+ setError(`Sensitive information detected: ${relevantDetections[0].type}`);
17798
+ }
17799
+ if (onPIIDetected) {
17800
+ onPIIDetected(detection);
17801
+ }
17802
+ return false;
16796
17803
  }
17804
+ setError(null);
17805
+ return true;
17806
+ } catch (error2) {
17807
+ setError("Validation failed");
16797
17808
  return false;
16798
17809
  }
16799
- setError(null);
16800
- return true;
16801
17810
  }, [detector, failOnPII, types, onPIIDetected]);
16802
17811
  const getFieldProps = (0, import_react.useCallback)(() => ({
16803
17812
  value,
@@ -16824,7 +17833,7 @@ function useBatchDetector(options) {
16824
17833
  setProgress(0);
16825
17834
  const detections = [];
16826
17835
  for (let i = 0; i < texts.length; i++) {
16827
- const result = detector.detect(texts[i]);
17836
+ const result = await detector.detect(texts[i]);
16828
17837
  detections.push(result);
16829
17838
  setProgress((i + 1) / texts.length * 100);
16830
17839
  await new Promise((resolve) => setTimeout(resolve, 0));
@@ -16861,9 +17870,12 @@ function useAutoRedact(options) {
16861
17870
  setResult(null);
16862
17871
  return;
16863
17872
  }
16864
- const timer = setTimeout(() => {
16865
- const detection = detector.detect(text);
16866
- setResult(detection);
17873
+ const timer = setTimeout(async () => {
17874
+ try {
17875
+ const detection = await detector.detect(text);
17876
+ setResult(detection);
17877
+ } catch (error) {
17878
+ }
16867
17879
  }, debounce);
16868
17880
  return () => clearTimeout(timer);
16869
17881
  }, [text, detector, debounce]);
@@ -16992,7 +18004,7 @@ var TenantManager = class {
16992
18004
  await this.checkQuotas(tenantId, text);
16993
18005
  this.trackRequest(tenantId, text);
16994
18006
  const detector = this.getDetector(tenantId);
16995
- const result = detector.detect(text);
18007
+ const result = await detector.detect(text);
16996
18008
  const usage = this.usage.get(tenantId);
16997
18009
  usage.piiDetectedThisMonth += result.detections.length;
16998
18010
  usage.lastRequestAt = /* @__PURE__ */ new Date();
@@ -17279,6 +18291,7 @@ var DEFAULT_TIER_QUOTAS = {
17279
18291
  // src/webhooks/WebhookManager.ts
17280
18292
  var WebhookManager = class {
17281
18293
  // 1 minute
18294
+ // private readonly HALF_OPEN_MAX_REQUESTS = 1; // Reserved for future use
17282
18295
  constructor(options) {
17283
18296
  this.webhooks = /* @__PURE__ */ new Map();
17284
18297
  this.deliveryHistory = [];
@@ -17550,9 +18563,9 @@ var WebhookManager = class {
17550
18563
  */
17551
18564
  async makeHttpRequest(webhook, event) {
17552
18565
  try {
17553
- let fetch;
18566
+ let fetch2;
17554
18567
  try {
17555
- fetch = globalThis.fetch;
18568
+ fetch2 = globalThis.fetch;
17556
18569
  } catch {
17557
18570
  throw new Error("[WebhookManager] HTTP client not available. Requires Node 18+ with fetch support.");
17558
18571
  }
@@ -17572,7 +18585,7 @@ var WebhookManager = class {
17572
18585
  const controller = new AbortController();
17573
18586
  const timeoutId = setTimeout(() => controller.abort(), webhook.timeout);
17574
18587
  try {
17575
- const response = await fetch(webhook.url, {
18588
+ const response = await fetch2(webhook.url, {
17576
18589
  method: "POST",
17577
18590
  headers,
17578
18591
  body: JSON.stringify(event),
@@ -17943,7 +18956,7 @@ var APIServer = class {
17943
18956
  if (req.tenantId && this.config.tenantManager) {
17944
18957
  result = await this.config.tenantManager.detect(req.tenantId, text);
17945
18958
  } else if (this.detector) {
17946
- result = this.detector.detect(text);
18959
+ result = await this.detector.detect(text);
17947
18960
  } else {
17948
18961
  throw new Error("No detector available");
17949
18962
  }
@@ -17984,7 +18997,7 @@ var APIServer = class {
17984
18997
  if (req.tenantId && this.config.tenantManager) {
17985
18998
  result = await this.config.tenantManager.detect(req.tenantId, text);
17986
18999
  } else if (this.detector) {
17987
- result = this.detector.detect(text);
19000
+ result = await this.detector.detect(text);
17988
19001
  } else {
17989
19002
  throw new Error("No detector available");
17990
19003
  }
@@ -18407,10 +19420,12 @@ init_HealthCheck();
18407
19420
  analyzeFullContext,
18408
19421
  calculateContextConfidence,
18409
19422
  calculateRisk,
19423
+ callAIDetect,
18410
19424
  ccpaPreset,
18411
19425
  commonFalsePositives,
18412
19426
  compileSafeRegex,
18413
19427
  contactPatterns,
19428
+ convertAIEntityToDetection,
18414
19429
  createAPIServer,
18415
19430
  createBatchProcessor,
18416
19431
  createCacheDisabledError,
@@ -18445,12 +19460,16 @@ init_HealthCheck();
18445
19460
  createXlsxProcessor,
18446
19461
  defaultPasses,
18447
19462
  detectPII,
19463
+ detectionsOverlap,
19464
+ educationPreset,
18448
19465
  exportForVersionControl,
18449
19466
  extractContext,
18450
19467
  filterFalsePositives,
19468
+ financePreset,
18451
19469
  financialPatterns,
18452
19470
  gdprPreset,
18453
19471
  generateReport,
19472
+ getAIEndpoint,
18454
19473
  getPatternsByCategory,
18455
19474
  getPredefinedRole,
18456
19475
  getPreset,
@@ -18458,21 +19477,26 @@ init_HealthCheck();
18458
19477
  governmentPatterns,
18459
19478
  groupPatternsByPass,
18460
19479
  healthCheckMiddleware,
19480
+ healthcarePreset,
19481
+ healthcareResearchPreset,
18461
19482
  hipaaPreset,
18462
19483
  inferDocumentType,
18463
19484
  isFalsePositive,
18464
19485
  isUnsafePattern,
19486
+ mergeAIEntities,
18465
19487
  mergePassDetections,
18466
19488
  networkPatterns,
18467
19489
  openredactionMiddleware,
18468
19490
  personalPatterns,
18469
19491
  safeExec,
18470
19492
  safeExecAll,
19493
+ transportLogisticsPreset,
18471
19494
  useAutoRedact,
18472
19495
  useBatchDetector,
18473
19496
  useFormFieldValidator,
18474
19497
  useOpenRedaction,
18475
19498
  usePIIDetector,
19499
+ validateAIEntity,
18476
19500
  validateEmail,
18477
19501
  validateIBAN,
18478
19502
  validateLuhn,