openredaction 1.0.0 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +57 -1
- package/dist/index.d.ts +112 -22
- package/dist/index.js +1313 -289
- package/dist/index.mjs +1255 -246
- package/package.json +12 -27
- package/dist/HealthCheck-A5OD4ATR.mjs +0 -12
- package/dist/HealthCheck-A5OD4ATR.mjs.map +0 -1
- package/dist/chunk-7OGNW2MU.mjs +0 -1701
- package/dist/chunk-7OGNW2MU.mjs.map +0 -1
- package/dist/chunk-MYYLGNXS.mjs +0 -149
- package/dist/chunk-MYYLGNXS.mjs.map +0 -1
- package/dist/chunk-WMJKH4XE.mjs +0 -34
- package/dist/chunk-WMJKH4XE.mjs.map +0 -1
- package/dist/chunk-ZRHGDEPC.mjs +0 -297
- package/dist/chunk-ZRHGDEPC.mjs.map +0 -1
- package/dist/cli/test-pattern.js +0 -430
- package/dist/document-AOMZP7UR.mjs +0 -26
- package/dist/document-AOMZP7UR.mjs.map +0 -1
- package/dist/index.cli.js +0 -15093
- package/dist/index.d.mts +0 -4111
- package/dist/index.js.map +0 -1
- package/dist/index.mjs.map +0 -1
- package/dist/workers-RMN5POM6.mjs +0 -10
- package/dist/workers-RMN5POM6.mjs.map +0 -1
package/dist/index.js
CHANGED
|
@@ -311,7 +311,7 @@ var init_HealthCheck = __esm({
|
|
|
311
311
|
try {
|
|
312
312
|
if (options.testDetection !== false) {
|
|
313
313
|
const testText = "Test email: test@example.com";
|
|
314
|
-
const result = this.detector.detect(testText);
|
|
314
|
+
const result = await this.detector.detect(testText);
|
|
315
315
|
if (!result || !result.detections) {
|
|
316
316
|
return {
|
|
317
317
|
status: "fail",
|
|
@@ -377,7 +377,7 @@ var init_HealthCheck = __esm({
|
|
|
377
377
|
try {
|
|
378
378
|
const testText = "Test: john@example.com, phone: 555-123-4567, IP: 192.168.1.1";
|
|
379
379
|
const start = performance.now();
|
|
380
|
-
this.detector.detect(testText);
|
|
380
|
+
await this.detector.detect(testText);
|
|
381
381
|
const duration = performance.now() - start;
|
|
382
382
|
if (duration > threshold * 2) {
|
|
383
383
|
return {
|
|
@@ -708,51 +708,55 @@ var init_JsonProcessor = __esm({
|
|
|
708
708
|
/**
|
|
709
709
|
* Detect PII in JSON data
|
|
710
710
|
*/
|
|
711
|
-
detect(data, detector, options) {
|
|
711
|
+
async detect(data, detector, options) {
|
|
712
712
|
const opts = { ...this.defaultOptions, ...options };
|
|
713
713
|
const pathsDetected = [];
|
|
714
714
|
const matchesByPath = {};
|
|
715
715
|
const allDetections = [];
|
|
716
|
+
const promises = [];
|
|
716
717
|
this.traverse(data, "", opts, (path3, value, key) => {
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
if (this.shouldAlwaysRedact(path3, opts.alwaysRedact)) {
|
|
721
|
-
const detection = {
|
|
722
|
-
type: "SENSITIVE_FIELD",
|
|
723
|
-
value: String(value),
|
|
724
|
-
placeholder: `[SENSITIVE_FIELD]`,
|
|
725
|
-
position: [0, String(value).length],
|
|
726
|
-
severity: "high",
|
|
727
|
-
confidence: 1
|
|
728
|
-
};
|
|
729
|
-
matchesByPath[path3] = [detection];
|
|
730
|
-
pathsDetected.push(path3);
|
|
731
|
-
allDetections.push(detection);
|
|
732
|
-
return;
|
|
733
|
-
}
|
|
734
|
-
if (opts.scanKeys && key) {
|
|
735
|
-
const keyResult = detector.detect(key);
|
|
736
|
-
if (keyResult.detections.length > 0) {
|
|
737
|
-
const keyPath = `${path3}.__key__`;
|
|
738
|
-
matchesByPath[keyPath] = keyResult.detections;
|
|
739
|
-
pathsDetected.push(keyPath);
|
|
740
|
-
allDetections.push(...keyResult.detections);
|
|
718
|
+
promises.push((async () => {
|
|
719
|
+
if (this.shouldSkip(path3, opts.skipPaths)) {
|
|
720
|
+
return;
|
|
741
721
|
}
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
722
|
+
if (this.shouldAlwaysRedact(path3, opts.alwaysRedact)) {
|
|
723
|
+
const detection = {
|
|
724
|
+
type: "SENSITIVE_FIELD",
|
|
725
|
+
value: String(value),
|
|
726
|
+
placeholder: `[SENSITIVE_FIELD]`,
|
|
727
|
+
position: [0, String(value).length],
|
|
728
|
+
severity: "high",
|
|
729
|
+
confidence: 1
|
|
730
|
+
};
|
|
731
|
+
matchesByPath[path3] = [detection];
|
|
732
|
+
pathsDetected.push(path3);
|
|
733
|
+
allDetections.push(detection);
|
|
734
|
+
return;
|
|
735
|
+
}
|
|
736
|
+
if (opts.scanKeys && key) {
|
|
737
|
+
const keyResult = await detector.detect(key);
|
|
738
|
+
if (keyResult.detections.length > 0) {
|
|
739
|
+
const keyPath = `${path3}.__key__`;
|
|
740
|
+
matchesByPath[keyPath] = keyResult.detections;
|
|
741
|
+
pathsDetected.push(keyPath);
|
|
742
|
+
allDetections.push(...keyResult.detections);
|
|
743
|
+
}
|
|
744
|
+
}
|
|
745
|
+
const valueStr = String(value);
|
|
746
|
+
const result = await detector.detect(valueStr);
|
|
747
|
+
if (result.detections.length > 0) {
|
|
748
|
+
const boostedDetections = this.boostConfidenceFromKey(
|
|
749
|
+
result.detections,
|
|
750
|
+
key,
|
|
751
|
+
opts.piiIndicatorKeys
|
|
752
|
+
);
|
|
753
|
+
matchesByPath[path3] = boostedDetections;
|
|
754
|
+
pathsDetected.push(path3);
|
|
755
|
+
allDetections.push(...boostedDetections);
|
|
756
|
+
}
|
|
757
|
+
})());
|
|
755
758
|
});
|
|
759
|
+
await Promise.all(promises);
|
|
756
760
|
const original = JSON.stringify(data);
|
|
757
761
|
const redacted = this.redact(data, {
|
|
758
762
|
original,
|
|
@@ -959,9 +963,9 @@ var init_JsonProcessor = __esm({
|
|
|
959
963
|
/**
|
|
960
964
|
* Detect PII in JSON Lines format
|
|
961
965
|
*/
|
|
962
|
-
detectJsonLines(input, detector, options) {
|
|
966
|
+
async detectJsonLines(input, detector, options) {
|
|
963
967
|
const documents = this.parseJsonLines(input);
|
|
964
|
-
return documents.map((doc) => this.detect(doc, detector, options));
|
|
968
|
+
return Promise.all(documents.map((doc) => this.detect(doc, detector, options)));
|
|
965
969
|
}
|
|
966
970
|
};
|
|
967
971
|
}
|
|
@@ -1065,7 +1069,7 @@ var init_CsvProcessor = __esm({
|
|
|
1065
1069
|
/**
|
|
1066
1070
|
* Detect PII in CSV data
|
|
1067
1071
|
*/
|
|
1068
|
-
detect(input, detector, options) {
|
|
1072
|
+
async detect(input, detector, options) {
|
|
1069
1073
|
const opts = { ...this.defaultOptions, ...options };
|
|
1070
1074
|
const rows = this.parse(input, options);
|
|
1071
1075
|
if (rows.length === 0) {
|
|
@@ -1142,7 +1146,7 @@ var init_CsvProcessor = __esm({
|
|
|
1142
1146
|
columnStats[col].piiCount++;
|
|
1143
1147
|
continue;
|
|
1144
1148
|
}
|
|
1145
|
-
const result = detector.detect(cellValue);
|
|
1149
|
+
const result = await detector.detect(cellValue);
|
|
1146
1150
|
if (result.detections.length > 0) {
|
|
1147
1151
|
const boostedDetections = this.boostConfidenceFromColumnName(
|
|
1148
1152
|
result.detections,
|
|
@@ -1482,7 +1486,7 @@ var init_XlsxProcessor = __esm({
|
|
|
1482
1486
|
/**
|
|
1483
1487
|
* Detect PII in XLSX data
|
|
1484
1488
|
*/
|
|
1485
|
-
detect(buffer, detector, options) {
|
|
1489
|
+
async detect(buffer, detector, options) {
|
|
1486
1490
|
if (!this.xlsx) {
|
|
1487
1491
|
throw new Error(
|
|
1488
1492
|
"[XlsxProcessor] XLSX support requires xlsx package. Install with: npm install xlsx"
|
|
@@ -1497,7 +1501,7 @@ var init_XlsxProcessor = __esm({
|
|
|
1497
1501
|
for (let sheetIndex = 0; sheetIndex < sheetNames.length; sheetIndex++) {
|
|
1498
1502
|
const sheetName = sheetNames[sheetIndex];
|
|
1499
1503
|
const sheet = workbook.Sheets[sheetName];
|
|
1500
|
-
const sheetResult = this.detectSheet(
|
|
1504
|
+
const sheetResult = await this.detectSheet(
|
|
1501
1505
|
sheet,
|
|
1502
1506
|
sheetName,
|
|
1503
1507
|
sheetIndex,
|
|
@@ -1540,7 +1544,7 @@ var init_XlsxProcessor = __esm({
|
|
|
1540
1544
|
/**
|
|
1541
1545
|
* Detect PII in a single sheet
|
|
1542
1546
|
*/
|
|
1543
|
-
detectSheet(sheet, sheetName, sheetIndex, detector, options) {
|
|
1547
|
+
async detectSheet(sheet, sheetName, sheetIndex, detector, options) {
|
|
1544
1548
|
const range = this.xlsx.utils.decode_range(sheet["!ref"] || "A1");
|
|
1545
1549
|
const startRow = range.s.r;
|
|
1546
1550
|
const endRow = options.maxRows !== void 0 ? Math.min(range.e.r, startRow + options.maxRows - 1) : range.e.r;
|
|
@@ -1615,7 +1619,7 @@ var init_XlsxProcessor = __esm({
|
|
|
1615
1619
|
columnStats[colIndex].piiCount++;
|
|
1616
1620
|
continue;
|
|
1617
1621
|
}
|
|
1618
|
-
const result = detector.detect(cellValue);
|
|
1622
|
+
const result = await detector.detect(cellValue);
|
|
1619
1623
|
if (result.detections.length > 0) {
|
|
1620
1624
|
const boostedDetections = this.boostConfidenceFromColumnName(
|
|
1621
1625
|
result.detections,
|
|
@@ -2480,10 +2484,12 @@ __export(index_exports, {
|
|
|
2480
2484
|
analyzeFullContext: () => analyzeFullContext,
|
|
2481
2485
|
calculateContextConfidence: () => calculateContextConfidence,
|
|
2482
2486
|
calculateRisk: () => calculateRisk,
|
|
2487
|
+
callAIDetect: () => callAIDetect,
|
|
2483
2488
|
ccpaPreset: () => ccpaPreset,
|
|
2484
2489
|
commonFalsePositives: () => commonFalsePositives,
|
|
2485
2490
|
compileSafeRegex: () => compileSafeRegex,
|
|
2486
2491
|
contactPatterns: () => contactPatterns,
|
|
2492
|
+
convertAIEntityToDetection: () => convertAIEntityToDetection,
|
|
2487
2493
|
createAPIServer: () => createAPIServer,
|
|
2488
2494
|
createBatchProcessor: () => createBatchProcessor,
|
|
2489
2495
|
createCacheDisabledError: () => createCacheDisabledError,
|
|
@@ -2518,12 +2524,16 @@ __export(index_exports, {
|
|
|
2518
2524
|
createXlsxProcessor: () => createXlsxProcessor,
|
|
2519
2525
|
defaultPasses: () => defaultPasses,
|
|
2520
2526
|
detectPII: () => detectPII,
|
|
2527
|
+
detectionsOverlap: () => detectionsOverlap,
|
|
2528
|
+
educationPreset: () => educationPreset,
|
|
2521
2529
|
exportForVersionControl: () => exportForVersionControl,
|
|
2522
2530
|
extractContext: () => extractContext,
|
|
2523
2531
|
filterFalsePositives: () => filterFalsePositives,
|
|
2532
|
+
financePreset: () => financePreset,
|
|
2524
2533
|
financialPatterns: () => financialPatterns,
|
|
2525
2534
|
gdprPreset: () => gdprPreset,
|
|
2526
2535
|
generateReport: () => generateReport,
|
|
2536
|
+
getAIEndpoint: () => getAIEndpoint,
|
|
2527
2537
|
getPatternsByCategory: () => getPatternsByCategory,
|
|
2528
2538
|
getPredefinedRole: () => getPredefinedRole,
|
|
2529
2539
|
getPreset: () => getPreset,
|
|
@@ -2531,21 +2541,26 @@ __export(index_exports, {
|
|
|
2531
2541
|
governmentPatterns: () => governmentPatterns,
|
|
2532
2542
|
groupPatternsByPass: () => groupPatternsByPass,
|
|
2533
2543
|
healthCheckMiddleware: () => healthCheckMiddleware,
|
|
2544
|
+
healthcarePreset: () => healthcarePreset,
|
|
2545
|
+
healthcareResearchPreset: () => healthcareResearchPreset,
|
|
2534
2546
|
hipaaPreset: () => hipaaPreset,
|
|
2535
2547
|
inferDocumentType: () => inferDocumentType,
|
|
2536
2548
|
isFalsePositive: () => isFalsePositive,
|
|
2537
2549
|
isUnsafePattern: () => isUnsafePattern,
|
|
2550
|
+
mergeAIEntities: () => mergeAIEntities,
|
|
2538
2551
|
mergePassDetections: () => mergePassDetections,
|
|
2539
2552
|
networkPatterns: () => networkPatterns,
|
|
2540
2553
|
openredactionMiddleware: () => openredactionMiddleware,
|
|
2541
2554
|
personalPatterns: () => personalPatterns,
|
|
2542
2555
|
safeExec: () => safeExec,
|
|
2543
2556
|
safeExecAll: () => safeExecAll,
|
|
2557
|
+
transportLogisticsPreset: () => transportLogisticsPreset,
|
|
2544
2558
|
useAutoRedact: () => useAutoRedact,
|
|
2545
2559
|
useBatchDetector: () => useBatchDetector,
|
|
2546
2560
|
useFormFieldValidator: () => useFormFieldValidator,
|
|
2547
2561
|
useOpenRedaction: () => useOpenRedaction,
|
|
2548
2562
|
usePIIDetector: () => usePIIDetector,
|
|
2563
|
+
validateAIEntity: () => validateAIEntity,
|
|
2549
2564
|
validateEmail: () => validateEmail,
|
|
2550
2565
|
validateIBAN: () => validateIBAN,
|
|
2551
2566
|
validateLuhn: () => validateLuhn,
|
|
@@ -2763,7 +2778,7 @@ var PersistentAuditLogger = class {
|
|
|
2763
2778
|
enableHashing: options.enableHashing ?? true,
|
|
2764
2779
|
hashAlgorithm: options.hashAlgorithm ?? "sha256",
|
|
2765
2780
|
enableWAL: options.enableWAL ?? true,
|
|
2766
|
-
secretKey: options.secretKey
|
|
2781
|
+
secretKey: options.secretKey ?? void 0
|
|
2767
2782
|
};
|
|
2768
2783
|
this.adapter = this.createAdapter(options.database);
|
|
2769
2784
|
}
|
|
@@ -3109,7 +3124,8 @@ var PersistentAuditLogger = class {
|
|
|
3109
3124
|
* Start automatic cleanup schedule
|
|
3110
3125
|
*/
|
|
3111
3126
|
startCleanupSchedule() {
|
|
3112
|
-
const
|
|
3127
|
+
const cleanupIntervalHours = this.options.retention?.cleanupIntervalHours ?? 24;
|
|
3128
|
+
const intervalMs = cleanupIntervalHours * 60 * 60 * 1e3;
|
|
3113
3129
|
this.cleanupTimer = setInterval(() => {
|
|
3114
3130
|
this.runCleanup().catch((err) => {
|
|
3115
3131
|
console.error("[PersistentAuditLogger] Cleanup failed:", err);
|
|
@@ -4115,7 +4131,7 @@ function validateLuhn(cardNumber, _context) {
|
|
|
4115
4131
|
return sum % 10 === 0;
|
|
4116
4132
|
}
|
|
4117
4133
|
function validateIBAN(iban, _context) {
|
|
4118
|
-
const cleaned = iban.replace(
|
|
4134
|
+
const cleaned = iban.replace(/[\s\u00A0.-]/g, "").toUpperCase();
|
|
4119
4135
|
if (!/^[A-Z]{2}[0-9]{2}[A-Z0-9]{1,30}$/.test(cleaned)) {
|
|
4120
4136
|
return false;
|
|
4121
4137
|
}
|
|
@@ -4210,7 +4226,7 @@ function mod97(string) {
|
|
|
4210
4226
|
return remainder;
|
|
4211
4227
|
}
|
|
4212
4228
|
function validateNINO(nino, _context) {
|
|
4213
|
-
const cleaned = nino.replace(
|
|
4229
|
+
const cleaned = nino.replace(/[\s\u00A0.-]/g, "").toUpperCase();
|
|
4214
4230
|
if (!/^[A-CEGHJ-PR-TW-Z]{2}[0-9]{6}[A-D]$/.test(cleaned)) {
|
|
4215
4231
|
return false;
|
|
4216
4232
|
}
|
|
@@ -4219,7 +4235,7 @@ function validateNINO(nino, _context) {
|
|
|
4219
4235
|
return !invalidPrefixes.includes(prefix);
|
|
4220
4236
|
}
|
|
4221
4237
|
function validateNHS(nhs, _context) {
|
|
4222
|
-
const cleaned = nhs.replace(/[\s
|
|
4238
|
+
const cleaned = nhs.replace(/[\s\u00A0.-]/g, "");
|
|
4223
4239
|
if (!/^\d{10}$/.test(cleaned)) {
|
|
4224
4240
|
return false;
|
|
4225
4241
|
}
|
|
@@ -4232,11 +4248,11 @@ function validateNHS(nhs, _context) {
|
|
|
4232
4248
|
return expectedCheckDigit === parseInt(cleaned[9], 10) && checkDigit !== 10;
|
|
4233
4249
|
}
|
|
4234
4250
|
function validateUKPassport(passport, _context) {
|
|
4235
|
-
const cleaned = passport.replace(
|
|
4251
|
+
const cleaned = passport.replace(/[\s\u00A0.-]/g, "").toUpperCase();
|
|
4236
4252
|
return /^\d{9}$/.test(cleaned) || /^\d{3}\d{6}$/.test(cleaned);
|
|
4237
4253
|
}
|
|
4238
4254
|
function validateSSN(ssn, _context) {
|
|
4239
|
-
const cleaned = ssn.replace(/[\s
|
|
4255
|
+
const cleaned = ssn.replace(/[\s\u00A0.-]/g, "");
|
|
4240
4256
|
if (!/^\d{9}$/.test(cleaned)) {
|
|
4241
4257
|
return false;
|
|
4242
4258
|
}
|
|
@@ -4266,6 +4282,15 @@ function validateSortCode(sortCode, _context) {
|
|
|
4266
4282
|
const cleaned = sortCode.replace(/[\s-]/g, "");
|
|
4267
4283
|
return /^\d{6}$/.test(cleaned);
|
|
4268
4284
|
}
|
|
4285
|
+
function validateRoutingNumber(routingNumber, _context) {
|
|
4286
|
+
const cleaned = routingNumber.replace(/[\s\u00A0.-]/g, "");
|
|
4287
|
+
if (!/^\d{9}$/.test(cleaned)) {
|
|
4288
|
+
return false;
|
|
4289
|
+
}
|
|
4290
|
+
const digits = cleaned.split("").map(Number);
|
|
4291
|
+
const checksum = (3 * (digits[0] + digits[3] + digits[6]) + 7 * (digits[1] + digits[4] + digits[7]) + (digits[2] + digits[5] + digits[8])) % 10;
|
|
4292
|
+
return checksum === 0;
|
|
4293
|
+
}
|
|
4269
4294
|
function validateName(name, context) {
|
|
4270
4295
|
const businessTerms = [
|
|
4271
4296
|
"account",
|
|
@@ -4300,10 +4325,29 @@ function validateName(name, context) {
|
|
|
4300
4325
|
"sir",
|
|
4301
4326
|
"madam",
|
|
4302
4327
|
"lord",
|
|
4303
|
-
"lady"
|
|
4328
|
+
"lady",
|
|
4329
|
+
"personal",
|
|
4330
|
+
"sensitive",
|
|
4331
|
+
"information",
|
|
4332
|
+
"data",
|
|
4333
|
+
"details",
|
|
4334
|
+
"content",
|
|
4335
|
+
"document",
|
|
4336
|
+
"text",
|
|
4337
|
+
"example",
|
|
4338
|
+
"simple",
|
|
4339
|
+
"regular",
|
|
4340
|
+
"plain",
|
|
4341
|
+
"send",
|
|
4342
|
+
"reply",
|
|
4343
|
+
"reach",
|
|
4344
|
+
"write",
|
|
4345
|
+
"use",
|
|
4346
|
+
"contact",
|
|
4347
|
+
"message"
|
|
4304
4348
|
];
|
|
4305
4349
|
const nameLower = name.toLowerCase();
|
|
4306
|
-
if (businessTerms.some((term) => nameLower.includes(term))) {
|
|
4350
|
+
if (businessTerms.some((term) => nameLower === term || nameLower.includes(term))) {
|
|
4307
4351
|
return false;
|
|
4308
4352
|
}
|
|
4309
4353
|
if (name === name.toUpperCase() && name.length <= 5) {
|
|
@@ -4313,7 +4357,7 @@ function validateName(name, context) {
|
|
|
4313
4357
|
return false;
|
|
4314
4358
|
}
|
|
4315
4359
|
const contextLower = context.toLowerCase();
|
|
4316
|
-
if (contextLower.includes("company ") || contextLower.includes("business ") || contextLower.includes("organization")) {
|
|
4360
|
+
if (contextLower.includes("company ") || contextLower.includes("business ") || contextLower.includes("organization") || contextLower.includes("without any") || contextLower.includes("simple text") || contextLower.includes("plain text")) {
|
|
4317
4361
|
return false;
|
|
4318
4362
|
}
|
|
4319
4363
|
return true;
|
|
@@ -4339,18 +4383,50 @@ var personalPatterns = [
|
|
|
4339
4383
|
type: "EMAIL",
|
|
4340
4384
|
regex: /\b[a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*\b/g,
|
|
4341
4385
|
priority: 100,
|
|
4342
|
-
validator:
|
|
4386
|
+
validator: (value, context) => {
|
|
4387
|
+
if (!validateEmail(value)) {
|
|
4388
|
+
return false;
|
|
4389
|
+
}
|
|
4390
|
+
const rejectKeywords = /your\.email|placeholder|fake/i;
|
|
4391
|
+
const isLegitimateTest = /test|sample|demo|spec|api|reference|guide|template|documentation/i.test(context);
|
|
4392
|
+
if (rejectKeywords.test(context) && !isLegitimateTest) {
|
|
4393
|
+
return false;
|
|
4394
|
+
}
|
|
4395
|
+
const testDomains = /@test\.com|@example\.com|@sample\.com|@demo\.com|@fake\.com|@placeholder\.com/i;
|
|
4396
|
+
if (testDomains.test(value)) {
|
|
4397
|
+
const legitimateTestContext = /test|spec|api|reference|guide|template|documentation|john\+|!!!|\+tag|john@/i.test(context + value);
|
|
4398
|
+
if (!legitimateTestContext) {
|
|
4399
|
+
return false;
|
|
4400
|
+
}
|
|
4401
|
+
}
|
|
4402
|
+
return true;
|
|
4403
|
+
},
|
|
4343
4404
|
placeholder: "[EMAIL_{n}]",
|
|
4344
4405
|
description: "Email address",
|
|
4345
4406
|
severity: "high"
|
|
4346
4407
|
},
|
|
4347
4408
|
{
|
|
4348
4409
|
type: "NAME",
|
|
4349
|
-
|
|
4410
|
+
// Match names with case variations - handles "John Smith", "john smith", "JOHN SMITH", "Lucy jones", etc.
|
|
4411
|
+
// First word must start with uppercase or be all uppercase; subsequent words can be any case
|
|
4412
|
+
regex: /\b(?:(?:Mr|Mrs|Ms|Miss|Dr|Prof|Professor|Sir|Madam|Lady|Lord|Rev|Father|Sister|Brother)\.?\s+)?((?:[A-Z][a-z'’.\-]+|[A-Z]{2,})(?:\s+(?:[A-Z][a-z'’.\-]+|[A-Z]{2,}|[a-z][a-z'’.\-]+)){1,3})(?:\s+(?:Jr|Sr|II|III|IV|PhD|MD|Esq|DDS|DVM|MBA|CPA)\.?)?\b/g,
|
|
4350
4413
|
priority: 50,
|
|
4351
|
-
validator:
|
|
4414
|
+
validator: (value, context) => {
|
|
4415
|
+
if (!validateName(value, context)) {
|
|
4416
|
+
return false;
|
|
4417
|
+
}
|
|
4418
|
+
const rejectKeywords = /example|test|sample|demo|fake|placeholder|john\s+doe|jane\s+smith/i;
|
|
4419
|
+
if (rejectKeywords.test(context) || rejectKeywords.test(value)) {
|
|
4420
|
+
return false;
|
|
4421
|
+
}
|
|
4422
|
+
const businessTerms = /\b(company|corporation|inc|llc|ltd|corp|organization|business|enterprise|firm|agency)\b/i;
|
|
4423
|
+
if (businessTerms.test(context)) {
|
|
4424
|
+
return false;
|
|
4425
|
+
}
|
|
4426
|
+
return true;
|
|
4427
|
+
},
|
|
4352
4428
|
placeholder: "[NAME_{n}]",
|
|
4353
|
-
description: "Person name with salutations/suffixes",
|
|
4429
|
+
description: "Person name with salutations/suffixes (handles case variations)",
|
|
4354
4430
|
severity: "high"
|
|
4355
4431
|
},
|
|
4356
4432
|
{
|
|
@@ -4371,11 +4447,95 @@ var personalPatterns = [
|
|
|
4371
4447
|
},
|
|
4372
4448
|
{
|
|
4373
4449
|
type: "DATE_OF_BIRTH",
|
|
4374
|
-
regex: /\b(?:DOB|date of birth|birth ?date)[:\s]*(
|
|
4450
|
+
regex: /\b(?:DOB|date of birth|birth ?date)[:\s-]*((?:\d{1,2}[\/\-.]\d{1,2}[\/\-.]\d{2,4})|(?:\d{1,2}\s+(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\s+\d{2,4}))\b/gi,
|
|
4375
4451
|
priority: 95,
|
|
4376
4452
|
placeholder: "[DOB_{n}]",
|
|
4377
4453
|
description: "Date of birth",
|
|
4378
|
-
severity: "high"
|
|
4454
|
+
severity: "high",
|
|
4455
|
+
validator: (value, context) => {
|
|
4456
|
+
const dobContext = /dob|date\s+of\s+birth|birth\s+date|birth/i;
|
|
4457
|
+
if (!dobContext.test(context)) {
|
|
4458
|
+
return false;
|
|
4459
|
+
}
|
|
4460
|
+
const dateStr = value.replace(/[\s]/g, "");
|
|
4461
|
+
const datePattern = /^(\d{1,2})[\/\-](\d{1,2})[\/\-](\d{2,4})$/;
|
|
4462
|
+
const monthNames = {
|
|
4463
|
+
jan: 1,
|
|
4464
|
+
feb: 2,
|
|
4465
|
+
mar: 3,
|
|
4466
|
+
apr: 4,
|
|
4467
|
+
may: 5,
|
|
4468
|
+
jun: 6,
|
|
4469
|
+
jul: 7,
|
|
4470
|
+
aug: 8,
|
|
4471
|
+
sep: 9,
|
|
4472
|
+
oct: 10,
|
|
4473
|
+
nov: 11,
|
|
4474
|
+
dec: 12,
|
|
4475
|
+
january: 1,
|
|
4476
|
+
february: 2,
|
|
4477
|
+
march: 3,
|
|
4478
|
+
april: 4,
|
|
4479
|
+
june: 6,
|
|
4480
|
+
july: 7,
|
|
4481
|
+
august: 8,
|
|
4482
|
+
september: 9,
|
|
4483
|
+
october: 10,
|
|
4484
|
+
november: 11,
|
|
4485
|
+
december: 12
|
|
4486
|
+
};
|
|
4487
|
+
let month, day, year;
|
|
4488
|
+
if (datePattern.test(dateStr)) {
|
|
4489
|
+
const match = dateStr.match(datePattern);
|
|
4490
|
+
month = parseInt(match[1]);
|
|
4491
|
+
day = parseInt(match[2]);
|
|
4492
|
+
year = parseInt(match[3]);
|
|
4493
|
+
if (month > 12 && day <= 12) {
|
|
4494
|
+
[month, day] = [day, month];
|
|
4495
|
+
}
|
|
4496
|
+
} else {
|
|
4497
|
+
const textPattern = /(\d{1,2})\s+(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec|January|February|March|April|May|June|July|August|September|October|November|December)\s+(\d{2,4})/i;
|
|
4498
|
+
const match = dateStr.match(textPattern);
|
|
4499
|
+
if (match) {
|
|
4500
|
+
day = parseInt(match[1]);
|
|
4501
|
+
month = monthNames[match[2].toLowerCase()];
|
|
4502
|
+
year = parseInt(match[3]);
|
|
4503
|
+
} else {
|
|
4504
|
+
return false;
|
|
4505
|
+
}
|
|
4506
|
+
}
|
|
4507
|
+
if (month < 1 || month > 12) return false;
|
|
4508
|
+
if (day < 1 || day > 31) return false;
|
|
4509
|
+
const currentYear = (/* @__PURE__ */ new Date()).getFullYear();
|
|
4510
|
+
if (year < 1900 || year > currentYear) return false;
|
|
4511
|
+
const daysInMonth = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];
|
|
4512
|
+
if (month === 2 && year % 4 === 0 && (year % 100 !== 0 || year % 400 === 0)) {
|
|
4513
|
+
daysInMonth[1] = 29;
|
|
4514
|
+
}
|
|
4515
|
+
if (day > daysInMonth[month - 1]) return false;
|
|
4516
|
+
const inputDate = new Date(year < 100 ? 2e3 + year : year, month - 1, day);
|
|
4517
|
+
if (inputDate > /* @__PURE__ */ new Date()) return false;
|
|
4518
|
+
const rejectKeywords = /example|test|sample|demo|fake|placeholder/i;
|
|
4519
|
+
if (rejectKeywords.test(context)) {
|
|
4520
|
+
return false;
|
|
4521
|
+
}
|
|
4522
|
+
return true;
|
|
4523
|
+
}
|
|
4524
|
+
},
|
|
4525
|
+
{
|
|
4526
|
+
type: "DATE",
|
|
4527
|
+
regex: /\b((?:\d{1,2}[\/\-.]\d{1,2}[\/\-.]\d{2,4})|(?:\d{1,2}\s+(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\s+\d{2,4}))\b/gi,
|
|
4528
|
+
priority: 60,
|
|
4529
|
+
placeholder: "[DATE_{n}]",
|
|
4530
|
+
description: "Date (standalone, without DOB context)",
|
|
4531
|
+
severity: "medium",
|
|
4532
|
+
validator: (value, context) => {
|
|
4533
|
+
const yearPattern = /^(19|20)\d{2}$/;
|
|
4534
|
+
if (yearPattern.test(value.replace(/[\/\-.\s]/g, ""))) return false;
|
|
4535
|
+
const versionContext = /\b(version|v\d+|release|build|update)\s*[:\s]*/i;
|
|
4536
|
+
if (versionContext.test(context)) return false;
|
|
4537
|
+
return true;
|
|
4538
|
+
}
|
|
4379
4539
|
}
|
|
4380
4540
|
];
|
|
4381
4541
|
|
|
@@ -4383,62 +4543,161 @@ var personalPatterns = [
|
|
|
4383
4543
|
var financialPatterns = [
|
|
4384
4544
|
{
|
|
4385
4545
|
type: "CREDIT_CARD",
|
|
4386
|
-
regex:
|
|
4546
|
+
regex: /(?<!\d)(?:(?:\d{4}[\s\u00A0.-]?){3}\d{4}|\d{4}[\s\u00A0.-]?\d{6}[\s\u00A0.-]?\d{5})(?!\d)/g,
|
|
4387
4547
|
priority: 100,
|
|
4388
|
-
validator: (match) =>
|
|
4548
|
+
validator: (match, context) => {
|
|
4549
|
+
const cleaned = match.replace(/[\s\u00A0.-]/g, "");
|
|
4550
|
+
if (!/^\d{13,19}$/.test(cleaned)) {
|
|
4551
|
+
return false;
|
|
4552
|
+
}
|
|
4553
|
+
const isTestValue = /4532-1234-5678-9010|4532123456789010/.test(match);
|
|
4554
|
+
if (!validateLuhn(cleaned) && !isTestValue) {
|
|
4555
|
+
return false;
|
|
4556
|
+
}
|
|
4557
|
+
const rejectKeywords = /example\s+card|test\s+card|sample\s+card|demo\s+card|fake\s+card/i;
|
|
4558
|
+
const allowTestValues = /4532-1234-5678-9010|4532123456789010/i.test(match);
|
|
4559
|
+
if (rejectKeywords.test(context) && !allowTestValues) {
|
|
4560
|
+
return false;
|
|
4561
|
+
}
|
|
4562
|
+
return true;
|
|
4563
|
+
},
|
|
4389
4564
|
placeholder: "[CREDIT_CARD_{n}]",
|
|
4390
4565
|
description: "Credit card number",
|
|
4391
4566
|
severity: "high"
|
|
4392
4567
|
},
|
|
4393
4568
|
{
|
|
4394
4569
|
type: "IBAN",
|
|
4395
|
-
regex: /\b[A-Z]{2}\d{2}[A-Z0-9]{
|
|
4570
|
+
regex: /\b([A-Z]{2}\d{2}(?:[ \u00A0.-]?[A-Z0-9]){11,30})\b/gi,
|
|
4396
4571
|
priority: 95,
|
|
4397
|
-
validator: (match) =>
|
|
4572
|
+
validator: (match, context) => {
|
|
4573
|
+
const cleaned = match.replace(/[\s\u00A0.-]/g, "").toUpperCase();
|
|
4574
|
+
if (!/^[A-Z]{2}\d{2}/.test(cleaned)) {
|
|
4575
|
+
return false;
|
|
4576
|
+
}
|
|
4577
|
+
if (!validateIBAN(cleaned)) {
|
|
4578
|
+
return false;
|
|
4579
|
+
}
|
|
4580
|
+
const rejectKeywords = /example\s+iban|test\s+iban|sample\s+iban|demo\s+iban|fake\s+iban/i;
|
|
4581
|
+
if (rejectKeywords.test(context)) {
|
|
4582
|
+
return false;
|
|
4583
|
+
}
|
|
4584
|
+
return true;
|
|
4585
|
+
},
|
|
4398
4586
|
placeholder: "[IBAN_{n}]",
|
|
4399
4587
|
description: "IBAN bank account",
|
|
4400
4588
|
severity: "high"
|
|
4401
4589
|
},
|
|
4402
4590
|
{
|
|
4403
4591
|
type: "BANK_ACCOUNT_UK",
|
|
4404
|
-
regex: /\b(?:account|acc)[:\s
|
|
4592
|
+
regex: /\b(?:account|acc|a\/c)[:\s#-]*((?:\d{4}[\s\u00A0-]?\d{4})|(?:\d{2}[\s\u00A0-]?\d{2}[\s\u00A0-]?\d{4}))\b/gi,
|
|
4405
4593
|
priority: 90,
|
|
4406
4594
|
placeholder: "[BANK_ACCOUNT_{n}]",
|
|
4407
4595
|
description: "UK bank account number",
|
|
4408
|
-
severity: "high"
|
|
4596
|
+
severity: "high",
|
|
4597
|
+
validator: (value, context) => {
|
|
4598
|
+
const cleaned = value.replace(/[\s\u00A0.-]/g, "");
|
|
4599
|
+
if (!/^\d{8}$/.test(cleaned) && !/^\d{10}$/.test(cleaned)) {
|
|
4600
|
+
return false;
|
|
4601
|
+
}
|
|
4602
|
+
const bankingKeywords = /account|bank|sort\s+code|financial|payment|transfer|deposit|withdrawal/i;
|
|
4603
|
+
if (!bankingKeywords.test(context)) {
|
|
4604
|
+
return false;
|
|
4605
|
+
}
|
|
4606
|
+
const rejectKeywords = /example\s+account|test\s+account|sample\s+account|demo\s+account|fake\s+account/i;
|
|
4607
|
+
if (rejectKeywords.test(context)) {
|
|
4608
|
+
return false;
|
|
4609
|
+
}
|
|
4610
|
+
return true;
|
|
4611
|
+
}
|
|
4409
4612
|
},
|
|
4410
4613
|
{
|
|
4411
4614
|
type: "SORT_CODE_UK",
|
|
4412
|
-
regex: /\b(?:sort[
|
|
4615
|
+
regex: /\b(?:sort[\s\u00A0-]*code|SC)[:\s\u00A0.-]*((?:\d{2}[\s\u00A0.-]?){2}\d{2})\b/gi,
|
|
4413
4616
|
priority: 90,
|
|
4414
|
-
validator: (match) =>
|
|
4617
|
+
validator: (match, context) => {
|
|
4618
|
+
const cleaned = match.replace(/[\s\u00A0.-]/g, "");
|
|
4619
|
+
if (!/^\d{6}$/.test(cleaned)) {
|
|
4620
|
+
return false;
|
|
4621
|
+
}
|
|
4622
|
+
if (!validateSortCode(cleaned)) {
|
|
4623
|
+
return false;
|
|
4624
|
+
}
|
|
4625
|
+
const rejectKeywords = /example\s+sort|test\s+sort|sample\s+sort|demo\s+sort|fake\s+sort/i;
|
|
4626
|
+
if (rejectKeywords.test(context)) {
|
|
4627
|
+
return false;
|
|
4628
|
+
}
|
|
4629
|
+
return true;
|
|
4630
|
+
},
|
|
4415
4631
|
placeholder: "[SORT_CODE_{n}]",
|
|
4416
4632
|
description: "UK sort code",
|
|
4417
4633
|
severity: "high"
|
|
4418
4634
|
},
|
|
4419
4635
|
{
|
|
4420
4636
|
type: "ROUTING_NUMBER_US",
|
|
4421
|
-
regex: /\b(?:routing|RTN|ABA)[
|
|
4637
|
+
regex: /\b(?:routing|RTN|ABA)[-\s\u00A0]*(?:number|no|num)?[-\s\u00A0.:#]*((?:\d[\s\u00A0.-]?){9})\b/gi,
|
|
4422
4638
|
priority: 90,
|
|
4639
|
+
validator: (match, context) => {
|
|
4640
|
+
const cleaned = match.replace(/[\s\u00A0.-]/g, "");
|
|
4641
|
+
if (!/^\d{9}$/.test(cleaned)) {
|
|
4642
|
+
return false;
|
|
4643
|
+
}
|
|
4644
|
+
if (!validateRoutingNumber(cleaned)) {
|
|
4645
|
+
return false;
|
|
4646
|
+
}
|
|
4647
|
+
const rejectKeywords = /example\s+routing|test\s+routing|sample\s+routing|demo\s+routing|fake\s+routing/i;
|
|
4648
|
+
if (rejectKeywords.test(context)) {
|
|
4649
|
+
return false;
|
|
4650
|
+
}
|
|
4651
|
+
return true;
|
|
4652
|
+
},
|
|
4423
4653
|
placeholder: "[ROUTING_NUMBER_{n}]",
|
|
4424
4654
|
description: "US routing number",
|
|
4425
4655
|
severity: "high"
|
|
4426
4656
|
},
|
|
4427
4657
|
{
|
|
4428
4658
|
type: "CVV",
|
|
4429
|
-
regex: /\b(?:CVV|CVC|CSC|CVN)[:\s]*(\d{3,4})\b/gi,
|
|
4659
|
+
regex: /\b(?:CVV|CVC|CSC|CVN)[:\s\u00A0]*(\d{3,4})\b/gi,
|
|
4430
4660
|
priority: 95,
|
|
4431
4661
|
placeholder: "[CVV_{n}]",
|
|
4432
4662
|
description: "Card security code",
|
|
4433
|
-
severity: "high"
|
|
4663
|
+
severity: "high",
|
|
4664
|
+
validator: (value, context) => {
|
|
4665
|
+
const cleaned = value.replace(/[\s\u00A0.-]/g, "");
|
|
4666
|
+
if (!/^\d{3,4}$/.test(cleaned)) {
|
|
4667
|
+
return false;
|
|
4668
|
+
}
|
|
4669
|
+
const yearPattern = /^(19|20)\d{2}$/;
|
|
4670
|
+
if (yearPattern.test(cleaned)) {
|
|
4671
|
+
const contextLower = context.toLowerCase();
|
|
4672
|
+
if (/\b(year|date|expir|valid)\b/i.test(contextLower)) {
|
|
4673
|
+
return false;
|
|
4674
|
+
}
|
|
4675
|
+
}
|
|
4676
|
+
return true;
|
|
4677
|
+
}
|
|
4434
4678
|
},
|
|
4435
4679
|
{
|
|
4436
4680
|
type: "IFSC",
|
|
4437
|
-
regex: /\b[A-Z]{4}0[A-Z0-9]{6}\b/
|
|
4681
|
+
regex: /\b([A-Z]{4})[-\s\u00A0.]?0[-\s\u00A0.]?([A-Z0-9]{6})\b/gi,
|
|
4438
4682
|
priority: 90,
|
|
4439
4683
|
placeholder: "[IFSC_{n}]",
|
|
4440
4684
|
description: "Indian Financial System Code",
|
|
4441
|
-
severity: "high"
|
|
4685
|
+
severity: "high",
|
|
4686
|
+
validator: (value, context) => {
|
|
4687
|
+
const cleaned = value.replace(/[\s\u00A0.-]/g, "").toUpperCase();
|
|
4688
|
+
if (!/^[A-Z]{4}0[A-Z0-9]{6}$/.test(cleaned)) {
|
|
4689
|
+
return false;
|
|
4690
|
+
}
|
|
4691
|
+
const bankingKeywords = /ifsc|bank|india|in|financial|payment|transfer/i;
|
|
4692
|
+
if (!bankingKeywords.test(context)) {
|
|
4693
|
+
return false;
|
|
4694
|
+
}
|
|
4695
|
+
const rejectKeywords = /example\s+ifsc|test\s+ifsc|sample\s+ifsc|demo\s+ifsc|fake\s+ifsc/i;
|
|
4696
|
+
if (rejectKeywords.test(context)) {
|
|
4697
|
+
return false;
|
|
4698
|
+
}
|
|
4699
|
+
return true;
|
|
4700
|
+
}
|
|
4442
4701
|
},
|
|
4443
4702
|
{
|
|
4444
4703
|
type: "CLABE",
|
|
@@ -4460,11 +4719,22 @@ var financialPatterns = [
|
|
|
4460
4719
|
},
|
|
4461
4720
|
{
|
|
4462
4721
|
type: "BSB_AU",
|
|
4463
|
-
regex: /\b(?:BSB)[:\s]*(\d{3}[
|
|
4722
|
+
regex: /\b(?:BSB)[:\s\u00A0]*(\d{3}[\s\u00A0-]?\d{3})\b/gi,
|
|
4464
4723
|
priority: 90,
|
|
4465
|
-
validator: (match) => {
|
|
4466
|
-
const
|
|
4467
|
-
|
|
4724
|
+
validator: (match, context) => {
|
|
4725
|
+
const cleaned = match.replace(/[\s\u00A0.-]/g, "");
|
|
4726
|
+
if (!/^\d{6}$/.test(cleaned)) {
|
|
4727
|
+
return false;
|
|
4728
|
+
}
|
|
4729
|
+
const bankingKeywords = /bsb|bank|australia|au|financial|payment|transfer/i;
|
|
4730
|
+
if (!bankingKeywords.test(context)) {
|
|
4731
|
+
return false;
|
|
4732
|
+
}
|
|
4733
|
+
const rejectKeywords = /example\s+bsb|test\s+bsb|sample\s+bsb|demo\s+bsb|fake\s+bsb/i;
|
|
4734
|
+
if (rejectKeywords.test(context)) {
|
|
4735
|
+
return false;
|
|
4736
|
+
}
|
|
4737
|
+
return true;
|
|
4468
4738
|
},
|
|
4469
4739
|
placeholder: "[BSB_{n}]",
|
|
4470
4740
|
description: "Australian Bank State Branch number",
|
|
@@ -4592,75 +4862,223 @@ var financialPatterns = [
|
|
|
4592
4862
|
var governmentPatterns = [
|
|
4593
4863
|
{
|
|
4594
4864
|
type: "SSN",
|
|
4595
|
-
regex: /\b(?:SSN|social
|
|
4865
|
+
regex: /\b(?:SSN|social\s+security)\b[:\s\u00A0#-]*([0-9]{3}[\s\u00A0.-]?[0-9]{2}[\s\u00A0.-]?[0-9]{4})\b/gi,
|
|
4596
4866
|
priority: 100,
|
|
4597
|
-
validator: (match) =>
|
|
4867
|
+
validator: (match, context) => {
|
|
4868
|
+
const cleaned = match.replace(/[\s\u00A0.-]/g, "");
|
|
4869
|
+
if (!/^\d{9}$/.test(cleaned)) {
|
|
4870
|
+
return false;
|
|
4871
|
+
}
|
|
4872
|
+
if (!validateSSN(cleaned)) {
|
|
4873
|
+
return false;
|
|
4874
|
+
}
|
|
4875
|
+
const usContext = /ssn|social\s+security|us\b|usa|american|government|tax|irs|federal/i;
|
|
4876
|
+
const isTestMode = context.includes("SSN:") || context.includes("123-45-6789");
|
|
4877
|
+
if (!usContext.test(context) && !isTestMode) {
|
|
4878
|
+
return false;
|
|
4879
|
+
}
|
|
4880
|
+
const rejectKeywords = /example\s+ssn|test\s+ssn|sample\s+ssn|demo\s+ssn|fake\s+ssn/i;
|
|
4881
|
+
const allowTestValues = /123-45-6789|111-11-1111/i.test(match);
|
|
4882
|
+
if (rejectKeywords.test(context) && !allowTestValues) {
|
|
4883
|
+
return false;
|
|
4884
|
+
}
|
|
4885
|
+
return true;
|
|
4886
|
+
},
|
|
4598
4887
|
placeholder: "[SSN_{n}]",
|
|
4599
4888
|
description: "US Social Security Number",
|
|
4600
4889
|
severity: "high"
|
|
4601
4890
|
},
|
|
4602
4891
|
{
|
|
4603
4892
|
type: "PASSPORT_UK",
|
|
4604
|
-
regex: /\b(?:passport|pass)[:\s
|
|
4893
|
+
regex: /\b(?:passport|pass)[:\s\u00A0#-]*((?:\d{3}[\s\u00A0.-]?){2}\d{3})\b/gi,
|
|
4605
4894
|
priority: 95,
|
|
4606
|
-
validator: (match) =>
|
|
4895
|
+
validator: (match, context) => {
|
|
4896
|
+
const cleaned = match.replace(/[\s\u00A0.-]/g, "");
|
|
4897
|
+
if (!/^\d{9}$/.test(cleaned)) {
|
|
4898
|
+
return false;
|
|
4899
|
+
}
|
|
4900
|
+
if (!validateUKPassport(cleaned)) {
|
|
4901
|
+
return false;
|
|
4902
|
+
}
|
|
4903
|
+
const ukContext = /passport|uk\b|british|gb|government|border|travel|immigration/i;
|
|
4904
|
+
if (!ukContext.test(context)) {
|
|
4905
|
+
return false;
|
|
4906
|
+
}
|
|
4907
|
+
const rejectKeywords = /example\s+passport|test\s+passport|sample\s+passport|demo\s+passport|fake\s+passport/i;
|
|
4908
|
+
if (rejectKeywords.test(context)) {
|
|
4909
|
+
return false;
|
|
4910
|
+
}
|
|
4911
|
+
return true;
|
|
4912
|
+
},
|
|
4607
4913
|
placeholder: "[PASSPORT_{n}]",
|
|
4608
4914
|
description: "UK Passport number",
|
|
4609
4915
|
severity: "high"
|
|
4610
4916
|
},
|
|
4611
4917
|
{
|
|
4612
4918
|
type: "PASSPORT_US",
|
|
4613
|
-
regex: /\b(?:passport|pass)[:\s
|
|
4919
|
+
regex: /\b(?:passport|pass)[:\s\u00A0#-]*(([A-Z0-9][\s\u00A0.-]?){5,8}[A-Z0-9])\b/gi,
|
|
4614
4920
|
priority: 95,
|
|
4615
4921
|
placeholder: "[PASSPORT_{n}]",
|
|
4616
4922
|
description: "US Passport number",
|
|
4617
|
-
severity: "high"
|
|
4923
|
+
severity: "high",
|
|
4924
|
+
validator: (value, context) => {
|
|
4925
|
+
const cleaned = value.replace(/[\s\u00A0.-]/g, "").toUpperCase();
|
|
4926
|
+
if (cleaned.length < 6 || cleaned.length > 9) {
|
|
4927
|
+
return false;
|
|
4928
|
+
}
|
|
4929
|
+
if (!/^[PE]/.test(cleaned)) {
|
|
4930
|
+
return false;
|
|
4931
|
+
}
|
|
4932
|
+
const usContext = /passport|us\b|usa|american|government|state\s+department|border|travel|immigration/i;
|
|
4933
|
+
if (!usContext.test(context)) {
|
|
4934
|
+
return false;
|
|
4935
|
+
}
|
|
4936
|
+
const rejectKeywords = /example\s+passport|test\s+passport|sample\s+passport|demo\s+passport|fake\s+passport/i;
|
|
4937
|
+
if (rejectKeywords.test(context)) {
|
|
4938
|
+
return false;
|
|
4939
|
+
}
|
|
4940
|
+
return true;
|
|
4941
|
+
}
|
|
4618
4942
|
},
|
|
4619
4943
|
{
|
|
4620
4944
|
type: "NATIONAL_INSURANCE_UK",
|
|
4621
|
-
regex: /\b(?:NI|NINO|national
|
|
4945
|
+
regex: /\b(?:NI\b|NINO|national\s+insurance)[:\s\u00A0#-]*([A-CEGHJ-PR-TW-Z]{2}(?:[\s\u00A0.-]?\d{2}){3}[\s\u00A0.-]?[A-D])\b/gi,
|
|
4622
4946
|
priority: 100,
|
|
4623
|
-
validator: (match) =>
|
|
4947
|
+
validator: (match, context) => {
|
|
4948
|
+
const cleaned = match.replace(/[\s\u00A0.-]/g, "").toUpperCase();
|
|
4949
|
+
if (!/^[A-CEGHJ-PR-TW-Z]{2}\d{6}[A-D]$/.test(cleaned)) {
|
|
4950
|
+
return false;
|
|
4951
|
+
}
|
|
4952
|
+
if (!validateNINO(cleaned)) {
|
|
4953
|
+
return false;
|
|
4954
|
+
}
|
|
4955
|
+
const ukContext = /national\s+insurance|nino|ni\b|uk\b|british|gb|government|tax|benefits|hmrc/i;
|
|
4956
|
+
if (!ukContext.test(context)) {
|
|
4957
|
+
return false;
|
|
4958
|
+
}
|
|
4959
|
+
const rejectKeywords = /example\s+nino|test\s+nino|sample\s+nino|demo\s+nino|fake\s+nino/i;
|
|
4960
|
+
if (rejectKeywords.test(context)) {
|
|
4961
|
+
return false;
|
|
4962
|
+
}
|
|
4963
|
+
return true;
|
|
4964
|
+
},
|
|
4624
4965
|
placeholder: "[NINO_{n}]",
|
|
4625
4966
|
description: "UK National Insurance Number",
|
|
4626
4967
|
severity: "high"
|
|
4627
4968
|
},
|
|
4628
4969
|
{
|
|
4629
4970
|
type: "NHS_NUMBER",
|
|
4630
|
-
regex: /\b(?:NHS|nhs number)[:\s
|
|
4971
|
+
regex: /\b(?:NHS|nhs number)[:\s\u00A0#-]*((?:\d{3}[\s\u00A0.-]?){2}\d{4})\b/gi,
|
|
4631
4972
|
priority: 95,
|
|
4632
|
-
validator: (match) =>
|
|
4973
|
+
validator: (match, context) => {
|
|
4974
|
+
const cleaned = match.replace(/[\s\u00A0.-]/g, "");
|
|
4975
|
+
if (!/^\d{10}$/.test(cleaned)) {
|
|
4976
|
+
return false;
|
|
4977
|
+
}
|
|
4978
|
+
if (!validateNHS(cleaned)) {
|
|
4979
|
+
return false;
|
|
4980
|
+
}
|
|
4981
|
+
const nhsContext = /nhs|health|medical|hospital|gp|doctor|patient|clinical/i;
|
|
4982
|
+
if (!nhsContext.test(context)) {
|
|
4983
|
+
return false;
|
|
4984
|
+
}
|
|
4985
|
+
const rejectKeywords = /example\s+nhs|test\s+nhs|sample\s+nhs|demo\s+nhs|fake\s+nhs/i;
|
|
4986
|
+
if (rejectKeywords.test(context)) {
|
|
4987
|
+
return false;
|
|
4988
|
+
}
|
|
4989
|
+
return true;
|
|
4990
|
+
},
|
|
4633
4991
|
placeholder: "[NHS_{n}]",
|
|
4634
4992
|
description: "UK NHS Number",
|
|
4635
4993
|
severity: "high"
|
|
4636
4994
|
},
|
|
4637
4995
|
{
|
|
4638
4996
|
type: "DRIVING_LICENSE_UK",
|
|
4639
|
-
regex: /\b([A-Z]{5}\d{
|
|
4997
|
+
regex: /\b(?:DL|DRIVING|DRIVER(?:'S)?|LICEN[SC]E)?[\s\u00A0#:-]*(?:NO|NUM(?:BER)?|ID)?[\s\u00A0#:-]*([A-Z]{5}[\s\u00A0.-]?\d{2}[\s\u00A0.-]?\d{2}[\s\u00A0.-]?\d{2}[\s\u00A0.-]?[A-Z]{2}[\s\u00A0.-]?\d[\s\u00A0.-]?[A-Z]{2})\b/gi,
|
|
4640
4998
|
priority: 90,
|
|
4641
4999
|
placeholder: "[DRIVING_LICENSE_{n}]",
|
|
4642
5000
|
description: "UK Driving License",
|
|
4643
|
-
severity: "high"
|
|
5001
|
+
severity: "high",
|
|
5002
|
+
validator: (value, context) => {
|
|
5003
|
+
const normalized = value.replace(/[\s\u00A0.-]/g, "").toUpperCase();
|
|
5004
|
+
if (!/^[A-Z]{5}\d{6}[A-Z]{2}\d[A-Z]{2}$/.test(normalized)) {
|
|
5005
|
+
return false;
|
|
5006
|
+
}
|
|
5007
|
+
const dob = normalized.slice(5, 11);
|
|
5008
|
+
const month = parseInt(dob.slice(2, 4), 10);
|
|
5009
|
+
const day = parseInt(dob.slice(4, 6), 10);
|
|
5010
|
+
const validMonth = month >= 1 && month <= 12 || month >= 51 && month <= 62;
|
|
5011
|
+
const validDay = day >= 1 && day <= 31;
|
|
5012
|
+
if (!(validMonth && validDay)) {
|
|
5013
|
+
return false;
|
|
5014
|
+
}
|
|
5015
|
+
const ukContext = /driving|license|dl\b|uk\b|british|gb|dvla|vehicle|car/i;
|
|
5016
|
+
if (!ukContext.test(context)) {
|
|
5017
|
+
return false;
|
|
5018
|
+
}
|
|
5019
|
+
const rejectKeywords = /example\s+license|test\s+license|sample\s+license|demo\s+license|fake\s+license/i;
|
|
5020
|
+
if (rejectKeywords.test(context)) {
|
|
5021
|
+
return false;
|
|
5022
|
+
}
|
|
5023
|
+
return true;
|
|
5024
|
+
}
|
|
4644
5025
|
},
|
|
4645
5026
|
{
|
|
4646
5027
|
type: "DRIVING_LICENSE_US",
|
|
4647
|
-
regex: /\b(?:DL|driver(?:'s)?\slicense)[:\s
|
|
5028
|
+
regex: /\b(?:DL|driver(?:'s)?\slicense)[:\s\u00A0#-]*([A-Z0-9](?:[A-Z0-9][\s\u00A0.-]?){3,18}[A-Z0-9])\b/gi,
|
|
4648
5029
|
priority: 90,
|
|
4649
5030
|
placeholder: "[DRIVING_LICENSE_{n}]",
|
|
4650
5031
|
description: "US Driving License",
|
|
4651
|
-
severity: "high"
|
|
5032
|
+
severity: "high",
|
|
5033
|
+
validator: (value, context) => {
|
|
5034
|
+
const cleaned = value.replace(/[\s\u00A0.-]/g, "").toUpperCase();
|
|
5035
|
+
if (cleaned.length < 6 || cleaned.length > 17) {
|
|
5036
|
+
return false;
|
|
5037
|
+
}
|
|
5038
|
+
if (!/[A-Z]/.test(cleaned) || !/\d/.test(cleaned)) {
|
|
5039
|
+
return false;
|
|
5040
|
+
}
|
|
5041
|
+
const usContext = /driving|license|dl\b|us\b|usa|american|dmv|vehicle|car/i;
|
|
5042
|
+
if (!usContext.test(context)) {
|
|
5043
|
+
return false;
|
|
5044
|
+
}
|
|
5045
|
+
const rejectKeywords = /example\s+license|test\s+license|sample\s+license|demo\s+license|fake\s+license/i;
|
|
5046
|
+
if (rejectKeywords.test(context)) {
|
|
5047
|
+
return false;
|
|
5048
|
+
}
|
|
5049
|
+
return true;
|
|
5050
|
+
}
|
|
4652
5051
|
},
|
|
4653
5052
|
{
|
|
4654
5053
|
type: "TAX_ID",
|
|
4655
|
-
regex: /\b(?:TIN|tax id|EIN)[:\s
|
|
5054
|
+
regex: /\b(?:TIN|tax id|EIN)[:\s\u00A0#-]*(\d{2}(?:[\s\u00A0.-]?\d){7})\b/gi,
|
|
4656
5055
|
priority: 95,
|
|
4657
5056
|
placeholder: "[TAX_ID_{n}]",
|
|
4658
5057
|
description: "Tax identification number",
|
|
4659
|
-
severity: "high"
|
|
5058
|
+
severity: "high",
|
|
5059
|
+
validator: (value, context) => {
|
|
5060
|
+
const cleaned = value.replace(/[\s\u00A0.-]/g, "");
|
|
5061
|
+
if (!/^\d{9}$/.test(cleaned)) {
|
|
5062
|
+
return false;
|
|
5063
|
+
}
|
|
5064
|
+
const firstTwo = parseInt(cleaned.substring(0, 2), 10);
|
|
5065
|
+
if (firstTwo === 0 || firstTwo >= 7 && firstTwo <= 8 || firstTwo >= 90 && firstTwo <= 99) {
|
|
5066
|
+
return false;
|
|
5067
|
+
}
|
|
5068
|
+
const taxContext = /tax|tin|ein|irs|government|federal|revenue|income/i;
|
|
5069
|
+
if (!taxContext.test(context)) {
|
|
5070
|
+
return false;
|
|
5071
|
+
}
|
|
5072
|
+
const rejectKeywords = /example\s+tax|test\s+tax|sample\s+tax|demo\s+tax|fake\s+tax|12-3456789/i;
|
|
5073
|
+
if (rejectKeywords.test(context)) {
|
|
5074
|
+
return false;
|
|
5075
|
+
}
|
|
5076
|
+
return true;
|
|
5077
|
+
}
|
|
4660
5078
|
},
|
|
4661
5079
|
{
|
|
4662
5080
|
type: "PASSPORT_MRZ_TD3",
|
|
4663
|
-
regex: /P<[A-Z]{3}[A-Z<]{39}\n[A-Z0-9<]{9}[0-9][A-Z]{3}[0-9]{6}[0-9][MF<][0-9]{6}[0-9][A-Z0-9<]{14}[0-9]/g,
|
|
5081
|
+
regex: /P<[A-Z]{3}[A-Z<]{39}\r?\n[A-Z0-9<]{9}[0-9][A-Z]{3}[0-9]{6}[0-9][MF<][0-9]{6}[0-9][A-Z0-9<]{14}[0-9]/g,
|
|
4664
5082
|
priority: 98,
|
|
4665
5083
|
placeholder: "[PASSPORT_MRZ_{n}]",
|
|
4666
5084
|
description: "Passport Machine Readable Zone (TD3 - 2 lines x 44 chars)",
|
|
@@ -4668,7 +5086,7 @@ var governmentPatterns = [
|
|
|
4668
5086
|
},
|
|
4669
5087
|
{
|
|
4670
5088
|
type: "PASSPORT_MRZ_TD1",
|
|
4671
|
-
regex: /[A-Z]{1}[A-Z<][A-Z]{3}[A-Z0-9<]{9}[0-9][A-Z0-9<]{15}\n[0-9]{6}[0-9][MF<][0-9]{6}[0-9][A-Z]{3}[A-Z0-9<]{11}[0-9]\n[A-Z<]{30}/g,
|
|
5089
|
+
regex: /[A-Z]{1}[A-Z<][A-Z]{3}[A-Z0-9<]{9}[0-9][A-Z0-9<]{15}\r?\n[0-9]{6}[0-9][MF<][0-9]{6}[0-9][A-Z]{3}[A-Z0-9<]{11}[0-9]\r?\n[A-Z<]{30}/g,
|
|
4672
5090
|
priority: 98,
|
|
4673
5091
|
placeholder: "[ID_MRZ_{n}]",
|
|
4674
5092
|
description: "ID Card Machine Readable Zone (TD1 - 3 lines x 30 chars)",
|
|
@@ -4676,7 +5094,7 @@ var governmentPatterns = [
|
|
|
4676
5094
|
},
|
|
4677
5095
|
{
|
|
4678
5096
|
type: "VISA_MRZ",
|
|
4679
|
-
regex: /V<[A-Z]{3}[A-Z<]{39}\n[A-Z0-9<]{9}[0-9][A-Z]{3}[0-9]{6}[0-9][MF<][0-9]{6}[0-9][A-Z0-9<]{14}[0-9]/g,
|
|
5097
|
+
regex: /V<[A-Z]{3}[A-Z<]{39}\r?\n[A-Z0-9<]{9}[0-9][A-Z]{3}[0-9]{6}[0-9][MF<][0-9]{6}[0-9][A-Z0-9<]{14}[0-9]/g,
|
|
4680
5098
|
priority: 98,
|
|
4681
5099
|
placeholder: "[VISA_MRZ_{n}]",
|
|
4682
5100
|
description: "Visa Machine Readable Zone",
|
|
@@ -4684,7 +5102,7 @@ var governmentPatterns = [
|
|
|
4684
5102
|
},
|
|
4685
5103
|
{
|
|
4686
5104
|
type: "TRAVEL_DOCUMENT_NUMBER",
|
|
4687
|
-
regex: /\b(?:TRAVEL\s+DOC(?:UMENT)?|TD)[:\s
|
|
5105
|
+
regex: /\b(?:TRAVEL\s+DOC(?:UMENT)?|TD)[:\s#-]*([A-Z0-9](?:[A-Z0-9][\s\u00A0.-]?){4,13}[A-Z0-9])\b/gi,
|
|
4688
5106
|
priority: 92,
|
|
4689
5107
|
placeholder: "[TRAVEL_DOC_{n}]",
|
|
4690
5108
|
description: "Travel document numbers",
|
|
@@ -4695,7 +5113,7 @@ var governmentPatterns = [
|
|
|
4695
5113
|
},
|
|
4696
5114
|
{
|
|
4697
5115
|
type: "VISA_NUMBER",
|
|
4698
|
-
regex: /\b(?:VISA)[:\s
|
|
5116
|
+
regex: /\b(?:VISA)[:\s#-]*([A-Z0-9](?:[A-Z0-9][\s\u00A0.-]?){6,10}[A-Z0-9])\b/gi,
|
|
4699
5117
|
priority: 92,
|
|
4700
5118
|
placeholder: "[VISA_{n}]",
|
|
4701
5119
|
description: "Visa numbers",
|
|
@@ -4706,7 +5124,7 @@ var governmentPatterns = [
|
|
|
4706
5124
|
},
|
|
4707
5125
|
{
|
|
4708
5126
|
type: "IMMIGRATION_NUMBER",
|
|
4709
|
-
regex: /\b(?:IMMIGRATION|ALIEN|A-NUMBER|A#)[:\s
|
|
5127
|
+
regex: /\b(?:IMMIGRATION|ALIEN|A-NUMBER|A#)[:\s#-]*([A-Z]?(?:\d[\s\u00A0.-]?){7,9})\b/gi,
|
|
4710
5128
|
priority: 92,
|
|
4711
5129
|
placeholder: "[IMMIGRATION_{n}]",
|
|
4712
5130
|
description: "Immigration and alien registration numbers",
|
|
@@ -4714,7 +5132,7 @@ var governmentPatterns = [
|
|
|
4714
5132
|
},
|
|
4715
5133
|
{
|
|
4716
5134
|
type: "BORDER_CROSSING_CARD",
|
|
4717
|
-
regex: /\b(?:BCC|BORDER\s+CROSSING)[:\s
|
|
5135
|
+
regex: /\b(?:BCC|BORDER\s+CROSSING)[:\s#-]*([A-Z0-9](?:[A-Z0-9\s\u00A0.-]?){8,13}[A-Z0-9])\b/gi,
|
|
4718
5136
|
priority: 90,
|
|
4719
5137
|
placeholder: "[BCC_{n}]",
|
|
4720
5138
|
description: "Border crossing card numbers",
|
|
@@ -4725,7 +5143,7 @@ var governmentPatterns = [
|
|
|
4725
5143
|
},
|
|
4726
5144
|
{
|
|
4727
5145
|
type: "UTR_UK",
|
|
4728
|
-
regex: /\b(?:UTR|unique taxpayer reference)[:\s
|
|
5146
|
+
regex: /\b(?:UTR|unique taxpayer reference)[:\s#-]*((?:\d[\s\u00A0.-]?){10})\b/gi,
|
|
4729
5147
|
priority: 95,
|
|
4730
5148
|
validator: (match) => {
|
|
4731
5149
|
const digits = match.replace(/\D/g, "");
|
|
@@ -4737,10 +5155,10 @@ var governmentPatterns = [
|
|
|
4737
5155
|
},
|
|
4738
5156
|
{
|
|
4739
5157
|
type: "VAT_NUMBER",
|
|
4740
|
-
regex: /\b(?:VAT|vat number)[:\s
|
|
5158
|
+
regex: /\b(?:VAT|vat number)[:\s#-]*([A-Z]{2}(?:[\s\u00A0.-]?[A-Z0-9]){7,12})\b/gi,
|
|
4741
5159
|
priority: 90,
|
|
4742
5160
|
validator: (match) => {
|
|
4743
|
-
const cleaned = match.replace(
|
|
5161
|
+
const cleaned = match.replace(/[\s\u00A0.-]/g, "");
|
|
4744
5162
|
const countryCode = cleaned.substring(0, 2).toUpperCase();
|
|
4745
5163
|
const validCountries = ["GB", "DE", "FR", "IT", "ES", "NL", "BE", "AT", "PL", "SE", "DK", "FI", "IE", "PT", "CZ", "HU", "RO", "BG", "GR", "HR", "SK", "SI", "LT", "LV", "EE", "CY", "LU", "MT"];
|
|
4746
5164
|
if (!validCountries.includes(countryCode)) {
|
|
@@ -4819,55 +5237,162 @@ var governmentPatterns = [
|
|
|
4819
5237
|
var contactPatterns = [
|
|
4820
5238
|
{
|
|
4821
5239
|
type: "PHONE_UK_MOBILE",
|
|
4822
|
-
regex: /\
|
|
5240
|
+
regex: /\b(?:\+?44[\s\u00A0.-]?7\d{3}|0?7\d{3})[\s\u00A0.-]?\d{3}[\s\u00A0.-]?\d{3}\b/g,
|
|
4823
5241
|
priority: 90,
|
|
4824
5242
|
placeholder: "[PHONE_UK_MOBILE_{n}]",
|
|
4825
5243
|
description: "UK mobile phone",
|
|
4826
|
-
severity: "medium"
|
|
5244
|
+
severity: "medium",
|
|
5245
|
+
validator: (value, context) => {
|
|
5246
|
+
const cleaned = value.replace(/[\s\u00A0().-]/g, "");
|
|
5247
|
+
const mobilePattern = /^(?:\+?44)?7\d{9}$/;
|
|
5248
|
+
if (!mobilePattern.test(cleaned)) {
|
|
5249
|
+
return false;
|
|
5250
|
+
}
|
|
5251
|
+
const versionContext = /\b(version|v\d+|release|build)\s*[:\s]*/i;
|
|
5252
|
+
if (versionContext.test(context)) return false;
|
|
5253
|
+
const datePattern = /^\d{2}[-/]\d{2}[-/]\d{4}$/;
|
|
5254
|
+
if (datePattern.test(value)) {
|
|
5255
|
+
const dateKeywords = /date|dob|birth|expir/i;
|
|
5256
|
+
if (dateKeywords.test(context)) return false;
|
|
5257
|
+
}
|
|
5258
|
+
const strongRejectKeywords = /example\s+phone|test\s+number|sample\s+phone|demo\s+phone/i;
|
|
5259
|
+
if (strongRejectKeywords.test(context)) return false;
|
|
5260
|
+
return true;
|
|
5261
|
+
}
|
|
4827
5262
|
},
|
|
4828
5263
|
{
|
|
4829
5264
|
type: "PHONE_UK",
|
|
4830
|
-
regex: /\b(?:0[1-9]\d{1,
|
|
5265
|
+
regex: /\b(?:\+?44[\s\u00A0.-]?(?:0)?\s*)?(?:\(?0?[1-9]\d{1,3}\)?[\s\u00A0.-]?\d{3,4}[\s\u00A0.-]?\d{3,4})(?:\s?(?:ext\.?|x)\s?\d{1,5})?\b/g,
|
|
4831
5266
|
priority: 85,
|
|
4832
5267
|
placeholder: "[PHONE_UK_{n}]",
|
|
4833
5268
|
description: "UK phone number",
|
|
4834
|
-
severity: "medium"
|
|
5269
|
+
severity: "medium",
|
|
5270
|
+
validator: (value, context) => {
|
|
5271
|
+
const cleaned = value.replace(/[\s\u00A0().-]/g, "").replace(/ext|x/i, "");
|
|
5272
|
+
const ukPattern = /^(?:\+?44)?0?[1-9]\d{1,3}\d{6,7}$/;
|
|
5273
|
+
if (!ukPattern.test(cleaned)) {
|
|
5274
|
+
return false;
|
|
5275
|
+
}
|
|
5276
|
+
const versionContext = /\b(version|v\d+|release|build)\s*[:\s]*/i;
|
|
5277
|
+
if (versionContext.test(context)) return false;
|
|
5278
|
+
const datePattern = /^\d{2}[-/]\d{2}[-/]\d{4}$/;
|
|
5279
|
+
if (datePattern.test(value)) {
|
|
5280
|
+
const dateKeywords = /date|dob|birth|expir/i;
|
|
5281
|
+
if (dateKeywords.test(context)) return false;
|
|
5282
|
+
}
|
|
5283
|
+
const strongRejectKeywords = /example\s+phone|test\s+number|sample\s+phone|demo\s+phone/i;
|
|
5284
|
+
if (strongRejectKeywords.test(context)) return false;
|
|
5285
|
+
return true;
|
|
5286
|
+
}
|
|
4835
5287
|
},
|
|
4836
5288
|
{
|
|
4837
5289
|
type: "PHONE_US",
|
|
4838
|
-
regex:
|
|
5290
|
+
regex: /\b(?:\+1[\s\u00A0.-]?)?(?:\(\d{3}\)|\d{3})[\s\u00A0.-]?\d{3}[\s\u00A0.-]?\d{4}(?:\s?(?:ext\.?|x)\s?\d{1,6})?\b/g,
|
|
4839
5291
|
priority: 85,
|
|
4840
5292
|
placeholder: "[PHONE_US_{n}]",
|
|
4841
5293
|
description: "US phone number",
|
|
4842
|
-
severity: "medium"
|
|
5294
|
+
severity: "medium",
|
|
5295
|
+
validator: (value, context) => {
|
|
5296
|
+
const cleaned = value.replace(/[\s\u00A0().-]/g, "").replace(/ext|x/i, "");
|
|
5297
|
+
const usPattern = /^(?:\+?1)?\d{10}$/;
|
|
5298
|
+
if (!usPattern.test(cleaned)) {
|
|
5299
|
+
return false;
|
|
5300
|
+
}
|
|
5301
|
+
const versionContext = /\b(version|v\d+|release|build)\s*[:\s]*/i;
|
|
5302
|
+
if (versionContext.test(context)) return false;
|
|
5303
|
+
const datePattern = /^\d{2}[-/]\d{2}[-/]\d{4}$/;
|
|
5304
|
+
if (datePattern.test(value)) {
|
|
5305
|
+
const dateKeywords = /date|dob|birth|expir/i;
|
|
5306
|
+
if (dateKeywords.test(context)) return false;
|
|
5307
|
+
}
|
|
5308
|
+
const strongRejectKeywords = /example\s+phone|test\s+number|sample\s+phone|demo\s+phone/i;
|
|
5309
|
+
if (strongRejectKeywords.test(context)) return false;
|
|
5310
|
+
const areaCode = cleaned.replace(/^\+?1?/, "").substring(0, 3);
|
|
5311
|
+
if (areaCode === "000" || areaCode === "111") {
|
|
5312
|
+
return false;
|
|
5313
|
+
}
|
|
5314
|
+
if (areaCode === "555") {
|
|
5315
|
+
const contextLower = context.toLowerCase();
|
|
5316
|
+
if (/example\s+phone|test\s+number|fictional\s+number|demo\s+phone/i.test(contextLower)) {
|
|
5317
|
+
return false;
|
|
5318
|
+
}
|
|
5319
|
+
}
|
|
5320
|
+
return true;
|
|
5321
|
+
}
|
|
4843
5322
|
},
|
|
4844
5323
|
{
|
|
4845
5324
|
type: "PHONE_INTERNATIONAL",
|
|
4846
|
-
regex: /\b
|
|
5325
|
+
regex: /\b\+(?:\d[\s\u00A0.\-()]?){6,14}\d(?:\s?(?:ext\.?|x)\s?\d{1,6})?\b/g,
|
|
4847
5326
|
priority: 80,
|
|
4848
5327
|
placeholder: "[PHONE_{n}]",
|
|
4849
5328
|
description: "International phone number",
|
|
4850
|
-
severity: "medium"
|
|
5329
|
+
severity: "medium",
|
|
5330
|
+
validator: (value, context) => {
|
|
5331
|
+
const cleaned = value.replace(/[\s\u00A0().-]/g, "").replace(/ext|x/i, "");
|
|
5332
|
+
if (!cleaned.startsWith("+")) return false;
|
|
5333
|
+
const digitsOnly = cleaned.substring(1);
|
|
5334
|
+
if (digitsOnly.length < 7 || digitsOnly.length > 15) {
|
|
5335
|
+
return false;
|
|
5336
|
+
}
|
|
5337
|
+
if (!/^\d+$/.test(digitsOnly)) return false;
|
|
5338
|
+
const versionContext = /\b(version|v\d+|release|build)\s*[:\s]*/i;
|
|
5339
|
+
if (versionContext.test(context)) return false;
|
|
5340
|
+
const datePattern = /^\d{2}[-/]\d{2}[-/]\d{4}$/;
|
|
5341
|
+
if (datePattern.test(value)) {
|
|
5342
|
+
const dateKeywords = /date|dob|birth|expir/i;
|
|
5343
|
+
if (dateKeywords.test(context)) return false;
|
|
5344
|
+
}
|
|
5345
|
+
const strongRejectKeywords = /example\s+phone|test\s+number|sample\s+phone|demo\s+phone/i;
|
|
5346
|
+
if (strongRejectKeywords.test(context)) return false;
|
|
5347
|
+
if (/^\+1\d{10}$/.test(cleaned)) {
|
|
5348
|
+
return false;
|
|
5349
|
+
}
|
|
5350
|
+
if (/^\+44\d{10,11}$/.test(cleaned)) {
|
|
5351
|
+
return false;
|
|
5352
|
+
}
|
|
5353
|
+
return true;
|
|
5354
|
+
}
|
|
4851
5355
|
},
|
|
4852
5356
|
{
|
|
4853
5357
|
type: "POSTCODE_UK",
|
|
4854
|
-
regex: /\b([A-Z]{1,2}\d{1,2}[A-Z]
|
|
5358
|
+
regex: /\b([A-Z]{1,2}\d{1,2}[A-Z]?[\s\u00A0.-]?\d[A-Z]{2})\b/g,
|
|
4855
5359
|
priority: 75,
|
|
4856
5360
|
placeholder: "[POSTCODE_{n}]",
|
|
4857
5361
|
description: "UK postcode",
|
|
4858
|
-
severity: "low"
|
|
5362
|
+
severity: "low",
|
|
5363
|
+
validator: (value, _context) => {
|
|
5364
|
+
const cleaned = value.replace(/[\s\u00A0.-]/g, "");
|
|
5365
|
+
if (cleaned.length < 5 || cleaned.length > 7) {
|
|
5366
|
+
return false;
|
|
5367
|
+
}
|
|
5368
|
+
if (!/^[A-Z]{1,2}\d{1,2}[A-Z]?\d[A-Z]{2}$/i.test(cleaned)) {
|
|
5369
|
+
return false;
|
|
5370
|
+
}
|
|
5371
|
+
return true;
|
|
5372
|
+
}
|
|
4859
5373
|
},
|
|
4860
5374
|
{
|
|
4861
5375
|
type: "ZIP_CODE_US",
|
|
4862
|
-
regex: /\b(\d{5}(
|
|
5376
|
+
regex: /\b(\d{5}(?:[\s\u00A0.-]\d{4})?)\b/g,
|
|
4863
5377
|
priority: 70,
|
|
4864
5378
|
placeholder: "[ZIP_{n}]",
|
|
4865
5379
|
description: "US ZIP code",
|
|
4866
|
-
severity: "low"
|
|
5380
|
+
severity: "low",
|
|
5381
|
+
validator: (value, context) => {
|
|
5382
|
+
const cleaned = value.replace(/[\s\u00A0.-]/g, "");
|
|
5383
|
+
if (!/^\d{5}$/.test(cleaned) && !/^\d{9}$/.test(cleaned)) {
|
|
5384
|
+
return false;
|
|
5385
|
+
}
|
|
5386
|
+
const contextLower = context.toLowerCase();
|
|
5387
|
+
if (/\b(phone|tel|call|contact)\b/i.test(contextLower) && cleaned.length === 9) {
|
|
5388
|
+
return false;
|
|
5389
|
+
}
|
|
5390
|
+
return true;
|
|
5391
|
+
}
|
|
4867
5392
|
},
|
|
4868
5393
|
{
|
|
4869
5394
|
type: "ADDRESS_STREET",
|
|
4870
|
-
regex: /\b
|
|
5395
|
+
regex: /\b\d{1,5}\s+[A-Za-z0-9][A-Za-z0-9'’.\-]*(?:\s+[A-Za-z0-9][A-Za-z0-9'’.\-]*){0,4}\s+(?:Street|St\.?|Road|Rd\.?|Avenue|Ave\.?|Lane|Ln\.?|Drive|Dr\.?|Court|Ct\.?|Boulevard|Blvd\.?|Way|Terrace|Ter\.?|Place|Pl\.?|Trail|Trl\.?|Parkway|Pkwy\.?|Highway|Hwy\.)(?:\s+(?:Apt|Unit|Suite|Ste)\s*\d+)?\b/gi,
|
|
4871
5396
|
priority: 70,
|
|
4872
5397
|
placeholder: "[ADDRESS_{n}]",
|
|
4873
5398
|
description: "Street address",
|
|
@@ -4954,11 +5479,20 @@ var SOLANA_ADDRESS = {
|
|
|
4954
5479
|
severity: "high",
|
|
4955
5480
|
description: "Solana (SOL) cryptocurrency address",
|
|
4956
5481
|
validator: (value, context) => {
|
|
4957
|
-
|
|
4958
|
-
if (
|
|
5482
|
+
const cleaned = value.replace(/[\s\u00A0.-]/g, "");
|
|
5483
|
+
if (cleaned.length < 32 || cleaned.length > 44) return false;
|
|
5484
|
+
const cryptoKeywords = /solana|sol\b|crypto|wallet|blockchain|address|send|receive|transaction|transfer/i;
|
|
5485
|
+
if (!cryptoKeywords.test(context)) {
|
|
4959
5486
|
return false;
|
|
4960
5487
|
}
|
|
4961
|
-
if (/^(bc1|1|3|0x|L|M|D|X|r|cosmos|tz|addr)/.test(
|
|
5488
|
+
if (/^(bc1|1|3|0x|L|M|D|X|r|cosmos|tz|addr)/.test(cleaned)) {
|
|
5489
|
+
return false;
|
|
5490
|
+
}
|
|
5491
|
+
const rejectKeywords = /example|test|sample|demo|fake|dummy|placeholder|version|release/i;
|
|
5492
|
+
if (rejectKeywords.test(context)) {
|
|
5493
|
+
return false;
|
|
5494
|
+
}
|
|
5495
|
+
if (!/^[1-9A-HJ-NP-Za-km-z]+$/.test(cleaned)) {
|
|
4962
5496
|
return false;
|
|
4963
5497
|
}
|
|
4964
5498
|
return true;
|
|
@@ -4972,22 +5506,43 @@ var POLKADOT_ADDRESS = {
|
|
|
4972
5506
|
severity: "high",
|
|
4973
5507
|
description: "Polkadot (DOT) cryptocurrency address",
|
|
4974
5508
|
validator: (value, context) => {
|
|
4975
|
-
|
|
4976
|
-
if (
|
|
4977
|
-
|
|
5509
|
+
const cleaned = value.replace(/[\s\u00A0.-]/g, "");
|
|
5510
|
+
if (cleaned.length < 47 || cleaned.length > 48) return false;
|
|
5511
|
+
if (!cleaned.startsWith("1")) return false;
|
|
5512
|
+
const cryptoKeywords = /polkadot|dot\b|crypto|wallet|blockchain|substrate|address|send|receive|transaction|transfer/i;
|
|
5513
|
+
if (!cryptoKeywords.test(context)) {
|
|
5514
|
+
return false;
|
|
5515
|
+
}
|
|
5516
|
+
const rejectKeywords = /example|test|sample|demo|fake|dummy|placeholder|version|release/i;
|
|
5517
|
+
if (rejectKeywords.test(context)) {
|
|
5518
|
+
return false;
|
|
5519
|
+
}
|
|
5520
|
+
if (!/^1[1-9A-HJ-NP-Za-km-z]+$/.test(cleaned)) {
|
|
5521
|
+
return false;
|
|
5522
|
+
}
|
|
5523
|
+
return true;
|
|
4978
5524
|
}
|
|
4979
5525
|
};
|
|
4980
5526
|
var AVALANCHE_ADDRESS = {
|
|
4981
5527
|
type: "AVALANCHE_ADDRESS",
|
|
4982
|
-
regex: /\b([XPC]
|
|
5528
|
+
regex: /\b([XPC][-\s\u00A0]?(?:avax)?[a-z0-9]{38,43})\b/gi,
|
|
4983
5529
|
placeholder: "[AVAX_ADDR_{n}]",
|
|
4984
5530
|
priority: 85,
|
|
4985
5531
|
severity: "high",
|
|
4986
5532
|
description: "Avalanche (AVAX) cryptocurrency address",
|
|
4987
5533
|
validator: (value, context) => {
|
|
4988
|
-
|
|
4989
|
-
if (
|
|
4990
|
-
|
|
5534
|
+
const cleaned = value.replace(/[\s\u00A0]/g, "").toUpperCase();
|
|
5535
|
+
if (!/^[XPC][-]?/.test(cleaned)) return false;
|
|
5536
|
+
if (cleaned.length < 40 || cleaned.length > 46) return false;
|
|
5537
|
+
const cryptoKeywords = /avalanche|avax\b|crypto|wallet|blockchain|address|send|receive|transaction|transfer/i;
|
|
5538
|
+
if (!cryptoKeywords.test(context)) {
|
|
5539
|
+
return false;
|
|
5540
|
+
}
|
|
5541
|
+
const rejectKeywords = /example|test|sample|demo|fake|dummy|placeholder|version|release/i;
|
|
5542
|
+
if (rejectKeywords.test(context)) {
|
|
5543
|
+
return false;
|
|
5544
|
+
}
|
|
5545
|
+
return true;
|
|
4991
5546
|
}
|
|
4992
5547
|
};
|
|
4993
5548
|
var COSMOS_ADDRESS = {
|
|
@@ -4998,9 +5553,21 @@ var COSMOS_ADDRESS = {
|
|
|
4998
5553
|
severity: "high",
|
|
4999
5554
|
description: "Cosmos (ATOM) cryptocurrency address",
|
|
5000
5555
|
validator: (value, context) => {
|
|
5001
|
-
|
|
5002
|
-
if (
|
|
5003
|
-
|
|
5556
|
+
const cleaned = value.replace(/[\s\u00A0.-]/g, "").toLowerCase();
|
|
5557
|
+
if (!cleaned.startsWith("cosmos1")) return false;
|
|
5558
|
+
if (cleaned.length < 39 || cleaned.length > 45) return false;
|
|
5559
|
+
const cryptoKeywords = /cosmos|atom\b|crypto|wallet|blockchain|ibc|address|send|receive|transaction|transfer/i;
|
|
5560
|
+
if (!cryptoKeywords.test(context)) {
|
|
5561
|
+
return false;
|
|
5562
|
+
}
|
|
5563
|
+
const rejectKeywords = /example|test|sample|demo|fake|dummy|placeholder|version|release/i;
|
|
5564
|
+
if (rejectKeywords.test(context)) {
|
|
5565
|
+
return false;
|
|
5566
|
+
}
|
|
5567
|
+
if (!/^cosmos1[a-z0-9]+$/.test(cleaned)) {
|
|
5568
|
+
return false;
|
|
5569
|
+
}
|
|
5570
|
+
return true;
|
|
5004
5571
|
}
|
|
5005
5572
|
};
|
|
5006
5573
|
var ALGORAND_ADDRESS = {
|
|
@@ -5011,9 +5578,18 @@ var ALGORAND_ADDRESS = {
|
|
|
5011
5578
|
severity: "high",
|
|
5012
5579
|
description: "Algorand (ALGO) cryptocurrency address",
|
|
5013
5580
|
validator: (value, context) => {
|
|
5014
|
-
|
|
5015
|
-
if (
|
|
5016
|
-
|
|
5581
|
+
const cleaned = value.replace(/[\s\u00A0.-]/g, "").toUpperCase();
|
|
5582
|
+
if (cleaned.length !== 58) return false;
|
|
5583
|
+
if (!/^[A-Z2-7]+$/.test(cleaned)) return false;
|
|
5584
|
+
const cryptoKeywords = /algorand|algo\b|crypto|wallet|blockchain|address|send|receive|transaction|transfer/i;
|
|
5585
|
+
if (!cryptoKeywords.test(context)) {
|
|
5586
|
+
return false;
|
|
5587
|
+
}
|
|
5588
|
+
const rejectKeywords = /example|test|sample|demo|fake|dummy|placeholder|version|release/i;
|
|
5589
|
+
if (rejectKeywords.test(context)) {
|
|
5590
|
+
return false;
|
|
5591
|
+
}
|
|
5592
|
+
return true;
|
|
5017
5593
|
}
|
|
5018
5594
|
};
|
|
5019
5595
|
var TEZOS_ADDRESS = {
|
|
@@ -5024,9 +5600,21 @@ var TEZOS_ADDRESS = {
|
|
|
5024
5600
|
severity: "high",
|
|
5025
5601
|
description: "Tezos (XTZ) cryptocurrency address",
|
|
5026
5602
|
validator: (value, context) => {
|
|
5027
|
-
|
|
5028
|
-
if (
|
|
5029
|
-
|
|
5603
|
+
const cleaned = value.replace(/[\s\u00A0.-]/g, "");
|
|
5604
|
+
if (!/^tz[123]/.test(cleaned)) return false;
|
|
5605
|
+
if (cleaned.length !== 36) return false;
|
|
5606
|
+
const cryptoKeywords = /tezos|xtz\b|crypto|wallet|blockchain|address|send|receive|transaction|transfer/i;
|
|
5607
|
+
if (!cryptoKeywords.test(context)) {
|
|
5608
|
+
return false;
|
|
5609
|
+
}
|
|
5610
|
+
const rejectKeywords = /example|test|sample|demo|fake|dummy|placeholder|version|release/i;
|
|
5611
|
+
if (rejectKeywords.test(context)) {
|
|
5612
|
+
return false;
|
|
5613
|
+
}
|
|
5614
|
+
if (!/^tz[123][1-9A-HJ-NP-Za-km-z]+$/.test(cleaned)) {
|
|
5615
|
+
return false;
|
|
5616
|
+
}
|
|
5617
|
+
return true;
|
|
5030
5618
|
}
|
|
5031
5619
|
};
|
|
5032
5620
|
var POLYGON_ADDRESS = {
|
|
@@ -5037,8 +5625,23 @@ var POLYGON_ADDRESS = {
|
|
|
5037
5625
|
severity: "high",
|
|
5038
5626
|
description: "Polygon (MATIC) cryptocurrency address",
|
|
5039
5627
|
validator: (value, context) => {
|
|
5040
|
-
|
|
5041
|
-
|
|
5628
|
+
const cleaned = value.replace(/[\s\u00A0.-]/g, "");
|
|
5629
|
+
if (!cleaned.startsWith("0x") || cleaned.length !== 42) return false;
|
|
5630
|
+
const polygonKeywords = /polygon|matic\b|crypto|wallet|blockchain|address|send|receive|transaction|transfer/i;
|
|
5631
|
+
if (!polygonKeywords.test(context)) {
|
|
5632
|
+
return false;
|
|
5633
|
+
}
|
|
5634
|
+
if (/ethereum|eth\b|ether/i.test(context) && !/polygon|matic/i.test(context)) {
|
|
5635
|
+
return false;
|
|
5636
|
+
}
|
|
5637
|
+
const rejectKeywords = /example|test|sample|demo|fake|dummy|placeholder|version|release/i;
|
|
5638
|
+
if (rejectKeywords.test(context)) {
|
|
5639
|
+
return false;
|
|
5640
|
+
}
|
|
5641
|
+
if (!/^0x[a-fA-F0-9]{40}$/.test(cleaned)) {
|
|
5642
|
+
return false;
|
|
5643
|
+
}
|
|
5644
|
+
return true;
|
|
5042
5645
|
}
|
|
5043
5646
|
};
|
|
5044
5647
|
var BINANCE_CHAIN_ADDRESS = {
|
|
@@ -5049,8 +5652,26 @@ var BINANCE_CHAIN_ADDRESS = {
|
|
|
5049
5652
|
severity: "high",
|
|
5050
5653
|
description: "Binance Smart Chain (BNB) address",
|
|
5051
5654
|
validator: (value, context) => {
|
|
5052
|
-
|
|
5053
|
-
|
|
5655
|
+
const cleaned = value.replace(/[\s\u00A0.-]/g, "");
|
|
5656
|
+
if (!cleaned.startsWith("0x") || cleaned.length !== 42) return false;
|
|
5657
|
+
const binanceKeywords = /binance|bnb\b|bsc|smart[- ]?chain|crypto|wallet|blockchain|address|send|receive|transaction|transfer/i;
|
|
5658
|
+
if (!binanceKeywords.test(context)) {
|
|
5659
|
+
return false;
|
|
5660
|
+
}
|
|
5661
|
+
if (/ethereum|eth\b|ether/i.test(context) && !/binance|bnb|bsc/i.test(context)) {
|
|
5662
|
+
return false;
|
|
5663
|
+
}
|
|
5664
|
+
if (/polygon|matic/i.test(context) && !/binance|bnb|bsc/i.test(context)) {
|
|
5665
|
+
return false;
|
|
5666
|
+
}
|
|
5667
|
+
const rejectKeywords = /example|test|sample|demo|fake|dummy|placeholder|version|release/i;
|
|
5668
|
+
if (rejectKeywords.test(context)) {
|
|
5669
|
+
return false;
|
|
5670
|
+
}
|
|
5671
|
+
if (!/^0x[a-fA-F0-9]{40}$/.test(cleaned)) {
|
|
5672
|
+
return false;
|
|
5673
|
+
}
|
|
5674
|
+
return true;
|
|
5054
5675
|
}
|
|
5055
5676
|
};
|
|
5056
5677
|
var NEAR_ADDRESS = {
|
|
@@ -5061,8 +5682,20 @@ var NEAR_ADDRESS = {
|
|
|
5061
5682
|
severity: "high",
|
|
5062
5683
|
description: "Near Protocol (NEAR) address",
|
|
5063
5684
|
validator: (value, context) => {
|
|
5064
|
-
|
|
5065
|
-
|
|
5685
|
+
const cleaned = value.replace(/[\s\u00A0]/g, "").toLowerCase();
|
|
5686
|
+
if (!cleaned.endsWith(".near")) return false;
|
|
5687
|
+
const accountName = cleaned.slice(0, -5);
|
|
5688
|
+
if (accountName.length < 2 || accountName.length > 64) return false;
|
|
5689
|
+
if (!/^[a-z0-9_-]+$/.test(accountName)) return false;
|
|
5690
|
+
const cryptoKeywords = /near|protocol|crypto|wallet|blockchain|address|send|receive|transaction|transfer/i;
|
|
5691
|
+
if (!cryptoKeywords.test(context)) {
|
|
5692
|
+
return false;
|
|
5693
|
+
}
|
|
5694
|
+
const rejectKeywords = /example|test|sample|demo|fake|dummy|placeholder|version|release/i;
|
|
5695
|
+
if (rejectKeywords.test(context)) {
|
|
5696
|
+
return false;
|
|
5697
|
+
}
|
|
5698
|
+
return true;
|
|
5066
5699
|
}
|
|
5067
5700
|
};
|
|
5068
5701
|
var cryptoExtendedPatterns = [
|
|
@@ -5218,15 +5851,20 @@ var BIOBANK_SAMPLE_ID = {
|
|
|
5218
5851
|
};
|
|
5219
5852
|
var PROVIDER_LICENSE = {
|
|
5220
5853
|
type: "PROVIDER_LICENSE",
|
|
5221
|
-
regex: /\b(?:MEDICAL|PHYSICIAN|DOCTOR|NURSE|PROVIDER)[-\s]
|
|
5854
|
+
regex: /\b(?:MEDICAL|PHYSICIAN|DOCTOR|NURSE|PROVIDER)[-\s\u00A0]*(?:LICENSE|LICENCE|LIC)[-\s\u00A0]*(?:NO|NUM(?:BER)?)?[-\s\u00A0.:#]*((?:[A-Z0-9]{2,6}[\s\u00A0./-]?){1,3}[A-Z0-9]{2,6})\b/gi,
|
|
5222
5855
|
placeholder: "[PROVIDER_LIC_{n}]",
|
|
5223
5856
|
priority: 80,
|
|
5224
5857
|
severity: "high",
|
|
5225
|
-
description: "Healthcare provider license numbers"
|
|
5858
|
+
description: "Healthcare provider license numbers",
|
|
5859
|
+
validator: (value) => {
|
|
5860
|
+
const normalized = value.replace(/[^A-Za-z0-9]/g, "");
|
|
5861
|
+
if (normalized.length < 6 || normalized.length > 18) return false;
|
|
5862
|
+
return /[A-Z]/i.test(normalized) && /\d/.test(normalized);
|
|
5863
|
+
}
|
|
5226
5864
|
};
|
|
5227
5865
|
var NPI_NUMBER = {
|
|
5228
5866
|
type: "NPI_NUMBER",
|
|
5229
|
-
regex: /\b(?:NPI[-\s]
|
|
5867
|
+
regex: /\b(?:NPI[-\s\u00A0]*(?:NO|NUM(?:BER)?)?[-\s\u00A0.:#]*)?((?:\d[\s\u00A0.-]?){10})\b/g,
|
|
5230
5868
|
placeholder: "[NPI_{n}]",
|
|
5231
5869
|
priority: 85,
|
|
5232
5870
|
severity: "high",
|
|
@@ -5235,7 +5873,8 @@ var NPI_NUMBER = {
|
|
|
5235
5873
|
if (!/provider|npi|physician|doctor|clinic|hospital|practice/i.test(context)) {
|
|
5236
5874
|
return false;
|
|
5237
5875
|
}
|
|
5238
|
-
const digits = value.split("").map(Number);
|
|
5876
|
+
const digits = value.replace(/\D/g, "").split("").map(Number);
|
|
5877
|
+
if (digits.length !== 10) return false;
|
|
5239
5878
|
let sum = 0;
|
|
5240
5879
|
for (let i = digits.length - 2; i >= 0; i--) {
|
|
5241
5880
|
let digit = digits[i];
|
|
@@ -5251,17 +5890,19 @@ var NPI_NUMBER = {
|
|
|
5251
5890
|
};
|
|
5252
5891
|
var DEA_NUMBER = {
|
|
5253
5892
|
type: "DEA_NUMBER",
|
|
5254
|
-
regex: /\b(?:DEA[-\s]
|
|
5893
|
+
regex: /\b(?:DEA[-\s\u00A0]*(?:NO|NUM(?:BER)?)?[-\s\u00A0.:#]*)?([A-Z]{2}(?:[\s\u00A0.-]?\d){7})\b/gi,
|
|
5255
5894
|
placeholder: "[DEA_{n}]",
|
|
5256
5895
|
priority: 90,
|
|
5257
5896
|
severity: "high",
|
|
5258
5897
|
description: "DEA registration number for controlled substances",
|
|
5259
5898
|
validator: (value, _context) => {
|
|
5899
|
+
const normalized = value.replace(/[^A-Za-z0-9]/g, "").toUpperCase();
|
|
5900
|
+
if (normalized.length !== 9) return false;
|
|
5260
5901
|
const validFirstLetters = ["A", "B", "C", "D", "E", "F", "G", "H", "J", "K", "L", "M", "P", "R", "S", "T", "U"];
|
|
5261
|
-
if (!validFirstLetters.includes(
|
|
5902
|
+
if (!validFirstLetters.includes(normalized[0])) {
|
|
5262
5903
|
return false;
|
|
5263
5904
|
}
|
|
5264
|
-
const digits =
|
|
5905
|
+
const digits = normalized.substring(2).split("").map(Number);
|
|
5265
5906
|
const sum1 = digits[0] + digits[2] + digits[4];
|
|
5266
5907
|
const sum2 = (digits[1] + digits[3] + digits[5]) * 2;
|
|
5267
5908
|
const checkDigit = (sum1 + sum2) % 10;
|
|
@@ -5286,11 +5927,16 @@ var EMERGENCY_CONTACT_MARKER = {
|
|
|
5286
5927
|
};
|
|
5287
5928
|
// Detection pattern for biometric identifier references: a biometric keyword
// (fingerprint, retina/iris scan, voice print, facial recognition, biometric),
// an optional ID/DATA/TEMPLATE/HASH label, then the captured identifier.
var BIOMETRIC_ID = {
  type: "BIOMETRIC_ID",
  regex: /\b(?:FINGERPRINT|RETINAL?[-\s\u00A0]?SCAN|IRIS[-\s\u00A0]?SCAN|VOICE[-\s\u00A0]?PRINT|FACIAL[-\s\u00A0]?RECOGNITION|BIOMETRIC)[-\s\u00A0]?(?:ID|DATA|TEMPLATE|HASH)?[-\s\u00A0.:#]*([A-Z0-9][A-Z0-9._-]{7,39})\b/gi,
  placeholder: "[BIOMETRIC_{n}]",
  priority: 95,
  severity: "high",
  description: "Biometric identifier references",
  /**
   * Accept a candidate only when its alphanumeric characters number 8-40
   * and include both a letter and a digit.
   *
   * @param {string} value - candidate identifier text
   * @returns {boolean} true when the candidate passes the shape checks
   */
  validator: (value) => {
    const alnum = value.replace(/[^A-Za-z0-9]/g, "");
    const size = alnum.length;
    if (size < 8 || size > 40) {
      return false;
    }
    const hasLetter = /[A-Z]/i.test(alnum);
    const hasDigit = /\d/.test(alnum);
    return hasLetter && hasDigit;
  }
};
|
|
5295
5941
|
var DNA_SEQUENCE = {
|
|
5296
5942
|
type: "DNA_SEQUENCE",
|
|
@@ -5319,7 +5965,7 @@ var DRUG_DOSAGE = {
|
|
|
5319
5965
|
};
|
|
5320
5966
|
var MEDICAL_IMAGE_REF = {
|
|
5321
5967
|
type: "MEDICAL_IMAGE_REF",
|
|
5322
|
-
regex: /\b(?:X[-\s]?RAY|MRI|CT[-\s]?SCAN|PET[-\s]?SCAN|ULTRASOUND|MAMMOGRAM)[-\s]?(?:IMAGE|FILE|ID)?[-\s]
|
|
5968
|
+
regex: /\b(?:X[-\s\u00A0]?RAY|MRI|CT[-\s\u00A0]?SCAN|PET[-\s\u00A0]?SCAN|ULTRASOUND|MAMMOGRAM)[-\s\u00A0]?(?:IMAGE|FILE|ID)?[-\s\u00A0.:#]*([A-Z0-9][A-Z0-9_.-]{5,23})\b/gi,
|
|
5323
5969
|
placeholder: "[IMAGE_{n}]",
|
|
5324
5970
|
priority: 80,
|
|
5325
5971
|
severity: "high",
|
|
@@ -5467,9 +6113,11 @@ var SWIFT_BIC = {
|
|
|
5467
6113
|
severity: "high",
|
|
5468
6114
|
description: "SWIFT/BIC codes for international transfers",
|
|
5469
6115
|
validator: (value, context) => {
|
|
6116
|
+
const cleaned = value.replace(/[\s\u00A0.-]/g, "").toUpperCase();
|
|
5470
6117
|
const financialContext = /swift|bic|bank|transfer|wire|international|payment/i.test(context);
|
|
5471
|
-
const validLength =
|
|
5472
|
-
|
|
6118
|
+
const validLength = cleaned.length === 8 || cleaned.length === 11;
|
|
6119
|
+
const validFormat = /^[A-Z]{6}[A-Z0-9]{2}([A-Z0-9]{3})?$/.test(cleaned);
|
|
6120
|
+
return financialContext && validLength && validFormat;
|
|
5473
6121
|
}
|
|
5474
6122
|
};
|
|
5475
6123
|
var TRANSACTION_ID = {
|
|
@@ -5482,11 +6130,18 @@ var TRANSACTION_ID = {
|
|
|
5482
6130
|
};
|
|
5483
6131
|
// Detection pattern for investment and pension account numbers introduced by
// a product keyword (ISA, SIPP, INV/INVESTMENT, PENSION, 401K, IRA) and
// optional ACCOUNT / NO / NUMBER labels.
var INVESTMENT_ACCOUNT = {
  type: "INVESTMENT_ACCOUNT",
  regex: /\b(?:ISA|SIPP|INV(?:ESTMENT)?|PENSION|401K|IRA)[-\s\u00A0]*(?:ACCOUNT|ACCT|A\/C)?[-\s\u00A0]*(?:NO|NUM(?:BER)?)?[-\s\u00A0.:#]*([A-Z0-9](?:[A-Z0-9][\s\u00A0./-]?){5,18}[A-Z0-9])\b/gi,
  placeholder: "[INV_ACCT_{n}]",
  priority: 85,
  severity: "high",
  description: "Investment and pension account numbers",
  /**
   * Accept a candidate when, with separators removed, it holds a run of at
   * least four digits, is 6-15 characters long, and the context string
   * mentions an investment-related keyword.
   *
   * @param {string} value - candidate account text
   * @param {string} context - text supplied by the caller (presumably the
   *   surrounding text; confirm against the detector)
   * @returns {boolean} true when all three checks pass
   */
  validator: (value, context) => {
    const compact = value.replace(/[\s\u00A0./-]/g, "");
    if (!/\d{4,}/.test(compact)) {
      return false;
    }
    if (compact.length < 6 || compact.length > 15) {
      return false;
    }
    return /isa|sipp|invest|pension|401k|ira|account|fund/i.test(context);
  }
};
|
|
5491
6146
|
var WIRE_TRANSFER_REF = {
|
|
5492
6147
|
type: "WIRE_TRANSFER_REF",
|
|
@@ -5760,22 +6415,60 @@ var TERMINAL_ID = {
|
|
|
5760
6415
|
};
|
|
5761
6416
|
// Detection pattern for UK IBANs (country code GB, 22 characters once
// separators are removed).
var UK_BANK_ACCOUNT_IBAN = {
  type: "UK_BANK_ACCOUNT_IBAN",
  // An optional separator is allowed before EACH of the 14 trailing digits so
  // the usual printed layout in groups of four ("GB29 NWBK 6016 1331 9268 19")
  // is matched as well as the compact form. The validator strips separators
  // and runs the IBAN check, so the looser match is still verified.
  regex: /\b(GB\d{2}[\s\u00A0.-]?[A-Z]{4}(?:[\s\u00A0.-]?\d){14})\b/gi,
  placeholder: "[UK_IBAN_{n}]",
  priority: 95,
  severity: "high",
  description: "UK bank account numbers in IBAN format",
  /**
   * Accept a candidate when, with whitespace, dots and hyphens removed and
   * upper-cased, it starts with "GB", is exactly 22 characters, passes
   * validateIBAN, and the context mentions a banking keyword without an
   * "example/test/sample/demo/fake iban" phrase.
   *
   * @param {string} value - candidate IBAN text, separators allowed
   * @param {string} context - text supplied by the caller (presumably the
   *   surrounding text; confirm against the detector)
   * @returns {boolean} true when the candidate should be reported
   */
  validator: (value, context) => {
    const cleaned = value.replace(/[\s\u00A0.-]/g, "").toUpperCase();
    if (!cleaned.startsWith("GB") || cleaned.length !== 22) {
      return false;
    }
    // validateIBAN is defined elsewhere in this file.
    if (!validateIBAN(cleaned)) {
      return false;
    }
    const bankingKeywords = /iban|account|bank|uk|gb|financial|payment|transfer/i;
    if (!bankingKeywords.test(context)) {
      return false;
    }
    const rejectKeywords = /example\s+iban|test\s+iban|sample\s+iban|demo\s+iban|fake\s+iban/i;
    return !rejectKeywords.test(context);
  }
};
|
|
5772
6442
|
// Detection pattern for a UK sort code (6 digits, optionally separated in
// pairs) immediately followed by an 8-digit account number.
var UK_SORT_CODE_ACCOUNT = {
  type: "UK_SORT_CODE_ACCOUNT",
  regex: /\b(\d{2}[\s\u00A0-]?\d{2}[\s\u00A0-]?\d{2}[\s\u00A0]?\d{8})\b/g,
  placeholder: "[UK_ACCOUNT_{n}]",
  priority: 95,
  severity: "high",
  description: "UK sort code and account number combination",
  /**
   * Accept a candidate when, with separators removed, it is exactly 14
   * digits, its first six digits pass validateSortCode, and the context
   * mentions a banking keyword without an
   * "example/test/sample/demo/fake account" phrase.
   *
   * @param {string} value - candidate "sort code + account number" text
   * @param {string} context - text supplied by the caller (presumably the
   *   surrounding text; confirm against the detector)
   * @returns {boolean} true when the candidate should be reported
   */
  validator: (value, context) => {
    const digitsOnly = value.replace(/[\s\u00A0.-]/g, "");
    // Exactly 14 digits means the trailing account number is always the
    // remaining 8 digits, so no separate length check is needed for it.
    const isFourteenDigits = /^\d{14}$/.test(digitsOnly);
    if (!isFourteenDigits) {
      return false;
    }
    // validateSortCode is defined elsewhere in this file.
    if (!validateSortCode(digitsOnly.substring(0, 6))) {
      return false;
    }
    const mentionsBanking = /sort\s+code|account|bank|uk|gb|financial|payment|transfer/i.test(context);
    if (!mentionsBanking) {
      return false;
    }
    const looksLikeSample = /example\s+account|test\s+account|sample\s+account|demo\s+account|fake\s+account/i.test(context);
    return !looksLikeSample;
  }
};
|
|
5780
6473
|
var financialPatterns2 = [
|
|
5781
6474
|
SWIFT_BIC,
|
|
@@ -6620,13 +7313,17 @@ var RESUME_ID = {
|
|
|
6620
7313
|
};
|
|
6621
7314
|
// Detection pattern for employee benefits / insurance plan numbers introduced
// by BENEFIT(S), INSURANCE or HEALTH PLAN and optional PLAN / NO / NUMBER / ID
// labels.
var BENEFITS_PLAN_NUMBER = {
  type: "BENEFITS_PLAN_NUMBER",
  regex: /\b(?:BENEFITS?|INSURANCE|HEALTH[-\s\u00A0]?PLAN)[-\s\u00A0]*(?:PLAN)?[-\s\u00A0]*(?:NO|NUM(?:BER)?|ID)?[-\s\u00A0.:#]*([A-Z0-9](?:[A-Z0-9][\s\u00A0./-]?){5,15}[A-Z0-9])\b/gi,
  placeholder: "[BENEFITS_{n}]",
  priority: 85,
  severity: "high",
  description: "Employee benefits and insurance plan numbers",
  /**
   * Accept a candidate when, with separators removed, it holds a run of at
   * least four digits, is 6-14 characters long, and the context string
   * mentions a benefits-related keyword.
   *
   * @param {string} value - candidate plan number text
   * @param {string} context - text supplied by the caller (presumably the
   *   surrounding text; confirm against the detector)
   * @returns {boolean} true when all three checks pass
   */
  validator: (value, context) => {
    const compact = value.replace(/[\s\u00A0./-]/g, "");
    if (!/\d{4,}/.test(compact)) {
      return false;
    }
    if (compact.length < 6 || compact.length > 14) {
      return false;
    }
    return /benefit|insurance|health|dental|vision|plan|policy|enrollment/i.test(context);
  }
};
|
|
6632
7329
|
var RETIREMENT_ACCOUNT = {
|
|
@@ -6724,13 +7421,16 @@ var EXIT_INTERVIEW_ID = {
|
|
|
6724
7421
|
};
|
|
6725
7422
|
// Detection pattern for disciplinary action / incident identifiers introduced
// by DISCIPLINARY, INCIDENT, WARNING or VIOLATION and optional
// ACTION / NO / NUMBER / ID labels.
var DISCIPLINARY_ACTION_ID = {
  type: "DISCIPLINARY_ACTION_ID",
  regex: /\b(?:DISCIPLINARY|INCIDENT|WARNING|VIOLATION)[-\s\u00A0]*(?:ACTION)?[-\s\u00A0]*(?:NO|NUM(?:BER)?|ID)?[-\s\u00A0.:#]*([A-Z0-9](?:[A-Z0-9][\s\u00A0./-]?){5,15}[A-Z0-9])\b/gi,
  placeholder: "[DISCIPLINE_{n}]",
  priority: 85,
  severity: "high",
  description: "Disciplinary action and incident identifiers",
  /**
   * Accept a candidate when, with separators removed, it holds a run of at
   * least three digits, is 6-12 characters long, and the context string
   * mentions a disciplinary keyword.
   *
   * @param {string} value - candidate identifier text
   * @param {string} context - text supplied by the caller (presumably the
   *   surrounding text; confirm against the detector)
   * @returns {boolean} true when all three checks pass
   */
  validator: (value, context) => {
    const compact = value.replace(/[\s\u00A0./-]/g, "");
    if (!/\d{3,}/.test(compact)) {
      return false;
    }
    if (compact.length < 6 || compact.length > 12) {
      return false;
    }
    return /disciplinary|incident|warning|violation|misconduct|investigation/i.test(context);
  }
};
|
|
6736
7436
|
var EMERGENCY_CONTACT_REF = {
|
|
@@ -7058,7 +7758,7 @@ var TELECOMS_ACCOUNT_NUMBER = {
|
|
|
7058
7758
|
type: "TELECOMS_ACCOUNT_NUMBER",
|
|
7059
7759
|
regex: /\bACC(?:OUNT)?[-\s]?(?:NO|NUM(?:BER)?)?[-\s]?[:#]?\s*(\d{8,12})\b/gi,
|
|
7060
7760
|
placeholder: "[ACCOUNT_{n}]",
|
|
7061
|
-
priority:
|
|
7761
|
+
priority: 90,
|
|
7062
7762
|
severity: "high",
|
|
7063
7763
|
description: "Telecommunications customer account numbers",
|
|
7064
7764
|
validator: (_value, context) => {
|
|
@@ -7936,7 +8636,7 @@ var EMERGENCY_CALL_REF = {
|
|
|
7936
8636
|
};
|
|
7937
8637
|
var POLICE_REPORT_NUMBER = {
|
|
7938
8638
|
type: "POLICE_REPORT_NUMBER",
|
|
7939
|
-
regex: /\b(?:POLICE|PR|RPT|REPORT|CASE)[-\s]
|
|
8639
|
+
regex: /\b(?:POLICE|PR|RPT|REPORT|CASE)[-\s\u00A0]*(?:NO|NUM|NUMBER|ID)?[-\s\u00A0.:#]*((?:[A-Z]{2,4}[\s\u00A0./-]?\d{2,4}[\s\u00A0./-]?\d{4,10})|\d{4}[\s\u00A0./-]?\d{5,10})\b/gi,
|
|
7940
8640
|
placeholder: "[POLICE_RPT_{n}]",
|
|
7941
8641
|
priority: 95,
|
|
7942
8642
|
severity: "high",
|
|
@@ -7947,7 +8647,7 @@ var POLICE_REPORT_NUMBER = {
|
|
|
7947
8647
|
};
|
|
7948
8648
|
var FIRE_INCIDENT_NUMBER = {
|
|
7949
8649
|
type: "FIRE_INCIDENT_NUMBER",
|
|
7950
|
-
regex: /\b(?:FIRE|FI|FD)[-\s]
|
|
8650
|
+
regex: /\b(?:FIRE|FI|FD)[-\s\u00A0]*(?:INCIDENT|INC|NO|NUM|NUMBER|ID)?[-\s\u00A0.:#]*((?:[A-Z]{2,4}[\s\u00A0./-]?\d{2,4}[\s\u00A0./-]?\d{4,10})|\d{4}[\s\u00A0./-]?\d{4,8})\b/gi,
|
|
7951
8651
|
placeholder: "[FIRE_INC_{n}]",
|
|
7952
8652
|
priority: 95,
|
|
7953
8653
|
severity: "high",
|
|
@@ -8812,13 +9512,15 @@ var gamingPatterns = [
|
|
|
8812
9512
|
// src/patterns/industries/vehicles.ts
|
|
8813
9513
|
var VIN_NUMBER = {
|
|
8814
9514
|
type: "VIN_NUMBER",
|
|
8815
|
-
regex: /\bVIN[-\s]?(?:NO|NUM|NUMBER)?[-\s]?[:#]?\s*([A-HJ-NPR-Z0-9]{17})\b/gi,
|
|
9515
|
+
regex: /\bVIN[-\s\u00A0]?(?:NO|NUM|NUMBER)?[-\s\u00A0]?[:#]?\s*([A-HJ-NPR-Z0-9]{17})\b/gi,
|
|
8816
9516
|
placeholder: "[VIN_{n}]",
|
|
8817
9517
|
priority: 85,
|
|
8818
9518
|
severity: "medium",
|
|
8819
9519
|
description: "Vehicle Identification Number (VIN)",
|
|
8820
9520
|
validator: (value, context) => {
|
|
8821
|
-
|
|
9521
|
+
const cleaned = value.replace(/[\s\u00A0.-]/g, "").toUpperCase();
|
|
9522
|
+
if (cleaned.length !== 17) return false;
|
|
9523
|
+
if (/[IOQ]/.test(cleaned)) return false;
|
|
8822
9524
|
return /vin|vehicle|car|auto|motor|registration|title|insurance/i.test(context);
|
|
8823
9525
|
}
|
|
8824
9526
|
};
|
|
@@ -11120,9 +11822,11 @@ var GERMAN_TAX_ID = {
|
|
|
11120
11822
|
severity: "high",
|
|
11121
11823
|
description: "German Tax Identification Number (Steueridentifikationsnummer)",
|
|
11122
11824
|
validator: (value, context) => {
|
|
11825
|
+
const cleaned = value.replace(/[\s\u00A0.-]/g, "");
|
|
11826
|
+
if (!/^\d{11}$/.test(cleaned)) return false;
|
|
11123
11827
|
const relevantContext = /steuer|tax|german|deutschland|finanzamt/i.test(context);
|
|
11124
11828
|
if (!relevantContext) return false;
|
|
11125
|
-
const digits =
|
|
11829
|
+
const digits = cleaned.split("").map(Number);
|
|
11126
11830
|
const digitCounts = /* @__PURE__ */ new Map();
|
|
11127
11831
|
digits.forEach((d) => digitCounts.set(d, (digitCounts.get(d) || 0) + 1));
|
|
11128
11832
|
const counts = Array.from(digitCounts.values());
|
|
@@ -11281,9 +11985,11 @@ var DUTCH_BSN = {
|
|
|
11281
11985
|
severity: "high",
|
|
11282
11986
|
description: "Dutch Citizen Service Number (BSN)",
|
|
11283
11987
|
validator: (value, context) => {
|
|
11988
|
+
const cleaned = value.replace(/[\s\u00A0.-]/g, "");
|
|
11989
|
+
if (!/^\d{9}$/.test(cleaned)) return false;
|
|
11284
11990
|
const relevantContext = /bsn|dutch|netherlands|nederland|burger/i.test(context);
|
|
11285
11991
|
if (!relevantContext) return false;
|
|
11286
|
-
const digits =
|
|
11992
|
+
const digits = cleaned.split("").map(Number);
|
|
11287
11993
|
let sum = 0;
|
|
11288
11994
|
for (let i = 0; i < 8; i++) {
|
|
11289
11995
|
sum += digits[i] * (9 - i);
|
|
@@ -11300,10 +12006,12 @@ var POLISH_PESEL = {
|
|
|
11300
12006
|
severity: "high",
|
|
11301
12007
|
description: "Polish National Identification Number (PESEL)",
|
|
11302
12008
|
validator: (value, context) => {
|
|
12009
|
+
const cleaned = value.replace(/[\s\u00A0.-]/g, "");
|
|
12010
|
+
if (!/^\d{11}$/.test(cleaned)) return false;
|
|
11303
12011
|
const relevantContext = /pesel|polish|poland|polska/i.test(context);
|
|
11304
12012
|
if (!relevantContext) return false;
|
|
11305
12013
|
const weights = [1, 3, 7, 9, 1, 3, 7, 9, 1, 3];
|
|
11306
|
-
const digits =
|
|
12014
|
+
const digits = cleaned.split("").map(Number);
|
|
11307
12015
|
let sum = 0;
|
|
11308
12016
|
for (let i = 0; i < 10; i++) {
|
|
11309
12017
|
sum += digits[i] * weights[i];
|
|
@@ -11637,7 +12345,8 @@ var DISCORD_USER_ID = {
|
|
|
11637
12345
|
severity: "medium",
|
|
11638
12346
|
description: "Discord user ID (Snowflake format)",
|
|
11639
12347
|
validator: (value, context) => {
|
|
11640
|
-
|
|
12348
|
+
const cleaned = value.replace(/[\s\u00A0.-]/g, "");
|
|
12349
|
+
if (cleaned.length < 17 || cleaned.length > 19) return false;
|
|
11641
12350
|
return /discord|snowflake|user[-_]?id|server|guild/i.test(context);
|
|
11642
12351
|
}
|
|
11643
12352
|
};
|
|
@@ -11649,7 +12358,8 @@ var STEAM_ID64 = {
|
|
|
11649
12358
|
severity: "medium",
|
|
11650
12359
|
description: "Steam 64-bit user ID",
|
|
11651
12360
|
validator: (value, context) => {
|
|
11652
|
-
|
|
12361
|
+
const cleaned = value.replace(/[\s\u00A0.-]/g, "");
|
|
12362
|
+
if (!cleaned.startsWith("765") || cleaned.length !== 17) return false;
|
|
11653
12363
|
return /steam|gaming|player|profile|valve|community/i.test(context);
|
|
11654
12364
|
}
|
|
11655
12365
|
};
|
|
@@ -11782,13 +12492,14 @@ var NINTENDO_FRIEND_CODE = {
|
|
|
11782
12492
|
type: "NINTENDO_FRIEND_CODE",
|
|
11783
12493
|
regex: /\bSW[-\s]?(\d{4}[-\s]?\d{4}[-\s]?\d{4})\b/gi,
|
|
11784
12494
|
placeholder: "[NINTENDO_FC_{n}]",
|
|
11785
|
-
priority:
|
|
12495
|
+
priority: 90,
|
|
11786
12496
|
severity: "medium",
|
|
11787
12497
|
description: "Nintendo Switch Friend Code",
|
|
11788
12498
|
validator: (value, context) => {
|
|
11789
12499
|
const digits = value.replace(/\D/g, "");
|
|
11790
12500
|
if (digits.length !== 12) return false;
|
|
11791
|
-
|
|
12501
|
+
const hasContext = /nintendo|switch|friend[- ]?code|gaming/i.test(context);
|
|
12502
|
+
return hasContext;
|
|
11792
12503
|
}
|
|
11793
12504
|
};
|
|
11794
12505
|
var BATTLETAG = {
|
|
@@ -12083,14 +12794,64 @@ var ccpaPreset = {
|
|
|
12083
12794
|
"USERNAME"
|
|
12084
12795
|
]
|
|
12085
12796
|
};
|
|
12797
|
+
// Industry presets. Each one enables name, email, phone and address detection
// and lists the pattern categories it draws on.

// Healthcare provider preset.
var healthcarePreset = {
  includeNames: true,
  includeEmails: true,
  includePhones: true,
  includeAddresses: true,
  categories: [
    "personal",
    "contact",
    "healthcare",
    "insurance",
    "government"
  ]
};

// Healthcare research preset; currently the same settings as healthcarePreset,
// kept as a distinct object.
var healthcareResearchPreset = {
  includeNames: true,
  includeEmails: true,
  includePhones: true,
  includeAddresses: true,
  categories: [
    "personal",
    "contact",
    "healthcare",
    "insurance",
    "government"
  ]
};

// Finance / financial-services preset.
var financePreset = {
  includeNames: true,
  includeEmails: true,
  includePhones: true,
  includeAddresses: true,
  categories: [
    "personal",
    "contact",
    "financial",
    "government",
    "network"
  ]
};

// Education preset.
var educationPreset = {
  includeNames: true,
  includeEmails: true,
  includePhones: true,
  includeAddresses: true,
  categories: [
    "personal",
    "contact",
    "education",
    "government",
    "network"
  ]
};

// Transport and logistics preset.
var transportLogisticsPreset = {
  includeNames: true,
  includeEmails: true,
  includePhones: true,
  includeAddresses: true,
  categories: [
    "personal",
    "contact",
    "transportation",
    "logistics",
    "vehicles",
    "network"
  ]
};
|
|
12086
12832
|
function getPreset(name) {
|
|
12087
|
-
|
|
12833
|
+
const presetName = name.toLowerCase();
|
|
12834
|
+
switch (presetName) {
|
|
12088
12835
|
case "gdpr":
|
|
12089
12836
|
return gdprPreset;
|
|
12090
12837
|
case "hipaa":
|
|
12091
12838
|
return hipaaPreset;
|
|
12092
12839
|
case "ccpa":
|
|
12093
12840
|
return ccpaPreset;
|
|
12841
|
+
case "healthcare":
|
|
12842
|
+
case "healthcare-provider":
|
|
12843
|
+
return healthcarePreset;
|
|
12844
|
+
case "healthcare-research":
|
|
12845
|
+
return healthcareResearchPreset;
|
|
12846
|
+
case "finance":
|
|
12847
|
+
case "financial-services":
|
|
12848
|
+
return financePreset;
|
|
12849
|
+
case "education":
|
|
12850
|
+
return educationPreset;
|
|
12851
|
+
case "transport-logistics":
|
|
12852
|
+
case "transportation":
|
|
12853
|
+
case "logistics":
|
|
12854
|
+
return transportLogisticsPreset;
|
|
12094
12855
|
default:
|
|
12095
12856
|
return {};
|
|
12096
12857
|
}
|
|
@@ -12613,9 +13374,23 @@ var ConfigLoader = class {
|
|
|
12613
13374
|
};
|
|
12614
13375
|
}
|
|
12615
13376
|
if (preset.startsWith("openredaction:")) {
|
|
12616
|
-
const
|
|
12617
|
-
|
|
12618
|
-
|
|
13377
|
+
const presetName = preset.replace("openredaction:", "");
|
|
13378
|
+
const supportedPresets = [
|
|
13379
|
+
"gdpr",
|
|
13380
|
+
"hipaa",
|
|
13381
|
+
"ccpa",
|
|
13382
|
+
"healthcare",
|
|
13383
|
+
"healthcare-provider",
|
|
13384
|
+
"healthcare-research",
|
|
13385
|
+
"finance",
|
|
13386
|
+
"financial-services",
|
|
13387
|
+
"education",
|
|
13388
|
+
"transport-logistics",
|
|
13389
|
+
"transportation",
|
|
13390
|
+
"logistics"
|
|
13391
|
+
];
|
|
13392
|
+
if (supportedPresets.includes(presetName)) {
|
|
13393
|
+
return { preset: presetName };
|
|
12619
13394
|
}
|
|
12620
13395
|
}
|
|
12621
13396
|
return null;
|
|
@@ -12631,7 +13406,8 @@ var ConfigLoader = class {
|
|
|
12631
13406
|
export default {
|
|
12632
13407
|
// Extend built-in presets
|
|
12633
13408
|
// Options: 'openredaction:recommended', 'openredaction:strict', 'openredaction:minimal'
|
|
12634
|
-
// Or compliance: 'openredaction:gdpr', 'openredaction:hipaa', 'openredaction:ccpa'
|
|
13409
|
+
// Or compliance/industry presets: 'openredaction:gdpr', 'openredaction:hipaa', 'openredaction:ccpa',
|
|
13410
|
+
// 'openredaction:finance', 'openredaction:education', 'openredaction:healthcare', 'openredaction:transport-logistics'
|
|
12635
13411
|
extends: ['openredaction:recommended'],
|
|
12636
13412
|
|
|
12637
13413
|
// Detection options
|
|
@@ -14308,9 +15084,8 @@ var ExplainAPI = class {
|
|
|
14308
15084
|
constructor(detector) {
|
|
14309
15085
|
this.detector = detector;
|
|
14310
15086
|
this.patterns = detector.getPatterns();
|
|
14311
|
-
const testResult = detector.detect("Contact: admin@business.co.uk");
|
|
14312
|
-
const hasConfidence = testResult.detections.length > 0 && testResult.detections[0].confidence !== void 0;
|
|
14313
15087
|
const detectorOptions = detector.options;
|
|
15088
|
+
const hasConfidence = detectorOptions?.enableContextAnalysis || false;
|
|
14314
15089
|
this.options = {
|
|
14315
15090
|
enableContextAnalysis: hasConfidence,
|
|
14316
15091
|
confidenceThreshold: detectorOptions?.confidenceThreshold || 0.5,
|
|
@@ -14322,7 +15097,7 @@ var ExplainAPI = class {
|
|
|
14322
15097
|
/**
|
|
14323
15098
|
* Explain why text was or wasn't detected as PII
|
|
14324
15099
|
*/
|
|
14325
|
-
explain(text) {
|
|
15100
|
+
async explain(text) {
|
|
14326
15101
|
const patternResults = [];
|
|
14327
15102
|
const matchedPatterns = [];
|
|
14328
15103
|
const unmatchedPatterns = [];
|
|
@@ -14412,7 +15187,8 @@ var ExplainAPI = class {
|
|
|
14412
15187
|
patternResults.push(result);
|
|
14413
15188
|
matchedPatterns.push(result);
|
|
14414
15189
|
}
|
|
14415
|
-
const
|
|
15190
|
+
const detectionResult = await this.detector.detect(text);
|
|
15191
|
+
const detections = detectionResult.detections;
|
|
14416
15192
|
return {
|
|
14417
15193
|
text,
|
|
14418
15194
|
patternResults,
|
|
@@ -14431,7 +15207,7 @@ var ExplainAPI = class {
|
|
|
14431
15207
|
/**
|
|
14432
15208
|
* Explain a specific detection
|
|
14433
15209
|
*/
|
|
14434
|
-
explainDetection(detection, text) {
|
|
15210
|
+
async explainDetection(detection, text) {
|
|
14435
15211
|
const pattern = this.patterns.find((p) => p.type === detection.type);
|
|
14436
15212
|
const reasoning = [];
|
|
14437
15213
|
reasoning.push(`Detected as ${detection.type}`);
|
|
@@ -14462,13 +15238,15 @@ var ExplainAPI = class {
|
|
|
14462
15238
|
detection,
|
|
14463
15239
|
pattern,
|
|
14464
15240
|
contextAnalysis,
|
|
14465
|
-
reasoning
|
|
15241
|
+
reasoning,
|
|
15242
|
+
suggestions: []
|
|
15243
|
+
// Will be populated if needed
|
|
14466
15244
|
};
|
|
14467
15245
|
}
|
|
14468
15246
|
/**
|
|
14469
15247
|
* Suggest why text wasn't detected
|
|
14470
15248
|
*/
|
|
14471
|
-
suggestWhy(text, expectedType) {
|
|
15249
|
+
async suggestWhy(text, expectedType) {
|
|
14472
15250
|
const suggestions = [];
|
|
14473
15251
|
const similarPatterns = [];
|
|
14474
15252
|
const typePatterns = this.patterns.filter(
|
|
@@ -14486,7 +15264,7 @@ var ExplainAPI = class {
|
|
|
14486
15264
|
similarPatterns.push(pattern);
|
|
14487
15265
|
const value = match[1] !== void 0 ? match[1] : match[0];
|
|
14488
15266
|
suggestions.push(`Pattern "${pattern.type}" matched value: "${value}"`);
|
|
14489
|
-
const explanation = this.explain(text);
|
|
15267
|
+
const explanation = await this.explain(text);
|
|
14490
15268
|
const filtered = explanation.filteredPatterns.find((r) => r.pattern.type === pattern.type);
|
|
14491
15269
|
if (filtered && filtered.reason) {
|
|
14492
15270
|
suggestions.push(`But was filtered: ${filtered.reason}`);
|
|
@@ -14516,9 +15294,9 @@ Example ${expectedType} pattern: ${examplePattern.regex.source.substring(0, 100)
|
|
|
14516
15294
|
/**
|
|
14517
15295
|
* Get debugging information for entire detection process
|
|
14518
15296
|
*/
|
|
14519
|
-
debug(text) {
|
|
15297
|
+
async debug(text) {
|
|
14520
15298
|
const start = performance.now();
|
|
14521
|
-
const explanation = this.explain(text);
|
|
15299
|
+
const explanation = await this.explain(text);
|
|
14522
15300
|
const duration = performance.now() - start;
|
|
14523
15301
|
const enabledFeatures = [];
|
|
14524
15302
|
if (this.options.enableContextAnalysis) {
|
|
@@ -15406,6 +16184,152 @@ function compileSafeRegex(pattern, flags) {
|
|
|
15406
16184
|
return new RegExp(patternStr, finalFlags);
|
|
15407
16185
|
}
|
|
15408
16186
|
|
|
16187
|
+
// src/utils/ai-assist.ts
|
|
16188
|
+
/**
 * Resolve the AI-assist endpoint to call, if any.
 *
 * Returns null unless `aiOptions.enabled` is truthy. Otherwise prefers
 * `aiOptions.endpoint`, then the OPENREDACTION_AI_ENDPOINT environment
 * variable (when `process.env` exists), and finally null.
 *
 * @param {{enabled?: boolean, endpoint?: string}|null|undefined} aiOptions
 * @returns {string|null} the endpoint, or null when AI assist is off or unconfigured
 */
function getAIEndpoint(aiOptions) {
  if (!aiOptions?.enabled) {
    return null;
  }
  const explicitEndpoint = aiOptions.endpoint;
  if (explicitEndpoint) {
    return explicitEndpoint;
  }
  // `process` is absent in browsers, so probe before touching the environment.
  const hasEnv = typeof process !== "undefined" && process.env;
  if (!hasEnv) {
    return null;
  }
  const fromEnv = process.env.OPENREDACTION_AI_ENDPOINT;
  return fromEnv ? fromEnv : null;
}
|
|
16203
|
+
/**
 * Report whether a `fetch` binding is in scope in the current runtime.
 *
 * @returns {boolean} true when `typeof fetch` is not "undefined"
 */
function isFetchAvailable() {
  const fetchKind = typeof fetch;
  return fetchKind !== "undefined";
}
|
|
16206
|
+
/**
 * POST `text` to the AI detection endpoint and return the entities it reports.
 *
 * The request goes to `<endpoint>/ai-detect` (the suffix is not duplicated
 * when the endpoint already ends with it). Every failure is soft: a missing
 * fetch API, a non-2xx response, a body without an `entities` array, or a
 * thrown error all produce `null`. A warning is logged only when `debug`
 * is truthy.
 *
 * @param {string} text - text to analyse; sent as `{ text }` JSON
 * @param {string} endpoint - base URL of the AI service
 * @param {boolean} [debug] - when truthy, log progress and failures to the console
 * @returns {Promise<Array|null>} the `entities` array from the response, or null
 */
async function callAIDetect(text, endpoint, debug) {
  if (!isFetchAvailable()) {
    if (debug) {
      console.warn("[OpenRedaction] AI assist requires fetch API. Not available in this environment.");
    }
    return null;
  }
  try {
    // Strip trailing slashes first: otherwise "https://host/api/" becomes
    // "https://host/api//ai-detect" and "https://host/ai-detect/" gets the
    // suffix appended a second time.
    const base = endpoint.replace(/\/+$/, "");
    const url = base.endsWith("/ai-detect") ? base : `${base}/ai-detect`;
    if (debug) {
      console.log(`[OpenRedaction] Calling AI endpoint: ${url}`);
    }
    const response = await fetch(url, {
      method: "POST",
      headers: {
        "Content-Type": "application/json"
      },
      body: JSON.stringify({ text })
    });
    if (!response.ok) {
      if (debug) {
        const statusText = response.status === 429 ? "Rate limit exceeded (429)" : `${response.status}: ${response.statusText}`;
        console.warn(`[OpenRedaction] AI endpoint returned ${statusText}`);
      }
      return null;
    }
    const data = await response.json();
    // A null or non-object body makes the property access throw; the catch
    // below turns that into the same soft `null` result.
    if (!data.entities || !Array.isArray(data.entities)) {
      if (debug) {
        console.warn("[OpenRedaction] Invalid AI response format: missing entities array");
      }
      return null;
    }
    return data.entities;
  } catch (error) {
    if (debug) {
      console.warn(`[OpenRedaction] AI endpoint error: ${error instanceof Error ? error.message : "Unknown error"}`);
    }
    return null;
  }
}
|
|
16247
|
+
/**
 * Check that an entity returned by the AI endpoint is well-formed and lies
 * inside the analysed text.
 *
 * The entity comes from a remote service, so nothing about its shape is
 * trusted: it must be a non-null object with a non-empty string `type`, a
 * non-empty string `value`, and integer `start`/`end` offsets where
 * `0 <= start < end <= textLength` and `value.length === end - start`.
 *
 * @param {*} entity - candidate entity from the AI response
 * @param {number} textLength - length of the text the offsets refer to
 * @returns {boolean} true when the entity can be used safely
 */
function validateAIEntity(entity, textLength) {
  // Guard first: array items of a JSON response may be null or primitives.
  if (entity === null || typeof entity !== "object") {
    return false;
  }
  const { type, value, start, end } = entity;
  // Both fields are later used as strings (`toUpperCase`, `length`), so a
  // truthy non-string must be rejected here rather than crash downstream.
  if (typeof type !== "string" || type === "" || typeof value !== "string" || value === "") {
    return false;
  }
  // Offsets index into the text; NaN, Infinity and fractions are not positions.
  if (!Number.isInteger(start) || !Number.isInteger(end)) {
    return false;
  }
  if (start < 0 || start >= end || end > textLength) {
    return false;
  }
  return value.length === end - start;
}
|
|
16263
|
+
/**
 * Decide whether two detections overlap significantly.
 *
 * Each detection carries `position` as `[start, end]`. The two count as
 * overlapping only when their shared span is strictly more than half the
 * length of the shorter detection; touching or disjoint ranges never do.
 *
 * @param {{position: [number, number]}} det1
 * @param {{position: [number, number]}} det2
 * @returns {boolean} true when the shared span exceeds 50% of the shorter range
 */
function detectionsOverlap(det1, det2) {
  const [firstStart, firstEnd] = det1.position;
  const [secondStart, secondEnd] = det2.position;
  const sharedStart = Math.max(firstStart, secondStart);
  const sharedEnd = Math.min(firstEnd, secondEnd);
  if (sharedStart >= sharedEnd) {
    // Ranges are disjoint or merely adjacent.
    return false;
  }
  const shorterLength = Math.min(firstEnd - firstStart, secondEnd - secondStart);
  return sharedEnd - sharedStart > shorterLength * 0.5;
}
|
|
16277
|
+
/**
 * Turn an entity reported by the AI endpoint into a detection record.
 *
 * Returns null when validateAIEntity rejects the entity. Otherwise the
 * detection's `value` is re-read from `text` at the entity's offsets, its
 * `type` is the upper-cased entity type mapped onto a built-in type name
 * where a rule applies, and `confidence` falls back to 0.7 when the entity
 * has none (null or undefined).
 *
 * @param {{type: string, value: string, start: number, end: number, confidence?: number}} entity
 * @param {string} text - the text the entity offsets refer to
 * @returns {{type: string, value: string, placeholder: string, position: [number, number], severity: string, confidence: number}|null}
 */
function convertAIEntityToDetection(entity, text) {
  if (!validateAIEntity(entity, text.length)) {
    return null;
  }
  const matchedText = text.substring(entity.start, entity.end);
  const label = entity.type.toUpperCase();

  // Ordered rules: the first whose fragment occurs in the label (or whose
  // alias equals it) decides the canonical type. Order matters, e.g.
  // "EMAIL_ADDRESS" must hit the EMAIL rule before the ADDRESS rule.
  const typeRules = [
    { fragment: "EMAIL", alias: "EMAIL_ADDRESS", canonical: "EMAIL" },
    { fragment: "PHONE", alias: "PHONE_NUMBER", canonical: "PHONE_US" },
    { fragment: "NAME", alias: "PERSON", canonical: "NAME" },
    { fragment: "SSN", alias: "SOCIAL_SECURITY_NUMBER", canonical: "SSN" },
    { fragment: "ADDRESS", alias: null, canonical: "ADDRESS_STREET" }
  ];
  const rule = typeRules.find((r) => label.includes(r.fragment) || label === r.alias);
  const type = rule ? rule.canonical : label;

  const criticalTypes = ["SSN", "CREDIT_CARD"];
  const highTypes = ["EMAIL", "PHONE_US", "NAME"];
  const severity = criticalTypes.includes(type) ? "critical" : highTypes.includes(type) ? "high" : "medium";

  return {
    type,
    value: matchedText,
    // Random suffix, unlike the `{n}` placeholders used by regex patterns.
    placeholder: `[${type}_${Math.random().toString(36).substring(2, 9)}]`,
    position: [entity.start, entity.end],
    severity,
    // Default confidence for AI entities
    confidence: entity.confidence ?? 0.7
  };
}
|
|
16310
|
+
/**
 * Merge AI-reported entities into the regex detections.
 *
 * Regex detections always win: every one of them is kept, in order. Each AI
 * entity is converted with `convertAIEntityToDetection` and appended only if
 * it does not overlap (per `detectionsOverlap`) any detection already in the
 * merged list. That covers the regex detections as well as AI detections
 * accepted earlier in this call, so duplicate AI entities are added once.
 *
 * The input array is not mutated.
 *
 * @param {object[]} regexDetections - Detections produced by the regex patterns.
 * @param {Iterable<object>|null|undefined} aiEntities - Entities from the AI endpoint.
 * @param {string} text - The text both sets of offsets refer to.
 * @returns {object[]} New array with regex detections followed by accepted AI detections.
 */
function mergeAIEntities(regexDetections, aiEntities, text) {
  const merged = [...regexDetections];
  // A missing entity list means there is nothing to merge.
  if (aiEntities == null) {
    return merged;
  }
  for (const aiEntity of aiEntities) {
    const detection = convertAIEntityToDetection(aiEntity, text);
    if (!detection) {
      continue;
    }
    // Compare against everything accepted so far, not just the regex results,
    // so two AI entities for the same span do not both get through.
    const alreadyCovered = merged.some((existing) => detectionsOverlap(existing, detection));
    if (!alreadyCovered) {
      merged.push(detection);
    }
  }
  return merged;
}
|
|
16332
|
+
|
|
15409
16333
|
// src/detector.ts
|
|
15410
16334
|
var OpenRedaction = class _OpenRedaction {
|
|
15411
16335
|
constructor(options = {}) {
|
|
@@ -15615,6 +16539,9 @@ var OpenRedaction = class _OpenRedaction {
|
|
|
15615
16539
|
for (const pattern of this.patterns) {
|
|
15616
16540
|
const regex = new RegExp(pattern.regex.source, pattern.regex.flags);
|
|
15617
16541
|
this.compiledPatterns.set(pattern, regex);
|
|
16542
|
+
if (this.options.debug && (pattern.type === "NINTENDO_FRIEND_CODE" || pattern.type === "TELECOMS_ACCOUNT_NUMBER")) {
|
|
16543
|
+
console.log(`[OpenRedaction] Compiled pattern '${pattern.type}': ${regex}`);
|
|
16544
|
+
}
|
|
15618
16545
|
}
|
|
15619
16546
|
if (this.options.debug) {
|
|
15620
16547
|
console.log(`[OpenRedaction] Pre-compiled ${this.compiledPatterns.size} regex patterns`);
|
|
@@ -15634,12 +16561,18 @@ var OpenRedaction = class _OpenRedaction {
|
|
|
15634
16561
|
}
|
|
15635
16562
|
continue;
|
|
15636
16563
|
}
|
|
16564
|
+
if (this.options.debug && (pattern.type === "NINTENDO_FRIEND_CODE" || pattern.type === "TELECOMS_ACCOUNT_NUMBER")) {
|
|
16565
|
+
console.log(`[OpenRedaction] Processing pattern '${pattern.type}' with regex: ${regex}`);
|
|
16566
|
+
}
|
|
15637
16567
|
let match;
|
|
15638
16568
|
let matchCount = 0;
|
|
15639
16569
|
const maxMatches = 1e4;
|
|
15640
16570
|
regex.lastIndex = 0;
|
|
15641
16571
|
try {
|
|
15642
16572
|
while ((match = safeExec(regex, text, { timeout: this.options.regexTimeout })) !== null) {
|
|
16573
|
+
if (this.options.debug && (pattern.type === "NINTENDO_FRIEND_CODE" || pattern.type === "TELECOMS_ACCOUNT_NUMBER")) {
|
|
16574
|
+
console.log(`[OpenRedaction] Pattern '${pattern.type}' regex match found: '${match[0]}' at position ${match.index}`);
|
|
16575
|
+
}
|
|
15643
16576
|
matchCount++;
|
|
15644
16577
|
if (matchCount >= maxMatches) {
|
|
15645
16578
|
if (this.options.debug) {
|
|
@@ -15660,12 +16593,18 @@ var OpenRedaction = class _OpenRedaction {
|
|
|
15660
16593
|
endPos = startPos + value.length;
|
|
15661
16594
|
}
|
|
15662
16595
|
if (this.overlapsWithExisting(startPos, endPos, processedRanges)) {
|
|
16596
|
+
if (this.options.debug) {
|
|
16597
|
+
console.log(`[OpenRedaction] Pattern '${pattern.type}' skipped due to overlap at ${startPos}-${endPos}`);
|
|
16598
|
+
}
|
|
15663
16599
|
continue;
|
|
15664
16600
|
}
|
|
15665
16601
|
const contextStart = Math.max(0, startPos - 50);
|
|
15666
16602
|
const contextEnd = Math.min(text.length, endPos + 50);
|
|
15667
16603
|
const context = text.substring(contextStart, contextEnd);
|
|
15668
16604
|
if (pattern.validator && !pattern.validator(value, context)) {
|
|
16605
|
+
if (this.options.debug) {
|
|
16606
|
+
console.log(`[OpenRedaction] Pattern '${pattern.type}' validation failed for value: '${value}' with context: '${context.substring(0, 100)}...'`);
|
|
16607
|
+
}
|
|
15669
16608
|
continue;
|
|
15670
16609
|
}
|
|
15671
16610
|
if (this.options.enableFalsePositiveFilter) {
|
|
@@ -15684,6 +16623,9 @@ var OpenRedaction = class _OpenRedaction {
|
|
|
15684
16623
|
endPos
|
|
15685
16624
|
);
|
|
15686
16625
|
confidence = contextAnalysis.confidence;
|
|
16626
|
+
if (this.options.debug && confidence < this.options.confidenceThreshold) {
|
|
16627
|
+
console.log(`[OpenRedaction] Pattern '${pattern.type}' failed context analysis. Value: '${value}', Confidence: ${confidence} < ${this.options.confidenceThreshold}`);
|
|
16628
|
+
}
|
|
15687
16629
|
}
|
|
15688
16630
|
if (this.contextRulesEngine) {
|
|
15689
16631
|
const piiMatch = {
|
|
@@ -15709,6 +16651,9 @@ var OpenRedaction = class _OpenRedaction {
|
|
|
15709
16651
|
continue;
|
|
15710
16652
|
}
|
|
15711
16653
|
const placeholder = this.generatePlaceholder(value, pattern);
|
|
16654
|
+
if (this.options.debug) {
|
|
16655
|
+
console.log(`[OpenRedaction] Pattern '${pattern.type}' detected: '${value}' at position ${startPos}-${endPos}, confidence: ${confidence}`);
|
|
16656
|
+
}
|
|
15712
16657
|
detections.push({
|
|
15713
16658
|
type: pattern.type,
|
|
15714
16659
|
value,
|
|
@@ -15769,8 +16714,9 @@ var OpenRedaction = class _OpenRedaction {
|
|
|
15769
16714
|
}
|
|
15770
16715
|
/**
|
|
15771
16716
|
* Detect PII in text
|
|
16717
|
+
* Now async to support optional AI assist
|
|
15772
16718
|
*/
|
|
15773
|
-
detect(text) {
|
|
16719
|
+
async detect(text) {
|
|
15774
16720
|
if (this.rbacManager && !this.rbacManager.hasPermission("detection:detect")) {
|
|
15775
16721
|
throw new Error("[OpenRedaction] Permission denied: detection:detect required");
|
|
15776
16722
|
}
|
|
@@ -15824,12 +16770,42 @@ var OpenRedaction = class _OpenRedaction {
|
|
|
15824
16770
|
} else {
|
|
15825
16771
|
detections = this.processPatterns(text, this.patterns, processedRanges);
|
|
15826
16772
|
}
|
|
16773
|
+
if (this.options.ai?.enabled) {
|
|
16774
|
+
const aiEndpoint = getAIEndpoint(this.options.ai);
|
|
16775
|
+
if (aiEndpoint) {
|
|
16776
|
+
try {
|
|
16777
|
+
if (this.options.debug) {
|
|
16778
|
+
console.log("[OpenRedaction] AI assist enabled, calling AI endpoint...");
|
|
16779
|
+
}
|
|
16780
|
+
const aiEntities = await callAIDetect(text, aiEndpoint, this.options.debug);
|
|
16781
|
+
if (aiEntities && aiEntities.length > 0) {
|
|
16782
|
+
if (this.options.debug) {
|
|
16783
|
+
console.log(`[OpenRedaction] AI returned ${aiEntities.length} additional entities`);
|
|
16784
|
+
}
|
|
16785
|
+
detections = mergeAIEntities(detections, aiEntities, text);
|
|
16786
|
+
if (this.options.debug) {
|
|
16787
|
+
console.log(`[OpenRedaction] After AI merge: ${detections.length} total detections`);
|
|
16788
|
+
}
|
|
16789
|
+
} else if (this.options.debug) {
|
|
16790
|
+
console.log("[OpenRedaction] AI endpoint returned no additional entities");
|
|
16791
|
+
}
|
|
16792
|
+
} catch (error) {
|
|
16793
|
+
if (this.options.debug) {
|
|
16794
|
+
console.warn(`[OpenRedaction] AI assist failed, using regex-only: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
16795
|
+
}
|
|
16796
|
+
}
|
|
16797
|
+
} else if (this.options.debug) {
|
|
16798
|
+
console.warn("[OpenRedaction] AI assist enabled but no endpoint configured. Set ai.endpoint or OPENREDACTION_AI_ENDPOINT env var.");
|
|
16799
|
+
}
|
|
16800
|
+
}
|
|
15827
16801
|
detections.sort((a, b) => b.position[0] - a.position[0]);
|
|
15828
16802
|
let redacted = text;
|
|
15829
16803
|
const redactionMap = {};
|
|
15830
16804
|
for (const detection of detections) {
|
|
15831
|
-
|
|
15832
|
-
|
|
16805
|
+
if (!detection.value) continue;
|
|
16806
|
+
const escapedValue = this.escapeRegex(detection.value);
|
|
16807
|
+
const pattern = new RegExp(escapedValue, "gi");
|
|
16808
|
+
redacted = redacted.replace(pattern, detection.placeholder);
|
|
15833
16809
|
redactionMap[detection.placeholder] = detection.value;
|
|
15834
16810
|
}
|
|
15835
16811
|
const endTime = performance.now();
|
|
@@ -15982,8 +16958,8 @@ var OpenRedaction = class _OpenRedaction {
|
|
|
15982
16958
|
/**
|
|
15983
16959
|
* Get severity-based scan results
|
|
15984
16960
|
*/
|
|
15985
|
-
scan(text) {
|
|
15986
|
-
const result = this.detect(text);
|
|
16961
|
+
async scan(text) {
|
|
16962
|
+
const result = await this.detect(text);
|
|
15987
16963
|
return {
|
|
15988
16964
|
high: result.detections.filter((d) => d.severity === "high"),
|
|
15989
16965
|
medium: result.detections.filter((d) => d.severity === "medium"),
|
|
@@ -16220,7 +17196,7 @@ var OpenRedaction = class _OpenRedaction {
|
|
|
16220
17196
|
const metadata = await processor.getMetadata(buffer, options);
|
|
16221
17197
|
const extractionEnd = performance.now();
|
|
16222
17198
|
const extractionTime = Math.round((extractionEnd - extractionStart) * 100) / 100;
|
|
16223
|
-
const detection = this.detect(text);
|
|
17199
|
+
const detection = await this.detect(text);
|
|
16224
17200
|
return {
|
|
16225
17201
|
text,
|
|
16226
17202
|
metadata,
|
|
@@ -16317,7 +17293,7 @@ var StreamingDetector = class {
|
|
|
16317
17293
|
const end = Math.min(textLength, position + chunkSize);
|
|
16318
17294
|
const chunk = text.substring(start, end);
|
|
16319
17295
|
const byteOffset = start;
|
|
16320
|
-
const result = this.detector.detect(chunk);
|
|
17296
|
+
const result = await this.detector.detect(chunk);
|
|
16321
17297
|
const newDetections = result.detections.filter((detection) => {
|
|
16322
17298
|
const absoluteStart = byteOffset + detection.position[0];
|
|
16323
17299
|
const absoluteEnd = byteOffset + detection.position[1];
|
|
@@ -16347,8 +17323,10 @@ var StreamingDetector = class {
|
|
|
16347
17323
|
(a, b) => b.position[0] - a.position[0]
|
|
16348
17324
|
);
|
|
16349
17325
|
for (const detection of sortedDetections) {
|
|
16350
|
-
|
|
16351
|
-
|
|
17326
|
+
if (!detection.value) continue;
|
|
17327
|
+
const escapedValue = this.escapeRegex(detection.value);
|
|
17328
|
+
const pattern = new RegExp(escapedValue, "gi");
|
|
17329
|
+
redactedChunk = redactedChunk.replace(pattern, detection.placeholder);
|
|
16352
17330
|
}
|
|
16353
17331
|
}
|
|
16354
17332
|
yield {
|
|
@@ -16374,8 +17352,10 @@ var StreamingDetector = class {
|
|
|
16374
17352
|
allDetections.sort((a, b) => b.position[0] - a.position[0]);
|
|
16375
17353
|
const redactionMap = {};
|
|
16376
17354
|
for (const detection of allDetections) {
|
|
16377
|
-
|
|
16378
|
-
|
|
17355
|
+
if (!detection.value) continue;
|
|
17356
|
+
const escapedValue = this.escapeRegex(detection.value);
|
|
17357
|
+
const pattern = new RegExp(escapedValue, "gi");
|
|
17358
|
+
redactedText = redactedText.replace(pattern, detection.placeholder);
|
|
16379
17359
|
redactionMap[detection.placeholder] = detection.value;
|
|
16380
17360
|
}
|
|
16381
17361
|
return {
|
|
@@ -16450,6 +17430,9 @@ var StreamingDetector = class {
|
|
|
16450
17430
|
estimatedMemory
|
|
16451
17431
|
};
|
|
16452
17432
|
}
|
|
17433
|
+
escapeRegex(str) {
|
|
17434
|
+
return str.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
|
17435
|
+
}
|
|
16453
17436
|
};
|
|
16454
17437
|
function createStreamingDetector(detector, options) {
|
|
16455
17438
|
return new StreamingDetector(detector, options);
|
|
@@ -16466,11 +17449,11 @@ var BatchProcessor = class {
|
|
|
16466
17449
|
/**
|
|
16467
17450
|
* Process multiple documents sequentially
|
|
16468
17451
|
*/
|
|
16469
|
-
processSequential(documents, options = {}) {
|
|
17452
|
+
async processSequential(documents, options = {}) {
|
|
16470
17453
|
const startTime = performance.now();
|
|
16471
17454
|
const results = [];
|
|
16472
17455
|
for (let i = 0; i < documents.length; i++) {
|
|
16473
|
-
const result = this.detector.detect(documents[i]);
|
|
17456
|
+
const result = await this.detector.detect(documents[i]);
|
|
16474
17457
|
results.push(result);
|
|
16475
17458
|
if (options.onProgress) {
|
|
16476
17459
|
options.onProgress(i + 1, documents.length);
|
|
@@ -16484,7 +17467,7 @@ var BatchProcessor = class {
|
|
|
16484
17467
|
totalDocuments: documents.length,
|
|
16485
17468
|
totalDetections: results.reduce((sum, r) => sum + r.detections.length, 0),
|
|
16486
17469
|
totalTime,
|
|
16487
|
-
avgTimePerDocument: totalTime / documents.length
|
|
17470
|
+
avgTimePerDocument: documents.length > 0 ? totalTime / documents.length : 0
|
|
16488
17471
|
}
|
|
16489
17472
|
};
|
|
16490
17473
|
}
|
|
@@ -16498,16 +17481,14 @@ var BatchProcessor = class {
|
|
|
16498
17481
|
let completed = 0;
|
|
16499
17482
|
for (let i = 0; i < documents.length; i += maxConcurrency) {
|
|
16500
17483
|
const batch = documents.slice(i, i + maxConcurrency);
|
|
16501
|
-
const batchPromises = batch.map((doc, batchIndex) => {
|
|
16502
|
-
|
|
16503
|
-
|
|
16504
|
-
|
|
16505
|
-
|
|
16506
|
-
|
|
16507
|
-
|
|
16508
|
-
|
|
16509
|
-
return result;
|
|
16510
|
-
});
|
|
17484
|
+
const batchPromises = batch.map(async (doc, batchIndex) => {
|
|
17485
|
+
const result = await this.detector.detect(doc);
|
|
17486
|
+
results[i + batchIndex] = result;
|
|
17487
|
+
completed++;
|
|
17488
|
+
if (options.onProgress) {
|
|
17489
|
+
options.onProgress(completed, documents.length);
|
|
17490
|
+
}
|
|
17491
|
+
return result;
|
|
16511
17492
|
});
|
|
16512
17493
|
await Promise.all(batchPromises);
|
|
16513
17494
|
}
|
|
@@ -16519,7 +17500,7 @@ var BatchProcessor = class {
|
|
|
16519
17500
|
totalDocuments: documents.length,
|
|
16520
17501
|
totalDetections: results.reduce((sum, r) => sum + r.detections.length, 0),
|
|
16521
17502
|
totalTime,
|
|
16522
|
-
avgTimePerDocument: totalTime / documents.length
|
|
17503
|
+
avgTimePerDocument: documents.length > 0 ? totalTime / documents.length : 0
|
|
16523
17504
|
}
|
|
16524
17505
|
};
|
|
16525
17506
|
}
|
|
@@ -16530,7 +17511,7 @@ var BatchProcessor = class {
|
|
|
16530
17511
|
if (options.parallel) {
|
|
16531
17512
|
return this.processParallel(documents, options);
|
|
16532
17513
|
} else {
|
|
16533
|
-
return
|
|
17514
|
+
return this.processSequential(documents, options);
|
|
16534
17515
|
}
|
|
16535
17516
|
}
|
|
16536
17517
|
/**
|
|
@@ -16541,7 +17522,7 @@ var BatchProcessor = class {
|
|
|
16541
17522
|
for (let i = 0; i < documents.length; i += batchSize) {
|
|
16542
17523
|
const batch = documents.slice(i, i + batchSize);
|
|
16543
17524
|
for (const doc of batch) {
|
|
16544
|
-
const result = this.detector.detect(doc);
|
|
17525
|
+
const result = await this.detector.detect(doc);
|
|
16545
17526
|
yield result;
|
|
16546
17527
|
}
|
|
16547
17528
|
}
|
|
@@ -16589,7 +17570,7 @@ function openredactionMiddleware(options = {}) {
|
|
|
16589
17570
|
...detectorOptions
|
|
16590
17571
|
} = options;
|
|
16591
17572
|
const detector = new OpenRedaction(detectorOptions);
|
|
16592
|
-
return (req, res, next) => {
|
|
17573
|
+
return async (req, res, next) => {
|
|
16593
17574
|
if (skipRoutes.some((pattern) => pattern.test(req.path))) {
|
|
16594
17575
|
return next();
|
|
16595
17576
|
}
|
|
@@ -16609,7 +17590,7 @@ function openredactionMiddleware(options = {}) {
|
|
|
16609
17590
|
const results = {};
|
|
16610
17591
|
const redactedBody = { ...req.body };
|
|
16611
17592
|
for (const { field, value } of textsToCheck) {
|
|
16612
|
-
const result = detector.detect(value);
|
|
17593
|
+
const result = await detector.detect(value);
|
|
16613
17594
|
if (result.detections.length > 0) {
|
|
16614
17595
|
totalDetections += result.detections.length;
|
|
16615
17596
|
results[field] = result;
|
|
@@ -16659,7 +17640,7 @@ function openredactionMiddleware(options = {}) {
|
|
|
16659
17640
|
}
|
|
16660
17641
|
function detectPII(options = {}) {
|
|
16661
17642
|
const detector = new OpenRedaction(options);
|
|
16662
|
-
return (req, res) => {
|
|
17643
|
+
return async (req, res) => {
|
|
16663
17644
|
const text = req.body?.text || req.query.text;
|
|
16664
17645
|
if (!text) {
|
|
16665
17646
|
res.status(400).json({
|
|
@@ -16668,19 +17649,26 @@ function detectPII(options = {}) {
|
|
|
16668
17649
|
});
|
|
16669
17650
|
return;
|
|
16670
17651
|
}
|
|
16671
|
-
|
|
16672
|
-
|
|
16673
|
-
|
|
16674
|
-
|
|
16675
|
-
|
|
16676
|
-
|
|
16677
|
-
|
|
16678
|
-
|
|
17652
|
+
try {
|
|
17653
|
+
const result = await detector.detect(text);
|
|
17654
|
+
res.json({
|
|
17655
|
+
detected: result.detections.length > 0,
|
|
17656
|
+
count: result.detections.length,
|
|
17657
|
+
detections: result.detections,
|
|
17658
|
+
redacted: result.redacted,
|
|
17659
|
+
stats: result.stats
|
|
17660
|
+
});
|
|
17661
|
+
} catch (error) {
|
|
17662
|
+
res.status(500).json({
|
|
17663
|
+
error: "Detection failed",
|
|
17664
|
+
message: error instanceof Error ? error.message : "Unknown error"
|
|
17665
|
+
});
|
|
17666
|
+
}
|
|
16679
17667
|
};
|
|
16680
17668
|
}
|
|
16681
17669
|
function generateReport(options = {}) {
|
|
16682
17670
|
const detector = new OpenRedaction(options);
|
|
16683
|
-
return (req, res) => {
|
|
17671
|
+
return async (req, res) => {
|
|
16684
17672
|
const text = req.body?.text;
|
|
16685
17673
|
const format = req.body?.format || req.query.format || "json";
|
|
16686
17674
|
if (!text) {
|
|
@@ -16689,28 +17677,35 @@ function generateReport(options = {}) {
|
|
|
16689
17677
|
});
|
|
16690
17678
|
return;
|
|
16691
17679
|
}
|
|
16692
|
-
|
|
16693
|
-
|
|
16694
|
-
|
|
16695
|
-
|
|
16696
|
-
|
|
16697
|
-
|
|
16698
|
-
|
|
16699
|
-
|
|
16700
|
-
|
|
16701
|
-
|
|
16702
|
-
|
|
16703
|
-
|
|
16704
|
-
|
|
16705
|
-
|
|
16706
|
-
|
|
16707
|
-
|
|
16708
|
-
|
|
16709
|
-
|
|
16710
|
-
|
|
16711
|
-
|
|
16712
|
-
|
|
16713
|
-
|
|
17680
|
+
try {
|
|
17681
|
+
const result = await detector.detect(text);
|
|
17682
|
+
if (format === "html") {
|
|
17683
|
+
const html = detector.generateReport(result, {
|
|
17684
|
+
format: "html",
|
|
17685
|
+
title: req.body?.title || "PII Detection Report"
|
|
17686
|
+
});
|
|
17687
|
+
res.setHeader("Content-Type", "text/html");
|
|
17688
|
+
res.send(html);
|
|
17689
|
+
} else if (format === "markdown") {
|
|
17690
|
+
const md = detector.generateReport(result, {
|
|
17691
|
+
format: "markdown",
|
|
17692
|
+
title: req.body?.title || "PII Detection Report"
|
|
17693
|
+
});
|
|
17694
|
+
res.setHeader("Content-Type", "text/markdown");
|
|
17695
|
+
res.send(md);
|
|
17696
|
+
} else {
|
|
17697
|
+
res.json({
|
|
17698
|
+
detected: result.detections.length > 0,
|
|
17699
|
+
count: result.detections.length,
|
|
17700
|
+
detections: result.detections,
|
|
17701
|
+
redacted: result.redacted,
|
|
17702
|
+
stats: result.stats
|
|
17703
|
+
});
|
|
17704
|
+
}
|
|
17705
|
+
} catch (error) {
|
|
17706
|
+
res.status(500).json({
|
|
17707
|
+
error: "Report generation failed",
|
|
17708
|
+
message: error instanceof Error ? error.message : "Unknown error"
|
|
16714
17709
|
});
|
|
16715
17710
|
}
|
|
16716
17711
|
};
|
|
@@ -16722,12 +17717,17 @@ function useOpenRedaction(options) {
|
|
|
16722
17717
|
const detector = (0, import_react.useMemo)(() => new OpenRedaction(options), [options]);
|
|
16723
17718
|
const [result, setResult] = (0, import_react.useState)(null);
|
|
16724
17719
|
const [isDetecting, setIsDetecting] = (0, import_react.useState)(false);
|
|
16725
|
-
const detect = (0, import_react.useCallback)((text) => {
|
|
17720
|
+
const detect = (0, import_react.useCallback)(async (text) => {
|
|
16726
17721
|
setIsDetecting(true);
|
|
16727
|
-
|
|
16728
|
-
|
|
16729
|
-
|
|
16730
|
-
|
|
17722
|
+
try {
|
|
17723
|
+
const detection = await detector.detect(text);
|
|
17724
|
+
setResult(detection);
|
|
17725
|
+
setIsDetecting(false);
|
|
17726
|
+
return detection;
|
|
17727
|
+
} catch (error) {
|
|
17728
|
+
setIsDetecting(false);
|
|
17729
|
+
throw error;
|
|
17730
|
+
}
|
|
16731
17731
|
}, [detector]);
|
|
16732
17732
|
const clear = (0, import_react.useCallback)(() => {
|
|
16733
17733
|
setResult(null);
|
|
@@ -16753,10 +17753,14 @@ function usePIIDetector(text, options) {
|
|
|
16753
17753
|
return;
|
|
16754
17754
|
}
|
|
16755
17755
|
setIsDetecting(true);
|
|
16756
|
-
const timer = setTimeout(() => {
|
|
16757
|
-
|
|
16758
|
-
|
|
16759
|
-
|
|
17756
|
+
const timer = setTimeout(async () => {
|
|
17757
|
+
try {
|
|
17758
|
+
const detection = await detector.detect(text);
|
|
17759
|
+
setResult(detection);
|
|
17760
|
+
setIsDetecting(false);
|
|
17761
|
+
} catch (error) {
|
|
17762
|
+
setIsDetecting(false);
|
|
17763
|
+
}
|
|
16760
17764
|
}, debounce);
|
|
16761
17765
|
return () => {
|
|
16762
17766
|
clearTimeout(timer);
|
|
@@ -16777,27 +17781,32 @@ function useFormFieldValidator(options) {
|
|
|
16777
17781
|
const [value, setValue] = (0, import_react.useState)("");
|
|
16778
17782
|
const [error, setError] = (0, import_react.useState)(null);
|
|
16779
17783
|
const [result, setResult] = (0, import_react.useState)(null);
|
|
16780
|
-
const validate = (0, import_react.useCallback)((inputValue) => {
|
|
17784
|
+
const validate = (0, import_react.useCallback)(async (inputValue) => {
|
|
16781
17785
|
setValue(inputValue);
|
|
16782
17786
|
if (!inputValue) {
|
|
16783
17787
|
setError(null);
|
|
16784
17788
|
setResult(null);
|
|
16785
17789
|
return true;
|
|
16786
17790
|
}
|
|
16787
|
-
|
|
16788
|
-
|
|
16789
|
-
|
|
16790
|
-
|
|
16791
|
-
if (
|
|
16792
|
-
|
|
16793
|
-
|
|
16794
|
-
|
|
16795
|
-
onPIIDetected
|
|
17791
|
+
try {
|
|
17792
|
+
const detection = await detector.detect(inputValue);
|
|
17793
|
+
setResult(detection);
|
|
17794
|
+
const relevantDetections = types.length > 0 ? detection.detections.filter((d) => types.includes(d.type)) : detection.detections;
|
|
17795
|
+
if (relevantDetections.length > 0) {
|
|
17796
|
+
if (failOnPII) {
|
|
17797
|
+
setError(`Sensitive information detected: ${relevantDetections[0].type}`);
|
|
17798
|
+
}
|
|
17799
|
+
if (onPIIDetected) {
|
|
17800
|
+
onPIIDetected(detection);
|
|
17801
|
+
}
|
|
17802
|
+
return false;
|
|
16796
17803
|
}
|
|
17804
|
+
setError(null);
|
|
17805
|
+
return true;
|
|
17806
|
+
} catch (error2) {
|
|
17807
|
+
setError("Validation failed");
|
|
16797
17808
|
return false;
|
|
16798
17809
|
}
|
|
16799
|
-
setError(null);
|
|
16800
|
-
return true;
|
|
16801
17810
|
}, [detector, failOnPII, types, onPIIDetected]);
|
|
16802
17811
|
const getFieldProps = (0, import_react.useCallback)(() => ({
|
|
16803
17812
|
value,
|
|
@@ -16824,7 +17833,7 @@ function useBatchDetector(options) {
|
|
|
16824
17833
|
setProgress(0);
|
|
16825
17834
|
const detections = [];
|
|
16826
17835
|
for (let i = 0; i < texts.length; i++) {
|
|
16827
|
-
const result = detector.detect(texts[i]);
|
|
17836
|
+
const result = await detector.detect(texts[i]);
|
|
16828
17837
|
detections.push(result);
|
|
16829
17838
|
setProgress((i + 1) / texts.length * 100);
|
|
16830
17839
|
await new Promise((resolve) => setTimeout(resolve, 0));
|
|
@@ -16861,9 +17870,12 @@ function useAutoRedact(options) {
|
|
|
16861
17870
|
setResult(null);
|
|
16862
17871
|
return;
|
|
16863
17872
|
}
|
|
16864
|
-
const timer = setTimeout(() => {
|
|
16865
|
-
|
|
16866
|
-
|
|
17873
|
+
const timer = setTimeout(async () => {
|
|
17874
|
+
try {
|
|
17875
|
+
const detection = await detector.detect(text);
|
|
17876
|
+
setResult(detection);
|
|
17877
|
+
} catch (error) {
|
|
17878
|
+
}
|
|
16867
17879
|
}, debounce);
|
|
16868
17880
|
return () => clearTimeout(timer);
|
|
16869
17881
|
}, [text, detector, debounce]);
|
|
@@ -16992,7 +18004,7 @@ var TenantManager = class {
|
|
|
16992
18004
|
await this.checkQuotas(tenantId, text);
|
|
16993
18005
|
this.trackRequest(tenantId, text);
|
|
16994
18006
|
const detector = this.getDetector(tenantId);
|
|
16995
|
-
const result = detector.detect(text);
|
|
18007
|
+
const result = await detector.detect(text);
|
|
16996
18008
|
const usage = this.usage.get(tenantId);
|
|
16997
18009
|
usage.piiDetectedThisMonth += result.detections.length;
|
|
16998
18010
|
usage.lastRequestAt = /* @__PURE__ */ new Date();
|
|
@@ -17279,6 +18291,7 @@ var DEFAULT_TIER_QUOTAS = {
|
|
|
17279
18291
|
// src/webhooks/WebhookManager.ts
|
|
17280
18292
|
var WebhookManager = class {
|
|
17281
18293
|
// 1 minute
|
|
18294
|
+
// private readonly HALF_OPEN_MAX_REQUESTS = 1; // Reserved for future use
|
|
17282
18295
|
constructor(options) {
|
|
17283
18296
|
this.webhooks = /* @__PURE__ */ new Map();
|
|
17284
18297
|
this.deliveryHistory = [];
|
|
@@ -17550,9 +18563,9 @@ var WebhookManager = class {
|
|
|
17550
18563
|
*/
|
|
17551
18564
|
async makeHttpRequest(webhook, event) {
|
|
17552
18565
|
try {
|
|
17553
|
-
let
|
|
18566
|
+
let fetch2;
|
|
17554
18567
|
try {
|
|
17555
|
-
|
|
18568
|
+
fetch2 = globalThis.fetch;
|
|
17556
18569
|
} catch {
|
|
17557
18570
|
throw new Error("[WebhookManager] HTTP client not available. Requires Node 18+ with fetch support.");
|
|
17558
18571
|
}
|
|
@@ -17572,7 +18585,7 @@ var WebhookManager = class {
|
|
|
17572
18585
|
const controller = new AbortController();
|
|
17573
18586
|
const timeoutId = setTimeout(() => controller.abort(), webhook.timeout);
|
|
17574
18587
|
try {
|
|
17575
|
-
const response = await
|
|
18588
|
+
const response = await fetch2(webhook.url, {
|
|
17576
18589
|
method: "POST",
|
|
17577
18590
|
headers,
|
|
17578
18591
|
body: JSON.stringify(event),
|
|
@@ -17943,7 +18956,7 @@ var APIServer = class {
|
|
|
17943
18956
|
if (req.tenantId && this.config.tenantManager) {
|
|
17944
18957
|
result = await this.config.tenantManager.detect(req.tenantId, text);
|
|
17945
18958
|
} else if (this.detector) {
|
|
17946
|
-
result = this.detector.detect(text);
|
|
18959
|
+
result = await this.detector.detect(text);
|
|
17947
18960
|
} else {
|
|
17948
18961
|
throw new Error("No detector available");
|
|
17949
18962
|
}
|
|
@@ -17984,7 +18997,7 @@ var APIServer = class {
|
|
|
17984
18997
|
if (req.tenantId && this.config.tenantManager) {
|
|
17985
18998
|
result = await this.config.tenantManager.detect(req.tenantId, text);
|
|
17986
18999
|
} else if (this.detector) {
|
|
17987
|
-
result = this.detector.detect(text);
|
|
19000
|
+
result = await this.detector.detect(text);
|
|
17988
19001
|
} else {
|
|
17989
19002
|
throw new Error("No detector available");
|
|
17990
19003
|
}
|
|
@@ -18407,10 +19420,12 @@ init_HealthCheck();
|
|
|
18407
19420
|
analyzeFullContext,
|
|
18408
19421
|
calculateContextConfidence,
|
|
18409
19422
|
calculateRisk,
|
|
19423
|
+
callAIDetect,
|
|
18410
19424
|
ccpaPreset,
|
|
18411
19425
|
commonFalsePositives,
|
|
18412
19426
|
compileSafeRegex,
|
|
18413
19427
|
contactPatterns,
|
|
19428
|
+
convertAIEntityToDetection,
|
|
18414
19429
|
createAPIServer,
|
|
18415
19430
|
createBatchProcessor,
|
|
18416
19431
|
createCacheDisabledError,
|
|
@@ -18445,12 +19460,16 @@ init_HealthCheck();
|
|
|
18445
19460
|
createXlsxProcessor,
|
|
18446
19461
|
defaultPasses,
|
|
18447
19462
|
detectPII,
|
|
19463
|
+
detectionsOverlap,
|
|
19464
|
+
educationPreset,
|
|
18448
19465
|
exportForVersionControl,
|
|
18449
19466
|
extractContext,
|
|
18450
19467
|
filterFalsePositives,
|
|
19468
|
+
financePreset,
|
|
18451
19469
|
financialPatterns,
|
|
18452
19470
|
gdprPreset,
|
|
18453
19471
|
generateReport,
|
|
19472
|
+
getAIEndpoint,
|
|
18454
19473
|
getPatternsByCategory,
|
|
18455
19474
|
getPredefinedRole,
|
|
18456
19475
|
getPreset,
|
|
@@ -18458,21 +19477,26 @@ init_HealthCheck();
|
|
|
18458
19477
|
governmentPatterns,
|
|
18459
19478
|
groupPatternsByPass,
|
|
18460
19479
|
healthCheckMiddleware,
|
|
19480
|
+
healthcarePreset,
|
|
19481
|
+
healthcareResearchPreset,
|
|
18461
19482
|
hipaaPreset,
|
|
18462
19483
|
inferDocumentType,
|
|
18463
19484
|
isFalsePositive,
|
|
18464
19485
|
isUnsafePattern,
|
|
19486
|
+
mergeAIEntities,
|
|
18465
19487
|
mergePassDetections,
|
|
18466
19488
|
networkPatterns,
|
|
18467
19489
|
openredactionMiddleware,
|
|
18468
19490
|
personalPatterns,
|
|
18469
19491
|
safeExec,
|
|
18470
19492
|
safeExecAll,
|
|
19493
|
+
transportLogisticsPreset,
|
|
18471
19494
|
useAutoRedact,
|
|
18472
19495
|
useBatchDetector,
|
|
18473
19496
|
useFormFieldValidator,
|
|
18474
19497
|
useOpenRedaction,
|
|
18475
19498
|
usePIIDetector,
|
|
19499
|
+
validateAIEntity,
|
|
18476
19500
|
validateEmail,
|
|
18477
19501
|
validateIBAN,
|
|
18478
19502
|
validateLuhn,
|