openredaction 1.0.0 → 1.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +57 -1
- package/dist/index.d.ts +112 -22
- package/dist/index.js +650 -223
- package/dist/index.mjs +592 -180
- package/package.json +12 -27
- package/dist/HealthCheck-A5OD4ATR.mjs +0 -12
- package/dist/HealthCheck-A5OD4ATR.mjs.map +0 -1
- package/dist/chunk-7OGNW2MU.mjs +0 -1701
- package/dist/chunk-7OGNW2MU.mjs.map +0 -1
- package/dist/chunk-MYYLGNXS.mjs +0 -149
- package/dist/chunk-MYYLGNXS.mjs.map +0 -1
- package/dist/chunk-WMJKH4XE.mjs +0 -34
- package/dist/chunk-WMJKH4XE.mjs.map +0 -1
- package/dist/chunk-ZRHGDEPC.mjs +0 -297
- package/dist/chunk-ZRHGDEPC.mjs.map +0 -1
- package/dist/cli/test-pattern.js +0 -430
- package/dist/document-AOMZP7UR.mjs +0 -26
- package/dist/document-AOMZP7UR.mjs.map +0 -1
- package/dist/index.cli.js +0 -15093
- package/dist/index.d.mts +0 -4111
- package/dist/index.js.map +0 -1
- package/dist/index.mjs.map +0 -1
- package/dist/workers-RMN5POM6.mjs +0 -10
- package/dist/workers-RMN5POM6.mjs.map +0 -1
package/dist/index.js
CHANGED
|
@@ -311,7 +311,7 @@ var init_HealthCheck = __esm({
|
|
|
311
311
|
try {
|
|
312
312
|
if (options.testDetection !== false) {
|
|
313
313
|
const testText = "Test email: test@example.com";
|
|
314
|
-
const result = this.detector.detect(testText);
|
|
314
|
+
const result = await this.detector.detect(testText);
|
|
315
315
|
if (!result || !result.detections) {
|
|
316
316
|
return {
|
|
317
317
|
status: "fail",
|
|
@@ -377,7 +377,7 @@ var init_HealthCheck = __esm({
|
|
|
377
377
|
try {
|
|
378
378
|
const testText = "Test: john@example.com, phone: 555-123-4567, IP: 192.168.1.1";
|
|
379
379
|
const start = performance.now();
|
|
380
|
-
this.detector.detect(testText);
|
|
380
|
+
await this.detector.detect(testText);
|
|
381
381
|
const duration = performance.now() - start;
|
|
382
382
|
if (duration > threshold * 2) {
|
|
383
383
|
return {
|
|
@@ -708,51 +708,55 @@ var init_JsonProcessor = __esm({
|
|
|
708
708
|
/**
|
|
709
709
|
* Detect PII in JSON data
|
|
710
710
|
*/
|
|
711
|
-
detect(data, detector, options) {
|
|
711
|
+
async detect(data, detector, options) {
|
|
712
712
|
const opts = { ...this.defaultOptions, ...options };
|
|
713
713
|
const pathsDetected = [];
|
|
714
714
|
const matchesByPath = {};
|
|
715
715
|
const allDetections = [];
|
|
716
|
+
const promises = [];
|
|
716
717
|
this.traverse(data, "", opts, (path3, value, key) => {
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
if (this.shouldAlwaysRedact(path3, opts.alwaysRedact)) {
|
|
721
|
-
const detection = {
|
|
722
|
-
type: "SENSITIVE_FIELD",
|
|
723
|
-
value: String(value),
|
|
724
|
-
placeholder: `[SENSITIVE_FIELD]`,
|
|
725
|
-
position: [0, String(value).length],
|
|
726
|
-
severity: "high",
|
|
727
|
-
confidence: 1
|
|
728
|
-
};
|
|
729
|
-
matchesByPath[path3] = [detection];
|
|
730
|
-
pathsDetected.push(path3);
|
|
731
|
-
allDetections.push(detection);
|
|
732
|
-
return;
|
|
733
|
-
}
|
|
734
|
-
if (opts.scanKeys && key) {
|
|
735
|
-
const keyResult = detector.detect(key);
|
|
736
|
-
if (keyResult.detections.length > 0) {
|
|
737
|
-
const keyPath = `${path3}.__key__`;
|
|
738
|
-
matchesByPath[keyPath] = keyResult.detections;
|
|
739
|
-
pathsDetected.push(keyPath);
|
|
740
|
-
allDetections.push(...keyResult.detections);
|
|
718
|
+
promises.push((async () => {
|
|
719
|
+
if (this.shouldSkip(path3, opts.skipPaths)) {
|
|
720
|
+
return;
|
|
741
721
|
}
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
722
|
+
if (this.shouldAlwaysRedact(path3, opts.alwaysRedact)) {
|
|
723
|
+
const detection = {
|
|
724
|
+
type: "SENSITIVE_FIELD",
|
|
725
|
+
value: String(value),
|
|
726
|
+
placeholder: `[SENSITIVE_FIELD]`,
|
|
727
|
+
position: [0, String(value).length],
|
|
728
|
+
severity: "high",
|
|
729
|
+
confidence: 1
|
|
730
|
+
};
|
|
731
|
+
matchesByPath[path3] = [detection];
|
|
732
|
+
pathsDetected.push(path3);
|
|
733
|
+
allDetections.push(detection);
|
|
734
|
+
return;
|
|
735
|
+
}
|
|
736
|
+
if (opts.scanKeys && key) {
|
|
737
|
+
const keyResult = await detector.detect(key);
|
|
738
|
+
if (keyResult.detections.length > 0) {
|
|
739
|
+
const keyPath = `${path3}.__key__`;
|
|
740
|
+
matchesByPath[keyPath] = keyResult.detections;
|
|
741
|
+
pathsDetected.push(keyPath);
|
|
742
|
+
allDetections.push(...keyResult.detections);
|
|
743
|
+
}
|
|
744
|
+
}
|
|
745
|
+
const valueStr = String(value);
|
|
746
|
+
const result = await detector.detect(valueStr);
|
|
747
|
+
if (result.detections.length > 0) {
|
|
748
|
+
const boostedDetections = this.boostConfidenceFromKey(
|
|
749
|
+
result.detections,
|
|
750
|
+
key,
|
|
751
|
+
opts.piiIndicatorKeys
|
|
752
|
+
);
|
|
753
|
+
matchesByPath[path3] = boostedDetections;
|
|
754
|
+
pathsDetected.push(path3);
|
|
755
|
+
allDetections.push(...boostedDetections);
|
|
756
|
+
}
|
|
757
|
+
})());
|
|
755
758
|
});
|
|
759
|
+
await Promise.all(promises);
|
|
756
760
|
const original = JSON.stringify(data);
|
|
757
761
|
const redacted = this.redact(data, {
|
|
758
762
|
original,
|
|
@@ -959,9 +963,9 @@ var init_JsonProcessor = __esm({
|
|
|
959
963
|
/**
|
|
960
964
|
* Detect PII in JSON Lines format
|
|
961
965
|
*/
|
|
962
|
-
detectJsonLines(input, detector, options) {
|
|
966
|
+
async detectJsonLines(input, detector, options) {
|
|
963
967
|
const documents = this.parseJsonLines(input);
|
|
964
|
-
return documents.map((doc) => this.detect(doc, detector, options));
|
|
968
|
+
return Promise.all(documents.map((doc) => this.detect(doc, detector, options)));
|
|
965
969
|
}
|
|
966
970
|
};
|
|
967
971
|
}
|
|
@@ -1065,7 +1069,7 @@ var init_CsvProcessor = __esm({
|
|
|
1065
1069
|
/**
|
|
1066
1070
|
* Detect PII in CSV data
|
|
1067
1071
|
*/
|
|
1068
|
-
detect(input, detector, options) {
|
|
1072
|
+
async detect(input, detector, options) {
|
|
1069
1073
|
const opts = { ...this.defaultOptions, ...options };
|
|
1070
1074
|
const rows = this.parse(input, options);
|
|
1071
1075
|
if (rows.length === 0) {
|
|
@@ -1142,7 +1146,7 @@ var init_CsvProcessor = __esm({
|
|
|
1142
1146
|
columnStats[col].piiCount++;
|
|
1143
1147
|
continue;
|
|
1144
1148
|
}
|
|
1145
|
-
const result = detector.detect(cellValue);
|
|
1149
|
+
const result = await detector.detect(cellValue);
|
|
1146
1150
|
if (result.detections.length > 0) {
|
|
1147
1151
|
const boostedDetections = this.boostConfidenceFromColumnName(
|
|
1148
1152
|
result.detections,
|
|
@@ -1482,7 +1486,7 @@ var init_XlsxProcessor = __esm({
|
|
|
1482
1486
|
/**
|
|
1483
1487
|
* Detect PII in XLSX data
|
|
1484
1488
|
*/
|
|
1485
|
-
detect(buffer, detector, options) {
|
|
1489
|
+
async detect(buffer, detector, options) {
|
|
1486
1490
|
if (!this.xlsx) {
|
|
1487
1491
|
throw new Error(
|
|
1488
1492
|
"[XlsxProcessor] XLSX support requires xlsx package. Install with: npm install xlsx"
|
|
@@ -1497,7 +1501,7 @@ var init_XlsxProcessor = __esm({
|
|
|
1497
1501
|
for (let sheetIndex = 0; sheetIndex < sheetNames.length; sheetIndex++) {
|
|
1498
1502
|
const sheetName = sheetNames[sheetIndex];
|
|
1499
1503
|
const sheet = workbook.Sheets[sheetName];
|
|
1500
|
-
const sheetResult = this.detectSheet(
|
|
1504
|
+
const sheetResult = await this.detectSheet(
|
|
1501
1505
|
sheet,
|
|
1502
1506
|
sheetName,
|
|
1503
1507
|
sheetIndex,
|
|
@@ -1540,7 +1544,7 @@ var init_XlsxProcessor = __esm({
|
|
|
1540
1544
|
/**
|
|
1541
1545
|
* Detect PII in a single sheet
|
|
1542
1546
|
*/
|
|
1543
|
-
detectSheet(sheet, sheetName, sheetIndex, detector, options) {
|
|
1547
|
+
async detectSheet(sheet, sheetName, sheetIndex, detector, options) {
|
|
1544
1548
|
const range = this.xlsx.utils.decode_range(sheet["!ref"] || "A1");
|
|
1545
1549
|
const startRow = range.s.r;
|
|
1546
1550
|
const endRow = options.maxRows !== void 0 ? Math.min(range.e.r, startRow + options.maxRows - 1) : range.e.r;
|
|
@@ -1615,7 +1619,7 @@ var init_XlsxProcessor = __esm({
|
|
|
1615
1619
|
columnStats[colIndex].piiCount++;
|
|
1616
1620
|
continue;
|
|
1617
1621
|
}
|
|
1618
|
-
const result = detector.detect(cellValue);
|
|
1622
|
+
const result = await detector.detect(cellValue);
|
|
1619
1623
|
if (result.detections.length > 0) {
|
|
1620
1624
|
const boostedDetections = this.boostConfidenceFromColumnName(
|
|
1621
1625
|
result.detections,
|
|
@@ -2480,10 +2484,12 @@ __export(index_exports, {
|
|
|
2480
2484
|
analyzeFullContext: () => analyzeFullContext,
|
|
2481
2485
|
calculateContextConfidence: () => calculateContextConfidence,
|
|
2482
2486
|
calculateRisk: () => calculateRisk,
|
|
2487
|
+
callAIDetect: () => callAIDetect,
|
|
2483
2488
|
ccpaPreset: () => ccpaPreset,
|
|
2484
2489
|
commonFalsePositives: () => commonFalsePositives,
|
|
2485
2490
|
compileSafeRegex: () => compileSafeRegex,
|
|
2486
2491
|
contactPatterns: () => contactPatterns,
|
|
2492
|
+
convertAIEntityToDetection: () => convertAIEntityToDetection,
|
|
2487
2493
|
createAPIServer: () => createAPIServer,
|
|
2488
2494
|
createBatchProcessor: () => createBatchProcessor,
|
|
2489
2495
|
createCacheDisabledError: () => createCacheDisabledError,
|
|
@@ -2518,12 +2524,16 @@ __export(index_exports, {
|
|
|
2518
2524
|
createXlsxProcessor: () => createXlsxProcessor,
|
|
2519
2525
|
defaultPasses: () => defaultPasses,
|
|
2520
2526
|
detectPII: () => detectPII,
|
|
2527
|
+
detectionsOverlap: () => detectionsOverlap,
|
|
2528
|
+
educationPreset: () => educationPreset,
|
|
2521
2529
|
exportForVersionControl: () => exportForVersionControl,
|
|
2522
2530
|
extractContext: () => extractContext,
|
|
2523
2531
|
filterFalsePositives: () => filterFalsePositives,
|
|
2532
|
+
financePreset: () => financePreset,
|
|
2524
2533
|
financialPatterns: () => financialPatterns,
|
|
2525
2534
|
gdprPreset: () => gdprPreset,
|
|
2526
2535
|
generateReport: () => generateReport,
|
|
2536
|
+
getAIEndpoint: () => getAIEndpoint,
|
|
2527
2537
|
getPatternsByCategory: () => getPatternsByCategory,
|
|
2528
2538
|
getPredefinedRole: () => getPredefinedRole,
|
|
2529
2539
|
getPreset: () => getPreset,
|
|
@@ -2531,21 +2541,26 @@ __export(index_exports, {
|
|
|
2531
2541
|
governmentPatterns: () => governmentPatterns,
|
|
2532
2542
|
groupPatternsByPass: () => groupPatternsByPass,
|
|
2533
2543
|
healthCheckMiddleware: () => healthCheckMiddleware,
|
|
2544
|
+
healthcarePreset: () => healthcarePreset,
|
|
2545
|
+
healthcareResearchPreset: () => healthcareResearchPreset,
|
|
2534
2546
|
hipaaPreset: () => hipaaPreset,
|
|
2535
2547
|
inferDocumentType: () => inferDocumentType,
|
|
2536
2548
|
isFalsePositive: () => isFalsePositive,
|
|
2537
2549
|
isUnsafePattern: () => isUnsafePattern,
|
|
2550
|
+
mergeAIEntities: () => mergeAIEntities,
|
|
2538
2551
|
mergePassDetections: () => mergePassDetections,
|
|
2539
2552
|
networkPatterns: () => networkPatterns,
|
|
2540
2553
|
openredactionMiddleware: () => openredactionMiddleware,
|
|
2541
2554
|
personalPatterns: () => personalPatterns,
|
|
2542
2555
|
safeExec: () => safeExec,
|
|
2543
2556
|
safeExecAll: () => safeExecAll,
|
|
2557
|
+
transportLogisticsPreset: () => transportLogisticsPreset,
|
|
2544
2558
|
useAutoRedact: () => useAutoRedact,
|
|
2545
2559
|
useBatchDetector: () => useBatchDetector,
|
|
2546
2560
|
useFormFieldValidator: () => useFormFieldValidator,
|
|
2547
2561
|
useOpenRedaction: () => useOpenRedaction,
|
|
2548
2562
|
usePIIDetector: () => usePIIDetector,
|
|
2563
|
+
validateAIEntity: () => validateAIEntity,
|
|
2549
2564
|
validateEmail: () => validateEmail,
|
|
2550
2565
|
validateIBAN: () => validateIBAN,
|
|
2551
2566
|
validateLuhn: () => validateLuhn,
|
|
@@ -2763,7 +2778,7 @@ var PersistentAuditLogger = class {
|
|
|
2763
2778
|
enableHashing: options.enableHashing ?? true,
|
|
2764
2779
|
hashAlgorithm: options.hashAlgorithm ?? "sha256",
|
|
2765
2780
|
enableWAL: options.enableWAL ?? true,
|
|
2766
|
-
secretKey: options.secretKey
|
|
2781
|
+
secretKey: options.secretKey ?? void 0
|
|
2767
2782
|
};
|
|
2768
2783
|
this.adapter = this.createAdapter(options.database);
|
|
2769
2784
|
}
|
|
@@ -3109,7 +3124,8 @@ var PersistentAuditLogger = class {
|
|
|
3109
3124
|
* Start automatic cleanup schedule
|
|
3110
3125
|
*/
|
|
3111
3126
|
startCleanupSchedule() {
|
|
3112
|
-
const
|
|
3127
|
+
const cleanupIntervalHours = this.options.retention?.cleanupIntervalHours ?? 24;
|
|
3128
|
+
const intervalMs = cleanupIntervalHours * 60 * 60 * 1e3;
|
|
3113
3129
|
this.cleanupTimer = setInterval(() => {
|
|
3114
3130
|
this.runCleanup().catch((err) => {
|
|
3115
3131
|
console.error("[PersistentAuditLogger] Cleanup failed:", err);
|
|
@@ -4115,7 +4131,7 @@ function validateLuhn(cardNumber, _context) {
|
|
|
4115
4131
|
return sum % 10 === 0;
|
|
4116
4132
|
}
|
|
4117
4133
|
function validateIBAN(iban, _context) {
|
|
4118
|
-
const cleaned = iban.replace(
|
|
4134
|
+
const cleaned = iban.replace(/[\s\u00A0.-]/g, "").toUpperCase();
|
|
4119
4135
|
if (!/^[A-Z]{2}[0-9]{2}[A-Z0-9]{1,30}$/.test(cleaned)) {
|
|
4120
4136
|
return false;
|
|
4121
4137
|
}
|
|
@@ -4210,7 +4226,7 @@ function mod97(string) {
|
|
|
4210
4226
|
return remainder;
|
|
4211
4227
|
}
|
|
4212
4228
|
function validateNINO(nino, _context) {
|
|
4213
|
-
const cleaned = nino.replace(
|
|
4229
|
+
const cleaned = nino.replace(/[\s\u00A0.-]/g, "").toUpperCase();
|
|
4214
4230
|
if (!/^[A-CEGHJ-PR-TW-Z]{2}[0-9]{6}[A-D]$/.test(cleaned)) {
|
|
4215
4231
|
return false;
|
|
4216
4232
|
}
|
|
@@ -4219,7 +4235,7 @@ function validateNINO(nino, _context) {
|
|
|
4219
4235
|
return !invalidPrefixes.includes(prefix);
|
|
4220
4236
|
}
|
|
4221
4237
|
function validateNHS(nhs, _context) {
|
|
4222
|
-
const cleaned = nhs.replace(/[\s
|
|
4238
|
+
const cleaned = nhs.replace(/[\s\u00A0.-]/g, "");
|
|
4223
4239
|
if (!/^\d{10}$/.test(cleaned)) {
|
|
4224
4240
|
return false;
|
|
4225
4241
|
}
|
|
@@ -4232,11 +4248,11 @@ function validateNHS(nhs, _context) {
|
|
|
4232
4248
|
return expectedCheckDigit === parseInt(cleaned[9], 10) && checkDigit !== 10;
|
|
4233
4249
|
}
|
|
4234
4250
|
function validateUKPassport(passport, _context) {
|
|
4235
|
-
const cleaned = passport.replace(
|
|
4251
|
+
const cleaned = passport.replace(/[\s\u00A0.-]/g, "").toUpperCase();
|
|
4236
4252
|
return /^\d{9}$/.test(cleaned) || /^\d{3}\d{6}$/.test(cleaned);
|
|
4237
4253
|
}
|
|
4238
4254
|
function validateSSN(ssn, _context) {
|
|
4239
|
-
const cleaned = ssn.replace(/[\s
|
|
4255
|
+
const cleaned = ssn.replace(/[\s\u00A0.-]/g, "");
|
|
4240
4256
|
if (!/^\d{9}$/.test(cleaned)) {
|
|
4241
4257
|
return false;
|
|
4242
4258
|
}
|
|
@@ -4266,6 +4282,15 @@ function validateSortCode(sortCode, _context) {
|
|
|
4266
4282
|
const cleaned = sortCode.replace(/[\s-]/g, "");
|
|
4267
4283
|
return /^\d{6}$/.test(cleaned);
|
|
4268
4284
|
}
|
|
4285
|
+
function validateRoutingNumber(routingNumber, _context) {
|
|
4286
|
+
const cleaned = routingNumber.replace(/[\s\u00A0.-]/g, "");
|
|
4287
|
+
if (!/^\d{9}$/.test(cleaned)) {
|
|
4288
|
+
return false;
|
|
4289
|
+
}
|
|
4290
|
+
const digits = cleaned.split("").map(Number);
|
|
4291
|
+
const checksum = (3 * (digits[0] + digits[3] + digits[6]) + 7 * (digits[1] + digits[4] + digits[7]) + (digits[2] + digits[5] + digits[8])) % 10;
|
|
4292
|
+
return checksum === 0;
|
|
4293
|
+
}
|
|
4269
4294
|
function validateName(name, context) {
|
|
4270
4295
|
const businessTerms = [
|
|
4271
4296
|
"account",
|
|
@@ -4300,10 +4325,29 @@ function validateName(name, context) {
|
|
|
4300
4325
|
"sir",
|
|
4301
4326
|
"madam",
|
|
4302
4327
|
"lord",
|
|
4303
|
-
"lady"
|
|
4328
|
+
"lady",
|
|
4329
|
+
"personal",
|
|
4330
|
+
"sensitive",
|
|
4331
|
+
"information",
|
|
4332
|
+
"data",
|
|
4333
|
+
"details",
|
|
4334
|
+
"content",
|
|
4335
|
+
"document",
|
|
4336
|
+
"text",
|
|
4337
|
+
"example",
|
|
4338
|
+
"simple",
|
|
4339
|
+
"regular",
|
|
4340
|
+
"plain",
|
|
4341
|
+
"send",
|
|
4342
|
+
"reply",
|
|
4343
|
+
"reach",
|
|
4344
|
+
"write",
|
|
4345
|
+
"use",
|
|
4346
|
+
"contact",
|
|
4347
|
+
"message"
|
|
4304
4348
|
];
|
|
4305
4349
|
const nameLower = name.toLowerCase();
|
|
4306
|
-
if (businessTerms.some((term) => nameLower.includes(term))) {
|
|
4350
|
+
if (businessTerms.some((term) => nameLower === term || nameLower.includes(term))) {
|
|
4307
4351
|
return false;
|
|
4308
4352
|
}
|
|
4309
4353
|
if (name === name.toUpperCase() && name.length <= 5) {
|
|
@@ -4313,7 +4357,7 @@ function validateName(name, context) {
|
|
|
4313
4357
|
return false;
|
|
4314
4358
|
}
|
|
4315
4359
|
const contextLower = context.toLowerCase();
|
|
4316
|
-
if (contextLower.includes("company ") || contextLower.includes("business ") || contextLower.includes("organization")) {
|
|
4360
|
+
if (contextLower.includes("company ") || contextLower.includes("business ") || contextLower.includes("organization") || contextLower.includes("without any") || contextLower.includes("simple text") || contextLower.includes("plain text")) {
|
|
4317
4361
|
return false;
|
|
4318
4362
|
}
|
|
4319
4363
|
return true;
|
|
@@ -4346,11 +4390,13 @@ var personalPatterns = [
|
|
|
4346
4390
|
},
|
|
4347
4391
|
{
|
|
4348
4392
|
type: "NAME",
|
|
4349
|
-
|
|
4393
|
+
// Match names with case variations - handles "John Smith", "john smith", "JOHN SMITH", "Lucy jones", etc.
|
|
4394
|
+
// First word must start with uppercase or be all uppercase; subsequent words can be any case
|
|
4395
|
+
regex: /\b(?:(?:Mr|Mrs|Ms|Miss|Dr|Prof|Professor|Sir|Madam|Lady|Lord|Rev|Father|Sister|Brother)\.?\s+)?((?:[A-Z][a-z'’.\-]+|[A-Z]{2,})(?:\s+(?:[A-Z][a-z'’.\-]+|[A-Z]{2,}|[a-z][a-z'’.\-]+)){1,3})(?:\s+(?:Jr|Sr|II|III|IV|PhD|MD|Esq|DDS|DVM|MBA|CPA)\.?)?\b/g,
|
|
4350
4396
|
priority: 50,
|
|
4351
4397
|
validator: validateName,
|
|
4352
4398
|
placeholder: "[NAME_{n}]",
|
|
4353
|
-
description: "Person name with salutations/suffixes",
|
|
4399
|
+
description: "Person name with salutations/suffixes (handles case variations)",
|
|
4354
4400
|
severity: "high"
|
|
4355
4401
|
},
|
|
4356
4402
|
{
|
|
@@ -4371,11 +4417,26 @@ var personalPatterns = [
|
|
|
4371
4417
|
},
|
|
4372
4418
|
{
|
|
4373
4419
|
type: "DATE_OF_BIRTH",
|
|
4374
|
-
regex: /\b(?:DOB|date of birth|birth ?date)[:\s]*(
|
|
4420
|
+
regex: /\b(?:DOB|date of birth|birth ?date)[:\s-]*((?:\d{1,2}[\/\-.]\d{1,2}[\/\-.]\d{2,4})|(?:\d{1,2}\s+(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\s+\d{2,4}))\b/gi,
|
|
4375
4421
|
priority: 95,
|
|
4376
4422
|
placeholder: "[DOB_{n}]",
|
|
4377
4423
|
description: "Date of birth",
|
|
4378
4424
|
severity: "high"
|
|
4425
|
+
},
|
|
4426
|
+
{
|
|
4427
|
+
type: "DATE",
|
|
4428
|
+
regex: /\b((?:\d{1,2}[\/\-.]\d{1,2}[\/\-.]\d{2,4})|(?:\d{1,2}\s+(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\s+\d{2,4}))\b/gi,
|
|
4429
|
+
priority: 60,
|
|
4430
|
+
placeholder: "[DATE_{n}]",
|
|
4431
|
+
description: "Date (standalone, without DOB context)",
|
|
4432
|
+
severity: "medium",
|
|
4433
|
+
validator: (value, context) => {
|
|
4434
|
+
const yearPattern = /^(19|20)\d{2}$/;
|
|
4435
|
+
if (yearPattern.test(value.replace(/[\/\-.\s]/g, ""))) return false;
|
|
4436
|
+
const versionContext = /\b(version|v\d+|release|build|update)\s*[:\s]*/i;
|
|
4437
|
+
if (versionContext.test(context)) return false;
|
|
4438
|
+
return true;
|
|
4439
|
+
}
|
|
4379
4440
|
}
|
|
4380
4441
|
];
|
|
4381
4442
|
|
|
@@ -4383,7 +4444,7 @@ var personalPatterns = [
|
|
|
4383
4444
|
var financialPatterns = [
|
|
4384
4445
|
{
|
|
4385
4446
|
type: "CREDIT_CARD",
|
|
4386
|
-
regex:
|
|
4447
|
+
regex: /(?<!\d)(?:(?:\d{4}[\s\u00A0.-]?){3}\d{4}|\d{4}[\s\u00A0.-]?\d{6}[\s\u00A0.-]?\d{5})(?!\d)/g,
|
|
4387
4448
|
priority: 100,
|
|
4388
4449
|
validator: (match) => validateLuhn(match),
|
|
4389
4450
|
placeholder: "[CREDIT_CARD_{n}]",
|
|
@@ -4392,7 +4453,7 @@ var financialPatterns = [
|
|
|
4392
4453
|
},
|
|
4393
4454
|
{
|
|
4394
4455
|
type: "IBAN",
|
|
4395
|
-
regex: /\b[A-Z]{2}\d{2}[A-Z0-9]{
|
|
4456
|
+
regex: /\b([A-Z]{2}\d{2}(?:[ \u00A0.-]?[A-Z0-9]){11,30})\b/gi,
|
|
4396
4457
|
priority: 95,
|
|
4397
4458
|
validator: (match) => validateIBAN(match),
|
|
4398
4459
|
placeholder: "[IBAN_{n}]",
|
|
@@ -4401,7 +4462,7 @@ var financialPatterns = [
|
|
|
4401
4462
|
},
|
|
4402
4463
|
{
|
|
4403
4464
|
type: "BANK_ACCOUNT_UK",
|
|
4404
|
-
regex: /\b(?:account|acc)[:\s
|
|
4465
|
+
regex: /\b(?:account|acc|a\/c)[:\s#-]*((?:\d{4}[\s-]?\d{4})|(?:\d{2}[\s-]?\d{2}[\s-]?\d{4}))\b/gi,
|
|
4405
4466
|
priority: 90,
|
|
4406
4467
|
placeholder: "[BANK_ACCOUNT_{n}]",
|
|
4407
4468
|
description: "UK bank account number",
|
|
@@ -4409,7 +4470,7 @@ var financialPatterns = [
|
|
|
4409
4470
|
},
|
|
4410
4471
|
{
|
|
4411
4472
|
type: "SORT_CODE_UK",
|
|
4412
|
-
regex: /\b(?:sort[
|
|
4473
|
+
regex: /\b(?:sort[\s-]*code|SC)[:\s.-]*((?:\d{2}[\s.-]?){2}\d{2})\b/gi,
|
|
4413
4474
|
priority: 90,
|
|
4414
4475
|
validator: (match) => validateSortCode(match),
|
|
4415
4476
|
placeholder: "[SORT_CODE_{n}]",
|
|
@@ -4418,8 +4479,9 @@ var financialPatterns = [
|
|
|
4418
4479
|
},
|
|
4419
4480
|
{
|
|
4420
4481
|
type: "ROUTING_NUMBER_US",
|
|
4421
|
-
regex: /\b(?:routing|RTN|ABA)[
|
|
4482
|
+
regex: /\b(?:routing|RTN|ABA)[-\s\u00A0]*(?:number|no|num)?[-\s\u00A0.:#]*((?:\d[\s\u00A0.-]?){9})\b/gi,
|
|
4422
4483
|
priority: 90,
|
|
4484
|
+
validator: (match) => validateRoutingNumber(match),
|
|
4423
4485
|
placeholder: "[ROUTING_NUMBER_{n}]",
|
|
4424
4486
|
description: "US routing number",
|
|
4425
4487
|
severity: "high"
|
|
@@ -4434,11 +4496,15 @@ var financialPatterns = [
|
|
|
4434
4496
|
},
|
|
4435
4497
|
{
|
|
4436
4498
|
type: "IFSC",
|
|
4437
|
-
regex: /\b[A-Z]{4}0[A-Z0-9]{6}\b/
|
|
4499
|
+
regex: /\b([A-Z]{4})[-\s\u00A0.]?0[-\s\u00A0.]?([A-Z0-9]{6})\b/gi,
|
|
4438
4500
|
priority: 90,
|
|
4439
4501
|
placeholder: "[IFSC_{n}]",
|
|
4440
4502
|
description: "Indian Financial System Code",
|
|
4441
|
-
severity: "high"
|
|
4503
|
+
severity: "high",
|
|
4504
|
+
validator: (value) => {
|
|
4505
|
+
const cleaned = value.replace(/[\s\u00A0.-]/g, "").toUpperCase();
|
|
4506
|
+
return /^[A-Z]{4}0[A-Z0-9]{6}$/.test(cleaned);
|
|
4507
|
+
}
|
|
4442
4508
|
},
|
|
4443
4509
|
{
|
|
4444
4510
|
type: "CLABE",
|
|
@@ -4592,7 +4658,7 @@ var financialPatterns = [
|
|
|
4592
4658
|
var governmentPatterns = [
|
|
4593
4659
|
{
|
|
4594
4660
|
type: "SSN",
|
|
4595
|
-
regex: /\b(?:SSN|social
|
|
4661
|
+
regex: /\b(?:SSN|social\s+security)\b[:\s#-]*([0-9]{3}[\s\u00A0.-]?[0-9]{2}[\s\u00A0.-]?[0-9]{4})\b/gi,
|
|
4596
4662
|
priority: 100,
|
|
4597
4663
|
validator: (match) => validateSSN(match),
|
|
4598
4664
|
placeholder: "[SSN_{n}]",
|
|
@@ -4601,7 +4667,7 @@ var governmentPatterns = [
|
|
|
4601
4667
|
},
|
|
4602
4668
|
{
|
|
4603
4669
|
type: "PASSPORT_UK",
|
|
4604
|
-
regex: /\b(?:passport|pass)[:\s
|
|
4670
|
+
regex: /\b(?:passport|pass)[:\s#-]*((?:\d{3}[\s\u00A0.-]?){2}\d{3})\b/gi,
|
|
4605
4671
|
priority: 95,
|
|
4606
4672
|
validator: (match) => validateUKPassport(match),
|
|
4607
4673
|
placeholder: "[PASSPORT_{n}]",
|
|
@@ -4610,7 +4676,7 @@ var governmentPatterns = [
|
|
|
4610
4676
|
},
|
|
4611
4677
|
{
|
|
4612
4678
|
type: "PASSPORT_US",
|
|
4613
|
-
regex: /\b(?:passport|pass)[:\s
|
|
4679
|
+
regex: /\b(?:passport|pass)[:\s#-]*(([A-Z0-9][\s\u00A0.-]?){5,8}[A-Z0-9])\b/gi,
|
|
4614
4680
|
priority: 95,
|
|
4615
4681
|
placeholder: "[PASSPORT_{n}]",
|
|
4616
4682
|
description: "US Passport number",
|
|
@@ -4618,7 +4684,7 @@ var governmentPatterns = [
|
|
|
4618
4684
|
},
|
|
4619
4685
|
{
|
|
4620
4686
|
type: "NATIONAL_INSURANCE_UK",
|
|
4621
|
-
regex: /\b(?:NI|NINO|national
|
|
4687
|
+
regex: /\b(?:NI\b|NINO|national\s+insurance)[:\s#-]*([A-CEGHJ-PR-TW-Z]{2}(?:[\s\u00A0.-]?\d{2}){3}[\s\u00A0.-]?[A-D])\b/gi,
|
|
4622
4688
|
priority: 100,
|
|
4623
4689
|
validator: (match) => validateNINO(match),
|
|
4624
4690
|
placeholder: "[NINO_{n}]",
|
|
@@ -4627,7 +4693,7 @@ var governmentPatterns = [
|
|
|
4627
4693
|
},
|
|
4628
4694
|
{
|
|
4629
4695
|
type: "NHS_NUMBER",
|
|
4630
|
-
regex: /\b(?:NHS|nhs number)[:\s
|
|
4696
|
+
regex: /\b(?:NHS|nhs number)[:\s#-]*((?:\d{3}[\s\u00A0.-]?){2}\d{4})\b/gi,
|
|
4631
4697
|
priority: 95,
|
|
4632
4698
|
validator: (match) => validateNHS(match),
|
|
4633
4699
|
placeholder: "[NHS_{n}]",
|
|
@@ -4636,15 +4702,27 @@ var governmentPatterns = [
|
|
|
4636
4702
|
},
|
|
4637
4703
|
{
|
|
4638
4704
|
type: "DRIVING_LICENSE_UK",
|
|
4639
|
-
regex: /\b([A-Z]{5}\d{
|
|
4705
|
+
regex: /\b(?:DL|DRIVING|DRIVER(?:'S)?|LICEN[SC]E)?[\s#:-]*(?:NO|NUM(?:BER)?|ID)?[\s#:-]*([A-Z]{5}[\s\u00A0.-]?\d{2}[\s\u00A0.-]?\d{2}[\s\u00A0.-]?\d{2}[\s\u00A0.-]?[A-Z]{2}[\s\u00A0.-]?\d[\s\u00A0.-]?[A-Z]{2})\b/gi,
|
|
4640
4706
|
priority: 90,
|
|
4641
4707
|
placeholder: "[DRIVING_LICENSE_{n}]",
|
|
4642
4708
|
description: "UK Driving License",
|
|
4643
|
-
severity: "high"
|
|
4709
|
+
severity: "high",
|
|
4710
|
+
validator: (value) => {
|
|
4711
|
+
const normalized = value.replace(/[\s\u00A0.-]/g, "").toUpperCase();
|
|
4712
|
+
if (!/^[A-Z]{5}\d{6}[A-Z]{2}\d[A-Z]{2}$/.test(normalized)) {
|
|
4713
|
+
return false;
|
|
4714
|
+
}
|
|
4715
|
+
const dob = normalized.slice(5, 11);
|
|
4716
|
+
const month = parseInt(dob.slice(2, 4), 10);
|
|
4717
|
+
const day = parseInt(dob.slice(4, 6), 10);
|
|
4718
|
+
const validMonth = month >= 1 && month <= 12 || month >= 51 && month <= 62;
|
|
4719
|
+
const validDay = day >= 1 && day <= 31;
|
|
4720
|
+
return validMonth && validDay;
|
|
4721
|
+
}
|
|
4644
4722
|
},
|
|
4645
4723
|
{
|
|
4646
4724
|
type: "DRIVING_LICENSE_US",
|
|
4647
|
-
regex: /\b(?:DL|driver(?:'s)?\slicense)[:\s
|
|
4725
|
+
regex: /\b(?:DL|driver(?:'s)?\slicense)[:\s#-]*([A-Z0-9](?:[A-Z0-9][\s\u00A0.-]?){3,18}[A-Z0-9])\b/gi,
|
|
4648
4726
|
priority: 90,
|
|
4649
4727
|
placeholder: "[DRIVING_LICENSE_{n}]",
|
|
4650
4728
|
description: "US Driving License",
|
|
@@ -4652,7 +4730,7 @@ var governmentPatterns = [
|
|
|
4652
4730
|
},
|
|
4653
4731
|
{
|
|
4654
4732
|
type: "TAX_ID",
|
|
4655
|
-
regex: /\b(?:TIN|tax id|EIN)[:\s
|
|
4733
|
+
regex: /\b(?:TIN|tax id|EIN)[:\s#-]*(\d{2}(?:[\s\u00A0.-]?\d){7})\b/gi,
|
|
4656
4734
|
priority: 95,
|
|
4657
4735
|
placeholder: "[TAX_ID_{n}]",
|
|
4658
4736
|
description: "Tax identification number",
|
|
@@ -4660,7 +4738,7 @@ var governmentPatterns = [
|
|
|
4660
4738
|
},
|
|
4661
4739
|
{
|
|
4662
4740
|
type: "PASSPORT_MRZ_TD3",
|
|
4663
|
-
regex: /P<[A-Z]{3}[A-Z<]{39}\n[A-Z0-9<]{9}[0-9][A-Z]{3}[0-9]{6}[0-9][MF<][0-9]{6}[0-9][A-Z0-9<]{14}[0-9]/g,
|
|
4741
|
+
regex: /P<[A-Z]{3}[A-Z<]{39}\r?\n[A-Z0-9<]{9}[0-9][A-Z]{3}[0-9]{6}[0-9][MF<][0-9]{6}[0-9][A-Z0-9<]{14}[0-9]/g,
|
|
4664
4742
|
priority: 98,
|
|
4665
4743
|
placeholder: "[PASSPORT_MRZ_{n}]",
|
|
4666
4744
|
description: "Passport Machine Readable Zone (TD3 - 2 lines x 44 chars)",
|
|
@@ -4668,7 +4746,7 @@ var governmentPatterns = [
|
|
|
4668
4746
|
},
|
|
4669
4747
|
{
|
|
4670
4748
|
type: "PASSPORT_MRZ_TD1",
|
|
4671
|
-
regex: /[A-Z]{1}[A-Z<][A-Z]{3}[A-Z0-9<]{9}[0-9][A-Z0-9<]{15}\n[0-9]{6}[0-9][MF<][0-9]{6}[0-9][A-Z]{3}[A-Z0-9<]{11}[0-9]\n[A-Z<]{30}/g,
|
|
4749
|
+
regex: /[A-Z]{1}[A-Z<][A-Z]{3}[A-Z0-9<]{9}[0-9][A-Z0-9<]{15}\r?\n[0-9]{6}[0-9][MF<][0-9]{6}[0-9][A-Z]{3}[A-Z0-9<]{11}[0-9]\r?\n[A-Z<]{30}/g,
|
|
4672
4750
|
priority: 98,
|
|
4673
4751
|
placeholder: "[ID_MRZ_{n}]",
|
|
4674
4752
|
description: "ID Card Machine Readable Zone (TD1 - 3 lines x 30 chars)",
|
|
@@ -4676,7 +4754,7 @@ var governmentPatterns = [
|
|
|
4676
4754
|
},
|
|
4677
4755
|
{
|
|
4678
4756
|
type: "VISA_MRZ",
|
|
4679
|
-
regex: /V<[A-Z]{3}[A-Z<]{39}\n[A-Z0-9<]{9}[0-9][A-Z]{3}[0-9]{6}[0-9][MF<][0-9]{6}[0-9][A-Z0-9<]{14}[0-9]/g,
|
|
4757
|
+
regex: /V<[A-Z]{3}[A-Z<]{39}\r?\n[A-Z0-9<]{9}[0-9][A-Z]{3}[0-9]{6}[0-9][MF<][0-9]{6}[0-9][A-Z0-9<]{14}[0-9]/g,
|
|
4680
4758
|
priority: 98,
|
|
4681
4759
|
placeholder: "[VISA_MRZ_{n}]",
|
|
4682
4760
|
description: "Visa Machine Readable Zone",
|
|
@@ -4684,7 +4762,7 @@ var governmentPatterns = [
|
|
|
4684
4762
|
},
|
|
4685
4763
|
{
|
|
4686
4764
|
type: "TRAVEL_DOCUMENT_NUMBER",
|
|
4687
|
-
regex: /\b(?:TRAVEL\s+DOC(?:UMENT)?|TD)[:\s
|
|
4765
|
+
regex: /\b(?:TRAVEL\s+DOC(?:UMENT)?|TD)[:\s#-]*([A-Z0-9](?:[A-Z0-9][\s\u00A0.-]?){4,13}[A-Z0-9])\b/gi,
|
|
4688
4766
|
priority: 92,
|
|
4689
4767
|
placeholder: "[TRAVEL_DOC_{n}]",
|
|
4690
4768
|
description: "Travel document numbers",
|
|
@@ -4695,7 +4773,7 @@ var governmentPatterns = [
|
|
|
4695
4773
|
},
|
|
4696
4774
|
{
|
|
4697
4775
|
type: "VISA_NUMBER",
|
|
4698
|
-
regex: /\b(?:VISA)[:\s
|
|
4776
|
+
regex: /\b(?:VISA)[:\s#-]*([A-Z0-9](?:[A-Z0-9][\s\u00A0.-]?){6,10}[A-Z0-9])\b/gi,
|
|
4699
4777
|
priority: 92,
|
|
4700
4778
|
placeholder: "[VISA_{n}]",
|
|
4701
4779
|
description: "Visa numbers",
|
|
@@ -4706,7 +4784,7 @@ var governmentPatterns = [
|
|
|
4706
4784
|
},
|
|
4707
4785
|
{
|
|
4708
4786
|
type: "IMMIGRATION_NUMBER",
|
|
4709
|
-
regex: /\b(?:IMMIGRATION|ALIEN|A-NUMBER|A#)[:\s
|
|
4787
|
+
regex: /\b(?:IMMIGRATION|ALIEN|A-NUMBER|A#)[:\s#-]*([A-Z]?(?:\d[\s\u00A0.-]?){7,9})\b/gi,
|
|
4710
4788
|
priority: 92,
|
|
4711
4789
|
placeholder: "[IMMIGRATION_{n}]",
|
|
4712
4790
|
description: "Immigration and alien registration numbers",
|
|
@@ -4714,7 +4792,7 @@ var governmentPatterns = [
|
|
|
4714
4792
|
},
|
|
4715
4793
|
{
|
|
4716
4794
|
type: "BORDER_CROSSING_CARD",
|
|
4717
|
-
regex: /\b(?:BCC|BORDER\s+CROSSING)[:\s
|
|
4795
|
+
regex: /\b(?:BCC|BORDER\s+CROSSING)[:\s#-]*([A-Z0-9](?:[A-Z0-9\s\u00A0.-]?){8,13}[A-Z0-9])\b/gi,
|
|
4718
4796
|
priority: 90,
|
|
4719
4797
|
placeholder: "[BCC_{n}]",
|
|
4720
4798
|
description: "Border crossing card numbers",
|
|
@@ -4725,7 +4803,7 @@ var governmentPatterns = [
|
|
|
4725
4803
|
},
|
|
4726
4804
|
{
|
|
4727
4805
|
type: "UTR_UK",
|
|
4728
|
-
regex: /\b(?:UTR|unique taxpayer reference)[:\s
|
|
4806
|
+
regex: /\b(?:UTR|unique taxpayer reference)[:\s#-]*((?:\d[\s\u00A0.-]?){10})\b/gi,
|
|
4729
4807
|
priority: 95,
|
|
4730
4808
|
validator: (match) => {
|
|
4731
4809
|
const digits = match.replace(/\D/g, "");
|
|
@@ -4737,10 +4815,10 @@ var governmentPatterns = [
|
|
|
4737
4815
|
},
|
|
4738
4816
|
{
|
|
4739
4817
|
type: "VAT_NUMBER",
|
|
4740
|
-
regex: /\b(?:VAT|vat number)[:\s
|
|
4818
|
+
regex: /\b(?:VAT|vat number)[:\s#-]*([A-Z]{2}(?:[\s\u00A0.-]?[A-Z0-9]){7,12})\b/gi,
|
|
4741
4819
|
priority: 90,
|
|
4742
4820
|
validator: (match) => {
|
|
4743
|
-
const cleaned = match.replace(
|
|
4821
|
+
const cleaned = match.replace(/[\s\u00A0.-]/g, "");
|
|
4744
4822
|
const countryCode = cleaned.substring(0, 2).toUpperCase();
|
|
4745
4823
|
const validCountries = ["GB", "DE", "FR", "IT", "ES", "NL", "BE", "AT", "PL", "SE", "DK", "FI", "IE", "PT", "CZ", "HU", "RO", "BG", "GR", "HR", "SK", "SI", "LT", "LV", "EE", "CY", "LU", "MT"];
|
|
4746
4824
|
if (!validCountries.includes(countryCode)) {
|
|
@@ -4819,7 +4897,7 @@ var governmentPatterns = [
|
|
|
4819
4897
|
var contactPatterns = [
|
|
4820
4898
|
{
|
|
4821
4899
|
type: "PHONE_UK_MOBILE",
|
|
4822
|
-
regex: /\
|
|
4900
|
+
regex: /\b(?:\+?44[\s.-]?7\d{3}|0?7\d{3})[\s.-]?\d{3}[\s.-]?\d{3}\b/g,
|
|
4823
4901
|
priority: 90,
|
|
4824
4902
|
placeholder: "[PHONE_UK_MOBILE_{n}]",
|
|
4825
4903
|
description: "UK mobile phone",
|
|
@@ -4827,7 +4905,7 @@ var contactPatterns = [
|
|
|
4827
4905
|
},
|
|
4828
4906
|
{
|
|
4829
4907
|
type: "PHONE_UK",
|
|
4830
|
-
regex: /\b(?:0[1-9]\d{1,
|
|
4908
|
+
regex: /\b(?:\+?44[\s.-]?(?:0)?\s*)?(?:\(?0?[1-9]\d{1,3}\)?[\s.-]?\d{3,4}[\s.-]?\d{3,4})(?:\s?(?:ext\.?|x)\s?\d{1,5})?\b/g,
|
|
4831
4909
|
priority: 85,
|
|
4832
4910
|
placeholder: "[PHONE_UK_{n}]",
|
|
4833
4911
|
description: "UK phone number",
|
|
@@ -4835,15 +4913,23 @@ var contactPatterns = [
|
|
|
4835
4913
|
},
|
|
4836
4914
|
{
|
|
4837
4915
|
type: "PHONE_US",
|
|
4838
|
-
regex:
|
|
4916
|
+
regex: /\b(?:\+1[\s.-]?)?(?:\(\d{3}\)|\d{3})[\s.-]?\d{3}[\s.-]?\d{4}(?:\s?(?:ext\.?|x)\s?\d{1,6})?\b/g,
|
|
4839
4917
|
priority: 85,
|
|
4840
4918
|
placeholder: "[PHONE_US_{n}]",
|
|
4841
4919
|
description: "US phone number",
|
|
4842
|
-
severity: "medium"
|
|
4920
|
+
severity: "medium",
|
|
4921
|
+
validator: (value, context) => {
|
|
4922
|
+
const versionContext = /\b(version|v\d+|release|build)\s*[:\s]*/i;
|
|
4923
|
+
if (versionContext.test(context)) return false;
|
|
4924
|
+
const cleaned = value.replace(/[\s()-]/g, "");
|
|
4925
|
+
const datePattern = /^\d{2}[-/]\d{2}[-/]\d{4}$/;
|
|
4926
|
+
if (datePattern.test(cleaned)) return false;
|
|
4927
|
+
return true;
|
|
4928
|
+
}
|
|
4843
4929
|
},
|
|
4844
4930
|
{
|
|
4845
4931
|
type: "PHONE_INTERNATIONAL",
|
|
4846
|
-
regex: /\b
|
|
4932
|
+
regex: /\b\+(?:\d[\s.\-()]?){6,14}\d(?:\s?(?:ext\.?|x)\s?\d{1,6})?\b/g,
|
|
4847
4933
|
priority: 80,
|
|
4848
4934
|
placeholder: "[PHONE_{n}]",
|
|
4849
4935
|
description: "International phone number",
|
|
@@ -4867,7 +4953,7 @@ var contactPatterns = [
|
|
|
4867
4953
|
},
|
|
4868
4954
|
{
|
|
4869
4955
|
type: "ADDRESS_STREET",
|
|
4870
|
-
regex: /\b
|
|
4956
|
+
regex: /\b\d{1,5}\s+[A-Za-z0-9][A-Za-z0-9'’.\-]*(?:\s+[A-Za-z0-9][A-Za-z0-9'’.\-]*){0,4}\s+(?:Street|St\.?|Road|Rd\.?|Avenue|Ave\.?|Lane|Ln\.?|Drive|Dr\.?|Court|Ct\.?|Boulevard|Blvd\.?|Way|Terrace|Ter\.?|Place|Pl\.?|Trail|Trl\.?|Parkway|Pkwy\.?|Highway|Hwy\.)(?:\s+(?:Apt|Unit|Suite|Ste)\s*\d+)?\b/gi,
|
|
4871
4957
|
priority: 70,
|
|
4872
4958
|
placeholder: "[ADDRESS_{n}]",
|
|
4873
4959
|
description: "Street address",
|
|
@@ -5218,15 +5304,20 @@ var BIOBANK_SAMPLE_ID = {
|
|
|
5218
5304
|
};
|
|
5219
5305
|
var PROVIDER_LICENSE = {
|
|
5220
5306
|
type: "PROVIDER_LICENSE",
|
|
5221
|
-
regex: /\b(?:MEDICAL|PHYSICIAN|DOCTOR|NURSE|PROVIDER)[-\s]
|
|
5307
|
+
regex: /\b(?:MEDICAL|PHYSICIAN|DOCTOR|NURSE|PROVIDER)[-\s\u00A0]*(?:LICENSE|LICENCE|LIC)[-\s\u00A0]*(?:NO|NUM(?:BER)?)?[-\s\u00A0.:#]*((?:[A-Z0-9]{2,6}[\s\u00A0./-]?){1,3}[A-Z0-9]{2,6})\b/gi,
|
|
5222
5308
|
placeholder: "[PROVIDER_LIC_{n}]",
|
|
5223
5309
|
priority: 80,
|
|
5224
5310
|
severity: "high",
|
|
5225
|
-
description: "Healthcare provider license numbers"
|
|
5311
|
+
description: "Healthcare provider license numbers",
|
|
5312
|
+
validator: (value) => {
|
|
5313
|
+
const normalized = value.replace(/[^A-Za-z0-9]/g, "");
|
|
5314
|
+
if (normalized.length < 6 || normalized.length > 18) return false;
|
|
5315
|
+
return /[A-Z]/i.test(normalized) && /\d/.test(normalized);
|
|
5316
|
+
}
|
|
5226
5317
|
};
|
|
5227
5318
|
var NPI_NUMBER = {
|
|
5228
5319
|
type: "NPI_NUMBER",
|
|
5229
|
-
regex: /\b(?:NPI[-\s]
|
|
5320
|
+
regex: /\b(?:NPI[-\s\u00A0]*(?:NO|NUM(?:BER)?)?[-\s\u00A0.:#]*)?((?:\d[\s\u00A0.-]?){10})\b/g,
|
|
5230
5321
|
placeholder: "[NPI_{n}]",
|
|
5231
5322
|
priority: 85,
|
|
5232
5323
|
severity: "high",
|
|
@@ -5235,7 +5326,8 @@ var NPI_NUMBER = {
|
|
|
5235
5326
|
if (!/provider|npi|physician|doctor|clinic|hospital|practice/i.test(context)) {
|
|
5236
5327
|
return false;
|
|
5237
5328
|
}
|
|
5238
|
-
const digits = value.split("").map(Number);
|
|
5329
|
+
const digits = value.replace(/\D/g, "").split("").map(Number);
|
|
5330
|
+
if (digits.length !== 10) return false;
|
|
5239
5331
|
let sum = 0;
|
|
5240
5332
|
for (let i = digits.length - 2; i >= 0; i--) {
|
|
5241
5333
|
let digit = digits[i];
|
|
@@ -5251,17 +5343,19 @@ var NPI_NUMBER = {
|
|
|
5251
5343
|
};
|
|
5252
5344
|
var DEA_NUMBER = {
|
|
5253
5345
|
type: "DEA_NUMBER",
|
|
5254
|
-
regex: /\b(?:DEA[-\s]
|
|
5346
|
+
regex: /\b(?:DEA[-\s\u00A0]*(?:NO|NUM(?:BER)?)?[-\s\u00A0.:#]*)?([A-Z]{2}(?:[\s\u00A0.-]?\d){7})\b/gi,
|
|
5255
5347
|
placeholder: "[DEA_{n}]",
|
|
5256
5348
|
priority: 90,
|
|
5257
5349
|
severity: "high",
|
|
5258
5350
|
description: "DEA registration number for controlled substances",
|
|
5259
5351
|
validator: (value, _context) => {
|
|
5352
|
+
const normalized = value.replace(/[^A-Za-z0-9]/g, "").toUpperCase();
|
|
5353
|
+
if (normalized.length !== 9) return false;
|
|
5260
5354
|
const validFirstLetters = ["A", "B", "C", "D", "E", "F", "G", "H", "J", "K", "L", "M", "P", "R", "S", "T", "U"];
|
|
5261
|
-
if (!validFirstLetters.includes(
|
|
5355
|
+
if (!validFirstLetters.includes(normalized[0])) {
|
|
5262
5356
|
return false;
|
|
5263
5357
|
}
|
|
5264
|
-
const digits =
|
|
5358
|
+
const digits = normalized.substring(2).split("").map(Number);
|
|
5265
5359
|
const sum1 = digits[0] + digits[2] + digits[4];
|
|
5266
5360
|
const sum2 = (digits[1] + digits[3] + digits[5]) * 2;
|
|
5267
5361
|
const checkDigit = (sum1 + sum2) % 10;
|
|
@@ -5286,11 +5380,16 @@ var EMERGENCY_CONTACT_MARKER = {
|
|
|
5286
5380
|
};
|
|
5287
5381
|
var BIOMETRIC_ID = {
|
|
5288
5382
|
type: "BIOMETRIC_ID",
|
|
5289
|
-
regex: /\b(?:FINGERPRINT|RETINAL?[-\s]?SCAN|IRIS[-\s]?SCAN|VOICE[-\s]?PRINT|FACIAL[-\s]?RECOGNITION|BIOMETRIC)[-\s]?(?:ID|DATA|TEMPLATE|HASH)?[-\s]
|
|
5383
|
+
regex: /\b(?:FINGERPRINT|RETINAL?[-\s\u00A0]?SCAN|IRIS[-\s\u00A0]?SCAN|VOICE[-\s\u00A0]?PRINT|FACIAL[-\s\u00A0]?RECOGNITION|BIOMETRIC)[-\s\u00A0]?(?:ID|DATA|TEMPLATE|HASH)?[-\s\u00A0.:#]*([A-Z0-9][A-Z0-9._-]{7,39})\b/gi,
|
|
5290
5384
|
placeholder: "[BIOMETRIC_{n}]",
|
|
5291
5385
|
priority: 95,
|
|
5292
5386
|
severity: "high",
|
|
5293
|
-
description: "Biometric identifier references"
|
|
5387
|
+
description: "Biometric identifier references",
|
|
5388
|
+
validator: (value) => {
|
|
5389
|
+
const normalized = value.replace(/[^A-Za-z0-9]/g, "");
|
|
5390
|
+
if (normalized.length < 8 || normalized.length > 40) return false;
|
|
5391
|
+
return /[A-Z]/i.test(normalized) && /\d/.test(normalized);
|
|
5392
|
+
}
|
|
5294
5393
|
};
|
|
5295
5394
|
var DNA_SEQUENCE = {
|
|
5296
5395
|
type: "DNA_SEQUENCE",
|
|
@@ -5319,7 +5418,7 @@ var DRUG_DOSAGE = {
|
|
|
5319
5418
|
};
|
|
5320
5419
|
var MEDICAL_IMAGE_REF = {
|
|
5321
5420
|
type: "MEDICAL_IMAGE_REF",
|
|
5322
|
-
regex: /\b(?:X[-\s]?RAY|MRI|CT[-\s]?SCAN|PET[-\s]?SCAN|ULTRASOUND|MAMMOGRAM)[-\s]?(?:IMAGE|FILE|ID)?[-\s]
|
|
5421
|
+
regex: /\b(?:X[-\s\u00A0]?RAY|MRI|CT[-\s\u00A0]?SCAN|PET[-\s\u00A0]?SCAN|ULTRASOUND|MAMMOGRAM)[-\s\u00A0]?(?:IMAGE|FILE|ID)?[-\s\u00A0.:#]*([A-Z0-9][A-Z0-9_.-]{5,23})\b/gi,
|
|
5323
5422
|
placeholder: "[IMAGE_{n}]",
|
|
5324
5423
|
priority: 80,
|
|
5325
5424
|
severity: "high",
|
|
@@ -5482,11 +5581,18 @@ var TRANSACTION_ID = {
|
|
|
5482
5581
|
};
|
|
5483
5582
|
var INVESTMENT_ACCOUNT = {
|
|
5484
5583
|
type: "INVESTMENT_ACCOUNT",
|
|
5485
|
-
regex: /\b(?:ISA|SIPP|INV(?:ESTMENT)?|PENSION|401K|IRA)[-\s]
|
|
5584
|
+
regex: /\b(?:ISA|SIPP|INV(?:ESTMENT)?|PENSION|401K|IRA)[-\s\u00A0]*(?:ACCOUNT|ACCT|A\/C)?[-\s\u00A0]*(?:NO|NUM(?:BER)?)?[-\s\u00A0.:#]*([A-Z0-9](?:[A-Z0-9][\s\u00A0./-]?){5,18}[A-Z0-9])\b/gi,
|
|
5486
5585
|
placeholder: "[INV_ACCT_{n}]",
|
|
5487
5586
|
priority: 85,
|
|
5488
5587
|
severity: "high",
|
|
5489
|
-
description: "Investment and pension account numbers"
|
|
5588
|
+
description: "Investment and pension account numbers",
|
|
5589
|
+
validator: (value, context) => {
|
|
5590
|
+
const normalized = value.replace(/[\s\u00A0./-]/g, "");
|
|
5591
|
+
const hasDigits = /\d{4,}/.test(normalized);
|
|
5592
|
+
const validLength = normalized.length >= 6 && normalized.length <= 15;
|
|
5593
|
+
const inContext = /isa|sipp|invest|pension|401k|ira|account|fund/i.test(context);
|
|
5594
|
+
return hasDigits && validLength && inContext;
|
|
5595
|
+
}
|
|
5490
5596
|
};
|
|
5491
5597
|
var WIRE_TRANSFER_REF = {
|
|
5492
5598
|
type: "WIRE_TRANSFER_REF",
|
|
@@ -6620,13 +6726,17 @@ var RESUME_ID = {
|
|
|
6620
6726
|
};
|
|
6621
6727
|
var BENEFITS_PLAN_NUMBER = {
|
|
6622
6728
|
type: "BENEFITS_PLAN_NUMBER",
|
|
6623
|
-
regex: /\b(?:BENEFITS?|INSURANCE|HEALTH[-\s]?PLAN)[-\s]
|
|
6729
|
+
regex: /\b(?:BENEFITS?|INSURANCE|HEALTH[-\s\u00A0]?PLAN)[-\s\u00A0]*(?:PLAN)?[-\s\u00A0]*(?:NO|NUM(?:BER)?|ID)?[-\s\u00A0.:#]*([A-Z0-9](?:[A-Z0-9][\s\u00A0./-]?){5,15}[A-Z0-9])\b/gi,
|
|
6624
6730
|
placeholder: "[BENEFITS_{n}]",
|
|
6625
6731
|
priority: 85,
|
|
6626
6732
|
severity: "high",
|
|
6627
6733
|
description: "Employee benefits and insurance plan numbers",
|
|
6628
|
-
validator: (
|
|
6629
|
-
|
|
6734
|
+
validator: (value, context) => {
|
|
6735
|
+
const normalized = value.replace(/[\s\u00A0./-]/g, "");
|
|
6736
|
+
const hasDigits = /\d{4,}/.test(normalized);
|
|
6737
|
+
const validLength = normalized.length >= 6 && normalized.length <= 14;
|
|
6738
|
+
const inContext = /benefit|insurance|health|dental|vision|plan|policy|enrollment/i.test(context);
|
|
6739
|
+
return hasDigits && validLength && inContext;
|
|
6630
6740
|
}
|
|
6631
6741
|
};
|
|
6632
6742
|
var RETIREMENT_ACCOUNT = {
|
|
@@ -6724,13 +6834,16 @@ var EXIT_INTERVIEW_ID = {
|
|
|
6724
6834
|
};
|
|
6725
6835
|
var DISCIPLINARY_ACTION_ID = {
|
|
6726
6836
|
type: "DISCIPLINARY_ACTION_ID",
|
|
6727
|
-
regex: /\b(?:DISCIPLINARY|INCIDENT|WARNING|VIOLATION)[-\s]
|
|
6837
|
+
regex: /\b(?:DISCIPLINARY|INCIDENT|WARNING|VIOLATION)[-\s\u00A0]*(?:ACTION)?[-\s\u00A0]*(?:NO|NUM(?:BER)?|ID)?[-\s\u00A0.:#]*([A-Z0-9](?:[A-Z0-9][\s\u00A0./-]?){5,15}[A-Z0-9])\b/gi,
|
|
6728
6838
|
placeholder: "[DISCIPLINE_{n}]",
|
|
6729
6839
|
priority: 85,
|
|
6730
6840
|
severity: "high",
|
|
6731
6841
|
description: "Disciplinary action and incident identifiers",
|
|
6732
|
-
validator: (
|
|
6733
|
-
|
|
6842
|
+
validator: (value, context) => {
|
|
6843
|
+
const normalized = value.replace(/[\s\u00A0./-]/g, "");
|
|
6844
|
+
const hasDigits = /\d{3,}/.test(normalized);
|
|
6845
|
+
const validLength = normalized.length >= 6 && normalized.length <= 12;
|
|
6846
|
+
return hasDigits && validLength && /disciplinary|incident|warning|violation|misconduct|investigation/i.test(context);
|
|
6734
6847
|
}
|
|
6735
6848
|
};
|
|
6736
6849
|
var EMERGENCY_CONTACT_REF = {
|
|
@@ -7058,7 +7171,7 @@ var TELECOMS_ACCOUNT_NUMBER = {
|
|
|
7058
7171
|
type: "TELECOMS_ACCOUNT_NUMBER",
|
|
7059
7172
|
regex: /\bACC(?:OUNT)?[-\s]?(?:NO|NUM(?:BER)?)?[-\s]?[:#]?\s*(\d{8,12})\b/gi,
|
|
7060
7173
|
placeholder: "[ACCOUNT_{n}]",
|
|
7061
|
-
priority:
|
|
7174
|
+
priority: 90,
|
|
7062
7175
|
severity: "high",
|
|
7063
7176
|
description: "Telecommunications customer account numbers",
|
|
7064
7177
|
validator: (_value, context) => {
|
|
@@ -7936,7 +8049,7 @@ var EMERGENCY_CALL_REF = {
|
|
|
7936
8049
|
};
|
|
7937
8050
|
var POLICE_REPORT_NUMBER = {
|
|
7938
8051
|
type: "POLICE_REPORT_NUMBER",
|
|
7939
|
-
regex: /\b(?:POLICE|PR|RPT|REPORT|CASE)[-\s]
|
|
8052
|
+
regex: /\b(?:POLICE|PR|RPT|REPORT|CASE)[-\s\u00A0]*(?:NO|NUM|NUMBER|ID)?[-\s\u00A0.:#]*((?:[A-Z]{2,4}[\s\u00A0./-]?\d{2,4}[\s\u00A0./-]?\d{4,10})|\d{4}[\s\u00A0./-]?\d{5,10})\b/gi,
|
|
7940
8053
|
placeholder: "[POLICE_RPT_{n}]",
|
|
7941
8054
|
priority: 95,
|
|
7942
8055
|
severity: "high",
|
|
@@ -7947,7 +8060,7 @@ var POLICE_REPORT_NUMBER = {
|
|
|
7947
8060
|
};
|
|
7948
8061
|
var FIRE_INCIDENT_NUMBER = {
|
|
7949
8062
|
type: "FIRE_INCIDENT_NUMBER",
|
|
7950
|
-
regex: /\b(?:FIRE|FI|FD)[-\s]
|
|
8063
|
+
regex: /\b(?:FIRE|FI|FD)[-\s\u00A0]*(?:INCIDENT|INC|NO|NUM|NUMBER|ID)?[-\s\u00A0.:#]*((?:[A-Z]{2,4}[\s\u00A0./-]?\d{2,4}[\s\u00A0./-]?\d{4,10})|\d{4}[\s\u00A0./-]?\d{4,8})\b/gi,
|
|
7951
8064
|
placeholder: "[FIRE_INC_{n}]",
|
|
7952
8065
|
priority: 95,
|
|
7953
8066
|
severity: "high",
|
|
@@ -11782,13 +11895,14 @@ var NINTENDO_FRIEND_CODE = {
|
|
|
11782
11895
|
type: "NINTENDO_FRIEND_CODE",
|
|
11783
11896
|
regex: /\bSW[-\s]?(\d{4}[-\s]?\d{4}[-\s]?\d{4})\b/gi,
|
|
11784
11897
|
placeholder: "[NINTENDO_FC_{n}]",
|
|
11785
|
-
priority:
|
|
11898
|
+
priority: 90,
|
|
11786
11899
|
severity: "medium",
|
|
11787
11900
|
description: "Nintendo Switch Friend Code",
|
|
11788
11901
|
validator: (value, context) => {
|
|
11789
11902
|
const digits = value.replace(/\D/g, "");
|
|
11790
11903
|
if (digits.length !== 12) return false;
|
|
11791
|
-
|
|
11904
|
+
const hasContext = /nintendo|switch|friend[- ]?code|gaming/i.test(context);
|
|
11905
|
+
return hasContext;
|
|
11792
11906
|
}
|
|
11793
11907
|
};
|
|
11794
11908
|
var BATTLETAG = {
|
|
@@ -12083,14 +12197,64 @@ var ccpaPreset = {
|
|
|
12083
12197
|
"USERNAME"
|
|
12084
12198
|
]
|
|
12085
12199
|
};
|
|
12200
|
+
var healthcarePreset = {
|
|
12201
|
+
includeNames: true,
|
|
12202
|
+
includeEmails: true,
|
|
12203
|
+
includePhones: true,
|
|
12204
|
+
includeAddresses: true,
|
|
12205
|
+
categories: ["personal", "contact", "healthcare", "insurance", "government"]
|
|
12206
|
+
};
|
|
12207
|
+
var healthcareResearchPreset = {
|
|
12208
|
+
includeNames: true,
|
|
12209
|
+
includeEmails: true,
|
|
12210
|
+
includePhones: true,
|
|
12211
|
+
includeAddresses: true,
|
|
12212
|
+
categories: ["personal", "contact", "healthcare", "insurance", "government"]
|
|
12213
|
+
};
|
|
12214
|
+
var financePreset = {
|
|
12215
|
+
includeNames: true,
|
|
12216
|
+
includeEmails: true,
|
|
12217
|
+
includePhones: true,
|
|
12218
|
+
includeAddresses: true,
|
|
12219
|
+
categories: ["personal", "contact", "financial", "government", "network"]
|
|
12220
|
+
};
|
|
12221
|
+
var educationPreset = {
|
|
12222
|
+
includeNames: true,
|
|
12223
|
+
includeEmails: true,
|
|
12224
|
+
includePhones: true,
|
|
12225
|
+
includeAddresses: true,
|
|
12226
|
+
categories: ["personal", "contact", "education", "government", "network"]
|
|
12227
|
+
};
|
|
12228
|
+
var transportLogisticsPreset = {
|
|
12229
|
+
includeNames: true,
|
|
12230
|
+
includeEmails: true,
|
|
12231
|
+
includePhones: true,
|
|
12232
|
+
includeAddresses: true,
|
|
12233
|
+
categories: ["personal", "contact", "transportation", "logistics", "vehicles", "network"]
|
|
12234
|
+
};
|
|
12086
12235
|
function getPreset(name) {
|
|
12087
|
-
|
|
12236
|
+
const presetName = name.toLowerCase();
|
|
12237
|
+
switch (presetName) {
|
|
12088
12238
|
case "gdpr":
|
|
12089
12239
|
return gdprPreset;
|
|
12090
12240
|
case "hipaa":
|
|
12091
12241
|
return hipaaPreset;
|
|
12092
12242
|
case "ccpa":
|
|
12093
12243
|
return ccpaPreset;
|
|
12244
|
+
case "healthcare":
|
|
12245
|
+
case "healthcare-provider":
|
|
12246
|
+
return healthcarePreset;
|
|
12247
|
+
case "healthcare-research":
|
|
12248
|
+
return healthcareResearchPreset;
|
|
12249
|
+
case "finance":
|
|
12250
|
+
case "financial-services":
|
|
12251
|
+
return financePreset;
|
|
12252
|
+
case "education":
|
|
12253
|
+
return educationPreset;
|
|
12254
|
+
case "transport-logistics":
|
|
12255
|
+
case "transportation":
|
|
12256
|
+
case "logistics":
|
|
12257
|
+
return transportLogisticsPreset;
|
|
12094
12258
|
default:
|
|
12095
12259
|
return {};
|
|
12096
12260
|
}
|
|
@@ -12613,9 +12777,23 @@ var ConfigLoader = class {
|
|
|
12613
12777
|
};
|
|
12614
12778
|
}
|
|
12615
12779
|
if (preset.startsWith("openredaction:")) {
|
|
12616
|
-
const
|
|
12617
|
-
|
|
12618
|
-
|
|
12780
|
+
const presetName = preset.replace("openredaction:", "");
|
|
12781
|
+
const supportedPresets = [
|
|
12782
|
+
"gdpr",
|
|
12783
|
+
"hipaa",
|
|
12784
|
+
"ccpa",
|
|
12785
|
+
"healthcare",
|
|
12786
|
+
"healthcare-provider",
|
|
12787
|
+
"healthcare-research",
|
|
12788
|
+
"finance",
|
|
12789
|
+
"financial-services",
|
|
12790
|
+
"education",
|
|
12791
|
+
"transport-logistics",
|
|
12792
|
+
"transportation",
|
|
12793
|
+
"logistics"
|
|
12794
|
+
];
|
|
12795
|
+
if (supportedPresets.includes(presetName)) {
|
|
12796
|
+
return { preset: presetName };
|
|
12619
12797
|
}
|
|
12620
12798
|
}
|
|
12621
12799
|
return null;
|
|
@@ -12631,7 +12809,8 @@ var ConfigLoader = class {
|
|
|
12631
12809
|
export default {
|
|
12632
12810
|
// Extend built-in presets
|
|
12633
12811
|
// Options: 'openredaction:recommended', 'openredaction:strict', 'openredaction:minimal'
|
|
12634
|
-
// Or compliance: 'openredaction:gdpr', 'openredaction:hipaa', 'openredaction:ccpa'
|
|
12812
|
+
// Or compliance/industry presets: 'openredaction:gdpr', 'openredaction:hipaa', 'openredaction:ccpa',
|
|
12813
|
+
// 'openredaction:finance', 'openredaction:education', 'openredaction:healthcare', 'openredaction:transport-logistics'
|
|
12635
12814
|
extends: ['openredaction:recommended'],
|
|
12636
12815
|
|
|
12637
12816
|
// Detection options
|
|
@@ -14308,9 +14487,8 @@ var ExplainAPI = class {
|
|
|
14308
14487
|
constructor(detector) {
|
|
14309
14488
|
this.detector = detector;
|
|
14310
14489
|
this.patterns = detector.getPatterns();
|
|
14311
|
-
const testResult = detector.detect("Contact: admin@business.co.uk");
|
|
14312
|
-
const hasConfidence = testResult.detections.length > 0 && testResult.detections[0].confidence !== void 0;
|
|
14313
14490
|
const detectorOptions = detector.options;
|
|
14491
|
+
const hasConfidence = detectorOptions?.enableContextAnalysis || false;
|
|
14314
14492
|
this.options = {
|
|
14315
14493
|
enableContextAnalysis: hasConfidence,
|
|
14316
14494
|
confidenceThreshold: detectorOptions?.confidenceThreshold || 0.5,
|
|
@@ -14322,7 +14500,7 @@ var ExplainAPI = class {
|
|
|
14322
14500
|
/**
|
|
14323
14501
|
* Explain why text was or wasn't detected as PII
|
|
14324
14502
|
*/
|
|
14325
|
-
explain(text) {
|
|
14503
|
+
async explain(text) {
|
|
14326
14504
|
const patternResults = [];
|
|
14327
14505
|
const matchedPatterns = [];
|
|
14328
14506
|
const unmatchedPatterns = [];
|
|
@@ -14412,7 +14590,8 @@ var ExplainAPI = class {
|
|
|
14412
14590
|
patternResults.push(result);
|
|
14413
14591
|
matchedPatterns.push(result);
|
|
14414
14592
|
}
|
|
14415
|
-
const
|
|
14593
|
+
const detectionResult = await this.detector.detect(text);
|
|
14594
|
+
const detections = detectionResult.detections;
|
|
14416
14595
|
return {
|
|
14417
14596
|
text,
|
|
14418
14597
|
patternResults,
|
|
@@ -14431,7 +14610,7 @@ var ExplainAPI = class {
|
|
|
14431
14610
|
/**
|
|
14432
14611
|
* Explain a specific detection
|
|
14433
14612
|
*/
|
|
14434
|
-
explainDetection(detection, text) {
|
|
14613
|
+
async explainDetection(detection, text) {
|
|
14435
14614
|
const pattern = this.patterns.find((p) => p.type === detection.type);
|
|
14436
14615
|
const reasoning = [];
|
|
14437
14616
|
reasoning.push(`Detected as ${detection.type}`);
|
|
@@ -14462,13 +14641,15 @@ var ExplainAPI = class {
|
|
|
14462
14641
|
detection,
|
|
14463
14642
|
pattern,
|
|
14464
14643
|
contextAnalysis,
|
|
14465
|
-
reasoning
|
|
14644
|
+
reasoning,
|
|
14645
|
+
suggestions: []
|
|
14646
|
+
// Will be populated if needed
|
|
14466
14647
|
};
|
|
14467
14648
|
}
|
|
14468
14649
|
/**
|
|
14469
14650
|
* Suggest why text wasn't detected
|
|
14470
14651
|
*/
|
|
14471
|
-
suggestWhy(text, expectedType) {
|
|
14652
|
+
async suggestWhy(text, expectedType) {
|
|
14472
14653
|
const suggestions = [];
|
|
14473
14654
|
const similarPatterns = [];
|
|
14474
14655
|
const typePatterns = this.patterns.filter(
|
|
@@ -14486,7 +14667,7 @@ var ExplainAPI = class {
|
|
|
14486
14667
|
similarPatterns.push(pattern);
|
|
14487
14668
|
const value = match[1] !== void 0 ? match[1] : match[0];
|
|
14488
14669
|
suggestions.push(`Pattern "${pattern.type}" matched value: "${value}"`);
|
|
14489
|
-
const explanation = this.explain(text);
|
|
14670
|
+
const explanation = await this.explain(text);
|
|
14490
14671
|
const filtered = explanation.filteredPatterns.find((r) => r.pattern.type === pattern.type);
|
|
14491
14672
|
if (filtered && filtered.reason) {
|
|
14492
14673
|
suggestions.push(`But was filtered: ${filtered.reason}`);
|
|
@@ -14516,9 +14697,9 @@ Example ${expectedType} pattern: ${examplePattern.regex.source.substring(0, 100)
|
|
|
14516
14697
|
/**
|
|
14517
14698
|
* Get debugging information for entire detection process
|
|
14518
14699
|
*/
|
|
14519
|
-
debug(text) {
|
|
14700
|
+
async debug(text) {
|
|
14520
14701
|
const start = performance.now();
|
|
14521
|
-
const explanation = this.explain(text);
|
|
14702
|
+
const explanation = await this.explain(text);
|
|
14522
14703
|
const duration = performance.now() - start;
|
|
14523
14704
|
const enabledFeatures = [];
|
|
14524
14705
|
if (this.options.enableContextAnalysis) {
|
|
@@ -15406,6 +15587,152 @@ function compileSafeRegex(pattern, flags) {
|
|
|
15406
15587
|
return new RegExp(patternStr, finalFlags);
|
|
15407
15588
|
}
|
|
15408
15589
|
|
|
15590
|
+
// src/utils/ai-assist.ts
|
|
15591
|
+
function getAIEndpoint(aiOptions) {
|
|
15592
|
+
if (!aiOptions?.enabled) {
|
|
15593
|
+
return null;
|
|
15594
|
+
}
|
|
15595
|
+
if (aiOptions.endpoint) {
|
|
15596
|
+
return aiOptions.endpoint;
|
|
15597
|
+
}
|
|
15598
|
+
if (typeof process !== "undefined" && process.env) {
|
|
15599
|
+
const envEndpoint = process.env.OPENREDACTION_AI_ENDPOINT;
|
|
15600
|
+
if (envEndpoint) {
|
|
15601
|
+
return envEndpoint;
|
|
15602
|
+
}
|
|
15603
|
+
}
|
|
15604
|
+
return null;
|
|
15605
|
+
}
|
|
15606
|
+
function isFetchAvailable() {
|
|
15607
|
+
return typeof fetch !== "undefined";
|
|
15608
|
+
}
|
|
15609
|
+
async function callAIDetect(text, endpoint, debug) {
|
|
15610
|
+
if (!isFetchAvailable()) {
|
|
15611
|
+
if (debug) {
|
|
15612
|
+
console.warn("[OpenRedaction] AI assist requires fetch API. Not available in this environment.");
|
|
15613
|
+
}
|
|
15614
|
+
return null;
|
|
15615
|
+
}
|
|
15616
|
+
try {
|
|
15617
|
+
const url = endpoint.endsWith("/ai-detect") ? endpoint : `${endpoint}/ai-detect`;
|
|
15618
|
+
if (debug) {
|
|
15619
|
+
console.log(`[OpenRedaction] Calling AI endpoint: ${url}`);
|
|
15620
|
+
}
|
|
15621
|
+
const response = await fetch(url, {
|
|
15622
|
+
method: "POST",
|
|
15623
|
+
headers: {
|
|
15624
|
+
"Content-Type": "application/json"
|
|
15625
|
+
},
|
|
15626
|
+
body: JSON.stringify({ text })
|
|
15627
|
+
});
|
|
15628
|
+
if (!response.ok) {
|
|
15629
|
+
if (debug) {
|
|
15630
|
+
const statusText = response.status === 429 ? "Rate limit exceeded (429)" : `${response.status}: ${response.statusText}`;
|
|
15631
|
+
console.warn(`[OpenRedaction] AI endpoint returned ${statusText}`);
|
|
15632
|
+
}
|
|
15633
|
+
return null;
|
|
15634
|
+
}
|
|
15635
|
+
const data = await response.json();
|
|
15636
|
+
if (!data.entities || !Array.isArray(data.entities)) {
|
|
15637
|
+
if (debug) {
|
|
15638
|
+
console.warn("[OpenRedaction] Invalid AI response format: missing entities array");
|
|
15639
|
+
}
|
|
15640
|
+
return null;
|
|
15641
|
+
}
|
|
15642
|
+
return data.entities;
|
|
15643
|
+
} catch (error) {
|
|
15644
|
+
if (debug) {
|
|
15645
|
+
console.warn(`[OpenRedaction] AI endpoint error: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
15646
|
+
}
|
|
15647
|
+
return null;
|
|
15648
|
+
}
|
|
15649
|
+
}
|
|
15650
|
+
function validateAIEntity(entity, textLength) {
|
|
15651
|
+
if (!entity.type || !entity.value || typeof entity.start !== "number" || typeof entity.end !== "number") {
|
|
15652
|
+
return false;
|
|
15653
|
+
}
|
|
15654
|
+
if (entity.start < 0 || entity.end < 0 || entity.start >= entity.end) {
|
|
15655
|
+
return false;
|
|
15656
|
+
}
|
|
15657
|
+
if (entity.start >= textLength || entity.end > textLength) {
|
|
15658
|
+
return false;
|
|
15659
|
+
}
|
|
15660
|
+
const actualValue = entity.value;
|
|
15661
|
+
if (actualValue.length !== entity.end - entity.start) {
|
|
15662
|
+
return false;
|
|
15663
|
+
}
|
|
15664
|
+
return true;
|
|
15665
|
+
}
|
|
15666
|
+
function detectionsOverlap(det1, det2) {
|
|
15667
|
+
const [start1, end1] = det1.position;
|
|
15668
|
+
const [start2, end2] = det2.position;
|
|
15669
|
+
const overlapStart = Math.max(start1, start2);
|
|
15670
|
+
const overlapEnd = Math.min(end1, end2);
|
|
15671
|
+
if (overlapStart >= overlapEnd) {
|
|
15672
|
+
return false;
|
|
15673
|
+
}
|
|
15674
|
+
const overlapLength = overlapEnd - overlapStart;
|
|
15675
|
+
const length1 = end1 - start1;
|
|
15676
|
+
const length2 = end2 - start2;
|
|
15677
|
+
const minLength = Math.min(length1, length2);
|
|
15678
|
+
return overlapLength > minLength * 0.5;
|
|
15679
|
+
}
|
|
15680
|
+
function convertAIEntityToDetection(entity, text) {
|
|
15681
|
+
if (!validateAIEntity(entity, text.length)) {
|
|
15682
|
+
return null;
|
|
15683
|
+
}
|
|
15684
|
+
const actualValue = text.substring(entity.start, entity.end);
|
|
15685
|
+
let type = entity.type.toUpperCase();
|
|
15686
|
+
if (type.includes("EMAIL") || type === "EMAIL_ADDRESS") {
|
|
15687
|
+
type = "EMAIL";
|
|
15688
|
+
} else if (type.includes("PHONE") || type === "PHONE_NUMBER") {
|
|
15689
|
+
type = "PHONE_US";
|
|
15690
|
+
} else if (type.includes("NAME") || type === "PERSON") {
|
|
15691
|
+
type = "NAME";
|
|
15692
|
+
} else if (type.includes("SSN") || type === "SOCIAL_SECURITY_NUMBER") {
|
|
15693
|
+
type = "SSN";
|
|
15694
|
+
} else if (type.includes("ADDRESS")) {
|
|
15695
|
+
type = "ADDRESS_STREET";
|
|
15696
|
+
}
|
|
15697
|
+
let severity = "medium";
|
|
15698
|
+
if (type === "SSN" || type === "CREDIT_CARD") {
|
|
15699
|
+
severity = "critical";
|
|
15700
|
+
} else if (type === "EMAIL" || type === "PHONE_US" || type === "NAME") {
|
|
15701
|
+
severity = "high";
|
|
15702
|
+
}
|
|
15703
|
+
return {
|
|
15704
|
+
type,
|
|
15705
|
+
value: actualValue,
|
|
15706
|
+
placeholder: `[${type}_${Math.random().toString(36).substring(2, 9)}]`,
|
|
15707
|
+
position: [entity.start, entity.end],
|
|
15708
|
+
severity,
|
|
15709
|
+
confidence: entity.confidence ?? 0.7
|
|
15710
|
+
// Default confidence for AI entities
|
|
15711
|
+
};
|
|
15712
|
+
}
|
|
15713
|
+
function mergeAIEntities(regexDetections, aiEntities, text) {
|
|
15714
|
+
const merged = [...regexDetections];
|
|
15715
|
+
const processedRanges = regexDetections.map((d) => d.position);
|
|
15716
|
+
for (const aiEntity of aiEntities) {
|
|
15717
|
+
const detection = convertAIEntityToDetection(aiEntity, text);
|
|
15718
|
+
if (!detection) {
|
|
15719
|
+
continue;
|
|
15720
|
+
}
|
|
15721
|
+
let hasOverlap = false;
|
|
15722
|
+
for (const regexDet of regexDetections) {
|
|
15723
|
+
if (detectionsOverlap(regexDet, detection)) {
|
|
15724
|
+
hasOverlap = true;
|
|
15725
|
+
break;
|
|
15726
|
+
}
|
|
15727
|
+
}
|
|
15728
|
+
if (!hasOverlap) {
|
|
15729
|
+
merged.push(detection);
|
|
15730
|
+
processedRanges.push(detection.position);
|
|
15731
|
+
}
|
|
15732
|
+
}
|
|
15733
|
+
return merged;
|
|
15734
|
+
}
|
|
15735
|
+
|
|
15409
15736
|
// src/detector.ts
|
|
15410
15737
|
var OpenRedaction = class _OpenRedaction {
|
|
15411
15738
|
constructor(options = {}) {
|
|
@@ -15615,6 +15942,9 @@ var OpenRedaction = class _OpenRedaction {
|
|
|
15615
15942
|
for (const pattern of this.patterns) {
|
|
15616
15943
|
const regex = new RegExp(pattern.regex.source, pattern.regex.flags);
|
|
15617
15944
|
this.compiledPatterns.set(pattern, regex);
|
|
15945
|
+
if (this.options.debug && (pattern.type === "NINTENDO_FRIEND_CODE" || pattern.type === "TELECOMS_ACCOUNT_NUMBER")) {
|
|
15946
|
+
console.log(`[OpenRedaction] Compiled pattern '${pattern.type}': ${regex}`);
|
|
15947
|
+
}
|
|
15618
15948
|
}
|
|
15619
15949
|
if (this.options.debug) {
|
|
15620
15950
|
console.log(`[OpenRedaction] Pre-compiled ${this.compiledPatterns.size} regex patterns`);
|
|
@@ -15634,12 +15964,18 @@ var OpenRedaction = class _OpenRedaction {
|
|
|
15634
15964
|
}
|
|
15635
15965
|
continue;
|
|
15636
15966
|
}
|
|
15967
|
+
if (this.options.debug && (pattern.type === "NINTENDO_FRIEND_CODE" || pattern.type === "TELECOMS_ACCOUNT_NUMBER")) {
|
|
15968
|
+
console.log(`[OpenRedaction] Processing pattern '${pattern.type}' with regex: ${regex}`);
|
|
15969
|
+
}
|
|
15637
15970
|
let match;
|
|
15638
15971
|
let matchCount = 0;
|
|
15639
15972
|
const maxMatches = 1e4;
|
|
15640
15973
|
regex.lastIndex = 0;
|
|
15641
15974
|
try {
|
|
15642
15975
|
while ((match = safeExec(regex, text, { timeout: this.options.regexTimeout })) !== null) {
|
|
15976
|
+
if (this.options.debug && (pattern.type === "NINTENDO_FRIEND_CODE" || pattern.type === "TELECOMS_ACCOUNT_NUMBER")) {
|
|
15977
|
+
console.log(`[OpenRedaction] Pattern '${pattern.type}' regex match found: '${match[0]}' at position ${match.index}`);
|
|
15978
|
+
}
|
|
15643
15979
|
matchCount++;
|
|
15644
15980
|
if (matchCount >= maxMatches) {
|
|
15645
15981
|
if (this.options.debug) {
|
|
@@ -15660,12 +15996,18 @@ var OpenRedaction = class _OpenRedaction {
|
|
|
15660
15996
|
endPos = startPos + value.length;
|
|
15661
15997
|
}
|
|
15662
15998
|
if (this.overlapsWithExisting(startPos, endPos, processedRanges)) {
|
|
15999
|
+
if (this.options.debug) {
|
|
16000
|
+
console.log(`[OpenRedaction] Pattern '${pattern.type}' skipped due to overlap at ${startPos}-${endPos}`);
|
|
16001
|
+
}
|
|
15663
16002
|
continue;
|
|
15664
16003
|
}
|
|
15665
16004
|
const contextStart = Math.max(0, startPos - 50);
|
|
15666
16005
|
const contextEnd = Math.min(text.length, endPos + 50);
|
|
15667
16006
|
const context = text.substring(contextStart, contextEnd);
|
|
15668
16007
|
if (pattern.validator && !pattern.validator(value, context)) {
|
|
16008
|
+
if (this.options.debug) {
|
|
16009
|
+
console.log(`[OpenRedaction] Pattern '${pattern.type}' validation failed for value: '${value}' with context: '${context.substring(0, 100)}...'`);
|
|
16010
|
+
}
|
|
15669
16011
|
continue;
|
|
15670
16012
|
}
|
|
15671
16013
|
if (this.options.enableFalsePositiveFilter) {
|
|
@@ -15684,6 +16026,9 @@ var OpenRedaction = class _OpenRedaction {
|
|
|
15684
16026
|
endPos
|
|
15685
16027
|
);
|
|
15686
16028
|
confidence = contextAnalysis.confidence;
|
|
16029
|
+
if (this.options.debug && confidence < this.options.confidenceThreshold) {
|
|
16030
|
+
console.log(`[OpenRedaction] Pattern '${pattern.type}' failed context analysis. Value: '${value}', Confidence: ${confidence} < ${this.options.confidenceThreshold}`);
|
|
16031
|
+
}
|
|
15687
16032
|
}
|
|
15688
16033
|
if (this.contextRulesEngine) {
|
|
15689
16034
|
const piiMatch = {
|
|
@@ -15709,6 +16054,9 @@ var OpenRedaction = class _OpenRedaction {
|
|
|
15709
16054
|
continue;
|
|
15710
16055
|
}
|
|
15711
16056
|
const placeholder = this.generatePlaceholder(value, pattern);
|
|
16057
|
+
if (this.options.debug) {
|
|
16058
|
+
console.log(`[OpenRedaction] Pattern '${pattern.type}' detected: '${value}' at position ${startPos}-${endPos}, confidence: ${confidence}`);
|
|
16059
|
+
}
|
|
15712
16060
|
detections.push({
|
|
15713
16061
|
type: pattern.type,
|
|
15714
16062
|
value,
|
|
@@ -15769,8 +16117,9 @@ var OpenRedaction = class _OpenRedaction {
|
|
|
15769
16117
|
}
|
|
15770
16118
|
/**
|
|
15771
16119
|
* Detect PII in text
|
|
16120
|
+
* Now async to support optional AI assist
|
|
15772
16121
|
*/
|
|
15773
|
-
detect(text) {
|
|
16122
|
+
async detect(text) {
|
|
15774
16123
|
if (this.rbacManager && !this.rbacManager.hasPermission("detection:detect")) {
|
|
15775
16124
|
throw new Error("[OpenRedaction] Permission denied: detection:detect required");
|
|
15776
16125
|
}
|
|
@@ -15824,12 +16173,42 @@ var OpenRedaction = class _OpenRedaction {
|
|
|
15824
16173
|
} else {
|
|
15825
16174
|
detections = this.processPatterns(text, this.patterns, processedRanges);
|
|
15826
16175
|
}
|
|
16176
|
+
if (this.options.ai?.enabled) {
|
|
16177
|
+
const aiEndpoint = getAIEndpoint(this.options.ai);
|
|
16178
|
+
if (aiEndpoint) {
|
|
16179
|
+
try {
|
|
16180
|
+
if (this.options.debug) {
|
|
16181
|
+
console.log("[OpenRedaction] AI assist enabled, calling AI endpoint...");
|
|
16182
|
+
}
|
|
16183
|
+
const aiEntities = await callAIDetect(text, aiEndpoint, this.options.debug);
|
|
16184
|
+
if (aiEntities && aiEntities.length > 0) {
|
|
16185
|
+
if (this.options.debug) {
|
|
16186
|
+
console.log(`[OpenRedaction] AI returned ${aiEntities.length} additional entities`);
|
|
16187
|
+
}
|
|
16188
|
+
detections = mergeAIEntities(detections, aiEntities, text);
|
|
16189
|
+
if (this.options.debug) {
|
|
16190
|
+
console.log(`[OpenRedaction] After AI merge: ${detections.length} total detections`);
|
|
16191
|
+
}
|
|
16192
|
+
} else if (this.options.debug) {
|
|
16193
|
+
console.log("[OpenRedaction] AI endpoint returned no additional entities");
|
|
16194
|
+
}
|
|
16195
|
+
} catch (error) {
|
|
16196
|
+
if (this.options.debug) {
|
|
16197
|
+
console.warn(`[OpenRedaction] AI assist failed, using regex-only: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
16198
|
+
}
|
|
16199
|
+
}
|
|
16200
|
+
} else if (this.options.debug) {
|
|
16201
|
+
console.warn("[OpenRedaction] AI assist enabled but no endpoint configured. Set ai.endpoint or OPENREDACTION_AI_ENDPOINT env var.");
|
|
16202
|
+
}
|
|
16203
|
+
}
|
|
15827
16204
|
detections.sort((a, b) => b.position[0] - a.position[0]);
|
|
15828
16205
|
let redacted = text;
|
|
15829
16206
|
const redactionMap = {};
|
|
15830
16207
|
for (const detection of detections) {
|
|
15831
|
-
|
|
15832
|
-
|
|
16208
|
+
if (!detection.value) continue;
|
|
16209
|
+
const escapedValue = this.escapeRegex(detection.value);
|
|
16210
|
+
const pattern = new RegExp(escapedValue, "gi");
|
|
16211
|
+
redacted = redacted.replace(pattern, detection.placeholder);
|
|
15833
16212
|
redactionMap[detection.placeholder] = detection.value;
|
|
15834
16213
|
}
|
|
15835
16214
|
const endTime = performance.now();
|
|
@@ -15982,8 +16361,8 @@ var OpenRedaction = class _OpenRedaction {
|
|
|
15982
16361
|
/**
|
|
15983
16362
|
* Get severity-based scan results
|
|
15984
16363
|
*/
|
|
15985
|
-
scan(text) {
|
|
15986
|
-
const result = this.detect(text);
|
|
16364
|
+
async scan(text) {
|
|
16365
|
+
const result = await this.detect(text);
|
|
15987
16366
|
return {
|
|
15988
16367
|
high: result.detections.filter((d) => d.severity === "high"),
|
|
15989
16368
|
medium: result.detections.filter((d) => d.severity === "medium"),
|
|
@@ -16220,7 +16599,7 @@ var OpenRedaction = class _OpenRedaction {
|
|
|
16220
16599
|
const metadata = await processor.getMetadata(buffer, options);
|
|
16221
16600
|
const extractionEnd = performance.now();
|
|
16222
16601
|
const extractionTime = Math.round((extractionEnd - extractionStart) * 100) / 100;
|
|
16223
|
-
const detection = this.detect(text);
|
|
16602
|
+
const detection = await this.detect(text);
|
|
16224
16603
|
return {
|
|
16225
16604
|
text,
|
|
16226
16605
|
metadata,
|
|
@@ -16317,7 +16696,7 @@ var StreamingDetector = class {
|
|
|
16317
16696
|
const end = Math.min(textLength, position + chunkSize);
|
|
16318
16697
|
const chunk = text.substring(start, end);
|
|
16319
16698
|
const byteOffset = start;
|
|
16320
|
-
const result = this.detector.detect(chunk);
|
|
16699
|
+
const result = await this.detector.detect(chunk);
|
|
16321
16700
|
const newDetections = result.detections.filter((detection) => {
|
|
16322
16701
|
const absoluteStart = byteOffset + detection.position[0];
|
|
16323
16702
|
const absoluteEnd = byteOffset + detection.position[1];
|
|
@@ -16347,8 +16726,10 @@ var StreamingDetector = class {
|
|
|
16347
16726
|
(a, b) => b.position[0] - a.position[0]
|
|
16348
16727
|
);
|
|
16349
16728
|
for (const detection of sortedDetections) {
|
|
16350
|
-
|
|
16351
|
-
|
|
16729
|
+
if (!detection.value) continue;
|
|
16730
|
+
const escapedValue = this.escapeRegex(detection.value);
|
|
16731
|
+
const pattern = new RegExp(escapedValue, "gi");
|
|
16732
|
+
redactedChunk = redactedChunk.replace(pattern, detection.placeholder);
|
|
16352
16733
|
}
|
|
16353
16734
|
}
|
|
16354
16735
|
yield {
|
|
@@ -16374,8 +16755,10 @@ var StreamingDetector = class {
|
|
|
16374
16755
|
allDetections.sort((a, b) => b.position[0] - a.position[0]);
|
|
16375
16756
|
const redactionMap = {};
|
|
16376
16757
|
for (const detection of allDetections) {
|
|
16377
|
-
|
|
16378
|
-
|
|
16758
|
+
if (!detection.value) continue;
|
|
16759
|
+
const escapedValue = this.escapeRegex(detection.value);
|
|
16760
|
+
const pattern = new RegExp(escapedValue, "gi");
|
|
16761
|
+
redactedText = redactedText.replace(pattern, detection.placeholder);
|
|
16379
16762
|
redactionMap[detection.placeholder] = detection.value;
|
|
16380
16763
|
}
|
|
16381
16764
|
return {
|
|
@@ -16450,6 +16833,9 @@ var StreamingDetector = class {
|
|
|
16450
16833
|
estimatedMemory
|
|
16451
16834
|
};
|
|
16452
16835
|
}
|
|
16836
|
+
escapeRegex(str) {
|
|
16837
|
+
return str.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
|
16838
|
+
}
|
|
16453
16839
|
};
|
|
16454
16840
|
function createStreamingDetector(detector, options) {
|
|
16455
16841
|
return new StreamingDetector(detector, options);
|
|
@@ -16466,11 +16852,11 @@ var BatchProcessor = class {
|
|
|
16466
16852
|
/**
|
|
16467
16853
|
* Process multiple documents sequentially
|
|
16468
16854
|
*/
|
|
16469
|
-
processSequential(documents, options = {}) {
|
|
16855
|
+
async processSequential(documents, options = {}) {
|
|
16470
16856
|
const startTime = performance.now();
|
|
16471
16857
|
const results = [];
|
|
16472
16858
|
for (let i = 0; i < documents.length; i++) {
|
|
16473
|
-
const result = this.detector.detect(documents[i]);
|
|
16859
|
+
const result = await this.detector.detect(documents[i]);
|
|
16474
16860
|
results.push(result);
|
|
16475
16861
|
if (options.onProgress) {
|
|
16476
16862
|
options.onProgress(i + 1, documents.length);
|
|
@@ -16484,7 +16870,7 @@ var BatchProcessor = class {
|
|
|
16484
16870
|
totalDocuments: documents.length,
|
|
16485
16871
|
totalDetections: results.reduce((sum, r) => sum + r.detections.length, 0),
|
|
16486
16872
|
totalTime,
|
|
16487
|
-
avgTimePerDocument: totalTime / documents.length
|
|
16873
|
+
avgTimePerDocument: documents.length > 0 ? totalTime / documents.length : 0
|
|
16488
16874
|
}
|
|
16489
16875
|
};
|
|
16490
16876
|
}
|
|
@@ -16498,16 +16884,14 @@ var BatchProcessor = class {
|
|
|
16498
16884
|
let completed = 0;
|
|
16499
16885
|
for (let i = 0; i < documents.length; i += maxConcurrency) {
|
|
16500
16886
|
const batch = documents.slice(i, i + maxConcurrency);
|
|
16501
|
-
const batchPromises = batch.map((doc, batchIndex) => {
|
|
16502
|
-
|
|
16503
|
-
|
|
16504
|
-
|
|
16505
|
-
|
|
16506
|
-
|
|
16507
|
-
|
|
16508
|
-
|
|
16509
|
-
return result;
|
|
16510
|
-
});
|
|
16887
|
+
const batchPromises = batch.map(async (doc, batchIndex) => {
|
|
16888
|
+
const result = await this.detector.detect(doc);
|
|
16889
|
+
results[i + batchIndex] = result;
|
|
16890
|
+
completed++;
|
|
16891
|
+
if (options.onProgress) {
|
|
16892
|
+
options.onProgress(completed, documents.length);
|
|
16893
|
+
}
|
|
16894
|
+
return result;
|
|
16511
16895
|
});
|
|
16512
16896
|
await Promise.all(batchPromises);
|
|
16513
16897
|
}
|
|
@@ -16519,7 +16903,7 @@ var BatchProcessor = class {
|
|
|
16519
16903
|
totalDocuments: documents.length,
|
|
16520
16904
|
totalDetections: results.reduce((sum, r) => sum + r.detections.length, 0),
|
|
16521
16905
|
totalTime,
|
|
16522
|
-
avgTimePerDocument: totalTime / documents.length
|
|
16906
|
+
avgTimePerDocument: documents.length > 0 ? totalTime / documents.length : 0
|
|
16523
16907
|
}
|
|
16524
16908
|
};
|
|
16525
16909
|
}
|
|
@@ -16530,7 +16914,7 @@ var BatchProcessor = class {
|
|
|
16530
16914
|
if (options.parallel) {
|
|
16531
16915
|
return this.processParallel(documents, options);
|
|
16532
16916
|
} else {
|
|
16533
|
-
return
|
|
16917
|
+
return this.processSequential(documents, options);
|
|
16534
16918
|
}
|
|
16535
16919
|
}
|
|
16536
16920
|
/**
|
|
@@ -16541,7 +16925,7 @@ var BatchProcessor = class {
|
|
|
16541
16925
|
for (let i = 0; i < documents.length; i += batchSize) {
|
|
16542
16926
|
const batch = documents.slice(i, i + batchSize);
|
|
16543
16927
|
for (const doc of batch) {
|
|
16544
|
-
const result = this.detector.detect(doc);
|
|
16928
|
+
const result = await this.detector.detect(doc);
|
|
16545
16929
|
yield result;
|
|
16546
16930
|
}
|
|
16547
16931
|
}
|
|
@@ -16589,7 +16973,7 @@ function openredactionMiddleware(options = {}) {
|
|
|
16589
16973
|
...detectorOptions
|
|
16590
16974
|
} = options;
|
|
16591
16975
|
const detector = new OpenRedaction(detectorOptions);
|
|
16592
|
-
return (req, res, next) => {
|
|
16976
|
+
return async (req, res, next) => {
|
|
16593
16977
|
if (skipRoutes.some((pattern) => pattern.test(req.path))) {
|
|
16594
16978
|
return next();
|
|
16595
16979
|
}
|
|
@@ -16609,7 +16993,7 @@ function openredactionMiddleware(options = {}) {
|
|
|
16609
16993
|
const results = {};
|
|
16610
16994
|
const redactedBody = { ...req.body };
|
|
16611
16995
|
for (const { field, value } of textsToCheck) {
|
|
16612
|
-
const result = detector.detect(value);
|
|
16996
|
+
const result = await detector.detect(value);
|
|
16613
16997
|
if (result.detections.length > 0) {
|
|
16614
16998
|
totalDetections += result.detections.length;
|
|
16615
16999
|
results[field] = result;
|
|
@@ -16659,7 +17043,7 @@ function openredactionMiddleware(options = {}) {
|
|
|
16659
17043
|
}
|
|
16660
17044
|
function detectPII(options = {}) {
|
|
16661
17045
|
const detector = new OpenRedaction(options);
|
|
16662
|
-
return (req, res) => {
|
|
17046
|
+
return async (req, res) => {
|
|
16663
17047
|
const text = req.body?.text || req.query.text;
|
|
16664
17048
|
if (!text) {
|
|
16665
17049
|
res.status(400).json({
|
|
@@ -16668,19 +17052,26 @@ function detectPII(options = {}) {
|
|
|
16668
17052
|
});
|
|
16669
17053
|
return;
|
|
16670
17054
|
}
|
|
16671
|
-
|
|
16672
|
-
|
|
16673
|
-
|
|
16674
|
-
|
|
16675
|
-
|
|
16676
|
-
|
|
16677
|
-
|
|
16678
|
-
|
|
17055
|
+
try {
|
|
17056
|
+
const result = await detector.detect(text);
|
|
17057
|
+
res.json({
|
|
17058
|
+
detected: result.detections.length > 0,
|
|
17059
|
+
count: result.detections.length,
|
|
17060
|
+
detections: result.detections,
|
|
17061
|
+
redacted: result.redacted,
|
|
17062
|
+
stats: result.stats
|
|
17063
|
+
});
|
|
17064
|
+
} catch (error) {
|
|
17065
|
+
res.status(500).json({
|
|
17066
|
+
error: "Detection failed",
|
|
17067
|
+
message: error instanceof Error ? error.message : "Unknown error"
|
|
17068
|
+
});
|
|
17069
|
+
}
|
|
16679
17070
|
};
|
|
16680
17071
|
}
|
|
16681
17072
|
function generateReport(options = {}) {
|
|
16682
17073
|
const detector = new OpenRedaction(options);
|
|
16683
|
-
return (req, res) => {
|
|
17074
|
+
return async (req, res) => {
|
|
16684
17075
|
const text = req.body?.text;
|
|
16685
17076
|
const format = req.body?.format || req.query.format || "json";
|
|
16686
17077
|
if (!text) {
|
|
@@ -16689,28 +17080,35 @@ function generateReport(options = {}) {
|
|
|
16689
17080
|
});
|
|
16690
17081
|
return;
|
|
16691
17082
|
}
|
|
16692
|
-
|
|
16693
|
-
|
|
16694
|
-
|
|
16695
|
-
|
|
16696
|
-
|
|
16697
|
-
|
|
16698
|
-
|
|
16699
|
-
|
|
16700
|
-
|
|
16701
|
-
|
|
16702
|
-
|
|
16703
|
-
|
|
16704
|
-
|
|
16705
|
-
|
|
16706
|
-
|
|
16707
|
-
|
|
16708
|
-
|
|
16709
|
-
|
|
16710
|
-
|
|
16711
|
-
|
|
16712
|
-
|
|
16713
|
-
|
|
17083
|
+
try {
|
|
17084
|
+
const result = await detector.detect(text);
|
|
17085
|
+
if (format === "html") {
|
|
17086
|
+
const html = detector.generateReport(result, {
|
|
17087
|
+
format: "html",
|
|
17088
|
+
title: req.body?.title || "PII Detection Report"
|
|
17089
|
+
});
|
|
17090
|
+
res.setHeader("Content-Type", "text/html");
|
|
17091
|
+
res.send(html);
|
|
17092
|
+
} else if (format === "markdown") {
|
|
17093
|
+
const md = detector.generateReport(result, {
|
|
17094
|
+
format: "markdown",
|
|
17095
|
+
title: req.body?.title || "PII Detection Report"
|
|
17096
|
+
});
|
|
17097
|
+
res.setHeader("Content-Type", "text/markdown");
|
|
17098
|
+
res.send(md);
|
|
17099
|
+
} else {
|
|
17100
|
+
res.json({
|
|
17101
|
+
detected: result.detections.length > 0,
|
|
17102
|
+
count: result.detections.length,
|
|
17103
|
+
detections: result.detections,
|
|
17104
|
+
redacted: result.redacted,
|
|
17105
|
+
stats: result.stats
|
|
17106
|
+
});
|
|
17107
|
+
}
|
|
17108
|
+
} catch (error) {
|
|
17109
|
+
res.status(500).json({
|
|
17110
|
+
error: "Report generation failed",
|
|
17111
|
+
message: error instanceof Error ? error.message : "Unknown error"
|
|
16714
17112
|
});
|
|
16715
17113
|
}
|
|
16716
17114
|
};
|
|
@@ -16722,12 +17120,17 @@ function useOpenRedaction(options) {
|
|
|
16722
17120
|
const detector = (0, import_react.useMemo)(() => new OpenRedaction(options), [options]);
|
|
16723
17121
|
const [result, setResult] = (0, import_react.useState)(null);
|
|
16724
17122
|
const [isDetecting, setIsDetecting] = (0, import_react.useState)(false);
|
|
16725
|
-
const detect = (0, import_react.useCallback)((text) => {
|
|
17123
|
+
const detect = (0, import_react.useCallback)(async (text) => {
|
|
16726
17124
|
setIsDetecting(true);
|
|
16727
|
-
|
|
16728
|
-
|
|
16729
|
-
|
|
16730
|
-
|
|
17125
|
+
try {
|
|
17126
|
+
const detection = await detector.detect(text);
|
|
17127
|
+
setResult(detection);
|
|
17128
|
+
setIsDetecting(false);
|
|
17129
|
+
return detection;
|
|
17130
|
+
} catch (error) {
|
|
17131
|
+
setIsDetecting(false);
|
|
17132
|
+
throw error;
|
|
17133
|
+
}
|
|
16731
17134
|
}, [detector]);
|
|
16732
17135
|
const clear = (0, import_react.useCallback)(() => {
|
|
16733
17136
|
setResult(null);
|
|
@@ -16753,10 +17156,14 @@ function usePIIDetector(text, options) {
|
|
|
16753
17156
|
return;
|
|
16754
17157
|
}
|
|
16755
17158
|
setIsDetecting(true);
|
|
16756
|
-
const timer = setTimeout(() => {
|
|
16757
|
-
|
|
16758
|
-
|
|
16759
|
-
|
|
17159
|
+
const timer = setTimeout(async () => {
|
|
17160
|
+
try {
|
|
17161
|
+
const detection = await detector.detect(text);
|
|
17162
|
+
setResult(detection);
|
|
17163
|
+
setIsDetecting(false);
|
|
17164
|
+
} catch (error) {
|
|
17165
|
+
setIsDetecting(false);
|
|
17166
|
+
}
|
|
16760
17167
|
}, debounce);
|
|
16761
17168
|
return () => {
|
|
16762
17169
|
clearTimeout(timer);
|
|
@@ -16777,27 +17184,32 @@ function useFormFieldValidator(options) {
|
|
|
16777
17184
|
const [value, setValue] = (0, import_react.useState)("");
|
|
16778
17185
|
const [error, setError] = (0, import_react.useState)(null);
|
|
16779
17186
|
const [result, setResult] = (0, import_react.useState)(null);
|
|
16780
|
-
const validate = (0, import_react.useCallback)((inputValue) => {
|
|
17187
|
+
const validate = (0, import_react.useCallback)(async (inputValue) => {
|
|
16781
17188
|
setValue(inputValue);
|
|
16782
17189
|
if (!inputValue) {
|
|
16783
17190
|
setError(null);
|
|
16784
17191
|
setResult(null);
|
|
16785
17192
|
return true;
|
|
16786
17193
|
}
|
|
16787
|
-
|
|
16788
|
-
|
|
16789
|
-
|
|
16790
|
-
|
|
16791
|
-
if (
|
|
16792
|
-
|
|
16793
|
-
|
|
16794
|
-
|
|
16795
|
-
onPIIDetected
|
|
17194
|
+
try {
|
|
17195
|
+
const detection = await detector.detect(inputValue);
|
|
17196
|
+
setResult(detection);
|
|
17197
|
+
const relevantDetections = types.length > 0 ? detection.detections.filter((d) => types.includes(d.type)) : detection.detections;
|
|
17198
|
+
if (relevantDetections.length > 0) {
|
|
17199
|
+
if (failOnPII) {
|
|
17200
|
+
setError(`Sensitive information detected: ${relevantDetections[0].type}`);
|
|
17201
|
+
}
|
|
17202
|
+
if (onPIIDetected) {
|
|
17203
|
+
onPIIDetected(detection);
|
|
17204
|
+
}
|
|
17205
|
+
return false;
|
|
16796
17206
|
}
|
|
17207
|
+
setError(null);
|
|
17208
|
+
return true;
|
|
17209
|
+
} catch (error2) {
|
|
17210
|
+
setError("Validation failed");
|
|
16797
17211
|
return false;
|
|
16798
17212
|
}
|
|
16799
|
-
setError(null);
|
|
16800
|
-
return true;
|
|
16801
17213
|
}, [detector, failOnPII, types, onPIIDetected]);
|
|
16802
17214
|
const getFieldProps = (0, import_react.useCallback)(() => ({
|
|
16803
17215
|
value,
|
|
@@ -16824,7 +17236,7 @@ function useBatchDetector(options) {
|
|
|
16824
17236
|
setProgress(0);
|
|
16825
17237
|
const detections = [];
|
|
16826
17238
|
for (let i = 0; i < texts.length; i++) {
|
|
16827
|
-
const result = detector.detect(texts[i]);
|
|
17239
|
+
const result = await detector.detect(texts[i]);
|
|
16828
17240
|
detections.push(result);
|
|
16829
17241
|
setProgress((i + 1) / texts.length * 100);
|
|
16830
17242
|
await new Promise((resolve) => setTimeout(resolve, 0));
|
|
@@ -16861,9 +17273,12 @@ function useAutoRedact(options) {
|
|
|
16861
17273
|
setResult(null);
|
|
16862
17274
|
return;
|
|
16863
17275
|
}
|
|
16864
|
-
const timer = setTimeout(() => {
|
|
16865
|
-
|
|
16866
|
-
|
|
17276
|
+
const timer = setTimeout(async () => {
|
|
17277
|
+
try {
|
|
17278
|
+
const detection = await detector.detect(text);
|
|
17279
|
+
setResult(detection);
|
|
17280
|
+
} catch (error) {
|
|
17281
|
+
}
|
|
16867
17282
|
}, debounce);
|
|
16868
17283
|
return () => clearTimeout(timer);
|
|
16869
17284
|
}, [text, detector, debounce]);
|
|
@@ -16992,7 +17407,7 @@ var TenantManager = class {
|
|
|
16992
17407
|
await this.checkQuotas(tenantId, text);
|
|
16993
17408
|
this.trackRequest(tenantId, text);
|
|
16994
17409
|
const detector = this.getDetector(tenantId);
|
|
16995
|
-
const result = detector.detect(text);
|
|
17410
|
+
const result = await detector.detect(text);
|
|
16996
17411
|
const usage = this.usage.get(tenantId);
|
|
16997
17412
|
usage.piiDetectedThisMonth += result.detections.length;
|
|
16998
17413
|
usage.lastRequestAt = /* @__PURE__ */ new Date();
|
|
@@ -17279,6 +17694,7 @@ var DEFAULT_TIER_QUOTAS = {
|
|
|
17279
17694
|
// src/webhooks/WebhookManager.ts
|
|
17280
17695
|
var WebhookManager = class {
|
|
17281
17696
|
// 1 minute
|
|
17697
|
+
// private readonly HALF_OPEN_MAX_REQUESTS = 1; // Reserved for future use
|
|
17282
17698
|
constructor(options) {
|
|
17283
17699
|
this.webhooks = /* @__PURE__ */ new Map();
|
|
17284
17700
|
this.deliveryHistory = [];
|
|
@@ -17550,9 +17966,9 @@ var WebhookManager = class {
|
|
|
17550
17966
|
*/
|
|
17551
17967
|
async makeHttpRequest(webhook, event) {
|
|
17552
17968
|
try {
|
|
17553
|
-
let
|
|
17969
|
+
let fetch2;
|
|
17554
17970
|
try {
|
|
17555
|
-
|
|
17971
|
+
fetch2 = globalThis.fetch;
|
|
17556
17972
|
} catch {
|
|
17557
17973
|
throw new Error("[WebhookManager] HTTP client not available. Requires Node 18+ with fetch support.");
|
|
17558
17974
|
}
|
|
@@ -17572,7 +17988,7 @@ var WebhookManager = class {
|
|
|
17572
17988
|
const controller = new AbortController();
|
|
17573
17989
|
const timeoutId = setTimeout(() => controller.abort(), webhook.timeout);
|
|
17574
17990
|
try {
|
|
17575
|
-
const response = await
|
|
17991
|
+
const response = await fetch2(webhook.url, {
|
|
17576
17992
|
method: "POST",
|
|
17577
17993
|
headers,
|
|
17578
17994
|
body: JSON.stringify(event),
|
|
@@ -17943,7 +18359,7 @@ var APIServer = class {
|
|
|
17943
18359
|
if (req.tenantId && this.config.tenantManager) {
|
|
17944
18360
|
result = await this.config.tenantManager.detect(req.tenantId, text);
|
|
17945
18361
|
} else if (this.detector) {
|
|
17946
|
-
result = this.detector.detect(text);
|
|
18362
|
+
result = await this.detector.detect(text);
|
|
17947
18363
|
} else {
|
|
17948
18364
|
throw new Error("No detector available");
|
|
17949
18365
|
}
|
|
@@ -17984,7 +18400,7 @@ var APIServer = class {
|
|
|
17984
18400
|
if (req.tenantId && this.config.tenantManager) {
|
|
17985
18401
|
result = await this.config.tenantManager.detect(req.tenantId, text);
|
|
17986
18402
|
} else if (this.detector) {
|
|
17987
|
-
result = this.detector.detect(text);
|
|
18403
|
+
result = await this.detector.detect(text);
|
|
17988
18404
|
} else {
|
|
17989
18405
|
throw new Error("No detector available");
|
|
17990
18406
|
}
|
|
@@ -18407,10 +18823,12 @@ init_HealthCheck();
|
|
|
18407
18823
|
analyzeFullContext,
|
|
18408
18824
|
calculateContextConfidence,
|
|
18409
18825
|
calculateRisk,
|
|
18826
|
+
callAIDetect,
|
|
18410
18827
|
ccpaPreset,
|
|
18411
18828
|
commonFalsePositives,
|
|
18412
18829
|
compileSafeRegex,
|
|
18413
18830
|
contactPatterns,
|
|
18831
|
+
convertAIEntityToDetection,
|
|
18414
18832
|
createAPIServer,
|
|
18415
18833
|
createBatchProcessor,
|
|
18416
18834
|
createCacheDisabledError,
|
|
@@ -18445,12 +18863,16 @@ init_HealthCheck();
|
|
|
18445
18863
|
createXlsxProcessor,
|
|
18446
18864
|
defaultPasses,
|
|
18447
18865
|
detectPII,
|
|
18866
|
+
detectionsOverlap,
|
|
18867
|
+
educationPreset,
|
|
18448
18868
|
exportForVersionControl,
|
|
18449
18869
|
extractContext,
|
|
18450
18870
|
filterFalsePositives,
|
|
18871
|
+
financePreset,
|
|
18451
18872
|
financialPatterns,
|
|
18452
18873
|
gdprPreset,
|
|
18453
18874
|
generateReport,
|
|
18875
|
+
getAIEndpoint,
|
|
18454
18876
|
getPatternsByCategory,
|
|
18455
18877
|
getPredefinedRole,
|
|
18456
18878
|
getPreset,
|
|
@@ -18458,21 +18880,26 @@ init_HealthCheck();
|
|
|
18458
18880
|
governmentPatterns,
|
|
18459
18881
|
groupPatternsByPass,
|
|
18460
18882
|
healthCheckMiddleware,
|
|
18883
|
+
healthcarePreset,
|
|
18884
|
+
healthcareResearchPreset,
|
|
18461
18885
|
hipaaPreset,
|
|
18462
18886
|
inferDocumentType,
|
|
18463
18887
|
isFalsePositive,
|
|
18464
18888
|
isUnsafePattern,
|
|
18889
|
+
mergeAIEntities,
|
|
18465
18890
|
mergePassDetections,
|
|
18466
18891
|
networkPatterns,
|
|
18467
18892
|
openredactionMiddleware,
|
|
18468
18893
|
personalPatterns,
|
|
18469
18894
|
safeExec,
|
|
18470
18895
|
safeExecAll,
|
|
18896
|
+
transportLogisticsPreset,
|
|
18471
18897
|
useAutoRedact,
|
|
18472
18898
|
useBatchDetector,
|
|
18473
18899
|
useFormFieldValidator,
|
|
18474
18900
|
useOpenRedaction,
|
|
18475
18901
|
usePIIDetector,
|
|
18902
|
+
validateAIEntity,
|
|
18476
18903
|
validateEmail,
|
|
18477
18904
|
validateIBAN,
|
|
18478
18905
|
validateLuhn,
|