od-temp 1.0.5 → 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -40,14 +40,13 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
40
40
  var __toCommonJS = (mod) => __hasOwnProp.call(mod, "module.exports") ? mod["module.exports"] : __copyProps(__defProp({}, "__esModule", { value: true }), mod);
41
41
 
42
42
  //#endregion
43
- const require_document = require('./document-C4T2JLdu.js');
44
- const require_workers = require('./workers-BmzAqLSu.js');
45
- const require_HealthCheck = require('./HealthCheck-CFX1wPqE.js');
46
43
  let crypto = require("crypto");
47
44
  let fs = require("fs");
48
45
  fs = __toESM(fs);
49
46
  let path = require("path");
50
47
  path = __toESM(path);
48
+ let worker_threads = require("worker_threads");
49
+ let os = require("os");
51
50
  let react = require("react");
52
51
 
53
52
  //#region src/audit/AuditLogger.ts
@@ -15754,6 +15753,1961 @@ var init_ConfigExporter = __esmMin((() => {
15754
15753
  };
15755
15754
  }));
15756
15755
 
15756
+ //#endregion
15757
+ //#region src/health/HealthCheck.ts
15758
// Export table for src/health/HealthCheck.ts. Each entry is a thunk, so the
// binding is only read when the export is accessed (HealthChecker is assigned
// later, inside init_HealthCheck).
var HealthCheck_exports = /* @__PURE__ */ __exportAll({
	HealthChecker: () => HealthChecker,
	createHealthChecker: () => createHealthChecker,
	healthCheckMiddleware: () => healthCheckMiddleware
});
15763
/**
 * Factory for a HealthChecker bound to one detector.
 *
 * @param detector Detector instance the checker will probe.
 * @returns A new HealthChecker wrapping `detector`.
 */
function createHealthChecker(detector) {
	const checker = new HealthChecker(detector);
	return checker;
}
15769
/**
 * Express middleware for a health check endpoint.
 *
 * Runs a full HealthChecker.check() on every request and replies with the
 * result as JSON. "healthy" and "degraded" map to HTTP 200, every other status
 * to 503. If the check itself throws, a minimal 503 payload is sent instead.
 *
 * @param detector Detector instance handed to HealthChecker.
 * @param options Optional overrides merged over the defaults
 *   (testDetection: true, checkPerformance: true, performanceThreshold: 100,
 *   memoryThreshold: 100). Omitting it keeps the previous fixed behaviour.
 * @returns Async `(req, res)` handler; `res` must provide `status()` and `json()`.
 */
function healthCheckMiddleware(detector, options = {}) {
	const checker = new HealthChecker(detector);
	const checkOptions = {
		testDetection: true,
		checkPerformance: true,
		performanceThreshold: 100,
		memoryThreshold: 100,
		...options
	};
	return async (_req, res) => {
		try {
			const result = await checker.check(checkOptions);
			// A degraded service still answers requests, so it stays on 200.
			const isServing = result.status === "healthy" || result.status === "degraded";
			res.status(isServing ? 200 : 503).json(result);
		} catch (error) {
			res.status(503).json({
				status: "unhealthy",
				timestamp: (/* @__PURE__ */ new Date()).toISOString(),
				// Thrown values are not guaranteed to be Error instances.
				error: error?.message ?? String(error)
			});
		}
	};
}
15793
var HealthChecker;
var init_HealthCheck = __esmMin((() => {
	/**
	 * Best-effort text for a caught value. Anything can be thrown, so fall
	 * back to String() when the value carries no `message`.
	 */
	const describeError = (error) => error?.message ?? String(error);
	/**
	 * Runs diagnostic checks against a detector. The detector is expected to
	 * expose `detect(text)`, `getPatterns()` and `getCacheStats()`, which are
	 * the only members this class calls.
	 */
	HealthChecker = class {
		/**
		 * @param detector Detector instance to probe.
		 */
		constructor(detector) {
			this.detector = detector;
			// Reference point for the `uptime` metric (milliseconds since construction).
			this.initTime = Date.now();
		}
		/**
		 * Run the complete health check.
		 *
		 * @param options Optional settings: `testDetection` and
		 *   `checkPerformance` (both on unless set to false),
		 *   `performanceThreshold` (ms) and `memoryThreshold` (MB).
		 *   `null` is treated like an omitted argument.
		 * @returns Result with overall `status`, per-check results, metrics and
		 *   the collected error/warning messages. Never rejects: unexpected
		 *   failures are reported as status "unhealthy".
		 */
		async check(options = {}) {
			// Default parameters only cover `undefined`; normalise `null` too.
			const opts = options ?? {};
			const result = {
				status: "healthy",
				timestamp: (/* @__PURE__ */ new Date()).toISOString(),
				checks: {
					detector: {
						status: "pass",
						message: "Detector initialized"
					},
					patterns: {
						status: "pass",
						message: "Patterns loaded"
					},
					performance: {
						status: "pass",
						message: "Performance acceptable"
					},
					memory: {
						status: "pass",
						message: "Memory usage normal"
					}
				},
				metrics: {
					totalPatterns: 0,
					compiledPatterns: 0,
					cacheEnabled: false,
					uptime: Date.now() - this.initTime
				},
				errors: [],
				warnings: []
			};
			try {
				// Checks run one after another so the timed detection in
				// checkPerformance is not competing with the other probes.
				result.checks.detector = await this.checkDetector(opts);
				result.checks.patterns = await this.checkPatterns();
				if (opts.checkPerformance !== false) result.checks.performance = await this.checkPerformance(opts.performanceThreshold);
				result.checks.memory = await this.checkMemory(opts.memoryThreshold);
				result.metrics = this.collectMetrics();
				result.status = this.determineOverallStatus(result.checks);
				for (const check of Object.values(result.checks)) {
					if (check.status === "fail") result.errors.push(check.message);
					else if (check.status === "warn") result.warnings.push(check.message);
				}
			} catch (error) {
				result.status = "unhealthy";
				result.errors.push(`Health check failed: ${describeError(error)}`);
			}
			return result;
		}
		/**
		 * Check detector functionality by running a sample detection.
		 *
		 * @param options Check options; the probe is skipped only when
		 *   `testDetection` is exactly false.
		 * @returns `{ status, message }` with status "pass", "warn" or "fail".
		 */
		async checkDetector(options) {
			try {
				if (options?.testDetection !== false) {
					const result = await this.detector.detect("Test email: test@example.com");
					if (!result || !result.detections) return {
						status: "fail",
						message: "Detector returned invalid result"
					};
					if (result.detections.length === 0) return {
						status: "warn",
						message: "Test detection found no PII (expected at least 1)"
					};
				}
				return {
					status: "pass",
					message: "Detector functioning correctly"
				};
			} catch (error) {
				return {
					status: "fail",
					message: `Detector check failed: ${describeError(error)}`
				};
			}
		}
		/**
		 * Check that patterns are loaded: none is a failure, fewer than 10 a warning.
		 *
		 * @returns Check result including the pattern count as `value`.
		 */
		async checkPatterns() {
			try {
				const patterns = this.detector.getPatterns();
				if (!patterns || patterns.length === 0) return {
					status: "fail",
					message: "No patterns loaded",
					value: 0,
					threshold: 1
				};
				if (patterns.length < 10) return {
					status: "warn",
					message: "Very few patterns loaded (expected more)",
					value: patterns.length,
					threshold: 10
				};
				return {
					status: "pass",
					message: `${patterns.length} patterns loaded`,
					value: patterns.length
				};
			} catch (error) {
				return {
					status: "fail",
					message: `Pattern check failed: ${describeError(error)}`
				};
			}
		}
		/**
		 * Time a single sample detection.
		 *
		 * @param threshold Duration in milliseconds above which the check
		 *   warns; above twice this value it fails.
		 * @returns Check result with the measured duration as `value`.
		 */
		async checkPerformance(threshold = 100) {
			try {
				const testText = "Test: john@example.com, phone: 555-123-4567, IP: 192.168.1.1";
				const start = performance.now();
				await this.detector.detect(testText);
				const duration = performance.now() - start;
				if (duration > threshold * 2) return {
					status: "fail",
					message: `Performance degraded: ${duration.toFixed(2)}ms`,
					value: duration,
					threshold
				};
				if (duration > threshold) return {
					status: "warn",
					message: `Performance slower than expected: ${duration.toFixed(2)}ms`,
					value: duration,
					threshold
				};
				return {
					status: "pass",
					message: `Performance good: ${duration.toFixed(2)}ms`,
					value: duration,
					threshold
				};
			} catch (error) {
				return {
					status: "fail",
					message: `Performance check failed: ${describeError(error)}`
				};
			}
		}
		/**
		 * Check heap usage of the current process.
		 *
		 * @param threshold Heap size in MB above which the check warns; above
		 *   twice this value it fails.
		 * @returns Check result; a "pass" without value when
		 *   `process.memoryUsage` is unavailable.
		 */
		async checkMemory(threshold = 100) {
			try {
				if (typeof process === "undefined" || !process.memoryUsage) return {
					status: "pass",
					message: "Memory check skipped (not in Node.js)"
				};
				// bytes -> MB
				const heapUsedMB = process.memoryUsage().heapUsed / 1024 / 1024;
				if (heapUsedMB > threshold * 2) return {
					status: "fail",
					message: `High memory usage: ${heapUsedMB.toFixed(2)}MB`,
					value: heapUsedMB,
					threshold
				};
				if (heapUsedMB > threshold) return {
					status: "warn",
					message: `Elevated memory usage: ${heapUsedMB.toFixed(2)}MB`,
					value: heapUsedMB,
					threshold
				};
				return {
					status: "pass",
					message: `Memory usage normal: ${heapUsedMB.toFixed(2)}MB`,
					value: heapUsedMB,
					threshold
				};
			} catch (error) {
				return {
					status: "warn",
					message: `Memory check skipped: ${describeError(error)}`
				};
			}
		}
		/**
		 * Collect pattern, cache and uptime metrics from the detector.
		 *
		 * @returns Metrics object; `compiledPatterns` mirrors `totalPatterns`.
		 */
		collectMetrics() {
			const patterns = this.detector.getPatterns();
			const cacheStats = this.detector.getCacheStats();
			return {
				totalPatterns: patterns.length,
				compiledPatterns: patterns.length,
				cacheSize: cacheStats.size,
				cacheEnabled: cacheStats.enabled,
				uptime: Date.now() - this.initTime
			};
		}
		/**
		 * Fold the individual check results into one status: any "fail" gives
		 * "unhealthy", otherwise any "warn" gives "degraded", else "healthy".
		 *
		 * @param checks Object whose values each carry a `status`.
		 */
		determineOverallStatus(checks) {
			const statuses = Object.values(checks).map((c) => c.status);
			if (statuses.includes("fail")) return "unhealthy";
			if (statuses.includes("warn")) return "degraded";
			return "healthy";
		}
		/**
		 * Quick health check (minimal overhead): only verifies that at least
		 * one pattern is loaded.
		 *
		 * @returns `{ status, message }` with status "healthy" or "unhealthy".
		 */
		async quickCheck() {
			try {
				if (this.detector.getPatterns().length === 0) return {
					status: "unhealthy",
					message: "No patterns loaded"
				};
				return {
					status: "healthy",
					message: "OK"
				};
			} catch (error) {
				return {
					status: "unhealthy",
					message: `Error: ${describeError(error)}`
				};
			}
		}
		/**
		 * Get system info for debugging.
		 *
		 * @returns Snapshot of pattern counts, cache stats, uptime and timestamp.
		 */
		getSystemInfo() {
			const patterns = this.detector.getPatterns();
			const cacheStats = this.detector.getCacheStats();
			return {
				// NOTE(review): hard-coded string, not read from package metadata.
				version: "1.0.0",
				patterns: {
					total: patterns.length,
					// Distinct prefixes of `type` before the first underscore.
					types: [...new Set(patterns.map((p) => p.type.split("_")[0]))].length
				},
				cache: {
					enabled: cacheStats.enabled,
					size: cacheStats.size,
					maxSize: cacheStats.maxSize
				},
				uptime: Date.now() - this.initTime,
				timestamp: (/* @__PURE__ */ new Date()).toISOString()
			};
		}
	};
}));
16041
+
16042
+ //#endregion
16043
+ //#region src/document/OCRProcessor.ts
16044
/**
 * Factory for OCRProcessor.
 *
 * @returns A freshly constructed OCRProcessor.
 */
function createOCRProcessor() {
	const processor = new OCRProcessor();
	return processor;
}
16050
+ var OCRProcessor;
16051
+ var init_OCRProcessor = __esmMin((() => {
16052
+ OCRProcessor = class {
16053
+ constructor() {
16054
+ try {
16055
+ this.tesseract = require("tesseract.js");
16056
+ } catch {}
16057
+ }
16058
+ /**
16059
+ * Extract text from image buffer using OCR
16060
+ */
16061
+ async recognizeText(buffer, options) {
16062
+ if (!this.tesseract) throw new Error("[OCRProcessor] OCR support requires tesseract.js. Install with: npm install tesseract.js");
16063
+ const startTime = performance.now();
16064
+ try {
16065
+ const language = Array.isArray(options?.language) ? options.language.join("+") : options?.language || "eng";
16066
+ const worker = await this.tesseract.createWorker(language, options?.oem || 3);
16067
+ if (options?.psm !== void 0) await worker.setParameters({ tessedit_pageseg_mode: options.psm });
16068
+ const result = await worker.recognize(buffer);
16069
+ await worker.terminate();
16070
+ const endTime = performance.now();
16071
+ const processingTime = Math.round((endTime - startTime) * 100) / 100;
16072
+ return {
16073
+ text: result.data.text || "",
16074
+ confidence: result.data.confidence || 0,
16075
+ processingTime
16076
+ };
16077
+ } catch (error) {
16078
+ throw new Error(`[OCRProcessor] OCR recognition failed: ${error.message}`);
16079
+ }
16080
+ }
16081
+ /**
16082
+ * Check if OCR is available (tesseract.js installed)
16083
+ */
16084
+ isAvailable() {
16085
+ return !!this.tesseract;
16086
+ }
16087
+ /**
16088
+ * Create a scheduler for batch OCR processing
16089
+ * More efficient for processing multiple images
16090
+ */
16091
+ async createScheduler(workerCount = 4) {
16092
+ if (!this.tesseract) throw new Error("[OCRProcessor] OCR support requires tesseract.js. Install with: npm install tesseract.js");
16093
+ if (this.scheduler) await this.scheduler.terminate();
16094
+ this.scheduler = this.tesseract.createScheduler();
16095
+ const workers = [];
16096
+ for (let i = 0; i < workerCount; i++) {
16097
+ const worker = await this.tesseract.createWorker("eng");
16098
+ this.scheduler.addWorker(worker);
16099
+ workers.push(worker);
16100
+ }
16101
+ return this.scheduler;
16102
+ }
16103
+ /**
16104
+ * Batch process multiple images
16105
+ */
16106
+ async recognizeBatch(buffers, _options) {
16107
+ if (!this.tesseract) throw new Error("[OCRProcessor] OCR support requires tesseract.js. Install with: npm install tesseract.js");
16108
+ const scheduler = await this.createScheduler();
16109
+ try {
16110
+ const results = await Promise.all(buffers.map(async (buffer) => {
16111
+ const startTime = performance.now();
16112
+ const result = await scheduler.addJob("recognize", buffer);
16113
+ const endTime = performance.now();
16114
+ return {
16115
+ text: result.data.text || "",
16116
+ confidence: result.data.confidence || 0,
16117
+ processingTime: Math.round((endTime - startTime) * 100) / 100
16118
+ };
16119
+ }));
16120
+ await scheduler.terminate();
16121
+ this.scheduler = void 0;
16122
+ return results;
16123
+ } catch (error) {
16124
+ if (scheduler) {
16125
+ await scheduler.terminate();
16126
+ this.scheduler = void 0;
16127
+ }
16128
+ throw new Error(`[OCRProcessor] Batch OCR failed: ${error.message}`);
16129
+ }
16130
+ }
16131
+ /**
16132
+ * Terminate any running scheduler
16133
+ */
16134
+ async cleanup() {
16135
+ if (this.scheduler) {
16136
+ await this.scheduler.terminate();
16137
+ this.scheduler = void 0;
16138
+ }
16139
+ }
16140
+ };
16141
+ }));
16142
+
16143
+ //#endregion
16144
+ //#region src/document/JsonProcessor.ts
16145
/**
 * Factory for JsonProcessor.
 *
 * @returns A freshly constructed JsonProcessor.
 */
function createJsonProcessor() {
	const processor = new JsonProcessor();
	return processor;
}
16151
var JsonProcessor;
var init_JsonProcessor = __esmMin((() => {
	/** Every character that has a special meaning inside a RegExp source. */
	const REGEX_SPECIAL_CHARS = /[.*+?^${}()|[\]\\]/g;
	/**
	 * True when `path` equals one of `patterns`, or matches one of them with
	 * `*` standing for one path segment (any run of characters except ".").
	 * All other characters are matched literally, so "." and "[0]" in a
	 * pattern are not interpreted as regex syntax.
	 */
	const matchesAnyPathPattern = (path, patterns) => patterns.some((pattern) => {
		if (path === pattern) return true;
		const source = pattern.replace(REGEX_SPECIAL_CHARS, "\\$&").replace(/\\\*/g, "[^.]+");
		return new RegExp(`^${source}$`).test(path);
	});
	/**
	 * Detects and redacts PII inside parsed JSON values. Values are addressed
	 * by paths of the form `a.b[0].c`.
	 */
	JsonProcessor = class {
		constructor() {
			this.defaultOptions = {
				maxDepth: 100,
				scanKeys: false,
				alwaysRedact: [],
				skipPaths: [],
				piiIndicatorKeys: [
					"email",
					"e-mail",
					"mail",
					"phone",
					"tel",
					"telephone",
					"mobile",
					"ssn",
					"social_security",
					"address",
					"street",
					"city",
					"zip",
					"postal",
					"name",
					"firstname",
					"lastname",
					"fullname",
					"password",
					"pwd",
					"secret",
					"token",
					"key",
					"card",
					"credit_card",
					"creditcard",
					"account",
					"iban",
					"swift",
					"passport",
					"license",
					"licence"
				],
				preserveStructure: true
			};
		}
		/**
		 * Parse JSON from buffer or string
		 *
		 * @param input JSON text, or an object whose `toString("utf-8")` yields it.
		 * @returns The parsed value.
		 * @throws Error prefixed with "[JsonProcessor] Invalid JSON" on parse failure.
		 */
		parse(input) {
			try {
				const text = typeof input === "string" ? input : input.toString("utf-8");
				return JSON.parse(text);
			} catch (error) {
				throw new Error(`[JsonProcessor] Invalid JSON: ${error.message}`);
			}
		}
		/**
		 * Detect PII in JSON data
		 *
		 * Every primitive value is stringified and passed to
		 * `detector.detect()` exactly once; all values are scanned concurrently.
		 *
		 * @param data Parsed JSON value to scan.
		 * @param detector Object with an async `detect(text)` returning `{ detections }`.
		 * @param options Overrides for `this.defaultOptions`.
		 * @returns Detection result with `original`/`redacted` JSON text,
		 *   `detections`, `redactionMap`, `stats`, `pathsDetected` and
		 *   `matchesByPath`.
		 */
		async detect(data, detector, options) {
			const opts = {
				...this.defaultOptions,
				...options
			};
			const pathsDetected = [];
			const matchesByPath = {};
			const allDetections = [];
			const promises = [];
			this.traverse(data, "", opts, (valuePath, value, key) => {
				promises.push((async () => {
					if (this.shouldSkip(valuePath, opts.skipPaths)) return;
					if (this.shouldAlwaysRedact(valuePath, opts.alwaysRedact)) {
						const detection = {
							type: "SENSITIVE_FIELD",
							value: String(value),
							placeholder: `[SENSITIVE_FIELD]`,
							position: [0, String(value).length],
							severity: "high",
							confidence: 1
						};
						matchesByPath[valuePath] = [detection];
						pathsDetected.push(valuePath);
						allDetections.push(detection);
						return;
					}
					if (opts.scanKeys && key) {
						const keyResult = await detector.detect(key);
						if (keyResult.detections.length > 0) {
							// Key hits are filed under a synthetic path so they
							// never collide with the value's own path.
							const keyPath = `${valuePath}.__key__`;
							matchesByPath[keyPath] = keyResult.detections;
							pathsDetected.push(keyPath);
							allDetections.push(...keyResult.detections);
						}
					}
					const valueStr = String(value);
					const result = await detector.detect(valueStr);
					if (result.detections.length > 0) {
						const boostedDetections = this.boostConfidenceFromKey(result.detections, key, opts.piiIndicatorKeys);
						matchesByPath[valuePath] = boostedDetections;
						pathsDetected.push(valuePath);
						allDetections.push(...boostedDetections);
					}
				})());
			});
			await Promise.all(promises);
			const original = JSON.stringify(data);
			const redacted = this.redact(data, {
				original,
				redacted: original,
				detections: allDetections,
				redactionMap: {},
				stats: { piiCount: allDetections.length },
				pathsDetected,
				matchesByPath
			}, opts);
			const redactionMap = {};
			allDetections.forEach((det) => {
				redactionMap[det.placeholder] = det.value;
			});
			return {
				original,
				redacted: typeof redacted === "string" ? redacted : JSON.stringify(redacted),
				detections: allDetections,
				redactionMap,
				stats: { piiCount: allDetections.length },
				pathsDetected,
				matchesByPath
			};
		}
		/**
		 * Redact PII in JSON data
		 *
		 * @param data Parsed JSON value.
		 * @param detectionResult Result as produced by `detect()`.
		 * @param options Overrides; `preserveStructure: false` selects the
		 *   text-based fallback.
		 * @returns The redacted value.
		 */
		redact(data, detectionResult, options) {
			const opts = {
				...this.defaultOptions,
				...options
			};
			// NOTE(review): results built by detect() carry positions relative
			// to each individual value string, while the fallback applies them
			// to the pretty-printed document text - confirm this is intended.
			if (!opts.preserveStructure) return this.parse(this.redactText(JSON.stringify(data, null, 2), detectionResult));
			return this.redactPreservingStructure(data, detectionResult.pathsDetected);
		}
		/**
		 * Redact specific paths in JSON while preserving structure
		 *
		 * A value at a listed path is replaced by a neutral value of the same
		 * kind: "[REDACTED]" for strings, 0 for numbers, false for booleans,
		 * [] / {} for arrays / objects; null stays null.
		 *
		 * @param data Parsed JSON value.
		 * @param pathsToRedact Paths whose values are replaced.
		 * @returns A redacted copy; containers are rebuilt, not mutated.
		 */
		redactPreservingStructure(data, pathsToRedact) {
			const pathSet = new Set(pathsToRedact);
			const redactValue = (value, currentPath) => {
				if (pathSet.has(currentPath)) {
					if (typeof value === "string") return "[REDACTED]";
					else if (typeof value === "number") return 0;
					else if (typeof value === "boolean") return false;
					else if (value === null) return null;
					else if (Array.isArray(value)) return [];
					else if (typeof value === "object") return {};
					return "[REDACTED]";
				}
				if (Array.isArray(value)) return value.map((item, index) => redactValue(item, `${currentPath}[${index}]`));
				if (value !== null && typeof value === "object") {
					const result = {};
					for (const [key, val] of Object.entries(value)) result[key] = redactValue(val, currentPath ? `${currentPath}.${key}` : key);
					return result;
				}
				return value;
			};
			return redactValue(data, "");
		}
		/**
		 * Simple text-based redaction (fallback)
		 *
		 * @param text Text to redact.
		 * @param detectionResult Result whose `detections` carry `[start, end]`
		 *   positions into `text` and a `placeholder`.
		 * @returns Text with each detected span replaced by its placeholder.
		 */
		redactText(text, detectionResult) {
			let redacted = text;
			// Replace from the end so earlier offsets stay valid.
			const sortedDetections = [...detectionResult.detections].sort((a, b) => b.position[0] - a.position[0]);
			for (const detection of sortedDetections) {
				const [start, end] = detection.position;
				redacted = redacted.slice(0, start) + detection.placeholder + redacted.slice(end);
			}
			return redacted;
		}
		/**
		 * Traverse JSON structure and call callback for each value
		 *
		 * The callback fires exactly once per primitive (string, number,
		 * boolean) as `callback(path, value, key)`; `key` is the owning
		 * property name and is undefined for array items and a primitive root.
		 *
		 * @param obj Value to walk.
		 * @param path Path of `obj` ("" for the root).
		 * @param options Options providing `maxDepth`.
		 * @param callback Visitor for primitive values.
		 * @param depth Current nesting depth (internal).
		 * @param key Property name under which `obj` is stored (internal).
		 * @throws Error when `depth` exceeds `options.maxDepth`.
		 */
		traverse(obj, path, options, callback, depth = 0, key = void 0) {
			if (depth > options.maxDepth) throw new Error(`[JsonProcessor] Maximum depth (${options.maxDepth}) exceeded`);
			if (obj === null || obj === void 0) return;
			if (Array.isArray(obj)) {
				obj.forEach((item, index) => {
					const itemPath = path ? `${path}[${index}]` : `[${index}]`;
					this.traverse(item, itemPath, options, callback, depth + 1);
				});
				return;
			}
			if (typeof obj === "object") {
				for (const [childKey, value] of Object.entries(obj)) {
					const valuePath = path ? `${path}.${childKey}` : childKey;
					// The key travels with the recursive call so the single
					// callback below can report it; calling the callback here
					// as well would visit every primitive twice.
					this.traverse(value, valuePath, options, callback, depth + 1, childKey);
				}
				return;
			}
			if (this.isPrimitive(obj)) callback(path, obj, key);
		}
		/**
		 * Check if value is primitive (string, number, boolean)
		 */
		isPrimitive(value) {
			return typeof value === "string" || typeof value === "number" || typeof value === "boolean";
		}
		/**
		 * Check if path should be skipped
		 *
		 * @param path Path of the value under inspection.
		 * @param skipPaths Exact paths or patterns where `*` matches one segment.
		 */
		shouldSkip(path, skipPaths) {
			return matchesAnyPathPattern(path, skipPaths);
		}
		/**
		 * Check if path should always be redacted
		 *
		 * @param path Path of the value under inspection.
		 * @param alwaysRedact Exact paths or patterns where `*` matches one segment.
		 */
		shouldAlwaysRedact(path, alwaysRedact) {
			return matchesAnyPathPattern(path, alwaysRedact);
		}
		/**
		 * Boost confidence if key name indicates PII
		 *
		 * @param detections Detections found in the value.
		 * @param key Property name of the value (may be undefined).
		 * @param piiIndicatorKeys Substrings compared case-insensitively with `key`.
		 * @returns The same array when no indicator matches, otherwise copies
		 *   with confidence multiplied by 1.2 and capped at 1.
		 */
		boostConfidenceFromKey(detections, key, piiIndicatorKeys) {
			if (!key) return detections;
			const keyLower = key.toLowerCase();
			if (!piiIndicatorKeys.some((indicator) => keyLower.includes(indicator.toLowerCase()))) return detections;
			return detections.map((detection) => ({
				...detection,
				confidence: Math.min(1, (detection.confidence || .5) * 1.2)
			}));
		}
		/**
		 * Extract all text values from JSON for simple text-based detection
		 *
		 * @param data Parsed JSON value.
		 * @param options Overrides; with `scanKeys` the property names are included.
		 * @returns String values (and keys, if requested) joined by single spaces.
		 */
		extractText(data, options) {
			const opts = {
				...this.defaultOptions,
				...options
			};
			const textParts = [];
			this.traverse(data, "", opts, (_path, value, key) => {
				if (opts.scanKeys && key) textParts.push(key);
				if (typeof value === "string") textParts.push(value);
			});
			return textParts.join(" ");
		}
		/**
		 * Validate JSON buffer/string
		 */
		isValid(input) {
			try {
				this.parse(input);
				return true;
			} catch {
				return false;
			}
		}
		/**
		 * Get JSON Lines (JSONL) support - split by newlines and parse each line
		 *
		 * @param input JSONL text, or an object whose `toString("utf-8")` yields it.
		 * @returns Parsed documents; blank lines are ignored.
		 * @throws Error naming the 1-based line number in the input (blank
		 *   lines included) of the first line that fails to parse.
		 */
		parseJsonLines(input) {
			const text = typeof input === "string" ? input : input.toString("utf-8");
			const documents = [];
			text.split("\n").forEach((line, index) => {
				if (line.trim().length === 0) return;
				try {
					documents.push(JSON.parse(line));
				} catch (error) {
					throw new Error(`[JsonProcessor] Invalid JSON at line ${index + 1}: ${error.message}`);
				}
			});
			return documents;
		}
		/**
		 * Detect PII in JSON Lines format
		 *
		 * @returns One `detect()` result per non-blank line, in input order.
		 */
		async detectJsonLines(input, detector, options) {
			const documents = this.parseJsonLines(input);
			return Promise.all(documents.map((doc) => this.detect(doc, detector, options)));
		}
	};
}));
16435
+
16436
+ //#endregion
16437
+ //#region src/document/CsvProcessor.ts
16438
/**
 * Factory for CsvProcessor.
 *
 * @returns A freshly constructed CsvProcessor.
 */
function createCsvProcessor() {
	const processor = new CsvProcessor();
	return processor;
}
16444
+ var CsvProcessor;
16445
+ var init_CsvProcessor = __esmMin((() => {
16446
+ CsvProcessor = class {
16447
+ constructor() {
16448
+ this.defaultOptions = {
16449
+ quote: "\"",
16450
+ escape: "\"",
16451
+ skipEmptyLines: true,
16452
+ piiIndicatorNames: [
16453
+ "email",
16454
+ "e-mail",
16455
+ "mail",
16456
+ "email_address",
16457
+ "phone",
16458
+ "tel",
16459
+ "telephone",
16460
+ "mobile",
16461
+ "phone_number",
16462
+ "ssn",
16463
+ "social_security",
16464
+ "social_security_number",
16465
+ "address",
16466
+ "street",
16467
+ "street_address",
16468
+ "city",
16469
+ "zip",
16470
+ "zipcode",
16471
+ "postal",
16472
+ "postcode",
16473
+ "name",
16474
+ "firstname",
16475
+ "first_name",
16476
+ "lastname",
16477
+ "last_name",
16478
+ "fullname",
16479
+ "full_name",
16480
+ "password",
16481
+ "pwd",
16482
+ "secret",
16483
+ "token",
16484
+ "api_key",
16485
+ "card",
16486
+ "credit_card",
16487
+ "creditcard",
16488
+ "card_number",
16489
+ "account",
16490
+ "account_number",
16491
+ "iban",
16492
+ "swift",
16493
+ "passport",
16494
+ "passport_number",
16495
+ "license",
16496
+ "licence",
16497
+ "driver_license",
16498
+ "dob",
16499
+ "date_of_birth",
16500
+ "birth_date",
16501
+ "birthdate"
16502
+ ],
16503
+ treatFirstRowAsHeader: true
16504
+ };
16505
+ }
16506
+ /**
16507
+ * Parse CSV from buffer or string
16508
+ */
16509
+ parse(input, options) {
16510
+ const opts = {
16511
+ ...this.defaultOptions,
16512
+ ...options
16513
+ };
16514
+ const text = typeof input === "string" ? input : input.toString("utf-8");
16515
+ const delimiter = opts.delimiter || this.detectDelimiter(text);
16516
+ const lines = text.split(/\r?\n/);
16517
+ const rows = [];
16518
+ let rowIndex = 0;
16519
+ for (let i = 0; i < lines.length; i++) {
16520
+ const line = lines[i];
16521
+ if (opts.skipEmptyLines && line.trim().length === 0) continue;
16522
+ if (opts.maxRows !== void 0 && rowIndex >= opts.maxRows) break;
16523
+ const values = this.parseRow(line, delimiter, opts.quote, opts.escape);
16524
+ rows.push({
16525
+ index: rowIndex,
16526
+ values
16527
+ });
16528
+ rowIndex++;
16529
+ }
16530
+ return rows;
16531
+ }
16532
+ /**
16533
+ * Detect PII in CSV data
16534
+ */
16535
+ async detect(input, detector, options) {
16536
+ const opts = {
16537
+ ...this.defaultOptions,
16538
+ ...options
16539
+ };
16540
+ const rows = this.parse(input, options);
16541
+ if (rows.length === 0) {
16542
+ const original = typeof input === "string" ? input : input.toString("utf-8");
16543
+ return {
16544
+ original,
16545
+ redacted: original,
16546
+ detections: [],
16547
+ redactionMap: {},
16548
+ stats: { piiCount: 0 },
16549
+ rowCount: 0,
16550
+ columnCount: 0,
16551
+ columnStats: {},
16552
+ matchesByCell: []
16553
+ };
16554
+ }
16555
+ const hasHeader = opts.hasHeader !== void 0 ? opts.hasHeader : this.detectHeader(rows);
16556
+ const headers = hasHeader && rows.length > 0 ? rows[0].values : void 0;
16557
+ const dataRows = hasHeader ? rows.slice(1) : rows;
16558
+ const columnCount = rows[0].values.length;
16559
+ const columnNameToIndex = /* @__PURE__ */ new Map();
16560
+ if (headers) headers.forEach((header, index) => {
16561
+ columnNameToIndex.set(header.toLowerCase().trim(), index);
16562
+ });
16563
+ const alwaysRedactCols = new Set(opts.alwaysRedactColumns || []);
16564
+ if (opts.alwaysRedactColumnNames && headers) opts.alwaysRedactColumnNames.forEach((name) => {
16565
+ const index = columnNameToIndex.get(name.toLowerCase().trim());
16566
+ if (index !== void 0) alwaysRedactCols.add(index);
16567
+ });
16568
+ const skipCols = new Set(opts.skipColumns || []);
16569
+ const columnStats = {};
16570
+ const matchesByCell = [];
16571
+ const allDetections = [];
16572
+ for (let col = 0; col < columnCount; col++) columnStats[col] = {
16573
+ columnIndex: col,
16574
+ columnName: headers?.[col],
16575
+ piiCount: 0,
16576
+ piiPercentage: 0,
16577
+ piiTypes: []
16578
+ };
16579
+ for (const row of dataRows) for (let col = 0; col < row.values.length; col++) {
16580
+ if (skipCols.has(col)) continue;
16581
+ const cellValue = row.values[col];
16582
+ if (alwaysRedactCols.has(col)) {
16583
+ const detection = {
16584
+ type: "SENSITIVE_COLUMN",
16585
+ value: cellValue,
16586
+ placeholder: `[SENSITIVE_COLUMN_${col}]`,
16587
+ position: [0, cellValue.length],
16588
+ severity: "high",
16589
+ confidence: 1
16590
+ };
16591
+ matchesByCell.push({
16592
+ row: row.index,
16593
+ column: col,
16594
+ columnName: headers?.[col],
16595
+ value: cellValue,
16596
+ matches: [detection]
16597
+ });
16598
+ allDetections.push(detection);
16599
+ columnStats[col].piiCount++;
16600
+ continue;
16601
+ }
16602
+ const result = await detector.detect(cellValue);
16603
+ if (result.detections.length > 0) {
16604
+ const boostedDetections = this.boostConfidenceFromColumnName(result.detections, headers?.[col], opts.piiIndicatorNames || []);
16605
+ matchesByCell.push({
16606
+ row: row.index,
16607
+ column: col,
16608
+ columnName: headers?.[col],
16609
+ value: cellValue,
16610
+ matches: boostedDetections
16611
+ });
16612
+ allDetections.push(...boostedDetections);
16613
+ columnStats[col].piiCount += boostedDetections.length;
16614
+ const columnTypes = new Set(columnStats[col].piiTypes);
16615
+ boostedDetections.forEach((d) => columnTypes.add(d.type));
16616
+ columnStats[col].piiTypes = Array.from(columnTypes);
16617
+ }
16618
+ }
16619
+ for (let col = 0; col < columnCount; col++) {
16620
+ const rowsWithPii = matchesByCell.filter((m) => m.column === col).length;
16621
+ columnStats[col].piiPercentage = dataRows.length > 0 ? rowsWithPii / dataRows.length * 100 : 0;
16622
+ }
16623
+ const original = typeof input === "string" ? input : input.toString("utf-8");
16624
+ const redacted = this.redact(original, {
16625
+ original,
16626
+ redacted: original,
16627
+ detections: allDetections,
16628
+ redactionMap: {},
16629
+ stats: { piiCount: allDetections.length },
16630
+ rowCount: dataRows.length,
16631
+ columnCount,
16632
+ headers,
16633
+ columnStats,
16634
+ matchesByCell
16635
+ }, opts);
16636
+ const redactionMap = {};
16637
+ allDetections.forEach((det) => {
16638
+ redactionMap[det.placeholder] = det.value;
16639
+ });
16640
+ return {
16641
+ original,
16642
+ redacted,
16643
+ detections: allDetections,
16644
+ redactionMap,
16645
+ stats: { piiCount: allDetections.length },
16646
+ rowCount: dataRows.length,
16647
+ columnCount,
16648
+ headers: headers?.filter((h) => h !== void 0),
16649
+ columnStats,
16650
+ matchesByCell
16651
+ };
16652
+ }
16653
+ /**
16654
+ * Redact PII in CSV data
16655
+ */
16656
+ redact(input, detectionResult, options) {
16657
+ const opts = {
16658
+ ...this.defaultOptions,
16659
+ ...options
16660
+ };
16661
+ const rows = this.parse(input, options);
16662
+ if (rows.length === 0) return "";
16663
+ const delimiter = opts.delimiter || this.detectDelimiter(typeof input === "string" ? input : input.toString("utf-8"));
16664
+ const hasHeader = detectionResult.headers !== void 0;
16665
+ const redactionMap = /* @__PURE__ */ new Map();
16666
+ for (const cellMatch of detectionResult.matchesByCell) {
16667
+ if (!redactionMap.has(cellMatch.row)) redactionMap.set(cellMatch.row, /* @__PURE__ */ new Map());
16668
+ redactionMap.get(cellMatch.row).set(cellMatch.column, "[REDACTED]");
16669
+ }
16670
+ const outputRows = [];
16671
+ for (let i = 0; i < rows.length; i++) {
16672
+ const row = rows[i];
16673
+ if (hasHeader && i === 0) outputRows.push(this.formatRow(row.values, delimiter, opts.quote));
16674
+ else {
16675
+ const rowIndex = hasHeader ? i - 1 : i;
16676
+ const redactedValues = row.values.map((value, colIndex) => {
16677
+ return redactionMap.get(rowIndex)?.get(colIndex) || value;
16678
+ });
16679
+ outputRows.push(this.formatRow(redactedValues, delimiter, opts.quote));
16680
+ }
16681
+ }
16682
+ return outputRows.join("\n");
16683
+ }
16684
+ /**
16685
+ * Parse a single CSV row
16686
+ */
16687
+ parseRow(line, delimiter, quote, _escape) {
16688
+ const values = [];
16689
+ let current = "";
16690
+ let inQuotes = false;
16691
+ let i = 0;
16692
+ while (i < line.length) {
16693
+ const char = line[i];
16694
+ const nextChar = line[i + 1];
16695
+ if (char === quote) if (inQuotes && nextChar === quote) {
16696
+ current += quote;
16697
+ i += 2;
16698
+ } else {
16699
+ inQuotes = !inQuotes;
16700
+ i++;
16701
+ }
16702
+ else if (char === delimiter && !inQuotes) {
16703
+ values.push(current);
16704
+ current = "";
16705
+ i++;
16706
+ } else {
16707
+ current += char;
16708
+ i++;
16709
+ }
16710
+ }
16711
+ values.push(current);
16712
+ return values;
16713
+ }
16714
+ /**
16715
+ * Format a row as CSV
16716
+ */
16717
+ formatRow(values, delimiter, quote) {
16718
+ return values.map((value) => {
16719
+ if (value.includes(delimiter) || value.includes(quote) || value.includes("\n")) return `${quote}${value.replace(new RegExp(quote, "g"), quote + quote)}${quote}`;
16720
+ return value;
16721
+ }).join(delimiter);
16722
+ }
16723
+ /**
16724
+ * Auto-detect CSV delimiter
16725
+ */
16726
+ detectDelimiter(text) {
16727
+ const delimiters = [
16728
+ ",",
16729
+ " ",
16730
+ ";",
16731
+ "|"
16732
+ ];
16733
+ const lines = text.split(/\r?\n/).slice(0, 5);
16734
+ let bestDelimiter = ",";
16735
+ let bestScore = 0;
16736
+ for (const delimiter of delimiters) {
16737
+ const counts = lines.map((line) => {
16738
+ let count = 0;
16739
+ let inQuotes = false;
16740
+ for (const char of line) {
16741
+ if (char === "\"") inQuotes = !inQuotes;
16742
+ if (char === delimiter && !inQuotes) count++;
16743
+ }
16744
+ return count;
16745
+ });
16746
+ if (counts.length > 0 && counts[0] > 0) {
16747
+ const avg = counts.reduce((a, b) => a + b, 0) / counts.length;
16748
+ const score = avg / (counts.reduce((sum, c) => sum + Math.pow(c - avg, 2), 0) / counts.length + 1);
16749
+ if (score > bestScore) {
16750
+ bestScore = score;
16751
+ bestDelimiter = delimiter;
16752
+ }
16753
+ }
16754
+ }
16755
+ return bestDelimiter;
16756
+ }
16757
+ /**
16758
+ * Detect if first row is likely a header
16759
+ */
16760
+ detectHeader(rows) {
16761
+ if (rows.length < 2) return false;
16762
+ const firstRow = rows[0].values;
16763
+ const secondRow = rows[1].values;
16764
+ if (firstRow.reduce((sum, v) => sum + v.length, 0) / firstRow.length > secondRow.reduce((sum, v) => sum + v.length, 0) / secondRow.length * 1.5) return false;
16765
+ const firstRowNumeric = firstRow.filter((v) => !isNaN(Number(v)) && v.trim() !== "").length;
16766
+ return firstRow.length - firstRowNumeric >= firstRowNumeric;
16767
+ }
16768
+ /**
16769
+ * Boost confidence if column name indicates PII
16770
+ */
16771
+ boostConfidenceFromColumnName(detections, columnName, piiIndicatorNames) {
16772
+ if (!columnName) return detections;
16773
+ const nameLower = columnName.toLowerCase().trim();
16774
+ if (!piiIndicatorNames.some((indicator) => nameLower.includes(indicator.toLowerCase()))) return detections;
16775
+ return detections.map((detection) => ({
16776
+ ...detection,
16777
+ confidence: Math.min(1, (detection.confidence || .5) * 1.2)
16778
+ }));
16779
+ }
16780
+ /**
16781
+ * Extract all cell values as text
16782
+ */
16783
+ extractText(input, options) {
16784
+ const rows = this.parse(input, options);
16785
+ const textParts = [];
16786
+ for (const row of rows) for (const value of row.values) if (value.trim().length > 0) textParts.push(value);
16787
+ return textParts.join(" ");
16788
+ }
16789
+ /**
16790
+ * Get column statistics without full PII detection
16791
+ */
16792
+ getColumnInfo(input, options) {
16793
+ const rows = this.parse(input, options);
16794
+ if (rows.length === 0) return {
16795
+ columnCount: 0,
16796
+ rowCount: 0,
16797
+ sampleRows: []
16798
+ };
16799
+ const opts = {
16800
+ ...this.defaultOptions,
16801
+ ...options
16802
+ };
16803
+ const hasHeader = opts.hasHeader !== void 0 ? opts.hasHeader : this.detectHeader(rows);
16804
+ const headers = hasHeader && rows.length > 0 ? rows[0].values : void 0;
16805
+ const dataRows = hasHeader ? rows.slice(1) : rows;
16806
+ const sampleRows = dataRows.slice(0, 5).map((r) => r.values);
16807
+ return {
16808
+ columnCount: rows[0].values.length,
16809
+ rowCount: dataRows.length,
16810
+ headers,
16811
+ sampleRows
16812
+ };
16813
+ }
16814
+ };
16815
+ }));
16816
+
16817
+ //#endregion
16818
+ //#region src/document/XlsxProcessor.ts
16819
+ /**
16820
+ * Create an XLSX processor instance
16821
+ */
16822
function createXlsxProcessor() {
	// Factory wrapper so callers do not need to reference the class directly.
	const processor = new XlsxProcessor();
	return processor;
}
16825
var XlsxProcessor;
var init_XlsxProcessor = __esmMin((() => {
	/**
	 * Detects and redacts PII inside Excel workbooks using the optional `xlsx`
	 * package. Methods that read workbook data throw when the package could not
	 * be loaded; call isAvailable() first to check.
	 */
	XlsxProcessor = class {
		constructor() {
			this.defaultOptions = {
				// Header names (matched as case-insensitive substrings) that make a
				// detection in that column more credible.
				piiIndicatorNames: [
					"email",
					"e-mail",
					"mail",
					"email_address",
					"phone",
					"tel",
					"telephone",
					"mobile",
					"phone_number",
					"ssn",
					"social_security",
					"social_security_number",
					"address",
					"street",
					"street_address",
					"city",
					"zip",
					"zipcode",
					"postal",
					"postcode",
					"name",
					"firstname",
					"first_name",
					"lastname",
					"last_name",
					"fullname",
					"full_name",
					"password",
					"pwd",
					"secret",
					"token",
					"api_key",
					"card",
					"credit_card",
					"creditcard",
					"card_number",
					"account",
					"account_number",
					"iban",
					"swift",
					"passport",
					"passport_number",
					"license",
					"licence",
					"driver_license",
					"dob",
					"date_of_birth",
					"birth_date",
					"birthdate"
				],
				preserveFormatting: true,
				preserveFormulas: true
			};
			// Optional dependency: when it cannot be loaded this.xlsx stays unset
			// and isAvailable() reports false instead of the constructor throwing.
			try {
				this.xlsx = require("xlsx");
			} catch {}
		}
		/**
		 * Check if XLSX support is available
		 * @returns {boolean} true when the `xlsx` package was loaded
		 */
		isAvailable() {
			return !!this.xlsx;
		}
		/**
		 * Parse XLSX from buffer
		 * @param {Buffer} buffer - raw workbook bytes
		 * @returns {object} the workbook object produced by `xlsx.read`
		 * @throws {Error} when the `xlsx` package is missing or parsing fails
		 */
		parse(buffer) {
			if (!this.xlsx) throw new Error("[XlsxProcessor] XLSX support requires xlsx package. Install with: npm install xlsx");
			try {
				return this.xlsx.read(buffer, {
					type: "buffer",
					cellFormula: true,
					cellStyles: true
				});
			} catch (error) {
				// Keep the original error reachable for debugging.
				throw new Error(`[XlsxProcessor] Failed to parse XLSX: ${error.message}`, { cause: error });
			}
		}
		/**
		 * Detect PII in XLSX data
		 * @param {Buffer} buffer - raw workbook bytes
		 * @param {object} detector - object exposing `detect(text)` that resolves to `{ detections }`
		 * @param {object} [options] - overrides merged over the default options
		 * @returns {Promise<object>} original/redacted text, flat detection list,
		 *   placeholder-to-value map, stats and per-sheet results
		 * @throws {Error} when the `xlsx` package is missing or the workbook cannot be parsed
		 */
		async detect(buffer, detector, options) {
			if (!this.xlsx) throw new Error("[XlsxProcessor] XLSX support requires xlsx package. Install with: npm install xlsx");
			const opts = {
				...this.defaultOptions,
				...options
			};
			const workbook = this.parse(buffer);
			const sheetNames = this.getSheetNamesToProcess(workbook, opts);
			const sheetResults = [];
			const allDetections = [];
			for (let sheetIndex = 0; sheetIndex < sheetNames.length; sheetIndex++) {
				const sheetName = sheetNames[sheetIndex];
				const sheetResult = await this.detectSheet(workbook.Sheets[sheetName], sheetName, sheetIndex, detector, opts);
				sheetResults.push(sheetResult);
				// Append one at a time: spreading a very large match list into
				// push() can exceed the engine's argument limit.
				for (const cell of sheetResult.matchesByCell) {
					for (const match of cell.matches) allDetections.push(match);
				}
			}
			const original = this.extractText(buffer, options);
			// The redacted text is obtained by redacting a copy of the workbook
			// and extracting its text again.
			const redactedBuffer = this.redact(buffer, {
				original,
				redacted: original,
				detections: allDetections,
				redactionMap: {},
				stats: { piiCount: allDetections.length },
				sheetResults,
				sheetCount: sheetResults.length
			}, options);
			const redacted = this.extractText(redactedBuffer, options);
			const redactionMap = {};
			for (const det of allDetections) redactionMap[det.placeholder] = det.value;
			return {
				original,
				redacted,
				detections: allDetections,
				redactionMap,
				stats: { piiCount: allDetections.length },
				sheetResults,
				sheetCount: sheetResults.length
			};
		}
		/**
		 * Detect PII in a single sheet
		 *
		 * Column indices in the result (`column`, keys of `columnStats`) are
		 * relative to the first column of the sheet's used range, while `cell`
		 * and `columnLetter` are absolute sheet coordinates. `row` is the
		 * 1-based sheet row.
		 * @param {object} sheet - worksheet object (cells keyed by A1 reference)
		 * @param {string} sheetName
		 * @param {number} sheetIndex - position within the processed sheets
		 * @param {object} detector - object exposing `detect(text)`
		 * @param {object} options - merged options
		 * @returns {Promise<object>} per-sheet detection result
		 */
		async detectSheet(sheet, sheetName, sheetIndex, detector, options) {
			const range = this.xlsx.utils.decode_range(sheet["!ref"] || "A1");
			const startRow = range.s.r;
			// maxRows counts from the first row of the range (header included).
			const endRow = options.maxRows !== void 0 ? Math.min(range.e.r, startRow + options.maxRows - 1) : range.e.r;
			const startCol = range.s.c;
			const endCol = range.e.c;
			const columnCount = endCol - startCol + 1;
			const hasHeader = options.hasHeader !== void 0 ? options.hasHeader : this.detectHeader(sheet, range);
			const headers = hasHeader ? this.getRowValues(sheet, startRow, startCol, endCol) : void 0;
			const dataStartRow = hasHeader ? startRow + 1 : startRow;
			const columnNameToIndex = /* @__PURE__ */ new Map();
			if (headers) headers.forEach((header, index) => {
				if (header) columnNameToIndex.set(header.toLowerCase().trim(), index);
			});
			const alwaysRedactCols = new Set(options.alwaysRedactColumns || []);
			if (options.alwaysRedactColumnNames && headers) options.alwaysRedactColumnNames.forEach((name) => {
				const index = columnNameToIndex.get(name.toLowerCase().trim());
				if (index !== void 0) alwaysRedactCols.add(index);
			});
			const skipCols = new Set(options.skipColumns || []);
			const columnStats = {};
			for (let col = 0; col <= endCol - startCol; col++) columnStats[col] = {
				columnIndex: col,
				// Offset by startCol so the letter matches the real sheet column
				// even when the used range does not begin at column A.
				columnLetter: this.columnToLetter(startCol + col),
				columnName: headers?.[col],
				piiCount: 0,
				piiPercentage: 0,
				piiTypes: []
			};
			const matchesByCell = [];
			// Number of flagged cells per relative column, used for piiPercentage.
			const flaggedCellCounts = {};
			for (let row = dataStartRow; row <= endRow; row++) for (let col = startCol; col <= endCol; col++) {
				const colIndex = col - startCol;
				if (skipCols.has(colIndex)) continue;
				const cellRef = this.xlsx.utils.encode_cell({
					r: row,
					c: col
				});
				const cell = sheet[cellRef];
				if (!cell) continue;
				const cellValue = this.getCellValue(cell);
				if (!cellValue) continue;
				const cellFormula = cell.f;
				if (alwaysRedactCols.has(colIndex)) {
					// Columns marked "always redact" skip the detector entirely.
					const detection = {
						type: "SENSITIVE_COLUMN",
						value: cellValue,
						placeholder: `[SENSITIVE_COLUMN_${colIndex}]`,
						position: [0, cellValue.length],
						severity: "high",
						confidence: 1
					};
					matchesByCell.push({
						cell: cellRef,
						row: row + 1,
						column: colIndex,
						columnLetter: this.columnToLetter(col),
						columnName: headers?.[colIndex],
						value: cellValue,
						formula: cellFormula,
						matches: [detection]
					});
					flaggedCellCounts[colIndex] = (flaggedCellCounts[colIndex] || 0) + 1;
					columnStats[colIndex].piiCount++;
					continue;
				}
				const result = await detector.detect(cellValue);
				if (result.detections.length > 0) {
					const boostedDetections = this.boostConfidenceFromColumnName(result.detections, headers?.[colIndex], options.piiIndicatorNames || []);
					matchesByCell.push({
						cell: cellRef,
						row: row + 1,
						column: colIndex,
						columnLetter: this.columnToLetter(col),
						columnName: headers?.[colIndex],
						value: cellValue,
						formula: cellFormula,
						matches: boostedDetections
					});
					flaggedCellCounts[colIndex] = (flaggedCellCounts[colIndex] || 0) + 1;
					columnStats[colIndex].piiCount += boostedDetections.length;
					const columnTypes = new Set(columnStats[colIndex].piiTypes);
					boostedDetections.forEach((d) => columnTypes.add(d.type));
					columnStats[colIndex].piiTypes = Array.from(columnTypes);
				}
			}
			const dataRowCount = endRow - dataStartRow + 1;
			for (let col = 0; col <= endCol - startCol; col++) {
				const cellsWithPii = flaggedCellCounts[col] || 0;
				columnStats[col].piiPercentage = dataRowCount > 0 ? cellsWithPii / dataRowCount * 100 : 0;
			}
			return {
				sheetName,
				sheetIndex,
				rowCount: dataRowCount,
				columnCount,
				headers: headers?.filter((h) => h !== void 0),
				columnStats,
				matchesByCell
			};
		}
		/**
		 * Redact PII in XLSX data
		 * @param {Buffer} buffer - raw workbook bytes
		 * @param {object} detectionResult - result carrying `sheetResults[].matchesByCell`
		 * @param {object} [options] - overrides merged over the default options
		 * @returns {*} output of `xlsx.write` with `type: "buffer"`, `bookType: "xlsx"`
		 * @throws {Error} when the `xlsx` package is missing or the workbook cannot be parsed
		 */
		redact(buffer, detectionResult, options) {
			if (!this.xlsx) throw new Error("[XlsxProcessor] XLSX support requires xlsx package. Install with: npm install xlsx");
			const opts = {
				...this.defaultOptions,
				...options
			};
			const workbook = this.parse(buffer);
			for (const sheetResult of detectionResult.sheetResults) {
				const sheet = workbook.Sheets[sheetResult.sheetName];
				// A result that names a sheet absent from this workbook is skipped
				// rather than crashing the whole redaction.
				if (!sheet) continue;
				for (const cellMatch of sheetResult.matchesByCell) {
					const cell = sheet[cellMatch.cell];
					if (!cell) continue;
					cell.v = "[REDACTED]";
					cell.w = "[REDACTED]";
					// NOTE(review): with preserveFormulas at its default (true) the
					// formula stays on the redacted cell — confirm that spreadsheet
					// applications will not recompute and re-expose the value.
					if (!opts.preserveFormulas) delete cell.f;
					cell.t = "s";
				}
			}
			return this.xlsx.write(workbook, {
				type: "buffer",
				bookType: "xlsx"
			});
		}
		/**
		 * Get cell value as string
		 * @param {object} cell - worksheet cell object
		 * @returns {string} formatted text (`w`) when present, else the raw value (`v`), else ""
		 */
		getCellValue(cell) {
			if (!cell) return "";
			if (cell.w !== void 0) return String(cell.w);
			if (cell.v !== void 0) return String(cell.v);
			return "";
		}
		/**
		 * Get row values
		 * @returns {Array<string|undefined>} one entry per column; undefined where no cell exists
		 */
		getRowValues(sheet, row, startCol, endCol) {
			const values = [];
			for (let col = startCol; col <= endCol; col++) {
				const cell = sheet[this.xlsx.utils.encode_cell({
					r: row,
					c: col
				})];
				values.push(cell ? this.getCellValue(cell) : void 0);
			}
			return values;
		}
		/**
		 * Detect if first row is likely a header
		 *
		 * Heuristic: not a header when there is no second row, when either row
		 * is empty, when the first row's cells are on average more than 1.5x
		 * longer than the second row's, or when numeric cells outnumber the rest.
		 */
		detectHeader(sheet, range) {
			const firstRow = this.getRowValues(sheet, range.s.r, range.s.c, range.e.c);
			const secondRow = range.s.r + 1 <= range.e.r ? this.getRowValues(sheet, range.s.r + 1, range.s.c, range.e.c) : null;
			if (!secondRow) return false;
			const firstRowValues = firstRow.filter((v) => v !== void 0);
			const secondRowValues = secondRow.filter((v) => v !== void 0);
			if (firstRowValues.length === 0 || secondRowValues.length === 0) return false;
			const meanLength = (cells) => cells.reduce((sum, v) => sum + v.length, 0) / cells.length;
			if (meanLength(firstRowValues) > meanLength(secondRowValues) * 1.5) return false;
			const firstRowNumeric = firstRowValues.filter((v) => !isNaN(Number(v)) && v.trim() !== "").length;
			return firstRowValues.length - firstRowNumeric >= firstRowNumeric;
		}
		/**
		 * Convert column index to letter (0 = A, 25 = Z, 26 = AA)
		 */
		columnToLetter(col) {
			let letter = "";
			while (col >= 0) {
				letter = String.fromCharCode(col % 26 + 65) + letter;
				// Column names have no zero digit, hence the extra -1 per step.
				col = Math.floor(col / 26) - 1;
			}
			return letter;
		}
		/**
		 * Get sheet names to process based on options
		 *
		 * `options.sheets` (names) takes precedence over `options.sheetIndices`;
		 * unknown names and out-of-range indices are dropped. With neither
		 * option every sheet is processed.
		 */
		getSheetNamesToProcess(workbook, options) {
			const allSheetNames = workbook.SheetNames;
			if (options.sheets && options.sheets.length > 0) return options.sheets.filter((name) => allSheetNames.includes(name));
			if (options.sheetIndices && options.sheetIndices.length > 0) return options.sheetIndices.filter((index) => index >= 0 && index < allSheetNames.length).map((index) => allSheetNames[index]);
			return allSheetNames;
		}
		/**
		 * Boost confidence if column name indicates PII
		 *
		 * Multiplies confidence by 1.2 (capped at 1); a missing or zero
		 * confidence is treated as 0.5. Returns the input array unchanged when
		 * the column name does not contain any indicator.
		 */
		boostConfidenceFromColumnName(detections, columnName, piiIndicatorNames) {
			if (!columnName) return detections;
			const nameLower = columnName.toLowerCase().trim();
			if (!piiIndicatorNames.some((indicator) => nameLower.includes(indicator.toLowerCase()))) return detections;
			return detections.map((detection) => ({
				...detection,
				confidence: Math.min(1, (detection.confidence || .5) * 1.2)
			}));
		}
		/**
		 * Extract all cell values as text
		 * @returns {string} non-blank cell values of the selected sheets, row by
		 *   row, joined with single spaces
		 * @throws {Error} when the `xlsx` package is missing or the workbook cannot be parsed
		 */
		extractText(buffer, options) {
			if (!this.xlsx) throw new Error("[XlsxProcessor] XLSX support requires xlsx package. Install with: npm install xlsx");
			const workbook = this.parse(buffer);
			const opts = {
				...this.defaultOptions,
				...options
			};
			const sheetNames = this.getSheetNamesToProcess(workbook, opts);
			const textParts = [];
			for (const sheetName of sheetNames) {
				const sheet = workbook.Sheets[sheetName];
				const range = this.xlsx.utils.decode_range(sheet["!ref"] || "A1");
				for (let row = range.s.r; row <= range.e.r; row++) for (let col = range.s.c; col <= range.e.c; col++) {
					const cell = sheet[this.xlsx.utils.encode_cell({
						r: row,
						c: col
					})];
					if (cell) {
						const value = this.getCellValue(cell);
						if (value.trim().length > 0) textParts.push(value);
					}
				}
			}
			return textParts.join(" ");
		}
		/**
		 * Get workbook metadata
		 * @returns {{sheetNames: string[], sheetCount: number}}
		 * @throws {Error} when the `xlsx` package is missing or the workbook cannot be parsed
		 */
		getMetadata(buffer) {
			if (!this.xlsx) throw new Error("[XlsxProcessor] XLSX support requires xlsx package. Install with: npm install xlsx");
			const workbook = this.parse(buffer);
			return {
				sheetNames: workbook.SheetNames,
				sheetCount: workbook.SheetNames.length
			};
		}
	};
}));
17195
+
17196
+ //#endregion
17197
+ //#region src/document/DocumentProcessor.ts
17198
+ /**
17199
+ * Create a document processor instance
17200
+ */
17201
function createDocumentProcessor() {
	// Factory wrapper so callers do not need to reference the class directly.
	const processor = new DocumentProcessor();
	return processor;
}
17204
var DocumentProcessor;
var init_DocumentProcessor = __esmMin((() => {
	init_OCRProcessor();
	init_JsonProcessor();
	init_CsvProcessor();
	init_XlsxProcessor();
	/**
	 * Format-aware text and metadata extraction for document buffers. PDF and
	 * DOCX handling depend on optional packages (`pdf-parse`, `mammoth`); the
	 * other formats are delegated to the OCR, JSON, CSV and XLSX processors.
	 */
	DocumentProcessor = class {
		constructor() {
			// Optional dependencies: when one cannot be loaded the matching
			// field stays unset and the format is reported as unsupported.
			try {
				this.pdfParse = require("pdf-parse");
			} catch {}
			try {
				this.mammoth = require("mammoth");
			} catch {}
			this.ocrProcessor = new OCRProcessor();
			this.jsonProcessor = new JsonProcessor();
			this.csvProcessor = new CsvProcessor();
			this.xlsxProcessor = new XlsxProcessor();
		}
		/**
		 * Extract text from document buffer
		 * @param {Buffer} buffer - raw document bytes
		 * @param {object} [options] - `format` skips detection; `maxSize` (bytes,
		 *   default 50 MiB) caps the accepted size; other fields are passed to
		 *   the format-specific extractor
		 * @returns {Promise<string>} extracted text
		 * @throws {Error} when the format is unknown or unsupported, the buffer
		 *   is too large, or extraction fails
		 */
		async extractText(buffer, options) {
			const format = options?.format || this.detectFormat(buffer);
			if (!format) throw new Error("[DocumentProcessor] Unable to detect document format. Supported: PDF, DOCX, TXT, images (with OCR)");
			const maxSize = options?.maxSize || 50 * 1024 * 1024;
			if (buffer.length > maxSize) throw new Error(`[DocumentProcessor] Document size (${buffer.length} bytes) exceeds maximum (${maxSize} bytes)`);
			switch (format) {
				case "pdf": return this.extractPdfText(buffer, options);
				case "docx": return this.extractDocxText(buffer, options);
				case "txt": return buffer.toString("utf-8");
				case "image": return this.extractImageText(buffer, options);
				case "json": return this.extractJsonText(buffer, options);
				case "csv": return this.extractCsvText(buffer, options);
				case "xlsx": return this.extractXlsxText(buffer, options);
				default: throw new Error(`[DocumentProcessor] Unsupported format: ${format}`);
			}
		}
		/**
		 * Get document metadata
		 * @param {Buffer} buffer - raw document bytes
		 * @param {object} [options] - `format` skips detection
		 * @returns {Promise<object>} object with at least `format` and `pages`
		 * @throws {Error} when the format is unknown or unsupported
		 */
		async getMetadata(buffer, options) {
			const format = options?.format || this.detectFormat(buffer);
			if (!format) throw new Error("[DocumentProcessor] Unable to detect document format");
			switch (format) {
				case "pdf": return this.getPdfMetadata(buffer, options);
				case "docx": return this.getDocxMetadata(buffer, options);
				case "txt": return {
					format: "txt",
					pages: void 0
				};
				case "image": return this.getImageMetadata(buffer, options);
				case "json": return this.getJsonMetadata(buffer, options);
				case "csv": return this.getCsvMetadata(buffer, options);
				case "xlsx": return this.getXlsxMetadata(buffer, options);
				default: throw new Error(`[DocumentProcessor] Unsupported format: ${format}`);
			}
		}
		/**
		 * Detect document format from buffer
		 *
		 * Checks binary signatures first, then JSON, then a CSV sniff over the
		 * first five non-empty lines, and finally falls back to "txt" when
		 * fewer than 10% of the first 1000 bytes are control characters.
		 * @param {Buffer} buffer - raw document bytes
		 * @returns {string|null} "pdf", "image", "docx", "xlsx", "json", "csv",
		 *   "txt", or null when nothing matches (or the buffer is under 4 bytes)
		 */
		detectFormat(buffer) {
			if (buffer.length < 4) return null;
			if (buffer.toString("utf-8", 0, 4) === "%PDF") return "pdf";
			// PNG: 0x89 "PNG"
			if (buffer.length >= 8 && buffer[0] === 137 && buffer[1] === 80 && buffer[2] === 78 && buffer[3] === 71) return "image";
			// JPEG: FF D8 FF
			if (buffer[0] === 255 && buffer[1] === 216 && buffer[2] === 255) return "image";
			// TIFF: "II*\0" (little-endian) or "MM\0*" (big-endian)
			if (buffer[0] === 73 && buffer[1] === 73 && buffer[2] === 42 && buffer[3] === 0 || buffer[0] === 77 && buffer[1] === 77 && buffer[2] === 0 && buffer[3] === 42) return "image";
			// BMP: "BM". NOTE(review): plain text that starts with "BM" also
			// matches this two-byte check — confirm whether that is acceptable.
			if (buffer[0] === 66 && buffer[1] === 77) return "image";
			// WebP: "RIFF" .... "WEBP"
			if (buffer.length >= 12 && buffer[0] === 82 && buffer[1] === 73 && buffer[2] === 70 && buffer[3] === 70 && buffer[8] === 87 && buffer[9] === 69 && buffer[10] === 66 && buffer[11] === 80) return "image";
			// ZIP container ("PK"): tell DOCX from XLSX by their part names.
			if (buffer[0] === 80 && buffer[1] === 75) {
				const zipHeader = buffer.toString("utf-8", 0, Math.min(500, buffer.length));
				if (zipHeader.includes("word/")) return "docx";
				if (zipHeader.includes("xl/")) return "xlsx";
				if (zipHeader.includes("[Content_Types].xml")) {
					// Both DOCX and XLSX packages carry [Content_Types].xml, so
					// it cannot decide on its own; look for the main part name
					// anywhere in the archive before defaulting to DOCX.
					if (buffer.includes("word/document")) return "docx";
					if (buffer.includes("xl/workbook")) return "xlsx";
					return "docx";
				}
			}
			const text = buffer.toString("utf-8");
			const trimmed = text.trim();
			if (trimmed.startsWith("{") && trimmed.endsWith("}") || trimmed.startsWith("[") && trimmed.endsWith("]")) {
				if (this.jsonProcessor.isValid(buffer)) return "json";
			}
			// CSV sniff: every sampled line must contain the same non-zero
			// number of one candidate delimiter. Blank lines (e.g. from a
			// trailing newline) are ignored so they cannot break the match.
			const lines = text.split(/\r?\n/).filter((line) => line.length > 0).slice(0, 5);
			if (lines.length >= 2) for (const delimiter of [
				",",
				"\t",
				";",
				"|"
			]) {
				// Count literally: a RegExp built from "|" matches the empty
				// string and would report line.length + 1 for every line.
				const counts = lines.map((line) => line.split(delimiter).length - 1);
				if (counts[0] > 0 && counts.every((c) => c === counts[0])) return "csv";
			}
			const sample = buffer.subarray(0, Math.min(1e3, buffer.length));
			let controlBytes = 0;
			for (const byte of sample) {
				// Tab, LF and CR are ordinary text; other bytes below 0x20 are not.
				if (byte < 32 && byte !== 9 && byte !== 10 && byte !== 13) controlBytes++;
			}
			if (controlBytes < sample.length * .1) return "txt";
			return null;
		}
		/**
		 * Check if format is supported
		 * @param {string} format
		 * @returns {boolean} true when the format can be processed with the
		 *   packages that were loaded
		 */
		isFormatSupported(format) {
			switch (format) {
				case "pdf": return !!this.pdfParse;
				case "docx": return !!this.mammoth;
				case "txt": return true;
				case "image": return this.ocrProcessor.isAvailable();
				case "json": return true;
				case "csv": return true;
				case "xlsx": return this.xlsxProcessor.isAvailable();
				default: return false;
			}
		}
		/**
		 * Extract text from PDF
		 * @param {Buffer} buffer
		 * @param {object} [options] - `password`; `pages` (array of page
		 *   numbers) — only its largest value is forwarded, as the `max` option
		 * @returns {Promise<string>} extracted text, "" when the parser returns none
		 * @throws {Error} when pdf-parse is missing or extraction fails
		 */
		async extractPdfText(buffer, options) {
			if (!this.pdfParse) throw new Error("[DocumentProcessor] PDF support requires pdf-parse. Install with: npm install pdf-parse");
			try {
				const pages = options?.pages;
				// Math.max() of an empty list is -Infinity, so only derive a
				// limit when at least one page was requested.
				const lastPage = pages && pages.length > 0 ? Math.max(...pages) : void 0;
				const data = await this.pdfParse(buffer, {
					password: options?.password,
					max: lastPage
				});
				return data.text || "";
			} catch (error) {
				throw new Error(`[DocumentProcessor] PDF extraction failed: ${error.message}`, { cause: error });
			}
		}
		/**
		 * Extract text from DOCX
		 * @throws {Error} when mammoth is missing or extraction fails
		 */
		async extractDocxText(buffer, _options) {
			if (!this.mammoth) throw new Error("[DocumentProcessor] DOCX support requires mammoth. Install with: npm install mammoth");
			try {
				const extracted = await this.mammoth.extractRawText({ buffer });
				return extracted.value || "";
			} catch (error) {
				throw new Error(`[DocumentProcessor] DOCX extraction failed: ${error.message}`, { cause: error });
			}
		}
		/**
		 * Get PDF metadata
		 * @returns {Promise<object>} page count plus title, author and dates
		 *   taken from the parser's `info` object (also exposed as `custom`)
		 * @throws {Error} when pdf-parse is missing or parsing fails
		 */
		async getPdfMetadata(buffer, _options) {
			if (!this.pdfParse) throw new Error("[DocumentProcessor] PDF support requires pdf-parse. Install with: npm install pdf-parse");
			try {
				const data = await this.pdfParse(buffer, { password: _options?.password });
				return {
					format: "pdf",
					pages: data.numpages,
					title: data.info?.Title,
					author: data.info?.Author,
					creationDate: data.info?.CreationDate ? new Date(data.info.CreationDate) : void 0,
					modifiedDate: data.info?.ModDate ? new Date(data.info.ModDate) : void 0,
					custom: data.info
				};
			} catch (error) {
				throw new Error(`[DocumentProcessor] PDF metadata extraction failed: ${error.message}`, { cause: error });
			}
		}
		/**
		 * Get DOCX metadata
		 *
		 * Only the format is reported; the buffer is not inspected.
		 */
		async getDocxMetadata(_buffer, _options) {
			return {
				format: "docx",
				pages: void 0
			};
		}
		/**
		 * Extract text from image using OCR
		 * @throws {Error} when OCR is unavailable or recognition fails
		 */
		async extractImageText(buffer, options) {
			if (!this.ocrProcessor.isAvailable()) throw new Error("[DocumentProcessor] Image/OCR support requires tesseract.js. Install with: npm install tesseract.js");
			try {
				const recognized = await this.ocrProcessor.recognizeText(buffer, options?.ocrOptions);
				return recognized.text;
			} catch (error) {
				throw new Error(`[DocumentProcessor] Image text extraction failed: ${error.message}`, { cause: error });
			}
		}
		/**
		 * Get image metadata
		 *
		 * Runs OCR to obtain a confidence value; when OCR is unavailable or
		 * fails, `usedOCR` is false and no confidence is reported.
		 */
		async getImageMetadata(buffer, options) {
			const withoutOcr = () => ({
				format: "image",
				pages: void 0,
				usedOCR: false
			});
			if (!this.ocrProcessor.isAvailable()) return withoutOcr();
			try {
				const recognized = await this.ocrProcessor.recognizeText(buffer, options?.ocrOptions);
				return {
					format: "image",
					pages: void 0,
					usedOCR: true,
					ocrConfidence: recognized.confidence
				};
			} catch {
				// Metadata is best-effort: an OCR failure degrades the result
				// instead of failing the call.
				return withoutOcr();
			}
		}
		/**
		 * Extract text from JSON
		 * @throws {Error} when the JSON processor fails
		 */
		async extractJsonText(buffer, _options) {
			try {
				return this.jsonProcessor.extractText(buffer);
			} catch (error) {
				throw new Error(`[DocumentProcessor] JSON extraction failed: ${error.message}`, { cause: error });
			}
		}
		/**
		 * Extract text from CSV
		 * @throws {Error} when the CSV processor fails
		 */
		async extractCsvText(buffer, _options) {
			try {
				return this.csvProcessor.extractText(buffer);
			} catch (error) {
				throw new Error(`[DocumentProcessor] CSV extraction failed: ${error.message}`, { cause: error });
			}
		}
		/**
		 * Extract text from XLSX
		 * @throws {Error} when XLSX support is unavailable or extraction fails
		 */
		async extractXlsxText(buffer, _options) {
			if (!this.xlsxProcessor.isAvailable()) throw new Error("[DocumentProcessor] XLSX support requires xlsx package. Install with: npm install xlsx");
			try {
				return this.xlsxProcessor.extractText(buffer);
			} catch (error) {
				throw new Error(`[DocumentProcessor] XLSX extraction failed: ${error.message}`, { cause: error });
			}
		}
		/**
		 * Get JSON metadata
		 *
		 * Reports whether the top level is an array and how many items/keys it
		 * has; falls back to the bare format when the document cannot be read.
		 */
		async getJsonMetadata(buffer, _options) {
			try {
				const data = this.jsonProcessor.parse(buffer);
				const isArray = Array.isArray(data);
				return {
					format: "json",
					pages: void 0,
					custom: {
						isArray,
						itemCount: isArray ? data.length : Object.keys(data).length
					}
				};
			} catch {
				// Best-effort: unreadable JSON still yields the basic record.
				return {
					format: "json",
					pages: void 0
				};
			}
		}
		/**
		 * Get CSV metadata
		 *
		 * Reports row count, column count and headers; falls back to the bare
		 * format when the CSV cannot be analysed.
		 */
		async getCsvMetadata(buffer, _options) {
			try {
				const info = this.csvProcessor.getColumnInfo(buffer);
				return {
					format: "csv",
					pages: void 0,
					custom: {
						rowCount: info.rowCount,
						columnCount: info.columnCount,
						headers: info.headers
					}
				};
			} catch {
				// Best-effort: unreadable CSV still yields the basic record.
				return {
					format: "csv",
					pages: void 0
				};
			}
		}
		/**
		 * Get XLSX metadata
		 *
		 * Reports sheet names and count; falls back to the bare format when
		 * XLSX support is unavailable or the workbook cannot be read.
		 */
		async getXlsxMetadata(buffer, _options) {
			if (!this.xlsxProcessor.isAvailable()) return {
				format: "xlsx",
				pages: void 0
			};
			try {
				const metadata = this.xlsxProcessor.getMetadata(buffer);
				return {
					format: "xlsx",
					pages: void 0,
					custom: {
						sheetNames: metadata.sheetNames,
						sheetCount: metadata.sheetCount
					}
				};
			} catch {
				// Best-effort: an unreadable workbook still yields the basic record.
				return {
					format: "xlsx",
					pages: void 0
				};
			}
		}
		/**
		 * Get OCR processor instance
		 */
		getOCRProcessor() {
			return this.ocrProcessor;
		}
		/**
		 * Get JSON processor instance
		 */
		getJsonProcessor() {
			return this.jsonProcessor;
		}
		/**
		 * Get CSV processor instance
		 */
		getCsvProcessor() {
			return this.csvProcessor;
		}
		/**
		 * Get XLSX processor instance
		 */
		getXlsxProcessor() {
			return this.xlsxProcessor;
		}
	};
}));
17529
+
17530
+ //#endregion
17531
+ //#region src/document/index.ts
17532
// Export table for the document module: each entry is a getter, so the
// binding is read at access time rather than when this table is built.
var document_exports = /* @__PURE__ */ __exportAll({
	CsvProcessor: () => {
		return CsvProcessor;
	},
	DocumentProcessor: () => {
		return DocumentProcessor;
	},
	JsonProcessor: () => {
		return JsonProcessor;
	},
	OCRProcessor: () => {
		return OCRProcessor;
	},
	XlsxProcessor: () => {
		return XlsxProcessor;
	},
	createCsvProcessor: () => {
		return createCsvProcessor;
	},
	createDocumentProcessor: () => {
		return createDocumentProcessor;
	},
	createJsonProcessor: () => {
		return createJsonProcessor;
	},
	createOCRProcessor: () => {
		return createOCRProcessor;
	},
	createXlsxProcessor: () => {
		return createXlsxProcessor;
	}
});
// Initialiser for the document module: runs the initialisers of every
// processor it re-exports.
var init_document = __esmMin(() => {
	init_DocumentProcessor();
	init_OCRProcessor();
	init_JsonProcessor();
	init_CsvProcessor();
	init_XlsxProcessor();
});
17551
+
17552
+ //#endregion
17553
+ //#region src/workers/WorkerPool.ts
17554
+ /**
17555
+ * Worker thread pool for parallel processing
17556
+ */
17557
+ /**
17558
+ * Create a worker pool instance
17559
+ */
17560
function createWorkerPool(config) {
	// Factory wrapper: forwards `config` unchanged to the WorkerPool constructor.
	const pool = new WorkerPool(config);
	return pool;
}
17563
var WorkerPool;
var init_WorkerPool = __esmMin((() => {
	/**
	 * Pool of worker threads running `worker.js` (resolved next to this file).
	 * Tasks are queued, posted to the first idle worker, and settled when that
	 * worker posts back `{ result, error, processingTime }`.
	 */
	WorkerPool = class {
		/**
		 * @param {object} [config]
		 * @param {number} [config.numWorkers] - defaults to the number of CPUs
		 * @param {number} [config.maxQueueSize] - defaults to 100
		 * @param {number} [config.idleTimeout] - defaults to 30000; stored in
		 *   the config but not read anywhere in this class
		 */
		constructor(config = {}) {
			this.workers = [];
			this.availableWorkers = [];
			this.taskQueue = [];
			this.totalProcessingTime = 0;
			// True only while terminate() is shutting workers down.
			this.terminating = false;
			this.config = {
				numWorkers: config.numWorkers || os.cpus().length,
				maxQueueSize: config.maxQueueSize || 100,
				idleTimeout: config.idleTimeout || 3e4
			};
			this.stats = {
				activeWorkers: 0,
				idleWorkers: 0,
				queueSize: 0,
				totalProcessed: 0,
				totalErrors: 0,
				avgProcessingTime: 0
			};
			this.workerPath = path.join(__dirname, "worker.js");
		}
		/**
		 * Initialize worker pool
		 *
		 * Starts `config.numWorkers` workers, one after another.
		 */
		async initialize() {
			for (let i = 0; i < this.config.numWorkers; i++) await this.createWorker();
		}
		/**
		 * Create a new worker
		 *
		 * Registers the worker as idle and wires its message, error and exit
		 * events into the pool.
		 * @returns {Promise<object>} the created worker
		 */
		async createWorker() {
			const worker = new worker_threads.Worker(this.workerPath);
			worker.on("message", (result) => {
				this.handleWorkerResult(worker, result);
			});
			worker.on("error", (error) => {
				console.error("[WorkerPool] Worker error:", error);
				this.handleWorkerFailure(worker, error, true);
			});
			worker.on("exit", (code) => {
				// Workers stopped by terminate() exit with a non-zero code; that
				// is expected and not worth an error log.
				if (code !== 0 && !this.terminating) console.error(`[WorkerPool] Worker exited with code ${code}`);
				this.handleWorkerFailure(worker, new Error(`[WorkerPool] Worker exited with code ${code} before completing its task`), false);
			});
			this.workers.push(worker);
			this.availableWorkers.push(worker);
			this.stats.idleWorkers++;
			return worker;
		}
		/**
		 * Execute a task on the worker pool
		 * @param {*} task - value posted to the worker as-is
		 * @returns {Promise<*>} resolves with the worker's `result`; rejects
		 *   when the worker reports an error, dies, or the pool is terminated
		 * @throws {Error} (as a rejection) when the queue is full
		 */
		async execute(task) {
			if (this.taskQueue.length >= this.config.maxQueueSize) throw new Error(`[WorkerPool] Queue is full (max: ${this.config.maxQueueSize})`);
			return new Promise((resolve, reject) => {
				this.taskQueue.push({
					task,
					resolve,
					reject
				});
				this.stats.queueSize = this.taskQueue.length;
				this.processQueue();
			});
		}
		/**
		 * Process task queue
		 *
		 * Hands queued tasks to idle workers until either runs out.
		 */
		processQueue() {
			while (this.taskQueue.length > 0 && this.availableWorkers.length > 0) {
				const worker = this.availableWorkers.shift();
				const { task, resolve, reject } = this.taskQueue.shift();
				this.stats.idleWorkers--;
				this.stats.activeWorkers++;
				this.stats.queueSize = this.taskQueue.length;
				// Remember who is waiting on this worker so the reply (or a
				// failure) can settle the right promise.
				worker.__currentTask = {
					resolve,
					reject,
					startTime: Date.now()
				};
				worker.postMessage(task);
			}
		}
		/**
		 * Handle worker result
		 *
		 * Settles the task the worker was running, returns the worker to the
		 * idle list, updates statistics and dispatches further queued work.
		 */
		handleWorkerResult(worker, result) {
			const currentTask = worker.__currentTask;
			if (!currentTask) return;
			this.stats.activeWorkers--;
			this.stats.idleWorkers++;
			this.stats.totalProcessed++;
			// Fall back to wall-clock time so a reply without processingTime
			// cannot turn the running average into NaN.
			const elapsed = typeof result.processingTime === "number" ? result.processingTime : Date.now() - currentTask.startTime;
			this.totalProcessingTime += elapsed;
			this.stats.avgProcessingTime = this.totalProcessingTime / this.stats.totalProcessed;
			this.availableWorkers.push(worker);
			delete worker.__currentTask;
			if (result.error) {
				this.stats.totalErrors++;
				currentTask.reject(new Error(result.error));
			} else currentTask.resolve(result.result);
			this.processQueue();
		}
		/**
		 * Reject the task a worker was running, if any
		 * @returns {boolean} true when a task was in flight and has been rejected
		 */
		rejectCurrentTask(worker, error) {
			const currentTask = worker.__currentTask;
			if (!currentTask) return false;
			delete worker.__currentTask;
			this.stats.activeWorkers--;
			currentTask.reject(error instanceof Error ? error : new Error(String(error)));
			return true;
		}
		/**
		 * Handle a worker that errored or exited
		 *
		 * Rejects the task it was running so the caller is not left waiting
		 * forever, removes the worker from the pool and, for error events,
		 * starts a replacement that then picks up queued work.
		 * @param {object} worker
		 * @param {Error} error - reason passed to the rejected task
		 * @param {boolean} fromErrorEvent - true for "error" events (counted as
		 *   an error and followed by a respawn), false for "exit" events
		 */
		handleWorkerFailure(worker, error, fromErrorEvent) {
			const taskLost = this.rejectCurrentTask(worker, error);
			// An "error" event always counts once; an "exit" counts only when
			// it actually cost a task (i.e. no error event settled it first).
			if (fromErrorEvent || taskLost) this.stats.totalErrors++;
			this.removeWorker(worker);
			if (!fromErrorEvent || this.terminating) return;
			this.createWorker().then(() => {
				this.processQueue();
			}, (spawnError) => {
				console.error("[WorkerPool] Failed to replace worker:", spawnError);
			});
		}
		/**
		 * Remove worker from pool
		 *
		 * Safe to call more than once for the same worker.
		 */
		removeWorker(worker) {
			const index = this.workers.indexOf(worker);
			if (index !== -1) this.workers.splice(index, 1);
			const availableIndex = this.availableWorkers.indexOf(worker);
			if (availableIndex !== -1) {
				this.availableWorkers.splice(availableIndex, 1);
				this.stats.idleWorkers--;
			}
		}
		/**
		 * Get pool statistics
		 * @returns {object} a shallow copy of the current statistics
		 */
		getStats() {
			return { ...this.stats };
		}
		/**
		 * Terminate all workers
		 *
		 * Queued and in-flight tasks are rejected, every worker is terminated,
		 * and the worker lists and gauges are reset. Cumulative counters
		 * (totalProcessed, totalErrors, avgProcessingTime) are kept.
		 */
		async terminate() {
			this.terminating = true;
			try {
				const reason = new Error("[WorkerPool] Worker pool terminated before the task completed");
				const pending = this.taskQueue;
				this.taskQueue = [];
				for (const { reject } of pending) reject(reason);
				const workers = [...this.workers];
				for (const worker of workers) this.rejectCurrentTask(worker, reason);
				await Promise.all(workers.map((worker) => worker.terminate()));
			} finally {
				this.terminating = false;
			}
			this.workers = [];
			this.availableWorkers = [];
			this.stats.activeWorkers = 0;
			this.stats.idleWorkers = 0;
			this.stats.queueSize = 0;
		}
	};
}));
17700
+
17701
+ //#endregion
17702
+ //#region src/workers/index.ts
17703
// Export table for the workers module: each entry is a getter, so the
// binding is read at access time rather than when this table is built.
var workers_exports = /* @__PURE__ */ __exportAll({
	WorkerPool: () => {
		return WorkerPool;
	},
	createWorkerPool: () => {
		return createWorkerPool;
	}
});
// Initialiser for the workers module: runs the WorkerPool initialiser.
var init_workers = __esmMin(() => {
	init_WorkerPool();
});
17710
+
15757
17711
  //#endregion
15758
17712
  //#region src/detector.ts
15759
17713
  var OpenRedaction = class OpenRedaction {
@@ -16403,14 +18357,14 @@ var OpenRedaction = class OpenRedaction {
16403
18357
  * Run health check
16404
18358
  */
16405
18359
  async healthCheck(options) {
16406
- const { HealthChecker } = await Promise.resolve().then(() => require("./HealthCheck-CNRn3PcM.js"));
18360
+ const { HealthChecker } = await Promise.resolve().then(() => (init_HealthCheck(), HealthCheck_exports));
16407
18361
  return new HealthChecker(this).check(options);
16408
18362
  }
16409
18363
  /**
16410
18364
  * Quick health check (minimal overhead)
16411
18365
  */
16412
18366
  async quickHealthCheck() {
16413
- const { HealthChecker } = await Promise.resolve().then(() => require("./HealthCheck-CNRn3PcM.js"));
18367
+ const { HealthChecker } = await Promise.resolve().then(() => (init_HealthCheck(), HealthCheck_exports));
16414
18368
  return new HealthChecker(this).quickCheck();
16415
18369
  }
16416
18370
  /**
@@ -16421,7 +18375,7 @@ var OpenRedaction = class OpenRedaction {
16421
18375
  */
16422
18376
  async detectDocument(buffer, options) {
16423
18377
  if (this.rbacManager && !this.rbacManager.hasPermission("detection:detect")) throw new Error("[OpenRedaction] Permission denied: detection:detect required");
16424
- const { createDocumentProcessor } = await Promise.resolve().then(() => require("./document-CaOtDvq_.js"));
18378
+ const { createDocumentProcessor } = await Promise.resolve().then(() => (init_document(), document_exports));
16425
18379
  const processor = createDocumentProcessor();
16426
18380
  const extractionStart = performance.now();
16427
18381
  const text = await processor.extractText(buffer, options);
@@ -16450,7 +18404,7 @@ var OpenRedaction = class OpenRedaction {
16450
18404
  * Significantly faster for processing many texts
16451
18405
  */
16452
18406
  static async detectBatch(texts, options) {
16453
- const { createWorkerPool } = await Promise.resolve().then(() => require("./workers-B2xx_81m.js"));
18407
+ const { createWorkerPool } = await Promise.resolve().then(() => (init_workers(), workers_exports));
16454
18408
  const pool = createWorkerPool({ numWorkers: options?.numWorkers });
16455
18409
  try {
16456
18410
  await pool.initialize();
@@ -16470,7 +18424,7 @@ var OpenRedaction = class OpenRedaction {
16470
18424
  * Efficient for processing many documents at once
16471
18425
  */
16472
18426
  static async detectDocumentsBatch(buffers, options) {
16473
- const { createWorkerPool } = await Promise.resolve().then(() => require("./workers-B2xx_81m.js"));
18427
+ const { createWorkerPool } = await Promise.resolve().then(() => (init_workers(), workers_exports));
16474
18428
  const pool = createWorkerPool({ numWorkers: options?.numWorkers });
16475
18429
  try {
16476
18430
  await pool.initialize();
@@ -16489,6 +18443,7 @@ var OpenRedaction = class OpenRedaction {
16489
18443
 
16490
18444
  //#endregion
16491
18445
  //#region src/streaming/StreamingDetector.ts
18446
+ init_document();
16492
18447
  /**
16493
18448
  * Streaming detector for large documents
16494
18449
  */
@@ -16637,6 +18592,7 @@ function createStreamingDetector(detector, options) {
16637
18592
 
16638
18593
  //#endregion
16639
18594
  //#region src/batch/BatchProcessor.ts
18595
+ init_workers();
16640
18596
  /**
16641
18597
  * Batch processor for processing multiple documents
16642
18598
  */
@@ -18554,6 +20510,7 @@ function createAPIServer(config) {
18554
20510
  //#endregion
18555
20511
  //#region src/index.ts
18556
20512
  init_ConfigExporter();
20513
+ init_HealthCheck();
18557
20514
 
18558
20515
  //#endregion
18559
20516
  exports.ADMIN_ROLE = ADMIN_ROLE;
@@ -18565,21 +20522,21 @@ exports.ConfigExporter = ConfigExporter;
18565
20522
  exports.ConfigLoader = ConfigLoader;
18566
20523
  exports.ConsoleAuditLogger = ConsoleAuditLogger;
18567
20524
  exports.ContextRulesEngine = ContextRulesEngine;
18568
- exports.CsvProcessor = require_document.CsvProcessor;
20525
+ exports.CsvProcessor = CsvProcessor;
18569
20526
  exports.DEFAULT_DOMAIN_VOCABULARIES = DEFAULT_DOMAIN_VOCABULARIES;
18570
20527
  exports.DEFAULT_PROXIMITY_RULES = DEFAULT_PROXIMITY_RULES;
18571
20528
  exports.DEFAULT_SEVERITY_MAP = DEFAULT_SEVERITY_MAP;
18572
20529
  exports.DEFAULT_TIER_QUOTAS = DEFAULT_TIER_QUOTAS;
18573
- exports.DocumentProcessor = require_document.DocumentProcessor;
20530
+ exports.DocumentProcessor = DocumentProcessor;
18574
20531
  exports.ExplainAPI = ExplainAPI;
18575
20532
  exports.GRAFANA_DASHBOARD_TEMPLATE = GRAFANA_DASHBOARD_TEMPLATE;
18576
- exports.HealthChecker = require_HealthCheck.HealthChecker;
20533
+ exports.HealthChecker = HealthChecker;
18577
20534
  exports.InMemoryAuditLogger = InMemoryAuditLogger;
18578
20535
  exports.InMemoryMetricsCollector = InMemoryMetricsCollector;
18579
- exports.JsonProcessor = require_document.JsonProcessor;
20536
+ exports.JsonProcessor = JsonProcessor;
18580
20537
  exports.LocalLearningStore = LocalLearningStore;
18581
20538
  exports.NERDetector = NERDetector;
18582
- exports.OCRProcessor = require_document.OCRProcessor;
20539
+ exports.OCRProcessor = OCRProcessor;
18583
20540
  exports.OPERATOR_ROLE = OPERATOR_ROLE;
18584
20541
  exports.OpenRedaction = OpenRedaction;
18585
20542
  exports.OpenRedactionError = OpenRedactionError;
@@ -18599,10 +20556,8 @@ exports.TenantQuotaExceededError = TenantQuotaExceededError;
18599
20556
  exports.TenantSuspendedError = TenantSuspendedError;
18600
20557
  exports.VIEWER_ROLE = VIEWER_ROLE;
18601
20558
  exports.WebhookManager = WebhookManager;
18602
- exports.WorkerPool = require_workers.WorkerPool;
18603
- exports.XlsxProcessor = require_document.XlsxProcessor;
18604
- exports.__toCommonJS = __toCommonJS;
18605
- exports.__toESM = __toESM;
20559
+ exports.WorkerPool = WorkerPool;
20560
+ exports.XlsxProcessor = XlsxProcessor;
18606
20561
  exports.allPatterns = allPatterns;
18607
20562
  exports.analyzeContextFeatures = analyzeContextFeatures;
18608
20563
  exports.analyzeFullContext = analyzeFullContext;
@@ -18620,18 +20575,18 @@ exports.createCacheDisabledError = createCacheDisabledError;
18620
20575
  exports.createConfigLoadError = createConfigLoadError;
18621
20576
  exports.createConfigPreset = createConfigPreset;
18622
20577
  exports.createContextRulesEngine = createContextRulesEngine;
18623
- exports.createCsvProcessor = require_document.createCsvProcessor;
20578
+ exports.createCsvProcessor = createCsvProcessor;
18624
20579
  exports.createCustomRole = createCustomRole;
18625
- exports.createDocumentProcessor = require_document.createDocumentProcessor;
20580
+ exports.createDocumentProcessor = createDocumentProcessor;
18626
20581
  exports.createExplainAPI = createExplainAPI;
18627
- exports.createHealthChecker = require_HealthCheck.createHealthChecker;
20582
+ exports.createHealthChecker = createHealthChecker;
18628
20583
  exports.createHighMemoryError = createHighMemoryError;
18629
20584
  exports.createInvalidPatternError = createInvalidPatternError;
18630
- exports.createJsonProcessor = require_document.createJsonProcessor;
20585
+ exports.createJsonProcessor = createJsonProcessor;
18631
20586
  exports.createLearningDisabledError = createLearningDisabledError;
18632
20587
  exports.createMultiPassDisabledError = createMultiPassDisabledError;
18633
20588
  exports.createNERDetector = createNERDetector;
18634
- exports.createOCRProcessor = require_document.createOCRProcessor;
20589
+ exports.createOCRProcessor = createOCRProcessor;
18635
20590
  exports.createOptimizationDisabledError = createOptimizationDisabledError;
18636
20591
  exports.createPersistentAuditLogger = createPersistentAuditLogger;
18637
20592
  exports.createPriorityOptimizer = createPriorityOptimizer;
@@ -18644,8 +20599,8 @@ exports.createStreamingDetector = createStreamingDetector;
18644
20599
  exports.createTenantManager = createTenantManager;
18645
20600
  exports.createValidationError = createValidationError;
18646
20601
  exports.createWebhookManager = createWebhookManager;
18647
- exports.createWorkerPool = require_workers.createWorkerPool;
18648
- exports.createXlsxProcessor = require_document.createXlsxProcessor;
20602
+ exports.createWorkerPool = createWorkerPool;
20603
+ exports.createXlsxProcessor = createXlsxProcessor;
18649
20604
  exports.defaultPasses = defaultPasses;
18650
20605
  exports.detectPII = detectPII;
18651
20606
  exports.detectionsOverlap = detectionsOverlap;
@@ -18664,7 +20619,7 @@ exports.getPreset = getPreset;
18664
20619
  exports.getSeverity = getSeverity;
18665
20620
  exports.governmentPatterns = governmentPatterns;
18666
20621
  exports.groupPatternsByPass = groupPatternsByPass;
18667
- exports.healthCheckMiddleware = require_HealthCheck.healthCheckMiddleware;
20622
+ exports.healthCheckMiddleware = healthCheckMiddleware;
18668
20623
  exports.healthcarePreset = healthcarePreset;
18669
20624
  exports.healthcareResearchPreset = healthcareResearchPreset;
18670
20625
  exports.hipaaPreset = hipaaPreset;