od-temp 1.0.5 → 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -45,6 +45,8 @@ let fs = require("fs");
45
45
  fs = __toESM(fs);
46
46
  let path = require("path");
47
47
  path = __toESM(path);
48
+ let worker_threads = require("worker_threads");
49
+ let os = require("os");
48
50
 
49
51
  //#region src/audit/AuditLogger.ts
50
52
  /**
@@ -14556,6 +14558,1961 @@ var init_ConfigExporter = __esmMin((() => {
14556
14558
  };
14557
14559
  }));
14558
14560
 
14561
+ //#endregion
14562
+ //#region src/health/HealthCheck.ts
14563
+ var HealthCheck_exports = /* @__PURE__ */ __exportAll({
14564
+ HealthChecker: () => HealthChecker,
14565
+ createHealthChecker: () => createHealthChecker,
14566
+ healthCheckMiddleware: () => healthCheckMiddleware
14567
+ });
14568
/**
 * Build a HealthChecker bound to the given detector.
 *
 * @param {object} detector - Detector instance passed straight to the HealthChecker constructor.
 * @returns {HealthChecker} A newly constructed checker.
 */
function createHealthChecker(detector) {
  const checker = new HealthChecker(detector);
  return checker;
}
14574
/**
 * Express-style middleware that runs a health check and reports it as JSON.
 *
 * Responds with HTTP 200 when the overall status is "healthy" or "degraded"
 * and with HTTP 503 for any other status or when the check itself throws.
 *
 * @param {object} detector - Detector instance to probe.
 * @param {object} [options] - Optional overrides merged over the defaults
 *   (`testDetection: true`, `checkPerformance: true`,
 *   `performanceThreshold: 100`, `memoryThreshold: 100`).
 * @returns {(req: object, res: object) => Promise<void>} Request handler; only
 *   `res.status(code).json(body)` is used.
 */
function healthCheckMiddleware(detector, options = {}) {
  const checker = new HealthChecker(detector);
  const checkOptions = {
    testDetection: true,
    checkPerformance: true,
    performanceThreshold: 100,
    memoryThreshold: 100,
    ...options
  };
  return async (_req, res) => {
    try {
      const result = await checker.check(checkOptions);
      // A degraded service is still able to serve traffic, so it stays 200.
      const isServing = result.status === "healthy" || result.status === "degraded";
      res.status(isServing ? 200 : 503).json(result);
    } catch (error) {
      // Anything can be thrown (including null); never let building the
      // error body throw again, or the request would be left unanswered.
      const message = error instanceof Error ? error.message : String(error);
      res.status(503).json({
        status: "unhealthy",
        timestamp: (/* @__PURE__ */ new Date()).toISOString(),
        error: message
      });
    }
  };
}
14598
var HealthChecker;
var init_HealthCheck = __esmMin((() => {
  /**
   * Runs a set of probes against a detector and summarises the outcome.
   *
   * The detector is used through `detect(text)`, `getPatterns()` and
   * `getCacheStats()`.
   */
  HealthChecker = class {
    /**
     * @param {object} detector - Detector to probe; the construction time is
     *   recorded so uptime can be reported later.
     */
    constructor(detector) {
      this.detector = detector;
      this.initTime = Date.now();
    }

    /**
     * Run every probe and build the combined report.
     *
     * @param {object} [options] - `testDetection`, `checkPerformance`,
     *   `performanceThreshold` and `memoryThreshold` are read.
     * @returns {Promise<object>} Report with `status`, `timestamp`, `checks`,
     *   `metrics`, `errors` and `warnings`. Never rejects: an unexpected
     *   error marks the report "unhealthy" instead.
     */
    async check(options = {}) {
      const report = {
        status: "healthy",
        timestamp: (/* @__PURE__ */ new Date()).toISOString(),
        checks: {
          detector: { status: "pass", message: "Detector initialized" },
          patterns: { status: "pass", message: "Patterns loaded" },
          performance: { status: "pass", message: "Performance acceptable" },
          memory: { status: "pass", message: "Memory usage normal" }
        },
        metrics: {
          totalPatterns: 0,
          compiledPatterns: 0,
          cacheEnabled: false,
          uptime: Date.now() - this.initTime
        },
        errors: [],
        warnings: []
      };
      try {
        const { checks } = report;
        checks.detector = await this.checkDetector(options);
        checks.patterns = await this.checkPatterns();
        // The performance probe runs unless it is explicitly disabled.
        const wantsPerformance = options.checkPerformance !== false;
        if (wantsPerformance) {
          checks.performance = await this.checkPerformance(options.performanceThreshold);
        }
        checks.memory = await this.checkMemory(options.memoryThreshold);
        report.metrics = this.collectMetrics();
        report.status = this.determineOverallStatus(checks);
        for (const { status, message } of Object.values(checks)) {
          if (status === "fail") {
            report.errors.push(message);
          } else if (status === "warn") {
            report.warnings.push(message);
          }
        }
      } catch (error) {
        report.status = "unhealthy";
        report.errors.push(`Health check failed: ${error.message}`);
      }
      return report;
    }

    /**
     * Probe the detector with a sample string containing an email address.
     *
     * @param {object} options - The probe is skipped when `testDetection` is `false`.
     * @returns {Promise<{status: string, message: string}>}
     */
    async checkDetector(options) {
      try {
        const skipProbe = options.testDetection === false;
        if (!skipProbe) {
          const probe = await this.detector.detect("Test email: test@example.com");
          if (!probe || !probe.detections) {
            return { status: "fail", message: "Detector returned invalid result" };
          }
          if (probe.detections.length === 0) {
            return { status: "warn", message: "Test detection found no PII (expected at least 1)" };
          }
        }
        return { status: "pass", message: "Detector functioning correctly" };
      } catch (error) {
        return { status: "fail", message: `Detector check failed: ${error.message}` };
      }
    }

    /**
     * Verify the detector reports loaded patterns (fail at 0, warn below 10).
     *
     * @returns {Promise<object>} Check entry with `status`, `message` and, where
     *   available, `value` / `threshold`.
     */
    async checkPatterns() {
      try {
        const loaded = this.detector.getPatterns();
        if (!loaded || loaded.length === 0) {
          return { status: "fail", message: "No patterns loaded", value: 0, threshold: 1 };
        }
        const count = loaded.length;
        if (count < 10) {
          return {
            status: "warn",
            message: "Very few patterns loaded (expected more)",
            value: count,
            threshold: 10
          };
        }
        return { status: "pass", message: `${count} patterns loaded`, value: count };
      } catch (error) {
        return { status: "fail", message: `Pattern check failed: ${error.message}` };
      }
    }

    /**
     * Time one detection run over a short sample text.
     *
     * @param {number} [threshold=100] - Milliseconds; above it the check warns,
     *   above twice it the check fails.
     * @returns {Promise<object>} Check entry carrying the measured duration as `value`.
     */
    async checkPerformance(threshold = 100) {
      try {
        const sample = "Test: john@example.com, phone: 555-123-4567, IP: 192.168.1.1";
        const startedAt = performance.now();
        await this.detector.detect(sample);
        const elapsed = performance.now() - startedAt;
        const shown = elapsed.toFixed(2);
        let status = "pass";
        let message = `Performance good: ${shown}ms`;
        if (elapsed > threshold * 2) {
          status = "fail";
          message = `Performance degraded: ${shown}ms`;
        } else if (elapsed > threshold) {
          status = "warn";
          message = `Performance slower than expected: ${shown}ms`;
        }
        return { status, message, value: elapsed, threshold };
      } catch (error) {
        return { status: "fail", message: `Performance check failed: ${error.message}` };
      }
    }

    /**
     * Compare the current heap usage with a threshold.
     *
     * @param {number} [threshold=100] - Megabytes of used heap; above it the
     *   check warns, above twice it the check fails.
     * @returns {Promise<object>} Check entry; passes with a "skipped" message
     *   when `process.memoryUsage` is unavailable.
     */
    async checkMemory(threshold = 100) {
      try {
        const canMeasure = typeof process !== "undefined" && Boolean(process.memoryUsage);
        if (!canMeasure) {
          return { status: "pass", message: "Memory check skipped (not in Node.js)" };
        }
        const heapUsedMB = process.memoryUsage().heapUsed / 1024 / 1024;
        const shown = heapUsedMB.toFixed(2);
        let status = "pass";
        let message = `Memory usage normal: ${shown}MB`;
        if (heapUsedMB > threshold * 2) {
          status = "fail";
          message = `High memory usage: ${shown}MB`;
        } else if (heapUsedMB > threshold) {
          status = "warn";
          message = `Elevated memory usage: ${shown}MB`;
        }
        return { status, message, value: heapUsedMB, threshold };
      } catch (error) {
        return { status: "warn", message: `Memory check skipped: ${error.message}` };
      }
    }

    /**
     * Gather pattern and cache figures from the detector.
     *
     * @returns {object} `totalPatterns`, `compiledPatterns` (both the pattern
     *   count), `cacheSize`, `cacheEnabled` and `uptime` in milliseconds.
     */
    collectMetrics() {
      const loaded = this.detector.getPatterns();
      const cache = this.detector.getCacheStats();
      const patternCount = loaded.length;
      return {
        totalPatterns: patternCount,
        compiledPatterns: patternCount,
        cacheSize: cache.size,
        cacheEnabled: cache.enabled,
        uptime: Date.now() - this.initTime
      };
    }

    /**
     * Reduce the individual check results to one overall status.
     *
     * @param {object} checks - Map of check name to `{ status }` entries.
     * @returns {"unhealthy"|"degraded"|"healthy"} "unhealthy" if any check
     *   failed, otherwise "degraded" if any warned, otherwise "healthy".
     */
    determineOverallStatus(checks) {
      const seen = new Set(Object.values(checks).map(({ status }) => status));
      if (seen.has("fail")) {
        return "unhealthy";
      }
      return seen.has("warn") ? "degraded" : "healthy";
    }

    /**
     * Lightweight check that only confirms patterns are loaded.
     *
     * @returns {Promise<{status: string, message: string}>}
     */
    async quickCheck() {
      try {
        const hasPatterns = this.detector.getPatterns().length !== 0;
        return hasPatterns
          ? { status: "healthy", message: "OK" }
          : { status: "unhealthy", message: "No patterns loaded" };
      } catch (error) {
        return { status: "unhealthy", message: `Error: ${error.message}` };
      }
    }

    /**
     * Snapshot of pattern and cache figures for debugging.
     *
     * @returns {object} `version`, `patterns`, `cache`, `uptime` and `timestamp`.
     */
    getSystemInfo() {
      const loaded = this.detector.getPatterns();
      const cache = this.detector.getCacheStats();
      // A pattern's family is the part of its type before the first underscore.
      const families = new Set(loaded.map((p) => p.type.split("_")[0]));
      return {
        version: "1.0.0",
        patterns: {
          total: loaded.length,
          types: families.size
        },
        cache: {
          enabled: cache.enabled,
          size: cache.size,
          maxSize: cache.maxSize
        },
        uptime: Date.now() - this.initTime,
        timestamp: (/* @__PURE__ */ new Date()).toISOString()
      };
    }
  };
}));
14846
+
14847
+ //#endregion
14848
+ //#region src/document/OCRProcessor.ts
14849
/**
 * Factory for OCRProcessor.
 *
 * @returns {OCRProcessor} A new processor instance.
 */
function createOCRProcessor() {
  const processor = new OCRProcessor();
  return processor;
}
14855
var OCRProcessor;
var init_OCRProcessor = __esmMin((() => {
  const MISSING_DEPENDENCY_MESSAGE = "[OCRProcessor] OCR support requires tesseract.js. Install with: npm install tesseract.js";

  /** Join a language list with "+"; fall back to "eng" when none is given. */
  const resolveLanguage = (options) => Array.isArray(options?.language) ? options.language.join("+") : options?.language || "eng";

  /** Terminate a worker or scheduler without masking the error being handled. */
  const terminateQuietly = async (target) => {
    try {
      await target.terminate();
    } catch {
      // Best effort: the caller is already reporting a more relevant error.
    }
  };

  /**
   * Create one worker configured from the OCR options.
   * `fallbackOem` is used only when `options.oem` is null/undefined, so the
   * valid engine mode 0 is passed through unchanged.
   */
  const spawnWorker = async (tesseract, options, fallbackOem) => {
    const worker = await tesseract.createWorker(resolveLanguage(options), options?.oem ?? fallbackOem);
    if (options?.psm !== void 0) {
      try {
        await worker.setParameters({ tessedit_pageseg_mode: options.psm });
      } catch (error) {
        await terminateQuietly(worker);
        throw error;
      }
    }
    return worker;
  };

  /**
   * Thin wrapper around the optional `tesseract.js` dependency.
   */
  OCRProcessor = class {
    constructor() {
      try {
        this.tesseract = require("tesseract.js");
      } catch {
        // tesseract.js is optional: leave `this.tesseract` undefined so that
        // isAvailable() reports false and the OCR methods throw a clear error.
      }
    }

    /**
     * Extract text from an image buffer using OCR.
     *
     * @param {*} buffer - Image passed to the worker's `recognize`.
     * @param {object} [options] - `language` (string or array), `oem`
     *   (defaults to 3 when not provided) and `psm` are read.
     * @returns {Promise<{text: string, confidence: number, processingTime: number}>}
     *   `processingTime` is in milliseconds, rounded to two decimals.
     * @throws {Error} When tesseract.js is missing or recognition fails.
     */
    async recognizeText(buffer, options) {
      if (!this.tesseract) throw new Error(MISSING_DEPENDENCY_MESSAGE);
      const startTime = performance.now();
      let worker;
      try {
        worker = await spawnWorker(this.tesseract, options, 3);
        const result = await worker.recognize(buffer);
        await worker.terminate();
        worker = void 0;
        const endTime = performance.now();
        const processingTime = Math.round((endTime - startTime) * 100) / 100;
        return {
          text: result.data.text || "",
          confidence: result.data.confidence || 0,
          processingTime
        };
      } catch (error) {
        // Release the worker on every failure path so it is not leaked.
        if (worker) await terminateQuietly(worker);
        throw new Error(`[OCRProcessor] OCR recognition failed: ${error.message}`, { cause: error });
      }
    }

    /**
     * Check if OCR is available (tesseract.js could be loaded).
     *
     * @returns {boolean}
     */
    isAvailable() {
      return !!this.tesseract;
    }

    /**
     * Create a scheduler for batch OCR processing, replacing (and
     * terminating) any scheduler this instance already holds.
     *
     * @param {number} [workerCount=4] - Number of workers to attach.
     * @param {object} [options] - Optional `language`, `oem` and `psm` applied
     *   to every worker. Without it workers use "eng" and the library's
     *   default engine mode, as before.
     * @returns {Promise<object>} The scheduler, also stored on `this.scheduler`.
     * @throws {Error} When tesseract.js is missing or a worker cannot be created.
     */
    async createScheduler(workerCount = 4, options) {
      if (!this.tesseract) throw new Error(MISSING_DEPENDENCY_MESSAGE);
      if (this.scheduler) await this.scheduler.terminate();
      const scheduler = this.tesseract.createScheduler();
      this.scheduler = scheduler;
      try {
        for (let i = 0; i < workerCount; i++) {
          scheduler.addWorker(await spawnWorker(this.tesseract, options, void 0));
        }
      } catch (error) {
        // Do not leave a half-built scheduler (and its workers) running.
        await terminateQuietly(scheduler);
        if (this.scheduler === scheduler) this.scheduler = void 0;
        throw error;
      }
      return scheduler;
    }

    /**
     * Batch process multiple images on a temporary scheduler.
     *
     * @param {Array} buffers - Images to recognise.
     * @param {object} [_options] - `language`, `oem` and `psm` are forwarded
     *   to the workers.
     * @returns {Promise<Array<{text: string, confidence: number, processingTime: number}>>}
     *   Results in the same order as `buffers`.
     * @throws {Error} When tesseract.js is missing or any job fails.
     */
    async recognizeBatch(buffers, _options) {
      if (!this.tesseract) throw new Error(MISSING_DEPENDENCY_MESSAGE);
      // Nothing to recognise: avoid spinning up workers for an empty batch.
      if (buffers.length === 0) return [];
      const scheduler = await this.createScheduler(4, _options);
      try {
        const results = await Promise.all(buffers.map(async (buffer) => {
          const startTime = performance.now();
          const result = await scheduler.addJob("recognize", buffer);
          const endTime = performance.now();
          return {
            text: result.data.text || "",
            confidence: result.data.confidence || 0,
            processingTime: Math.round((endTime - startTime) * 100) / 100
          };
        }));
        await scheduler.terminate();
        if (this.scheduler === scheduler) this.scheduler = void 0;
        return results;
      } catch (error) {
        await terminateQuietly(scheduler);
        if (this.scheduler === scheduler) this.scheduler = void 0;
        throw new Error(`[OCRProcessor] Batch OCR failed: ${error.message}`, { cause: error });
      }
    }

    /**
     * Terminate any scheduler still held by this instance.
     *
     * @returns {Promise<void>}
     */
    async cleanup() {
      if (this.scheduler) {
        await this.scheduler.terminate();
        this.scheduler = void 0;
      }
    }
  };
}));
14947
+
14948
+ //#endregion
14949
+ //#region src/document/JsonProcessor.ts
14950
/**
 * Factory for JsonProcessor.
 *
 * @returns {JsonProcessor} A new processor instance.
 */
function createJsonProcessor() {
  const processor = new JsonProcessor();
  return processor;
}
14956
var JsonProcessor;
var init_JsonProcessor = __esmMin((() => {
  /**
   * Compile a path pattern into an anchored RegExp.
   * Regex metacharacters in the pattern (".", "[", "]", ...) are matched
   * literally; each "*" matches one or more characters other than ".".
   */
  const pathPatternToRegExp = (pattern) => {
    const escaped = pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    return new RegExp("^" + escaped.replace(/\\\*/g, "[^.]+") + "$");
  };

  /** True when `path` equals one of the patterns or matches it as a wildcard pattern. */
  const matchesAnyPath = (path, patterns) => patterns.some((pattern) => path === pattern || pathPatternToRegExp(pattern).test(path));

  /**
   * Detects and redacts PII inside parsed JSON documents.
   *
   * Values are addressed by path: object members as `parent.key`, array
   * items as `parent[index]` (or `[index]` at the root).
   */
  JsonProcessor = class {
    constructor() {
      this.defaultOptions = {
        maxDepth: 100,
        scanKeys: false,
        alwaysRedact: [],
        skipPaths: [],
        piiIndicatorKeys: [
          "email",
          "e-mail",
          "mail",
          "phone",
          "tel",
          "telephone",
          "mobile",
          "ssn",
          "social_security",
          "address",
          "street",
          "city",
          "zip",
          "postal",
          "name",
          "firstname",
          "lastname",
          "fullname",
          "password",
          "pwd",
          "secret",
          "token",
          "key",
          "card",
          "credit_card",
          "creditcard",
          "account",
          "iban",
          "swift",
          "passport",
          "license",
          "licence"
        ],
        preserveStructure: true
      };
    }

    /**
     * Parse JSON from a string or a buffer-like object.
     *
     * @param {string|object} input - Non-strings are decoded with `toString("utf-8")`.
     * @returns {*} The parsed value.
     * @throws {Error} "[JsonProcessor] Invalid JSON: ..." when parsing fails.
     */
    parse(input) {
      try {
        const text = typeof input === "string" ? input : input.toString("utf-8");
        return JSON.parse(text);
      } catch (error) {
        throw new Error(`[JsonProcessor] Invalid JSON: ${error.message}`);
      }
    }

    /**
     * Detect PII in JSON data. Every primitive value is inspected exactly once.
     *
     * @param {*} data - Parsed JSON value.
     * @param {object} detector - Must expose `detect(text)` resolving to `{ detections }`.
     * @param {object} [options] - Overrides for `defaultOptions`.
     * @returns {Promise<object>} `original`, `redacted` (both strings),
     *   `detections`, `redactionMap`, `stats`, `pathsDetected`, `matchesByPath`.
     */
    async detect(data, detector, options) {
      const opts = {
        ...this.defaultOptions,
        ...options
      };
      const pathsDetected = [];
      const matchesByPath = {};
      const allDetections = [];
      const pending = [];
      const record = (recordPath, detections) => {
        matchesByPath[recordPath] = detections;
        pathsDetected.push(recordPath);
        allDetections.push(...detections);
      };
      const inspect = async (valuePath, value, key) => {
        if (this.shouldSkip(valuePath, opts.skipPaths)) return;
        const valueStr = String(value);
        if (this.shouldAlwaysRedact(valuePath, opts.alwaysRedact)) {
          record(valuePath, [{
            type: "SENSITIVE_FIELD",
            value: valueStr,
            placeholder: `[SENSITIVE_FIELD]`,
            position: [0, valueStr.length],
            severity: "high",
            confidence: 1
          }]);
          return;
        }
        if (opts.scanKeys && key) {
          const keyResult = await detector.detect(key);
          if (keyResult.detections.length > 0) record(`${valuePath}.__key__`, keyResult.detections);
        }
        const result = await detector.detect(valueStr);
        if (result.detections.length > 0) {
          record(valuePath, this.boostConfidenceFromKey(result.detections, key, opts.piiIndicatorKeys));
        }
      };
      this.traverse(data, "", opts, (valuePath, value, key) => {
        pending.push(inspect(valuePath, value, key));
      });
      await Promise.all(pending);
      const original = JSON.stringify(data);
      const redacted = this.redact(data, {
        original,
        redacted: original,
        detections: allDetections,
        redactionMap: {},
        stats: { piiCount: allDetections.length },
        pathsDetected,
        matchesByPath
      }, opts);
      const redactionMap = {};
      allDetections.forEach((det) => {
        redactionMap[det.placeholder] = det.value;
      });
      return {
        original,
        redacted: typeof redacted === "string" ? redacted : JSON.stringify(redacted),
        detections: allDetections,
        redactionMap,
        stats: { piiCount: allDetections.length },
        pathsDetected,
        matchesByPath
      };
    }

    /**
     * Redact PII in JSON data.
     *
     * With `preserveStructure` (the default) every detected path is replaced
     * by a type-appropriate blank. Otherwise detections are replaced by their
     * placeholders inside the affected values when the result carries
     * `matchesByPath`; results without it fall back to text-based redaction.
     *
     * @param {*} data - Parsed JSON value.
     * @param {object} detectionResult - Result shaped like the return of `detect`.
     * @param {object} [options] - Overrides for `defaultOptions`.
     * @returns {*} The redacted value.
     */
    redact(data, detectionResult, options) {
      const opts = {
        ...this.defaultOptions,
        ...options
      };
      if (opts.preserveStructure) return this.redactPreservingStructure(data, detectionResult.pathsDetected);
      // Detection positions are relative to each value, not to the serialized
      // document, so they must be applied value by value.
      if (detectionResult.matchesByPath) return this.redactWithPlaceholders(data, detectionResult.matchesByPath);
      return this.parse(this.redactText(JSON.stringify(data, null, 2), detectionResult));
    }

    /**
     * Redact specific paths in JSON while preserving structure.
     *
     * @param {*} data - Parsed JSON value.
     * @param {string[]} pathsToRedact - Paths whose values are blanked.
     * @returns {*} A copy with strings replaced by "[REDACTED]", numbers by 0,
     *   booleans by false, arrays by [] and objects by {}.
     */
    redactPreservingStructure(data, pathsToRedact) {
      const pathSet = new Set(pathsToRedact);
      const redactValue = (value, currentPath) => {
        if (pathSet.has(currentPath)) {
          if (typeof value === "string") return "[REDACTED]";
          if (typeof value === "number") return 0;
          if (typeof value === "boolean") return false;
          if (value === null) return null;
          if (Array.isArray(value)) return [];
          if (typeof value === "object") return {};
          return "[REDACTED]";
        }
        if (Array.isArray(value)) return value.map((item, index) => redactValue(item, `${currentPath}[${index}]`));
        if (value !== null && typeof value === "object") {
          const result = {};
          for (const [key, val] of Object.entries(value)) result[key] = redactValue(val, currentPath ? `${currentPath}.${key}` : key);
          return result;
        }
        return value;
      };
      return redactValue(data, "");
    }

    /**
     * Replace detections by their placeholders inside each affected value.
     *
     * @param {*} data - Parsed JSON value.
     * @param {object} matchesByPath - Map of value path to the detections
     *   found in `String(value)` at that path.
     * @returns {*} A copy in which affected primitives become redacted strings.
     */
    redactWithPlaceholders(data, matchesByPath) {
      const hasMatches = (valuePath) => Object.prototype.hasOwnProperty.call(matchesByPath, valuePath);
      const walk = (value, currentPath) => {
        if (Array.isArray(value)) return value.map((item, index) => walk(item, `${currentPath}[${index}]`));
        if (value !== null && typeof value === "object") {
          const result = {};
          for (const [key, val] of Object.entries(value)) result[key] = walk(val, currentPath ? `${currentPath}.${key}` : key);
          return result;
        }
        if (this.isPrimitive(value) && hasMatches(currentPath)) {
          return this.redactText(String(value), { detections: matchesByPath[currentPath] });
        }
        return value;
      };
      return walk(data, "");
    }

    /**
     * Replace each detection's `position` range in `text` by its placeholder.
     * Detections are applied from the end so earlier offsets stay valid.
     *
     * @param {string} text - Text the detection positions refer to.
     * @param {{detections: Array}} detectionResult
     * @returns {string}
     */
    redactText(text, detectionResult) {
      let redacted = text;
      const sortedDetections = [...detectionResult.detections].sort((a, b) => b.position[0] - a.position[0]);
      for (const detection of sortedDetections) {
        const [start, end] = detection.position;
        redacted = redacted.slice(0, start) + detection.placeholder + redacted.slice(end);
      }
      return redacted;
    }

    /**
     * Walk a JSON structure and invoke `callback(path, value, key)` once per
     * primitive value. `key` is the owning object key and is undefined for
     * array items and for a primitive root.
     *
     * @param {*} obj - Current value.
     * @param {string} path - Path of `obj`.
     * @param {object} options - `maxDepth` is read.
     * @param {Function} callback
     * @param {number} [depth=0] - Current nesting depth.
     * @param {string} [key] - Object key under which `obj` is stored, if any.
     * @throws {Error} When `depth` exceeds `options.maxDepth`.
     */
    traverse(obj, path, options, callback, depth = 0, key) {
      if (depth > options.maxDepth) throw new Error(`[JsonProcessor] Maximum depth (${options.maxDepth}) exceeded`);
      if (obj === null || obj === void 0) return;
      if (Array.isArray(obj)) {
        obj.forEach((item, index) => {
          const itemPath = path ? `${path}[${index}]` : `[${index}]`;
          this.traverse(item, itemPath, options, callback, depth + 1);
        });
        return;
      }
      if (typeof obj === "object") {
        for (const [childKey, value] of Object.entries(obj)) {
          const valuePath = path ? `${path}.${childKey}` : childKey;
          this.traverse(value, valuePath, options, callback, depth + 1, childKey);
        }
        return;
      }
      // The callback is issued only here so each primitive is reported once.
      if (this.isPrimitive(obj)) callback(path, obj, key);
    }

    /**
     * Check if value is primitive (string, number, boolean).
     *
     * @param {*} value
     * @returns {boolean}
     */
    isPrimitive(value) {
      return typeof value === "string" || typeof value === "number" || typeof value === "boolean";
    }

    /**
     * Check if a path should be skipped.
     *
     * @param {string} path
     * @param {string[]} skipPaths - Exact paths or patterns using "*".
     * @returns {boolean}
     */
    shouldSkip(path, skipPaths) {
      return matchesAnyPath(path, skipPaths);
    }

    /**
     * Check if a path should always be redacted.
     *
     * @param {string} path
     * @param {string[]} alwaysRedact - Exact paths or patterns using "*".
     * @returns {boolean}
     */
    shouldAlwaysRedact(path, alwaysRedact) {
      return matchesAnyPath(path, alwaysRedact);
    }

    /**
     * Raise detection confidence by 20% (capped at 1) when the key name
     * contains one of the indicator names, compared case-insensitively.
     *
     * @param {Array} detections
     * @param {string} [key]
     * @param {string[]} piiIndicatorKeys
     * @returns {Array} The original array when no boost applies, else copies.
     */
    boostConfidenceFromKey(detections, key, piiIndicatorKeys) {
      if (!key) return detections;
      const keyLower = key.toLowerCase();
      if (!piiIndicatorKeys.some((indicator) => keyLower.includes(indicator.toLowerCase()))) return detections;
      return detections.map((detection) => ({
        ...detection,
        confidence: Math.min(1, (detection.confidence || .5) * 1.2)
      }));
    }

    /**
     * Join all string values (and keys, when `scanKeys` is set) with spaces.
     *
     * @param {*} data - Parsed JSON value.
     * @param {object} [options] - Overrides for `defaultOptions`.
     * @returns {string}
     */
    extractText(data, options) {
      const opts = {
        ...this.defaultOptions,
        ...options
      };
      const textParts = [];
      this.traverse(data, "", opts, (_path, value, key) => {
        if (opts.scanKeys && key) textParts.push(key);
        if (typeof value === "string") textParts.push(value);
      });
      return textParts.join(" ");
    }

    /**
     * Validate a JSON buffer/string.
     *
     * @param {string|object} input
     * @returns {boolean} True when `parse` succeeds.
     */
    isValid(input) {
      try {
        this.parse(input);
        return true;
      } catch {
        return false;
      }
    }

    /**
     * Parse JSON Lines (JSONL): one JSON document per non-blank line.
     *
     * @param {string|object} input
     * @returns {Array} Parsed documents in input order.
     * @throws {Error} Naming the 1-based line of the input that failed to parse.
     */
    parseJsonLines(input) {
      const text = typeof input === "string" ? input : input.toString("utf-8");
      const documents = [];
      text.split("\n").forEach((line, index) => {
        if (line.trim().length === 0) return;
        try {
          documents.push(JSON.parse(line));
        } catch (error) {
          // `index` counts blank lines too, so it is the real input line.
          throw new Error(`[JsonProcessor] Invalid JSON at line ${index + 1}: ${error.message}`);
        }
      });
      return documents;
    }

    /**
     * Detect PII in JSON Lines format.
     *
     * @param {string|object} input
     * @param {object} detector
     * @param {object} [options]
     * @returns {Promise<Array>} One `detect` result per document.
     */
    async detectJsonLines(input, detector, options) {
      const documents = this.parseJsonLines(input);
      return Promise.all(documents.map((doc) => this.detect(doc, detector, options)));
    }
  };
}));
15240
+
15241
+ //#endregion
15242
+ //#region src/document/CsvProcessor.ts
15243
/**
 * Factory for CsvProcessor.
 *
 * @returns {CsvProcessor} A new processor instance.
 */
function createCsvProcessor() {
  const processor = new CsvProcessor();
  return processor;
}
15249
+ var CsvProcessor;
15250
+ var init_CsvProcessor = __esmMin((() => {
15251
+ CsvProcessor = class {
15252
+ constructor() {
15253
+ this.defaultOptions = {
15254
+ quote: "\"",
15255
+ escape: "\"",
15256
+ skipEmptyLines: true,
15257
+ piiIndicatorNames: [
15258
+ "email",
15259
+ "e-mail",
15260
+ "mail",
15261
+ "email_address",
15262
+ "phone",
15263
+ "tel",
15264
+ "telephone",
15265
+ "mobile",
15266
+ "phone_number",
15267
+ "ssn",
15268
+ "social_security",
15269
+ "social_security_number",
15270
+ "address",
15271
+ "street",
15272
+ "street_address",
15273
+ "city",
15274
+ "zip",
15275
+ "zipcode",
15276
+ "postal",
15277
+ "postcode",
15278
+ "name",
15279
+ "firstname",
15280
+ "first_name",
15281
+ "lastname",
15282
+ "last_name",
15283
+ "fullname",
15284
+ "full_name",
15285
+ "password",
15286
+ "pwd",
15287
+ "secret",
15288
+ "token",
15289
+ "api_key",
15290
+ "card",
15291
+ "credit_card",
15292
+ "creditcard",
15293
+ "card_number",
15294
+ "account",
15295
+ "account_number",
15296
+ "iban",
15297
+ "swift",
15298
+ "passport",
15299
+ "passport_number",
15300
+ "license",
15301
+ "licence",
15302
+ "driver_license",
15303
+ "dob",
15304
+ "date_of_birth",
15305
+ "birth_date",
15306
+ "birthdate"
15307
+ ],
15308
+ treatFirstRowAsHeader: true
15309
+ };
15310
+ }
15311
+ /**
15312
+ * Parse CSV from buffer or string
15313
+ */
15314
+ parse(input, options) {
15315
+ const opts = {
15316
+ ...this.defaultOptions,
15317
+ ...options
15318
+ };
15319
+ const text = typeof input === "string" ? input : input.toString("utf-8");
15320
+ const delimiter = opts.delimiter || this.detectDelimiter(text);
15321
+ const lines = text.split(/\r?\n/);
15322
+ const rows = [];
15323
+ let rowIndex = 0;
15324
+ for (let i = 0; i < lines.length; i++) {
15325
+ const line = lines[i];
15326
+ if (opts.skipEmptyLines && line.trim().length === 0) continue;
15327
+ if (opts.maxRows !== void 0 && rowIndex >= opts.maxRows) break;
15328
+ const values = this.parseRow(line, delimiter, opts.quote, opts.escape);
15329
+ rows.push({
15330
+ index: rowIndex,
15331
+ values
15332
+ });
15333
+ rowIndex++;
15334
+ }
15335
+ return rows;
15336
+ }
15337
+ /**
15338
+ * Detect PII in CSV data
15339
+ */
15340
+ async detect(input, detector, options) {
15341
+ const opts = {
15342
+ ...this.defaultOptions,
15343
+ ...options
15344
+ };
15345
+ const rows = this.parse(input, options);
15346
+ if (rows.length === 0) {
15347
+ const original = typeof input === "string" ? input : input.toString("utf-8");
15348
+ return {
15349
+ original,
15350
+ redacted: original,
15351
+ detections: [],
15352
+ redactionMap: {},
15353
+ stats: { piiCount: 0 },
15354
+ rowCount: 0,
15355
+ columnCount: 0,
15356
+ columnStats: {},
15357
+ matchesByCell: []
15358
+ };
15359
+ }
15360
+ const hasHeader = opts.hasHeader !== void 0 ? opts.hasHeader : this.detectHeader(rows);
15361
+ const headers = hasHeader && rows.length > 0 ? rows[0].values : void 0;
15362
+ const dataRows = hasHeader ? rows.slice(1) : rows;
15363
+ const columnCount = rows[0].values.length;
15364
+ const columnNameToIndex = /* @__PURE__ */ new Map();
15365
+ if (headers) headers.forEach((header, index) => {
15366
+ columnNameToIndex.set(header.toLowerCase().trim(), index);
15367
+ });
15368
+ const alwaysRedactCols = new Set(opts.alwaysRedactColumns || []);
15369
+ if (opts.alwaysRedactColumnNames && headers) opts.alwaysRedactColumnNames.forEach((name) => {
15370
+ const index = columnNameToIndex.get(name.toLowerCase().trim());
15371
+ if (index !== void 0) alwaysRedactCols.add(index);
15372
+ });
15373
+ const skipCols = new Set(opts.skipColumns || []);
15374
+ const columnStats = {};
15375
+ const matchesByCell = [];
15376
+ const allDetections = [];
15377
+ for (let col = 0; col < columnCount; col++) columnStats[col] = {
15378
+ columnIndex: col,
15379
+ columnName: headers?.[col],
15380
+ piiCount: 0,
15381
+ piiPercentage: 0,
15382
+ piiTypes: []
15383
+ };
15384
+ for (const row of dataRows) for (let col = 0; col < row.values.length; col++) {
15385
+ if (skipCols.has(col)) continue;
15386
+ const cellValue = row.values[col];
15387
+ if (alwaysRedactCols.has(col)) {
15388
+ const detection = {
15389
+ type: "SENSITIVE_COLUMN",
15390
+ value: cellValue,
15391
+ placeholder: `[SENSITIVE_COLUMN_${col}]`,
15392
+ position: [0, cellValue.length],
15393
+ severity: "high",
15394
+ confidence: 1
15395
+ };
15396
+ matchesByCell.push({
15397
+ row: row.index,
15398
+ column: col,
15399
+ columnName: headers?.[col],
15400
+ value: cellValue,
15401
+ matches: [detection]
15402
+ });
15403
+ allDetections.push(detection);
15404
+ columnStats[col].piiCount++;
15405
+ continue;
15406
+ }
15407
+ const result = await detector.detect(cellValue);
15408
+ if (result.detections.length > 0) {
15409
+ const boostedDetections = this.boostConfidenceFromColumnName(result.detections, headers?.[col], opts.piiIndicatorNames || []);
15410
+ matchesByCell.push({
15411
+ row: row.index,
15412
+ column: col,
15413
+ columnName: headers?.[col],
15414
+ value: cellValue,
15415
+ matches: boostedDetections
15416
+ });
15417
+ allDetections.push(...boostedDetections);
15418
+ columnStats[col].piiCount += boostedDetections.length;
15419
+ const columnTypes = new Set(columnStats[col].piiTypes);
15420
+ boostedDetections.forEach((d) => columnTypes.add(d.type));
15421
+ columnStats[col].piiTypes = Array.from(columnTypes);
15422
+ }
15423
+ }
15424
+ for (let col = 0; col < columnCount; col++) {
15425
+ const rowsWithPii = matchesByCell.filter((m) => m.column === col).length;
15426
+ columnStats[col].piiPercentage = dataRows.length > 0 ? rowsWithPii / dataRows.length * 100 : 0;
15427
+ }
15428
+ const original = typeof input === "string" ? input : input.toString("utf-8");
15429
+ const redacted = this.redact(original, {
15430
+ original,
15431
+ redacted: original,
15432
+ detections: allDetections,
15433
+ redactionMap: {},
15434
+ stats: { piiCount: allDetections.length },
15435
+ rowCount: dataRows.length,
15436
+ columnCount,
15437
+ headers,
15438
+ columnStats,
15439
+ matchesByCell
15440
+ }, opts);
15441
+ const redactionMap = {};
15442
+ allDetections.forEach((det) => {
15443
+ redactionMap[det.placeholder] = det.value;
15444
+ });
15445
+ return {
15446
+ original,
15447
+ redacted,
15448
+ detections: allDetections,
15449
+ redactionMap,
15450
+ stats: { piiCount: allDetections.length },
15451
+ rowCount: dataRows.length,
15452
+ columnCount,
15453
+ headers: headers?.filter((h) => h !== void 0),
15454
+ columnStats,
15455
+ matchesByCell
15456
+ };
15457
+ }
15458
+ /**
15459
+ * Redact PII in CSV data
15460
+ */
15461
+ redact(input, detectionResult, options) {
15462
+ const opts = {
15463
+ ...this.defaultOptions,
15464
+ ...options
15465
+ };
15466
+ const rows = this.parse(input, options);
15467
+ if (rows.length === 0) return "";
15468
+ const delimiter = opts.delimiter || this.detectDelimiter(typeof input === "string" ? input : input.toString("utf-8"));
15469
+ const hasHeader = detectionResult.headers !== void 0;
15470
+ const redactionMap = /* @__PURE__ */ new Map();
15471
+ for (const cellMatch of detectionResult.matchesByCell) {
15472
+ if (!redactionMap.has(cellMatch.row)) redactionMap.set(cellMatch.row, /* @__PURE__ */ new Map());
15473
+ redactionMap.get(cellMatch.row).set(cellMatch.column, "[REDACTED]");
15474
+ }
15475
+ const outputRows = [];
15476
+ for (let i = 0; i < rows.length; i++) {
15477
+ const row = rows[i];
15478
+ if (hasHeader && i === 0) outputRows.push(this.formatRow(row.values, delimiter, opts.quote));
15479
+ else {
15480
+ const rowIndex = hasHeader ? i - 1 : i;
15481
+ const redactedValues = row.values.map((value, colIndex) => {
15482
+ return redactionMap.get(rowIndex)?.get(colIndex) || value;
15483
+ });
15484
+ outputRows.push(this.formatRow(redactedValues, delimiter, opts.quote));
15485
+ }
15486
+ }
15487
+ return outputRows.join("\n");
15488
+ }
15489
+ /**
15490
+ * Parse a single CSV row
15491
+ */
15492
+ parseRow(line, delimiter, quote, _escape) {
15493
+ const values = [];
15494
+ let current = "";
15495
+ let inQuotes = false;
15496
+ let i = 0;
15497
+ while (i < line.length) {
15498
+ const char = line[i];
15499
+ const nextChar = line[i + 1];
15500
+ if (char === quote) if (inQuotes && nextChar === quote) {
15501
+ current += quote;
15502
+ i += 2;
15503
+ } else {
15504
+ inQuotes = !inQuotes;
15505
+ i++;
15506
+ }
15507
+ else if (char === delimiter && !inQuotes) {
15508
+ values.push(current);
15509
+ current = "";
15510
+ i++;
15511
+ } else {
15512
+ current += char;
15513
+ i++;
15514
+ }
15515
+ }
15516
+ values.push(current);
15517
+ return values;
15518
+ }
15519
+ /**
15520
+ * Format a row as CSV
15521
+ */
15522
+ formatRow(values, delimiter, quote) {
15523
+ return values.map((value) => {
15524
+ if (value.includes(delimiter) || value.includes(quote) || value.includes("\n")) return `${quote}${value.replace(new RegExp(quote, "g"), quote + quote)}${quote}`;
15525
+ return value;
15526
+ }).join(delimiter);
15527
+ }
15528
+ /**
15529
+ * Auto-detect CSV delimiter
15530
+ */
15531
+ detectDelimiter(text) {
15532
+ const delimiters = [
15533
+ ",",
15534
+ " ",
15535
+ ";",
15536
+ "|"
15537
+ ];
15538
+ const lines = text.split(/\r?\n/).slice(0, 5);
15539
+ let bestDelimiter = ",";
15540
+ let bestScore = 0;
15541
+ for (const delimiter of delimiters) {
15542
+ const counts = lines.map((line) => {
15543
+ let count = 0;
15544
+ let inQuotes = false;
15545
+ for (const char of line) {
15546
+ if (char === "\"") inQuotes = !inQuotes;
15547
+ if (char === delimiter && !inQuotes) count++;
15548
+ }
15549
+ return count;
15550
+ });
15551
+ if (counts.length > 0 && counts[0] > 0) {
15552
+ const avg = counts.reduce((a, b) => a + b, 0) / counts.length;
15553
+ const score = avg / (counts.reduce((sum, c) => sum + Math.pow(c - avg, 2), 0) / counts.length + 1);
15554
+ if (score > bestScore) {
15555
+ bestScore = score;
15556
+ bestDelimiter = delimiter;
15557
+ }
15558
+ }
15559
+ }
15560
+ return bestDelimiter;
15561
+ }
15562
+ /**
15563
+ * Detect if first row is likely a header
15564
+ */
15565
+ detectHeader(rows) {
15566
+ if (rows.length < 2) return false;
15567
+ const firstRow = rows[0].values;
15568
+ const secondRow = rows[1].values;
15569
+ if (firstRow.reduce((sum, v) => sum + v.length, 0) / firstRow.length > secondRow.reduce((sum, v) => sum + v.length, 0) / secondRow.length * 1.5) return false;
15570
+ const firstRowNumeric = firstRow.filter((v) => !isNaN(Number(v)) && v.trim() !== "").length;
15571
+ return firstRow.length - firstRowNumeric >= firstRowNumeric;
15572
+ }
15573
+ /**
15574
+ * Boost confidence if column name indicates PII
15575
+ */
15576
+ boostConfidenceFromColumnName(detections, columnName, piiIndicatorNames) {
15577
+ if (!columnName) return detections;
15578
+ const nameLower = columnName.toLowerCase().trim();
15579
+ if (!piiIndicatorNames.some((indicator) => nameLower.includes(indicator.toLowerCase()))) return detections;
15580
+ return detections.map((detection) => ({
15581
+ ...detection,
15582
+ confidence: Math.min(1, (detection.confidence || .5) * 1.2)
15583
+ }));
15584
+ }
15585
+ /**
15586
+ * Extract all cell values as text
15587
+ */
15588
+ extractText(input, options) {
15589
+ const rows = this.parse(input, options);
15590
+ const textParts = [];
15591
+ for (const row of rows) for (const value of row.values) if (value.trim().length > 0) textParts.push(value);
15592
+ return textParts.join(" ");
15593
+ }
15594
+ /**
15595
+ * Get column statistics without full PII detection
15596
+ */
15597
+ getColumnInfo(input, options) {
15598
+ const rows = this.parse(input, options);
15599
+ if (rows.length === 0) return {
15600
+ columnCount: 0,
15601
+ rowCount: 0,
15602
+ sampleRows: []
15603
+ };
15604
+ const opts = {
15605
+ ...this.defaultOptions,
15606
+ ...options
15607
+ };
15608
+ const hasHeader = opts.hasHeader !== void 0 ? opts.hasHeader : this.detectHeader(rows);
15609
+ const headers = hasHeader && rows.length > 0 ? rows[0].values : void 0;
15610
+ const dataRows = hasHeader ? rows.slice(1) : rows;
15611
+ const sampleRows = dataRows.slice(0, 5).map((r) => r.values);
15612
+ return {
15613
+ columnCount: rows[0].values.length,
15614
+ rowCount: dataRows.length,
15615
+ headers,
15616
+ sampleRows
15617
+ };
15618
+ }
15619
+ };
15620
+ }));
15621
+
15622
+ //#endregion
15623
+ //#region src/document/XlsxProcessor.ts
15624
/**
 * Factory for XlsxProcessor.
 *
 * @returns A new XlsxProcessor instance on every call.
 */
function createXlsxProcessor() {
  const processor = new XlsxProcessor();
  return processor;
}
15630
var XlsxProcessor;
var init_XlsxProcessor = __esmMin(() => {
  /** Message thrown by every entry point when the optional `xlsx` package is not loaded. */
  const MISSING_XLSX_MESSAGE = "[XlsxProcessor] XLSX support requires xlsx package. Install with: npm install xlsx";

  /**
   * Detects and redacts PII in XLSX workbooks through the optional `xlsx`
   * package. Column indices in options and results are relative to the first
   * column of the sheet's used range; `cell` and `columnLetter` are absolute.
   */
  XlsxProcessor = class {
    constructor() {
      this.defaultOptions = {
        piiIndicatorNames: [
          "email",
          "e-mail",
          "mail",
          "email_address",
          "phone",
          "tel",
          "telephone",
          "mobile",
          "phone_number",
          "ssn",
          "social_security",
          "social_security_number",
          "address",
          "street",
          "street_address",
          "city",
          "zip",
          "zipcode",
          "postal",
          "postcode",
          "name",
          "firstname",
          "first_name",
          "lastname",
          "last_name",
          "fullname",
          "full_name",
          "password",
          "pwd",
          "secret",
          "token",
          "api_key",
          "card",
          "credit_card",
          "creditcard",
          "card_number",
          "account",
          "account_number",
          "iban",
          "swift",
          "passport",
          "passport_number",
          "license",
          "licence",
          "driver_license",
          "dob",
          "date_of_birth",
          "birth_date",
          "birthdate"
        ],
        preserveFormatting: true,
        preserveFormulas: true
      };
      try {
        this.xlsx = require("xlsx");
      } catch {
        // Optional dependency: a failed load is reported through isAvailable()
        // and by the explicit errors thrown from the public methods.
      }
    }
    /**
     * Check if XLSX support is available.
     * @returns True when the `xlsx` package was loaded.
     */
    isAvailable() {
      return !!this.xlsx;
    }
    /**
     * Throw the standard "missing dependency" error when `xlsx` is not loaded.
     */
    assertAvailable() {
      if (!this.xlsx) throw new Error(MISSING_XLSX_MESSAGE);
    }
    /**
     * Parse XLSX from buffer.
     * @param buffer Workbook bytes.
     * @returns The workbook object produced by `xlsx.read`.
     * @throws Error when `xlsx` is unavailable or the buffer cannot be read.
     */
    parse(buffer) {
      this.assertAvailable();
      try {
        return this.xlsx.read(buffer, {
          type: "buffer",
          cellFormula: true,
          cellStyles: true
        });
      } catch (error) {
        // Keep the underlying failure reachable through `cause`.
        throw new Error(`[XlsxProcessor] Failed to parse XLSX: ${error.message}`, { cause: error });
      }
    }
    /**
     * Detect PII in XLSX data.
     *
     * @param buffer Workbook bytes.
     * @param detector Object exposing `detect(text)` that resolves to `{ detections }`.
     * @param options Overrides merged over `this.defaultOptions`.
     * @returns Original and redacted text, the flat detection list, a
     *   placeholder-to-value map, and the per-sheet results.
     */
    async detect(buffer, detector, options) {
      this.assertAvailable();
      const opts = {
        ...this.defaultOptions,
        ...options
      };
      const workbook = this.parse(buffer);
      const sheetNames = this.getSheetNamesToProcess(workbook, opts);
      const sheetResults = [];
      const allDetections = [];
      for (const [sheetIndex, sheetName] of sheetNames.entries()) {
        const sheetResult = await this.detectSheet(workbook.Sheets[sheetName], sheetName, sheetIndex, detector, opts);
        sheetResults.push(sheetResult);
        // Pushed one by one: spreading a very large array into push() can
        // exceed the engine's argument limit.
        for (const cellMatch of sheetResult.matchesByCell) {
          for (const match of cellMatch.matches) allDetections.push(match);
        }
      }
      const original = this.extractText(buffer, options);
      const redactedBuffer = this.redact(buffer, {
        original,
        redacted: original,
        detections: allDetections,
        redactionMap: {},
        stats: { piiCount: allDetections.length },
        sheetResults,
        sheetCount: sheetResults.length
      }, options);
      const redacted = this.extractText(redactedBuffer, options);
      const redactionMap = {};
      for (const det of allDetections) redactionMap[det.placeholder] = det.value;
      return {
        original,
        redacted,
        detections: allDetections,
        redactionMap,
        stats: { piiCount: allDetections.length },
        sheetResults,
        sheetCount: sheetResults.length
      };
    }
    /**
     * Detect PII in a single sheet.
     *
     * @param sheet Worksheet object (cells keyed by A1-style reference, range in "!ref").
     * @param sheetName Name reported in the result.
     * @param sheetIndex Index reported in the result.
     * @param detector Object exposing `detect(text)`.
     * @param options Merged options (`maxRows`, `hasHeader`, `alwaysRedactColumns`,
     *   `alwaysRedactColumnNames`, `skipColumns`, `piiIndicatorNames`).
     * @returns Sheet summary with per-column statistics and the matching cells.
     */
    async detectSheet(sheet, sheetName, sheetIndex, detector, options) {
      const range = this.xlsx.utils.decode_range(sheet["!ref"] || "A1");
      const startRow = range.s.r;
      const endRow = options.maxRows !== void 0 ? Math.min(range.e.r, startRow + options.maxRows - 1) : range.e.r;
      const startCol = range.s.c;
      const endCol = range.e.c;
      const columnCount = endCol - startCol + 1;
      const hasHeader = options.hasHeader !== void 0 ? options.hasHeader : this.detectHeader(sheet, range);
      const headers = hasHeader ? this.getRowValues(sheet, startRow, startCol, endCol) : void 0;
      const dataStartRow = hasHeader ? startRow + 1 : startRow;

      // Resolve name-based "always redact" columns to relative indices.
      const columnNameToIndex = /* @__PURE__ */ new Map();
      if (headers) headers.forEach((header, index) => {
        if (header) columnNameToIndex.set(header.toLowerCase().trim(), index);
      });
      const alwaysRedactCols = new Set(options.alwaysRedactColumns || []);
      if (options.alwaysRedactColumnNames && headers) {
        for (const name of options.alwaysRedactColumnNames) {
          const index = columnNameToIndex.get(name.toLowerCase().trim());
          if (index !== void 0) alwaysRedactCols.add(index);
        }
      }
      const skipCols = new Set(options.skipColumns || []);

      const columnStats = {};
      // Number of matching cells per relative column, for piiPercentage.
      const cellsWithPii = [];
      for (let colIndex = 0; colIndex < columnCount; colIndex++) {
        columnStats[colIndex] = {
          columnIndex: colIndex,
          // The letter is absolute so that it agrees with the cell reference
          // even when the used range does not start at column A.
          columnLetter: this.columnToLetter(startCol + colIndex),
          columnName: headers?.[colIndex],
          piiCount: 0,
          piiPercentage: 0,
          piiTypes: []
        };
        cellsWithPii[colIndex] = 0;
      }

      const matchesByCell = [];
      for (let row = dataStartRow; row <= endRow; row++) for (let col = startCol; col <= endCol; col++) {
        const colIndex = col - startCol;
        if (skipCols.has(colIndex)) continue;
        const cellRef = this.xlsx.utils.encode_cell({
          r: row,
          c: col
        });
        const cell = sheet[cellRef];
        if (!cell) continue;
        const cellValue = this.getCellValue(cell);
        if (!cellValue) continue;
        const describeCell = (matches) => ({
          cell: cellRef,
          row: row + 1,
          column: colIndex,
          columnLetter: this.columnToLetter(col),
          columnName: headers?.[colIndex],
          value: cellValue,
          formula: cell.f,
          matches
        });
        if (alwaysRedactCols.has(colIndex)) {
          // Whole-column redaction: the detector is not consulted.
          matchesByCell.push(describeCell([{
            type: "SENSITIVE_COLUMN",
            value: cellValue,
            placeholder: `[SENSITIVE_COLUMN_${colIndex}]`,
            position: [0, cellValue.length],
            severity: "high",
            confidence: 1
          }]));
          columnStats[colIndex].piiCount++;
          cellsWithPii[colIndex]++;
          continue;
        }
        const result = await detector.detect(cellValue);
        if (result.detections.length > 0) {
          const boostedDetections = this.boostConfidenceFromColumnName(result.detections, headers?.[colIndex], options.piiIndicatorNames || []);
          matchesByCell.push(describeCell(boostedDetections));
          columnStats[colIndex].piiCount += boostedDetections.length;
          cellsWithPii[colIndex]++;
          const columnTypes = new Set(columnStats[colIndex].piiTypes);
          boostedDetections.forEach((d) => columnTypes.add(d.type));
          columnStats[colIndex].piiTypes = Array.from(columnTypes);
        }
      }

      // Never report a negative row count (header-only sheet, maxRows of 0).
      const dataRowCount = Math.max(0, endRow - dataStartRow + 1);
      for (let colIndex = 0; colIndex < columnCount; colIndex++) {
        columnStats[colIndex].piiPercentage = dataRowCount > 0 ? cellsWithPii[colIndex] / dataRowCount * 100 : 0;
      }
      return {
        sheetName,
        sheetIndex,
        rowCount: dataRowCount,
        columnCount,
        headers: headers?.filter((h) => h !== void 0),
        columnStats,
        matchesByCell
      };
    }
    /**
     * Redact PII in XLSX data.
     *
     * @param buffer Workbook bytes.
     * @param detectionResult Result whose `sheetResults[].matchesByCell[].cell` name the cells to overwrite.
     * @param options Overrides merged over `this.defaultOptions`.
     * @returns The value of `xlsx.write` for the modified workbook (type "buffer", bookType "xlsx").
     */
    redact(buffer, detectionResult, options) {
      this.assertAvailable();
      const opts = {
        ...this.defaultOptions,
        ...options
      };
      const workbook = this.parse(buffer);
      for (const sheetResult of detectionResult.sheetResults) {
        const sheet = workbook.Sheets[sheetResult.sheetName];
        // A result that names a sheet absent from this workbook is skipped.
        if (!sheet) continue;
        for (const cellMatch of sheetResult.matchesByCell) {
          const cell = sheet[cellMatch.cell];
          if (!cell) continue;
          cell.v = "[REDACTED]";
          cell.w = "[REDACTED]";
          // NOTE(review): with preserveFormulas (the default) the formula text
          // stays on the redacted cell; confirm that is acceptable for
          // formulas that embed or recompute sensitive values.
          if (!opts.preserveFormulas) delete cell.f;
          cell.t = "s";
        }
      }
      return this.xlsx.write(workbook, {
        type: "buffer",
        bookType: "xlsx"
      });
    }
    /**
     * Get cell value as string.
     * @param cell Cell object; the formatted text `w` is preferred over the raw value `v`.
     * @returns The string value, or "" when the cell is missing or has neither field.
     */
    getCellValue(cell) {
      if (!cell) return "";
      if (cell.w !== void 0) return String(cell.w);
      if (cell.v !== void 0) return String(cell.v);
      return "";
    }
    /**
     * Get row values.
     * @returns One entry per column from startCol to endCol; `undefined` marks a missing cell.
     */
    getRowValues(sheet, row, startCol, endCol) {
      const values = [];
      for (let col = startCol; col <= endCol; col++) {
        const cell = sheet[this.xlsx.utils.encode_cell({
          r: row,
          c: col
        })];
        values.push(cell ? this.getCellValue(cell) : void 0);
      }
      return values;
    }
    /**
     * Detect if first row is likely a header.
     *
     * Returns false when there is no second row, when either row has no cells,
     * or when the first row's average cell length exceeds 1.5 times the second
     * row's. Otherwise the first row is a header when its numeric cells do not
     * outnumber its non-numeric cells.
     */
    detectHeader(sheet, range) {
      if (range.s.r + 1 > range.e.r) return false;
      const firstRowValues = this.getRowValues(sheet, range.s.r, range.s.c, range.e.c).filter((v) => v !== void 0);
      const secondRowValues = this.getRowValues(sheet, range.s.r + 1, range.s.c, range.e.c).filter((v) => v !== void 0);
      if (firstRowValues.length === 0 || secondRowValues.length === 0) return false;
      const meanLength = (cells) => cells.reduce((sum, v) => sum + v.length, 0) / cells.length;
      if (meanLength(firstRowValues) > meanLength(secondRowValues) * 1.5) return false;
      const firstRowNumeric = firstRowValues.filter((v) => !Number.isNaN(Number(v)) && v.trim() !== "").length;
      return firstRowValues.length - firstRowNumeric >= firstRowNumeric;
    }
    /**
     * Convert column index to letter (0 = A, 25 = Z, 26 = AA).
     */
    columnToLetter(col) {
      let letter = "";
      while (col >= 0) {
        letter = String.fromCharCode(col % 26 + 65) + letter;
        col = Math.floor(col / 26) - 1;
      }
      return letter;
    }
    /**
     * Get sheet names to process based on options.
     *
     * `options.sheets` (names) takes precedence over `options.sheetIndices`;
     * unknown names and out-of-range indices are dropped. With neither option
     * every sheet is returned.
     */
    getSheetNamesToProcess(workbook, options) {
      const allSheetNames = workbook.SheetNames;
      if (options.sheets && options.sheets.length > 0) return options.sheets.filter((name) => allSheetNames.includes(name));
      if (options.sheetIndices && options.sheetIndices.length > 0) return options.sheetIndices.filter((index) => index >= 0 && index < allSheetNames.length).map((index) => allSheetNames[index]);
      return allSheetNames;
    }
    /**
     * Boost confidence if column name indicates PII.
     *
     * @returns The input array unchanged when the column name is falsy or
     *   contains no indicator; otherwise copies with confidence multiplied by
     *   1.2 and capped at 1 (missing or zero confidence counts as 0.5).
     */
    boostConfidenceFromColumnName(detections, columnName, piiIndicatorNames) {
      if (!columnName) return detections;
      const nameLower = columnName.toLowerCase().trim();
      if (!piiIndicatorNames.some((indicator) => nameLower.includes(indicator.toLowerCase()))) return detections;
      return detections.map((detection) => ({
        ...detection,
        confidence: Math.min(1, (detection.confidence || .5) * 1.2)
      }));
    }
    /**
     * Extract all cell values as text.
     * @returns Non-blank cell values of the selected sheets, row by row, joined by " ".
     */
    extractText(buffer, options) {
      this.assertAvailable();
      const workbook = this.parse(buffer);
      const opts = {
        ...this.defaultOptions,
        ...options
      };
      const sheetNames = this.getSheetNamesToProcess(workbook, opts);
      const textParts = [];
      for (const sheetName of sheetNames) {
        const sheet = workbook.Sheets[sheetName];
        const range = this.xlsx.utils.decode_range(sheet["!ref"] || "A1");
        for (let row = range.s.r; row <= range.e.r; row++) for (let col = range.s.c; col <= range.e.c; col++) {
          const cell = sheet[this.xlsx.utils.encode_cell({
            r: row,
            c: col
          })];
          if (cell) {
            const value = this.getCellValue(cell);
            if (value.trim().length > 0) textParts.push(value);
          }
        }
      }
      return textParts.join(" ");
    }
    /**
     * Get workbook metadata.
     * @returns The workbook's sheet names and their count.
     */
    getMetadata(buffer) {
      this.assertAvailable();
      const workbook = this.parse(buffer);
      return {
        sheetNames: workbook.SheetNames,
        sheetCount: workbook.SheetNames.length
      };
    }
  };
});
16000
+
16001
+ //#endregion
16002
+ //#region src/document/DocumentProcessor.ts
16003
/**
 * Factory for DocumentProcessor.
 *
 * @returns A new DocumentProcessor instance on every call.
 */
function createDocumentProcessor() {
  const processor = new DocumentProcessor();
  return processor;
}
16009
var DocumentProcessor;
var init_DocumentProcessor = __esmMin(() => {
  init_OCRProcessor();
  init_JsonProcessor();
  init_CsvProcessor();
  init_XlsxProcessor();

  /**
   * Format-detecting front end that extracts text and metadata from PDF, DOCX,
   * plain text, images (through OCR), JSON, CSV and XLSX buffers. PDF and DOCX
   * support depend on the optional `pdf-parse` and `mammoth` packages.
   */
  DocumentProcessor = class {
    constructor() {
      // Optional dependencies: a failed load leaves the field undefined and is
      // reported by isFormatSupported() and by the extraction methods.
      try {
        this.pdfParse = require("pdf-parse");
      } catch {}
      try {
        this.mammoth = require("mammoth");
      } catch {}
      this.ocrProcessor = new OCRProcessor();
      this.jsonProcessor = new JsonProcessor();
      this.csvProcessor = new CsvProcessor();
      this.xlsxProcessor = new XlsxProcessor();
    }
    /**
     * Extract text from document buffer.
     *
     * @param buffer Document bytes.
     * @param options Optional `format` (skips detection), `maxSize` in bytes
     *   (defaults to 50 MiB) and format-specific settings.
     * @returns Promise of the extracted text.
     * @throws Error when the buffer exceeds `maxSize`, or the format cannot be
     *   detected or is unsupported.
     */
    async extractText(buffer, options) {
      // The size limit is enforced first so that oversized input is rejected
      // before format detection decodes the whole buffer.
      const maxSize = options?.maxSize || 50 * 1024 * 1024;
      if (buffer.length > maxSize) throw new Error(`[DocumentProcessor] Document size (${buffer.length} bytes) exceeds maximum (${maxSize} bytes)`);
      const format = options?.format || this.detectFormat(buffer);
      if (!format) throw new Error("[DocumentProcessor] Unable to detect document format. Supported: PDF, DOCX, TXT, images (with OCR)");
      switch (format) {
        case "pdf": return this.extractPdfText(buffer, options);
        case "docx": return this.extractDocxText(buffer, options);
        case "txt": return buffer.toString("utf-8");
        case "image": return this.extractImageText(buffer, options);
        case "json": return this.extractJsonText(buffer, options);
        case "csv": return this.extractCsvText(buffer, options);
        case "xlsx": return this.extractXlsxText(buffer, options);
        default: throw new Error(`[DocumentProcessor] Unsupported format: ${format}`);
      }
    }
    /**
     * Get document metadata.
     *
     * @param buffer Document bytes.
     * @param options Optional `format` override and format-specific settings.
     * @returns Promise of a metadata object that always carries `format`.
     * @throws Error when the format cannot be detected or is unsupported.
     */
    async getMetadata(buffer, options) {
      const format = options?.format || this.detectFormat(buffer);
      if (!format) throw new Error("[DocumentProcessor] Unable to detect document format");
      switch (format) {
        case "pdf": return this.getPdfMetadata(buffer, options);
        case "docx": return this.getDocxMetadata(buffer, options);
        case "txt": return {
          format: "txt",
          pages: void 0
        };
        case "image": return this.getImageMetadata(buffer, options);
        case "json": return this.getJsonMetadata(buffer, options);
        case "csv": return this.getCsvMetadata(buffer, options);
        case "xlsx": return this.getXlsxMetadata(buffer, options);
        default: throw new Error(`[DocumentProcessor] Unsupported format: ${format}`);
      }
    }
    /**
     * Detect document format from buffer.
     *
     * Checks, in order: the PDF marker, image signatures (PNG, JPEG, TIFF,
     * BMP, WebP), zip-based Office formats, JSON, CSV, and finally plain text
     * (fewer than 10% control bytes in the first 1000 bytes).
     *
     * @returns The format name, or null when nothing matches or the buffer is
     *   shorter than 4 bytes.
     */
    detectFormat(buffer) {
      if (buffer.length < 4) return null;
      if (buffer.toString("utf-8", 0, 4) === "%PDF") return "pdf";
      if (buffer.length >= 8 && buffer[0] === 137 && buffer[1] === 80 && buffer[2] === 78 && buffer[3] === 71) return "image";
      if (buffer[0] === 255 && buffer[1] === 216 && buffer[2] === 255) return "image";
      if (buffer[0] === 73 && buffer[1] === 73 && buffer[2] === 42 && buffer[3] === 0 || buffer[0] === 77 && buffer[1] === 77 && buffer[2] === 0 && buffer[3] === 42) return "image";
      if (buffer[0] === 66 && buffer[1] === 77) return "image";
      if (buffer.length >= 12 && buffer[0] === 82 && buffer[1] === 73 && buffer[2] === 70 && buffer[3] === 70 && buffer[8] === 87 && buffer[9] === 69 && buffer[10] === 66 && buffer[11] === 80) return "image";
      if (buffer[0] === 80 && buffer[1] === 75) {
        const zipFormat = this.detectZipFormat(buffer);
        if (zipFormat) return zipFormat;
      }
      const text = buffer.toString("utf-8");
      const trimmed = text.trim();
      if (trimmed.startsWith("{") && trimmed.endsWith("}") || trimmed.startsWith("[") && trimmed.endsWith("]")) {
        if (this.jsonProcessor.isValid(buffer)) return "json";
      }
      // Empty lines (for example the one after a trailing newline) are ignored
      // so they cannot break the "same delimiter count on every line" test.
      const lines = text.split(/\r?\n/).filter((line) => line.length > 0).slice(0, 5);
      if (lines.length >= 2) for (const delimiter of [
        ",",
        " ",
        ";",
        "|"
      ]) {
        // Counted with a literal split: "|" used as a regular expression
        // matches the empty string at every position.
        const counts = lines.map((line) => line.split(delimiter).length - 1);
        if (counts[0] > 0 && counts.every((c) => c === counts[0])) return "csv";
      }
      const sample = buffer.subarray(0, Math.min(1e3, buffer.length));
      if (sample.filter((byte) => byte < 32 && byte !== 9 && byte !== 10 && byte !== 13).length < sample.length * .1) return "txt";
      return null;
    }
    /**
     * Classify a zip container as "docx" or "xlsx".
     *
     * The main part names ("word/document.xml", "xl/workbook.xml") are searched
     * in the whole buffer first, because "[Content_Types].xml" occurs in both
     * formats and would otherwise classify spreadsheets as documents. The
     * 500-byte header heuristics remain as a fallback.
     *
     * @returns "docx", "xlsx", or null when no marker is found.
     */
    detectZipFormat(buffer) {
      if (buffer.includes("word/document.xml")) return "docx";
      if (buffer.includes("xl/workbook.xml")) return "xlsx";
      const zipHeader = buffer.toString("utf-8", 0, Math.min(500, buffer.length));
      if (zipHeader.includes("word/")) return "docx";
      if (zipHeader.includes("xl/")) return "xlsx";
      if (zipHeader.includes("[Content_Types].xml")) return "docx";
      return null;
    }
    /**
     * Check if format is supported.
     * @returns True when the format can be processed with the dependencies that were loaded.
     */
    isFormatSupported(format) {
      switch (format) {
        case "pdf": return !!this.pdfParse;
        case "docx": return !!this.mammoth;
        case "txt": return true;
        case "image": return this.ocrProcessor.isAvailable();
        case "json": return true;
        case "csv": return true;
        case "xlsx": return this.xlsxProcessor.isAvailable();
        default: return false;
      }
    }
    /**
     * Extract text from PDF.
     *
     * When `options.pages` is given, parsing is limited through `max` to the
     * highest requested page number; an empty list imposes no limit.
     *
     * @returns Promise of the extracted text ("" when the parser returns none).
     */
    async extractPdfText(buffer, options) {
      if (!this.pdfParse) throw new Error("[DocumentProcessor] PDF support requires pdf-parse. Install with: npm install pdf-parse");
      try {
        const requestedPages = options?.pages ? [...options.pages] : [];
        const data = await this.pdfParse(buffer, {
          password: options?.password,
          max: requestedPages.length > 0 ? Math.max(...requestedPages) : void 0
        });
        return data.text || "";
      } catch (error) {
        throw new Error(`[DocumentProcessor] PDF extraction failed: ${error.message}`, { cause: error });
      }
    }
    /**
     * Extract text from DOCX.
     */
    async extractDocxText(buffer, _options) {
      if (!this.mammoth) throw new Error("[DocumentProcessor] DOCX support requires mammoth. Install with: npm install mammoth");
      try {
        return (await this.mammoth.extractRawText({ buffer })).value || "";
      } catch (error) {
        throw new Error(`[DocumentProcessor] DOCX extraction failed: ${error.message}`, { cause: error });
      }
    }
    /**
     * Convert a PDF date value into a Date.
     *
     * Strings in the PDF form "D:YYYYMMDDHHmmSS" with an optional "Z" or
     * "+HH'mm'" / "-HH'mm'" suffix are parsed field by field; omitted fields
     * default to month 01, day 01 and zero time, and values without an offset
     * are interpreted as UTC. Anything else is passed to `new Date()`.
     *
     * @returns The Date, or undefined for a falsy input.
     */
    parsePdfDate(value) {
      if (!value) return void 0;
      const match = /^D:(\d{4})(\d{2})?(\d{2})?(\d{2})?(\d{2})?(\d{2})?(?:([Zz])|([+-])(\d{2})'?(\d{2})?'?)?/.exec(String(value));
      if (!match) return new Date(value);
      const [, year, month = "01", day = "01", hour = "00", minute = "00", second = "00", , sign, offsetHours = "00", offsetMinutes = "00"] = match;
      const wallClock = Date.UTC(Number(year), Number(month) - 1, Number(day), Number(hour), Number(minute), Number(second));
      const offsetMs = sign ? (sign === "-" ? -1 : 1) * (Number(offsetHours) * 60 + Number(offsetMinutes)) * 6e4 : 0;
      return new Date(wallClock - offsetMs);
    }
    /**
     * Get PDF metadata.
     * @returns Promise of page count, title, author, creation/modification dates and the raw info dictionary.
     */
    async getPdfMetadata(buffer, _options) {
      if (!this.pdfParse) throw new Error("[DocumentProcessor] PDF support requires pdf-parse. Install with: npm install pdf-parse");
      try {
        const data = await this.pdfParse(buffer, { password: _options?.password });
        return {
          format: "pdf",
          pages: data.numpages,
          title: data.info?.Title,
          author: data.info?.Author,
          creationDate: this.parsePdfDate(data.info?.CreationDate),
          modifiedDate: this.parsePdfDate(data.info?.ModDate),
          custom: data.info
        };
      } catch (error) {
        throw new Error(`[DocumentProcessor] PDF metadata extraction failed: ${error.message}`, { cause: error });
      }
    }
    /**
     * Get DOCX metadata.
     * @returns Promise of a record holding only the format; the buffer is not inspected.
     */
    async getDocxMetadata(_buffer, _options) {
      return {
        format: "docx",
        pages: void 0
      };
    }
    /**
     * Extract text from image using OCR.
     */
    async extractImageText(buffer, options) {
      if (!this.ocrProcessor.isAvailable()) throw new Error("[DocumentProcessor] Image/OCR support requires tesseract.js. Install with: npm install tesseract.js");
      try {
        return (await this.ocrProcessor.recognizeText(buffer, options?.ocrOptions)).text;
      } catch (error) {
        throw new Error(`[DocumentProcessor] Image text extraction failed: ${error.message}`, { cause: error });
      }
    }
    /**
     * Get image metadata.
     *
     * Best effort: when OCR is unavailable or recognition fails, a record with
     * `usedOCR: false` is returned instead of throwing.
     */
    async getImageMetadata(buffer, options) {
      const withoutOcr = {
        format: "image",
        pages: void 0,
        usedOCR: false
      };
      if (!this.ocrProcessor.isAvailable()) return withoutOcr;
      try {
        return {
          format: "image",
          pages: void 0,
          usedOCR: true,
          ocrConfidence: (await this.ocrProcessor.recognizeText(buffer, options?.ocrOptions)).confidence
        };
      } catch {
        return withoutOcr;
      }
    }
    /**
     * Extract text from JSON.
     */
    async extractJsonText(buffer, _options) {
      try {
        return this.jsonProcessor.extractText(buffer);
      } catch (error) {
        throw new Error(`[DocumentProcessor] JSON extraction failed: ${error.message}`, { cause: error });
      }
    }
    /**
     * Extract text from CSV.
     */
    async extractCsvText(buffer, _options) {
      try {
        return this.csvProcessor.extractText(buffer);
      } catch (error) {
        throw new Error(`[DocumentProcessor] CSV extraction failed: ${error.message}`, { cause: error });
      }
    }
    /**
     * Extract text from XLSX.
     */
    async extractXlsxText(buffer, _options) {
      if (!this.xlsxProcessor.isAvailable()) throw new Error("[DocumentProcessor] XLSX support requires xlsx package. Install with: npm install xlsx");
      try {
        return this.xlsxProcessor.extractText(buffer);
      } catch (error) {
        throw new Error(`[DocumentProcessor] XLSX extraction failed: ${error.message}`, { cause: error });
      }
    }
    /**
     * Get JSON metadata.
     *
     * Best effort: a parse failure yields a record that holds only the format.
     */
    async getJsonMetadata(buffer, _options) {
      try {
        const data = this.jsonProcessor.parse(buffer);
        const isArray = Array.isArray(data);
        return {
          format: "json",
          pages: void 0,
          custom: {
            isArray,
            itemCount: isArray ? data.length : Object.keys(data).length
          }
        };
      } catch {
        return {
          format: "json",
          pages: void 0
        };
      }
    }
    /**
     * Get CSV metadata.
     *
     * Best effort: a parse failure yields a record that holds only the format.
     */
    async getCsvMetadata(buffer, _options) {
      try {
        const info = this.csvProcessor.getColumnInfo(buffer);
        return {
          format: "csv",
          pages: void 0,
          custom: {
            rowCount: info.rowCount,
            columnCount: info.columnCount,
            headers: info.headers
          }
        };
      } catch {
        return {
          format: "csv",
          pages: void 0
        };
      }
    }
    /**
     * Get XLSX metadata.
     *
     * Best effort: a missing dependency or a parse failure yields a record
     * that holds only the format.
     */
    async getXlsxMetadata(buffer, _options) {
      if (!this.xlsxProcessor.isAvailable()) return {
        format: "xlsx",
        pages: void 0
      };
      try {
        const metadata = this.xlsxProcessor.getMetadata(buffer);
        return {
          format: "xlsx",
          pages: void 0,
          custom: {
            sheetNames: metadata.sheetNames,
            sheetCount: metadata.sheetCount
          }
        };
      } catch {
        return {
          format: "xlsx",
          pages: void 0
        };
      }
    }
    /**
     * Get OCR processor instance.
     */
    getOCRProcessor() {
      return this.ocrProcessor;
    }
    /**
     * Get JSON processor instance.
     */
    getJsonProcessor() {
      return this.jsonProcessor;
    }
    /**
     * Get CSV processor instance.
     */
    getCsvProcessor() {
      return this.csvProcessor;
    }
    /**
     * Get XLSX processor instance.
     */
    getXlsxProcessor() {
      return this.xlsxProcessor;
    }
  };
});
16334
+
16335
+ //#endregion
16336
+ //#region src/document/index.ts
16337
// Lazy export table for src/document: every entry is a getter thunk, so a
// binding is only read when the corresponding export is accessed.
var document_exports = /* @__PURE__ */ __exportAll({
  // Processor classes
  CsvProcessor: () => CsvProcessor,
  DocumentProcessor: () => DocumentProcessor,
  JsonProcessor: () => JsonProcessor,
  OCRProcessor: () => OCRProcessor,
  XlsxProcessor: () => XlsxProcessor,
  // Factory functions
  createCsvProcessor: () => createCsvProcessor,
  createDocumentProcessor: () => createDocumentProcessor,
  createJsonProcessor: () => createJsonProcessor,
  createOCRProcessor: () => createOCRProcessor,
  createXlsxProcessor: () => createXlsxProcessor
});
// Initialiser for the document module: runs the initialiser of every
// processor module it re-exports, DocumentProcessor first.
var init_document = __esmMin(() => {
  init_DocumentProcessor();
  init_OCRProcessor();
  init_JsonProcessor();
  init_CsvProcessor();
  init_XlsxProcessor();
});
16356
+
16357
+ //#endregion
16358
+ //#region src/workers/WorkerPool.ts
16359
+ /**
16360
+ * Worker thread pool for parallel processing
16361
+ */
16362
/**
 * Factory for WorkerPool.
 *
 * @param config Pool configuration, handed to the WorkerPool constructor as-is.
 * @returns A new WorkerPool instance.
 */
function createWorkerPool(config) {
  const pool = new WorkerPool(config);
  return pool;
}
16368
var WorkerPool;
var init_WorkerPool = __esmMin(() => {
  /**
   * Pool of worker threads running `worker.js` from this module's directory.
   * Tasks are queued with execute() and handed to idle workers in FIFO order;
   * each worker processes one task at a time.
   */
  WorkerPool = class {
    /**
     * @param config Optional `numWorkers` (defaults to the CPU count, at least
     *   1), `maxQueueSize` (default 100) and `idleTimeout` (default 30000;
     *   stored but not used by this class).
     */
    constructor(config = {}) {
      this.workers = [];
      this.availableWorkers = [];
      this.taskQueue = [];
      this.totalProcessingTime = 0;
      // True while terminate() is running, so that workers stopping on purpose
      // are neither replaced nor logged as failures.
      this.terminating = false;
      this.config = {
        numWorkers: config.numWorkers || os.cpus().length || 1,
        maxQueueSize: config.maxQueueSize || 100,
        idleTimeout: config.idleTimeout || 3e4
      };
      this.stats = {
        activeWorkers: 0,
        idleWorkers: 0,
        queueSize: 0,
        totalProcessed: 0,
        totalErrors: 0,
        avgProcessingTime: 0
      };
      this.workerPath = path.join(__dirname, "worker.js");
    }
    /**
     * Initialize worker pool by creating `config.numWorkers` workers.
     */
    async initialize() {
      for (let i = 0; i < this.config.numWorkers; i++) await this.createWorker();
    }
    /**
     * Create a new worker and register it as idle.
     *
     * A worker that emits "error" is removed (its in-flight task is rejected)
     * and replaced, unless the pool is terminating.
     * NOTE(review): a worker script that fails on every start would be
     * respawned indefinitely; consider a retry limit.
     *
     * @returns Promise of the created worker.
     */
    async createWorker() {
      const worker = new worker_threads.Worker(this.workerPath);
      worker.on("message", (result) => {
        this.handleWorkerResult(worker, result);
      });
      worker.on("error", (error) => {
        console.error("[WorkerPool] Worker error:", error);
        this.stats.totalErrors++;
        this.removeWorker(worker, error);
        if (this.terminating) return;
        // Replace the worker, then hand it any task that is still queued.
        this.createWorker().then(() => this.processQueue(), (spawnError) => {
          console.error("[WorkerPool] Failed to replace worker:", spawnError);
        });
      });
      worker.on("exit", (code) => {
        if (code !== 0 && !this.terminating) console.error(`[WorkerPool] Worker exited with code ${code}`);
        this.removeWorker(worker);
      });
      this.workers.push(worker);
      this.availableWorkers.push(worker);
      this.stats.idleWorkers++;
      return worker;
    }
    /**
     * Execute a task on the worker pool.
     *
     * @param task Message posted to the worker.
     * @returns Promise resolved with the worker's `result`, or rejected when
     *   the worker reports an error, stops before answering, or the pool is
     *   terminated first.
     * @throws Error (as a rejection) when the queue already holds `maxQueueSize` tasks.
     */
    async execute(task) {
      if (this.taskQueue.length >= this.config.maxQueueSize) throw new Error(`[WorkerPool] Queue is full (max: ${this.config.maxQueueSize})`);
      return new Promise((resolve, reject) => {
        this.taskQueue.push({
          task,
          resolve,
          reject
        });
        this.stats.queueSize = this.taskQueue.length;
        this.processQueue();
      });
    }
    /**
     * Process task queue: pair queued tasks with idle workers until either runs out.
     */
    processQueue() {
      while (this.taskQueue.length > 0 && this.availableWorkers.length > 0) {
        const worker = this.availableWorkers.shift();
        const { task, resolve, reject } = this.taskQueue.shift();
        this.stats.idleWorkers--;
        this.stats.activeWorkers++;
        this.stats.queueSize = this.taskQueue.length;
        worker.__currentTask = {
          resolve,
          reject,
          startTime: Date.now()
        };
        worker.postMessage(task);
      }
    }
    /**
     * Handle worker result: settle the worker's in-flight task, update the
     * statistics, return the worker to the idle list and continue with the queue.
     *
     * @param worker Worker that sent the message.
     * @param result Message with `result`, optional `error` and optional
     *   `processingTime`; when the time is missing or not a finite number, the
     *   time elapsed since dispatch is used.
     */
    handleWorkerResult(worker, result) {
      const currentTask = worker.__currentTask;
      if (!currentTask) return;
      delete worker.__currentTask;
      const outcome = result ?? {};
      const elapsed = typeof outcome.processingTime === "number" && Number.isFinite(outcome.processingTime) ? outcome.processingTime : Date.now() - currentTask.startTime;
      this.stats.activeWorkers--;
      this.stats.idleWorkers++;
      this.stats.totalProcessed++;
      this.totalProcessingTime += elapsed;
      this.stats.avgProcessingTime = this.totalProcessingTime / this.stats.totalProcessed;
      this.availableWorkers.push(worker);
      if (outcome.error) {
        this.stats.totalErrors++;
        currentTask.reject(new Error(outcome.error));
      } else currentTask.resolve(outcome.result);
      this.processQueue();
    }
    /**
     * Remove worker from pool.
     *
     * When the worker still owns a task, that task is rejected so its caller
     * is not left waiting, and the active-worker count is corrected.
     *
     * @param worker Worker to remove; removing an unknown worker is a no-op.
     * @param reason Optional Error used to reject the in-flight task.
     */
    removeWorker(worker, reason) {
      const index = this.workers.indexOf(worker);
      if (index !== -1) this.workers.splice(index, 1);
      const availableIndex = this.availableWorkers.indexOf(worker);
      if (availableIndex !== -1) {
        this.availableWorkers.splice(availableIndex, 1);
        this.stats.idleWorkers--;
      }
      const inFlight = worker.__currentTask;
      if (inFlight) {
        delete worker.__currentTask;
        this.stats.activeWorkers--;
        inFlight.reject(reason instanceof Error ? reason : new Error("[WorkerPool] Worker stopped before completing its task"));
      }
    }
    /**
     * Get pool statistics.
     * @returns A shallow copy of the current statistics.
     */
    getStats() {
      return { ...this.stats };
    }
    /**
     * Terminate all workers.
     *
     * Queued and in-flight tasks are rejected first; the pool state is reset
     * even when terminating a worker fails.
     */
    async terminate() {
      this.terminating = true;
      try {
        const abandoned = this.taskQueue;
        this.taskQueue = [];
        for (const { reject } of abandoned) reject(new Error("[WorkerPool] Pool terminated before the task could run"));
        const workers = this.workers.slice();
        for (const worker of workers) {
          const inFlight = worker.__currentTask;
          if (!inFlight) continue;
          delete worker.__currentTask;
          inFlight.reject(new Error("[WorkerPool] Pool terminated before the task completed"));
        }
        await Promise.all(workers.map((worker) => worker.terminate()));
      } finally {
        this.workers = [];
        this.availableWorkers = [];
        this.taskQueue = [];
        this.stats.activeWorkers = 0;
        this.stats.idleWorkers = 0;
        this.stats.queueSize = 0;
        this.terminating = false;
      }
    }
  };
});
16505
+
16506
+ //#endregion
16507
+ //#region src/workers/index.ts
16508
// Lazy export table for src/workers: every entry is a getter thunk, so a
// binding is only read when the corresponding export is accessed.
var workers_exports = /* @__PURE__ */ __exportAll({
  WorkerPool: () => WorkerPool,
  createWorkerPool: () => createWorkerPool
});
// Initialiser for the workers module: runs the WorkerPool module initialiser.
var init_workers = __esmMin(() => {
  init_WorkerPool();
});
16515
+
14559
16516
  //#endregion
14560
16517
  //#region src/detector.ts
14561
16518
  var OpenRedaction = class OpenRedaction {
@@ -15205,14 +17162,14 @@ var OpenRedaction = class OpenRedaction {
15205
17162
  * Run health check
15206
17163
  */
15207
17164
  async healthCheck(options) { // `options` is forwarded unchanged to HealthChecker#check
15208
- const { HealthChecker } = await Promise.resolve().then(() => require("./HealthCheck-6-mYE1uY.cjs"));
17165
+ const { HealthChecker } = await Promise.resolve().then(() => (init_HealthCheck(), HealthCheck_exports)); // run the module initializer, then hand back its export namespace; the resolved-promise wrapper keeps the load asynchronous
15209
17166
  return new HealthChecker(this).check(options); // a new checker bound to this detector is created on every call
15210
17167
  }
15211
17168
  /**
15212
17169
  * Quick health check (minimal overhead)
15213
17170
  */
15214
17171
  async quickHealthCheck() { // takes no options; delegates to HealthChecker#quickCheck
15215
- const { HealthChecker } = await Promise.resolve().then(() => require("./HealthCheck-6-mYE1uY.cjs"));
17172
+ const { HealthChecker } = await Promise.resolve().then(() => (init_HealthCheck(), HealthCheck_exports)); // run the module initializer, then hand back its export namespace; the resolved-promise wrapper keeps the load asynchronous
15216
17173
  return new HealthChecker(this).quickCheck(); // a new checker bound to this detector is created on every call
15217
17174
  }
15218
17175
  /**
@@ -15223,7 +17180,7 @@ var OpenRedaction = class OpenRedaction {
15223
17180
  */
15224
17181
  async detectDocument(buffer, options) {
15225
17182
  if (this.rbacManager && !this.rbacManager.hasPermission("detection:detect")) throw new Error("[OpenRedaction] Permission denied: detection:detect required");
15226
- const { createDocumentProcessor } = await Promise.resolve().then(() => require("./document-DBn-i6L4.cjs"));
17183
+ const { createDocumentProcessor } = await Promise.resolve().then(() => (init_document(), document_exports));
15227
17184
  const processor = createDocumentProcessor();
15228
17185
  const extractionStart = performance.now();
15229
17186
  const text = await processor.extractText(buffer, options);
@@ -15252,7 +17209,7 @@ var OpenRedaction = class OpenRedaction {
15252
17209
  * Significantly faster for processing many texts
15253
17210
  */
15254
17211
  static async detectBatch(texts, options) {
15255
- const { createWorkerPool } = await Promise.resolve().then(() => require("./workers-DSxzXbrR.cjs"));
17212
+ const { createWorkerPool } = await Promise.resolve().then(() => (init_workers(), workers_exports));
15256
17213
  const pool = createWorkerPool({ numWorkers: options?.numWorkers });
15257
17214
  try {
15258
17215
  await pool.initialize();
@@ -15272,7 +17229,7 @@ var OpenRedaction = class OpenRedaction {
15272
17229
  * Efficient for processing many documents at once
15273
17230
  */
15274
17231
  static async detectDocumentsBatch(buffers, options) {
15275
- const { createWorkerPool } = await Promise.resolve().then(() => require("./workers-DSxzXbrR.cjs"));
17232
+ const { createWorkerPool } = await Promise.resolve().then(() => (init_workers(), workers_exports));
15276
17233
  const pool = createWorkerPool({ numWorkers: options?.numWorkers });
15277
17234
  try {
15278
17235
  await pool.initialize();
@@ -15499,6 +17456,4 @@ main().catch((error) => {
15499
17456
  process.exit(1);
15500
17457
  });
15501
17458
 
15502
- //#endregion
15503
- exports.__toCommonJS = __toCommonJS;
15504
- exports.__toESM = __toESM;
17459
+ //#endregion