od-temp 1.0.4 → 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -1,11 +1,49 @@
1
- import { a as CsvProcessor, c as createJsonProcessor, d as __esmMin, f as __exportAll, i as createXlsxProcessor, l as OCRProcessor, m as __toCommonJS, n as createDocumentProcessor, o as createCsvProcessor, p as __require, r as XlsxProcessor, s as JsonProcessor, t as DocumentProcessor, u as createOCRProcessor } from "./document-ChTIy4sC.mjs";
2
- import { n as createWorkerPool, t as WorkerPool } from "./workers-vfrB0Vzh.mjs";
3
- import { n as createHealthChecker, r as healthCheckMiddleware, t as HealthChecker } from "./HealthCheck-B79o7xg2.mjs";
1
+ import { createRequire } from "node:module";
4
2
  import { createHash } from "crypto";
5
3
  import * as fs from "fs";
6
4
  import * as path from "path";
5
+ import { join } from "path";
6
+ import { Worker } from "worker_threads";
7
+ import { cpus } from "os";
7
8
  import { useCallback, useEffect, useMemo, useState } from "react";
8
9
 
10
//#region rolldown:runtime
// Bundler-emitted helpers shared by the module regions below.
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __hasOwnProp = Object.prototype.hasOwnProperty;

/**
 * Wrap an initializer so that it runs at most once; the returned thunk
 * yields the initializer's (cached) return value on every call.
 */
var __esmMin = (fn, res) => () => {
	if (fn) {
		const initializer = fn;
		// Cleared before the call so a re-entrant invocation during
		// initialization does not run the initializer a second time.
		fn = 0;
		res = initializer(0);
	}
	return res;
};

/**
 * Build a namespace-like object whose enumerable properties are getters
 * taken from `all`. When `symbols` is truthy the object is tagged "Module".
 */
var __exportAll = (all, symbols) => {
	const namespace = {};
	for (const exportName in all) {
		__defProp(namespace, exportName, {
			get: all[exportName],
			enumerable: true
		});
	}
	if (symbols) __defProp(namespace, Symbol.toStringTag, { value: "Module" });
	return namespace;
};

/**
 * Define getters on `to` for every own string-keyed property of `from`
 * that `to` does not already own, skipping the key named by `except`.
 * Enumerability is copied from the source descriptor.
 */
var __copyProps = (to, from, except, desc) => {
	const isCopyable = from && typeof from === "object" || typeof from === "function";
	if (!isCopyable) return to;
	for (const key of __getOwnPropNames(from)) {
		if (__hasOwnProp.call(to, key) || key === except) continue;
		desc = __getOwnPropDesc(from, key);
		__defProp(to, key, {
			get: () => from[key],
			enumerable: !desc || desc.enumerable
		});
	}
	return to;
};

/**
 * Return `mod["module.exports"]` when the module owns that key; otherwise
 * return a fresh object flagged `__esModule` that forwards to `mod`.
 */
var __toCommonJS = (mod) => {
	if (__hasOwnProp.call(mod, "module.exports")) return mod["module.exports"];
	return __copyProps(__defProp({}, "__esModule", { value: true }), mod);
};

// CommonJS-style require resolved relative to this module file.
var __require = /* @__PURE__ */ createRequire(import.meta.url);

//#endregion
9
47
  //#region src/audit/AuditLogger.ts
10
48
  /**
11
49
  * In-memory audit logger implementation
@@ -15710,6 +15748,1961 @@ var init_ConfigExporter = __esmMin((() => {
15710
15748
  };
15711
15749
  }));
15712
15750
 
15751
+ //#endregion
15752
+ //#region src/health/HealthCheck.ts
15753
// Export table for src/health/HealthCheck.ts: each entry is a getter thunk,
// so the binding is read at access time rather than when this table is built.
var HealthCheck_exports = /* @__PURE__ */ __exportAll({
	HealthChecker: () => HealthChecker,
	createHealthChecker: () => createHealthChecker,
	healthCheckMiddleware: () => healthCheckMiddleware
});
15758
/**
 * Factory for a HealthChecker bound to a detector.
 *
 * @param detector - detector instance passed straight to the HealthChecker constructor
 * @returns a new HealthChecker
 */
function createHealthChecker(detector) {
	const checker = new HealthChecker(detector);
	return checker;
}
15764
/**
 * Build an Express-style request handler that runs a full health check.
 *
 * The handler responds with the check result as JSON. "healthy" and
 * "degraded" results are sent with HTTP 200, anything else with 503. If the
 * check itself throws, a minimal "unhealthy" payload is sent with 503.
 *
 * @param detector - detector instance to monitor
 * @returns async handler `(req, res) => void`; only `res.status().json()` is used
 */
function healthCheckMiddleware(detector) {
	const checker = new HealthChecker(detector);
	return async (_req, res) => {
		try {
			const report = await checker.check({
				testDetection: true,
				checkPerformance: true,
				performanceThreshold: 100,
				memoryThreshold: 100
			});
			const isServing = report.status === "healthy" || report.status === "degraded";
			res.status(isServing ? 200 : 503).json(report);
		} catch (error) {
			const failure = {
				status: "unhealthy",
				timestamp: new Date().toISOString(),
				error: error.message
			};
			res.status(503).json(failure);
		}
	};
}
15788
var HealthChecker;
var init_HealthCheck = __esmMin((() => {
	/**
	 * Runs functional, pattern, performance and memory checks against a
	 * detector and summarises them as healthy / degraded / unhealthy.
	 */
	HealthChecker = class {
		/**
		 * @param detector - object whose detect(), getPatterns() and getCacheStats()
		 *   methods are called by the checks below
		 */
		constructor(detector) {
			this.detector = detector;
			this.initTime = Date.now();
		}
		/**
		 * Run the complete health check.
		 *
		 * @param options - optional flags: testDetection, checkPerformance,
		 *   performanceThreshold, memoryThreshold
		 * @returns report with status, per-check results, metrics, errors and warnings.
		 *   Never rejects: an unexpected failure is reported as "unhealthy".
		 */
		async check(options = {}) {
			const report = {
				status: "healthy",
				timestamp: new Date().toISOString(),
				checks: {
					detector: { status: "pass", message: "Detector initialized" },
					patterns: { status: "pass", message: "Patterns loaded" },
					performance: { status: "pass", message: "Performance acceptable" },
					memory: { status: "pass", message: "Memory usage normal" }
				},
				metrics: {
					totalPatterns: 0,
					compiledPatterns: 0,
					cacheEnabled: false,
					uptime: Date.now() - this.initTime
				},
				errors: [],
				warnings: []
			};
			try {
				const { checks } = report;
				checks.detector = await this.checkDetector(options);
				checks.patterns = await this.checkPatterns();
				// The performance probe is opt-out; the placeholder "pass" stays when skipped.
				if (options.checkPerformance !== false) {
					checks.performance = await this.checkPerformance(options.performanceThreshold);
				}
				checks.memory = await this.checkMemory(options.memoryThreshold);
				report.metrics = this.collectMetrics();
				report.status = this.determineOverallStatus(checks);
				for (const outcome of Object.values(checks)) {
					if (outcome.status === "fail") {
						report.errors.push(outcome.message);
					} else if (outcome.status === "warn") {
						report.warnings.push(outcome.message);
					}
				}
			} catch (error) {
				report.status = "unhealthy";
				report.errors.push(`Health check failed: ${error.message}`);
			}
			return report;
		}
		/**
		 * Check detector functionality by running a sample detection
		 * (skipped when options.testDetection is false).
		 */
		async checkDetector(options) {
			const passed = {
				status: "pass",
				message: "Detector functioning correctly"
			};
			try {
				if (options.testDetection === false) return passed;
				const probe = await this.detector.detect("Test email: test@example.com");
				if (!probe?.detections) {
					return {
						status: "fail",
						message: "Detector returned invalid result"
					};
				}
				if (probe.detections.length === 0) {
					return {
						status: "warn",
						message: "Test detection found no PII (expected at least 1)"
					};
				}
				return passed;
			} catch (error) {
				return {
					status: "fail",
					message: `Detector check failed: ${error.message}`
				};
			}
		}
		/**
		 * Check that patterns are loaded: none fails, fewer than 10 warns.
		 */
		async checkPatterns() {
			try {
				const loaded = this.detector.getPatterns();
				if (!loaded || loaded.length === 0) {
					return {
						status: "fail",
						message: "No patterns loaded",
						value: 0,
						threshold: 1
					};
				}
				const count = loaded.length;
				return count < 10 ? {
					status: "warn",
					message: "Very few patterns loaded (expected more)",
					value: count,
					threshold: 10
				} : {
					status: "pass",
					message: `${count} patterns loaded`,
					value: count
				};
			} catch (error) {
				return {
					status: "fail",
					message: `Pattern check failed: ${error.message}`
				};
			}
		}
		/**
		 * Time one sample detection. Above `threshold` ms warns, above twice
		 * `threshold` fails.
		 */
		async checkPerformance(threshold = 100) {
			try {
				const sample = "Test: john@example.com, phone: 555-123-4567, IP: 192.168.1.1";
				const startedAt = performance.now();
				await this.detector.detect(sample);
				const elapsed = performance.now() - startedAt;
				const shown = elapsed.toFixed(2);
				let status = "pass";
				let message = `Performance good: ${shown}ms`;
				if (elapsed > threshold * 2) {
					status = "fail";
					message = `Performance degraded: ${shown}ms`;
				} else if (elapsed > threshold) {
					status = "warn";
					message = `Performance slower than expected: ${shown}ms`;
				}
				return { status, message, value: elapsed, threshold };
			} catch (error) {
				return {
					status: "fail",
					message: `Performance check failed: ${error.message}`
				};
			}
		}
		/**
		 * Compare heap usage (in MB) with `threshold`. Above it warns, above
		 * twice it fails. Passes without measuring when process.memoryUsage
		 * is unavailable.
		 */
		async checkMemory(threshold = 100) {
			try {
				const canMeasure = typeof process !== "undefined" && Boolean(process.memoryUsage);
				if (!canMeasure) {
					return {
						status: "pass",
						message: "Memory check skipped (not in Node.js)"
					};
				}
				const heapUsedMB = process.memoryUsage().heapUsed / 1024 / 1024;
				const shown = heapUsedMB.toFixed(2);
				let status = "pass";
				let message = `Memory usage normal: ${shown}MB`;
				if (heapUsedMB > threshold * 2) {
					status = "fail";
					message = `High memory usage: ${shown}MB`;
				} else if (heapUsedMB > threshold) {
					status = "warn";
					message = `Elevated memory usage: ${shown}MB`;
				}
				return { status, message, value: heapUsedMB, threshold };
			} catch (error) {
				return {
					status: "warn",
					message: `Memory check skipped: ${error.message}`
				};
			}
		}
		/**
		 * Collect pattern, cache and uptime metrics from the detector.
		 */
		collectMetrics() {
			const patterns = this.detector.getPatterns();
			const cacheStats = this.detector.getCacheStats();
			const patternCount = patterns.length;
			return {
				totalPatterns: patternCount,
				compiledPatterns: patternCount,
				cacheSize: cacheStats.size,
				cacheEnabled: cacheStats.enabled,
				uptime: Date.now() - this.initTime
			};
		}
		/**
		 * Reduce individual check results to one status:
		 * any "fail" is unhealthy, otherwise any "warn" is degraded.
		 */
		determineOverallStatus(checks) {
			const outcomes = new Set(Object.values(checks).map((entry) => entry.status));
			if (outcomes.has("fail")) return "unhealthy";
			return outcomes.has("warn") ? "degraded" : "healthy";
		}
		/**
		 * Quick health check (minimal overhead): only verifies that at least
		 * one pattern is loaded.
		 */
		async quickCheck() {
			try {
				const hasPatterns = this.detector.getPatterns().length !== 0;
				return hasPatterns ? {
					status: "healthy",
					message: "OK"
				} : {
					status: "unhealthy",
					message: "No patterns loaded"
				};
			} catch (error) {
				return {
					status: "unhealthy",
					message: `Error: ${error.message}`
				};
			}
		}
		/**
		 * Get system info for debugging. `patterns.types` counts distinct
		 * prefixes of each pattern type (the text before the first "_").
		 */
		getSystemInfo() {
			const patterns = this.detector.getPatterns();
			const cacheStats = this.detector.getCacheStats();
			const typePrefixes = new Set(patterns.map((pattern) => pattern.type.split("_")[0]));
			return {
				version: "1.0.0",
				patterns: {
					total: patterns.length,
					types: typePrefixes.size
				},
				cache: {
					enabled: cacheStats.enabled,
					size: cacheStats.size,
					maxSize: cacheStats.maxSize
				},
				uptime: Date.now() - this.initTime,
				timestamp: new Date().toISOString()
			};
		}
	};
}));
16036
+
16037
+ //#endregion
16038
+ //#region src/document/OCRProcessor.ts
16039
/**
 * Factory for an OCR processor.
 *
 * @returns a new OCRProcessor instance
 */
function createOCRProcessor() {
	const processor = new OCRProcessor();
	return processor;
}
16045
var OCRProcessor;
var init_OCRProcessor = __esmMin((() => {
	/**
	 * Thin wrapper around the optional `tesseract.js` dependency for
	 * single-image and batch text recognition.
	 */
	OCRProcessor = class {
		constructor() {
			try {
				this.tesseract = __require("tesseract.js");
			} catch {
				// tesseract.js is optional: leave `this.tesseract` unset so that
				// isAvailable() reports false and the recognize methods throw a
				// descriptive error instead of failing at construction time.
			}
		}
		/**
		 * Extract text from image buffer using OCR.
		 *
		 * @param buffer - image data handed to the tesseract worker
		 * @param options - optional `language` (string or array joined with "+",
		 *   default "eng"), `oem` (default 3) and `psm`
		 * @returns `{ text, confidence, processingTime }`, processingTime in ms
		 * @throws Error when tesseract.js is missing or recognition fails
		 */
		async recognizeText(buffer, options) {
			if (!this.tesseract) throw new Error("[OCRProcessor] OCR support requires tesseract.js. Install with: npm install tesseract.js");
			const startTime = performance.now();
			let worker;
			try {
				const language = Array.isArray(options?.language) ? options.language.join("+") : options?.language || "eng";
				// `??` rather than `||`: engine mode 0 is a valid value and must not
				// be replaced by the default.
				worker = await this.tesseract.createWorker(language, options?.oem ?? 3);
				if (options?.psm !== void 0) await worker.setParameters({ tessedit_pageseg_mode: options.psm });
				const result = await worker.recognize(buffer);
				const finishedWorker = worker;
				worker = void 0;
				await finishedWorker.terminate();
				const endTime = performance.now();
				return {
					text: result.data.text || "",
					confidence: result.data.confidence || 0,
					processingTime: Math.round((endTime - startTime) * 100) / 100
				};
			} catch (error) {
				if (worker) {
					// Release the worker on the failure path too; a terminate error
					// here must not hide the failure that brought us here.
					try {
						await worker.terminate();
					} catch {
						// ignored on purpose: the original error is rethrown below
					}
				}
				throw new Error(`[OCRProcessor] OCR recognition failed: ${error.message}`, { cause: error });
			}
		}
		/**
		 * Check if OCR is available (tesseract.js installed)
		 */
		isAvailable() {
			return !!this.tesseract;
		}
		/**
		 * Create a scheduler for batch OCR processing.
		 * More efficient for processing multiple images. Any scheduler created
		 * earlier by this instance is terminated first. Workers are created for
		 * the "eng" language.
		 *
		 * @param workerCount - number of workers to attach (default 4)
		 * @returns the new scheduler (also stored on `this.scheduler`)
		 */
		async createScheduler(workerCount = 4) {
			if (!this.tesseract) throw new Error("[OCRProcessor] OCR support requires tesseract.js. Install with: npm install tesseract.js");
			if (this.scheduler) await this.scheduler.terminate();
			this.scheduler = this.tesseract.createScheduler();
			for (let i = 0; i < workerCount; i++) {
				const worker = await this.tesseract.createWorker("eng");
				this.scheduler.addWorker(worker);
			}
			return this.scheduler;
		}
		/**
		 * Batch process multiple images through a temporary scheduler.
		 *
		 * @param buffers - image buffers, recognised concurrently
		 * @param _options - accepted for signature compatibility; not applied
		 * @returns one `{ text, confidence, processingTime }` per buffer, in input order
		 * @throws Error when tesseract.js is missing or any job fails
		 */
		async recognizeBatch(buffers, _options) {
			if (!this.tesseract) throw new Error("[OCRProcessor] OCR support requires tesseract.js. Install with: npm install tesseract.js");
			const scheduler = await this.createScheduler();
			let released = false;
			// Terminates the scheduler exactly once, whichever path reaches it first.
			const release = async () => {
				if (released) return;
				released = true;
				this.scheduler = void 0;
				await scheduler.terminate();
			};
			try {
				const results = await Promise.all(buffers.map(async (buffer) => {
					const startTime = performance.now();
					const result = await scheduler.addJob("recognize", buffer);
					const endTime = performance.now();
					return {
						text: result.data.text || "",
						confidence: result.data.confidence || 0,
						processingTime: Math.round((endTime - startTime) * 100) / 100
					};
				}));
				await release();
				return results;
			} catch (error) {
				try {
					await release();
				} catch {
					// ignored on purpose: report the failure that triggered cleanup
				}
				throw new Error(`[OCRProcessor] Batch OCR failed: ${error.message}`, { cause: error });
			}
		}
		/**
		 * Terminate any running scheduler
		 */
		async cleanup() {
			if (this.scheduler) {
				await this.scheduler.terminate();
				this.scheduler = void 0;
			}
		}
	};
}));
16137
+
16138
+ //#endregion
16139
+ //#region src/document/JsonProcessor.ts
16140
/**
 * Factory for a JSON processor.
 *
 * @returns a new JsonProcessor instance
 */
function createJsonProcessor() {
	const processor = new JsonProcessor();
	return processor;
}
16146
var JsonProcessor;
var init_JsonProcessor = __esmMin((() => {
	/**
	 * Walks JSON documents, runs a detector over every primitive value and
	 * produces a redacted copy.
	 */
	JsonProcessor = class {
		constructor() {
			this.defaultOptions = {
				maxDepth: 100,
				scanKeys: false,
				alwaysRedact: [],
				skipPaths: [],
				piiIndicatorKeys: [
					"email",
					"e-mail",
					"mail",
					"phone",
					"tel",
					"telephone",
					"mobile",
					"ssn",
					"social_security",
					"address",
					"street",
					"city",
					"zip",
					"postal",
					"name",
					"firstname",
					"lastname",
					"fullname",
					"password",
					"pwd",
					"secret",
					"token",
					"key",
					"card",
					"credit_card",
					"creditcard",
					"account",
					"iban",
					"swift",
					"passport",
					"license",
					"licence"
				],
				preserveStructure: true
			};
		}
		/**
		 * Parse JSON from buffer or string
		 *
		 * @throws Error prefixed with "[JsonProcessor] Invalid JSON" on malformed input
		 */
		parse(input) {
			try {
				const text = typeof input === "string" ? input : input.toString("utf-8");
				return JSON.parse(text);
			} catch (error) {
				throw new Error(`[JsonProcessor] Invalid JSON: ${error.message}`, { cause: error });
			}
		}
		/**
		 * Detect PII in JSON data
		 *
		 * Every primitive value is passed (as a string) to `detector.detect`;
		 * values are inspected concurrently. Paths in `alwaysRedact` are reported
		 * as SENSITIVE_FIELD without consulting the detector, paths in `skipPaths`
		 * are ignored, and keys are scanned too when `scanKeys` is set.
		 *
		 * @param data - parsed JSON value
		 * @param detector - object with an async `detect(text)` returning `{ detections }`
		 * @param options - overrides for `this.defaultOptions`
		 * @returns detection result including `pathsDetected` and `matchesByPath`
		 */
		async detect(data, detector, options) {
			const opts = {
				...this.defaultOptions,
				...options
			};
			const pathsDetected = [];
			const matchesByPath = {};
			const allDetections = [];
			const pending = [];
			const record = (path, detections) => {
				matchesByPath[path] = detections;
				pathsDetected.push(path);
				allDetections.push(...detections);
			};
			const inspect = async (path, value, key) => {
				if (this.shouldSkip(path, opts.skipPaths)) return;
				if (this.shouldAlwaysRedact(path, opts.alwaysRedact)) {
					const text = String(value);
					record(path, [{
						type: "SENSITIVE_FIELD",
						value: text,
						placeholder: `[SENSITIVE_FIELD]`,
						position: [0, text.length],
						severity: "high",
						confidence: 1
					}]);
					return;
				}
				if (opts.scanKeys && key) {
					const keyResult = await detector.detect(key);
					if (keyResult.detections.length > 0) record(`${path}.__key__`, keyResult.detections);
				}
				const result = await detector.detect(String(value));
				if (result.detections.length > 0) {
					record(path, this.boostConfidenceFromKey(result.detections, key, opts.piiIndicatorKeys));
				}
			};
			this.traverse(data, "", opts, (path, value, key) => {
				pending.push(inspect(path, value, key));
			});
			await Promise.all(pending);
			const original = JSON.stringify(data);
			const redacted = this.redact(data, {
				original,
				redacted: original,
				detections: allDetections,
				redactionMap: {},
				stats: { piiCount: allDetections.length },
				pathsDetected,
				matchesByPath
			}, opts);
			const redactionMap = {};
			for (const det of allDetections) redactionMap[det.placeholder] = det.value;
			return {
				original,
				redacted: typeof redacted === "string" ? redacted : JSON.stringify(redacted),
				detections: allDetections,
				redactionMap,
				stats: { piiCount: allDetections.length },
				pathsDetected,
				matchesByPath
			};
		}
		/**
		 * Redact PII in JSON data
		 *
		 * With `preserveStructure` (the default) every detected path is replaced
		 * by a type-appropriate placeholder; otherwise the text fallback is used.
		 */
		redact(data, detectionResult, options) {
			const opts = {
				...this.defaultOptions,
				...options
			};
			if (opts.preserveStructure) return this.redactPreservingStructure(data, detectionResult.pathsDetected);
			// NOTE(review): detect() records positions relative to each individual
			// value, while this fallback applies them to the serialized document —
			// confirm the intended behaviour of the non-structure-preserving mode.
			return this.parse(this.redactText(JSON.stringify(data, null, 2), detectionResult));
		}
		/**
		 * Redact specific paths in JSON while preserving structure
		 *
		 * Strings become "[REDACTED]", numbers 0, booleans false, arrays [] and
		 * objects {}; null stays null.
		 */
		redactPreservingStructure(data, pathsToRedact) {
			const pathSet = new Set(pathsToRedact);
			const redactValue = (value, currentPath) => {
				if (pathSet.has(currentPath)) {
					if (typeof value === "string") return "[REDACTED]";
					if (typeof value === "number") return 0;
					if (typeof value === "boolean") return false;
					if (value === null) return null;
					if (Array.isArray(value)) return [];
					if (typeof value === "object") return {};
					return "[REDACTED]";
				}
				if (Array.isArray(value)) return value.map((item, index) => redactValue(item, `${currentPath}[${index}]`));
				if (value !== null && typeof value === "object") {
					const result = {};
					for (const [key, val] of Object.entries(value)) result[key] = redactValue(val, currentPath ? `${currentPath}.${key}` : key);
					return result;
				}
				return value;
			};
			return redactValue(data, "");
		}
		/**
		 * Simple text-based redaction (fallback)
		 *
		 * Replaces each detection's `position` range with its placeholder,
		 * working from the end of the text so earlier offsets stay valid.
		 */
		redactText(text, detectionResult) {
			let redacted = text;
			const sortedDetections = [...detectionResult.detections].sort((a, b) => b.position[0] - a.position[0]);
			for (const detection of sortedDetections) {
				const [start, end] = detection.position;
				redacted = redacted.slice(0, start) + detection.placeholder + redacted.slice(end);
			}
			return redacted;
		}
		/**
		 * Traverse JSON structure and call callback for each primitive value
		 *
		 * The callback receives `(path, value, key)` exactly once per primitive;
		 * `key` is the owning property name, or undefined for array items and
		 * for a primitive root.
		 *
		 * @param key - property name under which `obj` was found (internal, used on recursion)
		 * @throws Error when nesting exceeds `options.maxDepth`
		 */
		traverse(obj, path, options, callback, depth = 0, key) {
			if (depth > options.maxDepth) throw new Error(`[JsonProcessor] Maximum depth (${options.maxDepth}) exceeded`);
			if (obj === null || obj === void 0) return;
			if (Array.isArray(obj)) {
				obj.forEach((item, index) => {
					const itemPath = path ? `${path}[${index}]` : `[${index}]`;
					this.traverse(item, itemPath, options, callback, depth + 1);
				});
				return;
			}
			if (typeof obj === "object") {
				for (const [childKey, value] of Object.entries(obj)) {
					const valuePath = path ? `${path}.${childKey}` : childKey;
					this.traverse(value, valuePath, options, callback, depth + 1, childKey);
				}
				return;
			}
			// Primitives are reported only here, so each value is visited once.
			if (this.isPrimitive(obj)) callback(path, obj, key);
		}
		/**
		 * Check if value is primitive (string, number, boolean)
		 */
		isPrimitive(value) {
			return typeof value === "string" || typeof value === "number" || typeof value === "boolean";
		}
		/**
		 * Test a concrete path against a pattern in which `*` stands for one or
		 * more characters other than ".". All other characters match literally.
		 */
		matchesPathPattern(path, pattern) {
			if (path === pattern) return true;
			const source = pattern.split("*").map((literal) => literal.replace(/[.+?^${}()|[\]\\]/g, "\\$&")).join("[^.]+");
			return new RegExp(`^${source}$`).test(path);
		}
		/**
		 * Check if path should be skipped
		 */
		shouldSkip(path, skipPaths) {
			return skipPaths.some((skipPath) => this.matchesPathPattern(path, skipPath));
		}
		/**
		 * Check if path should always be redacted
		 */
		shouldAlwaysRedact(path, alwaysRedact) {
			return alwaysRedact.some((redactPath) => this.matchesPathPattern(path, redactPath));
		}
		/**
		 * Boost confidence if key name indicates PII
		 *
		 * When `key` contains one of the indicator names (case-insensitive) each
		 * detection is copied with its confidence multiplied by 1.2, capped at 1.
		 */
		boostConfidenceFromKey(detections, key, piiIndicatorKeys) {
			if (!key) return detections;
			const keyLower = key.toLowerCase();
			if (!piiIndicatorKeys.some((indicator) => keyLower.includes(indicator.toLowerCase()))) return detections;
			return detections.map((detection) => ({
				...detection,
				confidence: Math.min(1, (detection.confidence || .5) * 1.2)
			}));
		}
		/**
		 * Extract all text values from JSON for simple text-based detection
		 *
		 * @returns string values (and keys, when `scanKeys` is set) joined by spaces
		 */
		extractText(data, options) {
			const opts = {
				...this.defaultOptions,
				...options
			};
			const textParts = [];
			this.traverse(data, "", opts, (_path, value, key) => {
				if (opts.scanKeys && key) textParts.push(key);
				if (typeof value === "string") textParts.push(value);
			});
			return textParts.join(" ");
		}
		/**
		 * Validate JSON buffer/string
		 */
		isValid(input) {
			try {
				this.parse(input);
				return true;
			} catch {
				return false;
			}
		}
		/**
		 * Get JSON Lines (JSONL) support - split by newlines and parse each line
		 *
		 * Blank lines are skipped. Error messages report the 1-based line number
		 * within the input, blank lines included.
		 */
		parseJsonLines(input) {
			const text = typeof input === "string" ? input : input.toString("utf-8");
			const documents = [];
			text.split("\n").forEach((line, index) => {
				if (line.trim().length === 0) return;
				try {
					documents.push(JSON.parse(line));
				} catch (error) {
					throw new Error(`[JsonProcessor] Invalid JSON at line ${index + 1}: ${error.message}`, { cause: error });
				}
			});
			return documents;
		}
		/**
		 * Detect PII in JSON Lines format
		 *
		 * @returns one detection result per non-blank line
		 */
		async detectJsonLines(input, detector, options) {
			const documents = this.parseJsonLines(input);
			return Promise.all(documents.map((doc) => this.detect(doc, detector, options)));
		}
	};
}));
16430
+
16431
+ //#endregion
16432
+ //#region src/document/CsvProcessor.ts
16433
/**
 * Factory for a CSV processor.
 *
 * @returns a new CsvProcessor instance
 */
function createCsvProcessor() {
	const processor = new CsvProcessor();
	return processor;
}
16439
+ var CsvProcessor;
16440
+ var init_CsvProcessor = __esmMin((() => {
16441
+ CsvProcessor = class {
16442
+ constructor() {
16443
+ this.defaultOptions = {
16444
+ quote: "\"",
16445
+ escape: "\"",
16446
+ skipEmptyLines: true,
16447
+ piiIndicatorNames: [
16448
+ "email",
16449
+ "e-mail",
16450
+ "mail",
16451
+ "email_address",
16452
+ "phone",
16453
+ "tel",
16454
+ "telephone",
16455
+ "mobile",
16456
+ "phone_number",
16457
+ "ssn",
16458
+ "social_security",
16459
+ "social_security_number",
16460
+ "address",
16461
+ "street",
16462
+ "street_address",
16463
+ "city",
16464
+ "zip",
16465
+ "zipcode",
16466
+ "postal",
16467
+ "postcode",
16468
+ "name",
16469
+ "firstname",
16470
+ "first_name",
16471
+ "lastname",
16472
+ "last_name",
16473
+ "fullname",
16474
+ "full_name",
16475
+ "password",
16476
+ "pwd",
16477
+ "secret",
16478
+ "token",
16479
+ "api_key",
16480
+ "card",
16481
+ "credit_card",
16482
+ "creditcard",
16483
+ "card_number",
16484
+ "account",
16485
+ "account_number",
16486
+ "iban",
16487
+ "swift",
16488
+ "passport",
16489
+ "passport_number",
16490
+ "license",
16491
+ "licence",
16492
+ "driver_license",
16493
+ "dob",
16494
+ "date_of_birth",
16495
+ "birth_date",
16496
+ "birthdate"
16497
+ ],
16498
+ treatFirstRowAsHeader: true
16499
+ };
16500
+ }
16501
+ /**
16502
+ * Parse CSV from buffer or string
16503
+ */
16504
+ parse(input, options) {
16505
+ const opts = {
16506
+ ...this.defaultOptions,
16507
+ ...options
16508
+ };
16509
+ const text = typeof input === "string" ? input : input.toString("utf-8");
16510
+ const delimiter = opts.delimiter || this.detectDelimiter(text);
16511
+ const lines = text.split(/\r?\n/);
16512
+ const rows = [];
16513
+ let rowIndex = 0;
16514
+ for (let i = 0; i < lines.length; i++) {
16515
+ const line = lines[i];
16516
+ if (opts.skipEmptyLines && line.trim().length === 0) continue;
16517
+ if (opts.maxRows !== void 0 && rowIndex >= opts.maxRows) break;
16518
+ const values = this.parseRow(line, delimiter, opts.quote, opts.escape);
16519
+ rows.push({
16520
+ index: rowIndex,
16521
+ values
16522
+ });
16523
+ rowIndex++;
16524
+ }
16525
+ return rows;
16526
+ }
16527
+ /**
16528
+ * Detect PII in CSV data
16529
+ */
16530
+ async detect(input, detector, options) {
16531
+ const opts = {
16532
+ ...this.defaultOptions,
16533
+ ...options
16534
+ };
16535
+ const rows = this.parse(input, options);
16536
+ if (rows.length === 0) {
16537
+ const original = typeof input === "string" ? input : input.toString("utf-8");
16538
+ return {
16539
+ original,
16540
+ redacted: original,
16541
+ detections: [],
16542
+ redactionMap: {},
16543
+ stats: { piiCount: 0 },
16544
+ rowCount: 0,
16545
+ columnCount: 0,
16546
+ columnStats: {},
16547
+ matchesByCell: []
16548
+ };
16549
+ }
16550
+ const hasHeader = opts.hasHeader !== void 0 ? opts.hasHeader : this.detectHeader(rows);
16551
+ const headers = hasHeader && rows.length > 0 ? rows[0].values : void 0;
16552
+ const dataRows = hasHeader ? rows.slice(1) : rows;
16553
+ const columnCount = rows[0].values.length;
16554
+ const columnNameToIndex = /* @__PURE__ */ new Map();
16555
+ if (headers) headers.forEach((header, index) => {
16556
+ columnNameToIndex.set(header.toLowerCase().trim(), index);
16557
+ });
16558
+ const alwaysRedactCols = new Set(opts.alwaysRedactColumns || []);
16559
+ if (opts.alwaysRedactColumnNames && headers) opts.alwaysRedactColumnNames.forEach((name) => {
16560
+ const index = columnNameToIndex.get(name.toLowerCase().trim());
16561
+ if (index !== void 0) alwaysRedactCols.add(index);
16562
+ });
16563
+ const skipCols = new Set(opts.skipColumns || []);
16564
+ const columnStats = {};
16565
+ const matchesByCell = [];
16566
+ const allDetections = [];
16567
+ for (let col = 0; col < columnCount; col++) columnStats[col] = {
16568
+ columnIndex: col,
16569
+ columnName: headers?.[col],
16570
+ piiCount: 0,
16571
+ piiPercentage: 0,
16572
+ piiTypes: []
16573
+ };
16574
+ for (const row of dataRows) for (let col = 0; col < row.values.length; col++) {
16575
+ if (skipCols.has(col)) continue;
16576
+ const cellValue = row.values[col];
16577
+ if (alwaysRedactCols.has(col)) {
16578
+ const detection = {
16579
+ type: "SENSITIVE_COLUMN",
16580
+ value: cellValue,
16581
+ placeholder: `[SENSITIVE_COLUMN_${col}]`,
16582
+ position: [0, cellValue.length],
16583
+ severity: "high",
16584
+ confidence: 1
16585
+ };
16586
+ matchesByCell.push({
16587
+ row: row.index,
16588
+ column: col,
16589
+ columnName: headers?.[col],
16590
+ value: cellValue,
16591
+ matches: [detection]
16592
+ });
16593
+ allDetections.push(detection);
16594
+ columnStats[col].piiCount++;
16595
+ continue;
16596
+ }
16597
+ const result = await detector.detect(cellValue);
16598
+ if (result.detections.length > 0) {
16599
+ const boostedDetections = this.boostConfidenceFromColumnName(result.detections, headers?.[col], opts.piiIndicatorNames || []);
16600
+ matchesByCell.push({
16601
+ row: row.index,
16602
+ column: col,
16603
+ columnName: headers?.[col],
16604
+ value: cellValue,
16605
+ matches: boostedDetections
16606
+ });
16607
+ allDetections.push(...boostedDetections);
16608
+ columnStats[col].piiCount += boostedDetections.length;
16609
+ const columnTypes = new Set(columnStats[col].piiTypes);
16610
+ boostedDetections.forEach((d) => columnTypes.add(d.type));
16611
+ columnStats[col].piiTypes = Array.from(columnTypes);
16612
+ }
16613
+ }
16614
+ for (let col = 0; col < columnCount; col++) {
16615
+ const rowsWithPii = matchesByCell.filter((m) => m.column === col).length;
16616
+ columnStats[col].piiPercentage = dataRows.length > 0 ? rowsWithPii / dataRows.length * 100 : 0;
16617
+ }
16618
+ const original = typeof input === "string" ? input : input.toString("utf-8");
16619
+ const redacted = this.redact(original, {
16620
+ original,
16621
+ redacted: original,
16622
+ detections: allDetections,
16623
+ redactionMap: {},
16624
+ stats: { piiCount: allDetections.length },
16625
+ rowCount: dataRows.length,
16626
+ columnCount,
16627
+ headers,
16628
+ columnStats,
16629
+ matchesByCell
16630
+ }, opts);
16631
+ const redactionMap = {};
16632
+ allDetections.forEach((det) => {
16633
+ redactionMap[det.placeholder] = det.value;
16634
+ });
16635
+ return {
16636
+ original,
16637
+ redacted,
16638
+ detections: allDetections,
16639
+ redactionMap,
16640
+ stats: { piiCount: allDetections.length },
16641
+ rowCount: dataRows.length,
16642
+ columnCount,
16643
+ headers: headers?.filter((h) => h !== void 0),
16644
+ columnStats,
16645
+ matchesByCell
16646
+ };
16647
+ }
16648
+ /**
16649
+ * Redact PII in CSV data
16650
+ */
16651
+ redact(input, detectionResult, options) {
16652
+ const opts = {
16653
+ ...this.defaultOptions,
16654
+ ...options
16655
+ };
16656
+ const rows = this.parse(input, options);
16657
+ if (rows.length === 0) return "";
16658
+ const delimiter = opts.delimiter || this.detectDelimiter(typeof input === "string" ? input : input.toString("utf-8"));
16659
+ const hasHeader = detectionResult.headers !== void 0;
16660
+ const redactionMap = /* @__PURE__ */ new Map();
16661
+ for (const cellMatch of detectionResult.matchesByCell) {
16662
+ if (!redactionMap.has(cellMatch.row)) redactionMap.set(cellMatch.row, /* @__PURE__ */ new Map());
16663
+ redactionMap.get(cellMatch.row).set(cellMatch.column, "[REDACTED]");
16664
+ }
16665
+ const outputRows = [];
16666
+ for (let i = 0; i < rows.length; i++) {
16667
+ const row = rows[i];
16668
+ if (hasHeader && i === 0) outputRows.push(this.formatRow(row.values, delimiter, opts.quote));
16669
+ else {
16670
+ const rowIndex = hasHeader ? i - 1 : i;
16671
+ const redactedValues = row.values.map((value, colIndex) => {
16672
+ return redactionMap.get(rowIndex)?.get(colIndex) || value;
16673
+ });
16674
+ outputRows.push(this.formatRow(redactedValues, delimiter, opts.quote));
16675
+ }
16676
+ }
16677
+ return outputRows.join("\n");
16678
+ }
16679
+ /**
16680
+ * Parse a single CSV row
16681
+ */
16682
+ parseRow(line, delimiter, quote, _escape) {
16683
+ const values = [];
16684
+ let current = "";
16685
+ let inQuotes = false;
16686
+ let i = 0;
16687
+ while (i < line.length) {
16688
+ const char = line[i];
16689
+ const nextChar = line[i + 1];
16690
+ if (char === quote) if (inQuotes && nextChar === quote) {
16691
+ current += quote;
16692
+ i += 2;
16693
+ } else {
16694
+ inQuotes = !inQuotes;
16695
+ i++;
16696
+ }
16697
+ else if (char === delimiter && !inQuotes) {
16698
+ values.push(current);
16699
+ current = "";
16700
+ i++;
16701
+ } else {
16702
+ current += char;
16703
+ i++;
16704
+ }
16705
+ }
16706
+ values.push(current);
16707
+ return values;
16708
+ }
16709
+ /**
16710
+ * Format a row as CSV
16711
+ */
16712
+ formatRow(values, delimiter, quote) {
16713
+ return values.map((value) => {
16714
+ if (value.includes(delimiter) || value.includes(quote) || value.includes("\n")) return `${quote}${value.replace(new RegExp(quote, "g"), quote + quote)}${quote}`;
16715
+ return value;
16716
+ }).join(delimiter);
16717
+ }
16718
+ /**
16719
+ * Auto-detect CSV delimiter
16720
+ */
16721
+ detectDelimiter(text) {
16722
+ const delimiters = [
16723
+ ",",
16724
+ " ",
16725
+ ";",
16726
+ "|"
16727
+ ];
16728
+ const lines = text.split(/\r?\n/).slice(0, 5);
16729
+ let bestDelimiter = ",";
16730
+ let bestScore = 0;
16731
+ for (const delimiter of delimiters) {
16732
+ const counts = lines.map((line) => {
16733
+ let count = 0;
16734
+ let inQuotes = false;
16735
+ for (const char of line) {
16736
+ if (char === "\"") inQuotes = !inQuotes;
16737
+ if (char === delimiter && !inQuotes) count++;
16738
+ }
16739
+ return count;
16740
+ });
16741
+ if (counts.length > 0 && counts[0] > 0) {
16742
+ const avg = counts.reduce((a, b) => a + b, 0) / counts.length;
16743
+ const score = avg / (counts.reduce((sum, c) => sum + Math.pow(c - avg, 2), 0) / counts.length + 1);
16744
+ if (score > bestScore) {
16745
+ bestScore = score;
16746
+ bestDelimiter = delimiter;
16747
+ }
16748
+ }
16749
+ }
16750
+ return bestDelimiter;
16751
+ }
16752
+ /**
16753
+ * Detect if first row is likely a header
16754
+ */
16755
+ detectHeader(rows) {
16756
+ if (rows.length < 2) return false;
16757
+ const firstRow = rows[0].values;
16758
+ const secondRow = rows[1].values;
16759
+ if (firstRow.reduce((sum, v) => sum + v.length, 0) / firstRow.length > secondRow.reduce((sum, v) => sum + v.length, 0) / secondRow.length * 1.5) return false;
16760
+ const firstRowNumeric = firstRow.filter((v) => !isNaN(Number(v)) && v.trim() !== "").length;
16761
+ return firstRow.length - firstRowNumeric >= firstRowNumeric;
16762
+ }
16763
+ /**
16764
+ * Boost confidence if column name indicates PII
16765
+ */
16766
+ boostConfidenceFromColumnName(detections, columnName, piiIndicatorNames) {
16767
+ if (!columnName) return detections;
16768
+ const nameLower = columnName.toLowerCase().trim();
16769
+ if (!piiIndicatorNames.some((indicator) => nameLower.includes(indicator.toLowerCase()))) return detections;
16770
+ return detections.map((detection) => ({
16771
+ ...detection,
16772
+ confidence: Math.min(1, (detection.confidence || .5) * 1.2)
16773
+ }));
16774
+ }
16775
+ /**
16776
+ * Extract all cell values as text
16777
+ */
16778
+ extractText(input, options) {
16779
+ const rows = this.parse(input, options);
16780
+ const textParts = [];
16781
+ for (const row of rows) for (const value of row.values) if (value.trim().length > 0) textParts.push(value);
16782
+ return textParts.join(" ");
16783
+ }
16784
+ /**
16785
+ * Get column statistics without full PII detection
16786
+ */
16787
+ getColumnInfo(input, options) {
16788
+ const rows = this.parse(input, options);
16789
+ if (rows.length === 0) return {
16790
+ columnCount: 0,
16791
+ rowCount: 0,
16792
+ sampleRows: []
16793
+ };
16794
+ const opts = {
16795
+ ...this.defaultOptions,
16796
+ ...options
16797
+ };
16798
+ const hasHeader = opts.hasHeader !== void 0 ? opts.hasHeader : this.detectHeader(rows);
16799
+ const headers = hasHeader && rows.length > 0 ? rows[0].values : void 0;
16800
+ const dataRows = hasHeader ? rows.slice(1) : rows;
16801
+ const sampleRows = dataRows.slice(0, 5).map((r) => r.values);
16802
+ return {
16803
+ columnCount: rows[0].values.length,
16804
+ rowCount: dataRows.length,
16805
+ headers,
16806
+ sampleRows
16807
+ };
16808
+ }
16809
+ };
16810
+ }));
16811
+
16812
+ //#endregion
16813
+ //#region src/document/XlsxProcessor.ts
16814
/**
 * Create an XLSX processor instance
 */
function createXlsxProcessor() {
  return new XlsxProcessor();
}
var XlsxProcessor;
var init_XlsxProcessor = __esmMin((() => {
  const XLSX_MISSING_MESSAGE = "[XlsxProcessor] XLSX support requires xlsx package. Install with: npm install xlsx";
  /**
   * Detects and redacts PII in XLSX workbooks, cell by cell.
   *
   * All parsing and writing is delegated to the optional `xlsx` package;
   * every public method except `isAvailable` throws when it is not installed.
   */
  XlsxProcessor = class {
    constructor() {
      this.defaultOptions = {
        piiIndicatorNames: [
          "email",
          "e-mail",
          "mail",
          "email_address",
          "phone",
          "tel",
          "telephone",
          "mobile",
          "phone_number",
          "ssn",
          "social_security",
          "social_security_number",
          "address",
          "street",
          "street_address",
          "city",
          "zip",
          "zipcode",
          "postal",
          "postcode",
          "name",
          "firstname",
          "first_name",
          "lastname",
          "last_name",
          "fullname",
          "full_name",
          "password",
          "pwd",
          "secret",
          "token",
          "api_key",
          "card",
          "credit_card",
          "creditcard",
          "card_number",
          "account",
          "account_number",
          "iban",
          "swift",
          "passport",
          "passport_number",
          "license",
          "licence",
          "driver_license",
          "dob",
          "date_of_birth",
          "birth_date",
          "birthdate"
        ],
        preserveFormatting: true,
        preserveFormulas: true
      };
      try {
        this.xlsx = __require("xlsx");
      } catch {
        // `xlsx` is optional: leave `this.xlsx` unset so isAvailable()
        // reports false and the other methods raise a descriptive error.
      }
    }
    /**
     * Check if XLSX support is available
     *
     * @returns true when the `xlsx` package was loaded by the constructor.
     */
    isAvailable() {
      return !!this.xlsx;
    }
    /**
     * Parse XLSX from buffer
     *
     * @param buffer Workbook bytes.
     * @returns The workbook object produced by `xlsx.read`, with formulas and styles retained.
     * @throws Error when `xlsx` is unavailable or the bytes cannot be parsed
     *   (the underlying error is attached as `cause`).
     */
    parse(buffer) {
      if (!this.xlsx) throw new Error(XLSX_MISSING_MESSAGE);
      try {
        return this.xlsx.read(buffer, {
          type: "buffer",
          cellFormula: true,
          cellStyles: true
        });
      } catch (error) {
        throw new Error(`[XlsxProcessor] Failed to parse XLSX: ${error.message}`, { cause: error });
      }
    }
    /**
     * Detect PII in XLSX data
     *
     * Runs `detectSheet` on every selected sheet, then produces the plain
     * text of the workbook before and after redaction.
     *
     * @param buffer Workbook bytes.
     * @param detector Object exposing `detect(text)` that resolves to `{ detections }`.
     * @param options Optional overrides merged over `this.defaultOptions`.
     * @returns Aggregate result: `original`/`redacted` text, all detections,
     *   a placeholder-to-value `redactionMap`, and the per-sheet results.
     */
    async detect(buffer, detector, options) {
      if (!this.xlsx) throw new Error(XLSX_MISSING_MESSAGE);
      const opts = {
        ...this.defaultOptions,
        ...options
      };
      const workbook = this.parse(buffer);
      const sheetNames = this.getSheetNamesToProcess(workbook, opts);
      const sheetResults = [];
      const allDetections = [];
      for (let sheetIndex = 0; sheetIndex < sheetNames.length; sheetIndex++) {
        const sheetName = sheetNames[sheetIndex];
        const sheetResult = await this.detectSheet(workbook.Sheets[sheetName], sheetName, sheetIndex, detector, opts);
        sheetResults.push(sheetResult);
        for (const cellMatch of sheetResult.matchesByCell) allDetections.push(...cellMatch.matches);
      }
      const original = this.extractText(buffer, options);
      const redactedBuffer = this.redact(buffer, {
        original,
        redacted: original,
        detections: allDetections,
        redactionMap: {},
        stats: { piiCount: allDetections.length },
        sheetResults,
        sheetCount: sheetResults.length
      }, options);
      const redacted = this.extractText(redactedBuffer, options);
      const redactionMap = {};
      for (const det of allDetections) redactionMap[det.placeholder] = det.value;
      return {
        original,
        redacted,
        detections: allDetections,
        redactionMap,
        stats: { piiCount: allDetections.length },
        sheetResults,
        sheetCount: sheetResults.length
      };
    }
    /**
     * Detect PII in a single sheet
     *
     * Column indices in the result (`column`, `columnStats` keys, and the
     * `alwaysRedactColumns` / `skipColumns` options) are relative to the
     * first column of the sheet's used range. `cell` and `columnLetter` are
     * absolute spreadsheet coordinates, so they agree with each other even
     * when the used range does not start at column A.
     *
     * @param sheet Worksheet object from the parsed workbook.
     * @param sheetName Name reported in the result.
     * @param sheetIndex Position reported in the result.
     * @param detector Object exposing `detect(text)` that resolves to `{ detections }`.
     * @param options Merged options (`maxRows`, `hasHeader`, column selectors, `piiIndicatorNames`).
     * @returns Per-sheet result with `rowCount`, `columnCount`, `headers`,
     *   `columnStats` and `matchesByCell`.
     */
    async detectSheet(sheet, sheetName, sheetIndex, detector, options) {
      const range = this.xlsx.utils.decode_range(sheet["!ref"] || "A1");
      const startRow = range.s.r;
      const endRow = options.maxRows !== void 0 ? Math.min(range.e.r, startRow + options.maxRows - 1) : range.e.r;
      const startCol = range.s.c;
      const endCol = range.e.c;
      const columnCount = endCol - startCol + 1;
      const hasHeader = options.hasHeader !== void 0 ? options.hasHeader : this.detectHeader(sheet, range);
      const headers = hasHeader ? this.getRowValues(sheet, startRow, startCol, endCol) : void 0;
      const dataStartRow = hasHeader ? startRow + 1 : startRow;
      const columnNameToIndex = /* @__PURE__ */ new Map();
      if (headers) headers.forEach((header, index) => {
        if (header) columnNameToIndex.set(header.toLowerCase().trim(), index);
      });
      const alwaysRedactCols = new Set(options.alwaysRedactColumns || []);
      if (options.alwaysRedactColumnNames && headers) options.alwaysRedactColumnNames.forEach((name) => {
        const index = columnNameToIndex.get(name.toLowerCase().trim());
        if (index !== void 0) alwaysRedactCols.add(index);
      });
      const skipCols = new Set(options.skipColumns || []);
      const columnStats = {};
      // Number of cells with at least one match, per relative column index.
      const flaggedCellCounts = [];
      for (let colIndex = 0; colIndex < columnCount; colIndex++) {
        columnStats[colIndex] = {
          columnIndex: colIndex,
          columnLetter: this.columnToLetter(startCol + colIndex),
          columnName: headers?.[colIndex],
          piiCount: 0,
          piiPercentage: 0,
          piiTypes: []
        };
        flaggedCellCounts.push(0);
      }
      const matchesByCell = [];
      const recordMatch = (cellRef, row, col, value, formula, matches) => {
        const colIndex = col - startCol;
        matchesByCell.push({
          cell: cellRef,
          row: row + 1,
          column: colIndex,
          columnLetter: this.columnToLetter(col),
          columnName: headers?.[colIndex],
          value,
          formula,
          matches
        });
        flaggedCellCounts[colIndex]++;
      };
      for (let row = dataStartRow; row <= endRow; row++) for (let col = startCol; col <= endCol; col++) {
        const colIndex = col - startCol;
        if (skipCols.has(colIndex)) continue;
        const cellRef = this.xlsx.utils.encode_cell({
          r: row,
          c: col
        });
        const cell = sheet[cellRef];
        if (!cell) continue;
        const cellValue = this.getCellValue(cell);
        if (!cellValue) continue;
        if (alwaysRedactCols.has(colIndex)) {
          // Forced columns skip the detector entirely.
          recordMatch(cellRef, row, col, cellValue, cell.f, [{
            type: "SENSITIVE_COLUMN",
            value: cellValue,
            placeholder: `[SENSITIVE_COLUMN_${colIndex}]`,
            position: [0, cellValue.length],
            severity: "high",
            confidence: 1
          }]);
          columnStats[colIndex].piiCount++;
          continue;
        }
        const result = await detector.detect(cellValue);
        if (result.detections.length === 0) continue;
        const boostedDetections = this.boostConfidenceFromColumnName(result.detections, headers?.[colIndex], options.piiIndicatorNames || []);
        recordMatch(cellRef, row, col, cellValue, cell.f, boostedDetections);
        const stats = columnStats[colIndex];
        stats.piiCount += boostedDetections.length;
        const columnTypes = new Set(stats.piiTypes);
        boostedDetections.forEach((d) => columnTypes.add(d.type));
        stats.piiTypes = Array.from(columnTypes);
      }
      // Clamp: `maxRows: 0` combined with a header row would otherwise go negative.
      const dataRowCount = Math.max(0, endRow - dataStartRow + 1);
      for (let colIndex = 0; colIndex < columnCount; colIndex++) {
        columnStats[colIndex].piiPercentage = dataRowCount > 0 ? flaggedCellCounts[colIndex] / dataRowCount * 100 : 0;
      }
      return {
        sheetName,
        sheetIndex,
        rowCount: dataRowCount,
        columnCount,
        headers: headers?.filter((h) => h !== void 0),
        columnStats,
        matchesByCell
      };
    }
    /**
     * Redact PII in XLSX data
     *
     * Every cell listed in `detectionResult.sheetResults[].matchesByCell` is
     * overwritten with the string "[REDACTED]". Sheets or cells that no
     * longer exist in the workbook are skipped.
     *
     * NOTE(review): with `preserveFormulas` left at its default (true) the
     * cell's formula is kept next to the redacted value; confirm that a
     * spreadsheet application recalculating it cannot restore the original
     * content.
     *
     * @param buffer Workbook bytes.
     * @param detectionResult Result carrying `sheetResults`.
     * @param options Optional overrides merged over `this.defaultOptions`.
     * @returns Whatever `xlsx.write` returns for `{ type: "buffer", bookType: "xlsx" }`.
     */
    redact(buffer, detectionResult, options) {
      if (!this.xlsx) throw new Error(XLSX_MISSING_MESSAGE);
      const opts = {
        ...this.defaultOptions,
        ...options
      };
      const workbook = this.parse(buffer);
      for (const sheetResult of detectionResult.sheetResults) {
        const sheet = workbook.Sheets[sheetResult.sheetName];
        if (!sheet) continue;
        for (const cellMatch of sheetResult.matchesByCell) {
          const cell = sheet[cellMatch.cell];
          if (!cell) continue;
          cell.v = "[REDACTED]";
          cell.w = "[REDACTED]";
          if (!opts.preserveFormulas) delete cell.f;
          cell.t = "s";
        }
      }
      return this.xlsx.write(workbook, {
        type: "buffer",
        bookType: "xlsx"
      });
    }
    /**
     * Get cell value as string
     *
     * Prefers the formatted text (`w`) over the raw value (`v`).
     *
     * @returns The cell text, or "" when the cell is missing or has neither field.
     */
    getCellValue(cell) {
      if (!cell) return "";
      if (cell.w !== void 0) return String(cell.w);
      if (cell.v !== void 0) return String(cell.v);
      return "";
    }
    /**
     * Get row values
     *
     * @returns One entry per column from `startCol` to `endCol` inclusive;
     *   `undefined` marks a cell that does not exist.
     */
    getRowValues(sheet, row, startCol, endCol) {
      const values = [];
      for (let col = startCol; col <= endCol; col++) {
        const cell = sheet[this.xlsx.utils.encode_cell({
          r: row,
          c: col
        })];
        values.push(cell ? this.getCellValue(cell) : void 0);
      }
      return values;
    }
    /**
     * Detect if first row is likely a header
     *
     * Returns false when there is no second row, when either row has no
     * existing cells, or when the first row's average cell length exceeds 1.5
     * times the second row's. Otherwise the first row counts as a header when
     * its non-numeric cells are at least as many as its numeric ones.
     */
    detectHeader(sheet, range) {
      if (range.s.r + 1 > range.e.r) return false;
      const present = (v) => v !== void 0;
      const firstRowValues = this.getRowValues(sheet, range.s.r, range.s.c, range.e.c).filter(present);
      const secondRowValues = this.getRowValues(sheet, range.s.r + 1, range.s.c, range.e.c).filter(present);
      if (firstRowValues.length === 0 || secondRowValues.length === 0) return false;
      const meanLength = (cells) => cells.reduce((sum, v) => sum + v.length, 0) / cells.length;
      if (meanLength(firstRowValues) > meanLength(secondRowValues) * 1.5) return false;
      const firstRowNumeric = firstRowValues.filter((v) => !Number.isNaN(Number(v)) && v.trim() !== "").length;
      return firstRowValues.length - firstRowNumeric >= firstRowNumeric;
    }
    /**
     * Convert column index to letter (0 = A, 25 = Z, 26 = AA)
     */
    columnToLetter(col) {
      let letter = "";
      while (col >= 0) {
        letter = String.fromCharCode(col % 26 + 65) + letter;
        col = Math.floor(col / 26) - 1;
      }
      return letter;
    }
    /**
     * Get sheet names to process based on options
     *
     * `options.sheets` (names) takes precedence over `options.sheetIndices`;
     * unknown names and out-of-range or non-integer indices are dropped.
     * Without either option every sheet is returned.
     */
    getSheetNamesToProcess(workbook, options) {
      const allSheetNames = workbook.SheetNames;
      if (options.sheets && options.sheets.length > 0) return options.sheets.filter((name) => allSheetNames.includes(name));
      if (options.sheetIndices && options.sheetIndices.length > 0) {
        return options.sheetIndices.filter((index) => Number.isInteger(index) && index >= 0 && index < allSheetNames.length).map((index) => allSheetNames[index]);
      }
      return allSheetNames;
    }
    /**
     * Boost confidence if column name indicates PII
     *
     * When the lower-cased, trimmed column name contains any indicator name,
     * each detection is copied with its confidence multiplied by 1.2 and
     * capped at 1. A missing confidence is treated as 0.5; an explicit 0
     * stays 0.
     */
    boostConfidenceFromColumnName(detections, columnName, piiIndicatorNames) {
      if (!columnName) return detections;
      const nameLower = columnName.toLowerCase().trim();
      if (!piiIndicatorNames.some((indicator) => nameLower.includes(indicator.toLowerCase()))) return detections;
      return detections.map((detection) => {
        // Only non-zero falsy values (undefined, null, NaN) fall back to 0.5.
        const base = detection.confidence === 0 ? 0 : detection.confidence || .5;
        return {
          ...detection,
          confidence: Math.min(1, base * 1.2)
        };
      });
    }
    /**
     * Extract all cell values as text
     *
     * @returns The non-blank cell texts of the selected sheets, row by row,
     *   joined with single spaces.
     */
    extractText(buffer, options) {
      if (!this.xlsx) throw new Error(XLSX_MISSING_MESSAGE);
      const workbook = this.parse(buffer);
      const opts = {
        ...this.defaultOptions,
        ...options
      };
      const textParts = [];
      for (const sheetName of this.getSheetNamesToProcess(workbook, opts)) {
        const sheet = workbook.Sheets[sheetName];
        const range = this.xlsx.utils.decode_range(sheet["!ref"] || "A1");
        for (let row = range.s.r; row <= range.e.r; row++) for (let col = range.s.c; col <= range.e.c; col++) {
          const cell = sheet[this.xlsx.utils.encode_cell({
            r: row,
            c: col
          })];
          if (!cell) continue;
          const value = this.getCellValue(cell);
          if (value.trim().length > 0) textParts.push(value);
        }
      }
      return textParts.join(" ");
    }
    /**
     * Get workbook metadata
     *
     * @returns `{ sheetNames, sheetCount }` for the parsed workbook.
     */
    getMetadata(buffer) {
      if (!this.xlsx) throw new Error(XLSX_MISSING_MESSAGE);
      const workbook = this.parse(buffer);
      return {
        sheetNames: workbook.SheetNames,
        sheetCount: workbook.SheetNames.length
      };
    }
  };
}));
17190
+
17191
+ //#endregion
17192
+ //#region src/document/DocumentProcessor.ts
17193
/**
 * Create a document processor instance
 */
function createDocumentProcessor() {
  return new DocumentProcessor();
}
var DocumentProcessor;
var init_DocumentProcessor = __esmMin((() => {
  init_OCRProcessor();
  init_JsonProcessor();
  init_CsvProcessor();
  init_XlsxProcessor();
  // "D:YYYYMMDDHHmmSSOHH'mm'" where everything after the year is optional.
  const PDF_DATE_PATTERN = /^D:(\d{4})(\d{2})?(\d{2})?(\d{2})?(\d{2})?(\d{2})?(?:([+\-Zz])(?:(\d{2})'?(\d{2})?'?)?)?/;
  /** Wrap a lower-level failure in a DocumentProcessor error, keeping the original as `cause`. */
  const wrapError = (prefix, error) => new Error(`[DocumentProcessor] ${prefix}: ${error.message}`, { cause: error });
  /**
   * Format-agnostic front end that detects a document's format and routes
   * text / metadata extraction to the matching processor.
   *
   * PDF and DOCX support rely on the optional `pdf-parse` and `mammoth`
   * packages; images, JSON, CSV and XLSX are delegated to their processors.
   */
  DocumentProcessor = class {
    constructor() {
      try {
        this.pdfParse = __require("pdf-parse");
      } catch {
        // optional dependency: PDF support stays disabled
      }
      try {
        this.mammoth = __require("mammoth");
      } catch {
        // optional dependency: DOCX support stays disabled
      }
      this.ocrProcessor = new OCRProcessor();
      this.jsonProcessor = new JsonProcessor();
      this.csvProcessor = new CsvProcessor();
      this.xlsxProcessor = new XlsxProcessor();
    }
    /**
     * Extract text from document buffer
     *
     * @param buffer Document bytes (a Node.js Buffer).
     * @param options Optional `format` override, `maxSize` (default 50 MiB)
     *   and format-specific settings.
     * @returns The extracted text.
     * @throws Error when the format is unknown or unsupported, or the buffer exceeds `maxSize`.
     */
    async extractText(buffer, options) {
      const format = options?.format || this.detectFormat(buffer);
      if (!format) throw new Error("[DocumentProcessor] Unable to detect document format. Supported: PDF, DOCX, TXT, images (with OCR)");
      const maxSize = options?.maxSize || 50 * 1024 * 1024;
      if (buffer.length > maxSize) throw new Error(`[DocumentProcessor] Document size (${buffer.length} bytes) exceeds maximum (${maxSize} bytes)`);
      switch (format) {
        case "pdf": return this.extractPdfText(buffer, options);
        case "docx": return this.extractDocxText(buffer, options);
        case "txt": return buffer.toString("utf-8");
        case "image": return this.extractImageText(buffer, options);
        case "json": return this.extractJsonText(buffer, options);
        case "csv": return this.extractCsvText(buffer, options);
        case "xlsx": return this.extractXlsxText(buffer, options);
        default: throw new Error(`[DocumentProcessor] Unsupported format: ${format}`);
      }
    }
    /**
     * Get document metadata
     *
     * @param buffer Document bytes.
     * @param options Optional `format` override and format-specific settings.
     * @returns A metadata object whose `format` field names the detected format.
     * @throws Error when the format is unknown or unsupported.
     */
    async getMetadata(buffer, options) {
      const format = options?.format || this.detectFormat(buffer);
      if (!format) throw new Error("[DocumentProcessor] Unable to detect document format");
      switch (format) {
        case "pdf": return this.getPdfMetadata(buffer, options);
        case "docx": return this.getDocxMetadata(buffer, options);
        case "txt": return {
          format: "txt",
          pages: void 0
        };
        case "image": return this.getImageMetadata(buffer, options);
        case "json": return this.getJsonMetadata(buffer, options);
        case "csv": return this.getCsvMetadata(buffer, options);
        case "xlsx": return this.getXlsxMetadata(buffer, options);
        default: throw new Error(`[DocumentProcessor] Unsupported format: ${format}`);
      }
    }
    /**
     * Detect document format from buffer
     *
     * Checks, in order: PDF and image magic bytes, ZIP-based Office
     * containers, JSON, delimiter-consistent CSV, and finally mostly
     * printable text.
     *
     * @param buffer Document bytes (a Node.js Buffer).
     * @returns "pdf" | "image" | "docx" | "xlsx" | "json" | "csv" | "txt", or null when nothing matches.
     */
    detectFormat(buffer) {
      if (buffer.length < 4) return null;
      if (buffer.toString("utf-8", 0, 4) === "%PDF") return "pdf";
      // PNG signature
      if (buffer.length >= 8 && buffer[0] === 137 && buffer[1] === 80 && buffer[2] === 78 && buffer[3] === 71) return "image";
      // JPEG signature
      if (buffer[0] === 255 && buffer[1] === 216 && buffer[2] === 255) return "image";
      // TIFF, little- or big-endian
      if (buffer[0] === 73 && buffer[1] === 73 && buffer[2] === 42 && buffer[3] === 0 || buffer[0] === 77 && buffer[1] === 77 && buffer[2] === 0 && buffer[3] === 42) return "image";
      // BMP ("BM")
      if (buffer[0] === 66 && buffer[1] === 77) return "image";
      // WebP ("RIFF" .... "WEBP")
      if (buffer.length >= 12 && buffer[0] === 82 && buffer[1] === 73 && buffer[2] === 70 && buffer[3] === 70 && buffer[8] === 87 && buffer[9] === 69 && buffer[10] === 66 && buffer[11] === 80) return "image";
      // ZIP container ("PK")
      if (buffer[0] === 80 && buffer[1] === 75) {
        // The ZIP central directory sits at the end of the archive and lists
        // entry names uncompressed, so the last occurrence of a main-part
        // name identifies the outer container even when [Content_Types].xml
        // is the first entry or another Office file is embedded.
        const xlsxAt = buffer.lastIndexOf("xl/workbook.xml");
        const docxAt = buffer.lastIndexOf("word/document.xml");
        if (xlsxAt !== -1 || docxAt !== -1) return xlsxAt > docxAt ? "xlsx" : "docx";
        const zipHeader = buffer.toString("utf-8", 0, Math.min(500, buffer.length));
        if (zipHeader.includes("word/")) return "docx";
        if (zipHeader.includes("xl/")) return "xlsx";
        if (zipHeader.includes("[Content_Types].xml")) return "docx";
      }
      const text = buffer.toString("utf-8");
      const trimmed = text.trim();
      if (trimmed.startsWith("{") && trimmed.endsWith("}") || trimmed.startsWith("[") && trimmed.endsWith("]")) {
        if (this.jsonProcessor.isValid(buffer)) return "json";
      }
      // Blank lines (typically the one after a trailing newline) are ignored
      // so that short CSV files are still recognised.
      const lines = text.split(/\r?\n/).filter((line) => line.trim() !== "").slice(0, 5);
      // NOTE(review): the second candidate is a single space as rendered
      // here; confirm against the original source whether a tab was intended.
      if (lines.length >= 2) for (const delimiter of [
        ",",
        " ",
        ";",
        "|"
      ]) {
        // Count literally: a RegExp built from "|" would match the empty string.
        const counts = lines.map((line) => line.split(delimiter).length - 1);
        if (counts[0] > 0 && counts.every((c) => c === counts[0])) return "csv";
      }
      const sample = buffer.subarray(0, Math.min(1e3, buffer.length));
      let controlBytes = 0;
      for (const byte of sample) {
        if (byte < 32 && byte !== 9 && byte !== 10 && byte !== 13) controlBytes++;
      }
      if (controlBytes < sample.length * .1) return "txt";
      return null;
    }
    /**
     * Check if format is supported
     *
     * @returns true when the dependency needed for `format` is available.
     */
    isFormatSupported(format) {
      switch (format) {
        case "pdf": return !!this.pdfParse;
        case "docx": return !!this.mammoth;
        case "txt": return true;
        case "image": return this.ocrProcessor.isAvailable();
        case "json": return true;
        case "csv": return true;
        case "xlsx": return this.xlsxProcessor.isAvailable();
        default: return false;
      }
    }
    /**
     * Extract text from PDF
     *
     * When `options.pages` is given, its largest entry is passed to
     * pdf-parse as `max`.
     *
     * @returns The extracted text ("" when pdf-parse yields none).
     */
    async extractPdfText(buffer, options) {
      if (!this.pdfParse) throw new Error("[DocumentProcessor] PDF support requires pdf-parse. Install with: npm install pdf-parse");
      try {
        const requestedPages = options?.pages ? [...options.pages] : [];
        const data = await this.pdfParse(buffer, {
          password: options?.password,
          // An empty page list must not turn into Math.max() === -Infinity.
          max: requestedPages.length > 0 ? Math.max(...requestedPages) : void 0
        });
        return data.text || "";
      } catch (error) {
        throw wrapError("PDF extraction failed", error);
      }
    }
    /**
     * Extract text from DOCX
     */
    async extractDocxText(buffer, _options) {
      if (!this.mammoth) throw new Error("[DocumentProcessor] DOCX support requires mammoth. Install with: npm install mammoth");
      try {
        return (await this.mammoth.extractRawText({ buffer })).value || "";
      } catch (error) {
        throw wrapError("DOCX extraction failed", error);
      }
    }
    /**
     * Convert a PDF info-dictionary date into a Date.
     *
     * Strings in the PDF form "D:YYYYMMDDHHmmSSOHH'mm'" are parsed field by
     * field (missing fields default to the start of the period; a missing
     * zone is treated as UTC). Anything else is handed to `new Date`.
     *
     * @returns A Date, or undefined when `value` is empty.
     */
    parsePdfDate(value) {
      if (!value) return void 0;
      const match = typeof value === "string" ? PDF_DATE_PATTERN.exec(value) : null;
      if (!match) return new Date(value);
      const [, year, month = "01", day = "01", hour = "00", minute = "00", second = "00", sign, zoneHour = "00", zoneMinute = "00"] = match;
      const wallClock = Date.UTC(Number(year), Number(month) - 1, Number(day), Number(hour), Number(minute), Number(second));
      let offsetMinutes = 0;
      if (sign === "+" || sign === "-") {
        offsetMinutes = Number(zoneHour) * 60 + Number(zoneMinute);
        if (sign === "-") offsetMinutes = -offsetMinutes;
      }
      // local time = UTC + offset, hence UTC = local time - offset
      return new Date(wallClock - offsetMinutes * 6e4);
    }
    /**
     * Get PDF metadata
     */
    async getPdfMetadata(buffer, _options) {
      if (!this.pdfParse) throw new Error("[DocumentProcessor] PDF support requires pdf-parse. Install with: npm install pdf-parse");
      try {
        const data = await this.pdfParse(buffer, { password: _options?.password });
        return {
          format: "pdf",
          pages: data.numpages,
          title: data.info?.Title,
          author: data.info?.Author,
          creationDate: this.parsePdfDate(data.info?.CreationDate),
          modifiedDate: this.parsePdfDate(data.info?.ModDate),
          custom: data.info
        };
      } catch (error) {
        throw wrapError("PDF metadata extraction failed", error);
      }
    }
    /**
     * Get DOCX metadata
     */
    async getDocxMetadata(_buffer, _options) {
      return {
        format: "docx",
        pages: void 0
      };
    }
    /**
     * Extract text from image using OCR
     */
    async extractImageText(buffer, options) {
      if (!this.ocrProcessor.isAvailable()) throw new Error("[DocumentProcessor] Image/OCR support requires tesseract.js. Install with: npm install tesseract.js");
      try {
        return (await this.ocrProcessor.recognizeText(buffer, options?.ocrOptions)).text;
      } catch (error) {
        throw wrapError("Image text extraction failed", error);
      }
    }
    /**
     * Get image metadata
     *
     * Runs OCR to obtain a confidence value; when OCR is unavailable or
     * fails, `usedOCR` is false and no confidence is reported.
     */
    async getImageMetadata(buffer, options) {
      const withoutOcr = {
        format: "image",
        pages: void 0,
        usedOCR: false
      };
      if (!this.ocrProcessor.isAvailable()) return withoutOcr;
      try {
        return {
          format: "image",
          pages: void 0,
          usedOCR: true,
          ocrConfidence: (await this.ocrProcessor.recognizeText(buffer, options?.ocrOptions)).confidence
        };
      } catch {
        // Metadata is best-effort: an OCR failure is reported as "no OCR".
        return withoutOcr;
      }
    }
    /**
     * Extract text from JSON
     */
    async extractJsonText(buffer, _options) {
      try {
        return this.jsonProcessor.extractText(buffer);
      } catch (error) {
        throw wrapError("JSON extraction failed", error);
      }
    }
    /**
     * Extract text from CSV
     */
    async extractCsvText(buffer, _options) {
      try {
        return this.csvProcessor.extractText(buffer);
      } catch (error) {
        throw wrapError("CSV extraction failed", error);
      }
    }
    /**
     * Extract text from XLSX
     */
    async extractXlsxText(buffer, _options) {
      if (!this.xlsxProcessor.isAvailable()) throw new Error("[DocumentProcessor] XLSX support requires xlsx package. Install with: npm install xlsx");
      try {
        return this.xlsxProcessor.extractText(buffer);
      } catch (error) {
        throw wrapError("XLSX extraction failed", error);
      }
    }
    /**
     * Get JSON metadata
     *
     * Falls back to the bare `{ format, pages }` shape when parsing fails.
     */
    async getJsonMetadata(buffer, _options) {
      try {
        const data = this.jsonProcessor.parse(buffer);
        const isArray = Array.isArray(data);
        return {
          format: "json",
          pages: void 0,
          custom: {
            isArray,
            itemCount: isArray ? data.length : Object.keys(data).length
          }
        };
      } catch {
        // best-effort metadata
        return {
          format: "json",
          pages: void 0
        };
      }
    }
    /**
     * Get CSV metadata
     *
     * Falls back to the bare `{ format, pages }` shape when parsing fails.
     */
    async getCsvMetadata(buffer, _options) {
      try {
        const info = this.csvProcessor.getColumnInfo(buffer);
        return {
          format: "csv",
          pages: void 0,
          custom: {
            rowCount: info.rowCount,
            columnCount: info.columnCount,
            headers: info.headers
          }
        };
      } catch {
        // best-effort metadata
        return {
          format: "csv",
          pages: void 0
        };
      }
    }
    /**
     * Get XLSX metadata
     *
     * Falls back to the bare `{ format, pages }` shape when XLSX support is
     * unavailable or parsing fails.
     */
    async getXlsxMetadata(buffer, _options) {
      if (!this.xlsxProcessor.isAvailable()) return {
        format: "xlsx",
        pages: void 0
      };
      try {
        const metadata = this.xlsxProcessor.getMetadata(buffer);
        return {
          format: "xlsx",
          pages: void 0,
          custom: {
            sheetNames: metadata.sheetNames,
            sheetCount: metadata.sheetCount
          }
        };
      } catch {
        // best-effort metadata
        return {
          format: "xlsx",
          pages: void 0
        };
      }
    }
    /**
     * Get OCR processor instance
     */
    getOCRProcessor() {
      return this.ocrProcessor;
    }
    /**
     * Get JSON processor instance
     */
    getJsonProcessor() {
      return this.jsonProcessor;
    }
    /**
     * Get CSV processor instance
     */
    getCsvProcessor() {
      return this.csvProcessor;
    }
    /**
     * Get XLSX processor instance
     */
    getXlsxProcessor() {
      return this.xlsxProcessor;
    }
  };
}));
17524
+
17525
+ //#endregion
17526
+ //#region src/document/index.ts
17527
+ var document_exports = /* @__PURE__ */ __exportAll({ // object whose properties are getters resolving to the current value of each document export
17528
+ CsvProcessor: () => CsvProcessor,
17529
+ DocumentProcessor: () => DocumentProcessor,
17530
+ JsonProcessor: () => JsonProcessor,
17531
+ OCRProcessor: () => OCRProcessor,
17532
+ XlsxProcessor: () => XlsxProcessor,
17533
+ createCsvProcessor: () => createCsvProcessor,
17534
+ createDocumentProcessor: () => createDocumentProcessor,
17535
+ createJsonProcessor: () => createJsonProcessor,
17536
+ createOCRProcessor: () => createOCRProcessor,
17537
+ createXlsxProcessor: () => createXlsxProcessor
17538
+ });
17539
+ var init_document = __esmMin((() => { // lazy initialiser: the body runs on the first call only
17540
+ init_DocumentProcessor();
17541
+ init_OCRProcessor();
17542
+ init_JsonProcessor();
17543
+ init_CsvProcessor();
17544
+ init_XlsxProcessor();
17545
+ }));
17546
+
17547
+ //#endregion
17548
+ //#region src/workers/WorkerPool.ts
17549
+ /**
17550
+ * Worker thread pool for parallel processing
17551
+ */
17552
/**
 * Factory wrapper around the `WorkerPool` constructor.
 *
 * @param config Pool configuration, passed through unchanged to `new WorkerPool(config)`.
 * @returns The newly constructed pool; `initialize()` is not called here.
 */
function createWorkerPool(config) {
	const pool = new WorkerPool(config);
	return pool;
}
17558
var WorkerPool;
var init_WorkerPool = __esmMin((() => {
	/**
	 * Pool of worker threads that runs queued tasks in parallel.
	 *
	 * Tasks submitted through `execute()` wait in a FIFO queue until a worker
	 * is free. Each worker handles one task at a time; the task's settle
	 * callbacks are parked on the worker as `__currentTask` until the worker
	 * posts a result, fails, or the pool is terminated.
	 */
	WorkerPool = class {
		/**
		 * @param config Optional settings: `numWorkers` (defaults to the CPU count),
		 *   `maxQueueSize` (defaults to 100) and `idleTimeout` (defaults to 30000).
		 */
		constructor(config = {}) {
			this.workers = [];
			this.availableWorkers = [];
			this.taskQueue = [];
			this.totalProcessingTime = 0;
			this.isTerminating = false;
			this.config = {
				// Guard against platforms where cpus() reports an empty list.
				numWorkers: config.numWorkers || cpus().length || 1,
				maxQueueSize: config.maxQueueSize || 100,
				idleTimeout: config.idleTimeout || 3e4
			};
			this.stats = {
				activeWorkers: 0,
				idleWorkers: 0,
				queueSize: 0,
				totalProcessed: 0,
				totalErrors: 0,
				avgProcessingTime: 0
			};
			// `__dirname` does not exist in ES modules, so referencing it directly threw a
			// ReferenceError from this constructor. Fall back to a file URL resolved
			// against this module; the Worker constructor accepts a URL as well as a path.
			this.workerPath = typeof __dirname === "string" ? join(__dirname, "worker.js") : new URL("./worker.js", import.meta.url);
		}
		/**
		 * Initialize worker pool by creating `config.numWorkers` workers.
		 */
		async initialize() {
			for (let i = 0; i < this.config.numWorkers; i++) await this.createWorker();
		}
		/**
		 * Create a new worker, wire up its events and register it as idle.
		 *
		 * @returns The created worker.
		 */
		async createWorker() {
			const worker = new Worker(this.workerPath);
			worker.on("message", (result) => {
				this.handleWorkerResult(worker, result);
			});
			worker.on("error", (error) => {
				console.error("[WorkerPool] Worker error:", error);
				this.stats.totalErrors++;
				// Passing the error rejects the task this worker was running, if any.
				this.removeWorker(worker, error);
				if (this.isTerminating) return;
				// Replace the failed worker, then drain the queue so waiting tasks reach it.
				this.createWorker().then(() => this.processQueue()).catch((spawnError) => {
					console.error("[WorkerPool] Failed to replace worker:", spawnError);
				});
			});
			worker.on("exit", (code) => {
				// A non-zero exit is expected while the pool is shutting down.
				if (code !== 0 && !this.isTerminating) console.error(`[WorkerPool] Worker exited with code ${code}`);
				this.removeWorker(worker);
			});
			this.workers.push(worker);
			this.availableWorkers.push(worker);
			this.stats.idleWorkers++;
			return worker;
		}
		/**
		 * Execute a task on the worker pool.
		 *
		 * @param task Message posted to a worker via `postMessage`.
		 * @returns Promise resolving to the worker's `result`, or rejecting when the
		 *   worker reports an error, fails, or the pool is terminated first.
		 * @throws Error (as a rejection) when the queue already holds `maxQueueSize` tasks.
		 */
		async execute(task) {
			if (this.taskQueue.length >= this.config.maxQueueSize) throw new Error(`[WorkerPool] Queue is full (max: ${this.config.maxQueueSize})`);
			return new Promise((resolve, reject) => {
				this.taskQueue.push({
					task,
					resolve,
					reject
				});
				this.stats.queueSize = this.taskQueue.length;
				this.processQueue();
			});
		}
		/**
		 * Process task queue: hand queued tasks to idle workers until one side runs out.
		 */
		processQueue() {
			while (this.taskQueue.length > 0 && this.availableWorkers.length > 0) {
				const worker = this.availableWorkers.shift();
				const { task, resolve, reject } = this.taskQueue.shift();
				this.stats.idleWorkers--;
				this.stats.activeWorkers++;
				this.stats.queueSize = this.taskQueue.length;
				worker.__currentTask = {
					resolve,
					reject,
					startTime: Date.now()
				};
				try {
					worker.postMessage(task);
				} catch (error) {
					// postMessage throws synchronously for messages it cannot send; hand the
					// worker back and fail only this task instead of leaving it marked busy.
					delete worker.__currentTask;
					this.stats.activeWorkers--;
					this.stats.idleWorkers++;
					this.stats.totalErrors++;
					this.availableWorkers.push(worker);
					reject(error);
				}
			}
		}
		/**
		 * Handle worker result: settle the worker's current task and return it to the idle set.
		 *
		 * @param worker Worker that posted the message.
		 * @param result Message from the worker; `error`, `result` and `processingTime` are read.
		 */
		handleWorkerResult(worker, result) {
			const currentTask = worker.__currentTask;
			if (!currentTask) return;
			delete worker.__currentTask;
			this.stats.activeWorkers--;
			this.stats.idleWorkers++;
			this.stats.totalProcessed++;
			// Prefer the worker-reported duration; otherwise measure from dispatch so a
			// missing field cannot turn the running average into NaN.
			const reported = result?.processingTime;
			const elapsed = typeof reported === "number" && Number.isFinite(reported) ? reported : Date.now() - currentTask.startTime;
			this.totalProcessingTime += elapsed;
			this.stats.avgProcessingTime = this.totalProcessingTime / this.stats.totalProcessed;
			this.availableWorkers.push(worker);
			if (result?.error) {
				this.stats.totalErrors++;
				currentTask.reject(new Error(result.error));
			} else currentTask.resolve(result?.result);
			this.processQueue();
		}
		/**
		 * Remove worker from pool, rejecting the task it was running (if any).
		 *
		 * @param worker Worker to forget.
		 * @param reason Optional error used to reject the in-flight task; a generic
		 *   error is used when omitted.
		 */
		removeWorker(worker, reason) {
			const index = this.workers.indexOf(worker);
			if (index !== -1) this.workers.splice(index, 1);
			const availableIndex = this.availableWorkers.indexOf(worker);
			if (availableIndex !== -1) {
				this.availableWorkers.splice(availableIndex, 1);
				this.stats.idleWorkers--;
			}
			const inFlight = worker.__currentTask;
			if (inFlight) {
				// Without this the caller's promise would never settle.
				delete worker.__currentTask;
				this.stats.activeWorkers--;
				if (reason instanceof Error) inFlight.reject(reason);
				else {
					// The "error" handler already counted its failure; count this one here.
					this.stats.totalErrors++;
					inFlight.reject(new Error("[WorkerPool] Worker stopped before completing its task"));
				}
			}
		}
		/**
		 * Get pool statistics.
		 *
		 * @returns Shallow copy of the current counters.
		 */
		getStats() {
			return { ...this.stats };
		}
		/**
		 * Terminate all workers. Queued and in-flight tasks are rejected so their
		 * callers are not left waiting on promises that can never settle.
		 */
		async terminate() {
			this.isTerminating = true;
			const shutdownError = new Error("[WorkerPool] Pool terminated before the task could complete");
			const workers = [...this.workers];
			const abandoned = this.taskQueue;
			this.taskQueue = [];
			for (const { reject } of abandoned) reject(shutdownError);
			for (const worker of workers) {
				const inFlight = worker.__currentTask;
				if (inFlight) {
					delete worker.__currentTask;
					inFlight.reject(shutdownError);
				}
			}
			try {
				await Promise.all(workers.map((worker) => worker.terminate()));
			} finally {
				this.workers = [];
				this.availableWorkers = [];
				this.stats.activeWorkers = 0;
				this.stats.idleWorkers = 0;
				this.stats.queueSize = 0;
				this.isTerminating = false;
			}
		}
	};
}));
17695
+
17696
+ //#endregion
17697
+ //#region src/workers/index.ts
17698
/**
 * Namespace object for the workers module. Each property is a getter
 * installed by `__exportAll`, so the binding is only read on access.
 */
var workers_exports = /* @__PURE__ */ __exportAll({
	WorkerPool: function() {
		return WorkerPool;
	},
	createWorkerPool: function() {
		return createWorkerPool;
	}
});
/**
 * Initializer for the workers module, wrapped by `__esmMin`; it runs
 * `init_WorkerPool`.
 */
var init_workers = __esmMin(function() {
	init_WorkerPool();
});
17705
+
15713
17706
  //#endregion
15714
17707
  //#region src/detector.ts
15715
17708
  var OpenRedaction = class OpenRedaction {
@@ -16359,14 +18352,14 @@ var OpenRedaction = class OpenRedaction {
16359
18352
  * Run health check
16360
18353
  */
16361
18354
  async healthCheck(options) {
16362
- const { HealthChecker } = await import("./HealthCheck-CCC7Wgoe.mjs");
18355
+ const { HealthChecker } = await Promise.resolve().then(() => (init_HealthCheck(), HealthCheck_exports));
16363
18356
  return new HealthChecker(this).check(options);
16364
18357
  }
16365
18358
  /**
16366
18359
  * Quick health check (minimal overhead)
16367
18360
  */
16368
18361
  async quickHealthCheck() {
16369
- const { HealthChecker } = await import("./HealthCheck-CCC7Wgoe.mjs");
18362
+ const { HealthChecker } = await Promise.resolve().then(() => (init_HealthCheck(), HealthCheck_exports));
16370
18363
  return new HealthChecker(this).quickCheck();
16371
18364
  }
16372
18365
  /**
@@ -16377,7 +18370,7 @@ var OpenRedaction = class OpenRedaction {
16377
18370
  */
16378
18371
  async detectDocument(buffer, options) {
16379
18372
  if (this.rbacManager && !this.rbacManager.hasPermission("detection:detect")) throw new Error("[OpenRedaction] Permission denied: detection:detect required");
16380
- const { createDocumentProcessor } = await import("./document-DxUjbEOE.mjs");
18373
+ const { createDocumentProcessor } = await Promise.resolve().then(() => (init_document(), document_exports));
16381
18374
  const processor = createDocumentProcessor();
16382
18375
  const extractionStart = performance.now();
16383
18376
  const text = await processor.extractText(buffer, options);
@@ -16406,7 +18399,7 @@ var OpenRedaction = class OpenRedaction {
16406
18399
  * Significantly faster for processing many texts
16407
18400
  */
16408
18401
  static async detectBatch(texts, options) {
16409
- const { createWorkerPool } = await import("./workers-BGF3tjiF.mjs");
18402
+ const { createWorkerPool } = await Promise.resolve().then(() => (init_workers(), workers_exports));
16410
18403
  const pool = createWorkerPool({ numWorkers: options?.numWorkers });
16411
18404
  try {
16412
18405
  await pool.initialize();
@@ -16426,7 +18419,7 @@ var OpenRedaction = class OpenRedaction {
16426
18419
  * Efficient for processing many documents at once
16427
18420
  */
16428
18421
  static async detectDocumentsBatch(buffers, options) {
16429
- const { createWorkerPool } = await import("./workers-BGF3tjiF.mjs");
18422
+ const { createWorkerPool } = await Promise.resolve().then(() => (init_workers(), workers_exports));
16430
18423
  const pool = createWorkerPool({ numWorkers: options?.numWorkers });
16431
18424
  try {
16432
18425
  await pool.initialize();
@@ -16445,6 +18438,7 @@ var OpenRedaction = class OpenRedaction {
16445
18438
 
16446
18439
  //#endregion
16447
18440
  //#region src/streaming/StreamingDetector.ts
18441
+ init_document();
16448
18442
  /**
16449
18443
  * Streaming detector for large documents
16450
18444
  */
@@ -16593,6 +18587,7 @@ function createStreamingDetector(detector, options) {
16593
18587
 
16594
18588
  //#endregion
16595
18589
  //#region src/batch/BatchProcessor.ts
18590
+ init_workers();
16596
18591
  /**
16597
18592
  * Batch processor for processing multiple documents
16598
18593
  */
@@ -18510,6 +20505,7 @@ function createAPIServer(config) {
18510
20505
  //#endregion
18511
20506
  //#region src/index.ts
18512
20507
  init_ConfigExporter();
20508
+ init_HealthCheck();
18513
20509
 
18514
20510
  //#endregion
18515
20511
  export { ADMIN_ROLE, ALL_PERMISSIONS, ANALYST_ROLE, APIServer, BatchProcessor, ConfigExporter, ConfigLoader, ConsoleAuditLogger, ContextRulesEngine, CsvProcessor, DEFAULT_DOMAIN_VOCABULARIES, DEFAULT_PROXIMITY_RULES, DEFAULT_SEVERITY_MAP, DEFAULT_TIER_QUOTAS, DocumentProcessor, ExplainAPI, GRAFANA_DASHBOARD_TEMPLATE, HealthChecker, InMemoryAuditLogger, InMemoryMetricsCollector, JsonProcessor, LocalLearningStore, NERDetector, OCRProcessor, OPERATOR_ROLE, OpenRedaction, OpenRedactionError, PersistentAuditLogger, PriorityOptimizer, PrometheusServer, RBACManager, RegexMaxMatchesError, RegexTimeoutError, ReportGenerator, SEVERITY_SCORES, SeverityClassifier, StreamingDetector, TenantManager, TenantNotFoundError, TenantQuotaExceededError, TenantSuspendedError, VIEWER_ROLE, WebhookManager, WorkerPool, XlsxProcessor, allPatterns, analyzeContextFeatures, analyzeFullContext, calculateContextConfidence, calculateRisk, callAIDetect, ccpaPreset, commonFalsePositives, compileSafeRegex, contactPatterns, convertAIEntityToDetection, createAPIServer, createBatchProcessor, createCacheDisabledError, createConfigLoadError, createConfigPreset, createContextRulesEngine, createCsvProcessor, createCustomRole, createDocumentProcessor, createExplainAPI, createHealthChecker, createHighMemoryError, createInvalidPatternError, createJsonProcessor, createLearningDisabledError, createMultiPassDisabledError, createNERDetector, createOCRProcessor, createOptimizationDisabledError, createPersistentAuditLogger, createPriorityOptimizer, createPrometheusServer, createRBACManager, createReportGenerator, createSeverityClassifier, createSimpleMultiPass, createStreamingDetector, createTenantManager, createValidationError, createWebhookManager, createWorkerPool, createXlsxProcessor, defaultPasses, detectPII, detectionsOverlap, educationPreset, exportForVersionControl, extractContext, filterFalsePositives, financePreset, financialPatterns, gdprPreset, generateReport, getAIEndpoint, getPatternsByCategory, 
getPredefinedRole, getPreset, getSeverity, governmentPatterns, groupPatternsByPass, healthCheckMiddleware, healthcarePreset, healthcareResearchPreset, hipaaPreset, inferDocumentType, isFalsePositive, isUnsafePattern, mergeAIEntities, mergePassDetections, networkPatterns, openredactionMiddleware, personalPatterns, safeExec, safeExecAll, transportLogisticsPreset, useAutoRedact, useBatchDetector, useFormFieldValidator, useOpenRedaction, usePIIDetector, validateAIEntity, validateEmail, validateIBAN, validateLuhn, validateNHS, validateNINO, validateName, validatePattern, validateSSN, validateSortCode, validateUKPassport, verifyWebhookSignature };