openredaction 1.0.0 → 1.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.mts DELETED
@@ -1,4111 +0,0 @@
1
- import { Request, Response, NextFunction } from 'express';
2
- import * as react from 'react';
3
-
4
- /**
5
- * Core types for PII Shield
6
- */
7
- /**
8
- * PII pattern definition with validation
9
- */
10
- interface PIIPattern {
11
- /** Pattern type identifier (e.g., "EMAIL", "PHONE_UK_MOBILE") */
12
- type: string;
13
- /** Regular expression for matching */
14
- regex: RegExp;
15
- /** Priority for detection order (higher = checked first) */
16
- priority: number;
17
- /** Optional validator function for false positive reduction */
18
- validator?: (match: string, context: string) => boolean;
19
- /** Placeholder template (e.g., "[EMAIL_{n}]") */
20
- placeholder: string;
21
- /** Optional description */
22
- description?: string;
23
- /** Severity level */
24
- severity?: 'critical' | 'high' | 'medium' | 'low';
25
- }
26
- /**
27
- * Detected PII instance
28
- */
29
- interface PIIDetection {
30
- /** Type of PII detected */
31
- type: string;
32
- /** Original detected value */
33
- value: string;
34
- /** Placeholder used for redaction */
35
- placeholder: string;
36
- /** Position in text [start, end] */
37
- position: [number, number];
38
- /** Severity level */
39
- severity: 'critical' | 'high' | 'medium' | 'low';
40
- /** Confidence score (0-1) based on context analysis */
41
- confidence?: number;
42
- }
43
- /**
44
- * PII match (used internally for processing)
45
- */
46
- interface PIIMatch {
47
- /** Type of PII */
48
- type: string;
49
- /** Matched value */
50
- value: string;
51
- /** Start position */
52
- start: number;
53
- /** End position */
54
- end: number;
55
- /** Confidence score (0-1) */
56
- confidence: number;
57
- /** Context around match */
58
- context: {
59
- before: string;
60
- after: string;
61
- };
62
- }
63
- /**
64
- * Detection result
65
- */
66
- interface DetectionResult {
67
- /** Original text */
68
- original: string;
69
- /** Redacted text */
70
- redacted: string;
71
- /** Array of detections */
72
- detections: PIIDetection[];
73
- /** Map of placeholders to original values for restoration */
74
- redactionMap: Record<string, string>;
75
- /** Statistics */
76
- stats?: {
77
- /** Processing time in milliseconds */
78
- processingTime?: number;
79
- /** Total PII count */
80
- piiCount: number;
81
- };
82
- }
83
- /**
84
- * Redaction mode - controls how PII is replaced
85
- */
86
- type RedactionMode = 'placeholder' | 'mask-middle' | 'mask-all' | 'format-preserving' | 'token-replace';
87
- /**
88
- * Configuration options for OpenRedaction
89
- */
90
- interface OpenRedactionOptions {
91
- /** Include name detection (default: true) */
92
- includeNames?: boolean;
93
- /** Include address detection (default: true) */
94
- includeAddresses?: boolean;
95
- /** Include phone detection (default: true) */
96
- includePhones?: boolean;
97
- /** Include email detection (default: true) */
98
- includeEmails?: boolean;
99
- /** Whitelist specific patterns only */
100
- patterns?: string[];
101
- /** Add custom patterns */
102
- customPatterns?: PIIPattern[];
103
- /** Whitelist of terms to ignore (e.g., company names) */
104
- whitelist?: string[];
105
- /** Enable deterministic placeholders (default: true) */
106
- deterministic?: boolean;
107
- /** Redaction mode (default: 'placeholder') */
108
- redactionMode?: RedactionMode;
109
- /** Compliance preset */
110
- preset?: 'gdpr' | 'hipaa' | 'ccpa';
111
- /** Enable context-aware detection (default: true) */
112
- enableContextAnalysis?: boolean;
113
- /** Minimum confidence threshold for detections (0-1, default: 0.5) */
114
- confidenceThreshold?: number;
115
- /** Enable false positive filtering (default: false, experimental) */
116
- enableFalsePositiveFilter?: boolean;
117
- /** False positive confidence threshold (0-1, default: 0.7) */
118
- falsePositiveThreshold?: number;
119
- /** Enable multi-pass detection for better accuracy (default: false, experimental) */
120
- enableMultiPass?: boolean;
121
- /** Number of detection passes (2-5, default: 3) */
122
- multiPassCount?: number;
123
- /** Enable result caching for repeated inputs (default: false) */
124
- enableCache?: boolean;
125
- /** Maximum cache size (default: 100) */
126
- cacheSize?: number;
127
- /** Enable debug logging (default: false) */
128
- debug?: boolean;
129
- /** Enable audit logging (default: false) */
130
- enableAuditLog?: boolean;
131
- /** Audit logger instance (optional, default: in-memory logger) */
132
- auditLogger?: IAuditLogger;
133
- /** User context for audit logs */
134
- auditUser?: string;
135
- /** Session ID for audit logs */
136
- auditSessionId?: string;
137
- /** Additional metadata for audit logs */
138
- auditMetadata?: Record<string, unknown>;
139
- /** Enable metrics collection (default: false) */
140
- enableMetrics?: boolean;
141
- /** Metrics collector instance (optional, default: in-memory collector) */
142
- metricsCollector?: IMetricsCollector;
143
- /** Enable RBAC (Role-Based Access Control) (default: false) */
144
- enableRBAC?: boolean;
145
- /** RBAC manager instance (optional, default: admin role) */
146
- rbacManager?: IRBACManager;
147
- /** Predefined role name (admin, analyst, operator, viewer) */
148
- role?: RoleName;
149
- }
150
- /**
151
- * Validator function type
152
- */
153
- type Validator = (value: string, context?: string) => boolean;
154
- /**
155
- * Audit log entry for tracking redaction operations
156
- */
157
- interface AuditLogEntry {
158
- /** Unique identifier for this audit entry */
159
- id: string;
160
- /** Timestamp of the operation (ISO 8601) */
161
- timestamp: string;
162
- /** Operation type */
163
- operation: 'redact' | 'detect' | 'restore';
164
- /** Number of PII items found/processed */
165
- piiCount: number;
166
- /** Types of PII detected (e.g., ["EMAIL", "SSN", "PHONE"]) */
167
- piiTypes: string[];
168
- /** Text length processed */
169
- textLength: number;
170
- /** Processing time in milliseconds */
171
- processingTimeMs: number;
172
- /** Redaction mode used */
173
- redactionMode?: RedactionMode;
174
- /** Success status */
175
- success: boolean;
176
- /** Error message if operation failed */
177
- error?: string;
178
- /** Optional user context */
179
- user?: string;
180
- /** Optional session/request identifier */
181
- sessionId?: string;
182
- /** Optional metadata */
183
- metadata?: Record<string, unknown>;
184
- }
185
- /**
186
- * Audit logger interface
187
- */
188
- interface IAuditLogger {
189
- /** Log an audit entry */
190
- log(entry: Omit<AuditLogEntry, 'id' | 'timestamp'>): void;
191
- /** Get all audit logs */
192
- getLogs(): AuditLogEntry[];
193
- /** Get audit logs filtered by operation type */
194
- getLogsByOperation(operation: AuditLogEntry['operation']): AuditLogEntry[];
195
- /** Get audit logs filtered by date range */
196
- getLogsByDateRange(startDate: Date, endDate: Date): AuditLogEntry[];
197
- /** Export audit logs as JSON */
198
- exportAsJson(): string;
199
- /** Export audit logs as CSV */
200
- exportAsCsv(): string;
201
- /** Clear all audit logs */
202
- clear(): void;
203
- /** Get audit statistics */
204
- getStats(): AuditStats;
205
- }
206
- /**
207
- * Audit statistics
208
- */
209
- interface AuditStats {
210
- /** Total number of operations */
211
- totalOperations: number;
212
- /** Total PII items detected */
213
- totalPiiDetected: number;
214
- /** Average processing time in milliseconds */
215
- averageProcessingTime: number;
216
- /** Most common PII types */
217
- topPiiTypes: Array<{
218
- type: string;
219
- count: number;
220
- }>;
221
- /** Operations by type */
222
- operationsByType: Record<string, number>;
223
- /** Success rate (0-1) */
224
- successRate: number;
225
- }
226
- /**
227
- * Metrics for monitoring redaction operations
228
- */
229
- interface RedactionMetrics {
230
- /** Total number of redaction operations */
231
- totalRedactions: number;
232
- /** Total number of PII items detected */
233
- totalPiiDetected: number;
234
- /** Total processing time in milliseconds */
235
- totalProcessingTime: number;
236
- /** Average processing time in milliseconds */
237
- averageProcessingTime: number;
238
- /** Total text length processed (characters) */
239
- totalTextLength: number;
240
- /** PII detection counts by type */
241
- piiByType: Record<string, number>;
242
- /** Operation counts by redaction mode */
243
- byRedactionMode: Record<string, number>;
244
- /** Error count */
245
- totalErrors: number;
246
- /** Timestamp of last update */
247
- lastUpdated: string;
248
- }
249
- /**
250
- * Metrics exporter interface
251
- */
252
- interface IMetricsExporter {
253
- /** Export metrics in Prometheus format */
254
- exportPrometheus(metrics: RedactionMetrics, prefix?: string): string;
255
- /** Export metrics in StatsD format */
256
- exportStatsD(metrics: RedactionMetrics, prefix?: string): string[];
257
- /** Get current metrics snapshot */
258
- getMetrics(): RedactionMetrics;
259
- /** Reset all metrics */
260
- reset(): void;
261
- }
262
- /**
263
- * Metrics collector interface
264
- */
265
- interface IMetricsCollector {
266
- /** Record a redaction operation */
267
- recordRedaction(result: DetectionResult, processingTimeMs: number, redactionMode: RedactionMode): void;
268
- /** Record an error */
269
- recordError(): void;
270
- /** Get metrics exporter */
271
- getExporter(): IMetricsExporter;
272
- }
273
- /**
274
- * RBAC Permission - granular access control
275
- */
276
- type Permission = 'pattern:read' | 'pattern:write' | 'pattern:delete' | 'detection:detect' | 'detection:redact' | 'detection:restore' | 'audit:read' | 'audit:export' | 'audit:delete' | 'metrics:read' | 'metrics:export' | 'metrics:reset' | 'config:read' | 'config:write';
277
- /**
278
- * RBAC Role - collection of permissions
279
- */
280
- interface Role {
281
- /** Role identifier */
282
- name: string;
283
- /** Role description */
284
- description?: string;
285
- /** Permissions granted to this role */
286
- permissions: Permission[];
287
- }
288
- /**
289
- * Predefined role names
290
- */
291
- type RoleName = 'admin' | 'analyst' | 'operator' | 'viewer' | 'custom';
292
- /**
293
- * RBAC manager interface for access control
294
- */
295
- interface IRBACManager {
296
- /** Check if user has specific permission */
297
- hasPermission(permission: Permission): boolean;
298
- /** Check if user has all specified permissions */
299
- hasAllPermissions(permissions: Permission[]): boolean;
300
- /** Check if user has any of the specified permissions */
301
- hasAnyPermission(permissions: Permission[]): boolean;
302
- /** Get current role */
303
- getRole(): Role;
304
- /** Set role */
305
- setRole(role: Role): void;
306
- /** Get all permissions for current role */
307
- getPermissions(): Permission[];
308
- /** Filter patterns based on read permissions */
309
- filterPatterns(patterns: PIIPattern[]): PIIPattern[];
310
- }
311
-
312
- /**
313
- * Document processing types
314
- */
315
-
316
- /**
317
- * Supported document formats
318
- */
319
- type DocumentFormat = 'pdf' | 'docx' | 'txt' | 'image' | 'json' | 'csv' | 'xlsx';
320
- /**
321
- * Supported image formats for OCR
322
- */
323
- type ImageFormat = 'png' | 'jpg' | 'jpeg' | 'tiff' | 'bmp' | 'webp';
324
- /**
325
- * OCR language codes (Tesseract format)
326
- */
327
- type OCRLanguage = 'eng' | 'spa' | 'fra' | 'deu' | 'por' | 'ita' | 'rus' | 'chi_sim' | 'chi_tra' | 'jpn' | 'kor';
328
- /**
329
- * OCR options
330
- */
331
- interface OCROptions {
332
- /** OCR language (default: 'eng' for English) */
333
- language?: OCRLanguage | OCRLanguage[];
334
- /** OCR engine mode (0-3, default: 3 for best accuracy) */
335
- oem?: 0 | 1 | 2 | 3;
336
- /** Page segmentation mode (0-13, default: 3 for automatic) */
337
- psm?: number;
338
- }
339
- /**
340
- * Document processing options
341
- */
342
- interface DocumentOptions {
343
- /** Document format (auto-detected if not specified) */
344
- format?: DocumentFormat;
345
- /** Extract text from specific pages (PDF only, 1-indexed) */
346
- pages?: number[];
347
- /** Password for encrypted PDFs */
348
- password?: string;
349
- /** Maximum document size in bytes (default: 50MB) */
350
- maxSize?: number;
351
- /** Enable OCR for image-based content (default: false) */
352
- enableOCR?: boolean;
353
- /** OCR configuration options */
354
- ocrOptions?: OCROptions;
355
- }
356
- /**
357
- * Document processing result
358
- */
359
- interface DocumentResult {
360
- /** Extracted text from document */
361
- text: string;
362
- /** Document metadata */
363
- metadata: DocumentMetadata;
364
- /** Detection result with PII findings */
365
- detection: DetectionResult;
366
- /** Original file size in bytes */
367
- fileSize: number;
368
- /** Text extraction time in milliseconds */
369
- extractionTime: number;
370
- }
371
- /**
372
- * Document metadata
373
- */
374
- interface DocumentMetadata {
375
- /** Document format */
376
- format: DocumentFormat;
377
- /** Number of pages (if applicable) */
378
- pages?: number;
379
- /** Document title */
380
- title?: string;
381
- /** Document author */
382
- author?: string;
383
- /** Creation date */
384
- creationDate?: Date;
385
- /** Last modified date */
386
- modifiedDate?: Date;
387
- /** OCR confidence (0-100) if OCR was used */
388
- ocrConfidence?: number;
389
- /** Whether OCR was used for extraction */
390
- usedOCR?: boolean;
391
- /** Additional custom metadata */
392
- custom?: Record<string, unknown>;
393
- }
394
- /**
395
- * OCR processor interface
396
- */
397
- interface IOCRProcessor {
398
- /** Extract text from image buffer using OCR */
399
- recognizeText(buffer: Buffer, options?: OCROptions): Promise<OCRResult>;
400
- /** Check if OCR is available (tesseract.js installed) */
401
- isAvailable(): boolean;
402
- }
403
- /**
404
- * OCR recognition result
405
- */
406
- interface OCRResult {
407
- /** Recognized text */
408
- text: string;
409
- /** Confidence score (0-100) */
410
- confidence: number;
411
- /** Processing time in milliseconds */
412
- processingTime: number;
413
- }
414
- /**
415
- * Document processor interface
416
- */
417
- interface IDocumentProcessor {
418
- /** Extract text from document buffer */
419
- extractText(buffer: Buffer, options?: DocumentOptions): Promise<string>;
420
- /** Get document metadata */
421
- getMetadata(buffer: Buffer, options?: DocumentOptions): Promise<DocumentMetadata>;
422
- /** Detect supported format from buffer */
423
- detectFormat(buffer: Buffer): DocumentFormat | null;
424
- /** Check if format is supported */
425
- isFormatSupported(format: DocumentFormat): boolean;
426
- }
427
-
428
- interface WhitelistEntry {
429
- pattern: string;
430
- confidence: number;
431
- occurrences: number;
432
- firstSeen: number;
433
- lastSeen: number;
434
- contexts: string[];
435
- }
436
- interface PatternAdjustment {
437
- type: string;
438
- issue: string;
439
- suggestion: string;
440
- confidence: number;
441
- examples: string[];
442
- occurrences: number;
443
- }
444
- interface LearningStats {
445
- totalDetections: number;
446
- falsePositives: number;
447
- falseNegatives: number;
448
- accuracy: number;
449
- lastUpdated: number;
450
- }
451
- interface LearningData {
452
- version: string;
453
- whitelist: WhitelistEntry[];
454
- patternAdjustments: PatternAdjustment[];
455
- stats: LearningStats;
456
- }
457
- declare class LocalLearningStore {
458
- private filePath;
459
- private data;
460
- private autoSave;
461
- private confidenceThreshold;
462
- constructor(filePath?: string, options?: {
463
- autoSave?: boolean;
464
- confidenceThreshold?: number;
465
- });
466
- /**
467
- * Load learning data from file
468
- */
469
- private load;
470
- /**
471
- * Save learning data to file
472
- */
473
- private save;
474
- /**
475
- * Record a false positive detection
476
- */
477
- recordFalsePositive(text: string, _type: string, context: string): void;
478
- /**
479
- * Record a false negative (missed detection)
480
- */
481
- recordFalseNegative(text: string, type: string, _context: string): void;
482
- /**
483
- * Record a correct detection
484
- */
485
- recordCorrectDetection(): void;
486
- /**
487
- * Update accuracy calculation
488
- */
489
- private updateAccuracy;
490
- /**
491
- * Get whitelist entries above confidence threshold
492
- */
493
- getWhitelist(): string[];
494
- /**
495
- * Get all whitelist entries with metadata
496
- */
497
- getWhitelistEntries(): WhitelistEntry[];
498
- /**
499
- * Get pattern adjustments above confidence threshold
500
- */
501
- getPatternAdjustments(): PatternAdjustment[];
502
- /**
503
- * Get all pattern adjustments
504
- */
505
- getAllPatternAdjustments(): PatternAdjustment[];
506
- /**
507
- * Get learning statistics
508
- */
509
- getStats(): LearningStats;
510
- /**
511
- * Get confidence score for a specific pattern
512
- */
513
- getConfidence(pattern: string): number;
514
- /**
515
- * Get occurrences count for a specific pattern
516
- */
517
- getOccurrences(pattern: string): number;
518
- /**
519
- * Manually add pattern to whitelist
520
- */
521
- addToWhitelist(pattern: string, confidence?: number): void;
522
- /**
523
- * Remove pattern from whitelist
524
- */
525
- removeFromWhitelist(pattern: string): void;
526
- /**
527
- * Clear all learning data
528
- */
529
- clear(): void;
530
- /**
531
- * Export learning data (for sharing)
532
- */
533
- export(options?: {
534
- includeContexts?: boolean;
535
- minConfidence?: number;
536
- }): LearningData;
537
- /**
538
- * Import learning data (merge with existing)
539
- */
540
- import(data: LearningData, merge?: boolean): void;
541
- /**
542
- * Manually save data
543
- */
544
- flush(): void;
545
- }
546
-
547
- /**
548
- * Contextual Rules Engine
549
- * Defines proximity-based rules and keyword patterns for confidence boosting
550
- */
551
-
552
- /**
553
- * Proximity rule for context-based confidence adjustment
554
- */
555
- interface ProximityRule {
556
- /** Pattern type this rule applies to (e.g., 'EMAIL', 'PHONE', 'SSN') */
557
- patternType: string | string[];
558
- /** Keywords to look for near the match */
559
- keywords: string[];
560
- /** Maximum word distance from match (default: 10) */
561
- proximityWindow?: number;
562
- /** Confidence boost if keyword found (0-1) */
563
- confidenceBoost?: number;
564
- /** Confidence penalty if keyword found (0-1) */
565
- confidencePenalty?: number;
566
- /** Whether match must come AFTER keyword (default: both directions) */
567
- keywordBefore?: boolean;
568
- /** Whether match must come BEFORE keyword (default: both directions) */
569
- keywordAfter?: boolean;
570
- /** Rule description */
571
- description?: string;
572
- }
573
- /**
574
- * Domain-specific vocabulary for context detection
575
- */
576
- interface DomainVocabulary {
577
- /** Domain name */
578
- domain: 'medical' | 'legal' | 'financial' | 'technical' | 'hr' | 'custom';
579
- /** Domain-specific terms */
580
- terms: string[];
581
- /** Pattern types to boost in this domain */
582
- boostPatterns?: string[];
583
- /** Confidence boost amount (default: 0.15) */
584
- boostAmount?: number;
585
- }
586
- /**
587
- * Contextual rules configuration
588
- */
589
- interface ContextRulesConfig {
590
- /** Proximity rules */
591
- proximityRules?: ProximityRule[];
592
- /** Domain vocabularies */
593
- domainVocabularies?: DomainVocabulary[];
594
- /** Enable default rules (default: true) */
595
- useDefaultRules?: boolean;
596
- }
597
- /**
598
- * Default proximity rules for common PII patterns
599
- */
600
- declare const DEFAULT_PROXIMITY_RULES: ProximityRule[];
601
- /**
602
- * Default domain vocabularies
603
- */
604
- declare const DEFAULT_DOMAIN_VOCABULARIES: DomainVocabulary[];
605
- /**
606
- * Contextual Rules Engine
607
- */
608
- declare class ContextRulesEngine {
609
- private proximityRules;
610
- private domainVocabularies;
611
- constructor(config?: ContextRulesConfig);
612
- /**
613
- * Apply proximity rules to adjust confidence
614
- */
615
- applyProximityRules(match: PIIMatch, text: string): PIIMatch;
616
- /**
617
- * Apply domain vocabulary boosting
618
- */
619
- applyDomainBoosting(matches: PIIMatch[], text: string): PIIMatch[];
620
- /**
621
- * Check if keywords are within proximity window
622
- */
623
- private checkProximity;
624
- /**
625
- * Detect which domains the text belongs to
626
- */
627
- private detectDomains;
628
- /**
629
- * Add custom proximity rule
630
- */
631
- addProximityRule(rule: ProximityRule): void;
632
- /**
633
- * Add custom domain vocabulary
634
- */
635
- addDomainVocabulary(vocabulary: DomainVocabulary): void;
636
- /**
637
- * Get all proximity rules
638
- */
639
- getProximityRules(): ProximityRule[];
640
- /**
641
- * Get all domain vocabularies
642
- */
643
- getDomainVocabularies(): DomainVocabulary[];
644
- }
645
- /**
646
- * Create a context rules engine instance
647
- */
648
- declare function createContextRulesEngine(config?: ContextRulesConfig): ContextRulesEngine;
649
-
650
- /**
651
- * Context Analysis for PII Detection
652
- * Provides NLP-lite features to reduce false positives
653
- */
654
- interface ContextAnalysis {
655
- /** 5 words before detection */
656
- beforeWords: string[];
657
- /** 5 words after detection */
658
- afterWords: string[];
659
- /** Full sentence containing detection */
660
- sentence: string;
661
- /** Inferred document type */
662
- documentType: 'email' | 'document' | 'code' | 'chat' | 'unknown';
663
- /** Confidence that this is actual PII (0-1) */
664
- confidence: number;
665
- }
666
- interface ContextFeatures {
667
- /** Contains technical terms */
668
- hasTechnicalContext: boolean;
669
- /** Contains business/corporate terms */
670
- hasBusinessContext: boolean;
671
- /** Contains medical/healthcare terms */
672
- hasMedicalContext: boolean;
673
- /** Contains financial terms */
674
- hasFinancialContext: boolean;
675
- /** Contains example/test indicators */
676
- hasExampleContext: boolean;
677
- /** Position in document (0-1, 0 = start, 1 = end) */
678
- relativePosition: number;
679
- }
680
- /**
681
- * Extract context around a detection
682
- */
683
- declare function extractContext(text: string, startPos: number, endPos: number, wordsBefore?: number, wordsAfter?: number): {
684
- before: string;
685
- after: string;
686
- beforeWords: string[];
687
- afterWords: string[];
688
- sentence: string;
689
- };
690
- /**
691
- * Infer document type from content
692
- */
693
- declare function inferDocumentType(text: string): ContextAnalysis['documentType'];
694
- /**
695
- * Extract context features for classification
696
- */
697
- declare function analyzeContextFeatures(fullContext: string): ContextFeatures;
698
- /**
699
- * Calculate confidence score for a detection based on context
700
- */
701
- declare function calculateContextConfidence(_value: string, patternType: string, context: {
702
- before: string;
703
- after: string;
704
- sentence: string;
705
- documentType: ContextAnalysis['documentType'];
706
- features: ContextFeatures;
707
- }): number;
708
- /**
709
- * Perform full context analysis
710
- */
711
- declare function analyzeFullContext(text: string, value: string, patternType: string, startPos: number, endPos: number): ContextAnalysis;
712
-
713
- /**
714
- * Explain API for debugging PII detections
715
- * Provides detailed insights into why text was or wasn't detected
716
- */
717
-
718
- /**
719
- * Pattern match result for explain
720
- */
721
- interface PatternMatchResult {
722
- /** Pattern that was tested */
723
- pattern: PIIPattern;
724
- /** Whether the pattern matched */
725
- matched: boolean;
726
- /** Matched value (if matched) */
727
- matchedValue?: string;
728
- /** Position of match (if matched) */
729
- position?: [number, number];
730
- /** Why it didn't match or was filtered */
731
- reason?: string;
732
- /** Validator result (if validator exists) */
733
- validatorPassed?: boolean;
734
- /** Context analysis (if enabled) */
735
- contextAnalysis?: ContextAnalysis;
736
- /** False positive check (if enabled) */
737
- falsePositiveCheck?: {
738
- isFalsePositive: boolean;
739
- confidence: number;
740
- reason?: string;
741
- };
742
- }
743
- /**
744
- * Explanation for a specific text
745
- */
746
- interface TextExplanation {
747
- /** Original text */
748
- text: string;
749
- /** All pattern match results */
750
- patternResults: PatternMatchResult[];
751
- /** Patterns that matched */
752
- matchedPatterns: PatternMatchResult[];
753
- /** Patterns that didn't match */
754
- unmatchedPatterns: PatternMatchResult[];
755
- /** Patterns that matched but were filtered */
756
- filteredPatterns: PatternMatchResult[];
757
- /** Final detections */
758
- detections: PIIDetection[];
759
- /** Summary statistics */
760
- summary: {
761
- totalPatternsChecked: number;
762
- patternsMatched: number;
763
- patternsFiltered: number;
764
- finalDetections: number;
765
- };
766
- }
767
- /**
768
- * Explain API for debugging
769
- */
770
- declare class ExplainAPI {
771
- private detector;
772
- private patterns;
773
- private options;
774
- constructor(detector: OpenRedaction);
775
- /**
776
- * Explain why text was or wasn't detected as PII
777
- */
778
- explain(text: string): TextExplanation;
779
- /**
780
- * Explain a specific detection
781
- */
782
- explainDetection(detection: PIIDetection, text: string): {
783
- detection: PIIDetection;
784
- pattern?: PIIPattern;
785
- contextAnalysis?: ContextAnalysis;
786
- reasoning: string[];
787
- };
788
- /**
789
- * Suggest why text wasn't detected
790
- */
791
- suggestWhy(text: string, expectedType: string): {
792
- text: string;
793
- expectedType: string;
794
- suggestions: string[];
795
- similarPatterns: PIIPattern[];
796
- };
797
- /**
798
- * Get debugging information for entire detection process
799
- */
800
- debug(text: string): {
801
- text: string;
802
- textLength: number;
803
- enabledFeatures: string[];
804
- patternCount: number;
805
- explanation: TextExplanation;
806
- performance: {
807
- estimatedTime: string;
808
- };
809
- };
810
- }
811
- /**
812
- * Helper to create explain API from detector
813
- */
814
- declare function createExplainAPI(detector: OpenRedaction): ExplainAPI;
815
-
816
- /**
817
- * Report generation for PII detection results
818
- * Generates static HTML and Markdown reports - 100% offline, zero dependencies
819
- */
820
-
821
- /**
822
- * Report format options
823
- */
824
- type ReportFormat = 'html' | 'markdown';
825
- /**
826
- * Report type options
827
- */
828
- type ReportType = 'summary' | 'detailed' | 'compliance';
829
- /**
830
- * Report generation options
831
- */
832
- interface ReportOptions {
833
- /** Report format */
834
- format: ReportFormat;
835
- /** Report type */
836
- type?: ReportType;
837
- /** Report title */
838
- title?: string;
839
- /** Include original text (default: false for privacy) */
840
- includeOriginalText?: boolean;
841
- /** Include redacted text (default: true) */
842
- includeRedactedText?: boolean;
843
- /** Include detection details (default: true) */
844
- includeDetectionDetails?: boolean;
845
- /** Include statistics (default: true) */
846
- includeStatistics?: boolean;
847
- /** Include explanation (requires ExplainAPI, default: false) */
848
- includeExplanation?: boolean;
849
- /** Company/project name for compliance reports */
850
- organizationName?: string;
851
- /** Additional metadata */
852
- metadata?: Record<string, string>;
853
- }
854
- /**
855
- * Report generator for PII detection results
856
- */
857
- declare class ReportGenerator {
858
- constructor(_detector: OpenRedaction);
859
- /**
860
- * Generate a report from detection results
861
- */
862
- generate(result: DetectionResult, options: ReportOptions): string;
863
- /**
864
- * Generate HTML report
865
- */
866
- private generateHTML;
867
- /**
868
- * Generate Markdown report
869
- */
870
- private generateMarkdown;
871
- /**
872
- * Calculate statistics from detection results
873
- */
874
- private calculateStatistics;
875
- /**
876
- * Escape HTML special characters
877
- */
878
- private escapeHtml;
879
- }
880
- /**
881
- * Helper to create report generator
882
- */
883
- declare function createReportGenerator(detector: OpenRedaction): ReportGenerator;
884
-
885
- interface PatternStats {
886
- type: string;
887
- totalDetections: number;
888
- falsePositives: number;
889
- falseNegatives: number;
890
- accuracy: number;
891
- priority: number;
892
- adjustedPriority: number;
893
- }
894
- interface OptimizerOptions {
895
- learningWeight: number;
896
- minSampleSize: number;
897
- maxPriorityAdjustment: number;
898
- }
899
- /**
900
- * Priority Optimizer - Dynamically adjusts pattern priorities based on learning data
901
- */
902
- declare class PriorityOptimizer {
903
- private learningStore;
904
- private options;
905
- constructor(learningStore: LocalLearningStore, options?: Partial<OptimizerOptions>);
906
- /**
907
- * Optimize pattern priorities based on learning data
908
- */
909
- optimizePatterns(patterns: PIIPattern[]): PIIPattern[];
910
- /**
911
- * Get pattern statistics with learning data
912
- */
913
- getPatternStats(patterns: PIIPattern[]): PatternStats[];
914
- /**
915
- * Infer pattern type from a whitelisted value
916
- * This is a heuristic - in production we'd track this explicitly
917
- */
918
- private inferPatternType;
919
- /**
920
- * Reset all priority adjustments
921
- */
922
- resetPriorities(patterns: PIIPattern[]): PIIPattern[];
923
- /**
924
- * Get optimizer configuration
925
- */
926
- getOptions(): OptimizerOptions;
927
- /**
928
- * Update optimizer configuration
929
- */
930
- setOptions(options: Partial<OptimizerOptions>): void;
931
- }
932
- /**
933
- * Create a priority optimizer instance
934
- */
935
- declare function createPriorityOptimizer(learningStore: LocalLearningStore, options?: Partial<OptimizerOptions>): PriorityOptimizer;
936
-
937
- declare class OpenRedaction { // PII detector: detect()/scan() find PII in text, restore() reverses a redaction map
938
- private patterns;
939
- private compiledPatterns;
940
- private options;
941
- private multiPassConfig?;
942
- private resultCache?;
943
- private valueToPlaceholder;
944
- private placeholderCounter;
945
- private learningStore?;
946
- private priorityOptimizer?;
947
- private enableLearning;
948
- private auditLogger?;
949
- private auditUser?;
950
- private auditSessionId?;
951
- private auditMetadata?;
952
- private metricsCollector?;
953
- private rbacManager?;
954
- private nerDetector?;
955
- private contextRulesEngine?;
956
- private severityClassifier;
957
- constructor(options?: OpenRedactionOptions & {
958
- configPath?: string;
959
- enableLearning?: boolean;
960
- learningStorePath?: string;
961
- enablePriorityOptimization?: boolean;
962
- optimizerOptions?: Partial<OptimizerOptions>;
963
- enableNER?: boolean;
964
- enableContextRules?: boolean;
965
- contextRulesConfig?: ContextRulesConfig;
966
- maxInputSize?: number;
967
- regexTimeout?: number;
968
- });
969
- /**
970
- * Create OpenRedaction instance from config file
- * @param configPath Optional path to the config file (how the file is located when omitted is not visible in this declaration).
- * @returns Promise resolving to the constructed instance.
971
- */
972
- static fromConfig(configPath?: string): Promise<OpenRedaction>;
973
- /**
974
- * Build the list of patterns based on options
975
- * Supports three filtering modes (in order of priority):
976
- * 1. Specific pattern types (patterns option)
977
- * 2. Pattern categories (categories option) - NEW!
978
- * 3. All patterns with type-specific filters (includeNames, etc.)
979
- */
980
- private buildPatternList;
981
- /**
982
- * Validate all patterns to prevent malicious regex injection
983
- * ONLY validates custom patterns - built-in patterns are already vetted
984
- * Timeout protection in safeExec() is the primary defense against ReDoS
985
- */
986
- private validatePatterns;
987
- /**
988
- * Pre-compile all regex patterns for performance
989
- * Avoids creating new RegExp objects on every detect() call
990
- */
991
- private precompilePatterns;
992
- /**
993
- * Process patterns and detect PII
994
- * Used by both single-pass and multi-pass detection
995
- */
996
- private processPatterns;
997
- /**
998
- * Detect PII in text
- * @param text Input text to scan.
- * @returns The DetectionResult for `text`.
999
- */
1000
- detect(text: string): DetectionResult;
1001
- /**
1002
- * Restore redacted text using redaction map
- * @param redactedText Text that contains redaction placeholders.
- * @param redactionMap String-to-string map; presumably placeholder -> original value as produced by detect() (confirm).
- * @returns The text with redactions reversed.
1003
- */
1004
- restore(redactedText: string, redactionMap: Record<string, string>): string;
1005
- /**
1006
- * Generate placeholder for a detected value
1007
- */
1008
- private generatePlaceholder;
1009
- /**
1010
- * Check if a range overlaps with existing detections
1011
- */
1012
- private overlapsWithExisting;
1013
- /**
1014
- * Escape special regex characters
1015
- */
1016
- private escapeRegex;
1017
- /**
1018
- * Get the list of active patterns
1019
- */
1020
- getPatterns(): PIIPattern[];
1021
- /**
1022
- * Get severity-based scan results
- * Takes raw text and returns detections grouped into `high`, `medium` and `low` arrays, plus a numeric `total` (presumably the overall detection count).
- * NOTE(review): PIIDetection.severity also allows 'critical', but no `critical` bucket is declared here - confirm where critical detections are reported.
1023
- */
1024
- scan(text: string): {
1025
- high: PIIDetection[];
1026
- medium: PIIDetection[];
1027
- low: PIIDetection[];
1028
- total: number;
1029
- };
1030
- /**
1031
- * Record a false positive (incorrectly detected as PII)
- * @param detection The detection that was judged incorrect.
- * @param context Optional context string stored with the report.
1032
- */
1033
- recordFalsePositive(detection: PIIDetection, context?: string): void;
1034
- /**
1035
- * Record a false negative (missed PII that should have been detected)
- * @param text The text that was missed.
- * @param expectedType Pattern type the text should have been detected as.
- * @param context Optional context string stored with the report.
1036
- */
1037
- recordFalseNegative(text: string, expectedType: string, context?: string): void;
1038
- /**
1039
- * Record a correct detection (for accuracy tracking)
1040
- */
1041
- recordCorrectDetection(): void;
1042
- /**
1043
- * Get learning statistics
- * @returns The statistics, or null (presumably when learning is not enabled - confirm).
1044
- */
1045
- getLearningStats(): LearningStats | null;
1046
- /**
1047
- * Get learned whitelist entries
1048
- */
1049
- getLearnedWhitelist(): WhitelistEntry[];
1050
- /**
1051
- * Get pattern adjustment suggestions
1052
- */
1053
- getPatternAdjustments(): PatternAdjustment[];
1054
- /**
1055
- * Export learned patterns for sharing
- * @param options Optional export settings (`includeContexts`, `minConfidence`).
- * @returns The exported data, or null (presumably when learning is not enabled - confirm).
1056
- */
1057
- exportLearnings(options?: {
1058
- includeContexts?: boolean;
1059
- minConfidence?: number;
1060
- }): LearningData | null;
1061
- /**
1062
- * Import learned patterns from another source
- * NOTE(review): `data` is typed `any`, so its shape is not checked at compile time; presumably it expects the LearningData produced by exportLearnings() - confirm.
- * @param merge Optional flag; presumably merges into existing learnings instead of replacing them (default not visible here).
1063
- */
1064
- importLearnings(data: any, merge?: boolean): void;
1065
- /**
1066
- * Manually add a term to the whitelist
1067
- */
1068
- addToWhitelist(pattern: string, confidence?: number): void;
1069
- /**
1070
- * Remove a term from the whitelist
1071
- */
1072
- removeFromWhitelist(pattern: string): void;
1073
- /**
1074
- * Get the learning store instance
- * @returns The store, or undefined when none is configured.
1075
- */
1076
- getLearningStore(): LocalLearningStore | undefined;
1077
- /**
1078
- * Get the priority optimizer instance
- * @returns The optimizer, or undefined when none is configured.
1079
- */
1080
- getPriorityOptimizer(): PriorityOptimizer | undefined;
1081
- /**
1082
- * Optimize pattern priorities based on learning data
1083
- * Call this to re-optimize priorities after accumulating new learning data
1084
- */
1085
- optimizePriorities(): void;
1086
- /**
1087
- * Get pattern statistics with learning data
- * @returns The per-pattern statistics, or null (presumably when learning/optimization is not enabled - confirm).
1088
- */
1089
- getPatternStats(): PatternStats[] | null;
1090
- /**
1091
- * Clear the result cache (if caching is enabled)
1092
- */
1093
- clearCache(): void;
1094
- /**
1095
- * Get cache statistics
- * @returns Current `size`, configured `maxSize`, and whether caching is `enabled`.
1096
- */
1097
- getCacheStats(): {
1098
- size: number;
1099
- maxSize: number;
1100
- enabled: boolean;
1101
- };
1102
- /**
1103
- * Get the audit logger instance (if audit logging is enabled)
1104
- */
1105
- getAuditLogger(): IAuditLogger | undefined;
1106
- /**
1107
- * Get the metrics collector instance (if metrics collection is enabled)
1108
- */
1109
- getMetricsCollector(): IMetricsCollector | undefined;
1110
- /**
1111
- * Get the RBAC manager instance (if RBAC is enabled)
1112
- */
1113
- getRBACManager(): IRBACManager | undefined;
1114
- /**
1115
- * Create an explain API for debugging detections
1116
- */
1117
- explain(): ExplainAPI;
1118
- /**
1119
- * Generate a report from detection results
- * @param result Detection result to report on.
- * @param options Report options (required).
- * @returns The rendered report as a string.
1120
- */
1121
- generateReport(result: DetectionResult, options: ReportOptions): string;
1122
- /**
1123
- * Export current configuration
- * @param metadata Optional descriptive metadata (`description`, `author`, `tags`).
- * @returns The configuration serialized as a string (format not stated in this declaration).
1124
- */
1125
- exportConfig(metadata?: {
1126
- description?: string;
1127
- author?: string;
1128
- tags?: string[];
1129
- }): string;
1130
- /**
1131
- * Run health check
- * NOTE(review): resolves to `any`, so callers get no compile-time shape for the result; the units of `performanceThreshold` and `memoryThreshold` are not stated in this declaration.
1132
- */
1133
- healthCheck(options?: {
1134
- testDetection?: boolean;
1135
- checkPerformance?: boolean;
1136
- performanceThreshold?: number;
1137
- memoryThreshold?: number;
1138
- }): Promise<any>;
1139
- /**
1140
- * Quick health check (minimal overhead)
- * @returns Promise resolving to a `status` of 'healthy' or 'unhealthy' and a `message`.
1141
- */
1142
- quickHealthCheck(): Promise<{
1143
- status: 'healthy' | 'unhealthy';
1144
- message: string;
1145
- }>;
1146
- /**
1147
- * Detect PII in a document (PDF, DOCX, TXT)
1148
- * Requires optional peer dependencies:
1149
- * - pdf-parse for PDF support
1150
- * - mammoth for DOCX support
- * @param buffer Raw document bytes.
- * @param options Optional document-processing options.
1151
- */
1152
- detectDocument(buffer: Buffer, options?: DocumentOptions): Promise<DocumentResult>;
1153
- /**
1154
- * Detect PII in a document file from filesystem
1155
- * Convenience method that reads file and calls detectDocument
1156
- */
1157
- detectDocumentFile(filePath: string, options?: DocumentOptions): Promise<DocumentResult>;
1158
- /**
1159
- * Batch detect PII in multiple texts using worker threads (parallel)
1160
- * Significantly faster for processing many texts
- * @param texts Texts to scan.
- * @param options Detector options plus an optional `numWorkers` count.
- * @returns Promise of DetectionResult values (presumably one per input, in input order - confirm).
1161
- */
1162
- static detectBatch(texts: string[], options?: OpenRedactionOptions & {
1163
- numWorkers?: number;
1164
- }): Promise<DetectionResult[]>;
1165
- /**
1166
- * Batch process multiple documents using worker threads (parallel)
1167
- * Efficient for processing many documents at once
- * @param buffers Raw document buffers.
- * @param options Document options plus an optional `numWorkers` count.
1168
- */
1169
- static detectDocumentsBatch(buffers: Buffer[], options?: DocumentOptions & {
1170
- numWorkers?: number;
1171
- }): Promise<DocumentResult[]>;
1172
- }
1173
-
1174
- /**
1175
- * Audit logging implementation for tracking redaction operations
1176
- */
1177
-
1178
- /**
1179
- * In-memory audit logger implementation
1180
- * Stores audit logs in memory with support for filtering, export, and statistics
- * The constructor takes an optional `maxLogs` cap; its default and the behavior once the cap is reached are not visible in this declaration.
1181
- */
1182
- declare class InMemoryAuditLogger implements IAuditLogger {
1183
- private logs;
1184
- private maxLogs;
1185
- constructor(maxLogs?: number);
1186
- /**
1187
- * Log an audit entry
- * The entry type omits `id` and `timestamp`, so those are presumably assigned by the logger (see private generateId).
1188
- */
1189
- log(entry: Omit<AuditLogEntry, 'id' | 'timestamp'>): void;
1190
- /**
1191
- * Get all audit logs
1192
- */
1193
- getLogs(): AuditLogEntry[];
1194
- /**
1195
- * Get audit logs filtered by operation type
1196
- */
1197
- getLogsByOperation(operation: AuditLogEntry['operation']): AuditLogEntry[];
1198
- /**
1199
- * Get audit logs filtered by date range
- * NOTE(review): whether `startDate`/`endDate` are inclusive bounds is not visible here - confirm.
1200
- */
1201
- getLogsByDateRange(startDate: Date, endDate: Date): AuditLogEntry[];
1202
- /**
1203
- * Export audit logs as JSON
- * @returns The logs serialized as a JSON string.
1204
- */
1205
- exportAsJson(): string;
1206
- /**
1207
- * Export audit logs as CSV
- * @returns The logs serialized as CSV text.
1208
- */
1209
- exportAsCsv(): string;
1210
- /**
1211
- * Clear all audit logs
1212
- */
1213
- clear(): void;
1214
- /**
1215
- * Get audit statistics
1216
- */
1217
- getStats(): AuditStats;
1218
- /**
1219
- * Generate a unique ID for audit entries
1220
- */
1221
- private generateId;
1222
- /**
1223
- * Escape CSV values
1224
- */
1225
- private escapeCsv;
1226
- }
1227
- /**
1228
- * Console audit logger implementation
1229
- * Outputs audit logs to console (useful for debugging)
- * Exposes the same IAuditLogger method set as InMemoryAuditLogger and holds a private `delegate`;
- * presumably storage, queries and exports are forwarded to that delegate - confirm in the implementation.
1230
- */
1231
- declare class ConsoleAuditLogger implements IAuditLogger {
1232
- private delegate;
1233
- constructor(maxLogs?: number);
1234
- log(entry: Omit<AuditLogEntry, 'id' | 'timestamp'>): void;
1235
- getLogs(): AuditLogEntry[];
1236
- getLogsByOperation(operation: AuditLogEntry['operation']): AuditLogEntry[];
1237
- getLogsByDateRange(startDate: Date, endDate: Date): AuditLogEntry[];
1238
- exportAsJson(): string;
1239
- exportAsCsv(): string;
1240
- clear(): void;
1241
- getStats(): AuditStats;
1242
- }
1243
-
1244
- /**
1245
- * Persistent Audit Logger with multiple backend support
1246
- * Provides tamper-proof, cryptographic audit logging for production environments
1247
- */
1248
-
1249
- /**
1250
- * Supported audit storage backends: SQL/document databases ('sqlite', 'postgresql', 'mongodb'), S3 object storage ('s3'), or a plain file ('file')
1251
- */
1252
- type AuditBackend = 'sqlite' | 'postgresql' | 'mongodb' | 's3' | 'file';
1253
- /**
1254
- * Audit storage connection configuration
- * Only `backend` is required; which of the optional fields apply depends on the chosen backend (see the per-field notes).
1255
- */
1256
- interface AuditDatabaseConfig {
1257
- /** Backend type */
1258
- backend: AuditBackend;
1259
- /** Connection string (for PostgreSQL/MongoDB) */
1260
- connectionString?: string;
1261
- /** Database file path (for SQLite/file backend) */
1262
- filePath?: string;
1263
- /** S3 bucket configuration (for the 's3' backend); `bucket` and `region` are required, credentials and key `prefix` are optional */
1264
- s3Config?: {
1265
- bucket: string;
1266
- region: string;
1267
- accessKeyId?: string;
1268
- secretAccessKey?: string;
1269
- prefix?: string;
1270
- };
1271
- /** Table/collection name (default: 'audit_logs') */
1272
- tableName?: string;
1273
- /** Enable compression (default: false) */
1274
- enableCompression?: boolean;
1275
- /** Batch size for bulk inserts (default: 100) */
1276
- batchSize?: number;
1277
- }
1278
- /**
1279
- * Retention policy configuration
- * All fields are optional; the documented defaults apply when a field is omitted.
1280
- */
1281
- interface RetentionPolicy {
1282
- /** Maximum age of logs in days (default: 90) */
1283
- maxAgeDays?: number;
1284
- /** Maximum number of logs to keep (default: unlimited) */
1285
- maxLogs?: number;
1286
- /** Enable automatic cleanup (default: false) */
1287
- autoCleanup?: boolean;
1288
- /** Cleanup interval in hours (default: 24); presumably only relevant when autoCleanup is enabled */
1289
- cleanupIntervalHours?: number;
1290
- }
1291
- /**
1292
- * Persistent audit logger options
- * Only `database` is required.
1293
- */
1294
- interface PersistentAuditLoggerOptions {
1295
- /** Database configuration */
1296
- database: AuditDatabaseConfig;
1297
- /** Retention policy */
1298
- retention?: RetentionPolicy;
1299
- /** Enable cryptographic hashing for tamper detection (default: true) */
1300
- enableHashing?: boolean;
1301
- /** Hash algorithm (default: 'sha256') */
1302
- hashAlgorithm?: 'sha256' | 'sha512';
1303
- /** Enable write-ahead logging for crash recovery (default: true) */
1304
- enableWAL?: boolean;
1305
- /** Secret key for HMAC hashing (optional, recommended for production); presumably an unkeyed hash is used when omitted - confirm */
1306
- secretKey?: string;
1307
- }
1308
- /**
1309
- * Audit log entry with cryptographic hash
- * Extends AuditLogEntry with the fields needed to link entries into a verifiable chain.
1310
- */
1311
- interface HashedAuditLogEntry extends AuditLogEntry {
1312
- /** Cryptographic hash of this entry */
1313
- hash: string;
1314
- /** Hash of previous entry for chain verification (optional; presumably absent on the first entry of a chain) */
1315
- previousHash?: string;
1316
- /** Sequence number in the log chain */
1317
- sequence: number;
1318
- }
1319
- /**
1320
- * Audit database adapter interface
- * Every operation is asynchronous (returns a Promise).
1321
- */
1322
- interface IAuditDatabaseAdapter {
1323
- /** Initialize the database/table/collection */
1324
- initialize(): Promise<void>;
1325
- /** Insert a single log entry */
1326
- insert(entry: HashedAuditLogEntry): Promise<void>;
1327
- /** Batch insert multiple entries */
1328
- batchInsert(entries: HashedAuditLogEntry[]): Promise<void>;
1329
- /** Query logs with filters */
1330
- query(filter: AuditQueryFilter): Promise<HashedAuditLogEntry[]>;
1331
- /** Get total count of logs (optionally restricted by a partial filter) */
1332
- count(filter?: Partial<AuditQueryFilter>): Promise<number>;
1333
- /** Delete logs older than date; resolves to a number (presumably the count of deleted entries) */
1334
- deleteOlderThan(date: Date): Promise<number>;
1335
- /** Get the last log entry, or null (presumably when the store is empty) */
1336
- getLastEntry(): Promise<HashedAuditLogEntry | null>;
1337
- /** Verify log chain integrity over an optional sequence range; `brokenAt` is presumably the sequence number where verification failed */
1338
- verifyChain(startSequence?: number, endSequence?: number): Promise<{
1339
- valid: boolean;
1340
- brokenAt?: number;
1341
- }>;
1342
- /** Close connection */
1343
- close(): Promise<void>;
1344
- }
1345
- /**
1346
- * Audit query filter
- * All fields are optional; how multiple fields combine (presumably logical AND) is not visible in this declaration.
1347
- */
1348
- interface AuditQueryFilter {
1349
- /** Filter by operation type */
1350
- operation?: AuditLogEntry['operation'];
1351
- /** Filter by user */
1352
- user?: string;
1353
- /** Filter by session ID */
1354
- sessionId?: string;
1355
- /** Filter by date range (start) */
1356
- startDate?: Date;
1357
- /** Filter by date range (end) */
1358
- endDate?: Date;
1359
- /** Filter by success status */
1360
- success?: boolean;
1361
- /** Limit results (maximum number of entries returned) */
1362
- limit?: number;
1363
- /** Offset for pagination (number of entries to skip) */
1364
- offset?: number;
1365
- /** Sort order ('asc' or 'desc'); the sort key is not stated here (presumably timestamp or sequence) */
1366
- sort?: 'asc' | 'desc';
1367
- }
1368
- /**
1369
- * Persistent Audit Logger with cryptographic chain verification
- * NOTE(review): the synchronous IAuditLogger methods (getLogs, getLogsByOperation, getLogsByDateRange, exportAsJson, exportAsCsv, getStats)
- * sit next to Promise-based counterparts (queryLogs, exportAsJsonAsync, exportAsCsvAsync, getStatsAsync). The adapter API is fully
- * asynchronous, so the synchronous variants presumably cannot read from the backend - confirm, and prefer the async methods.
1370
- */
1371
- declare class PersistentAuditLogger implements IAuditLogger {
1372
- private adapter;
1373
- private options;
1374
- private batchBuffer;
1375
- private lastHash;
1376
- private sequence;
1377
- private cleanupTimer?;
1378
- private initialized;
1379
- constructor(options: PersistentAuditLoggerOptions);
1380
- /**
1381
- * Initialize the logger (must be called before use)
1382
- */
1383
- initialize(): Promise<void>;
1384
- /**
1385
- * Log an audit entry
- * Returns void even though the adapter writes are Promise-based; the private `batchBuffer` suggests entries are buffered,
- * so presumably flushBatch() or close() is needed to guarantee persistence - confirm.
1386
- */
1387
- log(entry: Omit<AuditLogEntry, 'id' | 'timestamp'>): void;
1388
- /**
1389
- * Get all audit logs
- * Synchronous variant; see queryLogs() for the Promise-based query.
1390
- */
1391
- getLogs(): AuditLogEntry[];
1392
- /**
1393
- * Query logs with filters (async)
1394
- */
1395
- queryLogs(filter?: AuditQueryFilter): Promise<HashedAuditLogEntry[]>;
1396
- /**
1397
- * Get logs by operation type
- * NOTE(review): the parameter is underscore-prefixed, which suggests the implementation ignores it - confirm; queryLogs({ operation }) is the async alternative.
1398
- */
1399
- getLogsByOperation(_operation: AuditLogEntry['operation']): AuditLogEntry[];
1400
- /**
1401
- * Get logs by date range
- * NOTE(review): both parameters are underscore-prefixed, which suggests the implementation ignores them - confirm; queryLogs({ startDate, endDate }) is the async alternative.
1402
- */
1403
- getLogsByDateRange(_startDate: Date, _endDate: Date): AuditLogEntry[];
1404
- /**
1405
- * Export logs as JSON
1406
- */
1407
- exportAsJson(): string;
1408
- /**
1409
- * Export logs as JSON (async)
- * @param filter Optional query filter restricting the exported entries.
1410
- */
1411
- exportAsJsonAsync(filter?: AuditQueryFilter): Promise<string>;
1412
- /**
1413
- * Export logs as CSV
1414
- */
1415
- exportAsCsv(): string;
1416
- /**
1417
- * Export logs as CSV (async)
- * @param filter Optional query filter restricting the exported entries.
1418
- */
1419
- exportAsCsvAsync(filter?: AuditQueryFilter): Promise<string>;
1420
- /**
1421
- * Clear all audit logs (dangerous!)
1422
- */
1423
- clear(): void;
1424
- /**
1425
- * Delete logs older than specified date
- * @returns Promise resolving to a number (presumably the count of deleted entries).
1426
- */
1427
- deleteOlderThan(date: Date): Promise<number>;
1428
- /**
1429
- * Get audit statistics
1430
- */
1431
- getStats(): AuditStats;
1432
- /**
1433
- * Get audit statistics (async)
1434
- */
1435
- getStatsAsync(filter?: AuditQueryFilter): Promise<AuditStats>;
1436
- /**
1437
- * Verify log chain integrity
- * @param startSequence Optional first sequence number to verify.
- * @param endSequence Optional last sequence number to verify.
- * @returns Promise resolving to `valid`, an optional `brokenAt` (presumably the failing sequence number) and a `message`.
1438
- */
1439
- verifyChainIntegrity(startSequence?: number, endSequence?: number): Promise<{
1440
- valid: boolean;
1441
- brokenAt?: number;
1442
- message: string;
1443
- }>;
1444
- /**
1445
- * Flush batch buffer to database
1446
- */
1447
- flushBatch(): Promise<void>;
1448
- /**
1449
- * Close the logger and flush any pending logs
1450
- */
1451
- close(): Promise<void>;
1452
- /**
1453
- * Create hashed entry with chain linking
1454
- */
1455
- private createHashedEntry;
1456
- /**
1457
- * Calculate cryptographic hash of entry
1458
- */
1459
- private calculateHash;
1460
- /**
1461
- * Generate unique ID
1462
- */
1463
- private generateId;
1464
- /**
1465
- * Create database adapter based on backend
1466
- */
1467
- private createAdapter;
1468
- /**
1469
- * Start automatic cleanup schedule
1470
- */
1471
- private startCleanupSchedule;
1472
- /**
1473
- * Run cleanup based on retention policy
1474
- */
1475
- private runCleanup;
1476
- }
1477
- /**
1478
- * Create a persistent audit logger
- * The factory is synchronous and returns the logger directly (not a Promise), so callers presumably still have to await
- * PersistentAuditLogger.initialize(), which is documented as required before use.
1479
- */
1480
- declare function createPersistentAuditLogger(options: PersistentAuditLoggerOptions): PersistentAuditLogger;
1481
-
1482
- /**
1483
- * Metrics collection and export for monitoring redaction operations
1484
- */
1485
-
1486
- /**
1487
- * In-memory metrics collector and exporter
1488
- * Collects metrics and provides Prometheus and StatsD export formats
- * Implements both IMetricsCollector and IMetricsExporter in a single class.
1489
- */
1490
- declare class InMemoryMetricsCollector implements IMetricsCollector, IMetricsExporter {
1491
- private metrics;
1492
- constructor();
1493
- /**
1494
- * Create empty metrics object
1495
- */
1496
- private createEmptyMetrics;
1497
- /**
1498
- * Record a redaction operation
- * @param result Detection result of the operation.
- * @param processingTimeMs Processing time in milliseconds.
- * @param redactionMode Redaction mode that was used.
1499
- */
1500
- recordRedaction(result: DetectionResult, processingTimeMs: number, redactionMode: RedactionMode): void;
1501
- /**
1502
- * Record an error
1503
- */
1504
- recordError(): void;
1505
- /**
1506
- * Get metrics exporter
- * Presumably returns this same object, since the class implements IMetricsExporter itself - confirm.
1507
- */
1508
- getExporter(): IMetricsExporter;
1509
- /**
1510
- * Get current metrics snapshot
1511
- */
1512
- getMetrics(): RedactionMetrics;
1513
- /**
1514
- * Reset all metrics
1515
- */
1516
- reset(): void;
1517
- /**
1518
- * Export metrics in Prometheus format
- * @param metrics Optional metrics to export (presumably the collector's current metrics when omitted).
- * @param prefix Optional metric-name prefix (default not visible here).
- * @returns The exposition text as a single string.
1519
- */
1520
- exportPrometheus(metrics?: RedactionMetrics, prefix?: string): string;
1521
- /**
1522
- * Export metrics in StatsD format
- * @param metrics Optional metrics to export (presumably the collector's current metrics when omitted).
- * @param prefix Optional metric-name prefix (default not visible here).
- * @returns An array of strings (presumably one StatsD line per metric).
1523
- */
1524
- exportStatsD(metrics?: RedactionMetrics, prefix?: string): string[];
1525
- }
1526
-
1527
- /**
1528
- * Prometheus metrics HTTP server for monitoring
1529
- * Exposes /metrics endpoint for Prometheus scraping
1530
- */
1531
-
1532
- /**
1533
- * Prometheus server options
- * All fields are optional. Note that the documented default host binds every interface and authentication is optional,
- * so set `username`/`password` or a loopback `host` when the endpoint must not be publicly reachable.
1534
- */
1535
- interface PrometheusServerOptions {
1536
- /** Port to listen on (default: 9090) */
1537
- port?: number;
1538
- /** Host to bind to (default: '0.0.0.0', i.e. all interfaces) */
1539
- host?: string;
1540
- /** Metrics path (default: '/metrics') */
1541
- metricsPath?: string;
1542
- /** Metrics prefix (default: 'openredaction') */
1543
- prefix?: string;
1544
- /** Health check path (default: '/health') */
1545
- healthPath?: string;
1546
- /** Enable CORS (default: false) */
1547
- enableCors?: boolean;
1548
- /** Basic auth username (optional); presumably auth is only enforced when both username and password are set - confirm */
1549
- username?: string;
1550
- /** Basic auth password (optional) */
1551
- password?: string;
1552
- }
1553
- /**
1554
- * Prometheus metrics HTTP server
1555
- * Provides a lightweight HTTP server for exposing metrics to Prometheus
- * Constructed from an IMetricsCollector plus optional PrometheusServerOptions; start() and stop() are asynchronous.
1556
- */
1557
- declare class PrometheusServer {
1558
- private server?;
1559
- private metricsCollector;
1560
- private options;
1561
- private isRunning;
1562
- private requestCount;
1563
- private lastScrapeTime?;
1564
- constructor(metricsCollector: IMetricsCollector, options?: PrometheusServerOptions);
1565
- /**
1566
- * Start the Prometheus metrics server
- * @returns Promise that settles once startup has finished (presumably when the server is listening).
1567
- */
1568
- start(): Promise<void>;
1569
- /**
1570
- * Stop the server
1571
- */
1572
- stop(): Promise<void>;
1573
- /**
1574
- * Handle incoming HTTP requests
1575
- */
1576
- private handleRequest;
1577
- /**
1578
- * Handle /metrics endpoint
1579
- */
1580
- private handleMetrics;
1581
- /**
1582
- * Handle /health endpoint
1583
- */
1584
- private handleHealth;
1585
- /**
1586
- * Handle / root endpoint
1587
- */
1588
- private handleRoot;
1589
- /**
1590
- * Validate basic authentication
1591
- */
1592
- private validateAuth;
1593
- /**
1594
- * Get server-specific metrics in Prometheus format
1595
- */
1596
- private getServerMetrics;
1597
- /**
1598
- * Get server statistics
- * @returns Running state, request count, optional last scrape time, `uptime` (unit not stated in this declaration) and the bound host/port/metrics path.
1599
- */
1600
- getStats(): {
1601
- isRunning: boolean;
1602
- requestCount: number;
1603
- lastScrapeTime?: Date;
1604
- uptime: number;
1605
- host: string;
1606
- port: number;
1607
- metricsPath: string;
1608
- };
1609
- }
1610
- /**
1611
- * Create a Prometheus server instance
- * Returns the server object synchronously; it is presumably not listening until start() is called.
1612
- */
1613
- declare function createPrometheusServer(metricsCollector: IMetricsCollector, options?: PrometheusServerOptions): PrometheusServer;
1614
- /**
1615
- * Example Grafana dashboard JSON for OpenRedaction metrics
1616
- * Can be imported directly into Grafana
- * Shape: a `dashboard` with title, tags, timezone and a list of panels, each panel carrying query targets (`expr`, `legendFormat`).
1617
- */
1618
- declare const GRAFANA_DASHBOARD_TEMPLATE: {
1619
- dashboard: {
1620
- title: string;
1621
- tags: string[];
1622
- timezone: string;
1623
- panels: {
1624
- id: number;
1625
- title: string;
1626
- type: string;
1627
- targets: {
1628
- expr: string;
1629
- legendFormat: string;
1630
- }[];
1631
- }[];
1632
- };
1633
- };
1634
-
1635
- /**
1636
- * RBAC Manager for role-based access control
1637
- */
1638
-
1639
- /**
1640
- * Default RBAC Manager implementation
1641
- * Provides role-based permission checking and pattern filtering
- * The constructor's `role` is optional; the role used when it is omitted is not visible in this declaration.
1642
- */
1643
- declare class RBACManager implements IRBACManager {
1644
- private role;
1645
- constructor(role?: Role);
1646
- /**
1647
- * Check if current role has a specific permission
1648
- */
1649
- hasPermission(permission: Permission): boolean;
1650
- /**
1651
- * Check if current role has all specified permissions
- * NOTE(review): the result for an empty `permissions` array is not stated - confirm.
1652
- */
1653
- hasAllPermissions(permissions: Permission[]): boolean;
1654
- /**
1655
- * Check if current role has any of the specified permissions
- * NOTE(review): the result for an empty `permissions` array is not stated - confirm.
1656
- */
1657
- hasAnyPermission(permissions: Permission[]): boolean;
1658
- /**
1659
- * Get current role
1660
- */
1661
- getRole(): Role;
1662
- /**
1663
- * Set role (updates permissions)
1664
- */
1665
- setRole(role: Role): void;
1666
- /**
1667
- * Get all permissions for current role
1668
- */
1669
- getPermissions(): Permission[];
1670
- /**
1671
- * Filter patterns based on read permissions
1672
- * Returns empty array if user lacks pattern:read permission
1673
- */
1674
- filterPatterns(patterns: PIIPattern[]): PIIPattern[];
1675
- }
1676
- /**
1677
- * Create RBAC manager with predefined or custom role
- * Unlike the RBACManager constructor, `role` is required here.
1678
- */
1679
- declare function createRBACManager(role: Role): RBACManager;
1680
-
1681
- /**
1682
- * Predefined RBAC roles with permission sets
1683
- */
1684
-
1685
- /**
1686
- * All available permissions (as a Permission array)
1687
- */
1688
- declare const ALL_PERMISSIONS: Permission[];
1689
- /**
1690
- * Admin role - full access to all operations (presumably carries every entry of ALL_PERMISSIONS - confirm)
1691
- */
1692
- declare const ADMIN_ROLE: Role;
1693
- /**
1694
- * Analyst role - can perform analysis and read audit/metrics
1695
- */
1696
- declare const ANALYST_ROLE: Role;
1697
- /**
1698
- * Operator role - can perform detections and basic operations
1699
- */
1700
- declare const OPERATOR_ROLE: Role;
1701
- /**
1702
- * Viewer role - read-only access to patterns, audit logs, and metrics
1703
- */
1704
- declare const VIEWER_ROLE: Role;
1705
- /**
1706
- * Get predefined role by name
- * @param roleName Name of the role to look up (accepted names and case-sensitivity are not visible in this declaration).
- * @returns The matching Role, or undefined when no predefined role matches.
1707
- */
1708
- declare function getPredefinedRole(roleName: string): Role | undefined;
1709
- /**
1710
- * Create a custom role with specific permissions
- * @param name Role name.
- * @param permissions Permissions granted to the role.
- * @param description Optional human-readable description.
- * @returns The new Role.
1711
- */
1712
- declare function createCustomRole(name: string, permissions: Permission[], description?: string): Role;
1713
-
1714
- /**
1715
- * OCR (Optical Character Recognition) processor using Tesseract.js
1716
- */
1717
-
1718
- /**
1719
- * OCR processor with optional Tesseract.js support
1720
- * Requires peer dependency: tesseract.js
- * Use isAvailable() to check for the dependency before calling the recognize methods.
1721
- */
1722
- declare class OCRProcessor implements IOCRProcessor {
1723
- private tesseract?;
1724
- private scheduler?;
1725
- constructor();
1726
- /**
1727
- * Extract text from image buffer using OCR
- * @param buffer Raw image bytes.
- * @param options Optional OCR options.
- * @returns Promise resolving to the OCRResult (behavior when tesseract.js is missing is not visible here - presumably rejects).
1728
- */
1729
- recognizeText(buffer: Buffer, options?: OCROptions): Promise<OCRResult>;
1730
- /**
1731
- * Check if OCR is available (tesseract.js installed)
1732
- */
1733
- isAvailable(): boolean;
1734
- /**
1735
- * Create a scheduler for batch OCR processing
1736
- * More efficient for processing multiple images
- * @param workerCount Optional number of workers (default not visible here).
- * @returns Promise typed `any` (presumably the tesseract.js scheduler object - confirm).
1737
- */
1738
- createScheduler(workerCount?: number): Promise<any>;
1739
- /**
1740
- * Batch process multiple images
- * NOTE(review): `_options` is underscore-prefixed, which suggests the implementation ignores it - confirm before relying on per-batch options.
1741
- */
1742
- recognizeBatch(buffers: Buffer[], _options?: OCROptions): Promise<OCRResult[]>;
1743
- /**
1744
- * Terminate any running scheduler
1745
- */
1746
- cleanup(): Promise<void>;
1747
- }
1748
- /**
1749
- * Create an OCR processor instance
- * Takes no arguments; whether OCR is actually usable can be checked with OCRProcessor.isAvailable().
1750
- */
1751
- declare function createOCRProcessor(): OCRProcessor;
1752
-
1753
- /**
1754
- * JSON document processor for PII detection and redaction in structured data
1755
- */
1756
-
1757
- /**
1758
- * JSON processing options
- * All fields are optional. Path options use dotted field paths as shown in the per-field examples.
1759
- */
1760
- interface JsonProcessorOptions {
1761
- /** Maximum depth for nested object traversal (default: 100) */
1762
- maxDepth?: number;
1763
- /** Whether to scan object keys for PII (default: false) */
1764
- scanKeys?: boolean;
1765
- /** Field paths to always redact (e.g., ['user.password', 'auth.token']) */
1766
- alwaysRedact?: string[];
1767
- /** Field paths to never scan (e.g., ['metadata.id', 'timestamp']) */
1768
- skipPaths?: string[];
1769
- /** Field names that indicate PII (boost confidence of detections found under those keys) */
1770
- piiIndicatorKeys?: string[];
1771
- /** Preserve JSON structure in redacted output (default: true) */
1772
- preserveStructure?: boolean;
1773
- }
1774
- /**
1775
- * JSON detection result with path tracking
- * Extends DetectionResult with per-path information.
1776
- */
1777
- interface JsonDetectionResult extends DetectionResult {
1778
- /** Paths where PII was detected (e.g., 'user.email', 'contacts[0].phone') */
1779
- pathsDetected: string[];
1780
- /** PII matches with path information, keyed by path string (presumably the same path syntax as pathsDetected) */
1781
- matchesByPath: Record<string, PIIDetection[]>;
1782
- }
1783
- /**
1784
- * Processor for JSON documents
- * Detection is delegated to an OpenRedaction instance passed in by the caller; the processor itself only holds default options.
1785
- */
1786
- declare class JsonProcessor {
1787
- private readonly defaultOptions;
1788
- /**
1789
- * Parse JSON from buffer or string
- * @returns The parsed value, typed `any` (behavior on malformed input is not visible here - presumably throws; see isValid()).
1790
- */
1791
- parse(input: Buffer | string): any;
1792
- /**
1793
- * Detect PII in JSON data
- * @param data Already-parsed JSON value.
- * @param detector OpenRedaction instance used for detection.
- * @param options Optional processing options.
1794
- */
1795
- detect(data: any, detector: OpenRedaction, options?: JsonProcessorOptions): JsonDetectionResult;
1796
- /**
1797
- * Redact PII in JSON data
- * @param data Already-parsed JSON value.
- * @param detectionResult Result previously returned by detect() for this data.
- * @param options Optional processing options.
- * @returns The redacted value, typed `any` (whether `data` is mutated or copied is not visible here - confirm).
1798
- */
1799
- redact(data: any, detectionResult: JsonDetectionResult, options?: JsonProcessorOptions): any;
1800
- /**
1801
- * Redact specific paths in JSON while preserving structure
1802
- */
1803
- private redactPreservingStructure;
1804
- /**
1805
- * Simple text-based redaction (fallback)
1806
- */
1807
- private redactText;
1808
- /**
1809
- * Traverse JSON structure and call callback for each value
1810
- */
1811
- private traverse;
1812
- /**
1813
- * Check if value is primitive (string, number, boolean)
1814
- */
1815
- private isPrimitive;
1816
- /**
1817
- * Check if path should be skipped
1818
- */
1819
- private shouldSkip;
1820
- /**
1821
- * Check if path should always be redacted
1822
- */
1823
- private shouldAlwaysRedact;
1824
- /**
1825
- * Boost confidence if key name indicates PII
1826
- */
1827
- private boostConfidenceFromKey;
1828
- /**
1829
- * Extract all text values from JSON for simple text-based detection
- * @returns A single string (how the values are joined is not visible in this declaration).
1830
- */
1831
- extractText(data: any, options?: JsonProcessorOptions): string;
1832
- /**
1833
- * Validate JSON buffer/string
- * @returns A boolean (presumably true when the input parses as JSON).
1834
- */
1835
- isValid(input: Buffer | string): boolean;
1836
- /**
1837
- * Parse JSON Lines (JSONL) input - split by newlines and parse each line
- * @returns An array of parsed values (presumably one per line - confirm how blank lines are handled).
1838
- */
1839
- parseJsonLines(input: Buffer | string): any[];
1840
- /**
1841
- * Detect PII in JSON Lines format
- * @returns An array of JsonDetectionResult (presumably one per parsed line).
1842
- */
1843
- detectJsonLines(input: Buffer | string, detector: OpenRedaction, options?: JsonProcessorOptions): JsonDetectionResult[];
1844
- }
1845
- /**
1846
- * Create a JSON processor instance
- * Takes no arguments; per-call behavior is configured through the JsonProcessorOptions passed to each method.
1847
- */
1848
- declare function createJsonProcessor(): JsonProcessor;
1849
-
1850
- /**
1851
- * CSV document processor for PII detection and redaction in tabular data
1852
- */
1853
-
1854
- /**
1855
- * CSV processing options
- * All fields are optional; delimiter and header presence are auto-detected when not supplied.
1856
- */
1857
- interface CsvProcessorOptions {
1858
- /** CSV delimiter (default: auto-detect from ',', '\t', ';', '|') */
1859
- delimiter?: string;
1860
- /** Whether CSV has header row (default: auto-detect) */
1861
- hasHeader?: boolean;
1862
- /** Quote character (default: '"') */
1863
- quote?: string;
1864
- /** Escape character (default: '"') */
1865
- escape?: string;
1866
- /** Skip empty lines (default: true) */
1867
- skipEmptyLines?: boolean;
1868
- /** Maximum rows to process (default: unlimited) */
1869
- maxRows?: number;
1870
- /** Column indices to always redact (0-indexed) */
1871
- alwaysRedactColumns?: number[];
1872
- /** Column names to always redact (requires hasHeader: true) */
1873
- alwaysRedactColumnNames?: string[];
1874
- /** Column indices to skip scanning (0-indexed) */
1875
- skipColumns?: number[];
1876
- /** Column names that indicate PII (boost confidence) */
1877
- piiIndicatorNames?: string[];
1878
- /** Treat first row as header for detection purposes (NOTE(review): how this interacts with `hasHeader` is not visible here - confirm) */
1879
- treatFirstRowAsHeader?: boolean;
1880
- }
1881
- /**
1882
- * CSV detection result with column tracking
- * NOTE(review): `original`, `redacted`, `detections`, `redactionMap` and `stats` are declared here although the interface
- * already extends DetectionResult; presumably they restate or narrow the inherited members - confirm against DetectionResult.
1883
- */
1884
- interface CsvDetectionResult extends DetectionResult {
1885
- /** Total rows processed */
1886
- rowCount: number;
1887
- /** Column count */
1888
- columnCount: number;
1889
- /** Column headers (if detected) */
1890
- headers?: string[];
1891
- /** PII statistics by column index */
1892
- columnStats: Record<number, ColumnStats$1>;
1893
- /** PII matches by row and column (one entry per cell that contains matches) */
1894
- matchesByCell: CellMatch$1[];
1895
- /** Original text */
1896
- original: string;
1897
- /** Redacted text */
1898
- redacted: string;
1899
- /** Array of detections */
1900
- detections: PIIDetection[];
1901
- /** Redaction map */
1902
- redactionMap: Record<string, string>;
1903
- /** Statistics (optional): `piiCount` is required, `processingTime` is optional and its unit is not stated here */
1904
- stats?: {
1905
- processingTime?: number;
1906
- piiCount: number;
1907
- };
1908
- }
1909
- /**
1910
- * Column PII statistics for CSV data (the `$1` suffix distinguishes it from the XLSX `ColumnStats` declared later in this bundle)
1911
- */
1912
- interface ColumnStats$1 {
1913
- /** Column index (presumably 0-indexed, matching the CSV options) */
1914
- columnIndex: number;
1915
- /** Column name (if header available) */
1916
- columnName?: string;
1917
- /** Number of PII instances found */
1918
- piiCount: number;
1919
- /** Percentage of rows with PII (0-100) */
1920
- piiPercentage: number;
1921
- /** PII types found in this column */
1922
- piiTypes: string[];
1923
- }
1924
- /**
1925
- * Cell-level PII match for CSV data (the `$1` suffix distinguishes it from the XLSX `CellMatch` used by SheetDetectionResult)
1926
- */
1927
- interface CellMatch$1 {
1928
- /** Row index (0-indexed, excluding header if present) */
1929
- row: number;
1930
- /** Column index (0-indexed) */
1931
- column: number;
1932
- /** Column name (if header available) */
1933
- columnName?: string;
1934
- /** Cell value (presumably the original, unredacted text - confirm) */
1935
- value: string;
1936
- /** PII matches in this cell */
1937
- matches: PIIDetection[];
1938
- }
1939
- /**
1940
- * Parsed CSV row, as returned by CsvProcessor.parse()
1941
- */
1942
- interface CsvRow {
1943
- /** Row index (NOTE(review): whether this counts the header row is not stated here - confirm) */
1944
- index: number;
1945
- /** Cell values, in column order */
1946
- values: string[];
1947
- }
1948
- /**
1949
- * CSV processor for tabular data
- * Detection is delegated to an OpenRedaction instance passed in by the caller; every public method accepts the raw CSV as Buffer or string.
1950
- */
1951
- declare class CsvProcessor {
1952
- private readonly defaultOptions;
1953
- /**
1954
- * Parse CSV from buffer or string
- * @returns The parsed rows as CsvRow objects.
1955
- */
1956
- parse(input: Buffer | string, options?: CsvProcessorOptions): CsvRow[];
1957
- /**
1958
- * Detect PII in CSV data
- * @param input Raw CSV content.
- * @param detector OpenRedaction instance used for detection.
- * @param options Optional processing options.
1959
- */
1960
- detect(input: Buffer | string, detector: OpenRedaction, options?: CsvProcessorOptions): CsvDetectionResult;
1961
- /**
1962
- * Redact PII in CSV data
- * @param input Raw CSV content.
- * @param detectionResult Result previously returned by detect() (presumably for the same input and options - confirm).
- * @param options Optional processing options.
- * @returns The redacted CSV as a string.
1963
- */
1964
- redact(input: Buffer | string, detectionResult: CsvDetectionResult, options?: CsvProcessorOptions): string;
1965
- /**
1966
- * Parse a single CSV row
1967
- */
1968
- private parseRow;
1969
- /**
1970
- * Format a row as CSV
1971
- */
1972
- private formatRow;
1973
- /**
1974
- * Auto-detect CSV delimiter
1975
- */
1976
- private detectDelimiter;
1977
- /**
1978
- * Detect if first row is likely a header
1979
- */
1980
- private detectHeader;
1981
- /**
1982
- * Boost confidence if column name indicates PII
1983
- */
1984
- private boostConfidenceFromColumnName;
1985
- /**
1986
- * Extract all cell values as text
- * @returns A single string (how cells are joined is not visible in this declaration).
1987
- */
1988
- extractText(input: Buffer | string, options?: CsvProcessorOptions): string;
1989
- /**
1990
- * Get column statistics without full PII detection
- * @returns Column and row counts, optional headers, and `sampleRows` as arrays of cell strings (the sample size is not stated here).
1991
- */
1992
- getColumnInfo(input: Buffer | string, options?: CsvProcessorOptions): {
1993
- columnCount: number;
1994
- rowCount: number;
1995
- headers?: string[];
1996
- sampleRows: string[][];
1997
- };
1998
- }
1999
- /**
2000
- * Create a CSV processor instance
- * Takes no arguments; per-call behavior is configured through the CsvProcessorOptions passed to each method.
2001
- */
2002
- declare function createCsvProcessor(): CsvProcessor;
2003
-
2004
- /**
2005
- * XLSX/Excel document processor for PII detection and redaction in spreadsheets
2006
- */
2007
-
2008
- /**
2009
- * XLSX processing options
- * All fields are optional. NOTE(review): how `sheets` and `sheetIndices` combine when both are given is not visible here - confirm.
2010
- */
2011
- interface XlsxProcessorOptions {
2012
- /** Sheet names to process (default: all sheets) */
2013
- sheets?: string[];
2014
- /** Sheet indices to process (0-indexed, default: all sheets) */
2015
- sheetIndices?: number[];
2016
- /** Whether to treat first row as header (default: auto-detect) */
2017
- hasHeader?: boolean;
2018
- /** Maximum rows per sheet to process (default: unlimited) */
2019
- maxRows?: number;
2020
- /** Column indices to always redact (0-indexed) */
2021
- alwaysRedactColumns?: number[];
2022
- /** Column names to always redact (requires hasHeader: true) */
2023
- alwaysRedactColumnNames?: string[];
2024
- /** Column indices to skip scanning (0-indexed) */
2025
- skipColumns?: number[];
2026
- /** Column names that indicate PII (boost confidence) */
2027
- piiIndicatorNames?: string[];
2028
- /** Preserve cell formatting (default: true) */
2029
- preserveFormatting?: boolean;
2030
- /** Preserve formulas (default: true, redact values but keep formula) */
2031
- preserveFormulas?: boolean;
2032
- }
2033
- /**
2034
- * XLSX detection result with sheet and cell tracking
2035
- */
2036
- interface XlsxDetectionResult extends DetectionResult {
2037
- /** Results by sheet */
2038
- sheetResults: SheetDetectionResult[];
2039
- /** Total sheets processed */
2040
- sheetCount: number;
2041
- }
2042
- /**
2043
- * Sheet-level detection result
2044
- */
2045
- interface SheetDetectionResult {
2046
- /** Sheet name */
2047
- sheetName: string;
2048
- /** Sheet index */
2049
- sheetIndex: number;
2050
- /** Total rows in sheet */
2051
- rowCount: number;
2052
- /** Column count */
2053
- columnCount: number;
2054
- /** Column headers (if detected) */
2055
- headers?: string[];
2056
- /** Column statistics */
2057
- columnStats: Record<number, ColumnStats>;
2058
- /** Cell matches */
2059
- matchesByCell: CellMatch[];
2060
- }
2061
- /**
2062
- * Column PII statistics
2063
- */
2064
- interface ColumnStats {
2065
- /** Column index */
2066
- columnIndex: number;
2067
- /** Column letter (A, B, C, etc.) */
2068
- columnLetter: string;
2069
- /** Column name (if header available) */
2070
- columnName?: string;
2071
- /** Number of PII instances found */
2072
- piiCount: number;
2073
- /** Percentage of rows with PII (0-100) */
2074
- piiPercentage: number;
2075
- /** PII types found in this column */
2076
- piiTypes: string[];
2077
- }
2078
- /**
2079
- * Cell-level PII match
2080
- */
2081
- interface CellMatch {
2082
- /** Cell reference (e.g., 'A1', 'B5') */
2083
- cell: string;
2084
- /** Row index (1-indexed, Excel style) */
2085
- row: number;
2086
- /** Column index (0-indexed) */
2087
- column: number;
2088
- /** Column letter */
2089
- columnLetter: string;
2090
- /** Column name (if header available) */
2091
- columnName?: string;
2092
- /** Cell value */
2093
- value: string;
2094
- /** Cell formula (if any) */
2095
- formula?: string;
2096
- /** PII matches in this cell */
2097
- matches: PIIDetection[];
2098
- }
2099
- /**
2100
- * XLSX processor for spreadsheet data
2101
- */
2102
- declare class XlsxProcessor {
2103
- private xlsx?;
2104
- private readonly defaultOptions;
2105
- constructor();
2106
- /**
2107
- * Check if XLSX support is available
2108
- */
2109
- isAvailable(): boolean;
2110
- /**
2111
- * Parse XLSX from buffer
2112
- */
2113
- parse(buffer: Buffer): any;
2114
- /**
2115
- * Detect PII in XLSX data
2116
- */
2117
- detect(buffer: Buffer, detector: OpenRedaction, options?: XlsxProcessorOptions): XlsxDetectionResult;
2118
- /**
2119
- * Detect PII in a single sheet
2120
- */
2121
- private detectSheet;
2122
- /**
2123
- * Redact PII in XLSX data
2124
- */
2125
- redact(buffer: Buffer, detectionResult: XlsxDetectionResult, options?: XlsxProcessorOptions): Buffer;
2126
- /**
2127
- * Get cell value as string
2128
- */
2129
- private getCellValue;
2130
- /**
2131
- * Get row values
2132
- */
2133
- private getRowValues;
2134
- /**
2135
- * Detect if first row is likely a header
2136
- */
2137
- private detectHeader;
2138
- /**
2139
- * Convert column index to letter (0 = A, 25 = Z, 26 = AA)
2140
- */
2141
- private columnToLetter;
2142
- /**
2143
- * Get sheet names to process based on options
2144
- */
2145
- private getSheetNamesToProcess;
2146
- /**
2147
- * Boost confidence if column name indicates PII
2148
- */
2149
- private boostConfidenceFromColumnName;
2150
- /**
2151
- * Extract all cell values as text
2152
- */
2153
- extractText(buffer: Buffer, options?: XlsxProcessorOptions): string;
2154
- /**
2155
- * Get workbook metadata
2156
- */
2157
- getMetadata(buffer: Buffer): {
2158
- sheetNames: string[];
2159
- sheetCount: number;
2160
- };
2161
- }
2162
- /**
2163
- * Create an XLSX processor instance
2164
- */
2165
- declare function createXlsxProcessor(): XlsxProcessor;
2166
-
2167
- /**
2168
- * Document text extraction with optional peer dependencies
2169
- */
2170
-
2171
- /**
2172
- * Document processor with optional PDF, DOCX, OCR, JSON, CSV, and XLSX support
2173
- * Requires peer dependencies:
2174
- * - pdf-parse (for PDF)
2175
- * - mammoth (for DOCX)
2176
- * - tesseract.js (for OCR/images)
2177
- * - xlsx (for Excel/XLSX)
2178
- */
2179
- declare class DocumentProcessor implements IDocumentProcessor {
2180
- private pdfParse?;
2181
- private mammoth?;
2182
- private ocrProcessor;
2183
- private jsonProcessor;
2184
- private csvProcessor;
2185
- private xlsxProcessor;
2186
- constructor();
2187
- /**
2188
- * Extract text from document buffer
2189
- */
2190
- extractText(buffer: Buffer, options?: DocumentOptions): Promise<string>;
2191
- /**
2192
- * Get document metadata
2193
- */
2194
- getMetadata(buffer: Buffer, options?: DocumentOptions): Promise<DocumentMetadata>;
2195
- /**
2196
- * Detect document format from buffer
2197
- */
2198
- detectFormat(buffer: Buffer): DocumentFormat | null;
2199
- /**
2200
- * Check if format is supported
2201
- */
2202
- isFormatSupported(format: DocumentFormat): boolean;
2203
- /**
2204
- * Extract text from PDF
2205
- */
2206
- private extractPdfText;
2207
- /**
2208
- * Extract text from DOCX
2209
- */
2210
- private extractDocxText;
2211
- /**
2212
- * Get PDF metadata
2213
- */
2214
- private getPdfMetadata;
2215
- /**
2216
- * Get DOCX metadata
2217
- */
2218
- private getDocxMetadata;
2219
- /**
2220
- * Extract text from image using OCR
2221
- */
2222
- private extractImageText;
2223
- /**
2224
- * Get image metadata
2225
- */
2226
- private getImageMetadata;
2227
- /**
2228
- * Extract text from JSON
2229
- */
2230
- private extractJsonText;
2231
- /**
2232
- * Extract text from CSV
2233
- */
2234
- private extractCsvText;
2235
- /**
2236
- * Extract text from XLSX
2237
- */
2238
- private extractXlsxText;
2239
- /**
2240
- * Get JSON metadata
2241
- */
2242
- private getJsonMetadata;
2243
- /**
2244
- * Get CSV metadata
2245
- */
2246
- private getCsvMetadata;
2247
- /**
2248
- * Get XLSX metadata
2249
- */
2250
- private getXlsxMetadata;
2251
- /**
2252
- * Get OCR processor instance
2253
- */
2254
- getOCRProcessor(): OCRProcessor;
2255
- /**
2256
- * Get JSON processor instance
2257
- */
2258
- getJsonProcessor(): JsonProcessor;
2259
- /**
2260
- * Get CSV processor instance
2261
- */
2262
- getCsvProcessor(): CsvProcessor;
2263
- /**
2264
- * Get XLSX processor instance
2265
- */
2266
- getXlsxProcessor(): XlsxProcessor;
2267
- }
2268
- /**
2269
- * Create a document processor instance
2270
- */
2271
- declare function createDocumentProcessor(): DocumentProcessor;
2272
-
2273
- /**
2274
- * Personal PII patterns (emails, names, etc.)
2275
- */
2276
-
2277
- declare const personalPatterns: PIIPattern[];
2278
-
2279
- /**
2280
- * Financial PII patterns (credit cards, bank accounts, etc.)
2281
- */
2282
-
2283
- declare const financialPatterns: PIIPattern[];
2284
-
2285
- /**
2286
- * Government ID patterns (passports, SSN, national IDs, etc.)
2287
- */
2288
-
2289
- declare const governmentPatterns: PIIPattern[];
2290
-
2291
- /**
2292
- * Contact information patterns (phones, addresses, etc.)
2293
- */
2294
-
2295
- declare const contactPatterns: PIIPattern[];
2296
-
2297
- /**
2298
- * Network-related PII patterns (IP addresses, MAC addresses, etc.)
2299
- */
2300
-
2301
- declare const networkPatterns: PIIPattern[];
2302
-
2303
- /**
2304
- * Export all pattern categories
2305
- */
2306
-
2307
- /**
2308
- * All default PII patterns
2309
- */
2310
- declare const allPatterns: PIIPattern[];
2311
- /**
2312
- * Get patterns by category
2313
- */
2314
- declare function getPatternsByCategory(category: string): PIIPattern[];
2315
-
2316
- /**
2317
- * Validators for PII pattern matching
2318
- */
2319
- /**
2320
- * Luhn algorithm validator for credit cards
2321
- * https://en.wikipedia.org/wiki/Luhn_algorithm
2322
- */
2323
- declare function validateLuhn(cardNumber: string, _context?: string): boolean;
2324
- /**
2325
- * IBAN validator with checksum verification
2326
- */
2327
- declare function validateIBAN(iban: string, _context?: string): boolean;
2328
- /**
2329
- * UK National Insurance Number validator
2330
- */
2331
- declare function validateNINO(nino: string, _context?: string): boolean;
2332
- /**
2333
- * UK NHS Number validator with checksum
2334
- */
2335
- declare function validateNHS(nhs: string, _context?: string): boolean;
2336
- /**
2337
- * UK Passport validator
2338
- */
2339
- declare function validateUKPassport(passport: string, _context?: string): boolean;
2340
- /**
2341
- * US Social Security Number validator (format check only)
2342
- */
2343
- declare function validateSSN(ssn: string, _context?: string): boolean;
2344
- /**
2345
- * UK Sort Code validator (format check)
2346
- */
2347
- declare function validateSortCode(sortCode: string, _context?: string): boolean;
2348
- /**
2349
- * Context-aware name validator to reduce false positives
2350
- */
2351
- declare function validateName(name: string, context: string): boolean;
2352
- /**
2353
- * Email validator with DNS check capability
2354
- */
2355
- declare function validateEmail(email: string, _context?: string): boolean;
2356
-
2357
- /**
2358
- * Compliance preset configurations
2359
- */
2360
-
2361
- /**
2362
- * GDPR compliance preset - European Union data protection
2363
- */
2364
- declare const gdprPreset: Partial<OpenRedactionOptions>;
2365
- /**
2366
- * HIPAA compliance preset - US healthcare data protection
2367
- */
2368
- declare const hipaaPreset: Partial<OpenRedactionOptions>;
2369
- /**
2370
- * CCPA compliance preset - California consumer privacy
2371
- */
2372
- declare const ccpaPreset: Partial<OpenRedactionOptions>;
2373
- /**
2374
- * Get preset configuration by name
2375
- */
2376
- declare function getPreset(name: string): Partial<OpenRedactionOptions>;
2377
-
2378
- interface OpenRedactionConfig extends OpenRedactionOptions {
2379
- extends?: string | string[];
2380
- learnedPatterns?: string;
2381
- learningOptions?: {
2382
- autoSave?: boolean;
2383
- confidenceThreshold?: number;
2384
- };
2385
- }
2386
- /**
2387
- * Load configuration from .openredaction.config.js
2388
- */
2389
- declare class ConfigLoader {
2390
- private configPath;
2391
- private searchPaths;
2392
- constructor(configPath?: string, cwd?: string);
2393
- /**
2394
- * Find config file in search paths
2395
- */
2396
- private findConfigFile;
2397
- /**
2398
- * Load config file
2399
- */
2400
- load(): Promise<OpenRedactionConfig | null>;
2401
- /**
2402
- * Resolve presets and extends
2403
- */
2404
- resolveConfig(config: OpenRedactionConfig): OpenRedactionOptions;
2405
- /**
2406
- * Load built-in preset
2407
- */
2408
- private loadPreset;
2409
- /**
2410
- * Create a default config file
2411
- */
2412
- static createDefaultConfig(outputPath?: string): void;
2413
- }
2414
-
2415
- /**
2416
- * Lightweight NER (Named Entity Recognition) integration using compromise.js
2417
- * Provides semantic detection to complement regex-based pattern matching
2418
- */
2419
-
2420
- /**
2421
- * NER entity types supported
2422
- */
2423
- type NEREntityType = 'PERSON' | 'ORGANIZATION' | 'PLACE' | 'DATE' | 'MONEY' | 'PHONE' | 'EMAIL' | 'URL';
2424
- /**
2425
- * NER detection result
2426
- */
2427
- interface NERMatch {
2428
- /** Entity type */
2429
- type: NEREntityType;
2430
- /** Matched text */
2431
- text: string;
2432
- /** Start position in text */
2433
- start: number;
2434
- /** End position in text */
2435
- end: number;
2436
- /** Confidence from NER (0-1) */
2437
- confidence: number;
2438
- /** Additional context */
2439
- context?: {
2440
- sentence?: string;
2441
- tags?: string[];
2442
- };
2443
- }
2444
- /**
2445
- * Hybrid detection result (regex + NER)
2446
- */
2447
- interface HybridMatch extends PIIMatch {
2448
- /** Whether this match was confirmed by NER */
2449
- nerConfirmed: boolean;
2450
- /** NER confidence if confirmed */
2451
- nerConfidence?: number;
2452
- }
2453
- /**
2454
- * NER Detector using compromise.js
2455
- * Lightweight NLP library (7KB) for English text analysis
2456
- */
2457
- declare class NERDetector {
2458
- private nlp?;
2459
- private available;
2460
- constructor();
2461
- /**
2462
- * Check if NER is available (compromise.js installed)
2463
- */
2464
- isAvailable(): boolean;
2465
- /**
2466
- * Detect named entities in text
2467
- */
2468
- detect(text: string): NERMatch[];
2469
- /**
2470
- * Check if a regex match is confirmed by NER
2471
- */
2472
- isConfirmedByNER(regexMatch: PIIMatch, nerMatches: NERMatch[]): {
2473
- confirmed: boolean;
2474
- confidence?: number;
2475
- };
2476
- /**
2477
- * Boost confidence of regex matches that are confirmed by NER
2478
- */
2479
- hybridDetection(regexMatches: PIIMatch[], text: string): HybridMatch[];
2480
- /**
2481
- * Calculate overlap between two ranges (0-1)
2482
- */
2483
- private calculateOverlap;
2484
- /**
2485
- * Remove duplicate NER matches
2486
- */
2487
- private deduplicateMatches;
2488
- /**
2489
- * Extract sentence containing the match
2490
- */
2491
- private getSentence;
2492
- /**
2493
- * Find start of sentence
2494
- */
2495
- private findSentenceStart;
2496
- /**
2497
- * Find end of sentence
2498
- */
2499
- private findSentenceEnd;
2500
- /**
2501
- * Extract additional NER-only detections (entities not caught by regex)
2502
- */
2503
- extractNEROnly(nerMatches: NERMatch[], regexMatches: PIIMatch[]): NERMatch[];
2504
- }
2505
- /**
2506
- * Create an NER detector instance
2507
- */
2508
- declare function createNERDetector(): NERDetector;
2509
-
2510
- /**
2511
- * Severity Classification System
2512
- * Assigns severity levels to PII patterns and calculates risk scores
2513
- */
2514
-
2515
- /**
2516
- * Severity level for PII types
2517
- */
2518
- type SeverityLevel = 'critical' | 'high' | 'medium' | 'low';
2519
- /**
2520
- * Severity classification with reasoning
2521
- */
2522
- interface SeverityClassification {
2523
- /** Severity level */
2524
- level: SeverityLevel;
2525
- /** Numeric score (0-10) */
2526
- score: number;
2527
- /** Reasoning for classification */
2528
- reason?: string;
2529
- }
2530
- /**
2531
- * Risk score calculation result
2532
- */
2533
- interface RiskScore {
2534
- /** Overall risk score (0-1) */
2535
- score: number;
2536
- /** Risk level */
2537
- level: 'very-high' | 'high' | 'medium' | 'low' | 'minimal';
2538
- /** Contributing factors */
2539
- factors: {
2540
- piiCount: number;
2541
- avgSeverity: number;
2542
- avgConfidence: number;
2543
- criticalCount: number;
2544
- highCount: number;
2545
- };
2546
- }
2547
- /**
2548
- * Default severity mappings by pattern type
2549
- */
2550
- declare const DEFAULT_SEVERITY_MAP: Record<string, SeverityLevel>;
2551
- /**
2552
- * Severity scores (for numeric calculations)
2553
- */
2554
- declare const SEVERITY_SCORES: Record<SeverityLevel, number>;
2555
- /**
2556
- * Severity Classifier
2557
- */
2558
- declare class SeverityClassifier {
2559
- private severityMap;
2560
- constructor(customMap?: Record<string, SeverityLevel>);
2561
- /**
2562
- * Classify severity for a pattern type
2563
- */
2564
- classify(patternType: string): SeverityClassification;
2565
- /**
2566
- * Ensure pattern has severity assigned
2567
- */
2568
- ensurePatternSeverity(pattern: PIIPattern): PIIPattern;
2569
- /**
2570
- * Ensure all patterns have severity
2571
- */
2572
- ensureAllSeverity(patterns: PIIPattern[]): PIIPattern[];
2573
- /**
2574
- * Calculate risk score for a set of detections
2575
- */
2576
- calculateRiskScore(detections: PIIDetection[]): RiskScore;
2577
- /**
2578
- * Get severity for a pattern type
2579
- */
2580
- getSeverity(patternType: string): SeverityLevel;
2581
- /**
2582
- * Get severity score for a pattern type
2583
- */
2584
- getSeverityScore(patternType: string): number;
2585
- /**
2586
- * Add custom severity mapping
2587
- */
2588
- addSeverityMapping(patternType: string, severity: SeverityLevel): void;
2589
- /**
2590
- * Get all severity mappings
2591
- */
2592
- getSeverityMap(): Record<string, SeverityLevel>;
2593
- /**
2594
- * Filter detections by severity threshold
2595
- */
2596
- filterBySeverity(detections: PIIDetection[], minSeverity: SeverityLevel): PIIDetection[];
2597
- /**
2598
- * Group detections by severity
2599
- */
2600
- groupBySeverity(detections: PIIDetection[]): Record<SeverityLevel, PIIDetection[]>;
2601
- }
2602
- /**
2603
- * Create a severity classifier instance
2604
- */
2605
- declare function createSeverityClassifier(customMap?: Record<string, SeverityLevel>): SeverityClassifier;
2606
- /**
2607
- * Quick helper to get severity for a pattern type
2608
- */
2609
- declare function getSeverity(patternType: string): SeverityLevel;
2610
- /**
2611
- * Quick helper to calculate risk score
2612
- */
2613
- declare function calculateRisk(detections: PIIDetection[]): RiskScore;
2614
-
2615
- /**
2616
- * False Positive Detection and Filtering
2617
- * Identifies and filters out common false positives
2618
- */
2619
- interface FalsePositiveRule {
2620
- /** Pattern type this rule applies to */
2621
- patternType: string | string[];
2622
- /** Matching function */
2623
- matcher: (value: string, context: string) => boolean;
2624
- /** Description of the false positive */
2625
- description: string;
2626
- /** Severity of the false positive (how confident we are it's not PII) */
2627
- severity: 'high' | 'medium' | 'low';
2628
- }
2629
- /**
2630
- * Common false positive rules
2631
- */
2632
- declare const commonFalsePositives: FalsePositiveRule[];
2633
- /**
2634
- * Check if a detection is a false positive
2635
- */
2636
- declare function isFalsePositive(value: string, patternType: string, context: string, rules?: FalsePositiveRule[]): {
2637
- isFalsePositive: boolean;
2638
- matchedRule?: FalsePositiveRule;
2639
- confidence: number;
2640
- };
2641
- /**
2642
- * Filter out false positives from detections
2643
- */
2644
- declare function filterFalsePositives<T extends {
2645
- value: string;
2646
- type: string;
2647
- }>(detections: T[], getText: (detection: T) => {
2648
- value: string;
2649
- context: string;
2650
- }, threshold?: number): T[];
2651
-
2652
- /**
2653
- * Multi-pass Detection System
2654
- * Processes patterns in priority-based passes for better accuracy
2655
- */
2656
-
2657
- /**
2658
- * Detection pass configuration
2659
- */
2660
- interface DetectionPass {
2661
- /** Pass name for debugging */
2662
- name: string;
2663
- /** Minimum priority for this pass */
2664
- minPriority: number;
2665
- /** Maximum priority for this pass */
2666
- maxPriority: number;
2667
- /** Pattern types to include (optional filter) */
2668
- includeTypes?: string[];
2669
- /** Pattern types to exclude (optional filter) */
2670
- excludeTypes?: string[];
2671
- /** Description of what this pass detects */
2672
- description: string;
2673
- }
2674
- /**
2675
- * Default multi-pass configuration
2676
- * Processes patterns in priority order from highest to lowest
2677
- */
2678
- declare const defaultPasses: DetectionPass[];
2679
- /**
2680
- * Group patterns into passes based on priority
2681
- */
2682
- declare function groupPatternsByPass(patterns: PIIPattern[], passes?: DetectionPass[]): Map<string, PIIPattern[]>;
2683
- /**
2684
- * Statistics for multi-pass detection
2685
- */
2686
- interface MultiPassStats {
2687
- /** Total passes executed */
2688
- totalPasses: number;
2689
- /** Detections per pass */
2690
- detectionsPerPass: Map<string, number>;
2691
- /** Patterns processed per pass */
2692
- patternsPerPass: Map<string, number>;
2693
- /** Time spent per pass (ms) */
2694
- timePerPass: Map<string, number>;
2695
- /** Total processing time (ms) */
2696
- totalTime: number;
2697
- }
2698
- /**
2699
- * Merge detections from multiple passes
2700
- * Earlier passes (higher priority) take precedence for overlapping ranges
2701
- */
2702
- declare function mergePassDetections(passDetections: Map<string, PIIDetection[]>, passes: DetectionPass[]): PIIDetection[];
2703
- /**
2704
- * Create a simple multi-pass configuration for common use cases
2705
- */
2706
- declare function createSimpleMultiPass(options?: {
2707
- /** Number of passes (2-5, default: 3) */
2708
- numPasses?: number;
2709
- /** Prioritize credentials first */
2710
- prioritizeCredentials?: boolean;
2711
- }): DetectionPass[];
2712
-
2713
- /**
2714
- * Streaming API for processing large documents
2715
- * Allows efficient processing of documents in chunks
2716
- */
2717
-
2718
- /**
2719
- * Chunk result for streaming detection
2720
- */
2721
- interface ChunkResult {
2722
- /** Chunk index */
2723
- chunkIndex: number;
2724
- /** Detections found in this chunk */
2725
- detections: PIIDetection[];
2726
- /** Redacted chunk text */
2727
- redactedChunk: string;
2728
- /** Original chunk text */
2729
- originalChunk: string;
2730
- /** Byte offset of this chunk in the original document */
2731
- byteOffset: number;
2732
- }
2733
- /**
2734
- * Streaming detection options
2735
- */
2736
- interface StreamingOptions {
2737
- /** Chunk size in characters (default: 2048) */
2738
- chunkSize?: number;
2739
- /** Overlap between chunks to catch patterns at boundaries (default: 100) */
2740
- overlap?: number;
2741
- /** Enable progressive redaction (default: true) */
2742
- progressiveRedaction?: boolean;
2743
- }
2744
- /**
2745
- * Streaming detector for large documents
2746
- */
2747
- declare class StreamingDetector {
2748
- private detector;
2749
- private options;
2750
- constructor(detector: OpenRedaction, options?: StreamingOptions);
2751
- /**
2752
- * Process a large text in chunks
2753
- * Returns an async generator that yields chunk results
2754
- */
2755
- processStream(text: string): AsyncGenerator<ChunkResult, void, undefined>;
2756
- /**
2757
- * Process entire stream and collect all results
2758
- */
2759
- processComplete(text: string): Promise<DetectionResult>;
2760
- /**
2761
- * Process a file stream (Node.js only)
2762
- */
2763
- processFileStream(readableStream: ReadableStream<Uint8Array> | NodeJS.ReadableStream): AsyncGenerator<ChunkResult, void, undefined>;
2764
- /**
2765
- * Get chunk statistics
2766
- */
2767
- getChunkStats(textLength: number): {
2768
- numChunks: number;
2769
- chunkSize: number;
2770
- overlap: number;
2771
- estimatedMemory: number;
2772
- };
2773
- }
2774
- /**
2775
- * Helper to create a streaming detector from OpenRedaction instance
2776
- */
2777
- declare function createStreamingDetector(detector: OpenRedaction, options?: StreamingOptions): StreamingDetector;
2778
-
2779
- /**
2780
- * Worker thread types and interfaces
2781
- */
2782
-
2783
- /**
2784
- * Worker task for text detection
2785
- */
2786
- interface DetectTask {
2787
- type: 'detect';
2788
- id: string;
2789
- text: string;
2790
- options?: OpenRedactionOptions;
2791
- }
2792
- /**
2793
- * Worker task for document processing
2794
- */
2795
- interface DocumentTask {
2796
- type: 'document';
2797
- id: string;
2798
- buffer: Buffer;
2799
- options?: any;
2800
- }
2801
- /**
2802
- * Worker task union type
2803
- */
2804
- type WorkerTask = DetectTask | DocumentTask;
2805
- /**
2806
- * Worker result
2807
- */
2808
- interface WorkerResult {
2809
- id: string;
2810
- result: DetectionResult | any;
2811
- error?: string;
2812
- processingTime: number;
2813
- }
2814
- /**
2815
- * Worker pool configuration
2816
- */
2817
- interface WorkerPoolConfig {
2818
- /** Number of worker threads (default: CPU count) */
2819
- numWorkers?: number;
2820
- /** Maximum queue size (default: 100) */
2821
- maxQueueSize?: number;
2822
- /** Worker idle timeout in ms (default: 30000) */
2823
- idleTimeout?: number;
2824
- }
2825
- /**
2826
- * Worker pool statistics
2827
- */
2828
- interface WorkerPoolStats {
2829
- /** Number of active workers */
2830
- activeWorkers: number;
2831
- /** Number of idle workers */
2832
- idleWorkers: number;
2833
- /** Current queue size */
2834
- queueSize: number;
2835
- /** Total tasks processed */
2836
- totalProcessed: number;
2837
- /** Total errors */
2838
- totalErrors: number;
2839
- /** Average processing time */
2840
- avgProcessingTime: number;
2841
- }
2842
-
2843
- /**
2844
- * Worker thread pool for parallel processing
2845
- */
2846
-
2847
- /**
2848
- * Worker pool for parallel text detection and document processing
2849
- */
2850
- declare class WorkerPool {
2851
- private workers;
2852
- private availableWorkers;
2853
- private taskQueue;
2854
- private config;
2855
- private stats;
2856
- private workerPath;
2857
- private totalProcessingTime;
2858
- constructor(config?: WorkerPoolConfig);
2859
- /**
2860
- * Initialize worker pool
2861
- */
2862
- initialize(): Promise<void>;
2863
- /**
2864
- * Create a new worker
2865
- */
2866
- private createWorker;
2867
- /**
2868
- * Execute a task on the worker pool
2869
- */
2870
- execute<T = any>(task: WorkerTask): Promise<T>;
2871
- /**
2872
- * Process task queue
2873
- */
2874
- private processQueue;
2875
- /**
2876
- * Handle worker result
2877
- */
2878
- private handleWorkerResult;
2879
- /**
2880
- * Remove worker from pool
2881
- */
2882
- private removeWorker;
2883
- /**
2884
- * Get pool statistics
2885
- */
2886
- getStats(): WorkerPoolStats;
2887
- /**
2888
- * Terminate all workers
2889
- */
2890
- terminate(): Promise<void>;
2891
- }
2892
- /**
2893
- * Create a worker pool instance
2894
- */
2895
- declare function createWorkerPool(config?: WorkerPoolConfig): WorkerPool;
2896
-
2897
- /**
2898
- * Batch processing for multiple documents
2899
- * Efficient processing of arrays of texts
2900
- */
2901
-
2902
- /**
2903
- * Batch processing options
2904
- */
2905
- interface BatchOptions {
2906
- /** Enable parallel processing (default: false) */
2907
- parallel?: boolean;
2908
- /** Maximum concurrency for parallel processing (default: 4) */
2909
- maxConcurrency?: number;
2910
- /** Progress callback */
2911
- onProgress?: (completed: number, total: number) => void;
2912
- }
2913
- /**
2914
- * Batch processing result
2915
- */
2916
- interface BatchResult {
2917
- /** Individual results for each document */
2918
- results: DetectionResult[];
2919
- /** Total processing stats */
2920
- stats: {
2921
- /** Total documents processed */
2922
- totalDocuments: number;
2923
- /** Total PII detections across all documents */
2924
- totalDetections: number;
2925
- /** Total processing time in milliseconds */
2926
- totalTime: number;
2927
- /** Average time per document */
2928
- avgTimePerDocument: number;
2929
- };
2930
- }
2931
- /**
2932
- * Batch processor for processing multiple documents
2933
- */
2934
- declare class BatchProcessor {
2935
- private detector;
2936
- constructor(detector: OpenRedaction);
2937
- /**
2938
- * Process multiple documents sequentially
2939
- */
2940
- processSequential(documents: string[], options?: BatchOptions): BatchResult;
2941
- /**
2942
- * Process multiple documents in parallel
2943
- */
2944
- processParallel(documents: string[], options?: BatchOptions): Promise<BatchResult>;
2945
- /**
2946
- * Process multiple documents (automatically chooses sequential or parallel)
2947
- */
2948
- process(documents: string[], options?: BatchOptions): Promise<BatchResult>;
2949
- /**
2950
- * Process documents with automatic batching
2951
- * Useful for very large arrays of documents
2952
- */
2953
- processStream(documents: string[], batchSize?: number): AsyncGenerator<DetectionResult, void, undefined>;
2954
- /**
2955
- * Get aggregated statistics across multiple results
2956
- */
2957
- getAggregatedStats(results: DetectionResult[]): {
2958
- totalDetections: number;
2959
- detectionsByType: Record<string, number>;
2960
- detectionsBySeverity: Record<string, number>;
2961
- avgConfidence: number;
2962
- };
2963
- }
2964
- /**
2965
- * Helper to create a batch processor
2966
- */
2967
- declare function createBatchProcessor(detector: OpenRedaction): BatchProcessor;
2968
-
2969
- /**
2970
- * Express middleware for PII detection
2971
- * Local-first server-side PII detection and redaction
2972
- */
2973
-
2974
- /**
2975
- * Middleware options
2976
- */
2977
- interface OpenRedactionMiddlewareOptions extends OpenRedactionOptions {
2978
- /** Auto-redact request body (default: false) */
2979
- autoRedact?: boolean;
2980
- /** Fields to check in request body (default: all) */
2981
- fields?: string[];
2982
- /** Skip detection for certain routes (regex patterns) */
2983
- skipRoutes?: RegExp[];
2984
- /** Add PII detection results to request object (default: true) */
2985
- attachResults?: boolean;
2986
- /** Custom handler for PII detection */
2987
- onDetection?: (req: Request, result: DetectionResult) => void;
2988
- /** Fail request if PII detected (default: false) */
2989
- failOnPII?: boolean;
2990
- /** Add response headers with PII info (default: false) */
2991
- addHeaders?: boolean;
2992
- }
2993
- /**
2994
- * Extended Express Request with PII detection results
2995
- */
2996
- interface OpenRedactionRequest extends Request {
2997
- pii?: {
2998
- detected: boolean;
2999
- count: number;
3000
- result: DetectionResult;
3001
- redacted?: any;
3002
- };
3003
- }
3004
- /**
3005
- * Create Express middleware for PII detection
3006
- */
3007
- declare function openredactionMiddleware(options?: OpenRedactionMiddlewareOptions): (req: Request, res: Response, next: NextFunction) => void | Response<any, Record<string, any>>;
3008
- /**
3009
- * Express route handler for PII detection
3010
- */
3011
- declare function detectPII(options?: OpenRedactionOptions): (req: Request, res: Response) => void;
3012
- /**
3013
- * Express route handler for generating reports
3014
- */
3015
- declare function generateReport(options?: OpenRedactionOptions): (req: Request, res: Response) => void;
3016
-
3017
- /**
3018
- * Hook for PII detection in React components
3019
- *
3020
- * @example
3021
- * ```tsx
3022
- * function MyForm() {
3023
- * const { detect, result, isDetecting } = useOpenRedaction();
3024
- *
3025
- * const handleSubmit = (text: string) => {
3026
- * const detection = detect(text);
3027
- * if (detection.detections.length > 0) {
3028
- * alert('PII detected!');
3029
- * }
3030
- * };
3031
- * }
3032
- * ```
3033
- */
3034
- declare function useOpenRedaction(options?: OpenRedactionOptions): {
3035
- detect: (text: string) => DetectionResult;
3036
- result: DetectionResult | null;
3037
- isDetecting: boolean;
3038
- hasPII: boolean;
3039
- count: number;
3040
- clear: () => void;
3041
- detector: OpenRedaction;
3042
- };
3043
- /**
3044
- * Hook for real-time PII detection with debouncing
3045
- *
3046
- * @example
3047
- * ```tsx
3048
- * function EmailInput() {
3049
- * const [email, setEmail] = useState('');
3050
- * const { result, hasPII } = usePIIDetector(email, { debounce: 500 });
3051
- *
3052
- * return (
3053
- * <div>
3054
- * <input value={email} onChange={e => setEmail(e.target.value)} />
3055
- * {hasPII && <Warning>PII detected!</Warning>}
3056
- * </div>
3057
- * );
3058
- * }
3059
- * ```
3060
- */
3061
- declare function usePIIDetector(text: string, options?: OpenRedactionOptions & {
3062
- debounce?: number;
3063
- }): {
3064
- result: DetectionResult | null;
3065
- isDetecting: boolean;
3066
- hasPII: boolean;
3067
- count: number;
3068
- detections: PIIDetection[];
3069
- };
3070
- /**
3071
- * Hook for form field PII validation
3072
- *
3073
- * @example
3074
- * ```tsx
3075
- * function UserForm() {
3076
- * const emailValidation = useFormFieldValidator({
3077
- * failOnPII: true,
3078
- * types: ['EMAIL', 'PHONE']
3079
- * });
3080
- *
3081
- * return (
3082
- * <input
3083
- * {...emailValidation.getFieldProps()}
3084
- * onChange={e => emailValidation.validate(e.target.value)}
3085
- * />
3086
- * );
3087
- * }
3088
- * ```
3089
- */
3090
- declare function useFormFieldValidator(options?: OpenRedactionOptions & {
3091
- failOnPII?: boolean;
3092
- types?: string[];
3093
- onPIIDetected?: (result: DetectionResult) => void;
3094
- }): {
3095
- value: string;
3096
- error: string | null;
3097
- result: DetectionResult | null;
3098
- validate: (inputValue: string) => boolean;
3099
- getFieldProps: () => {
3100
- value: string;
3101
- 'aria-invalid': string;
3102
- 'aria-describedby': string | undefined;
3103
- };
3104
- isValid: boolean;
3105
- hasPII: boolean;
3106
- };
3107
- /**
3108
- * Hook for batch PII detection
3109
- *
3110
- * @example
3111
- * ```tsx
3112
- * function BatchProcessor() {
3113
- * const { processAll, results, isProcessing } = useBatchDetector();
3114
- *
3115
- * const handleProcess = async () => {
3116
- * const documents = ['text1', 'text2', 'text3'];
3117
- * await processAll(documents);
3118
- * };
3119
- * }
3120
- * ```
3121
- */
3122
- declare function useBatchDetector(options?: OpenRedactionOptions): {
3123
- processAll: (texts: string[]) => Promise<DetectionResult[]>;
3124
- results: DetectionResult[];
3125
- isProcessing: boolean;
3126
- progress: number;
3127
- totalDetections: number;
3128
- clear: () => void;
3129
- };
3130
- /**
3131
- * Hook for PII detection with auto-redaction
3132
- *
3133
- * @example
3134
- * ```tsx
3135
- * function SecureTextArea() {
3136
- * const { text, setText, redactedText, hasPII } = useAutoRedact();
3137
- *
3138
- * return (
3139
- * <div>
3140
- * <textarea value={text} onChange={e => setText(e.target.value)} />
3141
- * {hasPII && <div>Redacted: {redactedText}</div>}
3142
- * </div>
3143
- * );
3144
- * }
3145
- * ```
3146
- */
3147
- declare function useAutoRedact(options?: OpenRedactionOptions & {
3148
- debounce?: number;
3149
- }): {
3150
- text: string;
3151
- setText: react.Dispatch<react.SetStateAction<string>>;
3152
- result: DetectionResult | null;
3153
- redactedText: string;
3154
- hasPII: boolean;
3155
- detections: PIIDetection[];
3156
- count: number;
3157
- };
3158
-
3159
- /**
3160
- * Custom error class for OpenRedaction with helpful messages and suggestions
3161
- */
3162
- interface ErrorSuggestion {
3163
- message: string;
3164
- code?: string;
3165
- docs?: string;
3166
- }
3167
- declare class OpenRedactionError extends Error {
3168
- code: string;
3169
- suggestion?: ErrorSuggestion;
3170
- context?: Record<string, unknown>;
3171
- constructor(message: string, code?: string, suggestion?: ErrorSuggestion, context?: Record<string, unknown>);
3172
- /**
3173
- * Get formatted error message with suggestions
3174
- */
3175
- getFormattedMessage(): string;
3176
- }
3177
- /**
3178
- * Factory functions for common error scenarios
3179
- */
3180
- declare function createInvalidPatternError(patternType: string, reason: string): OpenRedactionError;
3181
- declare function createValidationError(value: string, patternType: string): OpenRedactionError;
3182
- declare function createHighMemoryError(textSize: number): OpenRedactionError;
3183
- declare function createConfigLoadError(path: string, reason: string): OpenRedactionError;
3184
- declare function createLearningDisabledError(): OpenRedactionError;
3185
- declare function createOptimizationDisabledError(): OpenRedactionError;
3186
- declare function createMultiPassDisabledError(): OpenRedactionError;
3187
- declare function createCacheDisabledError(): OpenRedactionError;
3188
-
3189
- /**
3190
- * Multi-tenancy support for SaaS deployments
3191
- * Provides tenant isolation, quotas, and per-tenant configuration
3192
- */
3193
-
3194
- /**
3195
- * Tenant configuration
3196
- */
3197
- interface TenantConfig {
3198
- /** Tenant unique identifier */
3199
- tenantId: string;
3200
- /** Tenant display name */
3201
- name: string;
3202
- /** Tenant-specific OpenRedaction options */
3203
- options?: OpenRedactionOptions;
3204
- /** Tenant-specific custom patterns */
3205
- customPatterns?: PIIPattern[];
3206
- /** Tenant-specific whitelist */
3207
- whitelist?: string[];
3208
- /** Tenant quota limits */
3209
- quotas?: TenantQuotas;
3210
- /** Tenant API key (for authentication) */
3211
- apiKey?: string;
3212
- /** Tenant metadata */
3213
- metadata?: Record<string, unknown>;
3214
- /** Tenant status */
3215
- status: 'active' | 'suspended' | 'trial';
3216
- /** Trial expiry date (for trial tenants) */
3217
- trialExpiresAt?: Date;
3218
- /** Created timestamp */
3219
- createdAt: Date;
3220
- /** Last updated timestamp */
3221
- updatedAt: Date;
3222
- }
3223
- /**
3224
- * Tenant quota limits
3225
- */
3226
- interface TenantQuotas {
3227
- /** Maximum requests per month (undefined = unlimited) */
3228
- maxRequestsPerMonth?: number;
3229
- /** Maximum text length per request (characters) */
3230
- maxTextLength?: number;
3231
- /** Maximum patterns allowed */
3232
- maxPatterns?: number;
3233
- /** Maximum audit logs to retain */
3234
- maxAuditLogs?: number;
3235
- /** Rate limit: requests per minute */
3236
- rateLimit?: number;
3237
- }
3238
- /**
3239
- * Tenant usage statistics
3240
- */
3241
- interface TenantUsage {
3242
- /** Tenant ID */
3243
- tenantId: string;
3244
- /** Total requests this month */
3245
- requestsThisMonth: number;
3246
- /** Total text processed this month (characters) */
3247
- textProcessedThisMonth: number;
3248
- /** Total PII detected this month */
3249
- piiDetectedThisMonth: number;
3250
- /** Last request timestamp */
3251
- lastRequestAt?: Date;
3252
- /** Monthly usage reset date */
3253
- monthlyResetDate: Date;
3254
- }
3255
- /**
3256
- * Tenant quota exceeded error
3257
- */
3258
- declare class TenantQuotaExceededError extends Error {
3259
- tenantId: string;
3260
- quota: string;
3261
- limit: number;
3262
- current: number;
3263
- constructor(tenantId: string, quota: string, limit: number, current: number);
3264
- }
3265
- /**
3266
- * Tenant not found error
3267
- */
3268
- declare class TenantNotFoundError extends Error {
3269
- tenantId: string;
3270
- constructor(tenantId: string);
3271
- }
3272
- /**
3273
- * Tenant suspended error
3274
- */
3275
- declare class TenantSuspendedError extends Error {
3276
- tenantId: string;
3277
- constructor(tenantId: string);
3278
- }
3279
- /**
3280
- * Multi-tenant manager for SaaS deployments
3281
- */
3282
- declare class TenantManager {
3283
- private tenants;
3284
- private usage;
3285
- private detectors;
3286
- private rateLimitTracking;
3287
- private auditLoggers;
3288
- private metricsCollectors;
3289
- /**
3290
- * Register a new tenant
3291
- */
3292
- registerTenant(config: Omit<TenantConfig, 'createdAt' | 'updatedAt'>): TenantConfig;
3293
- /**
3294
- * Update tenant configuration
3295
- */
3296
- updateTenant(tenantId: string, updates: Partial<Omit<TenantConfig, 'tenantId' | 'createdAt'>>): TenantConfig;
3297
- /**
3298
- * Get tenant configuration
3299
- */
3300
- getTenantConfig(tenantId: string): TenantConfig;
3301
- /**
3302
- * Get or create tenant-specific detector instance
3303
- */
3304
- getDetector(tenantId: string): OpenRedaction;
3305
- /**
3306
- * Perform detection with tenant isolation and quota checks
3307
- */
3308
- detect(tenantId: string, text: string): Promise<any>;
3309
- /**
3310
- * Validate tenant status (active, trial expiry)
3311
- */
3312
- private validateTenantStatus;
3313
- /**
3314
- * Check tenant quotas before processing
3315
- */
3316
- private checkQuotas;
3317
- /**
3318
- * Track request for usage and rate limiting
3319
- */
3320
- private trackRequest;
3321
- /**
3322
- * Get number of requests in last minute
3323
- */
3324
- private getRequestsInLastMinute;
3325
- /**
3326
- * Reset monthly usage statistics
3327
- */
3328
- private resetMonthlyUsage;
3329
- /**
3330
- * Get next monthly reset date (1st of next month)
3331
- */
3332
- private getNextMonthlyResetDate;
3333
- /**
3334
- * Get tenant usage statistics
3335
- */
3336
- getTenantUsage(tenantId: string): TenantUsage;
3337
- /**
3338
- * Get all tenants
3339
- */
3340
- getAllTenants(): TenantConfig[];
3341
- /**
3342
- * Get tenants by status
3343
- */
3344
- getTenantsByStatus(status: TenantConfig['status']): TenantConfig[];
3345
- /**
3346
- * Authenticate tenant by API key
3347
- */
3348
- authenticateByApiKey(apiKey: string): TenantConfig | null;
3349
- /**
3350
- * Suspend a tenant
3351
- */
3352
- suspendTenant(tenantId: string): void;
3353
- /**
3354
- * Activate a tenant
3355
- */
3356
- activateTenant(tenantId: string): void;
3357
- /**
3358
- * Delete a tenant and all associated data
3359
- */
3360
- deleteTenant(tenantId: string): void;
3361
- /**
3362
- * Set tenant-specific audit logger
3363
- */
3364
- setAuditLogger(tenantId: string, logger: IAuditLogger): void;
3365
- /**
3366
- * Get tenant-specific audit logger
3367
- */
3368
- getAuditLogger(tenantId: string): IAuditLogger | undefined;
3369
- /**
3370
- * Set tenant-specific metrics collector
3371
- */
3372
- setMetricsCollector(tenantId: string, collector: IMetricsCollector): void;
3373
- /**
3374
- * Get tenant-specific metrics collector
3375
- */
3376
- getMetricsCollector(tenantId: string): IMetricsCollector | undefined;
3377
- /**
3378
- * Get aggregate statistics across all tenants
3379
- */
3380
- getAggregateStats(): {
3381
- totalTenants: number;
3382
- activeTenants: number;
3383
- trialTenants: number;
3384
- suspendedTenants: number;
3385
- totalRequestsThisMonth: number;
3386
- totalTextProcessedThisMonth: number;
3387
- totalPiiDetectedThisMonth: number;
3388
- };
3389
- /**
3390
- * Validate tenant exists
3391
- */
3392
- private validateTenantExists;
3393
- /**
3394
- * Export tenant configuration as JSON
3395
- */
3396
- exportTenantConfig(tenantId: string): string;
3397
- /**
3398
- * Import tenant configuration from JSON
3399
- */
3400
- importTenantConfig(json: string): TenantConfig;
3401
- }
3402
- /**
3403
- * Create a tenant manager instance
3404
- */
3405
- declare function createTenantManager(): TenantManager;
3406
- /**
3407
- * Default tenant quotas for different tiers
3408
- */
3409
- declare const DEFAULT_TIER_QUOTAS: {
3410
- readonly free: {
3411
- readonly maxRequestsPerMonth: 1000;
3412
- readonly maxTextLength: 10000;
3413
- readonly maxPatterns: 10;
3414
- readonly maxAuditLogs: 100;
3415
- readonly rateLimit: 10;
3416
- };
3417
- readonly starter: {
3418
- readonly maxRequestsPerMonth: 10000;
3419
- readonly maxTextLength: 50000;
3420
- readonly maxPatterns: 50;
3421
- readonly maxAuditLogs: 1000;
3422
- readonly rateLimit: 50;
3423
- };
3424
- readonly professional: {
3425
- readonly maxRequestsPerMonth: 100000;
3426
- readonly maxTextLength: 100000;
3427
- readonly maxPatterns: 200;
3428
- readonly maxAuditLogs: 10000;
3429
- readonly rateLimit: 200;
3430
- };
3431
- readonly enterprise: {
3432
- readonly maxRequestsPerMonth: undefined;
3433
- readonly maxTextLength: undefined;
3434
- readonly maxPatterns: undefined;
3435
- readonly maxAuditLogs: undefined;
3436
- readonly rateLimit: undefined;
3437
- };
3438
- };
3439
-
3440
- /**
3441
- * Webhook and Alert System for event-driven notifications
3442
- * Supports HTTP webhooks with retry logic, circuit breaker, and event filtering
3443
- */
3444
-
3445
- /**
3446
- * Webhook event types
3447
- */
3448
- type WebhookEventType = 'pii.detected.high_risk' | 'pii.detected.bulk' | 'pii.processing.failed' | 'pii.processing.slow' | 'quota.exceeded' | 'tenant.suspended' | 'audit.tamper_detected' | 'custom';
3449
- /**
3450
- * Webhook event payload
3451
- */
3452
- interface WebhookEvent {
3453
- /** Event unique ID */
3454
- id: string;
3455
- /** Event type */
3456
- type: WebhookEventType;
3457
- /** Event timestamp (ISO 8601) */
3458
- timestamp: string;
3459
- /** Event severity */
3460
- severity: 'critical' | 'high' | 'medium' | 'low' | 'info';
3461
- /** Event data */
3462
- data: Record<string, unknown>;
3463
- /** Source identifier (e.g., tenant ID) */
3464
- source?: string;
3465
- /** Custom metadata */
3466
- metadata?: Record<string, unknown>;
3467
- }
3468
- /**
3469
- * Webhook configuration
3470
- */
3471
- interface WebhookConfig {
3472
- /** Webhook unique ID */
3473
- id: string;
3474
- /** Webhook URL to POST events to */
3475
- url: string;
3476
- /** Event types to subscribe to (empty = all events) */
3477
- events?: WebhookEventType[];
3478
- /** Minimum severity to trigger webhook */
3479
- minSeverity?: 'critical' | 'high' | 'medium' | 'low' | 'info';
3480
- /** Custom headers to include in requests */
3481
- headers?: Record<string, string>;
3482
- /** Secret for HMAC signature (optional but recommended) */
3483
- secret?: string;
3484
- /** Enable webhook (default: true) */
3485
- enabled?: boolean;
3486
- /** Retry configuration */
3487
- retry?: {
3488
- /** Maximum retry attempts (default: 3) */
3489
- maxAttempts?: number;
3490
- /** Initial delay in ms (default: 1000) */
3491
- initialDelay?: number;
3492
- /** Maximum delay in ms (default: 60000) */
3493
- maxDelay?: number;
3494
- /** Backoff multiplier (default: 2) */
3495
- backoffMultiplier?: number;
3496
- };
3497
- /** Timeout in ms (default: 5000) */
3498
- timeout?: number;
3499
- /** Tenant ID (for multi-tenant setups) */
3500
- tenantId?: string;
3501
- }
3502
- /**
3503
- * Webhook delivery status
3504
- */
3505
- type WebhookDeliveryStatus = 'pending' | 'success' | 'failed' | 'circuit_open';
3506
- /**
3507
- * Webhook delivery record
3508
- */
3509
- interface WebhookDelivery {
3510
- /** Delivery ID */
3511
- id: string;
3512
- /** Webhook ID */
3513
- webhookId: string;
3514
- /** Event that was delivered */
3515
- event: WebhookEvent;
3516
- /** Delivery status */
3517
- status: WebhookDeliveryStatus;
3518
- /** HTTP status code */
3519
- statusCode?: number;
3520
- /** Delivery timestamp */
3521
- timestamp: Date;
3522
- /** Attempt number */
3523
- attempt: number;
3524
- /** Error message if failed */
3525
- error?: string;
3526
- /** Response body */
3527
- responseBody?: string;
3528
- /** Delivery duration in ms */
3529
- durationMs?: number;
3530
- }
3531
- /**
3532
- * Webhook statistics
3533
- */
3534
- interface WebhookStats {
3535
- /** Webhook ID */
3536
- webhookId: string;
3537
- /** Total deliveries */
3538
- totalDeliveries: number;
3539
- /** Successful deliveries */
3540
- successfulDeliveries: number;
3541
- /** Failed deliveries */
3542
- failedDeliveries: number;
3543
- /** Average delivery time in ms */
3544
- avgDeliveryTimeMs: number;
3545
- /** Last delivery time */
3546
- lastDeliveryTime?: Date;
3547
- /** Circuit breaker state */
3548
- circuitState: 'closed' | 'open' | 'half_open';
3549
- }
3550
- /**
3551
- * Webhook Manager
3552
- * Manages webhook subscriptions, delivery, retries, and circuit breaking
3553
- */
3554
- declare class WebhookManager {
3555
- private webhooks;
3556
- private deliveryHistory;
3557
- private circuitBreakers;
3558
- private pendingRetries;
3559
- private maxHistorySize;
3560
- private readonly FAILURE_THRESHOLD;
3561
- private readonly RESET_TIMEOUT_MS;
3562
- constructor(options?: {
3563
- maxHistorySize?: number;
3564
- });
3565
- /**
3566
- * Register a webhook
3567
- */
3568
- registerWebhook(config: WebhookConfig): void;
3569
- /**
3570
- * Update webhook configuration
3571
- */
3572
- updateWebhook(id: string, updates: Partial<Omit<WebhookConfig, 'id'>>): void;
3573
- /**
3574
- * Delete webhook
3575
- */
3576
- deleteWebhook(id: string): void;
3577
- /**
3578
- * Get webhook configuration
3579
- */
3580
- getWebhook(id: string): WebhookConfig | undefined;
3581
- /**
3582
- * Get all webhooks
3583
- */
3584
- getAllWebhooks(): WebhookConfig[];
3585
- /**
3586
- * Emit an event to all subscribed webhooks
3587
- */
3588
- emitEvent(event: Omit<WebhookEvent, 'id' | 'timestamp'>): Promise<void>;
3589
- /**
3590
- * Emit high-risk PII detection event
3591
- */
3592
- emitHighRiskPII(result: DetectionResult, tenantId?: string): Promise<void>;
3593
- /**
3594
- * Emit bulk PII detection event
3595
- */
3596
- emitBulkPII(result: DetectionResult, threshold?: number, tenantId?: string): Promise<void>;
3597
- /**
3598
- * Emit processing error event
3599
- */
3600
- emitProcessingError(error: Error, tenantId?: string): Promise<void>;
3601
- /**
3602
- * Emit slow processing event
3603
- */
3604
- emitSlowProcessing(durationMs: number, threshold?: number, tenantId?: string): Promise<void>;
3605
- /**
3606
- * Find webhooks that match the event
3607
- */
3608
- private findMatchingWebhooks;
3609
- /**
3610
- * Deliver webhook with retry logic
3611
- */
3612
- private deliverWebhook;
3613
- /**
3614
- * Make HTTP request to webhook URL
3615
- */
3616
- private makeHttpRequest;
3617
- /**
3618
- * Calculate HMAC signature for webhook verification
3619
- */
3620
- private calculateHmacSignature;
3621
- /**
3622
- * Calculate retry delay with exponential backoff
3623
- */
3624
- private calculateRetryDelay;
3625
- /**
3626
- * Record delivery in history
3627
- */
3628
- private recordDelivery;
3629
- /**
3630
- * Get delivery history for a webhook
3631
- */
3632
- getDeliveryHistory(webhookId: string, limit?: number): WebhookDelivery[];
3633
- /**
3634
- * Get webhook statistics
3635
- */
3636
- getWebhookStats(webhookId: string): WebhookStats;
3637
- /**
3638
- * Get aggregate statistics for all webhooks
3639
- */
3640
- getAggregateStats(): {
3641
- totalWebhooks: number;
3642
- enabledWebhooks: number;
3643
- totalDeliveries: number;
3644
- successfulDeliveries: number;
3645
- failedDeliveries: number;
3646
- avgDeliveryTimeMs: number;
3647
- };
3648
- /**
3649
- * Clear delivery history
3650
- */
3651
- clearHistory(): void;
3652
- /**
3653
- * Generate unique ID
3654
- */
3655
- private generateId;
3656
- }
3657
- /**
3658
- * Create a webhook manager instance
3659
- */
3660
- declare function createWebhookManager(options?: {
3661
- maxHistorySize?: number;
3662
- }): WebhookManager;
3663
- /**
3664
- * Verify webhook HMAC signature
3665
- */
3666
- declare function verifyWebhookSignature(payload: string, signature: string, secret: string, algorithm?: 'sha256' | 'sha512'): boolean;
3667
-
3668
- /**
3669
- * REST API Server for OpenRedaction
3670
- * Provides HTTP/REST endpoints for PII detection and redaction
3671
- */
3672
-
3673
- /**
3674
- * API Server configuration
3675
- */
3676
- interface APIServerConfig {
3677
- /** Server port (default: 3000) */
3678
- port?: number;
3679
- /** Server host (default: '0.0.0.0') */
3680
- host?: string;
3681
- /** Enable CORS (default: true) */
3682
- enableCors?: boolean;
3683
- /** CORS origin (default: '*') */
3684
- corsOrigin?: string | string[];
3685
- /** API key for authentication (optional) */
3686
- apiKey?: string;
3687
- /** Enable rate limiting (default: true) */
3688
- enableRateLimit?: boolean;
3689
- /** Rate limit: requests per minute (default: 60) */
3690
- rateLimit?: number;
3691
- /** Request body size limit (default: '10mb') */
3692
- bodyLimit?: string;
3693
- /** Enable request logging (default: true) */
3694
- enableLogging?: boolean;
3695
- /** Tenant manager (for multi-tenant mode) */
3696
- tenantManager?: TenantManager;
3697
- /** Webhook manager */
3698
- webhookManager?: WebhookManager;
3699
- /** Persistent audit logger */
3700
- auditLogger?: PersistentAuditLogger;
3701
- /** Prometheus server */
3702
- prometheusServer?: PrometheusServer;
3703
- /** Default OpenRedaction options (for non-tenant mode) */
3704
- defaultOptions?: OpenRedactionOptions;
3705
- }
3706
- /**
3707
- * API request with authentication
3708
- */
3709
- interface APIRequest {
3710
- /** Request body */
3711
- body: any;
3712
- /** Headers */
3713
- headers: Record<string, string | string[] | undefined>;
3714
- /** Query parameters */
3715
- query: Record<string, string | string[] | undefined>;
3716
- /** Path parameters */
3717
- params: Record<string, string>;
3718
- /** Authenticated tenant ID (if multi-tenant) */
3719
- tenantId?: string;
3720
- /** Client IP address */
3721
- ip?: string;
3722
- }
3723
- /**
3724
- * API response
3725
- */
3726
- interface APIResponse {
3727
- /** Status code */
3728
- status: number;
3729
- /** Response body */
3730
- body: any;
3731
- /** Headers */
3732
- headers?: Record<string, string>;
3733
- }
3734
- /**
3735
- * REST API Server
3736
- * Lightweight HTTP server for OpenRedaction with Express-like interface
3737
- */
3738
- declare class APIServer {
3739
- private server?;
3740
- private config;
3741
- private detector?;
3742
- private isRunning;
3743
- private rateLimitTracking;
3744
- constructor(config?: APIServerConfig);
3745
- /**
3746
- * Start the API server
3747
- */
3748
- start(): Promise<void>;
3749
- /**
3750
- * Stop the server
3751
- */
3752
- stop(): Promise<void>;
3753
- /**
3754
- * Handle incoming HTTP requests
3755
- */
3756
- private handleRequest;
3757
- /**
3758
- * Parse HTTP request
3759
- */
3760
- private parseRequest;
3761
- /**
3762
- * Parse request body
3763
- */
3764
- private parseBody;
3765
- /**
3766
- * Route request to appropriate handler
3767
- */
3768
- private routeRequest;
3769
- /**
3770
- * Handle POST /api/detect
3771
- */
3772
- private handleDetect;
3773
- /**
3774
- * Handle POST /api/redact
3775
- */
3776
- private handleRedact;
3777
- /**
3778
- * Handle POST /api/restore
3779
- */
3780
- private handleRestore;
3781
- /**
3782
- * Handle GET /api/audit/logs
3783
- */
3784
- private handleAuditLogs;
3785
- /**
3786
- * Handle GET /api/audit/stats
3787
- */
3788
- private handleAuditStats;
3789
- /**
3790
- * Handle GET /api/metrics
3791
- */
3792
- private handleMetrics;
3793
- /**
3794
- * Handle GET /api/patterns
3795
- */
3796
- private handleGetPatterns;
3797
- /**
3798
- * Handle GET /api/health
3799
- */
3800
- private handleHealth;
3801
- /**
3802
- * Handle GET /api/docs
3803
- */
3804
- private handleDocs;
3805
- /**
3806
- * Handle GET /
3807
- */
3808
- private handleRoot;
3809
- /**
3810
- * Send HTTP response
3811
- */
3812
- private sendResponse;
3813
- /**
3814
- * Check rate limit
3815
- */
3816
- private checkRateLimit;
3817
- }
3818
- /**
3819
- * Create an API server instance
3820
- */
3821
- declare function createAPIServer(config?: APIServerConfig): APIServer;
3822
-
3823
- /**
3824
- * Configuration export/import utilities
3825
- * Share configurations between projects and version control
3826
- */
3827
-
3828
- interface ExportedConfig {
3829
- version: string;
3830
- timestamp: string;
3831
- options: {
3832
- includeNames?: boolean;
3833
- includeAddresses?: boolean;
3834
- includePhones?: boolean;
3835
- includeEmails?: boolean;
3836
- patterns?: string[];
3837
- categories?: string[];
3838
- whitelist?: string[];
3839
- deterministic?: boolean;
3840
- redactionMode?: string;
3841
- preset?: string;
3842
- enableContextAnalysis?: boolean;
3843
- confidenceThreshold?: number;
3844
- enableFalsePositiveFilter?: boolean;
3845
- falsePositiveThreshold?: number;
3846
- enableMultiPass?: boolean;
3847
- multiPassCount?: number;
3848
- enableCache?: boolean;
3849
- cacheSize?: number;
3850
- maxInputSize?: number;
3851
- regexTimeout?: number;
3852
- };
3853
- customPatterns?: Array<{
3854
- type: string;
3855
- regex: string;
3856
- flags: string;
3857
- priority: number;
3858
- placeholder: string;
3859
- description?: string;
3860
- severity?: string;
3861
- }>;
3862
- metadata?: {
3863
- description?: string;
3864
- author?: string;
3865
- tags?: string[];
3866
- };
3867
- }
3868
- declare class ConfigExporter {
3869
- private static readonly CONFIG_VERSION;
3870
- /**
3871
- * Export configuration to JSON
3872
- */
3873
- static exportConfig(options: OpenRedactionOptions & {
3874
- categories?: string[];
3875
- maxInputSize?: number;
3876
- regexTimeout?: number;
3877
- }, metadata?: {
3878
- description?: string;
3879
- author?: string;
3880
- tags?: string[];
3881
- }): ExportedConfig;
3882
- /**
3883
- * Import configuration from JSON
3884
- */
3885
- static importConfig(exported: ExportedConfig, _options?: {
3886
- mergeWithDefaults?: boolean;
3887
- validatePatterns?: boolean;
3888
- }): OpenRedactionOptions & {
3889
- categories?: string[];
3890
- maxInputSize?: number;
3891
- regexTimeout?: number;
3892
- };
3893
- /**
3894
- * Export configuration to JSON string
3895
- */
3896
- static exportToString(options: OpenRedactionOptions & {
3897
- categories?: string[];
3898
- maxInputSize?: number;
3899
- regexTimeout?: number;
3900
- }, metadata?: {
3901
- description?: string;
3902
- author?: string;
3903
- tags?: string[];
3904
- }, pretty?: boolean): string;
3905
- /**
3906
- * Import configuration from JSON string
3907
- */
3908
- static importFromString(json: string): OpenRedactionOptions & {
3909
- categories?: string[];
3910
- maxInputSize?: number;
3911
- regexTimeout?: number;
3912
- };
3913
- /**
3914
- * Export configuration to file (Node.js only)
3915
- */
3916
- static exportToFile(filePath: string, options: OpenRedactionOptions & {
3917
- categories?: string[];
3918
- maxInputSize?: number;
3919
- regexTimeout?: number;
3920
- }, metadata?: {
3921
- description?: string;
3922
- author?: string;
3923
- tags?: string[];
3924
- }): Promise<void>;
3925
- /**
3926
- * Import configuration from file (Node.js only)
3927
- */
3928
- static importFromFile(filePath: string): Promise<OpenRedactionOptions & {
3929
- categories?: string[];
3930
- maxInputSize?: number;
3931
- regexTimeout?: number;
3932
- }>;
3933
- /**
3934
- * Validate exported config structure
3935
- */
3936
- static validateConfig(exported: ExportedConfig): {
3937
- valid: boolean;
3938
- errors: string[];
3939
- };
3940
- /**
3941
- * Merge two configurations (useful for extending base configs)
3942
- */
3943
- static mergeConfigs(base: ExportedConfig, override: ExportedConfig): ExportedConfig;
3944
- }
3945
- /**
3946
- * Convenience functions for common use cases
3947
- */
3948
- /**
3949
- * Create a shareable config preset
3950
- */
3951
- declare function createConfigPreset(name: string, description: string, options: OpenRedactionOptions & {
3952
- categories?: string[];
3953
- maxInputSize?: number;
3954
- regexTimeout?: number;
3955
- }): string;
3956
- /**
3957
- * Quick export for version control
3958
- */
3959
- declare function exportForVersionControl(options: OpenRedactionOptions & {
3960
- categories?: string[];
3961
- maxInputSize?: number;
3962
- regexTimeout?: number;
3963
- }): string;
3964
-
3965
- /**
3966
- * Health check API for production monitoring
3967
- * Verify detector is working correctly and get system status
3968
- */
3969
-
3970
- interface HealthCheckResult {
3971
- status: 'healthy' | 'degraded' | 'unhealthy';
3972
- timestamp: string;
3973
- checks: {
3974
- detector: HealthCheckStatus;
3975
- patterns: HealthCheckStatus;
3976
- performance: HealthCheckStatus;
3977
- memory: HealthCheckStatus;
3978
- };
3979
- metrics: {
3980
- totalPatterns: number;
3981
- compiledPatterns: number;
3982
- cacheSize?: number;
3983
- cacheEnabled: boolean;
3984
- uptime: number;
3985
- };
3986
- errors: string[];
3987
- warnings: string[];
3988
- }
3989
- interface HealthCheckStatus {
3990
- status: 'pass' | 'warn' | 'fail';
3991
- message: string;
3992
- value?: any;
3993
- threshold?: any;
3994
- }
3995
- interface HealthCheckOptions {
3996
- testDetection?: boolean;
3997
- checkPerformance?: boolean;
3998
- performanceThreshold?: number;
3999
- memoryThreshold?: number;
4000
- }
4001
- declare class HealthChecker {
4002
- private detector;
4003
- private initTime;
4004
- constructor(detector: OpenRedaction);
4005
- /**
4006
- * Run complete health check
4007
- */
4008
- check(options?: HealthCheckOptions): Promise<HealthCheckResult>;
4009
- /**
4010
- * Check detector functionality
4011
- */
4012
- private checkDetector;
4013
- /**
4014
- * Check patterns are loaded
4015
- */
4016
- private checkPatterns;
4017
- /**
4018
- * Check performance
4019
- */
4020
- private checkPerformance;
4021
- /**
4022
- * Check memory usage
4023
- */
4024
- private checkMemory;
4025
- /**
4026
- * Collect metrics
4027
- */
4028
- private collectMetrics;
4029
- /**
4030
- * Determine overall status
4031
- */
4032
- private determineOverallStatus;
4033
- /**
4034
- * Quick health check (minimal overhead)
4035
- */
4036
- quickCheck(): Promise<{
4037
- status: 'healthy' | 'unhealthy';
4038
- message: string;
4039
- }>;
4040
- /**
4041
- * Get system info for debugging
4042
- */
4043
- getSystemInfo(): {
4044
- version: string;
4045
- patterns: {
4046
- total: number;
4047
- types: number;
4048
- };
4049
- cache: {
4050
- enabled: boolean;
4051
- size: number;
4052
- maxSize: number;
4053
- };
4054
- uptime: number;
4055
- timestamp: string;
4056
- };
4057
- }
4058
- /**
4059
- * Create health checker for a detector
4060
- */
4061
- declare function createHealthChecker(detector: OpenRedaction): HealthChecker;
4062
- /**
4063
- * Express middleware for health check endpoint
4064
- */
4065
- declare function healthCheckMiddleware(detector: OpenRedaction): (_req: any, res: any) => Promise<void>;
4066
-
4067
- /**
4068
- * Safe regex execution utilities with ReDoS protection
4069
- * Zero-dependency implementation using time-based checks
4070
- */
4071
- interface SafeRegexOptions {
4072
- timeout?: number;
4073
- maxMatches?: number;
4074
- }
4075
- declare class RegexTimeoutError extends Error {
4076
- constructor(pattern: string, timeout: number);
4077
- }
4078
- declare class RegexMaxMatchesError extends Error {
4079
- constructor(pattern: string, maxMatches: number);
4080
- }
4081
- /**
4082
- * Safely execute regex with timeout protection
4083
- * Uses periodic time checks to prevent catastrophic backtracking
4084
- *
4085
- * Note: Does NOT reset lastIndex - caller is responsible for managing state
4086
- */
4087
- declare function safeExec(regex: RegExp, text: string, options?: SafeRegexOptions): RegExpExecArray | null;
4088
- /**
4089
- * Safely execute regex.exec() in a loop with timeout and match limit protection
4090
- * Returns all matches or throws on timeout/limit exceeded
4091
- */
4092
- declare function safeExecAll(regex: RegExp, text: string, options?: SafeRegexOptions): RegExpExecArray[];
4093
- /**
4094
- * Test if a regex pattern is potentially unsafe (basic static analysis)
4095
- * Detects common ReDoS patterns
4096
- *
4097
- * Note: This is a very basic heuristic check. The real protection comes from
4098
- * the execution timeout in safeExec(). This just catches obvious mistakes.
4099
- */
4100
- declare function isUnsafePattern(pattern: string): boolean;
4101
- /**
4102
- * Validate a regex pattern before use
4103
- * Throws error if pattern is potentially unsafe
4104
- */
4105
- declare function validatePattern(pattern: string | RegExp): void;
4106
- /**
4107
- * Compile a regex with validation and return a safe wrapper
4108
- */
4109
- declare function compileSafeRegex(pattern: string | RegExp, flags?: string): RegExp;
4110
-
4111
- export { ADMIN_ROLE, ALL_PERMISSIONS, ANALYST_ROLE, type APIRequest, type APIResponse, APIServer, type APIServerConfig, type AuditBackend, type AuditDatabaseConfig, type AuditLogEntry, type AuditQueryFilter, type AuditStats, type BatchOptions, BatchProcessor, type BatchResult, type CellMatch$1 as CellMatch, type ChunkResult, type ColumnStats$1 as ColumnStats, ConfigExporter, ConfigLoader, ConsoleAuditLogger, type ContextAnalysis, type ContextFeatures, type ContextRulesConfig, ContextRulesEngine, type CsvDetectionResult, CsvProcessor, type CsvProcessorOptions, DEFAULT_DOMAIN_VOCABULARIES, DEFAULT_PROXIMITY_RULES, DEFAULT_SEVERITY_MAP, DEFAULT_TIER_QUOTAS, type DetectTask, type DetectionPass, type DetectionResult, type DocumentFormat, type DocumentMetadata, type DocumentOptions, DocumentProcessor, type DocumentResult, type DocumentTask, type DomainVocabulary, type ErrorSuggestion, ExplainAPI, type ExportedConfig, type FalsePositiveRule, GRAFANA_DASHBOARD_TEMPLATE, type HashedAuditLogEntry, type HealthCheckOptions, type HealthCheckResult, type HealthCheckStatus, HealthChecker, type HybridMatch, type IAuditDatabaseAdapter, type IAuditLogger, type IDocumentProcessor, type IMetricsCollector, type IMetricsExporter, type IOCRProcessor, type IRBACManager, type ImageFormat, InMemoryAuditLogger, InMemoryMetricsCollector, type JsonDetectionResult, JsonProcessor, type JsonProcessorOptions, type LearningData, type LearningStats, LocalLearningStore, type MultiPassStats, NERDetector, type NEREntityType, type NERMatch, type OCRLanguage, type OCROptions, OCRProcessor, type OCRResult, OPERATOR_ROLE, OpenRedaction, type OpenRedactionConfig, OpenRedactionError, type OpenRedactionMiddlewareOptions, type OpenRedactionOptions, type OpenRedactionRequest, type OptimizerOptions, type PIIDetection, type PIIMatch, type PIIPattern, type PatternAdjustment, type PatternMatchResult, type PatternStats, type Permission, PersistentAuditLogger, type PersistentAuditLoggerOptions, PriorityOptimizer, 
PrometheusServer, type PrometheusServerOptions, type ProximityRule, RBACManager, type RedactionMetrics, type RedactionMode, RegexMaxMatchesError, RegexTimeoutError, type ReportFormat, ReportGenerator, type ReportOptions, type ReportType, type RetentionPolicy, type RiskScore, type Role, type RoleName, SEVERITY_SCORES, type SafeRegexOptions, type SeverityClassification, SeverityClassifier, type SeverityLevel, type SheetDetectionResult, StreamingDetector, type StreamingOptions, type TenantConfig, TenantManager, TenantNotFoundError, TenantQuotaExceededError, type TenantQuotas, TenantSuspendedError, type TenantUsage, type TextExplanation, VIEWER_ROLE, type Validator, type WebhookConfig, type WebhookDelivery, type WebhookDeliveryStatus, type WebhookEvent, type WebhookEventType, WebhookManager, type WebhookStats, type WhitelistEntry, WorkerPool, type WorkerPoolConfig, type WorkerPoolStats, type WorkerResult, type WorkerTask, type XlsxDetectionResult, XlsxProcessor, type XlsxProcessorOptions, allPatterns, analyzeContextFeatures, analyzeFullContext, calculateContextConfidence, calculateRisk, ccpaPreset, commonFalsePositives, compileSafeRegex, contactPatterns, createAPIServer, createBatchProcessor, createCacheDisabledError, createConfigLoadError, createConfigPreset, createContextRulesEngine, createCsvProcessor, createCustomRole, createDocumentProcessor, createExplainAPI, createHealthChecker, createHighMemoryError, createInvalidPatternError, createJsonProcessor, createLearningDisabledError, createMultiPassDisabledError, createNERDetector, createOCRProcessor, createOptimizationDisabledError, createPersistentAuditLogger, createPriorityOptimizer, createPrometheusServer, createRBACManager, createReportGenerator, createSeverityClassifier, createSimpleMultiPass, createStreamingDetector, createTenantManager, createValidationError, createWebhookManager, createWorkerPool, createXlsxProcessor, defaultPasses, detectPII, exportForVersionControl, extractContext, filterFalsePositives, 
financialPatterns, gdprPreset, generateReport, getPatternsByCategory, getPredefinedRole, getPreset, getSeverity, governmentPatterns, groupPatternsByPass, healthCheckMiddleware, hipaaPreset, inferDocumentType, isFalsePositive, isUnsafePattern, mergePassDetections, networkPatterns, openredactionMiddleware, personalPatterns, safeExec, safeExecAll, useAutoRedact, useBatchDetector, useFormFieldValidator, useOpenRedaction, usePIIDetector, validateEmail, validateIBAN, validateLuhn, validateNHS, validateNINO, validateName, validatePattern, validateSSN, validateSortCode, validateUKPassport, verifyWebhookSignature };