od-temp 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,130 @@
1
+ # OpenRedaction
2
+
3
+ Production-ready PII detection and redaction library with 571+ built-in patterns, multiple redaction modes, compliance presets, enterprise SaaS features, and zero dependencies.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ npm install openredaction
9
+ ```
10
+
11
+ ## Quick Start
12
+
13
+ ```typescript
14
+ import { OpenRedaction } from 'openredaction';
15
+
16
+ const shield = new OpenRedaction();
17
+ const result = shield.detect("Email john@example.com or call 07700900123");
18
+
19
+ console.log(result.redacted);
20
+ // "Email [EMAIL_9619] or call [PHONE_UK_MOBILE_9478]"
21
+ ```
22
+
23
+ ## Optional AI Assist
24
+
25
+ OpenRedaction supports an optional AI-assisted detection mode that enhances regex-based detection by calling a hosted AI endpoint. This feature is **OFF by default** and requires explicit configuration.
26
+
27
+ ### Configuration
28
+
29
+ ```typescript
30
+ import { OpenRedaction } from 'openredaction';
31
+
32
+ const detector = new OpenRedaction({
33
+ // ... other options ...
34
+ ai: {
35
+ enabled: true,
36
+ endpoint: 'https://your-api.example.com' // Optional: defaults to OPENREDACTION_AI_ENDPOINT env var
37
+ }
38
+ });
39
+
40
+ // detect() is now async when AI is enabled
41
+ const result = await detector.detect('Contact John Doe at john@example.com');
42
+ ```
43
+
44
+ ### How It Works
45
+
46
+ 1. **Regex Detection First**: The library always runs regex detection first (existing behavior)
47
+ 2. **AI Enhancement**: If `ai.enabled === true` and an endpoint is configured, the library calls the `/ai-detect` endpoint
48
+ 3. **Smart Merging**: AI entities are merged with regex detections, with regex taking precedence on conflicts
49
+ 4. **Graceful Fallback**: If the AI endpoint fails or is unavailable, the library silently falls back to regex-only detection
50
+
51
+ ### Environment Variables
52
+
53
+ In Node.js environments, you can set the endpoint via environment variable:
54
+
55
+ ```bash
56
+ export OPENREDACTION_AI_ENDPOINT=https://your-api.example.com
57
+ ```
58
+
59
+ ### Important Notes
60
+
61
+ - **AI is optional**: The library works exactly as before when `ai.enabled` is `false` or omitted
62
+ - **Regex is primary**: AI only adds additional entities; regex detections always take precedence
63
+ - **No breaking changes**: When AI is disabled, behavior is identical to previous versions
64
+ - **Browser support**: In browsers, you must provide an explicit `ai.endpoint` (env vars not available)
65
+ - **Network dependency**: AI mode requires network access to the endpoint
66
+
67
+ ### For Sensitive Workloads
68
+
69
+ For maximum security and privacy, keep AI disabled and rely purely on regex detection:
70
+
71
+ ```typescript
72
+ const detector = new OpenRedaction({
73
+ // AI not configured = pure regex detection
74
+ includeNames: true,
75
+ includeEmails: true
76
+ });
77
+ ```
78
+
79
+ ## Documentation
80
+
81
+ Full documentation available at [GitHub](https://github.com/sam247/openredaction)
82
+
83
+ ## Features
84
+
85
+ - 🚀 **Fast & Accurate** - 10-20ms for 2-3KB text
86
+ - 🎯 **571+ PII Patterns** - Comprehensive coverage across multiple categories
87
+ - 🔐 **Enterprise SaaS Ready** - Multi-tenancy, persistent audit logging, webhooks, REST API
88
+ - 📊 **Production Monitoring** - Prometheus metrics, Grafana dashboards, health checks
89
+ - 🧠 **Semantic Detection** - Hybrid NER + regex with 40+ contextual rules
90
+ - 🎨 **Multiple Redaction Modes** - Placeholder, mask-middle, mask-all, format-preserving, token-replace
91
+ - ✅ **Built-in Validators** - Luhn, IBAN, NHS, National ID checksums
92
+ - 🔒 **Compliance Presets** - GDPR, HIPAA, CCPA plus finance, education, healthcare, and transport presets
93
+ - 🎭 **Deterministic Placeholders** - Consistent redaction for same values
94
+ - 🌍 **Global Coverage** - 50+ countries
95
+ - 📄 **Structured Data Support** - JSON, CSV, XLSX with path/cell tracking
96
+ - 🌳 **Zero Dependencies** - No external packages required (core)
97
+ - 📝 **TypeScript Native** - Full type safety and IntelliSense
98
+ - 🧪 **Battle Tested** - 276+ passing tests
99
+
100
+ ## Pattern Categories
101
+
102
+ ### Personal Information
103
+ Email, Phone Numbers (US, UK, International), Names, Social Security Numbers, Passports, Driver's Licenses
104
+
105
+ ### Financial (13 patterns)
106
+ Credit Cards, IBANs, Bank Accounts, Swift Codes, Routing Numbers, IFSC, CLABE, BSB, ISIN, CUSIP, SEDOL, LEI, Cryptocurrencies
107
+
108
+ ### Government IDs (50+ countries)
109
+ SSN, NINO, NHS, Passports, Tax IDs, UTR, VAT, Company Numbers, ITIN, SIN, and more
110
+
111
+ ### Healthcare
112
+ Medical Record Numbers, NHS Numbers, CHI, EHIC, Health Insurance, Prescription Numbers, DEA Numbers, Biometric Data
113
+
114
+ ### Digital Identity
115
+ API Keys, OAuth Tokens, JWT, Bearer Tokens, Discord, Steam, Social Media IDs
116
+
117
+ ### Industries (25+)
118
+ Retail, Legal, Real Estate, Logistics, Insurance, Healthcare, Emergency Response, Hospitality, Professional Certifications, and more
119
+
120
+ ## Enterprise Features
121
+
122
+ - **Persistent Audit Logging** - SQLite/PostgreSQL with cryptographic hashing
123
+ - **Multi-Tenancy** - Tenant isolation, quotas, usage tracking
124
+ - **Prometheus Metrics** - HTTP server with Grafana dashboards
125
+ - **Webhook System** - Event-driven alerts with retry logic
126
+ - **REST API** - Production-ready HTTP API with authentication
127
+
128
+ ## License
129
+
130
+ MIT
@@ -0,0 +1,378 @@
1
#!/usr/bin/env node
3
+
4
+ //#region src/utils/safe-regex.ts
5
/**
 * Heuristically decide whether a regex source is likely to cause catastrophic
 * backtracking (ReDoS).
 *
 * Two constructs are reported:
 *  - consecutive `*` / `+` quantifiers such as `a*+` or `a++`;
 *  - a group that contains an unbounded quantifier (`*`, `+`, `{n,}`) and is
 *    itself repeated by one, e.g. `(a+)+`, `(\d+)*` or `([a-z]*){2,}`.
 *
 * Escaped characters (`\+`) and character-class members (`[*+]`) are literals
 * rather than quantifiers, so they are skipped instead of being reported.
 *
 * This is a basic static heuristic and does not prove a pattern safe.
 * NOTE: the original comment named an execution timeout in safeExec() as the
 * real protection; safeExec() is not defined in this bundle.
 *
 * @param {string} pattern - Regex source text (no delimiters, no flags).
 * @returns {boolean} `true` when a suspicious construct is found.
 */
function isUnsafePattern(pattern) {
  const source = String(pattern);
  // Sticky matcher for an open-ended brace quantifier such as `{2,}`.
  const openEndedBrace = /\{\d+,\}/y;
  // One entry per currently open group: does it contain an unbounded repeat?
  const openGroups = [];
  let inCharClass = false;
  let afterStarOrPlus = false; // previous token was a bare `*` or `+`
  let afterRepeatingGroup = false; // previous token closed a group holding a repeat

  for (let i = 0; i < source.length; i++) {
    const ch = source[i];

    if (ch === "\\") {
      // An escape consumes the next character and is always a literal/atom.
      i++;
      afterStarOrPlus = false;
      afterRepeatingGroup = false;
      continue;
    }
    if (inCharClass) {
      if (ch === "]") inCharClass = false;
      continue;
    }

    let braceLength = 0;
    if (ch === "{") {
      openEndedBrace.lastIndex = i;
      const brace = openEndedBrace.exec(source);
      if (brace !== null) braceLength = brace[0].length;
    }
    const isStarOrPlus = ch === "*" || ch === "+";

    if (isStarOrPlus || braceLength > 0) {
      if (afterRepeatingGroup) return true; // nested quantifier: (x+)+
      if (isStarOrPlus && afterStarOrPlus) return true; // consecutive: a*+, a++
      if (openGroups.length > 0) openGroups[openGroups.length - 1] = true;
      if (braceLength > 0) i += braceLength - 1;
      afterStarOrPlus = isStarOrPlus;
      afterRepeatingGroup = false;
      continue;
    }

    afterStarOrPlus = false;
    afterRepeatingGroup = false;
    if (ch === "[") {
      inCharClass = true;
    } else if (ch === "(") {
      openGroups.push(false);
    } else if (ch === ")") {
      const hadRepeat = openGroups.pop() === true;
      // A repeat inside an inner group is also inside every enclosing group.
      if (hadRepeat && openGroups.length > 0) openGroups[openGroups.length - 1] = true;
      afterRepeatingGroup = hadRepeat;
    }
  }
  return false;
}
17
/**
 * Validate a regex pattern before use.
 *
 * Checks, in order: source length (max 5000 chars), the isUnsafePattern()
 * heuristic, and finally that the pattern compiles.
 *
 * @param {string|RegExp} pattern - Regex source text or a RegExp instance.
 * @returns {void}
 * @throws {TypeError} If `pattern` is neither a string nor a RegExp.
 * @throws {Error} If the pattern is too long, looks unsafe, or does not compile.
 */
function validatePattern(pattern) {
  const isRegExp = pattern instanceof RegExp;
  if (typeof pattern !== "string" && !isRegExp) {
    throw new TypeError("Regex pattern must be a string or RegExp");
  }
  const patternStr = isRegExp ? pattern.source : pattern;
  if (patternStr.length > 5e3) throw new Error(`Regex pattern too long: ${patternStr.length} chars (max 5000)`);
  if (isUnsafePattern(patternStr)) throw new Error(`Potentially unsafe regex pattern detected: ${patternStr.substring(0, 100)}...`);
  try {
    // Compile with the RegExp's own flags: some sources are only valid under
    // `u`/`v` (e.g. `[\u{1F600}-\u{1F64F}]`) and would be rejected without them.
    new RegExp(patternStr, isRegExp ? pattern.flags : undefined);
  } catch (error) {
    throw new Error(`Invalid regex pattern: ${error.message}`, { cause: error });
  }
}
31
+
32
+ //#endregion
33
+ //#region src/cli/test-pattern.ts
34
+ const args = process.argv.slice(2);
35
/**
 * Print the CLI usage/help text to stdout.
 *
 * The `check` usage lines show `[--flags <flags>]` because flags are only
 * read from the `--flags` option, never from a positional argument.
 *
 * @returns {void}
 */
function printHelp() {
  console.log(`
OpenRedaction Pattern Testing Tool

Test custom patterns before deployment to prevent ReDoS vulnerabilities and validate functionality.

Usage:
openredaction-test-pattern validate <pattern> Validate pattern safety
openredaction-test-pattern test <pattern> <text> Test pattern against sample text
openredaction-test-pattern check <pattern> [--flags <flags>] Check pattern with optional flags
openredaction-test-pattern benchmark <pattern> <text> Benchmark pattern performance
openredaction-test-pattern --help Show this help message

Commands:
validate <pattern>
Checks if a regex pattern is safe (no ReDoS vulnerabilities)
Returns: SAFE or UNSAFE with explanation

test <pattern> <text>
Tests a pattern against sample text and shows all matches
Returns: List of matches with positions

check <pattern> [--flags <flags>]
Validates pattern syntax and compiles with optional flags
Returns: Pattern info and any warnings

benchmark <pattern> <text>
Measures pattern execution time and match count
Returns: Performance metrics

Options:
--flags <flags> Regex flags (g, i, m, etc.)
--timeout <ms> Regex timeout in milliseconds (default: 100)
--json Output results as JSON
--verbose Show detailed output

Examples:
# Validate a pattern for ReDoS
openredaction-test-pattern validate "^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$"

# Test pattern against sample text
openredaction-test-pattern test "\\b\\d{3}-\\d{2}-\\d{4}\\b" "SSN: 123-45-6789"

# Check pattern with flags
openredaction-test-pattern check "[a-z]+" --flags gi

# Benchmark pattern performance
openredaction-test-pattern benchmark "\\b[A-Z][a-z]+ [A-Z][a-z]+\\b" "John Smith and Jane Doe"

# Test a custom pattern as JSON
openredaction-test-pattern test "\\b\\d{16}\\b" "Card: 4111111111111111" --json

Safety Checks:
✓ Nested quantifiers (e.g., (a+)+)
✓ Overlapping alternation (e.g., (a|ab)+)
✓ Consecutive quantifiers (e.g., a*+)
✓ Dangerous backreferences (e.g., \\1+)
✓ Excessive pattern length (>5000 chars)
✓ Pattern compilation errors
`);
}
96
/**
 * Extract the recognised `--option` switches from the CLI argument list.
 *
 * Positional arguments and unknown switches are ignored. `--flags` and
 * `--timeout` consume the following argument as their value; when no value
 * follows they are ignored.
 *
 * @param {string[]} args - Raw CLI arguments (command and positionals included).
 * @returns {{flags?: string, timeout?: number, json?: boolean, verbose?: boolean}}
 *   Only the options that were actually supplied are present.
 */
function parseOptions(args) {
  const options = {};
  for (let i = 0; i < args.length; i++) {
    const value = args[i + 1];
    switch (args[i]) {
      case "--flags":
        if (value) {
          options.flags = value;
          i++;
        }
        break;
      case "--timeout":
        if (value) {
          const timeout = Number.parseInt(value, 10);
          // A non-numeric or negative value must not leak through as NaN or a
          // negative number; report it on stderr and leave the option unset.
          if (Number.isNaN(timeout) || timeout < 0) {
            console.error(`Warning: ignoring invalid --timeout value "${value}"`);
          } else {
            options.timeout = timeout;
          }
          i++;
        }
        break;
      case "--json":
        options.json = true;
        break;
      case "--verbose":
        options.verbose = true;
        break;
      default:
        break;
    }
  }
  return options;
}
108
/**
 * `validate` command: report whether a pattern passes the safety checks.
 *
 * Prints the result (JSON when `options.json` is set, otherwise a text
 * report) and terminates the process with exit code 0 when the pattern is
 * considered safe, 1 otherwise.
 *
 * @param {string} pattern - Regex source to validate.
 * @param {{json?: boolean}} options - Parsed CLI options.
 * @returns {void} Ends by calling process.exit().
 */
function validatePatternCommand(pattern, options) {
  const result = {
    pattern,
    safe: true,
    warnings: [],
    errors: []
  };
  try {
    if (isUnsafePattern(pattern)) {
      result.safe = false;
      result.errors.push("Pattern contains potentially unsafe constructs (ReDoS risk)");
      // Best-effort hints about which construct probably triggered the check.
      if (/(\([^)]*[*+{][^)]*\)[*+{])/.test(pattern)) result.warnings.push("Nested quantifiers detected: (a+)+ or (a*)*");
      if (/\([^)]*\|[^)]*\)[*+{]/.test(pattern)) result.warnings.push("Overlapping alternation with quantifier: (a|ab)+");
      if (/[*+?{][*+?{]/.test(pattern)) result.warnings.push("Consecutive quantifiers: a*+ or a+*");
      if (/\\\d[*+{]/.test(pattern)) result.warnings.push("Backreference with quantifier: \\1+");
    }
    if (pattern.length > 5e3) {
      result.safe = false;
      result.errors.push(`Pattern too long: ${pattern.length} chars (max 5000)`);
    }
    // validatePattern() repeats the two checks above and throws on the first
    // one that fails, so it is only called once both have passed; otherwise
    // the same problem would be listed twice. Here it adds the compile check.
    if (result.safe) {
      validatePattern(pattern);
      result.message = "✓ Pattern is SAFE";
    }
  } catch (error) {
    result.safe = false;
    result.errors.push(error.message);
  }
  if (options.json) console.log(JSON.stringify(result, null, 2));
  else {
    console.log("\nPattern Validation Result:");
    console.log("─".repeat(50));
    console.log(`Pattern: ${pattern}`);
    console.log(`Status: ${result.safe ? "✓ SAFE" : "✗ UNSAFE"}`);
    if (result.warnings.length > 0) {
      console.log("\nWarnings:");
      result.warnings.forEach((w) => console.log(` ⚠ ${w}`));
    }
    if (result.errors.length > 0) {
      console.log("\nErrors:");
      result.errors.forEach((e) => console.log(` ✗ ${e}`));
    }
    if (result.safe) console.log("\n✓ Pattern is safe to use");
    else console.log("\n✗ Pattern is NOT safe - please revise before use");
  }
  process.exit(result.safe ? 0 : 1);
}
153
/**
 * `test` command: run a pattern against sample text and list every match.
 *
 * Prints the result (JSON when `options.json` is set, otherwise a text
 * report) and terminates the process with exit code 0 on success, 1 when the
 * pattern is rejected or fails to compile. At most 1000 matches are collected.
 *
 * @param {string} pattern - Regex source to test.
 * @param {string} text - Sample text to search.
 * @param {{flags?: string, json?: boolean}} options - Parsed CLI options.
 * @returns {void} Ends by calling process.exit().
 */
function testPatternCommand(pattern, text, options) {
  const result = {
    pattern,
    text,
    matches: [],
    matchCount: 0
  };
  try {
    validatePattern(pattern);
    const requestedFlags = options.flags || "g";
    // exec() only advances lastIndex for global/sticky regexes. Without `g`
    // the loop below would see the first match over and over until the cap,
    // so the global flag is always added for the scan.
    const flags = requestedFlags.includes("g") ? requestedFlags : `${requestedFlags}g`;
    const regex = new RegExp(pattern, flags);
    let match;
    while ((match = regex.exec(text)) !== null) {
      result.matches.push({
        value: match[0],
        captureGroups: match.slice(1),
        index: match.index,
        length: match[0].length
      });
      result.matchCount++;
      if (result.matchCount >= 1e3) {
        result.warning = "Stopped after 1000 matches";
        break;
      }
      // A zero-length match leaves lastIndex in place; step past it manually.
      if (match.index === regex.lastIndex) regex.lastIndex++;
    }
    result.success = true;
  } catch (error) {
    result.success = false;
    result.error = error.message;
  }
  if (options.json) console.log(JSON.stringify(result, null, 2));
  else {
    console.log("\nPattern Test Result:");
    console.log("─".repeat(50));
    console.log(`Pattern: ${pattern}`);
    console.log(`Flags: ${options.flags || "g"}`);
    console.log(`Text: ${text}`);
    console.log(`Matches: ${result.matchCount}`);
    if (result.matchCount > 0) {
      console.log("\nMatches Found:");
      result.matches.forEach((m, i) => {
        console.log(` ${i + 1}. "${m.value}" at position ${m.index}`);
        if (m.captureGroups.length > 0 && m.captureGroups.some((g) => g)) console.log(` Capture groups: [${m.captureGroups.join(", ")}]`);
      });
    } else console.log("\n⚠ No matches found");
    if (result.warning) console.log(`\n⚠ ${result.warning}`);
    if (result.error) console.log(`\n✗ Error: ${result.error}`);
  }
  process.exit(result.success ? 0 : 1);
}
204
/**
 * `check` command: validate a pattern, compile it with the requested flags
 * and describe its structure.
 *
 * Prints the result (JSON when `options.json` is set, otherwise a text
 * report) and terminates the process with exit code 0 when the pattern is
 * valid, 1 otherwise.
 *
 * @param {string} pattern - Regex source to inspect.
 * @param {{flags?: string, json?: boolean}} options - Parsed CLI options.
 * @returns {void} Ends by calling process.exit().
 */
function checkPatternCommand(pattern, options) {
  const result = {
    pattern,
    valid: false,
    info: {},
    warnings: []
  };
  try {
    validatePattern(pattern);
    const flags = options.flags || "";
    const regex = new RegExp(pattern, flags);
    result.valid = true;
    // Structural facts are derived from simple textual probes of the source.
    result.info = {
      source: regex.source,
      flags: regex.flags,
      length: pattern.length,
      hasGroups: /\([^)]*\)/.test(pattern),
      hasQuantifiers: /[*+?{]/.test(pattern),
      hasAnchors: /[\^$]/.test(pattern),
      hasLookahead: /\(\?[=!]/.test(pattern),
      hasLookbehind: /\(\?<[=!]/.test(pattern)
    };
    if (isUnsafePattern(pattern)) result.warnings.push("Pattern may be vulnerable to ReDoS attacks");
    if (pattern.length > 1e3) result.warnings.push("Pattern is very long, may impact performance");
    if (!flags.includes("g") && /[*+{]/.test(pattern)) result.warnings.push("Pattern has quantifiers but no global flag - will only match once");
  } catch (error) {
    result.valid = false;
    result.error = error.message;
  }
  if (options.json) console.log(JSON.stringify(result, null, 2));
  else {
    console.log("\nPattern Check Result:");
    console.log("─".repeat(50));
    console.log(`Pattern: ${pattern}`);
    console.log(`Flags: ${options.flags || "(none)"}`);
    console.log(`Valid: ${result.valid ? "✓ Yes" : "✗ No"}`);
    if (result.valid) {
      console.log("\nPattern Info:");
      console.log(` Length: ${result.info.length} characters`);
      console.log(` Has capture groups: ${result.info.hasGroups ? "Yes" : "No"}`);
      console.log(` Has quantifiers: ${result.info.hasQuantifiers ? "Yes" : "No"}`);
      console.log(` Has anchors (^/$): ${result.info.hasAnchors ? "Yes" : "No"}`);
      console.log(` Has lookahead: ${result.info.hasLookahead ? "Yes" : "No"}`);
      console.log(` Has lookbehind: ${result.info.hasLookbehind ? "Yes" : "No"}`);
    }
    if (result.warnings.length > 0) {
      console.log("\nWarnings:");
      result.warnings.forEach((w) => console.log(` ⚠ ${w}`));
    }
    if (result.error) console.log(`\n✗ Error: ${result.error}`);
  }
  process.exit(result.valid ? 0 : 1);
}
257
/**
 * `benchmark` command: time a full scan of `text` with `pattern`.
 *
 * Prints the result (JSON when `options.json` is set, otherwise a text
 * report) and terminates the process with exit code 0 on success, 1 when the
 * pattern is rejected or fails to compile. The scan stops after 10000 matches.
 *
 * @param {string} pattern - Regex source to benchmark.
 * @param {string} text - Text to scan.
 * @param {{flags?: string, json?: boolean}} options - Parsed CLI options.
 * @returns {void} Ends by calling process.exit().
 */
function benchmarkPatternCommand(pattern, text, options) {
  const result = {
    pattern,
    text,
    textLength: text.length,
    metrics: {}
  };
  try {
    validatePattern(pattern);
    const requestedFlags = options.flags || "g";
    // exec() only advances lastIndex for global/sticky regexes. Without `g`
    // the loop would count the first match repeatedly until the cap, so the
    // global flag is always added for the scan.
    const flags = requestedFlags.includes("g") ? requestedFlags : `${requestedFlags}g`;
    const regex = new RegExp(pattern, flags);
    const startTime = performance.now();
    let matchCount = 0;
    let match;
    while ((match = regex.exec(text)) !== null) {
      matchCount++;
      if (matchCount >= 1e4) {
        result.warning = "Stopped after 10000 matches";
        break;
      }
      // A zero-length match leaves lastIndex in place; step past it manually.
      if (match.index === regex.lastIndex) regex.lastIndex++;
    }
    const executionTime = performance.now() - startTime;
    result.metrics = {
      executionTime: `${executionTime.toFixed(3)}ms`,
      matchCount,
      matchesPerMs: matchCount > 0 ? (matchCount / executionTime).toFixed(2) : "0",
      charsPerMs: (text.length / executionTime).toFixed(0)
    };
    result.success = true;
  } catch (error) {
    result.success = false;
    result.error = error.message;
  }
  if (options.json) console.log(JSON.stringify(result, null, 2));
  else {
    console.log("\nPattern Benchmark Result:");
    console.log("─".repeat(50));
    console.log(`Pattern: ${pattern}`);
    console.log(`Text length: ${result.textLength} characters`);
    // Metrics exist only after a successful run; a failed run must not print
    // `undefined` values or be rated as a slow pattern.
    if (result.success) {
      console.log("\nPerformance Metrics:");
      console.log(` Execution time: ${result.metrics.executionTime}`);
      console.log(` Matches found: ${result.metrics.matchCount}`);
      console.log(` Throughput: ${result.metrics.charsPerMs} chars/ms`);
    }
    if (result.warning) console.log(`\n⚠ ${result.warning}`);
    if (result.error) console.log(`\n✗ Error: ${result.error}`);
    if (result.success) {
      const execTime = Number.parseFloat(result.metrics.executionTime);
      console.log("\nPerformance Assessment:");
      if (execTime < 1) console.log(" ✓ Excellent - Very fast execution");
      else if (execTime < 10) console.log(" ✓ Good - Acceptable performance");
      else if (execTime < 50) console.log(" ⚠ Fair - May be slow on large texts");
      else console.log(" ✗ Poor - Pattern needs optimization");
    }
  }
  process.exit(result.success ? 0 : 1);
}
312
/**
 * CLI entry point.
 *
 * Shows the help text when no arguments or a help switch is given; otherwise
 * reads the command from the first argument, the pattern from the second and
 * (for `test` / `benchmark`) the sample text from the third, then hands over
 * to the matching command function. Missing arguments and unknown commands
 * are reported and end the process with exit code 1.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const helpRequested = args.length === 0 || ["--help", "-h"].some((flag) => args.includes(flag));
  if (helpRequested) {
    printHelp();
    process.exit(0);
  }
  const [command, pattern, text] = args;
  const options = parseOptions(args);

  const patternMissing = () => !pattern;
  const patternOrTextMissing = () => !pattern || !text;
  // One entry per command: how to detect missing arguments, what to print
  // when they are missing, and how to run the command.
  const commands = new Map([
    ["validate", {
      isIncomplete: patternMissing,
      problem: "Error: Pattern is required",
      usage: "Usage: openredaction-test-pattern validate <pattern>",
      run: () => validatePatternCommand(pattern, options)
    }],
    ["test", {
      isIncomplete: patternOrTextMissing,
      problem: "Error: Pattern and text are required",
      usage: "Usage: openredaction-test-pattern test <pattern> <text>",
      run: () => testPatternCommand(pattern, text, options)
    }],
    ["check", {
      isIncomplete: patternMissing,
      problem: "Error: Pattern is required",
      usage: "Usage: openredaction-test-pattern check <pattern> [--flags <flags>]",
      run: () => checkPatternCommand(pattern, options)
    }],
    ["benchmark", {
      isIncomplete: patternOrTextMissing,
      problem: "Error: Pattern and text are required",
      usage: "Usage: openredaction-test-pattern benchmark <pattern> <text>",
      run: () => benchmarkPatternCommand(pattern, text, options)
    }]
  ]);

  try {
    const entry = commands.get(command);
    if (entry === undefined) {
      console.error(`Unknown command: ${command}`);
      console.log("Run with --help for usage information");
      process.exit(1);
      return;
    }
    if (entry.isIncomplete()) {
      console.error(entry.problem);
      console.log(entry.usage);
      process.exit(1);
    }
    entry.run();
  } catch (error) {
    console.error("Error:", error.message);
    process.exit(1);
  }
}
373
+ main().catch((error) => {
374
+ console.error("Fatal error:", error);
375
+ process.exit(1);
376
+ });
377
+
378
+ //#endregion