openredaction 1.0.8 → 1.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -10,75 +10,36 @@ npm install openredaction
10
10
 
11
11
  ## Quick Start
12
12
 
13
+ `detect()` is **async** — use `await` (inside an `async` function or with top-level `await` in ESM).
14
+
13
15
  ```typescript
14
16
  import { OpenRedaction } from 'openredaction';
15
17
 
16
18
  const shield = new OpenRedaction();
17
- const result = shield.detect("Email john@example.com or call 07700900123");
19
+ const result = await shield.detect("Email john@example.com or call 07700900123");
18
20
 
19
21
  console.log(result.redacted);
20
22
  // "Email [EMAIL_9619] or call [PHONE_UK_MOBILE_9478]"
21
23
  ```
22
24
 
23
- ## Optional AI Assist
24
-
25
- OpenRedaction supports an optional AI-assisted detection mode that enhances regex-based detection by calling a hosted AI endpoint. This feature is **OFF by default** and requires explicit configuration.
26
-
27
- ### Configuration
28
-
29
- ```typescript
30
- import { OpenRedaction } from 'openredaction';
31
-
32
- const detector = new OpenRedaction({
33
- // ... other options ...
34
- ai: {
35
- enabled: true,
36
- endpoint: 'https://your-api.example.com' // Optional: defaults to OPENREDACTION_AI_ENDPOINT env var
37
- }
38
- });
39
-
40
- // detect() is now async when AI is enabled
41
- const result = await detector.detect('Contact John Doe at john@example.com');
42
- ```
43
-
44
- ### How It Works
45
-
46
- 1. **Regex Detection First**: The library always runs regex detection first (existing behavior)
47
- 2. **AI Enhancement**: If `ai.enabled === true` and an endpoint is configured, the library calls the `/ai-detect` endpoint
48
- 3. **Smart Merging**: AI entities are merged with regex detections, with regex taking precedence on conflicts
49
- 4. **Graceful Fallback**: If the AI endpoint fails or is unavailable, the library silently falls back to regex-only detection
50
-
51
- ### Environment Variables
25
+ ## React (optional)
52
26
 
53
- In Node.js environments, you can set the endpoint via environment variable:
27
+ React hooks are exposed through a separate entry point (`openredaction/react`) so the main package stays React-free. If you use React, install `react` alongside the package:
54
28
 
55
29
  ```bash
56
- export OPENREDACTION_AI_ENDPOINT=https://your-api.example.com
30
+ npm install openredaction react
57
31
  ```
58
32
 
59
- ### Important Notes
60
-
61
- - **AI is optional**: The library works exactly as before when `ai.enabled` is `false` or omitted
62
- - **Regex is primary**: AI only adds additional entities; regex detections always take precedence
63
- - **No breaking changes**: When AI is disabled, behavior is identical to previous versions
64
- - **Browser support**: In browsers, you must provide an explicit `ai.endpoint` (env vars not available)
65
- - **Network dependency**: AI mode requires network access to the endpoint
66
-
67
- ### For Sensitive Workloads
68
-
69
- For maximum security and privacy, keep AI disabled and rely purely on regex detection:
70
-
71
- ```typescript
72
- const detector = new OpenRedaction({
73
- // AI not configured = pure regex detection
74
- includeNames: true,
75
- includeEmails: true
76
- });
33
+ ```tsx
34
+ import { useOpenRedaction, usePIIDetector } from 'openredaction/react';
77
35
  ```
78
36
 
37
+ `react` is an optional peer dependency; only install it if you use the React entry.
38
+
79
39
  ## Documentation
80
40
 
81
- Full documentation available at [GitHub](https://github.com/sam247/openredaction)
41
+ - Site & playground: [openredaction.com](https://openredaction.com)
42
+ - Source & issues: [GitHub](https://github.com/sam247/openredaction)
82
43
 
83
44
  ## Features
84
45
 
@@ -0,0 +1,378 @@
1
#!/usr/bin/env node
// NOTE: keep exactly one hashbang line. The hashbang grammar is only recognised
// as the very first line of a script/module; a second "#!" line is parsed as
// ordinary source text and makes the file fail to load with a SyntaxError.
3
+
4
+ //#region src/utils/safe-regex.ts
5
/**
 * Heuristically test whether a regex source string looks ReDoS-prone.
 *
 * Two static checks are applied:
 *  1. Two stacked greedy quantifiers (`*+`, `+*`, `++`, `**`). Escaped
 *     characters (e.g. `\+`) and character classes (e.g. `[+*]`) are blanked
 *     out first, because `+` and `*` are literals there, not quantifiers.
 *  2. The literal sample constructs `(a+)+`, `(b*)*` and `(c+)+`. This check
 *     matches only those exact strings; other nested quantifiers such as
 *     `(x+)+` are NOT detected here.
 *
 * This is a coarse heuristic that only catches obvious mistakes. The original
 * note states that the real protection comes from an execution timeout in
 * safeExec() (not visible in this file).
 *
 * @param {string} pattern - Regex source text.
 * @returns {boolean} `true` when one of the checks matches, otherwise `false`.
 */
function isUnsafePattern(pattern) {
  // Neutralise escapes first so an escaped "]" cannot terminate a class early,
  // then blank out whole character classes.
  const withoutLiterals = pattern
    .replace(/\\[\s\S]/g, "_")
    .replace(/\[[^\]]*\]/g, "_");
  if (/\*\+|\+\*|\+\+|\*\*/.test(withoutLiterals)) return true;
  // Deliberately run on the untouched text: these are literal sample strings.
  if (/\(a\+\)\+|\(b\*\)\*|\(c\+\)\+/.test(pattern)) return true;
  return false;
}
17
/**
 * Validate a regex pattern before it is used.
 *
 * @param {string|RegExp} pattern - Regex source text, or a RegExp whose
 *   `source` (and `flags`) are checked.
 * @returns {void}
 * @throws {Error} When the source is longer than 5000 characters, when
 *   isUnsafePattern() flags it, or when it does not compile. In the last case
 *   the engine's original error is attached as `cause`.
 */
function validatePattern(pattern) {
  const isSourceText = typeof pattern === "string";
  const patternStr = isSourceText ? pattern : pattern.source;
  if (patternStr.length > 5e3) throw new Error(`Regex pattern too long: ${patternStr.length} chars (max 5000)`);
  if (isUnsafePattern(patternStr)) throw new Error(`Potentially unsafe regex pattern detected: ${patternStr.substring(0, 100)}...`);
  try {
    // When a RegExp was supplied, compile with its own flags so the check sees
    // the same grammar (e.g. unicode mode) the caller will actually run.
    new RegExp(patternStr, isSourceText ? undefined : pattern.flags);
  } catch (error) {
    // Keep the engine's error reachable for debugging instead of discarding it.
    throw new Error(`Invalid regex pattern: ${error.message}`, { cause: error });
  }
}
31
+
32
+ //#endregion
33
+ //#region src/cli/test-pattern.ts
34
// Command-line arguments with the first two process.argv entries dropped.
const [, , ...args] = process.argv;
35
/**
 * Print the CLI usage text (commands, options, examples and the list of
 * safety checks) to stdout with a single console.log call.
 *
 * The text begins and ends with a newline, which is why the line list starts
 * and ends with an empty entry.
 *
 * @returns {void}
 */
function printHelp() {
  const helpLines = [
    "",
    "OpenRedaction Pattern Testing Tool",
    "",
    "Test custom patterns before deployment to prevent ReDoS vulnerabilities and validate functionality.",
    "",
    "Usage:",
    "openredaction-test-pattern validate <pattern> Validate pattern safety",
    "openredaction-test-pattern test <pattern> <text> Test pattern against sample text",
    "openredaction-test-pattern check <pattern> [flags] Check pattern with optional flags",
    "openredaction-test-pattern benchmark <pattern> <text> Benchmark pattern performance",
    "openredaction-test-pattern --help Show this help message",
    "",
    "Commands:",
    "validate <pattern>",
    "Checks if a regex pattern is safe (no ReDoS vulnerabilities)",
    "Returns: SAFE or UNSAFE with explanation",
    "",
    "test <pattern> <text>",
    "Tests a pattern against sample text and shows all matches",
    "Returns: List of matches with positions",
    "",
    "check <pattern> [flags]",
    "Validates pattern syntax and compiles with optional flags",
    "Returns: Pattern info and any warnings",
    "",
    "benchmark <pattern> <text>",
    "Measures pattern execution time and match count",
    "Returns: Performance metrics",
    "",
    "Options:",
    "--flags <flags> Regex flags (g, i, m, etc.)",
    "--timeout <ms> Regex timeout in milliseconds (default: 100)",
    "--json Output results as JSON",
    "--verbose Show detailed output",
    "",
    "Examples:",
    "# Validate a pattern for ReDoS",
    'openredaction-test-pattern validate "^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$"',
    "",
    "# Test pattern against sample text",
    'openredaction-test-pattern test "\\b\\d{3}-\\d{2}-\\d{4}\\b" "SSN: 123-45-6789"',
    "",
    "# Check pattern with flags",
    'openredaction-test-pattern check "[a-z]+" --flags gi',
    "",
    "# Benchmark pattern performance",
    'openredaction-test-pattern benchmark "\\b[A-Z][a-z]+ [A-Z][a-z]+\\b" "John Smith and Jane Doe"',
    "",
    "# Test a custom pattern as JSON",
    'openredaction-test-pattern test "\\b\\d{16}\\b" "Card: 4111111111111111" --json',
    "",
    "Safety Checks:",
    "✓ Nested quantifiers (e.g., (a+)+)",
    "✓ Overlapping alternation (e.g., (a|ab)+)",
    "✓ Consecutive quantifiers (e.g., a*+)",
    "✓ Dangerous backreferences (e.g., \\1+)",
    "✓ Excessive pattern length (>5000 chars)",
    "✓ Pattern compilation errors",
    "",
  ];
  console.log(helpLines.join("\n"));
}
96
/**
 * Extract the recognised option flags from the raw CLI argument list.
 *
 * Recognised options:
 *  - `--flags <flags>`  stored as `options.flags` (string)
 *  - `--timeout <ms>`   stored as `options.timeout` (integer, base 10)
 *  - `--json`           sets `options.json = true`
 *  - `--verbose`        sets `options.verbose = true`
 *
 * Anything else (command name, pattern, text) is ignored. A value-taking
 * option with no (or an empty) following argument is ignored as well.
 *
 * @param {string[]} args - Raw arguments.
 * @returns {{flags?: string, timeout?: number, json?: boolean, verbose?: boolean}}
 *   Only the options that were actually supplied.
 */
function parseOptions(args) {
  const options = {};
  for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    const value = args[i + 1];
    if (arg === "--flags" && value) {
      options.flags = value;
      i++; // the value has been consumed
    } else if (arg === "--timeout" && value) {
      const timeout = Number.parseInt(value, 10);
      // A non-numeric value parses to NaN; leave `timeout` unset rather than
      // handing NaN to callers.
      if (!Number.isNaN(timeout)) options.timeout = timeout;
      i++; // the value has been consumed either way
    } else if (arg === "--json") {
      options.json = true;
    } else if (arg === "--verbose") {
      options.verbose = true;
    }
  }
  return options;
}
108
/**
 * `validate` command: run the safety checks on a pattern, print a report and
 * exit the process.
 *
 * The report is printed as JSON when `options.json` is set, otherwise as text.
 * The process exits with status 0 when the pattern is considered safe and 1
 * otherwise.
 *
 * @param {string} pattern - Regex source text to validate.
 * @param {{json?: boolean}} options - Parsed CLI options.
 * @returns {void} Does not return normally; ends with process.exit().
 */
function validatePatternCommand(pattern, options) {
  const report = {
    pattern,
    safe: true,
    warnings: [],
    errors: [],
  };
  // Explanatory warnings, only evaluated once the pattern is already flagged.
  const warningRules = [
    [/(\([^)]*[*+{][^)]*\)[*+{])/, "Nested quantifiers detected: (a+)+ or (a*)*"],
    [/\([^)]*\|[^)]*\)[*+{]/, "Overlapping alternation with quantifier: (a|ab)+"],
    [/[*+?{][*+?{]/, "Consecutive quantifiers: a*+ or a+*"],
    [/\\\d[*+{]/, "Backreference with quantifier: \\1+"],
  ];

  try {
    if (isUnsafePattern(pattern)) {
      report.safe = false;
      report.errors.push("Pattern contains potentially unsafe constructs (ReDoS risk)");
      for (const [probe, note] of warningRules) {
        if (probe.test(pattern)) report.warnings.push(note);
      }
    }
    if (pattern.length > 5e3) {
      report.safe = false;
      report.errors.push(`Pattern too long: ${pattern.length} chars (max 5000)`);
    }
    // Throws for too-long, flagged or non-compiling patterns; the message is
    // collected by the catch below.
    validatePattern(pattern);
    if (report.safe) report.message = "✓ Pattern is SAFE";
  } catch (failure) {
    report.safe = false;
    report.errors.push(failure.message);
  }

  if (options.json) {
    console.log(JSON.stringify(report, null, 2));
  } else {
    console.log("\nPattern Validation Result:");
    console.log("─".repeat(50));
    console.log(`Pattern: ${pattern}`);
    console.log(`Status: ${report.safe ? "✓ SAFE" : "✗ UNSAFE"}`);
    if (report.warnings.length > 0) {
      console.log("\nWarnings:");
      for (const warning of report.warnings) console.log(` ⚠ ${warning}`);
    }
    if (report.errors.length > 0) {
      console.log("\nErrors:");
      for (const problem of report.errors) console.log(` ✗ ${problem}`);
    }
    const verdict = report.safe
      ? "\n✓ Pattern is safe to use"
      : "\n✗ Pattern is NOT safe - please revise before use";
    console.log(verdict);
  }

  process.exit(report.safe ? 0 : 1);
}
153
/**
 * `test` command: run a pattern against sample text, print every match and
 * exit the process.
 *
 * The regex is compiled with `options.flags`, defaulting to "g". At most 1000
 * matches are collected. The result is printed as JSON when `options.json` is
 * set, otherwise as text. Exit status is 0 on success and 1 when validation
 * or compilation failed.
 *
 * @param {string} pattern - Regex source text.
 * @param {string} text - Sample text to search.
 * @param {{flags?: string, json?: boolean}} options - Parsed CLI options.
 * @returns {void} Does not return normally; ends with process.exit().
 */
function testPatternCommand(pattern, text, options) {
  const result = {
    pattern,
    text,
    matches: [],
    matchCount: 0
  };
  try {
    validatePattern(pattern);
    const flags = options.flags || "g";
    const regex = new RegExp(pattern, flags);
    // exec() only moves lastIndex forward for global or sticky regexes. With
    // any other flag set (e.g. "--flags i") every call would return the same
    // first match again, so only that single match is reported.
    const canAdvance = regex.global || regex.sticky;
    let match;
    while ((match = regex.exec(text)) !== null) {
      result.matches.push({
        value: match[0],
        captureGroups: match.slice(1),
        index: match.index,
        length: match[0].length
      });
      result.matchCount++;
      if (!canAdvance) break;
      if (result.matchCount >= 1e3) {
        result.warning = "Stopped after 1000 matches";
        break;
      }
      // Step past a zero-length match so the loop cannot stall on it.
      if (match.index === regex.lastIndex) regex.lastIndex++;
    }
    result.success = true;
  } catch (error) {
    result.success = false;
    result.error = error.message;
  }
  if (options.json) console.log(JSON.stringify(result, null, 2));
  else {
    console.log("\nPattern Test Result:");
    console.log("─".repeat(50));
    console.log(`Pattern: ${pattern}`);
    console.log(`Flags: ${options.flags || "g"}`);
    console.log(`Text: ${text}`);
    console.log(`Matches: ${result.matchCount}`);
    if (result.matchCount > 0) {
      console.log("\nMatches Found:");
      result.matches.forEach((m, i) => {
        console.log(` ${i + 1}. "${m.value}" at position ${m.index}`);
        if (m.captureGroups.length > 0 && m.captureGroups.some((g) => g)) console.log(` Capture groups: [${m.captureGroups.join(", ")}]`);
      });
    } else console.log("\n⚠ No matches found");
    if (result.warning) console.log(`\n⚠ ${result.warning}`);
    if (result.error) console.log(`\n✗ Error: ${result.error}`);
  }
  process.exit(result.success ? 0 : 1);
}
204
/**
 * `check` command: validate and compile a pattern with optional flags, print
 * structural information plus warnings, and exit the process.
 *
 * The report is printed as JSON when `options.json` is set, otherwise as text.
 * Exit status is 0 when the pattern validated and compiled, 1 otherwise.
 *
 * @param {string} pattern - Regex source text.
 * @param {{flags?: string, json?: boolean}} options - Parsed CLI options.
 * @returns {void} Does not return normally; ends with process.exit().
 */
function checkPatternCommand(pattern, options) {
  const report = {
    pattern,
    valid: false,
    info: {},
    warnings: [],
  };
  const yesNo = (flag) => (flag ? "Yes" : "No");

  try {
    validatePattern(pattern);
    const flagText = options.flags || "";
    const compiled = new RegExp(pattern, flagText);
    report.valid = true;
    // Textual probes on the source string; they do not parse the regex.
    report.info = {
      source: compiled.source,
      flags: compiled.flags,
      length: pattern.length,
      hasGroups: /\([^)]*\)/.test(pattern),
      hasQuantifiers: /[*+?{]/.test(pattern),
      hasAnchors: /[\^$]/.test(pattern),
      hasLookahead: /\(\?[=!]/.test(pattern),
      hasLookbehind: /\(\?<[=!]/.test(pattern),
    };
    if (isUnsafePattern(pattern)) {
      report.warnings.push("Pattern may be vulnerable to ReDoS attacks");
    }
    if (pattern.length > 1e3) {
      report.warnings.push("Pattern is very long, may impact performance");
    }
    const lacksGlobal = !flagText.includes("g");
    if (lacksGlobal && /[*+{]/.test(pattern)) {
      report.warnings.push("Pattern has quantifiers but no global flag - will only match once");
    }
  } catch (failure) {
    report.valid = false;
    report.error = failure.message;
  }

  if (options.json) {
    console.log(JSON.stringify(report, null, 2));
  } else {
    console.log("\nPattern Check Result:");
    console.log("─".repeat(50));
    console.log(`Pattern: ${pattern}`);
    console.log(`Flags: ${options.flags || "(none)"}`);
    console.log(`Valid: ${report.valid ? "✓ Yes" : "✗ No"}`);
    if (report.valid) {
      const { info } = report;
      console.log("\nPattern Info:");
      console.log(` Length: ${info.length} characters`);
      console.log(` Has capture groups: ${yesNo(info.hasGroups)}`);
      console.log(` Has quantifiers: ${yesNo(info.hasQuantifiers)}`);
      console.log(` Has anchors (^/$): ${yesNo(info.hasAnchors)}`);
      console.log(` Has lookahead: ${yesNo(info.hasLookahead)}`);
      console.log(` Has lookbehind: ${yesNo(info.hasLookbehind)}`);
    }
    if (report.warnings.length > 0) {
      console.log("\nWarnings:");
      for (const warning of report.warnings) console.log(` ⚠ ${warning}`);
    }
    if (report.error) console.log(`\n✗ Error: ${report.error}`);
  }

  process.exit(report.valid ? 0 : 1);
}
257
/**
 * `benchmark` command: time how long a pattern takes to scan the sample text,
 * print the metrics and exit the process.
 *
 * The regex is compiled with `options.flags`, defaulting to "g". Scanning
 * stops after 10000 matches. The result is printed as JSON when
 * `options.json` is set, otherwise as text. Exit status is 0 on success and 1
 * when validation or compilation failed.
 *
 * @param {string} pattern - Regex source text.
 * @param {string} text - Sample text to scan.
 * @param {{flags?: string, json?: boolean}} options - Parsed CLI options.
 * @returns {void} Does not return normally; ends with process.exit().
 */
function benchmarkPatternCommand(pattern, text, options) {
  const result = {
    pattern,
    text,
    textLength: text.length,
    metrics: {}
  };
  try {
    validatePattern(pattern);
    const flags = options.flags || "g";
    const regex = new RegExp(pattern, flags);
    // exec() only moves lastIndex forward for global or sticky regexes. With
    // any other flag set the loop would re-run the same first match until the
    // cap, which would distort both the match count and the timing.
    const canAdvance = regex.global || regex.sticky;
    const startTime = performance.now();
    let matchCount = 0;
    let match;
    while ((match = regex.exec(text)) !== null) {
      matchCount++;
      if (!canAdvance) break;
      if (matchCount >= 1e4) {
        result.warning = "Stopped after 10000 matches";
        break;
      }
      // Step past a zero-length match so the loop cannot stall on it.
      if (match.index === regex.lastIndex) regex.lastIndex++;
    }
    const executionTime = performance.now() - startTime;
    result.metrics = {
      executionTime: `${executionTime.toFixed(3)}ms`,
      matchCount,
      matchesPerMs: matchCount > 0 ? (matchCount / executionTime).toFixed(2) : "0",
      charsPerMs: (text.length / executionTime).toFixed(0)
    };
    result.success = true;
  } catch (error) {
    result.success = false;
    result.error = error.message;
  }
  if (options.json) console.log(JSON.stringify(result, null, 2));
  else {
    console.log("\nPattern Benchmark Result:");
    console.log("─".repeat(50));
    console.log(`Pattern: ${pattern}`);
    console.log(`Text length: ${result.textLength} characters`);
    if (result.success) {
      // Metrics only exist after a successful run; on failure there is
      // nothing to assess, so this whole section is skipped.
      console.log("\nPerformance Metrics:");
      console.log(` Execution time: ${result.metrics.executionTime}`);
      console.log(` Matches found: ${result.metrics.matchCount}`);
      console.log(` Throughput: ${result.metrics.charsPerMs} chars/ms`);
      if (result.warning) console.log(`\n⚠ ${result.warning}`);
      const execTime = parseFloat(result.metrics.executionTime);
      console.log("\nPerformance Assessment:");
      if (execTime < 1) console.log(" ✓ Excellent - Very fast execution");
      else if (execTime < 10) console.log(" ✓ Good - Acceptable performance");
      else if (execTime < 50) console.log(" ⚠ Fair - May be slow on large texts");
      else console.log(" ✗ Poor - Pattern needs optimization");
    }
    if (result.error) console.log(`\n✗ Error: ${result.error}`);
  }
  process.exit(result.success ? 0 : 1);
}
312
/**
 * CLI entry point: show help when asked (or when no arguments were given),
 * otherwise dispatch to the handler for the command named by the first
 * argument.
 *
 * The pattern is read from the second argument and the sample text from the
 * third. Missing required arguments or an unknown command print a message and
 * exit with status 1. An error thrown while dispatching is reported on stderr
 * and also exits with status 1.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const wantsHelp = args.length === 0 || args.includes("--help") || args.includes("-h");
  if (wantsHelp) {
    printHelp();
    process.exit(0);
  }

  const [command, pattern, text] = args;
  const options = parseOptions(args);

  // One entry per command: whether sample text is required, the usage hint
  // printed when arguments are missing, and the handler to invoke.
  const commands = new Map([
    ["validate", {
      needsText: false,
      usage: "Usage: openredaction-test-pattern validate <pattern>",
      run: () => validatePatternCommand(pattern, options),
    }],
    ["test", {
      needsText: true,
      usage: "Usage: openredaction-test-pattern test <pattern> <text>",
      run: () => testPatternCommand(pattern, text, options),
    }],
    ["check", {
      needsText: false,
      usage: "Usage: openredaction-test-pattern check <pattern> [--flags <flags>]",
      run: () => checkPatternCommand(pattern, options),
    }],
    ["benchmark", {
      needsText: true,
      usage: "Usage: openredaction-test-pattern benchmark <pattern> <text>",
      run: () => benchmarkPatternCommand(pattern, text, options),
    }],
  ]);

  try {
    const spec = commands.get(command);
    if (spec === undefined) {
      console.error(`Unknown command: ${command}`);
      console.log("Run with --help for usage information");
      process.exit(1);
    } else {
      const argsMissing = spec.needsText ? !pattern || !text : !pattern;
      if (argsMissing) {
        console.error(spec.needsText ? "Error: Pattern and text are required" : "Error: Pattern is required");
        console.log(spec.usage);
        process.exit(1);
      }
      spec.run();
    }
  } catch (failure) {
    console.error("Error:", failure.message);
    process.exit(1);
  }
}
373
// Start the CLI. A rejection from main() is reported on stderr and the
// process exits with status 1.
main().catch(function reportFatalError(error) {
  console.error("Fatal error:", error);
  process.exit(1);
});
377
+
378
+ //#endregion