openredaction 1.0.0 → 1.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,430 +0,0 @@
1
#!/usr/bin/env node
// A hashbang is only valid as the very first line of a script; a second one
// further down is a SyntaxError, so exactly one is kept here.
"use strict";
4
-
5
- // src/utils/safe-regex.ts
6
/**
 * Heuristically detects regex constructs that are prone to catastrophic
 * backtracking (ReDoS).
 *
 * Two constructs are flagged:
 *  - two adjacent `*` / `+` quantifiers (e.g. `a*+`, `a++`);
 *  - a group that contains an unbounded quantifier (`*`, `+`, `{n,}`) and is
 *    itself repeated by an unbounded quantifier (e.g. `(a+)+`, `(\d+)*`).
 *
 * Escaped characters and character classes are neutralised first so that
 * literals such as `\*+` or `[*+]` are not mistaken for quantifiers.
 *
 * @param {string} pattern - Regex source text to inspect.
 * @returns {boolean} `true` when a potentially unsafe construct is found.
 */
function isUnsafePattern(pattern) {
  const unboundedQuantifier = /[*+]|\{\d+,\}/;
  // Innermost group (no parentheses inside) plus an optional unbounded
  // quantifier applied directly to it.
  const innermostGroup = /\(([^()]*)\)([*+]|\{\d+,\})?/;

  let rest = String(pattern)
    .replace(/\\[\s\S]/g, "_") // escaped characters are plain atoms
    .replace(/\[[^\]]*\]/g, "_"); // so are whole character classes

  if (/[*+][*+]/.test(rest)) {
    return true;
  }

  // Collapse groups from the inside out, remembering whether each collapsed
  // group could repeat, so that deeper nesting such as `((a)+)+` is caught.
  let found = innermostGroup.exec(rest);
  while (found !== null) {
    const [whole, inner, outerQuantifier] = found;
    const innerRepeats = unboundedQuantifier.test(inner);
    const outerRepeats = outerQuantifier !== undefined;
    if (innerRepeats && outerRepeats) {
      return true;
    }
    const collapsed = innerRepeats || outerRepeats ? "_+" : "_";
    rest = rest.slice(0, found.index) + collapsed + rest.slice(found.index + whole.length);
    found = innermostGroup.exec(rest);
  }
  return false;
}
15
/**
 * Validates a regex pattern: rejects over-long sources, sources flagged by
 * `isUnsafePattern`, and sources that do not compile.
 *
 * @param {string|RegExp} pattern - Pattern source, or an object exposing a
 *   string `source` (such as a RegExp).
 * @returns {void}
 * @throws {TypeError} If `pattern` is neither a string nor has a string `source`.
 * @throws {Error} If the pattern is longer than 5000 characters, is flagged as
 *   potentially unsafe, or fails to compile (the original error is attached
 *   as `cause`).
 */
function validatePattern(pattern) {
  const isText = typeof pattern === "string";
  const hasSource = !isText && pattern != null && typeof pattern.source === "string";
  if (!isText && !hasSource) {
    throw new TypeError("Regex pattern must be a string or RegExp");
  }
  const patternStr = isText ? pattern : pattern.source;
  if (patternStr.length > 5e3) {
    throw new Error(`Regex pattern too long: ${patternStr.length} chars (max 5000)`);
  }
  if (isUnsafePattern(patternStr)) {
    throw new Error(`Potentially unsafe regex pattern detected: ${patternStr.substring(0, 100)}...`);
  }
  // Compile with the pattern's own flags when it has any, so the source is
  // checked under the same syntax mode it was written for.
  const flags = hasSource && typeof pattern.flags === "string" ? pattern.flags : "";
  try {
    new RegExp(patternStr, flags);
  } catch (error) {
    throw new Error(`Invalid regex pattern: ${error.message}`, { cause: error });
  }
}
29
-
30
- // src/cli/test-pattern.ts
31
// Command-line arguments with the runtime and script path (the first two argv entries) removed.
const args = process.argv.slice(2);
32
/**
 * Prints the CLI usage text (commands, options, examples and the list of
 * safety checks) to stdout.
 *
 * @returns {void}
 */
function printHelp() {
  console.log(`
OpenRedaction Pattern Testing Tool

Test custom patterns before deployment to prevent ReDoS vulnerabilities and validate functionality.

Usage:
  openredaction-test-pattern validate <pattern>                 Validate pattern safety
  openredaction-test-pattern test <pattern> <text>              Test pattern against sample text
  openredaction-test-pattern check <pattern> [--flags <flags>]  Check pattern with optional flags
  openredaction-test-pattern benchmark <pattern> <text>         Benchmark pattern performance
  openredaction-test-pattern --help, -h                         Show this help message

Commands:
  validate <pattern>
    Checks if a regex pattern is safe (no ReDoS vulnerabilities)
    Returns: SAFE or UNSAFE with explanation

  test <pattern> <text>
    Tests a pattern against sample text and shows all matches
    Returns: List of matches with positions

  check <pattern> [--flags <flags>]
    Validates pattern syntax and compiles with optional flags
    Returns: Pattern info and any warnings

  benchmark <pattern> <text>
    Measures pattern execution time and match count
    Returns: Performance metrics

Options:
  --flags <flags>     Regex flags (g, i, m, etc.)
  --timeout <ms>      Regex timeout in milliseconds (parsed, but not currently enforced)
  --json              Output results as JSON
  --verbose           Show detailed output

Examples:
  # Validate a pattern for ReDoS
  openredaction-test-pattern validate "^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$"

  # Test pattern against sample text
  openredaction-test-pattern test "\\b\\d{3}-\\d{2}-\\d{4}\\b" "SSN: 123-45-6789"

  # Check pattern with flags
  openredaction-test-pattern check "[a-z]+" --flags gi

  # Benchmark pattern performance
  openredaction-test-pattern benchmark "\\b[A-Z][a-z]+ [A-Z][a-z]+\\b" "John Smith and Jane Doe"

  # Test a custom pattern as JSON
  openredaction-test-pattern test "\\b\\d{16}\\b" "Card: 4111111111111111" --json

Safety Checks:
  \u2713 Nested quantifiers (e.g., (a+)+)
  \u2713 Overlapping alternation (e.g., (a|ab)+)
  \u2713 Consecutive quantifiers (e.g., a*+)
  \u2713 Dangerous backreferences (e.g., \\1+)
  \u2713 Excessive pattern length (>5000 chars)
  \u2713 Pattern compilation errors
`);
}
93
/**
 * Extracts the recognised `--` options from the raw argument list.
 * Unrecognised tokens (including the command and positional arguments) are
 * ignored, and a value-taking option with no following value is skipped.
 *
 * @param {string[]} args2 - Raw command-line arguments.
 * @returns {{flags?: string, timeout?: number, json?: boolean, verbose?: boolean}}
 *   The options that were present.
 * @throws {Error} If `--timeout` is given a value that is not a positive integer.
 */
function parseOptions(args2) {
  const options = {};
  for (let i = 0; i < args2.length; i++) {
    const token = args2[i];
    const value = args2[i + 1];
    if (token === "--flags" && value) {
      options.flags = value;
      i++; // the value has been consumed
    } else if (token === "--timeout" && value) {
      const timeout = Number.parseInt(value, 10);
      // Reject garbage up front instead of silently storing NaN.
      if (Number.isNaN(timeout) || timeout <= 0) {
        throw new Error(`Invalid --timeout value: ${value} (expected a positive integer)`);
      }
      options.timeout = timeout;
      i++;
    } else if (token === "--json") {
      options.json = true;
    } else if (token === "--verbose") {
      options.verbose = true;
    }
  }
  return options;
}
110
/**
 * Implements the `validate` command: reports whether a pattern is considered
 * safe, prints the result (JSON or text) and exits the process with status 0
 * when safe, 1 otherwise.
 *
 * @param {string} pattern - Regex source to validate.
 * @param {{json?: boolean}} options - Parsed CLI options; `json` selects JSON output.
 * @returns {void} Does not return normally; ends with `process.exit`.
 */
function validatePatternCommand(pattern, options) {
  const result = {
    pattern,
    safe: true,
    warnings: [],
    errors: []
  };
  try {
    if (isUnsafePattern(pattern)) {
      result.safe = false;
      result.errors.push("Pattern contains potentially unsafe constructs (ReDoS risk)");
      // The warnings below only explain which construct likely triggered the finding.
      if (/(\([^)]*[*+{][^)]*\)[*+{])/.test(pattern)) {
        result.warnings.push("Nested quantifiers detected: (a+)+ or (a*)*");
      }
      if (/\([^)]*\|[^)]*\)[*+{]/.test(pattern)) {
        result.warnings.push("Overlapping alternation with quantifier: (a|ab)+");
      }
      // Only `*`/`+` pairs count; a trailing `?` is a lazy modifier, not a second quantifier.
      if (/[*+][*+]/.test(pattern)) {
        result.warnings.push("Consecutive quantifiers: a*+ or a+*");
      }
      if (/\\\d[*+{]/.test(pattern)) {
        result.warnings.push("Backreference with quantifier: \\1+");
      }
    }
    if (pattern.length > 5e3) {
      result.safe = false;
      result.errors.push(`Pattern too long: ${pattern.length} chars (max 5000)`);
    }
    if (result.safe) {
      validatePattern(pattern);
      result.message = "\u2713 Pattern is SAFE";
    } else {
      // validatePattern would re-throw the finding already recorded above and
      // report it twice; only the remaining syntax check is still useful here.
      try {
        new RegExp(pattern);
      } catch (syntaxError) {
        result.errors.push(`Invalid regex pattern: ${syntaxError.message}`);
      }
    }
  } catch (error) {
    result.safe = false;
    result.errors.push(error.message);
  }
  if (options.json) {
    console.log(JSON.stringify(result, null, 2));
  } else {
    console.log("\nPattern Validation Result:");
    console.log("\u2500".repeat(50));
    console.log(`Pattern: ${pattern}`);
    console.log(`Status: ${result.safe ? "\u2713 SAFE" : "\u2717 UNSAFE"}`);
    if (result.warnings.length > 0) {
      console.log("\nWarnings:");
      result.warnings.forEach((w) => console.log(` \u26A0 ${w}`));
    }
    if (result.errors.length > 0) {
      console.log("\nErrors:");
      result.errors.forEach((e) => console.log(` \u2717 ${e}`));
    }
    if (result.safe) {
      console.log("\n\u2713 Pattern is safe to use");
    } else {
      console.log("\n\u2717 Pattern is NOT safe - please revise before use");
    }
  }
  process.exit(result.safe ? 0 : 1);
}
169
/**
 * Implements the `test` command: runs a pattern against sample text, prints
 * the matches (JSON or text) and exits the process with status 0 on success,
 * 1 when validation or compilation fails.
 *
 * @param {string} pattern - Regex source to test.
 * @param {string} text - Sample text to search.
 * @param {{flags?: string, json?: boolean}} options - Parsed CLI options;
 *   `flags` defaults to "g".
 * @returns {void} Does not return normally; ends with `process.exit`.
 */
function testPatternCommand(pattern, text, options) {
  const result = {
    pattern,
    text,
    matches: [],
    matchCount: 0
  };
  try {
    validatePattern(pattern);
    const flags = options.flags || "g";
    const regex = new RegExp(pattern, flags);
    // Without `g` or `y`, exec() always restarts at index 0, so looping would
    // record the same first match over and over until the cap is hit.
    const advances = regex.global || regex.sticky;
    let match;
    while ((match = regex.exec(text)) !== null) {
      result.matches.push({
        value: match[0],
        captureGroups: match.slice(1),
        index: match.index,
        length: match[0].length
      });
      result.matchCount++;
      if (!advances) {
        break;
      }
      if (result.matchCount >= 1e3) {
        result.warning = "Stopped after 1000 matches";
        break;
      }
      // A zero-length match does not move lastIndex; step past it manually.
      if (match.index === regex.lastIndex) {
        regex.lastIndex++;
      }
    }
    result.success = true;
  } catch (error) {
    result.success = false;
    result.error = error.message;
  }
  if (options.json) {
    console.log(JSON.stringify(result, null, 2));
  } else {
    console.log("\nPattern Test Result:");
    console.log("\u2500".repeat(50));
    console.log(`Pattern: ${pattern}`);
    console.log(`Flags: ${options.flags || "g"}`);
    console.log(`Text: ${text}`);
    console.log(`Matches: ${result.matchCount}`);
    if (result.matchCount > 0) {
      console.log("\nMatches Found:");
      result.matches.forEach((m, i) => {
        console.log(` ${i + 1}. "${m.value}" at position ${m.index}`);
        if (m.captureGroups.length > 0 && m.captureGroups.some((g) => g)) {
          console.log(` Capture groups: [${m.captureGroups.join(", ")}]`);
        }
      });
    } else {
      console.log("\n\u26A0 No matches found");
    }
    if (result.warning) {
      console.log(`
\u26A0 ${result.warning}`);
    }
    if (result.error) {
      console.log(`
\u2717 Error: ${result.error}`);
    }
  }
  process.exit(result.success ? 0 : 1);
}
233
/**
 * Implements the `check` command: validates and compiles a pattern with the
 * given flags, prints descriptive info and warnings (JSON or text) and exits
 * the process with status 0 when the pattern is valid, 1 otherwise.
 *
 * @param {string} pattern - Regex source to check.
 * @param {{flags?: string, json?: boolean}} options - Parsed CLI options;
 *   `flags` defaults to no flags.
 * @returns {void} Does not return normally; ends with `process.exit`.
 */
function checkPatternCommand(pattern, options) {
  const result = {
    pattern,
    valid: false,
    info: {},
    warnings: []
  };
  try {
    // validatePattern throws for any pattern flagged as unsafe, so no separate
    // ReDoS warning is needed below: such patterns never get past this call.
    validatePattern(pattern);
    const flags = options.flags || "";
    const regex = new RegExp(pattern, flags);
    result.valid = true;
    result.info = {
      source: regex.source,
      flags: regex.flags,
      length: pattern.length,
      hasGroups: /\([^)]*\)/.test(pattern),
      hasQuantifiers: /[*+?{]/.test(pattern),
      hasAnchors: /[\^$]/.test(pattern),
      hasLookahead: /\(\?[=!]/.test(pattern),
      hasLookbehind: /\(\?<[=!]/.test(pattern)
    };
    if (pattern.length > 1e3) {
      result.warnings.push("Pattern is very long, may impact performance");
    }
    if (!flags.includes("g") && /[*+{]/.test(pattern)) {
      result.warnings.push("Pattern has quantifiers but no global flag - will only match once");
    }
  } catch (error) {
    result.valid = false;
    result.error = error.message;
  }
  if (options.json) {
    console.log(JSON.stringify(result, null, 2));
  } else {
    console.log("\nPattern Check Result:");
    console.log("\u2500".repeat(50));
    console.log(`Pattern: ${pattern}`);
    console.log(`Flags: ${options.flags || "(none)"}`);
    console.log(`Valid: ${result.valid ? "\u2713 Yes" : "\u2717 No"}`);
    if (result.valid) {
      console.log("\nPattern Info:");
      console.log(` Length: ${result.info.length} characters`);
      console.log(` Has capture groups: ${result.info.hasGroups ? "Yes" : "No"}`);
      console.log(` Has quantifiers: ${result.info.hasQuantifiers ? "Yes" : "No"}`);
      console.log(` Has anchors (^/$): ${result.info.hasAnchors ? "Yes" : "No"}`);
      console.log(` Has lookahead: ${result.info.hasLookahead ? "Yes" : "No"}`);
      console.log(` Has lookbehind: ${result.info.hasLookbehind ? "Yes" : "No"}`);
    }
    if (result.warnings.length > 0) {
      console.log("\nWarnings:");
      result.warnings.forEach((w) => console.log(` \u26A0 ${w}`));
    }
    if (result.error) {
      console.log(`
\u2717 Error: ${result.error}`);
    }
  }
  process.exit(result.valid ? 0 : 1);
}
296
/**
 * Implements the `benchmark` command: times how long it takes to find the
 * matches of a pattern in the given text, prints the metrics (JSON or text)
 * and exits the process with status 0 on success, 1 when validation or
 * compilation fails.
 *
 * @param {string} pattern - Regex source to benchmark.
 * @param {string} text - Text to run the pattern against.
 * @param {{flags?: string, json?: boolean}} options - Parsed CLI options;
 *   `flags` defaults to "g".
 * @returns {void} Does not return normally; ends with `process.exit`.
 */
function benchmarkPatternCommand(pattern, text, options) {
  const result = {
    pattern,
    text,
    textLength: text.length,
    metrics: {}
  };
  try {
    validatePattern(pattern);
    const flags = options.flags || "g";
    const regex = new RegExp(pattern, flags);
    // Without `g` or `y`, exec() always restarts at index 0, so looping would
    // just re-count the first match until the cap is reached.
    const advances = regex.global || regex.sticky;
    const startTime = performance.now();
    let matchCount = 0;
    let match;
    while ((match = regex.exec(text)) !== null) {
      matchCount++;
      if (!advances) {
        break;
      }
      if (matchCount >= 1e4) {
        result.warning = "Stopped after 10000 matches";
        break;
      }
      // A zero-length match does not move lastIndex; step past it manually.
      if (match.index === regex.lastIndex) {
        regex.lastIndex++;
      }
    }
    const endTime = performance.now();
    const executionTime = endTime - startTime;
    result.metrics = {
      executionTime: `${executionTime.toFixed(3)}ms`,
      matchCount,
      matchesPerMs: matchCount > 0 ? (matchCount / executionTime).toFixed(2) : "0",
      charsPerMs: (text.length / executionTime).toFixed(0)
    };
    result.success = true;
  } catch (error) {
    result.success = false;
    result.error = error.message;
  }
  if (options.json) {
    console.log(JSON.stringify(result, null, 2));
  } else {
    console.log("\nPattern Benchmark Result:");
    console.log("\u2500".repeat(50));
    console.log(`Pattern: ${pattern}`);
    console.log(`Text length: ${result.textLength} characters`);
    // Metrics only exist after a successful run; on failure just report the error.
    if (result.success) {
      console.log("\nPerformance Metrics:");
      console.log(` Execution time: ${result.metrics.executionTime}`);
      console.log(` Matches found: ${result.metrics.matchCount}`);
      console.log(` Throughput: ${result.metrics.charsPerMs} chars/ms`);
    }
    if (result.warning) {
      console.log(`
\u26A0 ${result.warning}`);
    }
    if (result.error) {
      console.log(`
\u2717 Error: ${result.error}`);
    }
    if (result.success) {
      const execTime = parseFloat(result.metrics.executionTime);
      console.log("\nPerformance Assessment:");
      if (execTime < 1) {
        console.log(" \u2713 Excellent - Very fast execution");
      } else if (execTime < 10) {
        console.log(" \u2713 Good - Acceptable performance");
      } else if (execTime < 50) {
        console.log(" \u26A0 Fair - May be slow on large texts");
      } else {
        console.log(" \u2717 Poor - Pattern needs optimization");
      }
    }
  }
  process.exit(result.success ? 0 : 1);
}
366
/**
 * CLI entry point: prints help when asked (or when no arguments are given),
 * otherwise dispatches the first argument as a command with the following
 * positional arguments and the parsed options.
 *
 * Missing required arguments and unknown commands print a message and exit
 * with status 1.
 *
 * @returns {Promise<void>}
 */
async function main() {
  if (args.length === 0 || args.includes("--help") || args.includes("-h")) {
    printHelp();
    process.exit(0);
  }
  const command = args[0];
  const options = parseOptions(args);
  try {
    switch (command) {
      case "validate": {
        const pattern = args[1];
        if (!pattern) {
          console.error("Error: Pattern is required");
          console.log("Usage: openredaction-test-pattern validate <pattern>");
          process.exit(1);
        }
        validatePatternCommand(pattern, options);
        break;
      }
      case "test": {
        const pattern = args[1];
        const text = args[2];
        // Only a missing text is an error; an empty string is a legitimate sample.
        if (!pattern || text === undefined) {
          console.error("Error: Pattern and text are required");
          console.log("Usage: openredaction-test-pattern test <pattern> <text>");
          process.exit(1);
        }
        testPatternCommand(pattern, text, options);
        break;
      }
      case "check": {
        const pattern = args[1];
        if (!pattern) {
          console.error("Error: Pattern is required");
          console.log("Usage: openredaction-test-pattern check <pattern> [--flags <flags>]");
          process.exit(1);
        }
        checkPatternCommand(pattern, options);
        break;
      }
      case "benchmark": {
        const pattern = args[1];
        const text = args[2];
        // Only a missing text is an error; an empty string is a legitimate sample.
        if (!pattern || text === undefined) {
          console.error("Error: Pattern and text are required");
          console.log("Usage: openredaction-test-pattern benchmark <pattern> <text>");
          process.exit(1);
        }
        benchmarkPatternCommand(pattern, text, options);
        break;
      }
      default:
        console.error(`Unknown command: ${command}`);
        console.log("Run with --help for usage information");
        process.exit(1);
    }
  } catch (error) {
    console.error("Error:", error.message);
    process.exit(1);
  }
}
427
// Start the CLI; a rejection from main() is reported and turned into exit status 1.
main().catch((fatal) => {
  console.error("Fatal error:", fatal);
  process.exit(1);
});
@@ -1,26 +0,0 @@
1
- import {
2
- CsvProcessor,
3
- DocumentProcessor,
4
- JsonProcessor,
5
- OCRProcessor,
6
- XlsxProcessor,
7
- createCsvProcessor,
8
- createDocumentProcessor,
9
- createJsonProcessor,
10
- createOCRProcessor,
11
- createXlsxProcessor
12
- } from "./chunk-7OGNW2MU.mjs";
13
- import "./chunk-WMJKH4XE.mjs";
14
- export {
15
- CsvProcessor,
16
- DocumentProcessor,
17
- JsonProcessor,
18
- OCRProcessor,
19
- XlsxProcessor,
20
- createCsvProcessor,
21
- createDocumentProcessor,
22
- createJsonProcessor,
23
- createOCRProcessor,
24
- createXlsxProcessor
25
- };
26
- //# sourceMappingURL=document-AOMZP7UR.mjs.map
@@ -1 +0,0 @@
1
- {"version":3,"sources":[],"sourcesContent":[],"mappings":"","names":[]}