ferret-scan 2.1.1 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -30,7 +30,7 @@ export const injectionRules = [
30
30
  severity: 'HIGH',
31
31
  description: 'Detects attempts to switch Claude into different operational modes',
32
32
  patterns: [
33
- /you\s+are\s+now\s+in\s+[^\n]{0,100}(mode|state)/gi,
33
+ /you\s+are\s+now\s+in\s+\S+\s+(mode|state)/gi,
34
34
  /enter\s+(developer|admin|debug|unrestricted|jailbreak)\s+mode/gi,
35
35
  /switch\s+to\s+(developer|admin|debug|unrestricted)\s+mode/gi,
36
36
  /activate\s+(developer|admin|debug|god)\s+mode/gi,
@@ -59,6 +59,31 @@ export const injectionRules = [
59
59
  remediation: 'Remove jailbreak attempts. These patterns attempt to bypass safety measures.',
60
60
  references: [],
61
61
  enabled: true,
62
+ // Suppress findings when the matched term appears in security-discussion context:
63
+ // documentation explaining what these attacks are, scanner output examples,
64
+ // or skill files that detect/block these patterns rather than deploy them.
65
+ excludePatterns: [
66
+ // Line explicitly discusses detection/blocking of the pattern
67
+ /\b(detect|catch|flag|block|prevent|scan\s+for|identify|reject|report)\b[^\n]{0,80}(jailbreak|DAN|bypass)/gi,
68
+ /\b(jailbreak|DAN|bypass)\b[^\n]{0,80}\b(detect|catch|flag|block|prevent|found|identified)/gi,
69
+ // Term appears inside a quoted string (example output / documentation)
70
+ /["'][^"'\n]{0,120}\b(jailbreak|DAN)\b[^"'\n]{0,120}["']/gi,
71
+ // Markdown rule-ID reference on the same line (scanner output example)
72
+ /\[(?:INJ|AI|SEC|CRED)-\d+\]/gi,
73
+ // Line is a markdown example label
74
+ /^\s*\*\*(?:Input|Output|Example|Finding|Result)\*\*\s*:/i,
75
+ ],
76
+ excludeContext: [
77
+ // Surrounding text discusses security scanning, rules, or findings
78
+ /\b(security\s+(rule|finding|scan|check|gate|scanner|score)|ferret.?scan|scan\s+result)/gi,
79
+ // Surrounding text is clearly educational / explanatory
80
+ /\b(example\s+of|this\s+detects|used\s+to\s+(bypass|attack)|common\s+(attack|technique)|known\s+(jailbreak|attack))/gi,
81
+ // Context indicates the skill is a security tool or scanner itself
82
+ /\b(security\s+scanner|vulnerability\s+scanner|threat\s+detect|scan\s+for\s+(injection|jailbreak))/gi,
83
+ // Markdown example blocks
84
+ /^\s*##\s+Example/im,
85
+ /publication\s+blocked/gi,
86
+ ],
62
87
  },
63
88
  {
64
89
  id: 'INJ-004',
@@ -85,10 +110,10 @@ export const injectionRules = [
85
110
  severity: 'MEDIUM',
86
111
  description: 'Detects suspicious role-play instructions that may be injection attempts',
87
112
  patterns: [
88
- /roleplay\s+as\s+.*(evil|malicious|hacker|attacker)/gi,
89
- /pretend\s+(to\s+be|you\s+are)\s+.*(evil|malicious|hacker)/gi,
90
- /act\s+as\s+.*(evil|malicious|hacker|attacker)/gi,
91
- /you\s+are\s+(an?\s+)?(evil|malicious|rogue|compromised)/gi,
113
+ /roleplay\s+as\s+[^\n]{0,100}(evil|malicious|hacker|attacker)/gi,
114
+ /pretend\s+(to\s+be|you\s+are)\s+[^\n]{0,100}(evil|malicious|hacker)/gi,
115
+ /act\s+as\s+[^\n]{0,100}(evil|malicious|hacker|attacker)/gi,
116
+ /you\s+are\s+(an?\s+)?[^\n]{0,50}(evil|malicious|rogue|compromised)/gi,
92
117
  ],
93
118
  fileTypes: ['md', 'json'],
94
119
  components: ['skill', 'agent', 'ai-config-md'],
@@ -103,9 +128,9 @@ export const injectionRules = [
103
128
  severity: 'HIGH',
104
129
  description: 'Detects hidden instructions using HTML comments or special formatting',
105
130
  patterns: [
106
- /<!--.*?(ignore|override|disregard|bypass).*?-->/gis,
107
- /\[hidden\].*?(ignore|override|disregard)/gi,
108
- /\[SYSTEM\].*?instruction/gi,
131
+ /<!--[\s\S]{0,500}?(ignore|override|disregard|bypass)[\s\S]{0,500}?-->/gi,
132
+ /\[hidden\][^\n]{0,100}(ignore|override|disregard)/gi,
133
+ /\[SYSTEM\][^\n]{0,100}instruction/gi,
109
134
  ],
110
135
  fileTypes: ['md'],
111
136
  components: ['skill', 'agent', 'ai-config-md'],
@@ -0,0 +1,34 @@
1
+ /**
2
+ * Shared regex building blocks for security detection rules
3
+ *
4
+ * Centralises frequently reused keyword sets and pattern factories so rule
5
+ * files stay readable and changes propagate consistently across all rules.
6
+ */
7
+ /** Credential-related keyword alternation used across detection rules */
8
+ export declare const CREDENTIAL_KEYWORDS = "api[_-]?key|token|secret|password|credential";
9
+ /** High-entropy suffix matching strings ≥20 alphanumeric chars */
10
+ export declare const HIGH_ENTROPY_SUFFIX = "[a-zA-Z0-9]{20,}";
11
+ /**
12
+ * Build a credential-harvest detection pattern for a given verb.
13
+ *
14
+ * Matches: `<verb> <1–11 intervening words> <credential keyword>`
15
+ * Limits backtracking by capping the intervening run at 11 whitespace-separated word tokens.
16
+ *
17
+ * @param verb A plain literal verb string — e.g. "send", "transmit", "upload".
18
+ * Must NOT contain regex metacharacters. The following characters are rejected
19
+ * at runtime: `* + { | \ $ ^ ( )`
20
+ * Callers should pass a hard-coded string, never user-supplied input.
21
+ */
22
+ export declare function buildHarvestPattern(verb: string): RegExp;
23
+ /**
24
+ * Build an assignment detection pattern for a given credential keyword.
25
+ *
26
+ * Matches: `api_key = "abc123..."` or `secret-key: 'xyz...'`
27
+ *
28
+ * @param keyword A plain literal credential keyword — e.g. "api_key", "secret-token".
29
+ * Must NOT contain regex metacharacters. The following characters are rejected
30
+ * at runtime: `* + { | \ $ ^ ( )`
31
+ * Callers should pass a hard-coded string, never user-supplied input.
32
+ */
33
+ export declare function buildCredentialAssignPattern(keyword: string): RegExp;
34
+ //# sourceMappingURL=common.d.ts.map
@@ -0,0 +1,48 @@
1
+ /**
2
+ * Shared regex building blocks for security detection rules
3
+ *
4
+ * Centralises frequently reused keyword sets and pattern factories so rule
5
+ * files stay readable and changes propagate consistently across all rules.
6
+ */
7
+ // ─── Keyword sets ─────────────────────────────────────────────────────────────
8
+ /** Credential-related keyword alternation used across detection rules */
9
+ export const CREDENTIAL_KEYWORDS = 'api[_-]?key|token|secret|password|credential';
10
+ /** High-entropy suffix matching strings ≥20 alphanumeric chars */
11
+ export const HIGH_ENTROPY_SUFFIX = '[a-zA-Z0-9]{20,}';
12
+ // ─── Pattern factories ────────────────────────────────────────────────────────
13
+ /**
14
+ * Build a credential-harvest detection pattern for a given verb.
15
+ *
16
+ * Matches: `<verb> <1–11 intervening words> <credential keyword>`
16
+ * Limits backtracking by capping the intervening run at 11 whitespace-separated word tokens.
18
+ *
19
+ * @param verb A plain literal verb string — e.g. "send", "transmit", "upload".
20
+ * Must NOT contain regex metacharacters. The following characters are rejected
21
+ * at runtime: `* + { | \ $ ^ ( )`
22
+ * Callers should pass a hard-coded string, never user-supplied input.
23
+ */
24
+ export function buildHarvestPattern(verb) {
25
+ // Reject dangerous patterns that could cause ReDoS or injection
26
+ if (/\*|\+|\{|\||\\|\$|\^|\(|\)/.test(verb)) {
27
+ throw new Error(`buildHarvestPattern: verb contains dangerous regex metacharacters, got: ${verb}`);
28
+ }
29
+ return new RegExp(`${verb}\\s+\\w+(?:\\s+\\w+){0,10}\\s+(${CREDENTIAL_KEYWORDS})`, 'gi');
30
+ }
31
+ /**
32
+ * Build an assignment detection pattern for a given credential keyword.
33
+ *
34
+ * Matches: `api_key = "abc123..."` or `secret-key: 'xyz...'`
35
+ *
36
+ * @param keyword A plain literal credential keyword — e.g. "api_key", "secret-token".
37
+ * Must NOT contain regex metacharacters. The following characters are rejected
38
+ * at runtime: `* + { | \ $ ^ ( )`
39
+ * Callers should pass a hard-coded string, never user-supplied input.
40
+ */
41
+ export function buildCredentialAssignPattern(keyword) {
42
+ // Reject dangerous patterns that could cause ReDoS or injection
43
+ if (/\*|\+|\{|\||\\|\$|\^|\(|\)/.test(keyword)) {
44
+ throw new Error(`buildCredentialAssignPattern: keyword contains dangerous regex metacharacters, got: ${keyword}`);
45
+ }
46
+ return new RegExp(`${keyword}\\s*[:=]\\s*["']${HIGH_ENTROPY_SUFFIX}`, 'gi');
47
+ }
48
+ //# sourceMappingURL=common.js.map
@@ -54,13 +54,29 @@ function calculateRiskScore(severity, matchCount, fileComponent) {
54
54
  /**
55
55
  * Find all pattern matches in content using global regex search
56
56
  */
57
- function findMatches(content, patterns) {
57
+ function findMatches(content, patterns, opts = { maxMatches: 1000, maxRuntimeMs: 5000 }) {
58
+ const startTime = Date.now();
58
59
  const matches = [];
59
60
  for (const pattern of patterns) {
61
+ // Check time budget before starting each pattern
62
+ if (Date.now() - startTime > opts.maxRuntimeMs) {
63
+ logger.warn(`Regex matcher time budget exceeded (${opts.maxRuntimeMs}ms), stopping pattern processing`);
64
+ return matches;
65
+ }
60
66
  // Create a new regex with global flag
61
67
  const globalPattern = new RegExp(pattern.source, pattern.flags.includes('g') ? pattern.flags : pattern.flags + 'g');
62
68
  let match;
63
69
  while ((match = globalPattern.exec(content)) !== null) {
70
+ // Check time budget on each match
71
+ if (Date.now() - startTime > opts.maxRuntimeMs) {
72
+ logger.warn(`Regex matcher time budget exceeded (${opts.maxRuntimeMs}ms) during pattern processing`);
73
+ return matches;
74
+ }
75
+ // Check match count limit
76
+ if (matches.length >= opts.maxMatches) {
77
+ logger.warn(`Max match limit reached (${opts.maxMatches}), stopping pattern processing`);
78
+ return matches;
79
+ }
64
80
  // Guard against zero-length matches to prevent infinite loops
65
81
  if (match[0].length === 0) {
66
82
  globalPattern.lastIndex += 1;
@@ -151,7 +167,8 @@ export function matchRule(rule, file, content, options) {
151
167
  }
152
168
  const findings = [];
153
169
  const lines = splitLines(content);
154
- const matches = findMatches(content, rule.patterns);
170
+ const patternOptions = { maxMatches: 1000, maxRuntimeMs: 5000 };
171
+ const matches = findMatches(content, rule.patterns, patternOptions);
155
172
  // Group matches by line to avoid duplicates
156
173
  const matchesByLine = new Map();
157
174
  for (const match of matches) {
package/dist/types.d.ts CHANGED
@@ -304,6 +304,12 @@ export interface ScannerConfig {
304
304
  verbose: boolean;
305
305
  /** CI mode (simplified output) */
306
306
  ci: boolean;
307
+ /** Maximum wall-clock ms for semantic AST analysis of a single code block (default: 2000) */
308
+ maxSemanticAnalysisMs?: number;
309
+ /** Maximum AST node count before aborting semantic analysis of a single code block (default: 50000) */
310
+ maxAstNodes?: number;
311
+ /** Per-code-block deadline in ms within the file-scoped budget (default: 500) */
312
+ maxBlockMs?: number;
307
313
  }
308
314
  /** Supported output formats */
309
315
  export type OutputFormat = 'console' | 'json' | 'sarif' | 'html' | 'csv' | 'atlas';
@@ -14,21 +14,34 @@ export interface BaselineFinding {
14
14
  reason?: string;
15
15
  expiresDate?: string;
16
16
  }
17
+ export interface BaselineIntegrity {
18
+ algorithm: 'sha256';
19
+ hash: string;
20
+ }
17
21
  export interface Baseline {
18
22
  version: string;
19
23
  createdDate: string;
20
24
  lastUpdated: string;
21
25
  description?: string;
22
26
  findings: BaselineFinding[];
27
+ integrity?: BaselineIntegrity;
23
28
  }
29
+ /**
30
+ * Compute integrity hash of a baseline (excluding the integrity field itself)
31
+ */
32
+ export declare function computeBaselineIntegrity(baseline: Omit<Baseline, 'integrity'>): BaselineIntegrity;
33
+ /**
34
+ * Verify that a loaded baseline has not been tampered with
35
+ */
36
+ export declare function verifyBaselineIntegrity(baseline: Baseline): boolean;
24
37
  /**
25
38
  * Load baseline from file
26
39
  */
27
- export declare function loadBaseline(baselinePath: string): Baseline | null;
40
+ export declare function loadBaseline(baselinePath: string): Promise<Baseline | null>;
28
41
  /**
29
42
  * Save baseline to file
30
43
  */
31
- export declare function saveBaseline(baseline: Baseline, baselinePath: string): void;
44
+ export declare function saveBaseline(baseline: Baseline, baselinePath: string): Promise<void>;
32
45
  /**
33
46
  * Create a new baseline from scan results
34
47
  */
@@ -2,10 +2,10 @@
2
2
  * Baseline Management - Track and ignore accepted findings
3
3
  * Allows users to create baselines of known/accepted security findings
4
4
  */
5
- import { writeFileSync, readFileSync, existsSync, statSync } from 'node:fs';
5
+ import { statSync } from 'node:fs';
6
+ import { writeFile, readFile, mkdir, access } from 'node:fs/promises';
6
7
  import { resolve, dirname, extname } from 'node:path';
7
8
  import { createHash } from 'node:crypto';
8
- import { mkdirSync } from 'node:fs';
9
9
  import logger from './logger.js';
10
10
  /**
11
11
  * Generate a hash for a finding to uniquely identify it
@@ -14,20 +14,51 @@ function generateFindingHash(finding) {
14
14
  const content = `${finding.ruleId}:${finding.relativePath}:${finding.line}:${finding.match}`;
15
15
  return createHash('sha256').update(content).digest('hex');
16
16
  }
17
+ /**
18
+ * Compute integrity hash of a baseline (excluding the integrity field itself)
19
+ */
20
+ export function computeBaselineIntegrity(baseline) {
21
+ const payload = JSON.stringify({
22
+ version: baseline.version,
23
+ createdDate: baseline.createdDate,
24
+ lastUpdated: baseline.lastUpdated,
25
+ description: baseline.description,
26
+ findings: baseline.findings,
27
+ });
28
+ return {
29
+ algorithm: 'sha256',
30
+ hash: createHash('sha256').update(payload).digest('hex'),
31
+ };
32
+ }
33
+ /**
34
+ * Verify that a loaded baseline has not been tampered with
35
+ */
36
+ export function verifyBaselineIntegrity(baseline) {
37
+ if (!baseline.integrity) {
38
+ return true; // Old baselines without integrity field are accepted
39
+ }
40
+ const expected = computeBaselineIntegrity(baseline);
41
+ return expected.hash === baseline.integrity.hash;
42
+ }
17
43
  /**
18
44
  * Load baseline from file
19
45
  */
20
- export function loadBaseline(baselinePath) {
46
+ export async function loadBaseline(baselinePath) {
21
47
  try {
22
- if (!existsSync(baselinePath)) {
23
- return null;
24
- }
25
- const content = readFileSync(baselinePath, 'utf-8');
48
+ await access(baselinePath);
49
+ }
50
+ catch {
51
+ return null;
52
+ }
53
+ try {
54
+ const content = await readFile(baselinePath, 'utf-8');
26
55
  const baseline = JSON.parse(content);
27
- // Validate baseline structure
28
56
  if (!baseline.version || !baseline.findings || !Array.isArray(baseline.findings)) {
29
57
  throw new Error('Invalid baseline format');
30
58
  }
59
+ if (baseline.integrity && !verifyBaselineIntegrity(baseline)) {
60
+ logger.warn(`Baseline integrity check failed for ${baselinePath} — file may have been tampered with`);
61
+ }
31
62
  logger.debug(`Loaded baseline with ${baseline.findings.length} accepted findings`);
32
63
  return baseline;
33
64
  }
@@ -39,17 +70,17 @@ export function loadBaseline(baselinePath) {
39
70
  /**
40
71
  * Save baseline to file
41
72
  */
42
- export function saveBaseline(baseline, baselinePath) {
73
+ export async function saveBaseline(baseline, baselinePath) {
43
74
  try {
44
- // Ensure directory exists
45
- const dir = dirname(baselinePath);
46
- mkdirSync(dir, { recursive: true });
47
- // Update lastUpdated timestamp
48
- baseline.lastUpdated = new Date().toISOString();
49
- // Write baseline file
50
- const content = JSON.stringify(baseline, null, 2);
51
- writeFileSync(baselinePath, content, 'utf-8');
52
- logger.info(`Baseline saved to ${baselinePath} with ${baseline.findings.length} findings`);
75
+ await mkdir(dirname(baselinePath), { recursive: true });
76
+ const updated = { ...baseline, lastUpdated: new Date().toISOString() };
77
+ const baselineWithIntegrity = {
78
+ ...updated,
79
+ integrity: computeBaselineIntegrity(updated),
80
+ };
81
+ const content = JSON.stringify(baselineWithIntegrity, null, 2);
82
+ await writeFile(baselinePath, content, 'utf-8');
83
+ logger.info(`Baseline saved to ${baselinePath} with ${baselineWithIntegrity.findings.length} findings`);
53
84
  }
54
85
  catch (error) {
55
86
  logger.error(`Failed to save baseline to ${baselinePath}:`, error);
@@ -227,7 +258,7 @@ export function getDefaultBaselinePath(scanPaths) {
227
258
  // Try to find a good location for baseline file
228
259
  const firstPath = scanPaths[0] ?? process.cwd();
229
260
  try {
230
- if (existsSync(firstPath) && statSync(firstPath).isFile()) {
261
+ if (statSync(firstPath).isFile()) {
231
262
  return resolve(dirname(firstPath), '.ferret-baseline.json');
232
263
  }
233
264
  }
@@ -0,0 +1,39 @@
1
+ /**
2
+ * LRU-bounded in-memory cache for file content.
3
+ *
4
+ * Prevents unbounded memory growth when the scanner reads thousands of files:
5
+ * - Per-file cap: individual files larger than `maxFileSize` bytes are never cached.
6
+ * - Aggregate cap: once `totalBytes` would exceed `maxBytes`, the least-recently-used
7
+ * entry is evicted first. Same policy applies to the `maxEntries` cap.
8
+ *
9
+ * Insertion order of a Map mirrors access order after each get() refresh,
10
+ * giving us O(1) LRU with the iteration-based eviction below.
11
+ */
12
+ export interface BoundedContentCacheOpts {
13
+ /** Maximum total cached bytes. Default: 256 MB. */
14
+ maxBytes?: number;
15
+ /** Maximum number of cached entries. Default: 10 000. */
16
+ maxEntries?: number;
17
+ /** Maximum size of a single file to admit into the cache. Default: 1 MB. */
18
+ maxFileSize?: number;
19
+ }
20
+ export declare class BoundedContentCache {
21
+ private readonly map;
22
+ private totalBytes;
23
+ private readonly maxBytes;
24
+ private readonly maxEntries;
25
+ private readonly maxFileSize;
26
+ constructor(opts?: BoundedContentCacheOpts);
27
+ set(path: string, content: string): void;
28
+ get(path: string): string | undefined;
29
+ has(path: string): boolean;
30
+ /** Number of cached entries. */
31
+ size(): number;
32
+ /** Total cached bytes (UTF-8 encoded). */
33
+ bytes(): number;
34
+ /** Expose for CorrelationAnalyzer compatibility (read-only iteration). */
35
+ entries(): IterableIterator<[string, string]>;
36
+ /** Allow spread / array-from for compatibility with Map-based consumers. */
37
+ [Symbol.iterator](): IterableIterator<[string, string]>;
38
+ }
39
+ //# sourceMappingURL=contentCache.d.ts.map
@@ -0,0 +1,77 @@
1
+ /**
2
+ * LRU-bounded in-memory cache for file content.
3
+ *
4
+ * Prevents unbounded memory growth when the scanner reads thousands of files:
5
+ * - Per-file cap: individual files larger than `maxFileSize` bytes are never cached.
6
+ * - Aggregate cap: once `totalBytes` would exceed `maxBytes`, the least-recently-used
7
+ * entry is evicted first. Same policy applies to the `maxEntries` cap.
8
+ *
9
+ * Insertion order of a Map mirrors access order after each get() refresh,
10
+ * giving us O(1) LRU with the iteration-based eviction below.
11
+ */
12
+ const DEFAULT_MAX_BYTES = 256 * 1024 * 1024; // 256 MB
13
+ const DEFAULT_MAX_ENTRIES = 10_000;
14
+ const DEFAULT_MAX_FILE = 1_000_000; // 1 MB
15
+ export class BoundedContentCache {
16
+ map = new Map();
17
+ totalBytes = 0;
18
+ maxBytes;
19
+ maxEntries;
20
+ maxFileSize;
21
+ constructor(opts = {}) {
22
+ this.maxBytes = opts.maxBytes ?? DEFAULT_MAX_BYTES;
23
+ this.maxEntries = opts.maxEntries ?? DEFAULT_MAX_ENTRIES;
24
+ this.maxFileSize = opts.maxFileSize ?? DEFAULT_MAX_FILE;
25
+ }
26
+ set(path, content) {
27
+ const incoming = Buffer.byteLength(content, 'utf-8');
28
+ // Refuse files that exceed the per-file cap.
29
+ if (incoming > this.maxFileSize)
30
+ return;
31
+ // If the key already exists, remove its contribution before re-inserting.
32
+ const existing = this.map.get(path);
33
+ if (existing !== undefined) {
34
+ this.map.delete(path);
35
+ this.totalBytes -= Buffer.byteLength(existing, 'utf-8');
36
+ }
37
+ // Evict the oldest (first-in-map) entries until this one fits.
38
+ while (this.map.size > 0 &&
39
+ (this.totalBytes + incoming > this.maxBytes || this.map.size >= this.maxEntries)) {
40
+ const oldestKey = this.map.keys().next().value;
41
+ const oldestVal = this.map.get(oldestKey);
42
+ this.map.delete(oldestKey);
43
+ this.totalBytes -= Buffer.byteLength(oldestVal, 'utf-8');
44
+ }
45
+ this.map.set(path, content);
46
+ this.totalBytes += incoming;
47
+ }
48
+ get(path) {
49
+ const val = this.map.get(path);
50
+ if (val === undefined)
51
+ return undefined;
52
+ // Refresh to most-recently-used position (LRU via Map insertion order).
53
+ this.map.delete(path);
54
+ this.map.set(path, val);
55
+ return val;
56
+ }
57
+ has(path) {
58
+ return this.map.has(path);
59
+ }
60
+ /** Number of cached entries. */
61
+ size() {
62
+ return this.map.size;
63
+ }
64
+ /** Total cached bytes (UTF-8 encoded). */
65
+ bytes() {
66
+ return this.totalBytes;
67
+ }
68
+ /** Expose for CorrelationAnalyzer compatibility (read-only iteration). */
69
+ entries() {
70
+ return this.map.entries();
71
+ }
72
+ /** Allow spread / array-from for compatibility with Map-based consumers. */
73
+ [Symbol.iterator]() {
74
+ return this.map.entries();
75
+ }
76
+ }
77
+ //# sourceMappingURL=contentCache.js.map
@@ -0,0 +1,50 @@
1
+ /**
2
+ * Safe glob-to-regex conversion utility
3
+ *
4
+ * Prevents regex injection attacks and ReDoS by escaping metacharacters
5
+ * and bounding wildcard replacements.
6
+ */
7
+ export interface GlobOptions {
8
+ /** Whether to anchor with ^$ (default: true) */
9
+ anchored?: boolean;
10
+ /** Whether this is a file path (affects wildcard replacement) */
11
+ pathLike?: boolean;
12
+ }
13
+ /**
14
+ * Convert a glob pattern to a safe RegExp with bounded wildcards.
15
+ *
16
+ * - Escapes all regex metacharacters except `*`
17
+ * - Replaces `*` with bounded character classes to prevent ReDoS
18
+ * - Anchors patterns to prevent unintended substring matches
19
+ * - Caches compiled patterns for performance
20
+ *
21
+ * @param glob The glob pattern (e.g. "*.env", "CRED-*")
22
+ * @param opts Configuration options
23
+ * @returns A safe RegExp that won't cause ReDoS or over-match
24
+ *
25
+ * @example
26
+ * ```typescript
27
+ * // File pattern matching
28
+ * const filePattern = globToRegex("*.env", { pathLike: true });
29
+ * filePattern.test("/path/to/file.env"); // true
30
+ * filePattern.test("file.env.backup"); // false (anchored)
31
+ *
32
+ * // Rule ID pattern matching
33
+ * const rulePattern = globToRegex("CRED-*");
34
+ * rulePattern.test("CRED-001"); // true
35
+ * rulePattern.test("CREDENTIAL-001"); // false (literal "CRED-" prefix required)
36
+ * ```
37
+ */
38
+ export declare function globToRegex(glob: string, opts?: GlobOptions): RegExp;
39
+ /**
40
+ * Clear the compiled pattern cache (useful for testing)
41
+ */
42
+ export declare function clearCache(): void;
43
+ /**
44
+ * Get cache statistics (useful for debugging)
45
+ */
46
+ export declare function getCacheStats(): {
47
+ size: number;
48
+ keys: string[];
49
+ };
50
+ //# sourceMappingURL=glob.d.ts.map
@@ -0,0 +1,84 @@
1
+ /**
2
+ * Safe glob-to-regex conversion utility
3
+ *
4
+ * Prevents regex injection attacks and ReDoS by escaping metacharacters
5
+ * and bounding wildcard replacements.
6
+ */
7
+ // Regex metacharacters that need escaping (all except asterisk)
8
+ const REGEX_META = /[.+?^${}()|[\]\\]/g;
9
+ // Cache compiled regexes to avoid recompilation in hot paths
10
+ const cache = new Map();
11
+ /**
12
+ * Convert a glob pattern to a safe RegExp with bounded wildcards.
13
+ *
14
+ * - Escapes all regex metacharacters except `*`
15
+ * - Replaces `*` with bounded character classes to prevent ReDoS
16
+ * - Anchors patterns to prevent unintended substring matches
17
+ * - Caches compiled patterns for performance
18
+ *
19
+ * @param glob The glob pattern (e.g. "*.env", "CRED-*")
20
+ * @param opts Configuration options
21
+ * @returns A safe RegExp that won't cause ReDoS or over-match
22
+ *
23
+ * @example
24
+ * ```typescript
25
+ * // File pattern matching
26
+ * const filePattern = globToRegex("*.env", { pathLike: true });
27
+ * filePattern.test("/path/to/file.env"); // true
28
+ * filePattern.test("file.env.backup"); // false (anchored)
29
+ *
30
+ * // Rule ID pattern matching
31
+ * const rulePattern = globToRegex("CRED-*");
32
+ * rulePattern.test("CRED-001"); // true
33
+ * rulePattern.test("CREDENTIAL-001"); // false (literal "CRED-" prefix required)
34
+ * ```
35
+ */
36
+ export function globToRegex(glob, opts = {}) {
37
+ const anchored = opts.anchored !== false;
38
+ const pathLike = opts.pathLike ?? false;
39
+ // Create cache key including options
40
+ const key = `${glob}::${anchored}::${pathLike}`;
41
+ // Return cached pattern if available
42
+ const hit = cache.get(key);
43
+ if (hit) {
44
+ return hit;
45
+ }
46
+ // Escape all regex metacharacters except asterisk
47
+ const escaped = glob.replace(REGEX_META, '\\$&');
48
+ // Replace asterisk with bounded character class
49
+ // Path-like: match non-newlines (for file paths)
50
+ // Rule-like: match non-whitespace (for rule IDs)
51
+ const wildcard = pathLike
52
+ ? '[^\\n]{0,200}' // File paths: no newlines, bound to 200 chars
53
+ : '[^\\s]{0,200}'; // Rule IDs: no whitespace, bound to 200 chars
54
+ const body = escaped.replace(/\*/g, wildcard);
55
+ // Anchor pattern if requested (default)
56
+ const pattern = anchored ? `^${body}$` : body;
57
+ try {
58
+ const compiled = new RegExp(pattern);
59
+ cache.set(key, compiled);
60
+ return compiled;
61
+ }
62
+ catch {
63
+ // Fallback to never-matching pattern if compilation fails
64
+ const fallback = /(?!)/; // Negative lookahead - never matches
65
+ cache.set(key, fallback);
66
+ return fallback;
67
+ }
68
+ }
69
+ /**
70
+ * Clear the compiled pattern cache (useful for testing)
71
+ */
72
+ export function clearCache() {
73
+ cache.clear();
74
+ }
75
+ /**
76
+ * Get cache statistics (useful for debugging)
77
+ */
78
+ export function getCacheStats() {
79
+ return {
80
+ size: cache.size,
81
+ keys: Array.from(cache.keys())
82
+ };
83
+ }
84
+ //# sourceMappingURL=glob.js.map
@@ -36,6 +36,7 @@ export function sanitizeFilename(filename) {
36
36
  .replace(/[\/\\]/g, '_') // Replace path separators
37
37
  .replace(/\.\./g, '_') // Replace parent directory references
38
38
  .replace(/[<>:"|?*]/g, '_') // Remove invalid filename characters
39
+ .replace(/\0/g, '_') // Replace null bytes
39
40
  .replace(/^\.+/, '_'); // Remove leading dots
40
41
  }
41
42
  /**