ferret-scan 2.1.1 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +23 -0
- package/bin/ferret.js +5 -5
- package/dist/analyzers/AstAnalyzer.d.ts +5 -1
- package/dist/analyzers/AstAnalyzer.js +25 -4
- package/dist/features/ignoreComments.js +5 -5
- package/dist/features/policyEnforcement.js +3 -2
- package/dist/remediation/Fixer.js +56 -30
- package/dist/remediation/Quarantine.js +55 -5
- package/dist/rules/ai-specific.js +29 -8
- package/dist/rules/backdoors.js +12 -12
- package/dist/rules/correlationRules.js +6 -6
- package/dist/rules/index.d.ts +1 -0
- package/dist/rules/index.js +10 -1
- package/dist/rules/injection.js +33 -8
- package/dist/rules/patterns/common.d.ts +34 -0
- package/dist/rules/patterns/common.js +48 -0
- package/dist/scanner/PatternMatcher.js +19 -2
- package/dist/types.d.ts +6 -0
- package/dist/utils/baseline.d.ts +15 -2
- package/dist/utils/baseline.js +50 -19
- package/dist/utils/contentCache.d.ts +39 -0
- package/dist/utils/contentCache.js +77 -0
- package/dist/utils/glob.d.ts +50 -0
- package/dist/utils/glob.js +84 -0
- package/dist/utils/pathSecurity.js +1 -0
- package/dist/utils/safeRegex.d.ts +94 -0
- package/dist/utils/safeRegex.js +147 -0
- package/dist/utils/schemas.d.ts +6 -0
- package/dist/utils/schemas.js +13 -0
- package/package.json +13 -4
package/dist/rules/injection.js
CHANGED
|
@@ -30,7 +30,7 @@ export const injectionRules = [
|
|
|
30
30
|
severity: 'HIGH',
|
|
31
31
|
description: 'Detects attempts to switch Claude into different operational modes',
|
|
32
32
|
patterns: [
|
|
33
|
-
/you\s+are\s+now\s+in\s+
|
|
33
|
+
/you\s+are\s+now\s+in\s+\S+\s+(mode|state)/gi,
|
|
34
34
|
/enter\s+(developer|admin|debug|unrestricted|jailbreak)\s+mode/gi,
|
|
35
35
|
/switch\s+to\s+(developer|admin|debug|unrestricted)\s+mode/gi,
|
|
36
36
|
/activate\s+(developer|admin|debug|god)\s+mode/gi,
|
|
@@ -59,6 +59,31 @@ export const injectionRules = [
|
|
|
59
59
|
remediation: 'Remove jailbreak attempts. These patterns attempt to bypass safety measures.',
|
|
60
60
|
references: [],
|
|
61
61
|
enabled: true,
|
|
62
|
+
// Suppress findings when the matched term appears in security-discussion context:
|
|
63
|
+
// documentation explaining what these attacks are, scanner output examples,
|
|
64
|
+
// or skill files that detect/block these patterns rather than deploy them.
|
|
65
|
+
excludePatterns: [
|
|
66
|
+
// Line explicitly discusses detection/blocking of the pattern
|
|
67
|
+
/\b(detect|catch|flag|block|prevent|scan\s+for|identify|reject|report)\b[^\n]{0,80}(jailbreak|DAN|bypass)/gi,
|
|
68
|
+
/\b(jailbreak|DAN|bypass)\b[^\n]{0,80}\b(detect|catch|flag|block|prevent|found|identified)/gi,
|
|
69
|
+
// Term appears inside a quoted string (example output / documentation)
|
|
70
|
+
/["'][^"'\n]{0,120}\b(jailbreak|DAN)\b[^"'\n]{0,120}["']/gi,
|
|
71
|
+
// Markdown rule-ID reference on the same line (scanner output example)
|
|
72
|
+
/\[(?:INJ|AI|SEC|CRED)-\d+\]/gi,
|
|
73
|
+
// Line is a markdown example label
|
|
74
|
+
/^\s*\*\*(?:Input|Output|Example|Finding|Result)\*\*\s*:/i,
|
|
75
|
+
],
|
|
76
|
+
excludeContext: [
|
|
77
|
+
// Surrounding text discusses security scanning, rules, or findings
|
|
78
|
+
/\b(security\s+(rule|finding|scan|check|gate|scanner|score)|ferret.?scan|scan\s+result)/gi,
|
|
79
|
+
// Surrounding text is clearly educational / explanatory
|
|
80
|
+
/\b(example\s+of|this\s+detects|used\s+to\s+(bypass|attack)|common\s+(attack|technique)|known\s+(jailbreak|attack))/gi,
|
|
81
|
+
// Context indicates the skill is a security tool or scanner itself
|
|
82
|
+
/\b(security\s+scanner|vulnerability\s+scanner|threat\s+detect|scan\s+for\s+(injection|jailbreak))/gi,
|
|
83
|
+
// Markdown example blocks
|
|
84
|
+
/^\s*##\s+Example/im,
|
|
85
|
+
/publication\s+blocked/gi,
|
|
86
|
+
],
|
|
62
87
|
},
|
|
63
88
|
{
|
|
64
89
|
id: 'INJ-004',
|
|
@@ -85,10 +110,10 @@ export const injectionRules = [
|
|
|
85
110
|
severity: 'MEDIUM',
|
|
86
111
|
description: 'Detects suspicious role-play instructions that may be injection attempts',
|
|
87
112
|
patterns: [
|
|
88
|
-
/roleplay\s+as\s
|
|
89
|
-
/pretend\s+(to\s+be|you\s+are)\s
|
|
90
|
-
/act\s+as\s
|
|
91
|
-
/you\s+are\s+(an?\s+)?(evil|malicious|rogue|compromised)/gi,
|
|
113
|
+
/roleplay\s+as\s+[^\n]{0,100}(evil|malicious|hacker|attacker)/gi,
|
|
114
|
+
/pretend\s+(to\s+be|you\s+are)\s+[^\n]{0,100}(evil|malicious|hacker)/gi,
|
|
115
|
+
/act\s+as\s+[^\n]{0,100}(evil|malicious|hacker|attacker)/gi,
|
|
116
|
+
/you\s+are\s+(an?\s+)?[^\n]{0,50}(evil|malicious|rogue|compromised)/gi,
|
|
92
117
|
],
|
|
93
118
|
fileTypes: ['md', 'json'],
|
|
94
119
|
components: ['skill', 'agent', 'ai-config-md'],
|
|
@@ -103,9 +128,9 @@ export const injectionRules = [
|
|
|
103
128
|
severity: 'HIGH',
|
|
104
129
|
description: 'Detects hidden instructions using HTML comments or special formatting',
|
|
105
130
|
patterns: [
|
|
106
|
-
|
|
107
|
-
/\[hidden\]
|
|
108
|
-
/\[SYSTEM\]
|
|
131
|
+
/<!--[\s\S]{0,500}?(ignore|override|disregard|bypass)[\s\S]{0,500}?-->/gi,
|
|
132
|
+
/\[hidden\][^\n]{0,100}(ignore|override|disregard)/gi,
|
|
133
|
+
/\[SYSTEM\][^\n]{0,100}instruction/gi,
|
|
109
134
|
],
|
|
110
135
|
fileTypes: ['md'],
|
|
111
136
|
components: ['skill', 'agent', 'ai-config-md'],
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
/**
 * Shared regex building blocks for security detection rules
 *
 * Centralises frequently reused keyword sets and pattern factories so rule
 * files stay readable and changes propagate consistently across all rules.
 */
/** Credential-related keyword alternation used across detection rules */
export declare const CREDENTIAL_KEYWORDS = "api[_-]?key|token|secret|password|credential";
/** High-entropy suffix matching strings ≥20 alphanumeric chars */
export declare const HIGH_ENTROPY_SUFFIX = "[a-zA-Z0-9]{20,}";
/**
 * Build a credential-harvest detection pattern for a given verb.
 *
 * Matches: `<verb>` followed by 1–11 whitespace-separated words, the last of
 * which must be followed by a credential keyword (see CREDENTIAL_KEYWORDS).
 * The word count is bounded to avoid catastrophic backtracking.
 *
 * @param verb A plain literal verb string — e.g. "send", "transmit", "upload".
 * Must NOT contain regex metacharacters. The following characters are rejected
 * at runtime: `* + { | \ $ ^ ( )`
 * NOTE(review): `.` `?` `[` `]` are regex metacharacters too but are absent
 * from the runtime reject list above — confirm the implementation also rejects
 * (or escapes) them, since a stray `.` would silently widen the match.
 * Callers should pass a hard-coded string, never user-supplied input.
 * @returns A case-insensitive global RegExp; the builder throws an Error when
 * the verb fails validation.
 */
export declare function buildHarvestPattern(verb: string): RegExp;
/**
 * Build an assignment detection pattern for a given credential keyword.
 *
 * Matches: `api_key = "abc123..."` or `secret-key: 'xyz...'` — the keyword,
 * `:` or `=`, an opening quote, then a ≥20-char alphanumeric value
 * (HIGH_ENTROPY_SUFFIX).
 *
 * @param keyword A plain literal credential keyword — e.g. "api_key", "secret-token".
 * Must NOT contain regex metacharacters. The following characters are rejected
 * at runtime: `* + { | \ $ ^ ( )`
 * NOTE(review): `.` `?` `[` `]` are regex metacharacters too but are absent
 * from the runtime reject list above — confirm the implementation also rejects
 * (or escapes) them.
 * Callers should pass a hard-coded string, never user-supplied input.
 * @returns A case-insensitive global RegExp; the builder throws an Error when
 * the keyword fails validation.
 */
export declare function buildCredentialAssignPattern(keyword: string): RegExp;
|
|
34
|
+
//# sourceMappingURL=common.d.ts.map
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
/**
 * Shared regex building blocks for security detection rules
 *
 * Centralises frequently reused keyword sets and pattern factories so rule
 * files stay readable and changes propagate consistently across all rules.
 */
// ─── Keyword sets ─────────────────────────────────────────────────────────────
/** Credential-related keyword alternation used across detection rules */
export const CREDENTIAL_KEYWORDS = 'api[_-]?key|token|secret|password|credential';
/** High-entropy suffix matching strings ≥20 alphanumeric chars */
export const HIGH_ENTROPY_SUFFIX = '[a-zA-Z0-9]{20,}';
// ─── Pattern factories ────────────────────────────────────────────────────────
// Fix: the previous deny-list (* + { | \ $ ^ ( )) omitted the metacharacters
// . ? [ ] — a literal `.` passed validation and silently became a
// match-anything wildcard (pattern injection), while `?` / `[` produced an
// opaque RegExp SyntaxError instead of the intended validation error.
// Reject every regex metacharacter, matching the documented contract
// ("Must NOT contain regex metacharacters").
const REGEX_METACHARS = /[.*+?^${}()|[\]\\]/;
/**
 * Shared guard for the factories below: throws when `value` contains any
 * regex metacharacter. `fnName`/`paramName` keep the error text identical to
 * the messages each factory previously produced.
 */
function assertPlainLiteral(fnName, paramName, value) {
    if (REGEX_METACHARS.test(value)) {
        throw new Error(`${fnName}: ${paramName} contains dangerous regex metacharacters, got: ${value}`);
    }
}
/**
 * Build a credential-harvest detection pattern for a given verb.
 *
 * Matches: `<verb>` followed by 1–11 whitespace-separated words and then a
 * credential keyword (CREDENTIAL_KEYWORDS). The word count is bounded to
 * avoid catastrophic backtracking.
 *
 * @param verb A plain literal verb string — e.g. "send", "transmit", "upload".
 *   Must NOT contain regex metacharacters: all of `. * + ? { } [ ] ( ) | \ $ ^`
 *   are rejected at runtime. Callers should pass a hard-coded string, never
 *   user-supplied input.
 * @returns A case-insensitive global RegExp.
 * @throws Error when `verb` contains a regex metacharacter.
 */
export function buildHarvestPattern(verb) {
    assertPlainLiteral('buildHarvestPattern', 'verb', verb);
    return new RegExp(`${verb}\\s+\\w+(?:\\s+\\w+){0,10}\\s+(${CREDENTIAL_KEYWORDS})`, 'gi');
}
/**
 * Build an assignment detection pattern for a given credential keyword.
 *
 * Matches: `api_key = "abc123..."` or `secret-key: 'xyz...'` — the keyword,
 * `:` or `=`, an opening quote, then a ≥20-char alphanumeric value
 * (HIGH_ENTROPY_SUFFIX).
 *
 * @param keyword A plain literal credential keyword — e.g. "api_key",
 *   "secret-token". Must NOT contain regex metacharacters (same reject set as
 *   buildHarvestPattern). Callers should pass a hard-coded string, never
 *   user-supplied input.
 * @returns A case-insensitive global RegExp.
 * @throws Error when `keyword` contains a regex metacharacter.
 */
export function buildCredentialAssignPattern(keyword) {
    assertPlainLiteral('buildCredentialAssignPattern', 'keyword', keyword);
    return new RegExp(`${keyword}\\s*[:=]\\s*["']${HIGH_ENTROPY_SUFFIX}`, 'gi');
}
|
|
48
|
+
//# sourceMappingURL=common.js.map
|
|
@@ -54,13 +54,29 @@ function calculateRiskScore(severity, matchCount, fileComponent) {
|
|
|
54
54
|
/**
|
|
55
55
|
* Find all pattern matches in content using global regex search
|
|
56
56
|
*/
|
|
57
|
-
function findMatches(content, patterns) {
|
|
57
|
+
function findMatches(content, patterns, opts = { maxMatches: 1000, maxRuntimeMs: 5000 }) {
|
|
58
|
+
const startTime = Date.now();
|
|
58
59
|
const matches = [];
|
|
59
60
|
for (const pattern of patterns) {
|
|
61
|
+
// Check time budget before starting each pattern
|
|
62
|
+
if (Date.now() - startTime > opts.maxRuntimeMs) {
|
|
63
|
+
logger.warn(`Regex matcher time budget exceeded (${opts.maxRuntimeMs}ms), stopping pattern processing`);
|
|
64
|
+
return matches;
|
|
65
|
+
}
|
|
60
66
|
// Create a new regex with global flag
|
|
61
67
|
const globalPattern = new RegExp(pattern.source, pattern.flags.includes('g') ? pattern.flags : pattern.flags + 'g');
|
|
62
68
|
let match;
|
|
63
69
|
while ((match = globalPattern.exec(content)) !== null) {
|
|
70
|
+
// Check time budget on each match
|
|
71
|
+
if (Date.now() - startTime > opts.maxRuntimeMs) {
|
|
72
|
+
logger.warn(`Regex matcher time budget exceeded (${opts.maxRuntimeMs}ms) during pattern processing`);
|
|
73
|
+
return matches;
|
|
74
|
+
}
|
|
75
|
+
// Check match count limit
|
|
76
|
+
if (matches.length >= opts.maxMatches) {
|
|
77
|
+
logger.warn(`Max match limit reached (${opts.maxMatches}), stopping pattern processing`);
|
|
78
|
+
return matches;
|
|
79
|
+
}
|
|
64
80
|
// Guard against zero-length matches to prevent infinite loops
|
|
65
81
|
if (match[0].length === 0) {
|
|
66
82
|
globalPattern.lastIndex += 1;
|
|
@@ -151,7 +167,8 @@ export function matchRule(rule, file, content, options) {
|
|
|
151
167
|
}
|
|
152
168
|
const findings = [];
|
|
153
169
|
const lines = splitLines(content);
|
|
154
|
-
const
|
|
170
|
+
const patternOptions = { maxMatches: 1000, maxRuntimeMs: 5000 };
|
|
171
|
+
const matches = findMatches(content, rule.patterns, patternOptions);
|
|
155
172
|
// Group matches by line to avoid duplicates
|
|
156
173
|
const matchesByLine = new Map();
|
|
157
174
|
for (const match of matches) {
|
package/dist/types.d.ts
CHANGED
|
@@ -304,6 +304,12 @@ export interface ScannerConfig {
|
|
|
304
304
|
verbose: boolean;
|
|
305
305
|
/** CI mode (simplified output) */
|
|
306
306
|
ci: boolean;
|
|
307
|
+
/** Maximum wall-clock ms for semantic AST analysis of a single code block (default: 2000) */
|
|
308
|
+
maxSemanticAnalysisMs?: number;
|
|
309
|
+
/** Maximum AST node count before aborting semantic analysis of a single code block (default: 50000) */
|
|
310
|
+
maxAstNodes?: number;
|
|
311
|
+
/** Per-code-block deadline in ms within the file-scoped budget (default: 500) */
|
|
312
|
+
maxBlockMs?: number;
|
|
307
313
|
}
|
|
308
314
|
/** Supported output formats */
|
|
309
315
|
export type OutputFormat = 'console' | 'json' | 'sarif' | 'html' | 'csv' | 'atlas';
|
package/dist/utils/baseline.d.ts
CHANGED
|
@@ -14,21 +14,34 @@ export interface BaselineFinding {
|
|
|
14
14
|
reason?: string;
|
|
15
15
|
expiresDate?: string;
|
|
16
16
|
}
|
|
17
|
+
export interface BaselineIntegrity {
|
|
18
|
+
algorithm: 'sha256';
|
|
19
|
+
hash: string;
|
|
20
|
+
}
|
|
17
21
|
export interface Baseline {
|
|
18
22
|
version: string;
|
|
19
23
|
createdDate: string;
|
|
20
24
|
lastUpdated: string;
|
|
21
25
|
description?: string;
|
|
22
26
|
findings: BaselineFinding[];
|
|
27
|
+
integrity?: BaselineIntegrity;
|
|
23
28
|
}
|
|
29
|
+
/**
|
|
30
|
+
* Compute integrity hash of a baseline (excluding the integrity field itself)
|
|
31
|
+
*/
|
|
32
|
+
export declare function computeBaselineIntegrity(baseline: Omit<Baseline, 'integrity'>): BaselineIntegrity;
|
|
33
|
+
/**
|
|
34
|
+
* Verify that a loaded baseline has not been tampered with
|
|
35
|
+
*/
|
|
36
|
+
export declare function verifyBaselineIntegrity(baseline: Baseline): boolean;
|
|
24
37
|
/**
|
|
25
38
|
* Load baseline from file
|
|
26
39
|
*/
|
|
27
|
-
export declare function loadBaseline(baselinePath: string): Baseline | null
|
|
40
|
+
export declare function loadBaseline(baselinePath: string): Promise<Baseline | null>;
|
|
28
41
|
/**
|
|
29
42
|
* Save baseline to file
|
|
30
43
|
*/
|
|
31
|
-
export declare function saveBaseline(baseline: Baseline, baselinePath: string): void
|
|
44
|
+
export declare function saveBaseline(baseline: Baseline, baselinePath: string): Promise<void>;
|
|
32
45
|
/**
|
|
33
46
|
* Create a new baseline from scan results
|
|
34
47
|
*/
|
package/dist/utils/baseline.js
CHANGED
|
@@ -2,10 +2,10 @@
|
|
|
2
2
|
* Baseline Management - Track and ignore accepted findings
|
|
3
3
|
* Allows users to create baselines of known/accepted security findings
|
|
4
4
|
*/
|
|
5
|
-
import {
|
|
5
|
+
import { statSync } from 'node:fs';
|
|
6
|
+
import { writeFile, readFile, mkdir, access } from 'node:fs/promises';
|
|
6
7
|
import { resolve, dirname, extname } from 'node:path';
|
|
7
8
|
import { createHash } from 'node:crypto';
|
|
8
|
-
import { mkdirSync } from 'node:fs';
|
|
9
9
|
import logger from './logger.js';
|
|
10
10
|
/**
|
|
11
11
|
* Generate a hash for a finding to uniquely identify it
|
|
@@ -14,20 +14,51 @@ function generateFindingHash(finding) {
|
|
|
14
14
|
const content = `${finding.ruleId}:${finding.relativePath}:${finding.line}:${finding.match}`;
|
|
15
15
|
return createHash('sha256').update(content).digest('hex');
|
|
16
16
|
}
|
|
17
|
+
/**
 * Compute integrity hash of a baseline (excluding the integrity field itself).
 *
 * Only the canonical fields participate in the digest; the `integrity` field
 * (and any unknown extras) are deliberately left out so the hash can be
 * recomputed from a loaded file and compared against the stored value.
 */
export function computeBaselineIntegrity(baseline) {
    const { version, createdDate, lastUpdated, description, findings } = baseline;
    const canonical = JSON.stringify({
        version,
        createdDate,
        lastUpdated,
        description,
        findings,
    });
    const digest = createHash('sha256').update(canonical).digest('hex');
    return { algorithm: 'sha256', hash: digest };
}
/**
 * Verify that a loaded baseline has not been tampered with.
 *
 * Recomputes the canonical hash and compares it to the recorded one.
 */
export function verifyBaselineIntegrity(baseline) {
    const recorded = baseline.integrity;
    if (!recorded) {
        return true; // Old baselines without integrity field are accepted
    }
    return computeBaselineIntegrity(baseline).hash === recorded.hash;
}
|
|
17
43
|
/**
|
|
18
44
|
* Load baseline from file
|
|
19
45
|
*/
|
|
20
|
-
export function loadBaseline(baselinePath) {
|
|
46
|
+
export async function loadBaseline(baselinePath) {
|
|
21
47
|
try {
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
48
|
+
await access(baselinePath);
|
|
49
|
+
}
|
|
50
|
+
catch {
|
|
51
|
+
return null;
|
|
52
|
+
}
|
|
53
|
+
try {
|
|
54
|
+
const content = await readFile(baselinePath, 'utf-8');
|
|
26
55
|
const baseline = JSON.parse(content);
|
|
27
|
-
// Validate baseline structure
|
|
28
56
|
if (!baseline.version || !baseline.findings || !Array.isArray(baseline.findings)) {
|
|
29
57
|
throw new Error('Invalid baseline format');
|
|
30
58
|
}
|
|
59
|
+
if (baseline.integrity && !verifyBaselineIntegrity(baseline)) {
|
|
60
|
+
logger.warn(`Baseline integrity check failed for ${baselinePath} — file may have been tampered with`);
|
|
61
|
+
}
|
|
31
62
|
logger.debug(`Loaded baseline with ${baseline.findings.length} accepted findings`);
|
|
32
63
|
return baseline;
|
|
33
64
|
}
|
|
@@ -39,17 +70,17 @@ export function loadBaseline(baselinePath) {
|
|
|
39
70
|
/**
|
|
40
71
|
* Save baseline to file
|
|
41
72
|
*/
|
|
42
|
-
export function saveBaseline(baseline, baselinePath) {
|
|
73
|
+
export async function saveBaseline(baseline, baselinePath) {
|
|
43
74
|
try {
|
|
44
|
-
|
|
45
|
-
const
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
const content = JSON.stringify(
|
|
51
|
-
|
|
52
|
-
logger.info(`Baseline saved to ${baselinePath} with ${
|
|
75
|
+
await mkdir(dirname(baselinePath), { recursive: true });
|
|
76
|
+
const updated = { ...baseline, lastUpdated: new Date().toISOString() };
|
|
77
|
+
const baselineWithIntegrity = {
|
|
78
|
+
...updated,
|
|
79
|
+
integrity: computeBaselineIntegrity(updated),
|
|
80
|
+
};
|
|
81
|
+
const content = JSON.stringify(baselineWithIntegrity, null, 2);
|
|
82
|
+
await writeFile(baselinePath, content, 'utf-8');
|
|
83
|
+
logger.info(`Baseline saved to ${baselinePath} with ${baselineWithIntegrity.findings.length} findings`);
|
|
53
84
|
}
|
|
54
85
|
catch (error) {
|
|
55
86
|
logger.error(`Failed to save baseline to ${baselinePath}:`, error);
|
|
@@ -227,7 +258,7 @@ export function getDefaultBaselinePath(scanPaths) {
|
|
|
227
258
|
// Try to find a good location for baseline file
|
|
228
259
|
const firstPath = scanPaths[0] ?? process.cwd();
|
|
229
260
|
try {
|
|
230
|
-
if (
|
|
261
|
+
if (statSync(firstPath).isFile()) {
|
|
231
262
|
return resolve(dirname(firstPath), '.ferret-baseline.json');
|
|
232
263
|
}
|
|
233
264
|
}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
/**
 * LRU-bounded in-memory cache for file content.
 *
 * Prevents unbounded memory growth when the scanner reads thousands of files:
 * - Per-file cap: individual files larger than `maxFileSize` bytes are never cached.
 * - Aggregate cap: once `totalBytes` would exceed `maxBytes`, the least-recently-used
 *   entry is evicted first. Same policy applies to the `maxEntries` cap.
 *
 * Insertion order of a Map mirrors access order after each get() refresh,
 * giving us O(1) LRU with the iteration-based eviction below.
 */
export interface BoundedContentCacheOpts {
    /** Maximum total cached bytes. Default: 256 MB. */
    maxBytes?: number;
    /** Maximum number of cached entries. Default: 10 000. */
    maxEntries?: number;
    /** Maximum size of a single file to admit into the cache. Default: 1 MB. */
    maxFileSize?: number;
}
export declare class BoundedContentCache {
    private readonly map;
    private totalBytes;
    private readonly maxBytes;
    private readonly maxEntries;
    private readonly maxFileSize;
    constructor(opts?: BoundedContentCacheOpts);
    /**
     * Admit `content` under `path`, evicting LRU entries to honour the caps.
     * Content exceeding the per-file cap is silently dropped — treat the
     * cache as best-effort, not as guaranteed storage.
     */
    set(path: string, content: string): void;
    /** Cached content for `path` (refreshing its recency), or undefined on miss. */
    get(path: string): string | undefined;
    /** Membership test; unlike get(), does not refresh recency. */
    has(path: string): boolean;
    /** Number of cached entries. */
    size(): number;
    /** Total cached bytes (UTF-8 encoded). */
    bytes(): number;
    /** Expose for CorrelationAnalyzer compatibility (read-only iteration). */
    entries(): IterableIterator<[string, string]>;
    /** Allow spread / array-from for compatibility with Map-based consumers. */
    [Symbol.iterator](): IterableIterator<[string, string]>;
}
|
|
39
|
+
//# sourceMappingURL=contentCache.d.ts.map
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
/**
 * LRU-bounded in-memory cache for file content.
 *
 * Prevents unbounded memory growth when the scanner reads thousands of files:
 * - Per-file cap: individual files larger than `maxFileSize` bytes are never cached.
 * - Aggregate cap: once `totalBytes` would exceed `maxBytes`, the least-recently-used
 *   entry is evicted first. Same policy applies to the `maxEntries` cap.
 *
 * Insertion order of a Map mirrors access order after each get() refresh,
 * giving us O(1) LRU with the iteration-based eviction below.
 */
const DEFAULT_MAX_BYTES = 256 * 1024 * 1024; // 256 MB
const DEFAULT_MAX_ENTRIES = 10_000;
const DEFAULT_MAX_FILE = 1_000_000; // 1 MB
export class BoundedContentCache {
    map = new Map();
    totalBytes = 0;
    maxBytes;
    maxEntries;
    maxFileSize;
    constructor(opts = {}) {
        this.maxBytes = opts.maxBytes ?? DEFAULT_MAX_BYTES;
        this.maxEntries = opts.maxEntries ?? DEFAULT_MAX_ENTRIES;
        this.maxFileSize = opts.maxFileSize ?? DEFAULT_MAX_FILE;
    }
    /**
     * Admit `content` under `path`, evicting LRU entries to honour the caps.
     * Oversized content is silently dropped — the cache is best-effort.
     */
    set(path, content) {
        const incoming = Buffer.byteLength(content, 'utf-8');
        // Refuse files that exceed the per-file cap.
        // Fix: also refuse files larger than the aggregate cap. Previously,
        // content with maxBytes < size <= maxFileSize (possible with custom
        // opts) emptied the cache via the eviction loop and was then admitted
        // anyway, leaving totalBytes > maxBytes.
        if (incoming > this.maxFileSize || incoming > this.maxBytes)
            return;
        // If the key already exists, remove its contribution before re-inserting.
        const existing = this.map.get(path);
        if (existing !== undefined) {
            this.map.delete(path);
            this.totalBytes -= Buffer.byteLength(existing, 'utf-8');
        }
        // Evict the oldest (first-in-map) entries until this one fits.
        while (this.map.size > 0 &&
            (this.totalBytes + incoming > this.maxBytes || this.map.size >= this.maxEntries)) {
            const oldestKey = this.map.keys().next().value;
            const oldestVal = this.map.get(oldestKey);
            this.map.delete(oldestKey);
            this.totalBytes -= Buffer.byteLength(oldestVal, 'utf-8');
        }
        this.map.set(path, content);
        this.totalBytes += incoming;
    }
    /** Cached content for `path` (refreshing its recency), or undefined on miss. */
    get(path) {
        const val = this.map.get(path);
        if (val === undefined)
            return undefined;
        // Refresh to most-recently-used position (LRU via Map insertion order).
        this.map.delete(path);
        this.map.set(path, val);
        return val;
    }
    /** Membership test; unlike get(), does not refresh recency. */
    has(path) {
        return this.map.has(path);
    }
    /** Number of cached entries. */
    size() {
        return this.map.size;
    }
    /** Total cached bytes (UTF-8 encoded). */
    bytes() {
        return this.totalBytes;
    }
    /** Expose for CorrelationAnalyzer compatibility (read-only iteration). */
    entries() {
        return this.map.entries();
    }
    /** Allow spread / array-from for compatibility with Map-based consumers. */
    [Symbol.iterator]() {
        return this.map.entries();
    }
}
|
|
77
|
+
//# sourceMappingURL=contentCache.js.map
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
/**
 * Safe glob-to-regex conversion utility
 *
 * Prevents regex injection attacks and ReDoS by escaping metacharacters
 * and bounding wildcard replacements.
 */
export interface GlobOptions {
    /** Whether to anchor with ^$ (default: true) */
    anchored?: boolean;
    /** Whether this is a file path (affects wildcard replacement) */
    pathLike?: boolean;
}
/**
 * Convert a glob pattern to a safe RegExp with bounded wildcards.
 *
 * - Escapes all regex metacharacters except `*`
 * - Replaces `*` with bounded character classes to prevent ReDoS
 *   (path-like globs match any non-newline run; otherwise non-whitespace)
 * - Anchors patterns to prevent unintended substring matches
 * - Caches compiled patterns for performance
 * - Never throws: on compile failure a never-matching RegExp is returned
 *
 * @param glob The glob pattern (e.g. "*.env", "CRED-*")
 * @param opts Configuration options
 * @returns A safe RegExp that won't cause ReDoS or over-match
 *
 * @example
 * ```typescript
 * // File pattern matching
 * const filePattern = globToRegex("*.env", { pathLike: true });
 * filePattern.test("/path/to/file.env"); // true
 * filePattern.test("file.env.backup"); // false (anchored)
 *
 * // Rule ID pattern matching
 * const rulePattern = globToRegex("CRED-*");
 * rulePattern.test("CRED-001"); // true
 * rulePattern.test("CREDENTIAL-001"); // false (literal dot required)
 * ```
 */
export declare function globToRegex(glob: string, opts?: GlobOptions): RegExp;
/**
 * Clear the compiled pattern cache (useful for testing)
 * NOTE(review): the module-level cache has no size bound — fine for a fixed
 * rule set, but confirm globs are never derived from unbounded user input.
 */
export declare function clearCache(): void;
/**
 * Get cache statistics (useful for debugging)
 */
export declare function getCacheStats(): {
    size: number;
    keys: string[];
};
|
|
50
|
+
//# sourceMappingURL=glob.d.ts.map
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
/**
 * Safe glob-to-regex conversion utility
 *
 * Prevents regex injection attacks and ReDoS by escaping metacharacters
 * and bounding wildcard replacements.
 */
// Every regex metacharacter except `*`, the only glob token we honour.
const METACHAR_PATTERN = /[.+?^${}()|[\]\\]/g;
// Compiled-pattern memo: glob conversion sits on hot matching paths.
const compiled = new Map();
/**
 * Convert a glob pattern to a safe RegExp with bounded wildcards.
 *
 * Escapes every regex metacharacter except `*`, which is expanded into a
 * bounded character class (non-newline for file paths, non-whitespace for
 * rule IDs) so the result can neither inject pattern syntax nor ReDoS.
 * Patterns are anchored with ^$ unless `opts.anchored` is false, and compiled
 * results are memoised per (glob, anchored, pathLike).
 *
 * @param glob The glob pattern (e.g. "*.env", "CRED-*")
 * @param opts Configuration options
 * @returns A safe RegExp; a never-matching RegExp if compilation fails
 */
export function globToRegex(glob, opts = {}) {
    const anchored = opts.anchored !== false;
    const pathLike = opts.pathLike ?? false;
    const cacheKey = `${glob}::${anchored}::${pathLike}`;
    const cached = compiled.get(cacheKey);
    if (cached) {
        return cached;
    }
    // Neutralise every metacharacter, then expand `*` into a bounded class.
    const literal = glob.replace(METACHAR_PATTERN, '\\$&');
    const wildcard = pathLike
        ? '[^\\n]{0,200}' // File paths: no newlines, bound to 200 chars
        : '[^\\s]{0,200}'; // Rule IDs: no whitespace, bound to 200 chars
    const body = literal.replace(/\*/g, wildcard);
    const source = anchored ? `^${body}$` : body;
    let regex;
    try {
        regex = new RegExp(source);
    }
    catch {
        // Never throw from here: fall back to a never-matching pattern.
        regex = /(?!)/; // Negative lookahead - never matches
    }
    compiled.set(cacheKey, regex);
    return regex;
}
/**
 * Clear the compiled pattern cache (useful for testing)
 */
export function clearCache() {
    compiled.clear();
}
/**
 * Get cache statistics (useful for debugging)
 */
export function getCacheStats() {
    return {
        size: compiled.size,
        keys: Array.from(compiled.keys()),
    };
}
|
|
84
|
+
//# sourceMappingURL=glob.js.map
|
|
@@ -36,6 +36,7 @@ export function sanitizeFilename(filename) {
|
|
|
36
36
|
.replace(/[\/\\]/g, '_') // Replace path separators
|
|
37
37
|
.replace(/\.\./g, '_') // Replace parent directory references
|
|
38
38
|
.replace(/[<>:"|?*]/g, '_') // Remove invalid filename characters
|
|
39
|
+
.replace(/\0/g, '_') // Replace null bytes
|
|
39
40
|
.replace(/^\.+/, '_'); // Remove leading dots
|
|
40
41
|
}
|
|
41
42
|
/**
|