ferret-scan 2.1.1 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -16,6 +16,29 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
16
16
  - REST API for third-party integrations
17
17
  - SIEM/SOAR integrations
18
18
 
19
+ ## [2.2.0] - 2026-04-23
20
+
21
+ ### Security
22
+ - **Bounded content cache**: Replaced unbounded `Map` with `BoundedContentCache` (256 MB aggregate cap, 10,000 entry limit, 1 MB per-file cap with LRU eviction) to prevent OOM on large repos
23
+ - **Quarantine hardening**: Quarantine directory created with mode `0700` (owner-only) on POSIX; permissions verified after creation with a warning if loose; disk-space pre-checked via `statfsSync` before quarantining a file
24
+ - **BUILTIN_FIXES startup validation**: All 9 built-in remediation patterns validated by `compileSafePattern` at module load time — a bad pattern fails fast at startup rather than at first use
25
+ - **Hybrid AST deadline**: `analyzeFile` now enforces both a per-code-block cap (default 500 ms, `maxBlockMs`) and a file-scoped total cap (default 2 s, `maxMs`). A single hostile markdown block can no longer starve all subsequent blocks of their analysis budget
26
+ - **ReDoS prevention hardened**: `compileSafePattern` updated to reject patterns with alternation inside quantified groups; `globToRegex` escapes all regex metacharacters and anchors patterns; all correlation and AST pattern execution runs through `runBounded`
27
+ - **`statfsSync` bigint safety**: Explicit `Number()` coercion in `hasSufficientDiskSpace` guards against future `{ bigint: true }` call-sites
28
+ - **`ignoreComments` regex fix**: Alternation order corrected (longest-first: `ignore-next-line`, `ignore-line`, `ignore`) so `ferret-ignore-next-line` is no longer mis-parsed as `ferret-ignore`
29
+
30
+ ### Added
31
+ - **JSON schema sync**: `src/schemas/ferret-config.schema.json` now generated from the runtime zod schema via `npm run schema:generate`; CI enforces drift detection with `npm run schema:check`
32
+ - **Coverage thresholds**: Per-module Jest coverage thresholds for `safeRegex`, `glob`, `contentCache`, `Fixer`, `Quarantine`, `AstAnalyzer`, all four reporters, `WatchMode`, and `policyEnforcement` — silent regressions now fail CI
33
+ - **CI benchmark regression detection**: `scripts/bench-compare.mjs` compares benchmark results against the cached main-branch baseline and fails PRs that regress by >20%
34
+
35
+ ### Tests
36
+ - **673 tests** across 39 test suites (was 244 tests)
37
+ - New unit tests: `AstAnalyzer`, `ConsoleReporter`, `HtmlReporter`, `SarifReporter`, `WatchMode`, `contentCache`, `safeRegex`, `glob`, `Fixer`, `Quarantine`, `ignoreComments`, `mcpValidator`, `policyEnforcement`, `cliOptions`
38
+ - New integration tests: `remediation` (scan→fix→rescan, quarantine→restore, dry-run, backup round-trip) and `cli` (subprocess exit-code contract for `--version`, `--help`, scan, SARIF output)
39
+ - HtmlReporter XSS escape verified: `<script>` in finding values renders as `&lt;script&gt;`
40
+ - SarifReporter validates SARIF 2.1.0 shape, severity mapping, rule deduplication, and location encoding
41
+
19
42
  ## [2.1.0] - 2026-02-16
20
43
 
21
44
  ### Added
package/bin/ferret.js CHANGED
@@ -267,7 +267,7 @@ program
267
267
  // Apply baseline filtering if enabled
268
268
  if (!options.ignoreBaseline) {
269
269
  const baselinePath = options.baseline || getDefaultBaselinePath(config.paths);
270
- const baseline = loadBaseline(baselinePath);
270
+ const baseline = await loadBaseline(baselinePath);
271
271
  if (baseline) {
272
272
  console.log(`📋 Applying baseline from: ${baselinePath}`);
273
273
  result = filterAgainstBaseline(result, baseline);
@@ -514,7 +514,7 @@ baselineCmd
514
514
  const baselinePath = options.output || getDefaultBaselinePath(config.paths);
515
515
  const baseline = createBaseline(result, options.description);
516
516
 
517
- saveBaseline(baseline, baselinePath);
517
+ await saveBaseline(baseline, baselinePath);
518
518
  console.log(`✅ Created baseline with ${baseline.findings.length} findings`);
519
519
  console.log(`📋 Baseline saved to: ${baselinePath}`);
520
520
 
@@ -528,10 +528,10 @@ baselineCmd
528
528
  .command('show')
529
529
  .description('Show baseline information')
530
530
  .argument('[file]', 'Baseline file path (defaults to .ferret-baseline.json)')
531
- .action((file) => {
531
+ .action(async (file) => {
532
532
  try {
533
533
  const baselinePath = file || getDefaultBaselinePath([process.cwd()]);
534
- const baseline = loadBaseline(baselinePath);
534
+ const baseline = await loadBaseline(baselinePath);
535
535
 
536
536
  if (!baseline) {
537
537
  console.error(`No baseline found at: ${baselinePath}`);
@@ -578,7 +578,7 @@ baselineCmd
578
578
  .action(async (file, options) => {
579
579
  try {
580
580
  const baselinePath = file || getDefaultBaselinePath([process.cwd()]);
581
- const baseline = loadBaseline(baselinePath);
581
+ const baseline = await loadBaseline(baselinePath);
582
582
 
583
583
  if (!baseline) {
584
584
  console.error(`No baseline found at: ${baselinePath}`);
@@ -6,7 +6,11 @@ import type { SemanticFinding, DiscoveredFile, Rule } from '../types.js';
6
6
  /**
7
7
  * Analyze a single file for semantic patterns
8
8
  */
9
- export declare function analyzeFile(file: DiscoveredFile, content: string, rules: Rule[]): Promise<SemanticFinding[]>;
9
+ export declare function analyzeFile(file: DiscoveredFile, content: string, rules: Rule[], opts?: {
10
+ maxMs?: number;
11
+ maxNodes?: number;
12
+ maxBlockMs?: number;
13
+ }): Promise<SemanticFinding[]>;
10
14
  /**
11
15
  * Check if semantic analysis should be performed
12
16
  */
@@ -133,11 +133,19 @@ function extractSemanticContext(tsLib, sourceFile) {
133
133
  return context;
134
134
  }
135
135
  /**
136
- * Find security patterns in AST
136
+ * Find security patterns in AST, with optional time and node-count guards.
137
137
  */
138
- function findSecurityPatterns(tsLib, sourceFile, patterns) {
138
+ function findSecurityPatterns(tsLib, sourceFile, patterns, opts) {
139
139
  const matches = [];
140
+ let nodeCount = 0;
141
+ const deadline = opts?.deadline;
142
+ const maxNodes = opts?.maxNodes ?? 50_000;
140
143
  function visit(node) {
144
+ nodeCount++;
145
+ if (nodeCount > maxNodes)
146
+ return;
147
+ if (deadline !== undefined && Date.now() > deadline)
148
+ return;
141
149
  for (const pattern of patterns) {
142
150
  const match = matchSemanticPattern(tsLib, node, pattern, sourceFile);
143
151
  if (match) {
@@ -277,8 +285,11 @@ function createContextLines(sourceFile, node, contextLines = 3) {
277
285
  /**
278
286
  * Analyze a single file for semantic patterns
279
287
  */
280
- export async function analyzeFile(file, content, rules) {
288
+ export async function analyzeFile(file, content, rules, opts) {
281
289
  const findings = [];
290
+ const maxMs = opts?.maxMs ?? 2000;
291
+ const maxNodes = opts?.maxNodes ?? 50_000;
292
+ const perBlockMs = Math.min(maxMs, opts?.maxBlockMs ?? 500);
282
293
  try {
283
294
  // Get rules with semantic patterns
284
295
  const semanticRules = rules.filter(rule => rule.semanticPatterns && rule.semanticPatterns.length > 0);
@@ -296,16 +307,26 @@ export async function analyzeFile(file, content, rules) {
296
307
  // Analyze the entire file for TypeScript/JavaScript files
297
308
  codeBlocksToAnalyze = [{ code: content, language: file.type, line: 1 }];
298
309
  }
310
+ const fileDeadline = Date.now() + maxMs;
299
311
  // Analyze each code block
300
312
  for (const codeBlock of codeBlocksToAnalyze) {
313
+ if (Date.now() > fileDeadline) {
314
+ logger.warn(`AST analysis file deadline (${maxMs}ms) reached for ${file.relativePath}; skipping remaining code blocks`);
315
+ break;
316
+ }
301
317
  try {
302
318
  const sourceFile = createAST(tsLib, codeBlock.code, `${file.relativePath}_block_${codeBlock.line}.${codeBlock.language}`);
303
319
  const semanticContext = extractSemanticContext(tsLib, sourceFile);
320
+ // Per-block deadline: min of (remaining file budget, per-block cap).
321
+ const blockDeadline = Math.min(fileDeadline, Date.now() + perBlockMs);
304
322
  // Check each semantic rule
305
323
  for (const rule of semanticRules) {
306
324
  if (!rule.semanticPatterns)
307
325
  continue;
308
- const patternMatches = findSecurityPatterns(tsLib, sourceFile, rule.semanticPatterns);
326
+ const patternMatches = findSecurityPatterns(tsLib, sourceFile, rule.semanticPatterns, {
327
+ deadline: blockDeadline,
328
+ maxNodes,
329
+ });
309
330
  for (const match of patternMatches) {
310
331
  const position = getPositionFromNode(match.node, sourceFile);
311
332
  const astNodeInfo = createASTNodeInfo(tsLib, match.node, sourceFile);
@@ -8,16 +8,16 @@ import logger from '../utils/logger.js';
8
8
  */
9
9
  const COMMENT_PATTERNS = {
10
10
  default: [
11
- /\/\/\s*ferret-(ignore|disable|enable|ignore-line|ignore-next-line)(?:\s+([^\n]+))?/gi,
12
- /\/\*\s*ferret-(ignore|disable|enable|ignore-line|ignore-next-line)(?:\s+([^*]+))?\s*\*\//gi,
13
- /#\s*ferret-(ignore|disable|enable|ignore-line|ignore-next-line)(?:\s+([^\n]+))?/gi,
11
+ /\/\/\s*ferret-(ignore-next-line|ignore-line|ignore|disable|enable)(?:\s+([^\n]+))?/gi,
12
+ /\/\*\s*ferret-(ignore-next-line|ignore-line|ignore|disable|enable)(?:\s+([^*]+))?\s*\*\//gi,
13
+ /#\s*ferret-(ignore-next-line|ignore-line|ignore|disable|enable)(?:\s+([^\n]+))?/gi,
14
14
  ],
15
15
  html: [
16
16
  // Non-greedy capture so rule ids like "INJ-001" (with hyphens) work correctly.
17
- /<!--\s*ferret-(ignore|disable|enable|ignore-line|ignore-next-line)(?:\s+(.+?))?\s*-->/gi,
17
+ /<!--\s*ferret-(ignore-next-line|ignore-line|ignore|disable|enable)(?:\s+(.+?))?\s*-->/gi,
18
18
  ],
19
19
  sql: [
20
- /--\s*ferret-(ignore|disable|enable|ignore-line|ignore-next-line)(?:\s+([^\n]+))?/gi,
20
+ /--\s*ferret-(ignore-next-line|ignore-line|ignore|disable|enable)(?:\s+([^\n]+))?/gi,
21
21
  ],
22
22
  };
23
23
  /**
@@ -6,6 +6,7 @@
6
6
  import { readFileSync, existsSync, writeFileSync } from 'node:fs';
7
7
  import { resolve } from 'node:path';
8
8
  import { z } from 'zod';
9
+ import { globToRegex } from '../utils/glob.js';
9
10
  import logger from '../utils/logger.js';
10
11
  /**
11
12
  * Policy rule schema
@@ -161,7 +162,7 @@ function findingMatchesConditions(finding, conditions) {
161
162
  if (conditions.ruleIds && conditions.ruleIds.length > 0) {
162
163
  const matchesRule = conditions.ruleIds.some(id => {
163
164
  if (id.includes('*')) {
164
- const pattern = new RegExp('^' + id.replace(/\*/g, '.*') + '$');
165
+ const pattern = globToRegex(id, { pathLike: false });
165
166
  return pattern.test(finding.ruleId);
166
167
  }
167
168
  return finding.ruleId === id;
@@ -184,7 +185,7 @@ function findingMatchesConditions(finding, conditions) {
184
185
  // Check file patterns
185
186
  if (conditions.filePatterns && conditions.filePatterns.length > 0) {
186
187
  const matchesFile = conditions.filePatterns.some(pattern => {
187
- const regex = new RegExp(pattern.replace(/\*/g, '.*'));
188
+ const regex = globToRegex(pattern, { pathLike: true });
188
189
  return regex.test(finding.file) || regex.test(finding.relativePath);
189
190
  });
190
191
  if (!matchesFile)
@@ -7,6 +7,7 @@ import { readFileSync, writeFileSync, existsSync, mkdirSync, copyFileSync, statS
7
7
  import { resolve, dirname, basename } from 'node:path';
8
8
  import logger from '../utils/logger.js';
9
9
  import { validatePathWithinBase, sanitizeFilename, isPathWithinBase } from '../utils/pathSecurity.js';
10
+ import { compileSafePattern, safeMatch, safeTest } from '../utils/safeRegex.js';
10
11
  /**
11
12
  * Default remediation options
12
13
  */
@@ -99,6 +100,13 @@ const BUILTIN_FIXES = [
99
100
  automatic: false
100
101
  }
101
102
  ];
103
+ // Fail fast at module load if any built-in pattern is rejected by the safe-regex gate.
104
+ // This catches contributor mistakes at startup rather than silently at fix-application time.
105
+ for (const fix of BUILTIN_FIXES) {
106
+ if (!compileSafePattern(fix.pattern)) {
107
+ throw new Error(`Fixer startup: BUILTIN_FIXES pattern rejected by compileSafePattern: ${fix.description} (${fix.pattern})`);
108
+ }
109
+ }
102
110
  /**
103
111
  * Create backup of file before modification
104
112
  */
@@ -125,34 +133,55 @@ function applyFix(content, fix, _finding) {
125
133
  try {
126
134
  switch (fix.type) {
127
135
  case 'replace': {
128
- const regex = new RegExp(fix.pattern, 'gi');
136
+ const regex = compileSafePattern(fix.pattern, 'gi');
137
+ if (!regex) {
138
+ logger.warn(`Unsafe fix pattern rejected: ${fix.pattern}`);
139
+ return { success: false, newContent: content, linesModified: 0 };
140
+ }
129
141
  const originalLineCount = content.split('\n').length;
130
142
  const replacement = fix.replacement ?? '';
143
+ // Use safe bounded matching to find replacements
144
+ const matchResult = safeMatch(fix.pattern, content, 'gi');
145
+ if (!matchResult) {
146
+ logger.warn(`Safe match failed for pattern: ${fix.pattern}`);
147
+ return { success: false, newContent: content, linesModified: 0 };
148
+ }
149
+ if (matchResult.truncated) {
150
+ logger.warn(`Fix pattern execution truncated for safety: ${fix.pattern}`);
151
+ return { success: false, newContent: content, linesModified: 0 };
152
+ }
153
+ // Apply replacement safely
131
154
  newContent = content.replace(regex, replacement);
132
155
  const newLineCount = newContent.split('\n').length;
133
- linesModified = Math.abs(newLineCount - originalLineCount);
134
- // Count actual replacements
135
- const matches = content.match(regex);
136
- if (matches) {
137
- linesModified = Math.max(linesModified, matches.length);
138
- }
156
+ linesModified = Math.max(Math.abs(newLineCount - originalLineCount), matchResult.matches.length);
139
157
  break;
140
158
  }
141
159
  case 'remove': {
142
- const regex = new RegExp(fix.pattern, 'gi');
160
+ const regex = compileSafePattern(fix.pattern, 'gi');
161
+ if (!regex) {
162
+ logger.warn(`Unsafe fix pattern rejected: ${fix.pattern}`);
163
+ return { success: false, newContent: content, linesModified: 0 };
164
+ }
143
165
  const lines = content.split('\n');
144
- const filteredLines = lines.filter(line => !regex.test(line));
166
+ const filteredLines = lines.filter(line => {
167
+ const isMatch = safeTest(fix.pattern, line, 'i');
168
+ return !isMatch;
169
+ });
145
170
  newContent = filteredLines.join('\n');
146
171
  linesModified = lines.length - filteredLines.length;
147
172
  break;
148
173
  }
149
174
  case 'quarantine': {
150
- // For quarantine, we comment out the problematic lines
151
- const regex = new RegExp(fix.pattern, 'gi');
175
+ const regex = compileSafePattern(fix.pattern, 'gi');
176
+ if (!regex) {
177
+ logger.warn(`Unsafe fix pattern rejected: ${fix.pattern}`);
178
+ return { success: false, newContent: content, linesModified: 0 };
179
+ }
152
180
  const lines = content.split('\n');
153
181
  for (let i = 0; i < lines.length; i++) {
154
182
  const line = lines[i] ?? '';
155
- if (regex.test(line)) {
183
+ const isMatch = safeTest(fix.pattern, line, 'i');
184
+ if (isMatch) {
156
185
  lines[i] = `# QUARANTINED: ${line}`;
157
186
  linesModified++;
158
187
  }
@@ -191,25 +220,22 @@ function findApplicableFixes(finding) {
191
220
  }
192
221
  // Check built-in fixes
193
222
  for (const fix of BUILTIN_FIXES) {
194
- try {
195
- const regex = new RegExp(fix.pattern, 'i');
196
- // Check if fix pattern matches the finding
197
- if (regex.test(finding.match) || regex.test(finding.context.map(c => c.content).join('\n'))) {
198
- applicableFixes.push(fix);
199
- }
200
- // Check by rule category
201
- if (finding.category === 'credentials' && fix.description.includes('credential')) {
202
- applicableFixes.push(fix);
203
- }
204
- if (finding.category === 'injection' && fix.description.includes('jailbreak')) {
205
- applicableFixes.push(fix);
206
- }
207
- if (finding.category === 'permissions' && fix.description.includes('permission')) {
208
- applicableFixes.push(fix);
209
- }
223
+ // Use safe pattern matching
224
+ const matchesDirectly = safeTest(fix.pattern, finding.match, 'i');
225
+ const contextText = finding.context.map(c => c.content).join('\n');
226
+ const matchesContext = safeTest(fix.pattern, contextText, 'i');
227
+ if (matchesDirectly || matchesContext) {
228
+ applicableFixes.push(fix);
229
+ }
230
+ // Check by rule category
231
+ if (finding.category === 'credentials' && fix.description.includes('credential')) {
232
+ applicableFixes.push(fix);
233
+ }
234
+ if (finding.category === 'injection' && fix.description.includes('jailbreak')) {
235
+ applicableFixes.push(fix);
210
236
  }
211
- catch {
212
- logger.warn(`Invalid fix pattern: ${fix.pattern}`);
237
+ if (finding.category === 'permissions' && fix.description.includes('permission')) {
238
+ applicableFixes.push(fix);
213
239
  }
214
240
  }
215
241
  // Remove duplicates
@@ -2,11 +2,49 @@
2
2
  * Quarantine System - Safely isolate suspicious files and content
3
3
  * Provides reversible quarantine operations with audit trails
4
4
  */
5
- import { readFileSync, writeFileSync, existsSync, mkdirSync, copyFileSync, unlinkSync, statSync } from 'node:fs';
5
+ import { readFileSync, writeFileSync, existsSync, mkdirSync, copyFileSync, unlinkSync, statSync, statfsSync } from 'node:fs';
6
6
  import { resolve, dirname, basename } from 'node:path';
7
7
  import { createHash } from 'node:crypto';
8
8
  import logger from '../utils/logger.js';
9
9
  import { validatePathWithinBase, isPathWithinBase } from '../utils/pathSecurity.js';
10
+ /**
11
+ * Create a quarantine-grade directory with restrictive permissions (0700 on POSIX).
12
+ * On Windows, Node silently ignores the mode argument — permissions are managed by
13
+ * the OS ACL instead.
14
+ */
15
+ function ensureSecureDir(dir) {
16
+ // 0o700 = owner-only rwx; harmlessly ignored on Windows
17
+ mkdirSync(dir, { recursive: true, mode: 0o700 });
18
+ // Verify no group/other bits leaked through (e.g. pre-existing dir with loose perms).
19
+ // stat().mode & 0o077 !== 0 means at least one g/o bit is set.
20
+ if (process.platform !== 'win32') {
21
+ const mode = statSync(dir).mode;
22
+ if ((mode & 0o077) !== 0) {
23
+ logger.warn(`Quarantine directory ${dir} has loose permissions (mode ${(mode & 0o777).toString(8)}); secrets may be readable by other users`);
24
+ }
25
+ }
26
+ }
27
+ /**
28
+ * Check whether the quarantine directory has sufficient free space for a file of the given size.
29
+ * Refuses if the file is ≥50% of remaining disk space to prevent filling the disk.
30
+ * Returns true (allow) when statfsSync is unavailable (older Node / Windows).
31
+ */
32
+ function hasSufficientDiskSpace(dir, requiredBytes) {
33
+ try {
34
+ // statfsSync is available in Node ≥18.15.0 on POSIX; falls through on Windows.
35
+ const stats = statfsSync(dir);
36
+ // eslint-disable-next-line @typescript-eslint/no-unnecessary-type-conversion -- guards against a future { bigint: true } call-site; safe up to ~9 PB
37
+ const freeBytes = Number(stats.bavail) * Number(stats.bsize);
38
+ if (requiredBytes >= freeBytes * 0.5) {
39
+ logger.warn(`Insufficient disk space for quarantine: need ${requiredBytes} bytes, ${freeBytes} available`);
40
+ return false;
41
+ }
42
+ }
43
+ catch {
44
+ // statfsSync unavailable — skip the check rather than failing the quarantine.
45
+ }
46
+ return true;
47
+ }
10
48
  /**
11
49
  * Default quarantine options
12
50
  */
@@ -66,8 +104,8 @@ export function loadQuarantineDatabase(quarantineDir) {
66
104
  */
67
105
  export function saveQuarantineDatabase(db, quarantineDir) {
68
106
  try {
69
- // Ensure directory exists
70
- mkdirSync(quarantineDir, { recursive: true });
107
+ // Ensure directory exists with secure permissions
108
+ ensureSecureDir(quarantineDir);
71
109
  // Update stats and metadata
72
110
  db.lastUpdated = new Date().toISOString();
73
111
  db.stats = calculateQuarantineStats(db.entries);
@@ -139,8 +177,13 @@ export function quarantineFile(filePath, findings, reason, options = {}) {
139
177
  const fileName = basename(filePath);
140
178
  const quarantineFileName = `${id}_${fileName}`;
141
179
  const quarantinePath = resolve(config.quarantineDir, 'files', quarantineFileName);
142
- // Ensure quarantine directory exists
143
- mkdirSync(dirname(quarantinePath), { recursive: true });
180
+ // Ensure quarantine directory exists with secure permissions
181
+ ensureSecureDir(dirname(quarantinePath));
182
+ // Refuse quarantine if disk space is critically low
183
+ if (!hasSufficientDiskSpace(dirname(quarantinePath), stats.size)) {
184
+ logger.error(`Quarantine aborted for ${filePath}: insufficient disk space`);
185
+ return null;
186
+ }
144
187
  // Copy file to quarantine
145
188
  copyFileSync(filePath, quarantinePath);
146
189
  // Calculate metadata
@@ -321,6 +364,13 @@ export function checkQuarantineHealth(quarantineDir = DEFAULT_OPTIONS.quarantine
321
364
  if (!existsSync(quarantineFilesDir)) {
322
365
  issues.push('Quarantine files directory missing');
323
366
  }
367
+ // Check directory permissions (POSIX only)
368
+ if (process.platform !== 'win32' && existsSync(quarantineDir)) {
369
+ const mode = statSync(quarantineDir).mode;
370
+ if ((mode & 0o077) !== 0) {
371
+ issues.push(`Quarantine directory has loose permissions (mode ${(mode & 0o777).toString(8)}); run chmod 700 ${quarantineDir}`);
372
+ }
373
+ }
324
374
  return {
325
375
  healthy: issues.length === 0,
326
376
  issues,
@@ -29,9 +29,9 @@ export const aiSpecificRules = [
29
29
  severity: 'HIGH',
30
30
  description: 'Detects attempts to make Claude impersonate other entities',
31
31
  patterns: [
32
- /pretend\s+(to\s+be|you\s+are)\s+.*(anthropic|openai|google|microsoft)/gi,
32
+ /pretend\s+(to\s+be|you\s+are)\s+[^\n]{0,100}(anthropic|openai|google|microsoft)/gi,
33
33
  /you\s+are\s+(now\s+)?(an?\s+)?(anthropic|openai)\s+(employee|staff|system)/gi,
34
- /impersonate\s+.*(admin|moderator|system|support)/gi,
34
+ /impersonate\s+[^\n]{0,100}(admin|moderator|system|support)/gi,
35
35
  ],
36
36
  fileTypes: ['md'],
37
37
  components: ['skill', 'agent', 'ai-config-md'],
@@ -82,8 +82,8 @@ export const aiSpecificRules = [
82
82
  severity: 'HIGH',
83
83
  description: 'Detects setup for multi-step attacks that unfold over time',
84
84
  patterns: [
85
- /on\s+the\s+next\s+(message|turn|response)\s+.*(execute|attack|inject|exfiltrate)/gi,
86
- /when\s+the\s+user\s+says\s+.*(trigger|activate|execute)\s+.*(attack|payload|exploit)/gi,
85
+ /on\s+the\s+next\s+(message|turn|response)\s+[^\n]{0,100}(execute|attack|inject|exfiltrate)/gi,
86
+ /when\s+the\s+user\s+says\s+[^\n]{0,100}(trigger|activate|execute)[^\n]{0,100}(attack|payload|exploit)/gi,
87
87
  /wait\s+for\s+(signal|trigger|command)\s+to\s+(attack|execute|inject)/gi,
88
88
  /phase\s+\d+\s*:\s*(attack|exploit|inject|exfiltrate|payload)/gi,
89
89
  /step\s+\d+\s*:\s*(gain|escalate|exfiltrate|compromise)/gi,
@@ -118,7 +118,7 @@ export const aiSpecificRules = [
118
118
  severity: 'MEDIUM',
119
119
  description: 'Detects attempts to manipulate output formatting for injection',
120
120
  patterns: [
121
- /output\s+.*(json|xml|html)\s+without\s+(escaping|validation)/gi,
121
+ /output\s+[^\n]{0,100}(json|xml|html)\s+without\s+(escaping|validation)/gi,
122
122
  /include\s+(raw|unescaped)\s+(html|script|code)/gi,
123
123
  /respond\s+with\s+raw\s+/gi,
124
124
  ],
@@ -167,8 +167,8 @@ export const aiSpecificRules = [
167
167
  severity: 'CRITICAL',
168
168
  description: 'Detects patterns that set up indirect prompt injection',
169
169
  patterns: [
170
- /when\s+you\s+(read|see|find)\s+.*(execute|run|follow)/gi,
171
- /if\s+.*(file|url|content)\s+contains.*then\s+(do|execute|run)/gi,
170
+ /when\s+you\s+(read|see|find)\s+[^\n]{0,100}(execute|run|follow)/gi,
171
+ /if\s+[^\n]{0,100}(file|url|content)\s+contains[^\n]{0,100}then\s+(do|execute|run)/gi,
172
172
  /follow\s+instructions\s+(in|from)\s+(the|any)\s+(file|url|content)/gi,
173
173
  ],
174
174
  fileTypes: ['md'],
@@ -184,7 +184,7 @@ export const aiSpecificRules = [
184
184
  severity: 'HIGH',
185
185
  description: 'Detects instructions to abuse AI CLI tools',
186
186
  patterns: [
187
- /use\s+(bash|write|edit)\s+tool\s+to.*(delete|remove|destroy)/gi,
187
+ /use\s+(bash|write|edit)\s+tool\s+to[^\n]{0,100}(delete|remove|destroy)/gi,
188
188
  /execute\s+(arbitrary|any)\s+(commands?|code)/gi,
189
189
  /bypass\s+tool\s+(restrictions|limits|permissions)/gi,
190
190
  ],
@@ -215,6 +215,27 @@ export const aiSpecificRules = [
215
215
  'https://owasp.org/www-project-top-10-for-large-language-model-applications/',
216
216
  ],
217
217
  enabled: true,
218
+ // Mirror INJ-003 semantic context suppression: a skill that discusses,
219
+ // documents, detects, or provides examples of these techniques is not
220
+ // itself a jailbreak attempt.
221
+ excludePatterns: [
222
+ // Line discusses detection/blocking rather than deployment
223
+ /\b(detect|catch|flag|block|prevent|scan\s+for|identify|reject|report)\b[^\n]{0,80}(jailbreak|DAN|bypass)/gi,
224
+ /\b(jailbreak|DAN|bypass)\b[^\n]{0,80}\b(detect|catch|flag|block|prevent|found|identified)/gi,
225
+ // Term appears inside a quoted string
226
+ /["'][^"'\n]{0,120}\b(jailbreak|DAN)\b[^"'\n]{0,120}["']/gi,
227
+ // Scanner rule-ID reference on the same line
228
+ /\[(?:INJ|AI|SEC|CRED)-\d+\]/gi,
229
+ // Markdown example label
230
+ /^\s*\*\*(?:Input|Output|Example|Finding|Result)\*\*\s*:/i,
231
+ ],
232
+ excludeContext: [
233
+ /\b(security\s+(rule|finding|scan|check|gate|scanner|score)|ferret.?scan|scan\s+result)/gi,
234
+ /\b(example\s+of|this\s+detects|used\s+to\s+(bypass|attack)|common\s+(attack|technique)|known\s+(jailbreak|attack))/gi,
235
+ /\b(security\s+scanner|vulnerability\s+scanner|threat\s+detect|scan\s+for\s+(injection|jailbreak))/gi,
236
+ /^\s*##\s+Example/im,
237
+ /publication\s+blocked/gi,
238
+ ],
218
239
  },
219
240
  {
220
241
  id: 'AI-011',
@@ -32,10 +32,10 @@ export const backdoorRules = [
32
32
  patterns: [
33
33
  /\/bin\/(ba)?sh\s+-i/gi,
34
34
  /bash\s+-i\s+>&/gi,
35
- /nc\s+.*-e\s+\/bin/gi,
36
- /python.*socket.*connect/gi,
37
- /perl.*socket.*INET/gi,
38
- /ruby.*TCPSocket/gi,
35
+ /nc\s+[^\n]{0,100}-e\s+\/bin/gi,
36
+ /python[^\n]{0,100}socket[^\n]{0,100}connect/gi,
37
+ /perl[^\n]{0,100}socket[^\n]{0,100}INET/gi,
38
+ /ruby[^\n]{0,100}TCPSocket/gi,
39
39
  ],
40
40
  fileTypes: ['sh', 'bash', 'zsh', 'md'],
41
41
  components: ['hook', 'skill', 'agent', 'ai-config-md', 'plugin'],
@@ -50,10 +50,10 @@ export const backdoorRules = [
50
50
  severity: 'CRITICAL',
51
51
  description: 'Detects patterns that download and execute remote code',
52
52
  patterns: [
53
- /curl\s+.*\|\s*(ba)?sh/gi,
54
- /wget\s+.*\|\s*(ba)?sh/gi,
55
- /curl\s+.*\|\s*python/gi,
56
- /wget\s+.*-O\s*-\s*\|\s*(ba)?sh/gi,
53
+ /curl\s+[^\n]{0,200}\|\s*(ba)?sh/gi,
54
+ /wget\s+[^\n]{0,200}\|\s*(ba)?sh/gi,
55
+ /curl\s+[^\n]{0,200}\|\s*python/gi,
56
+ /wget\s+[^\n]{0,100}-O\s*-\s*\|\s*(ba)?sh/gi,
57
57
  ],
58
58
  fileTypes: ['sh', 'bash', 'zsh', 'md'],
59
59
  components: ['hook', 'skill', 'agent', 'ai-config-md', 'plugin'],
@@ -71,7 +71,7 @@ export const backdoorRules = [
71
71
  />\s*\/etc\//gi,
72
72
  />\s*~\/\.(bash|zsh|profile)/gi,
73
73
  /tee\s+\/etc\//gi,
74
- /echo.*>>\s*~\/\.(bash|zsh)/gi,
74
+ /echo[^\n]{0,200}>>\s*~\/\.(bash|zsh)/gi,
75
75
  ],
76
76
  fileTypes: ['sh', 'bash', 'zsh', 'md'],
77
77
  components: ['hook', 'skill', 'agent', 'ai-config-md', 'plugin'],
@@ -104,7 +104,7 @@ export const backdoorRules = [
104
104
  severity: 'MEDIUM',
105
105
  description: 'Detects creation of background processes or daemons',
106
106
  patterns: [
107
- /nohup\s+.*&/gi,
107
+ /nohup\s+[^\n]{0,200}&/gi,
108
108
  /disown/gi,
109
109
  /setsid/gi,
110
110
  /&\s*$/gm,
@@ -122,8 +122,8 @@ export const backdoorRules = [
122
122
  severity: 'CRITICAL',
123
123
  description: 'Detects execution of base64 or otherwise encoded commands',
124
124
  patterns: [
125
- /echo\s+.*\|\s*base64\s+-d\s*\|\s*(ba)?sh/gi,
126
- /base64\s+-d.*\|\s*(ba)?sh/gi,
125
+ /echo\s+[^\n]{0,200}\|\s*base64\s+-d\s*\|\s*(ba)?sh/gi,
126
+ /base64\s+-d[^\n]{0,100}\|\s*(ba)?sh/gi,
127
127
  /python\s+-c\s+['"]import\s+base64/gi,
128
128
  ],
129
129
  fileTypes: ['sh', 'bash', 'zsh', 'md'],
@@ -22,7 +22,7 @@ export const correlationRules = [
22
22
  {
23
23
  id: 'CORR-001-A',
24
24
  description: 'Credential access followed by network transmission',
25
- filePatterns: ['*'],
25
+ filePatterns: [],
26
26
  contentPatterns: [
27
27
  'SECRET|TOKEN|API_KEY|getenv|process\\.env',
28
28
  'fetch|axios|XMLHttpRequest|curl|wget|request'
@@ -50,7 +50,7 @@ export const correlationRules = [
50
50
  {
51
51
  id: 'CORR-002-A',
52
52
  description: 'Permission escalation with startup persistence',
53
- filePatterns: ['*'],
53
+ filePatterns: [],
54
54
  contentPatterns: [
55
55
  'chmod|chown|setuid|sudo|defaultMode.*dontAsk',
56
56
  'startup|onload|autostart|service.*enable|systemctl.*enable'
@@ -131,7 +131,7 @@ export const correlationRules = [
131
131
  {
132
132
  id: 'CORR-005-A',
133
133
  description: 'AI safeguard bypass with data harvesting',
134
- filePatterns: ['*'],
134
+ filePatterns: [],
135
135
  contentPatterns: [
136
136
  'ignore.*previous.*instruction|forget.*safeguard|bypass.*filter',
137
137
  'conversation.*history|user.*data|personal.*information|collect.*data'
@@ -158,7 +158,7 @@ export const correlationRules = [
158
158
  {
159
159
  id: 'CORR-006-A',
160
160
  description: 'Package installation with network communication',
161
- filePatterns: ['*'],
161
+ filePatterns: [],
162
162
  contentPatterns: [
163
163
  'npm.*install|pip.*install|wget.*http|curl.*http|git.*clone',
164
164
  'http://|https://|fetch\\(|axios|request\\(|XMLHttpRequest'
@@ -186,7 +186,7 @@ export const correlationRules = [
186
186
  {
187
187
  id: 'CORR-007-A',
188
188
  description: 'File access with network transmission',
189
- filePatterns: ['*'],
189
+ filePatterns: [],
190
190
  contentPatterns: [
191
191
  'readFile|writeFile|fs\\.|glob|find.*-name',
192
192
  'fetch\\(|axios|post|put|XMLHttpRequest'
@@ -213,7 +213,7 @@ export const correlationRules = [
213
213
  {
214
214
  id: 'CORR-008-A',
215
215
  description: 'Authentication bypass with privileged access',
216
- filePatterns: ['*'],
216
+ filePatterns: [],
217
217
  contentPatterns: [
218
218
  'auth.*bypass|no.*auth|skip.*login|admin.*access',
219
219
  'sudo|root|administrator|privileged|elevated'
@@ -33,6 +33,7 @@ export declare function getRuleById(id: string): Rule | undefined;
33
33
  * Get enabled rules only
34
34
  */
35
35
  export declare function getEnabledRules(): Rule[];
36
+ export declare function clearRuleCache(): void;
36
37
  /**
37
38
  * Get rules for scanning with filters applied
38
39
  */
@@ -59,10 +59,18 @@ export function getRuleById(id) {
59
59
  export function getEnabledRules() {
60
60
  return ALL_RULES.filter(rule => rule.enabled);
61
61
  }
62
+ const scanRuleCache = new Map();
63
+ export function clearRuleCache() {
64
+ scanRuleCache.clear();
65
+ }
62
66
  /**
63
67
  * Get rules for scanning with filters applied
64
68
  */
65
69
  export function getRulesForScan(categories, severities) {
70
+ const key = `${[...categories].sort().join(',')}::${[...severities].sort().join(',')}`;
71
+ const cached = scanRuleCache.get(key);
72
+ if (cached)
73
+ return cached;
66
74
  const rules = ALL_RULES.filter(rule => {
67
75
  if (!rule.enabled)
68
76
  return false;
@@ -72,7 +80,8 @@ export function getRulesForScan(categories, severities) {
72
80
  return false;
73
81
  return true;
74
82
  });
75
- logger.debug(`Loaded ${rules.length} rules for scan`);
83
+ scanRuleCache.set(key, rules);
84
+ logger.debug(`Loaded ${rules.length} rules for scan (cached)`);
76
85
  return rules;
77
86
  }
78
87
  /**