getdoorman 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +181 -0
  3. package/bin/doorman.js +444 -0
  4. package/package.json +74 -0
  5. package/src/ai-fixer.js +559 -0
  6. package/src/ast-scanner.js +434 -0
  7. package/src/auth.js +149 -0
  8. package/src/baseline.js +48 -0
  9. package/src/compliance.js +539 -0
  10. package/src/config.js +466 -0
  11. package/src/custom-rules.js +32 -0
  12. package/src/dashboard.js +202 -0
  13. package/src/detector.js +142 -0
  14. package/src/fix-engine.js +48 -0
  15. package/src/fix-registry-extra.js +95 -0
  16. package/src/fix-registry-go-rust.js +77 -0
  17. package/src/fix-registry-java-csharp.js +77 -0
  18. package/src/fix-registry-js.js +99 -0
  19. package/src/fix-registry-mcp-ai.js +57 -0
  20. package/src/fix-registry-python.js +87 -0
  21. package/src/fixer-ruby-php.js +608 -0
  22. package/src/fixer.js +2113 -0
  23. package/src/hooks.js +115 -0
  24. package/src/ignore.js +176 -0
  25. package/src/index.js +384 -0
  26. package/src/metrics.js +126 -0
  27. package/src/monorepo.js +65 -0
  28. package/src/presets.js +54 -0
  29. package/src/reporter.js +975 -0
  30. package/src/rule-worker.js +36 -0
  31. package/src/rules/ast-rules.js +756 -0
  32. package/src/rules/bugs/accessibility.js +235 -0
  33. package/src/rules/bugs/ai-codegen-fixable.js +172 -0
  34. package/src/rules/bugs/ai-codegen.js +365 -0
  35. package/src/rules/bugs/code-smell-bugs.js +247 -0
  36. package/src/rules/bugs/crypto-bugs.js +195 -0
  37. package/src/rules/bugs/docker-bugs.js +158 -0
  38. package/src/rules/bugs/general.js +361 -0
  39. package/src/rules/bugs/go-bugs.js +279 -0
  40. package/src/rules/bugs/index.js +73 -0
  41. package/src/rules/bugs/js-api.js +257 -0
  42. package/src/rules/bugs/js-array-object.js +210 -0
  43. package/src/rules/bugs/js-async-fixable.js +223 -0
  44. package/src/rules/bugs/js-async.js +211 -0
  45. package/src/rules/bugs/js-closure-scope.js +182 -0
  46. package/src/rules/bugs/js-database.js +203 -0
  47. package/src/rules/bugs/js-error-handling.js +148 -0
  48. package/src/rules/bugs/js-logic.js +261 -0
  49. package/src/rules/bugs/js-memory.js +214 -0
  50. package/src/rules/bugs/js-node.js +361 -0
  51. package/src/rules/bugs/js-react.js +373 -0
  52. package/src/rules/bugs/js-regex.js +200 -0
  53. package/src/rules/bugs/js-state.js +272 -0
  54. package/src/rules/bugs/js-type-coercion.js +318 -0
  55. package/src/rules/bugs/nextjs-bugs.js +242 -0
  56. package/src/rules/bugs/nextjs-fixable.js +120 -0
  57. package/src/rules/bugs/node-fixable.js +178 -0
  58. package/src/rules/bugs/python-advanced.js +245 -0
  59. package/src/rules/bugs/python-fixable.js +98 -0
  60. package/src/rules/bugs/python.js +284 -0
  61. package/src/rules/bugs/react-fixable.js +207 -0
  62. package/src/rules/bugs/ruby-bugs.js +182 -0
  63. package/src/rules/bugs/shell-bugs.js +181 -0
  64. package/src/rules/bugs/silent-failures.js +261 -0
  65. package/src/rules/bugs/ts-bugs.js +235 -0
  66. package/src/rules/bugs/unused-vars.js +65 -0
  67. package/src/rules/compliance/accessibility-ext.js +468 -0
  68. package/src/rules/compliance/education.js +322 -0
  69. package/src/rules/compliance/financial.js +421 -0
  70. package/src/rules/compliance/frameworks.js +507 -0
  71. package/src/rules/compliance/healthcare.js +520 -0
  72. package/src/rules/compliance/index.js +2714 -0
  73. package/src/rules/compliance/regional-eu.js +480 -0
  74. package/src/rules/compliance/regional-international.js +903 -0
  75. package/src/rules/cost/index.js +1993 -0
  76. package/src/rules/data/index.js +2503 -0
  77. package/src/rules/dependencies/index.js +1684 -0
  78. package/src/rules/deployment/index.js +2050 -0
  79. package/src/rules/index.js +71 -0
  80. package/src/rules/infrastructure/index.js +3048 -0
  81. package/src/rules/performance/index.js +3455 -0
  82. package/src/rules/quality/index.js +3175 -0
  83. package/src/rules/reliability/index.js +3040 -0
  84. package/src/rules/scope-rules.js +815 -0
  85. package/src/rules/security/ai-api.js +1177 -0
  86. package/src/rules/security/auth.js +1328 -0
  87. package/src/rules/security/cors.js +127 -0
  88. package/src/rules/security/crypto.js +527 -0
  89. package/src/rules/security/csharp.js +862 -0
  90. package/src/rules/security/csrf.js +193 -0
  91. package/src/rules/security/dart.js +835 -0
  92. package/src/rules/security/deserialization.js +291 -0
  93. package/src/rules/security/file-upload.js +187 -0
  94. package/src/rules/security/go.js +850 -0
  95. package/src/rules/security/headers.js +235 -0
  96. package/src/rules/security/index.js +65 -0
  97. package/src/rules/security/injection.js +1639 -0
  98. package/src/rules/security/mcp-server.js +71 -0
  99. package/src/rules/security/misconfiguration.js +660 -0
  100. package/src/rules/security/oauth-jwt.js +329 -0
  101. package/src/rules/security/path-traversal.js +295 -0
  102. package/src/rules/security/php.js +1054 -0
  103. package/src/rules/security/prototype-pollution.js +283 -0
  104. package/src/rules/security/rate-limiting.js +208 -0
  105. package/src/rules/security/ruby.js +1061 -0
  106. package/src/rules/security/rust.js +693 -0
  107. package/src/rules/security/secrets.js +747 -0
  108. package/src/rules/security/shell.js +647 -0
  109. package/src/rules/security/ssrf.js +298 -0
  110. package/src/rules/security/supply-chain-advanced.js +393 -0
  111. package/src/rules/security/supply-chain.js +734 -0
  112. package/src/rules/security/swift.js +835 -0
  113. package/src/rules/security/taint.js +27 -0
  114. package/src/rules/security/xss.js +520 -0
  115. package/src/scan-cache.js +71 -0
  116. package/src/scanner.js +710 -0
  117. package/src/scope-analyzer.js +685 -0
  118. package/src/share.js +88 -0
  119. package/src/taint.js +300 -0
  120. package/src/telemetry.js +183 -0
  121. package/src/tracer.js +190 -0
  122. package/src/upload.js +35 -0
  123. package/src/worker.js +31 -0
package/src/scanner.js ADDED
@@ -0,0 +1,710 @@
1
// Node built-ins
import { execSync } from 'child_process';
import { createHash } from 'crypto';
import {
  closeSync,
  existsSync,
  lstatSync,
  mkdirSync,
  openSync,
  readFileSync,
  readSync,
  realpathSync,
  statSync,
  writeFileSync,
} from 'fs';
import { readFile } from 'fs/promises';
import { cpus } from 'os';
import { dirname, join, relative } from 'path';
import { fileURLToPath } from 'url';
import { Worker } from 'worker_threads';

// Third-party
import { glob } from 'glob';

// Local modules
import { loadIgnorePatterns } from './ignore.js';
11
+
12
// Version stamp written into the scan cache; readCache() treats any cache
// produced by a different version as stale.
const DOORMAN_VERSION = '1.0.0';

// Glob patterns for everything the scanner inspects: application source,
// infrastructure/CI configuration, dependency manifests, and common
// project config files.
const SOURCE_PATTERNS = [
  // JavaScript / TypeScript source
  '**/*.js',
  '**/*.jsx',
  '**/*.ts',
  '**/*.tsx',
  '**/*.mjs',
  '**/*.cjs',
  // Other languages
  '**/*.py',
  '**/*.rb',
  '**/*.go',
  // Environment and infrastructure-as-code
  '**/*.env*',
  '**/*.tf',
  '**/*.tfvars',
  '**/Dockerfile*',
  '**/docker-compose*.yml',
  '**/docker-compose*.yaml',
  // CI pipelines
  '**/.github/workflows/*.yml',
  '**/.github/workflows/*.yaml',
  '**/.gitlab-ci.yml',
  '**/.gitlab-ci.yaml',
  // Kubernetes / deployment manifests
  '**/k8s/**/*.yml',
  '**/k8s/**/*.yaml',
  '**/kubernetes/**/*.yml',
  '**/kubernetes/**/*.yaml',
  '**/helm/**/*.yaml',
  '**/manifests/**/*.yaml',
  '**/deploy/**/*.yaml',
  '**/serverless.yml',
  '**/serverless.yaml',
  // Dependency manifests
  '**/package.json',
  '**/requirements.txt',
  '**/Gemfile',
  '**/go.mod',
  // Database / schema files
  '**/*.sql',
  '**/*.prisma',
  // Server and build tooling configuration
  '**/nginx.conf',
  '**/next.config.*',
  '**/tsconfig.json',
  '**/.eslintrc*',
  '**/.gitignore',
  '**/.npmrc',
  '**/jest.config.*',
  '**/webpack.config.*',
  '**/vite.config.*',
  '**/rollup.config.*',
  '**/CODEOWNERS',
  '**/Makefile',
  '**/*.sh',
];

// Hard limits that keep a scan bounded in time and memory.
const MAX_FILE_SIZE = 1_000_000; // 1MB — larger files are skipped entirely
const MAX_FILE_COUNT = 50_000; // at most this many files per scan
const PARALLEL_BATCH_SIZE = 50; // files read concurrently per batch
const MEMORY_WARNING_BYTES = 500 * 1024 * 1024; // 500MB heap warning threshold
const RULE_TIMEOUT_MS = 5_000; // timeout applied to each async rule
const TOTAL_SCAN_TIMEOUT_MS = 60_000; // overall budget before partial results
const RULE_CHUNK_SIZE = 20; // NOTE(review): appears unused — runRules hardcodes a chunk of 100; confirm before removing
const CACHE_FILE = '.doorman-cache.json'; // cache filename, created under the scan target
72
+
73
/**
 * Check whether a file appears to be binary by probing its first 512 bytes
 * for a null byte.
 *
 * Reads at most 512 bytes through a file descriptor instead of loading the
 * whole file into memory (the previous version read the entire file just to
 * inspect its first 512 bytes).
 *
 * @param {string} fullPath - path to the file to probe
 * @returns {boolean} true if a null byte is found; false otherwise, or on
 *   any I/O error (so the caller falls through to the normal read path,
 *   which reports errors properly).
 */
function isBinaryFile(fullPath) {
  let fd;
  try {
    fd = openSync(fullPath, 'r');
    const sample = Buffer.alloc(512);
    const bytesRead = readSync(fd, sample, 0, 512, 0);
    return sample.subarray(0, bytesRead).includes(0);
  } catch {
    return false;
  } finally {
    if (fd !== undefined) {
      try {
        closeSync(fd);
      } catch {
        // ignore close failures — nothing actionable here
      }
    }
  }
}
88
+
89
/**
 * Resolve a path that may be a symlink, tracking visited real paths in the
 * shared `visitedPaths` set to detect loops.
 *
 * Returns the resolved real path for symlinks, the path itself for regular
 * files, or null when lstat/realpath fails or the symlink's target was
 * already visited (loop).
 *
 * NOTE(review): this does NOT verify that a symlink target stays inside the
 * scan root — a link pointing outside the target directory is followed.
 * Confirm whether escaping symlinks should be rejected.
 */
function resolveSymlink(fullPath, visitedPaths) {
  try {
    const stat = lstatSync(fullPath);
    if (stat.isSymbolicLink()) {
      const realPath = realpathSync(fullPath);
      if (visitedPaths.has(realPath)) {
        return null; // symlink loop detected
      }
      visitedPaths.add(realPath);
      return realPath;
    }
    // Regular file/directory: record it so a later symlink to it is caught.
    visitedPaths.add(fullPath);
    return fullPath;
  } catch {
    // Broken link, permission error, or race with deletion.
    return null;
  }
}
110
+
111
/**
 * Safely read a file with all edge-case checks: symlink loops, permission
 * errors, oversized files, and binary content.
 *
 * The file is read exactly ONCE as a buffer; the binary probe (null byte in
 * the first 512 bytes) and the UTF-8 decode share that single read. The
 * previous version read the file twice — once inside isBinaryFile and once
 * for the content.
 *
 * @param {string} fullPath - absolute path to read
 * @param {string} match - relative path, used only in warning messages
 * @param {Set<string>} visitedPaths - shared set for symlink-loop detection
 * @param {boolean} [silent=false] - suppress console warnings when true
 * @returns {{content: string|null, skipped: boolean, reason: string|null}}
 */
export function safeReadFile(fullPath, match, visitedPaths, silent = false) {
  // Check symlink safety
  const resolved = resolveSymlink(fullPath, visitedPaths);
  if (resolved === null) {
    if (!silent) {
      console.warn(`[scanner] Skipping ${match}: symlink loop or unresolvable symlink`);
    }
    return { content: null, skipped: true, reason: 'symlink' };
  }

  // Check file size before reading full content
  try {
    const stat = statSync(resolved);
    if (stat.size > MAX_FILE_SIZE) {
      if (!silent) {
        console.warn(`[scanner] Skipping ${match}: file size ${(stat.size / 1024 / 1024).toFixed(1)}MB exceeds 1MB limit`);
      }
      return { content: null, skipped: true, reason: 'too-large' };
    }
  } catch (err) {
    if (err.code === 'EACCES' || err.code === 'EPERM') {
      if (!silent) {
        console.warn(`[scanner] Skipping ${match}: permission denied`);
      }
      return { content: null, skipped: true, reason: 'permission' };
    }
    return { content: null, skipped: true, reason: 'stat-error' };
  }

  // Single read: probe for binary content, then decode as UTF-8.
  try {
    const buf = readFileSync(resolved);
    if (buf.subarray(0, 512).includes(0)) {
      return { content: null, skipped: true, reason: 'binary' };
    }
    return { content: buf.toString('utf-8'), skipped: false, reason: null };
  } catch (err) {
    if (err.code === 'EACCES' || err.code === 'EPERM') {
      if (!silent) {
        console.warn(`[scanner] Skipping ${match}: permission denied`);
      }
      return { content: null, skipped: true, reason: 'permission' };
    }
    return { content: null, skipped: true, reason: 'read-error' };
  }
}
163
+
164
/**
 * Async version of safeReadFile — reads the file content asynchronously
 * while keeping the cheap checks (symlink resolution, stat) synchronous.
 *
 * The file is read exactly ONCE as a buffer via fs/promises; the binary
 * probe (null byte in the first 512 bytes) and the UTF-8 decode share that
 * single read. The previous version performed an extra synchronous full
 * read inside isBinaryFile before the async read.
 *
 * @param {string} fullPath - absolute path to read
 * @param {string} match - relative path, used only in warning messages
 * @param {Set<string>} visitedPaths - shared set for symlink-loop detection
 * @param {boolean} [silent=false] - suppress console warnings when true
 * @returns {Promise<{content: string|null, skipped: boolean, reason: string|null}>}
 */
async function safeReadFileAsync(fullPath, match, visitedPaths, silent = false) {
  // Check symlink safety (sync — fast, no I/O wait)
  const resolved = resolveSymlink(fullPath, visitedPaths);
  if (resolved === null) {
    if (!silent) {
      console.warn(`[scanner] Skipping ${match}: symlink loop or unresolvable symlink`);
    }
    return { content: null, skipped: true, reason: 'symlink' };
  }

  // Check file size before reading full content
  try {
    const stat = statSync(resolved);
    if (stat.size > MAX_FILE_SIZE) {
      if (!silent) {
        console.warn(`[scanner] Skipping ${match}: file size ${(stat.size / 1024 / 1024).toFixed(1)}MB exceeds 1MB limit`);
      }
      return { content: null, skipped: true, reason: 'too-large' };
    }
  } catch (err) {
    if (err.code === 'EACCES' || err.code === 'EPERM') {
      if (!silent) {
        console.warn(`[scanner] Skipping ${match}: permission denied`);
      }
      return { content: null, skipped: true, reason: 'permission' };
    }
    return { content: null, skipped: true, reason: 'stat-error' };
  }

  // Single async read: probe for binary content, then decode as UTF-8.
  try {
    const buf = await readFile(resolved);
    if (buf.subarray(0, 512).includes(0)) {
      return { content: null, skipped: true, reason: 'binary' };
    }
    return { content: buf.toString('utf-8'), skipped: false, reason: null };
  } catch (err) {
    if (err.code === 'EACCES' || err.code === 'EPERM') {
      if (!silent) {
        console.warn(`[scanner] Skipping ${match}: permission denied`);
      }
      return { content: null, skipped: true, reason: 'permission' };
    }
    return { content: null, skipped: true, reason: 'read-error' };
  }
}
217
+
218
/**
 * Compute the hex-encoded SHA-256 digest of the given content.
 *
 * @param {string|Buffer} content - data to hash
 * @returns {string} 64-character lowercase hex digest
 */
function hashContent(content) {
  const digest = createHash('sha256');
  digest.update(content);
  return digest.digest('hex');
}
224
+
225
/**
 * Report current heap usage, warning once per call when it crosses the
 * MEMORY_WARNING_BYTES threshold.
 *
 * @returns {number} heapUsed in bytes
 */
function checkMemoryUsage() {
  const { heapUsed } = process.memoryUsage();
  if (heapUsed > MEMORY_WARNING_BYTES) {
    const mb = Math.round(heapUsed / 1024 / 1024);
    console.warn(
      `[scanner] Warning: memory usage is ${mb}MB, exceeding 500MB threshold`
    );
  }
  return heapUsed;
}
237
+
238
+ // ---------------------------------------------------------------------------
239
+ // Cache — keyed by file content SHA-256, invalidated on version change
240
+ // ---------------------------------------------------------------------------
241
+
242
/**
 * Load the scan cache for a target directory.
 *
 * @param {string} targetPath - directory containing the cache file
 * @returns {object|null} the parsed cache, or null when the file is absent,
 *   unparsable, or written by a different Doorman version (stale format).
 */
function readCache(targetPath) {
  const cachePath = join(targetPath, CACHE_FILE);
  if (!existsSync(cachePath)) return null;
  try {
    const parsed = JSON.parse(readFileSync(cachePath, 'utf-8'));
    // A version mismatch means the cache format may have changed — discard.
    return parsed.version === DOORMAN_VERSION ? parsed : null;
  } catch {
    // Unreadable or corrupted cache — behave as a first run.
    return null;
  }
}
262
+
263
/**
 * Persist the scan cache (file hashes + per-file results) to disk, stamped
 * with the current Doorman version and an ISO timestamp.
 *
 * @param {string} targetPath - directory to write the cache file into
 * @param {object} fileHashes - map of relative path -> content SHA-256
 * @param {object} fileResults - map of relative path -> cached findings
 */
function writeCache(targetPath, fileHashes, fileResults) {
  const cachePath = join(targetPath, CACHE_FILE);
  const parent = dirname(cachePath);
  if (!existsSync(parent)) {
    mkdirSync(parent, { recursive: true });
  }
  const payload = {
    version: DOORMAN_VERSION,
    timestamp: new Date().toISOString(),
    hashes: fileHashes,
    results: fileResults,
  };
  writeFileSync(cachePath, JSON.stringify(payload, null, 2));
}
280
+
281
/**
 * List the files git reports as modified relative to HEAD.
 *
 * @param {string} targetPath - directory to run git in
 * @returns {string[]|null} changed file paths, or null when git is
 *   unavailable or targetPath is not inside a repository — callers use
 *   null to fall back to a full scan.
 */
function getGitChangedFiles(targetPath) {
  try {
    const stdout = execSync('git diff --name-only HEAD', {
      cwd: targetPath,
      encoding: 'utf-8',
      timeout: 10_000,
      stdio: ['pipe', 'pipe', 'pipe'],
    });
    return stdout
      .split('\n')
      .map((line) => line.trim())
      .filter(Boolean);
  } catch {
    // Not a git repo, git missing, or the command timed out.
    return null;
  }
}
301
+
302
+ // ---------------------------------------------------------------------------
303
+ // Progress reporting
304
+ // ---------------------------------------------------------------------------
305
+
306
/**
 * Create a progress reporter for a scan of `total` files.
 *
 * update(file) advances the counter and redraws the bar (only when stderr
 * is a TTY). done() clears the bar and returns { scanned, elapsed } with
 * elapsed in seconds.
 *
 * In silent mode both methods are cheap no-ops, but done() still returns a
 * stats object (zeros) so callers can destructure the result
 * unconditionally — previously the silent done() returned undefined while
 * the non-silent one returned stats.
 *
 * @param {number} total - number of files expected
 * @param {boolean} silent - disable all output and tracking
 */
function createProgress(total, silent) {
  if (silent) {
    return {
      update() {},
      done() {
        // Keep the return shape consistent with the non-silent reporter.
        return { scanned: 0, elapsed: 0 };
      },
    };
  }

  let scanned = 0;
  const startTime = performance.now();
  const isTTY = process.stderr?.isTTY;

  // Redraw the progress bar in place on the current stderr line.
  function render(file) {
    if (!isTTY || total === 0) return; // guard total=0 against NaN percentages
    const pct = Math.round((scanned / total) * 100);
    const barLen = 20;
    const filled = Math.round((barLen * scanned) / total);
    const bar = '\u2588'.repeat(filled) + '\u2591'.repeat(barLen - filled);
    const short = file.length > 40 ? '...' + file.slice(-37) : file;
    process.stderr.write(`\r [${bar}] ${pct}% (${scanned}/${total}) ${short} `);
  }

  return {
    update(file) {
      scanned++;
      render(file);
    },
    done() {
      if (isTTY) {
        // Blank out the progress line.
        process.stderr.write('\r' + ' '.repeat(80) + '\r');
      }
      const elapsed = ((performance.now() - startTime) / 1000).toFixed(2);
      return { scanned, elapsed: parseFloat(elapsed) };
    },
  };
}
349
+
350
+ // ---------------------------------------------------------------------------
351
+ // File collection
352
+ // ---------------------------------------------------------------------------
353
+
354
/**
 * Collect all scannable files from the target path into a Map of
 * relative path -> content.
 *
 * Files are read in parallel batches. Respects .doormanignore patterns,
 * caps the scan at MAX_FILE_COUNT files, skips binary/oversized/unreadable
 * files, and writes a content-hash cache that collectFilesIncremental
 * consumes on subsequent runs.
 *
 * Note: the previous version also READ the cache here and counted cache
 * hits, but never used the count — that dead code has been removed; this
 * function only writes the cache.
 *
 * @param {string} targetPath - root directory to scan
 * @param {object|boolean} optionsOrSilent - options object, or a bare
 *   boolean treated as `silent` for backward compatibility:
 *   silent       — suppress warnings and progress output
 *   noCache      — do not write the hash cache
 *   batchSize    — parallel read batch size (default 50)
 *   extraIgnores — additional glob ignore patterns
 * @returns {Promise<Map<string, string>>} relative path -> file content
 */
export async function collectFiles(targetPath, optionsOrSilent = false) {
  // Backward compat: a bare boolean argument means `silent`.
  const options = typeof optionsOrSilent === 'object' ? optionsOrSilent : { silent: optionsOrSilent };
  const silent = options.silent ?? false;
  const noCache = options.noCache ?? false;
  const batchSize = options.batchSize ?? PARALLEL_BATCH_SIZE;

  // Ignore patterns: defaults + .doormanignore + caller-supplied extras.
  const { patterns: ignorePatterns } = loadIgnorePatterns(targetPath);
  const allIgnorePatterns = [...ignorePatterns, ...(options.extraIgnores || [])];

  const matches = await glob(SOURCE_PATTERNS, {
    cwd: targetPath,
    ignore: allIgnorePatterns,
    nodir: true,
    dot: true,
  });

  if (matches.length > MAX_FILE_COUNT && !silent) {
    console.warn(`[scanner] Warning: found ${matches.length} files, limiting to ${MAX_FILE_COUNT}`);
  }
  const capped = matches.slice(0, MAX_FILE_COUNT);

  const files = new Map();
  const visitedPaths = new Set();
  const newHashes = {};
  const progress = createProgress(capped.length, silent);

  // Read files in parallel batches to bound concurrent open descriptors.
  for (let i = 0; i < capped.length; i += batchSize) {
    const batch = capped.slice(i, i + batchSize);

    const results = await Promise.all(
      batch.map(async (match) => {
        const fullPath = join(targetPath, match);
        const result = await safeReadFileAsync(fullPath, match, visitedPaths, silent);
        return { match, ...result };
      })
    );

    for (const { match, content, skipped } of results) {
      progress.update(match);
      if (skipped || content === null) continue;
      newHashes[match] = hashContent(content);
      files.set(match, content);
    }

    checkMemoryUsage();
  }

  // Persist hashes so collectFilesIncremental can detect unchanged files.
  if (!noCache) {
    writeCache(targetPath, newHashes, {});
  }

  progress.done();
  return files;
}
445
+
446
/**
 * Collect files incrementally — return only the files that changed since
 * the last scan, combining git status with content hashes from the cache.
 * Falls back to a full collectFiles() scan when no cache exists (first run,
 * corrupted cache, version mismatch, or --no-cache).
 *
 * When git is unavailable, every file is treated as "changed in git" and
 * inclusion is governed solely by the hash comparison.
 *
 * NOTE(review): `git diff --name-only` emits paths relative to the repo
 * ROOT, while glob matches are relative to targetPath. When targetPath is a
 * subdirectory of the repo these never match and the git signal is
 * effectively ignored (hash comparison still works) — confirm this is
 * intended.
 */
export async function collectFilesIncremental(targetPath, optionsOrSilent = false) {
  // Backward compat: a bare boolean argument means `silent`.
  const options = typeof optionsOrSilent === 'object' ? optionsOrSilent : { silent: optionsOrSilent };
  const silent = options.silent ?? false;
  const noCache = options.noCache ?? false;
  const batchSize = options.batchSize ?? PARALLEL_BATCH_SIZE;

  const cache = noCache ? null : readCache(targetPath);
  const gitChanged = getGitChangedFiles(targetPath);

  // If no cache exists, do a full scan and build the cache
  if (!cache) {
    return collectFiles(targetPath, options);
  }

  const cachedHashes = cache.hashes ?? {};

  // Load ignore patterns (defaults + .doormanignore + RC config)
  const { patterns: ignorePatterns } = loadIgnorePatterns(targetPath);
  const extraIgnores = options.extraIgnores || [];
  const allIgnorePatterns = [...ignorePatterns, ...extraIgnores];

  // Determine which files to re-scan
  const allMatches = await glob(SOURCE_PATTERNS, {
    cwd: targetPath,
    ignore: allIgnorePatterns,
    nodir: true,
    dot: true,
  });

  const capped = allMatches.slice(0, MAX_FILE_COUNT);
  // null gitChangedSet means "git unavailable" — treat all files as changed.
  const gitChangedSet = gitChanged ? new Set(gitChanged) : null;

  const files = new Map();
  const newHashes = {};
  const visitedPaths = new Set();
  const progress = createProgress(capped.length, silent);

  // Every candidate file is still READ (to compute its hash); only the
  // returned Map is filtered down to changed files.
  for (let i = 0; i < capped.length; i += batchSize) {
    const batch = capped.slice(i, i + batchSize);

    const results = await Promise.all(
      batch.map(async (match) => {
        const fullPath = join(targetPath, match);
        const result = await safeReadFileAsync(fullPath, match, visitedPaths, silent);
        return { match, ...result };
      })
    );

    for (const { match, content, skipped } of results) {
      progress.update(match);

      if (skipped || content === null) continue;

      const contentHash = hashContent(content);
      newHashes[match] = contentHash;

      // Include file if: git says it changed, hash differs from cache, or it is new
      // (a new file has no prevHash, so hashChanged is true).
      const prevHash = cachedHashes[match];
      const changedInGit = gitChangedSet ? gitChangedSet.has(match) : true;
      const hashChanged = prevHash !== contentHash;

      if (changedInGit || hashChanged) {
        files.set(match, content);
      }
    }

    checkMemoryUsage();
  }

  // Rewrite the cache with hashes for ALL current files (not just changed
  // ones), so deleted files naturally drop out of the cache.
  if (!noCache) {
    writeCache(targetPath, newHashes, {});
  }

  progress.done();
  return files;
}
526
+
527
+ // ---------------------------------------------------------------------------
528
+ // Rule execution
529
+ // ---------------------------------------------------------------------------
530
+
531
// Rules that threw once are disabled for the rest of the process, so a
// broken rule cannot fail repeatedly across files or scans.
const _failedRules = new Set();

/**
 * Run a single rule synchronously (most rules are sync).
 *
 * Returns an array of findings, or the rule's Promise when the rule is
 * async — the caller is responsible for awaiting promises with a timeout
 * (see runRuleWithTimeout).
 *
 * A rule is skipped (empty result) when it previously threw, or when it
 * declares a language (`rule.lang`) absent from the detected language set.
 *
 * @param {object} rule - rule with an `id` and a `check(context)` function
 * @param {object} context - scan context passed through to the rule
 * @returns {Array|Promise<Array>} findings, or a pending promise of them
 */
function runRuleSync(rule, context) {
  const id = rule.id || '';
  // Skip previously failed rules and irrelevant language rules
  if (_failedRules.has(id)) return [];
  if (rule.lang && context._detectedLangs && !context._detectedLangs.has(rule.lang)) return [];
  try {
    const result = rule.check(context);
    if (result && typeof result.then === 'function') return result;
    return Array.isArray(result) ? result : [];
  } catch (err) {
    // Disable the rule and surface the failure once, instead of silently
    // swallowing the error as the previous version did.
    _failedRules.add(id);
    if (!context?.silent) {
      console.warn(`[scanner] Rule "${id || 'unknown'}" failed and was disabled: ${err.message}`);
    }
    return [];
  }
}
554
+
555
/**
 * Await a rule's result with a timeout guard.
 *
 * @param {object} rule - rule with `check(context)` and an `id`/`name`
 * @param {object} context - scan context passed to rule.check
 * @param {boolean} silent - suppress warnings when true
 * @param {Promise|undefined} [pending] - an already-started promise from a
 *   previous rule.check invocation. When provided, the rule is NOT invoked
 *   again (previously the caller's stored promise was discarded and async
 *   rules executed twice). Omitting it invokes rule.check(context), which
 *   preserves the original call signature.
 * @returns {Promise<Array>} findings, or [] on timeout or error.
 *
 * NOTE: the timeout only stops WAITING — it cannot cancel the underlying
 * rule, which may keep running in the background.
 */
async function runRuleWithTimeout(rule, context, silent, pending) {
  let timer;
  const timeout = new Promise((_, reject) => {
    timer = setTimeout(() => {
      reject(new Error(`Rule "${rule.id || rule.name || 'unknown'}" timed out after ${RULE_TIMEOUT_MS}ms`));
    }, RULE_TIMEOUT_MS);
  });

  try {
    const result = await Promise.race([pending ?? rule.check(context), timeout]);
    clearTimeout(timer);
    return Array.isArray(result) ? result : [];
  } catch (e) {
    clearTimeout(timer);
    if (!silent) {
      if (e.message.includes('timed out')) {
        console.warn(`[scanner] ${e.message}`);
      } else {
        console.warn(`[scanner] Rule "${rule.id || rule.name || 'unknown'}" failed: ${e.message}`);
      }
    }
    return [];
  }
}
579
+
580
/**
 * Run all applicable rules against the collected files and return findings
 * sorted by severity (critical first).
 *
 * Sync rules run without Promise overhead; async rules are collected and
 * awaited in batches with a per-rule timeout. Files with no meaningful
 * content (< 10 chars) are dropped from context.files up front. Honors an
 * overall scan budget (TOTAL_SCAN_TIMEOUT_MS) checked per chunk, and an
 * optional --profile mode that attaches per-rule timings.
 *
 * Fixes vs the previous version:
 * - Severity sort used `|| 4`, which collapsed critical's rank 0 (falsy)
 *   to the default rank and sorted critical findings LAST; `??` keeps 0.
 * - Async rules were executed twice: runRuleSync already invoked
 *   rule.check, then the stored promise was discarded and the rule invoked
 *   again. The stored promise is now passed to runRuleWithTimeout.
 *
 * @param {Array} rules - rules with `id` and `check(context)`
 * @param {object} context - scan context ({ files, silent, profile, ... })
 * @returns {Promise<Array>} findings sorted by severity; with profile mode
 *   a `_profile` property (per-rule timings) is attached to the array.
 */
export async function runRules(rules, context) {
  const findings = [];
  const scanStart = Date.now();
  const silent = context?.silent ?? false;
  const profile = context?.profile ?? false;
  const profileData = profile ? [] : null;

  // Pre-compute: drop files with no meaningful content (< 10 chars).
  if (context.files) {
    const toDelete = [];
    for (const [fp, content] of context.files) {
      if (!content || content.length < 10) toDelete.push(fp);
    }
    for (const fp of toDelete) context.files.delete(fp);
  }

  const asyncRules = [];

  // Run rules in chunks so the total-timeout check happens per chunk
  // (not per rule — reduces overhead).
  const CHUNK = 100;
  for (let i = 0; i < rules.length; i += CHUNK) {
    if (Date.now() - scanStart > TOTAL_SCAN_TIMEOUT_MS) {
      if (!silent) console.warn(`[scanner] Scan timeout exceeded, returning partial results`);
      break;
    }

    const chunk = rules.slice(i, i + CHUNK);
    for (const rule of chunk) {
      const ruleStart = profile ? performance.now() : 0;
      const result = runRuleSync(rule, context);

      if (result && typeof result.then === 'function') {
        // Async rule: hold the in-flight promise, awaited (with timeout) below.
        asyncRules.push({ rule, promise: result });
      } else if (Array.isArray(result) && result.length > 0) {
        findings.push(...result);
      }

      if (profile) {
        profileData.push({ id: rule.id, ms: performance.now() - ruleStart, findings: Array.isArray(result) ? result.length : 0 });
      }
    }
  }

  // Await async rules with a per-rule timeout, in bounded batches.
  if (asyncRules.length > 0) {
    const ASYNC_CHUNK = 50;
    for (let i = 0; i < asyncRules.length; i += ASYNC_CHUNK) {
      const chunk = asyncRules.slice(i, i + ASYNC_CHUNK);
      const results = await Promise.all(
        // Pass the stored promise so the rule is not executed a second time.
        chunk.map(({ rule, promise }) => runRuleWithTimeout(rule, context, silent, promise))
      );
      for (const result of results) {
        if (Array.isArray(result)) findings.push(...result);
      }
    }
  }

  // Sort by severity. `??` (not `||`) so critical's falsy rank 0 survives.
  const severityOrder = { critical: 0, high: 1, medium: 2, low: 3, info: 4 };
  findings.sort((a, b) => (severityOrder[a.severity] ?? 4) - (severityOrder[b.severity] ?? 4));

  // Attach profile data if requested (slowest rules first).
  if (profile && profileData) {
    findings._profile = profileData.sort((a, b) => b.ms - a.ms);
  }

  return findings;
}
656
+
657
/**
 * Run rules in parallel using worker threads, splitting the rule set across
 * up to 8 CPU cores (workers partition via workerIndex/totalWorkers).
 *
 * Falls back to the serial runRules() when the machine reports a single
 * core, there are no files to scan, or any worker errors.
 *
 * Fix vs the previous version: the severity sort used `|| 4`, which
 * collapsed critical's rank 0 (falsy) to the default rank and sorted
 * critical findings LAST; `??` preserves rank 0.
 *
 * @param {Array} rules - rules to run (partitioned inside rule-worker.js)
 * @param {object} context - scan context ({ files, stack, ... })
 * @returns {Promise<Array>} merged findings sorted by severity
 */
export async function runRulesParallel(rules, context) {
  const numCores = Math.min(cpus().length, 8); // Cap at 8 workers
  if (numCores <= 1 || !context.files || context.files.size === 0) {
    return runRules(rules, context); // Fallback to serial
  }

  const workerPath = join(dirname(fileURLToPath(import.meta.url)), 'rule-worker.js');

  // Serialize scan state for structured-clone transfer to the workers.
  const filesData = [...context.files.entries()];
  const stackData = context.stack || {};
  const category = context._categoryFilter || null;
  const _detectedLangs = context._detectedLangs ? [...context._detectedLangs] : null;

  // Spawn one worker per core; each resolves with its partition's findings.
  const workers = [];
  for (let i = 0; i < numCores; i++) {
    workers.push(new Promise((resolve, reject) => {
      const worker = new Worker(workerPath, {
        workerData: {
          filesData,
          stackData,
          category,
          _detectedLangs,
          workerIndex: i,
          totalWorkers: numCores,
        },
      });
      worker.on('message', resolve);
      worker.on('error', reject);
      worker.on('exit', (code) => {
        if (code !== 0) resolve([]); // Don't crash on worker failure
      });
    }));
  }

  try {
    const results = await Promise.all(workers);
    const findings = results.flat();

    // Sort by severity. `??` (not `||`) so critical's falsy rank 0 survives.
    const severityOrder = { critical: 0, high: 1, medium: 2, low: 3, info: 4 };
    findings.sort((a, b) => (severityOrder[a.severity] ?? 4) - (severityOrder[b.severity] ?? 4));

    return findings;
  } catch {
    // Any worker error — fall back to the serial path.
    return runRules(rules, context);
  }
}