getdoorman 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +181 -0
- package/bin/doorman.js +444 -0
- package/package.json +74 -0
- package/src/ai-fixer.js +559 -0
- package/src/ast-scanner.js +434 -0
- package/src/auth.js +149 -0
- package/src/baseline.js +48 -0
- package/src/compliance.js +539 -0
- package/src/config.js +466 -0
- package/src/custom-rules.js +32 -0
- package/src/dashboard.js +202 -0
- package/src/detector.js +142 -0
- package/src/fix-engine.js +48 -0
- package/src/fix-registry-extra.js +95 -0
- package/src/fix-registry-go-rust.js +77 -0
- package/src/fix-registry-java-csharp.js +77 -0
- package/src/fix-registry-js.js +99 -0
- package/src/fix-registry-mcp-ai.js +57 -0
- package/src/fix-registry-python.js +87 -0
- package/src/fixer-ruby-php.js +608 -0
- package/src/fixer.js +2113 -0
- package/src/hooks.js +115 -0
- package/src/ignore.js +176 -0
- package/src/index.js +384 -0
- package/src/metrics.js +126 -0
- package/src/monorepo.js +65 -0
- package/src/presets.js +54 -0
- package/src/reporter.js +975 -0
- package/src/rule-worker.js +36 -0
- package/src/rules/ast-rules.js +756 -0
- package/src/rules/bugs/accessibility.js +235 -0
- package/src/rules/bugs/ai-codegen-fixable.js +172 -0
- package/src/rules/bugs/ai-codegen.js +365 -0
- package/src/rules/bugs/code-smell-bugs.js +247 -0
- package/src/rules/bugs/crypto-bugs.js +195 -0
- package/src/rules/bugs/docker-bugs.js +158 -0
- package/src/rules/bugs/general.js +361 -0
- package/src/rules/bugs/go-bugs.js +279 -0
- package/src/rules/bugs/index.js +73 -0
- package/src/rules/bugs/js-api.js +257 -0
- package/src/rules/bugs/js-array-object.js +210 -0
- package/src/rules/bugs/js-async-fixable.js +223 -0
- package/src/rules/bugs/js-async.js +211 -0
- package/src/rules/bugs/js-closure-scope.js +182 -0
- package/src/rules/bugs/js-database.js +203 -0
- package/src/rules/bugs/js-error-handling.js +148 -0
- package/src/rules/bugs/js-logic.js +261 -0
- package/src/rules/bugs/js-memory.js +214 -0
- package/src/rules/bugs/js-node.js +361 -0
- package/src/rules/bugs/js-react.js +373 -0
- package/src/rules/bugs/js-regex.js +200 -0
- package/src/rules/bugs/js-state.js +272 -0
- package/src/rules/bugs/js-type-coercion.js +318 -0
- package/src/rules/bugs/nextjs-bugs.js +242 -0
- package/src/rules/bugs/nextjs-fixable.js +120 -0
- package/src/rules/bugs/node-fixable.js +178 -0
- package/src/rules/bugs/python-advanced.js +245 -0
- package/src/rules/bugs/python-fixable.js +98 -0
- package/src/rules/bugs/python.js +284 -0
- package/src/rules/bugs/react-fixable.js +207 -0
- package/src/rules/bugs/ruby-bugs.js +182 -0
- package/src/rules/bugs/shell-bugs.js +181 -0
- package/src/rules/bugs/silent-failures.js +261 -0
- package/src/rules/bugs/ts-bugs.js +235 -0
- package/src/rules/bugs/unused-vars.js +65 -0
- package/src/rules/compliance/accessibility-ext.js +468 -0
- package/src/rules/compliance/education.js +322 -0
- package/src/rules/compliance/financial.js +421 -0
- package/src/rules/compliance/frameworks.js +507 -0
- package/src/rules/compliance/healthcare.js +520 -0
- package/src/rules/compliance/index.js +2714 -0
- package/src/rules/compliance/regional-eu.js +480 -0
- package/src/rules/compliance/regional-international.js +903 -0
- package/src/rules/cost/index.js +1993 -0
- package/src/rules/data/index.js +2503 -0
- package/src/rules/dependencies/index.js +1684 -0
- package/src/rules/deployment/index.js +2050 -0
- package/src/rules/index.js +71 -0
- package/src/rules/infrastructure/index.js +3048 -0
- package/src/rules/performance/index.js +3455 -0
- package/src/rules/quality/index.js +3175 -0
- package/src/rules/reliability/index.js +3040 -0
- package/src/rules/scope-rules.js +815 -0
- package/src/rules/security/ai-api.js +1177 -0
- package/src/rules/security/auth.js +1328 -0
- package/src/rules/security/cors.js +127 -0
- package/src/rules/security/crypto.js +527 -0
- package/src/rules/security/csharp.js +862 -0
- package/src/rules/security/csrf.js +193 -0
- package/src/rules/security/dart.js +835 -0
- package/src/rules/security/deserialization.js +291 -0
- package/src/rules/security/file-upload.js +187 -0
- package/src/rules/security/go.js +850 -0
- package/src/rules/security/headers.js +235 -0
- package/src/rules/security/index.js +65 -0
- package/src/rules/security/injection.js +1639 -0
- package/src/rules/security/mcp-server.js +71 -0
- package/src/rules/security/misconfiguration.js +660 -0
- package/src/rules/security/oauth-jwt.js +329 -0
- package/src/rules/security/path-traversal.js +295 -0
- package/src/rules/security/php.js +1054 -0
- package/src/rules/security/prototype-pollution.js +283 -0
- package/src/rules/security/rate-limiting.js +208 -0
- package/src/rules/security/ruby.js +1061 -0
- package/src/rules/security/rust.js +693 -0
- package/src/rules/security/secrets.js +747 -0
- package/src/rules/security/shell.js +647 -0
- package/src/rules/security/ssrf.js +298 -0
- package/src/rules/security/supply-chain-advanced.js +393 -0
- package/src/rules/security/supply-chain.js +734 -0
- package/src/rules/security/swift.js +835 -0
- package/src/rules/security/taint.js +27 -0
- package/src/rules/security/xss.js +520 -0
- package/src/scan-cache.js +71 -0
- package/src/scanner.js +710 -0
- package/src/scope-analyzer.js +685 -0
- package/src/share.js +88 -0
- package/src/taint.js +300 -0
- package/src/telemetry.js +183 -0
- package/src/tracer.js +190 -0
- package/src/upload.js +35 -0
- package/src/worker.js +31 -0
package/src/scanner.js
ADDED
|
@@ -0,0 +1,710 @@
|
|
|
1
|
+
import { execSync } from 'child_process';
import { createHash } from 'crypto';
import { readFileSync, writeFileSync, existsSync, mkdirSync, statSync, lstatSync, realpathSync, openSync, readSync, closeSync } from 'fs';
import { readFile } from 'fs/promises';
import { cpus } from 'os';
import { join, relative, dirname } from 'path';
import { fileURLToPath } from 'url';
import { Worker } from 'worker_threads';

import { glob } from 'glob';

import { loadIgnorePatterns } from './ignore.js';
|
|
11
|
+
|
|
12
|
+
// Current Doorman release; used to invalidate the on-disk scan cache on upgrade.
const DOORMAN_VERSION = '1.0.0';

// Glob patterns for every file type the scanner inspects: source code,
// dependency manifests, IaC/CI configuration, and tooling dotfiles.
const SOURCE_PATTERNS = [
  // JavaScript / TypeScript sources
  '**/*.js',
  '**/*.jsx',
  '**/*.ts',
  '**/*.tsx',
  '**/*.mjs',
  '**/*.cjs',
  // Other languages
  '**/*.py',
  '**/*.rb',
  '**/*.go',
  // Environment files and Terraform
  '**/*.env*',
  '**/*.tf',
  '**/*.tfvars',
  // Containers and CI pipelines
  '**/Dockerfile*',
  '**/docker-compose*.yml',
  '**/docker-compose*.yaml',
  '**/.github/workflows/*.yml',
  '**/.github/workflows/*.yaml',
  '**/.gitlab-ci.yml',
  '**/.gitlab-ci.yaml',
  // Kubernetes / Helm / deployment manifests
  '**/k8s/**/*.yml',
  '**/k8s/**/*.yaml',
  '**/kubernetes/**/*.yml',
  '**/kubernetes/**/*.yaml',
  '**/helm/**/*.yaml',
  '**/manifests/**/*.yaml',
  '**/deploy/**/*.yaml',
  '**/serverless.yml',
  '**/serverless.yaml',
  // Dependency manifests
  '**/package.json',
  '**/requirements.txt',
  '**/Gemfile',
  '**/go.mod',
  // Database schemas
  '**/*.sql',
  '**/*.prisma',
  // Server / build tooling configuration
  '**/nginx.conf',
  '**/next.config.*',
  '**/tsconfig.json',
  '**/.eslintrc*',
  '**/.gitignore',
  '**/.npmrc',
  '**/jest.config.*',
  '**/webpack.config.*',
  '**/vite.config.*',
  '**/rollup.config.*',
  // Repo meta and scripts
  '**/CODEOWNERS',
  '**/Makefile',
  '**/*.sh',
];

// Per-file and whole-scan limits.
const MAX_FILE_SIZE = 1_000_000; // 1MB
const MAX_FILE_COUNT = 50_000; // hard cap on files considered in one scan
const PARALLEL_BATCH_SIZE = 50; // default number of files read concurrently
const MEMORY_WARNING_BYTES = 500 * 1024 * 1024; // 500MB
const RULE_TIMEOUT_MS = 5_000; // per-rule timeout applied to async rules
const TOTAL_SCAN_TIMEOUT_MS = 60_000; // overall budget before partial results are returned
const RULE_CHUNK_SIZE = 20; // NOTE(review): not referenced in this file's visible code (runRules uses a local CHUNK=100) — confirm still needed
const CACHE_FILE = '.doorman-cache.json'; // cache file written at the scan target root
|
|
72
|
+
|
|
73
|
+
/**
 * Heuristically determine whether a file is binary by checking its first
 * 512 bytes for a NUL byte.
 *
 * Fix: the previous implementation loaded the ENTIRE file with readFileSync
 * and then sampled the head — wasteful for large files. Only the 512-byte
 * sample is now read from disk.
 *
 * @param {string} fullPath - Path of the file to inspect.
 * @returns {boolean} true if a NUL byte appears in the sample; false for
 *   clean text or on any read error (unreadable files are handled later).
 */
function isBinaryFile(fullPath) {
  let fd;
  try {
    fd = openSync(fullPath, 'r');
    const sample = Buffer.alloc(512);
    const bytesRead = readSync(fd, sample, 0, 512, 0);
    return sample.subarray(0, bytesRead).includes(0);
  } catch {
    return false;
  } finally {
    if (fd !== undefined) {
      try { closeSync(fd); } catch { /* best-effort close */ }
    }
  }
}
|
|
88
|
+
|
|
89
|
+
/**
 * Resolve a path that may be a symlink while tracking visited real paths to
 * detect loops.
 *
 * @param {string} fullPath - Path to resolve.
 * @param {Set<string>} visitedPaths - Real paths already seen this walk; mutated.
 * @returns {string|null} The safe path to read, or null for a symlink loop,
 *   a broken link, or an unreadable path.
 */
function resolveSymlink(fullPath, visitedPaths) {
  try {
    const isLink = lstatSync(fullPath).isSymbolicLink();
    if (!isLink) {
      visitedPaths.add(fullPath);
      return fullPath;
    }
    const target = realpathSync(fullPath);
    if (visitedPaths.has(target)) {
      return null; // this real path was already visited — symlink loop
    }
    visitedPaths.add(target);
    return target;
  } catch {
    return null; // lstat/realpath failed (broken link, missing file, etc.)
  }
}
|
|
110
|
+
|
|
111
|
+
/**
 * Safely read a file with all edge case checks: symlink-loop detection,
 * a 1MB size limit, binary sniffing, and permission handling.
 *
 * @param {string} fullPath - Absolute path to read.
 * @param {string} match - Relative path, used only in warning messages.
 * @param {Set<string>} visitedPaths - Shared set for symlink-loop detection.
 * @param {boolean} [silent=false] - Suppress console warnings.
 * @returns {{content: string|null, skipped: boolean, reason: string|null}}
 */
export function safeReadFile(fullPath, match, visitedPaths, silent = false) {
  const skip = (reason) => ({ content: null, skipped: true, reason });

  // Symlink safety first.
  const resolved = resolveSymlink(fullPath, visitedPaths);
  if (resolved === null) {
    if (!silent) {
      console.warn(`[scanner] Skipping ${match}: symlink loop or unresolvable symlink`);
    }
    return skip('symlink');
  }

  // Size check before loading any content.
  try {
    const { size } = statSync(resolved);
    if (size > MAX_FILE_SIZE) {
      if (!silent) {
        console.warn(`[scanner] Skipping ${match}: file size ${(size / 1024 / 1024).toFixed(1)}MB exceeds 1MB limit`);
      }
      return skip('too-large');
    }
  } catch (err) {
    if (err.code === 'EACCES' || err.code === 'EPERM') {
      if (!silent) {
        console.warn(`[scanner] Skipping ${match}: permission denied`);
      }
      return skip('permission');
    }
    return skip('stat-error');
  }

  // Binary files carry no scannable text.
  if (isBinaryFile(resolved)) {
    return skip('binary');
  }

  // Finally, read the content.
  try {
    return { content: readFileSync(resolved, 'utf-8'), skipped: false, reason: null };
  } catch (err) {
    if (err.code === 'EACCES' || err.code === 'EPERM') {
      if (!silent) {
        console.warn(`[scanner] Skipping ${match}: permission denied`);
      }
      return skip('permission');
    }
    return skip('read-error');
  }
}
|
|
163
|
+
|
|
164
|
+
/**
 * Async variant of safeReadFile: the safety checks (symlink resolution,
 * size limit, binary sniff) stay synchronous, but the content read itself
 * is awaited so batches of files can be read concurrently.
 *
 * @param {string} fullPath - Absolute path to read.
 * @param {string} match - Relative path, used only in warning messages.
 * @param {Set<string>} visitedPaths - Shared set for symlink-loop detection.
 * @param {boolean} [silent=false] - Suppress console warnings.
 * @returns {Promise<{content: string|null, skipped: boolean, reason: string|null}>}
 */
async function safeReadFileAsync(fullPath, match, visitedPaths, silent = false) {
  const skip = (reason) => ({ content: null, skipped: true, reason });

  // Symlink safety first (sync — fast, no I/O wait).
  const resolved = resolveSymlink(fullPath, visitedPaths);
  if (resolved === null) {
    if (!silent) {
      console.warn(`[scanner] Skipping ${match}: symlink loop or unresolvable symlink`);
    }
    return skip('symlink');
  }

  // Size check before loading any content.
  try {
    const { size } = statSync(resolved);
    if (size > MAX_FILE_SIZE) {
      if (!silent) {
        console.warn(`[scanner] Skipping ${match}: file size ${(size / 1024 / 1024).toFixed(1)}MB exceeds 1MB limit`);
      }
      return skip('too-large');
    }
  } catch (err) {
    if (err.code === 'EACCES' || err.code === 'EPERM') {
      if (!silent) {
        console.warn(`[scanner] Skipping ${match}: permission denied`);
      }
      return skip('permission');
    }
    return skip('stat-error');
  }

  // Binary files carry no scannable text.
  if (isBinaryFile(resolved)) {
    return skip('binary');
  }

  // Read the content asynchronously.
  try {
    const content = await readFile(resolved, 'utf-8');
    return { content, skipped: false, reason: null };
  } catch (err) {
    if (err.code === 'EACCES' || err.code === 'EPERM') {
      if (!silent) {
        console.warn(`[scanner] Skipping ${match}: permission denied`);
      }
      return skip('permission');
    }
    return skip('read-error');
  }
}
|
|
217
|
+
|
|
218
|
+
/**
 * Compute the hex-encoded SHA-256 digest of a file's content.
 *
 * @param {string} content - UTF-8 file content.
 * @returns {string} 64-character lowercase hex digest.
 */
function hashContent(content) {
  const digest = createHash('sha256');
  digest.update(content);
  return digest.digest('hex');
}
|
|
224
|
+
|
|
225
|
+
/**
 * Sample current heap usage and emit a warning once it crosses the
 * 500MB threshold (MEMORY_WARNING_BYTES).
 *
 * @returns {number} heap bytes currently used.
 */
function checkMemoryUsage() {
  const { heapUsed } = process.memoryUsage();
  if (heapUsed > MEMORY_WARNING_BYTES) {
    console.warn(`[scanner] Warning: memory usage is ${Math.round(heapUsed / 1024 / 1024)}MB, exceeding 500MB threshold`);
  }
  return heapUsed;
}
|
|
237
|
+
|
|
238
|
+
// ---------------------------------------------------------------------------
|
|
239
|
+
// Cache — keyed by file content SHA-256, invalidated on version change
|
|
240
|
+
// ---------------------------------------------------------------------------
|
|
241
|
+
|
|
242
|
+
/**
 * Read the scan cache from disk.
 *
 * @param {string} targetPath - Root of the scan target (cache lives there).
 * @returns {object|null} Parsed cache data, or null when the cache is
 *   missing, unparseable, or written by a different Doorman version.
 */
function readCache(targetPath) {
  const cachePath = join(targetPath, CACHE_FILE);
  try {
    if (!existsSync(cachePath)) return null;
    const data = JSON.parse(readFileSync(cachePath, 'utf-8'));
    // A version mismatch invalidates the whole cache.
    return data.version === DOORMAN_VERSION ? data : null;
  } catch {
    return null; // corrupted cache — treat as first run
  }
}
|
|
262
|
+
|
|
263
|
+
/**
 * Write the scan cache to disk, creating the target directory if needed.
 *
 * @param {string} targetPath - Root of the scan target.
 * @param {Object<string,string>} fileHashes - Relative path -> SHA-256 hex.
 * @param {object} fileResults - Cached per-file results payload.
 */
function writeCache(targetPath, fileHashes, fileResults) {
  const cachePath = join(targetPath, CACHE_FILE);
  const cacheDir = dirname(cachePath);
  if (!existsSync(cacheDir)) {
    mkdirSync(cacheDir, { recursive: true });
  }
  const payload = JSON.stringify(
    {
      version: DOORMAN_VERSION,
      timestamp: new Date().toISOString(),
      hashes: fileHashes,
      results: fileResults,
    },
    null,
    2
  );
  writeFileSync(cachePath, payload);
}
|
|
280
|
+
|
|
281
|
+
/**
 * Get the list of files changed since HEAD via `git diff --name-only`.
 *
 * @param {string} targetPath - Directory to run git in.
 * @returns {string[]|null} Trimmed non-empty paths, or null when git is
 *   unavailable, the path is not a repo, or the command times out.
 */
function getGitChangedFiles(targetPath) {
  try {
    const stdout = execSync('git diff --name-only HEAD', {
      cwd: targetPath,
      encoding: 'utf-8',
      timeout: 10_000,
      stdio: ['pipe', 'pipe', 'pipe'],
    });
    const names = [];
    for (const line of stdout.split('\n')) {
      const trimmed = line.trim();
      if (trimmed) names.push(trimmed);
    }
    return names;
  } catch {
    return null; // not a git repo, git missing, or git errored
  }
}
|
|
301
|
+
|
|
302
|
+
// ---------------------------------------------------------------------------
|
|
303
|
+
// Progress reporting
|
|
304
|
+
// ---------------------------------------------------------------------------
|
|
305
|
+
|
|
306
|
+
/**
 * Create a progress reporter for the file-collection loop.
 *
 * When `silent` is true every callback is a no-op. Otherwise `update(file)`
 * advances the counter and, when stderr is a TTY, redraws a 20-cell bar;
 * `done()` clears the bar and returns `{ scanned, elapsed }` (seconds).
 *
 * Fixes: `total === 0` previously produced NaN in the percentage/bar math,
 * and more `update()` calls than `total` made `'\u2591'.repeat()` receive a
 * negative count (RangeError); a dead `if (!silent) {}` branch in done()
 * (unreachable — silent reporters return earlier) is removed.
 *
 * @param {number} total - Expected number of files.
 * @param {boolean} silent - Suppress all output.
 * @returns {{update: Function, done: Function}}
 */
function createProgress(total, silent) {
  if (silent) {
    return {
      update() {},
      done() {},
    };
  }

  let scanned = 0;
  const startTime = performance.now();
  const isTTY = process.stderr?.isTTY;
  // Guard against division by zero when the target yields no files.
  const denom = total > 0 ? total : 1;

  function render(file) {
    if (!isTTY) return;
    const pct = Math.round((scanned / denom) * 100);
    const barLen = 20;
    // Clamp so overshooting `total` can never produce a negative repeat count.
    const filled = Math.min(barLen, Math.round(barLen * scanned / denom));
    const bar = '\u2588'.repeat(filled) + '\u2591'.repeat(barLen - filled);
    const short = file.length > 40 ? '...' + file.slice(-37) : file;
    process.stderr.write(`\r  [${bar}] ${pct}% (${scanned}/${total}) ${short}  `);
  }

  return {
    update(file) {
      scanned++;
      render(file);
    },
    done() {
      if (isTTY) {
        process.stderr.write('\r' + ' '.repeat(80) + '\r');
      }
      const elapsed = ((performance.now() - startTime) / 1000).toFixed(2);
      return { scanned, elapsed: parseFloat(elapsed) };
    },
  };
}
|
|
349
|
+
|
|
350
|
+
// ---------------------------------------------------------------------------
|
|
351
|
+
// File collection
|
|
352
|
+
// ---------------------------------------------------------------------------
|
|
353
|
+
|
|
354
|
+
/**
 * Collect all scannable files from the target path.
 * Reads files in parallel batches for speed. Supports:
 *   - .doormanignore patterns
 *   - File hash caching (skip unchanged files)
 *   - Progress reporting
 *
 * Returns a Map of relative file path -> UTF-8 content for every readable,
 * non-binary file under the 1MB limit matching SOURCE_PATTERNS.
 *
 * Options:
 *   silent — suppress warnings and progress
 *   noCache — skip cache, force full scan
 *   batchSize — parallel read batch size (default 50)
 *   extraIgnores — extra glob patterns appended to the ignore list
 *   onProgress — callback({ scanned, total, file })
 *     NOTE(review): onProgress is documented but never read in the body
 *     below — confirm whether it was meant to be wired into createProgress.
 */
export async function collectFiles(targetPath, optionsOrSilent = false) {
  // Backward compat: if a boolean is passed, treat it as `silent`
  const options = typeof optionsOrSilent === 'object' ? optionsOrSilent : { silent: optionsOrSilent };
  const silent = options.silent ?? false;
  const noCache = options.noCache ?? false;
  const batchSize = options.batchSize ?? PARALLEL_BATCH_SIZE;

  // Load ignore patterns (defaults + .doormanignore + RC config)
  const { patterns: ignorePatterns } = loadIgnorePatterns(targetPath);
  const extraIgnores = options.extraIgnores || [];
  const allIgnorePatterns = [...ignorePatterns, ...extraIgnores];

  // `dot: true` so dotfiles like .env and .gitlab-ci.yml are matched.
  const matches = await glob(SOURCE_PATTERNS, {
    cwd: targetPath,
    ignore: allIgnorePatterns,
    nodir: true,
    dot: true,
  });

  if (matches.length > MAX_FILE_COUNT) {
    if (!silent) {
      console.warn(`[scanner] Warning: found ${matches.length} files, limiting to ${MAX_FILE_COUNT}`);
    }
  }

  const capped = matches.slice(0, MAX_FILE_COUNT);

  // Read cache (null when noCache, missing, corrupted, or version mismatch)
  const cache = noCache ? null : readCache(targetPath);
  const cachedHashes = cache?.hashes ?? {};

  const files = new Map();
  // Shared across batches so symlink-loop detection spans the whole walk.
  const visitedPaths = new Set();
  const newHashes = {};
  const progress = createProgress(capped.length, silent);
  // NOTE(review): cacheHits is tallied but never reported or returned — confirm intent.
  let cacheHits = 0;

  // Process in parallel batches
  for (let i = 0; i < capped.length; i += batchSize) {
    const batch = capped.slice(i, i + batchSize);

    const results = await Promise.all(
      batch.map(async (match) => {
        const fullPath = join(targetPath, match);
        const result = await safeReadFileAsync(fullPath, match, visitedPaths, silent);
        return { match, ...result };
      })
    );

    for (const { match, content, skipped } of results) {
      progress.update(match);

      if (skipped || content === null) continue;

      const contentHash = hashContent(content);
      newHashes[match] = contentHash;

      // If hash matches cache, we can skip re-scanning this file
      // but we still need its content for rule context
      if (!noCache && cachedHashes[match] === contentHash) {
        cacheHits++;
      }

      files.set(match, content);
    }

    // Warn if heap usage crossed the 500MB threshold after this batch.
    checkMemoryUsage();
  }

  // Persist updated cache. Only hashes are stored by this path — the
  // results map is intentionally an empty object here.
  if (!noCache) {
    writeCache(targetPath, newHashes, {});
  }

  const stats = progress.done(); // NOTE(review): stats is unused — confirm intent.

  return files;
}
|
|
445
|
+
|
|
446
|
+
/**
 * Collect files incrementally — only re-scan files that changed since last run.
 * Falls back to full scan if not a git repo or on first run.
 *
 * "Changed" means: listed by `git diff --name-only HEAD`, OR its SHA-256
 * differs from the cached hash, OR it has no cached hash (new file). When
 * git information is unavailable, every readable file is treated as changed.
 * Returns the same Map shape as collectFiles.
 */
export async function collectFilesIncremental(targetPath, optionsOrSilent = false) {
  // Backward compat: a bare boolean means `silent` (same contract as collectFiles)
  const options = typeof optionsOrSilent === 'object' ? optionsOrSilent : { silent: optionsOrSilent };
  const silent = options.silent ?? false;
  const noCache = options.noCache ?? false;
  const batchSize = options.batchSize ?? PARALLEL_BATCH_SIZE;

  const cache = noCache ? null : readCache(targetPath);
  // NOTE(review): computed before the early return below, so the git
  // subprocess runs even on a cold cache where the result is unused —
  // consider moving it after the `!cache` check.
  const gitChanged = getGitChangedFiles(targetPath);

  // If no cache exists, do a full scan and build the cache
  if (!cache) {
    return collectFiles(targetPath, options);
  }

  const cachedHashes = cache.hashes ?? {};

  // Load ignore patterns (defaults + .doormanignore + RC config)
  const { patterns: ignorePatterns } = loadIgnorePatterns(targetPath);
  const extraIgnores = options.extraIgnores || [];
  const allIgnorePatterns = [...ignorePatterns, ...extraIgnores];

  // Determine which files to re-scan
  const allMatches = await glob(SOURCE_PATTERNS, {
    cwd: targetPath,
    ignore: allIgnorePatterns,
    nodir: true,
    dot: true,
  });

  const capped = allMatches.slice(0, MAX_FILE_COUNT);
  const gitChangedSet = gitChanged ? new Set(gitChanged) : null;

  const files = new Map();
  const newHashes = {};
  // Shared so symlink-loop detection spans the whole walk.
  const visitedPaths = new Set();
  const progress = createProgress(capped.length, silent);

  for (let i = 0; i < capped.length; i += batchSize) {
    const batch = capped.slice(i, i + batchSize);

    const results = await Promise.all(
      batch.map(async (match) => {
        const fullPath = join(targetPath, match);
        const result = await safeReadFileAsync(fullPath, match, visitedPaths, silent);
        return { match, ...result };
      })
    );

    for (const { match, content, skipped } of results) {
      progress.update(match);

      if (skipped || content === null) continue;

      const contentHash = hashContent(content);
      newHashes[match] = contentHash;

      // Include file if: git says it changed, hash differs from cache, or it is new
      const prevHash = cachedHashes[match];
      // Without git info, conservatively treat every file as changed.
      const changedInGit = gitChangedSet ? gitChangedSet.has(match) : true;
      const hashChanged = prevHash !== contentHash;

      if (changedInGit || hashChanged) {
        files.set(match, content);
      }
    }

    // Warn if heap usage crossed the 500MB threshold after this batch.
    checkMemoryUsage();
  }

  // Persist fresh hashes so the next incremental run has a baseline.
  if (!noCache) {
    writeCache(targetPath, newHashes, {});
  }

  progress.done();
  return files;
}
|
|
526
|
+
|
|
527
|
+
// ---------------------------------------------------------------------------
|
|
528
|
+
// Rule execution
|
|
529
|
+
// ---------------------------------------------------------------------------
|
|
530
|
+
|
|
531
|
+
// Rules that threw once are disabled for the rest of the process so a
// broken rule cannot repeatedly fail (and repeatedly pay exception cost).
const _failedRules = new Set();

/**
 * Run a single rule synchronously (most rules are sync).
 * If the rule returns a thenable it is handed back unresolved so the caller
 * can apply a timeout; non-array sync results are normalized to [].
 *
 * Fix: a throwing rule was previously disabled with ZERO diagnostics — the
 * error was swallowed silently. The first failure now logs a warning
 * (suppressed when context.silent is set), matching runRuleWithTimeout.
 *
 * @param {{id?: string, name?: string, lang?: string, check: Function}} rule
 * @param {object} context - Scan context; may carry `_detectedLangs` (Set)
 *   and `silent`.
 * @returns {Array|Promise} findings array, or the rule's pending promise.
 */
function runRuleSync(rule, context) {
  const id = rule.id || '';
  // Skip previously failed rules and irrelevant language rules
  if (_failedRules.has(id)) return [];
  if (rule.lang && context._detectedLangs && !context._detectedLangs.has(rule.lang)) return [];
  try {
    const result = rule.check(context);
    // Async rule: return the promise so the caller can race it with a timeout.
    if (result && typeof result.then === 'function') return result;
    return Array.isArray(result) ? result : [];
  } catch (e) {
    _failedRules.add(id);
    if (!context?.silent) {
      console.warn(`[scanner] Rule "${id || rule.name || 'unknown'}" failed and was disabled: ${e?.message ?? e}`);
    }
    return [];
  }
}
|
|
554
|
+
|
|
555
|
+
/**
 * Run a single rule with a timeout (RULE_TIMEOUT_MS).
 * Returns the rule's findings, or [] on timeout, throw, or a non-array result.
 *
 * Fix: rules that throw a non-Error value (e.g. a string) used to crash this
 * wrapper on `e.message.includes(...)` — the message is now coerced safely.
 * clearTimeout is consolidated into `finally` so the timer is always freed.
 *
 * @param {{id?: string, name?: string, check: Function}} rule
 * @param {object} context - Scan context passed to rule.check.
 * @param {boolean} silent - Suppress warnings on failure/timeout.
 * @returns {Promise<Array>}
 */
async function runRuleWithTimeout(rule, context, silent) {
  let timer;
  const timeout = new Promise((_, reject) => {
    timer = setTimeout(() => {
      reject(new Error(`Rule "${rule.id || rule.name || 'unknown'}" timed out after ${RULE_TIMEOUT_MS}ms`));
    }, RULE_TIMEOUT_MS);
  });

  try {
    const result = await Promise.race([rule.check(context), timeout]);
    return Array.isArray(result) ? result : [];
  } catch (e) {
    if (!silent) {
      // Rules may throw non-Error values; coerce so this path never crashes.
      const message = String(e?.message ?? e);
      if (message.includes('timed out')) {
        console.warn(`[scanner] ${message}`);
      } else {
        console.warn(`[scanner] Rule "${rule.id || rule.name || 'unknown'}" failed: ${message}`);
      }
    }
    return [];
  } finally {
    clearTimeout(timer);
  }
}
|
|
579
|
+
|
|
580
|
+
/**
 * Run all applicable rules against the collected files.
 *
 * Sync rules execute inline (no Promise overhead); async rules are awaited
 * in bounded batches under RULE_TIMEOUT_MS. Files shorter than 10 characters
 * are pruned from `context.files` up front (in place — callers see the
 * pruned Map). Supports `context.profile` for per-rule timings, attached as
 * a non-standard `_profile` property on the returned array.
 *
 * Fix: async rules previously had their pending promise DISCARDED — the
 * timeout pass called runRuleWithTimeout, which invoked rule.check(context)
 * a second time, so every async rule executed twice (and the floating first
 * promise could trigger an unhandled-rejection crash). The pending promise
 * is now raced against the timeout directly.
 *
 * @param {Array<{id?: string, name?: string, lang?: string, check: Function}>} rules
 * @param {object} context - Scan context; reads `files`, `silent`, `profile`.
 * @returns {Promise<Array>} findings sorted by severity (critical first).
 */
export async function runRules(rules, context) {
  const findings = [];
  const scanStart = Date.now();
  const silent = context?.silent ?? false;
  const profile = context?.profile ?? false;
  const profileData = profile ? [] : null;

  // Await an already-started async rule under the per-rule timeout.
  // Does NOT re-invoke rule.check — that is the bug this replaces.
  async function awaitRuleResult(rule, promise) {
    let timer;
    const timeout = new Promise((_, reject) => {
      timer = setTimeout(() => {
        reject(new Error(`Rule "${rule.id || rule.name || 'unknown'}" timed out after ${RULE_TIMEOUT_MS}ms`));
      }, RULE_TIMEOUT_MS);
    });
    try {
      const result = await Promise.race([promise, timeout]);
      return Array.isArray(result) ? result : [];
    } catch (e) {
      if (!silent) {
        const message = String(e?.message ?? e);
        if (message.includes('timed out')) {
          console.warn(`[scanner] ${message}`);
        } else {
          console.warn(`[scanner] Rule "${rule.id || rule.name || 'unknown'}" failed: ${message}`);
        }
      }
      return [];
    } finally {
      clearTimeout(timer);
    }
  }

  // Pre-compute: skip files with no meaningful content (< 10 chars)
  if (context.files) {
    const toDelete = [];
    for (const [fp, content] of context.files) {
      if (!content || content.length < 10) toDelete.push(fp);
    }
    for (const fp of toDelete) context.files.delete(fp);
  }

  // Async rules are collected here and awaited after the sync pass.
  const asyncRules = [];

  // Run rules in chunks so the total-scan timeout is checked periodically
  // rather than per rule (reduces Date.now() overhead).
  const CHUNK = 100;
  for (let i = 0; i < rules.length; i += CHUNK) {
    if (Date.now() - scanStart > TOTAL_SCAN_TIMEOUT_MS) {
      if (!silent) console.warn(`[scanner] Scan timeout exceeded, returning partial results`);
      break;
    }

    const chunk = rules.slice(i, i + CHUNK);
    for (const rule of chunk) {
      const ruleStart = profile ? performance.now() : 0;
      const result = runRuleSync(rule, context);

      if (result && typeof result.then === 'function') {
        // Mark the promise handled now so an early rejection cannot crash
        // the process before we await it below.
        result.catch(() => {});
        asyncRules.push({ rule, promise: result });
      } else if (Array.isArray(result) && result.length > 0) {
        findings.push(...result);
      }

      if (profile) {
        profileData.push({ id: rule.id, ms: performance.now() - ruleStart, findings: Array.isArray(result) ? result.length : 0 });
      }
    }
  }

  // Await async rules in bounded batches, each under the per-rule timeout.
  if (asyncRules.length > 0) {
    const ASYNC_CHUNK = 50;
    for (let i = 0; i < asyncRules.length; i += ASYNC_CHUNK) {
      const chunk = asyncRules.slice(i, i + ASYNC_CHUNK);
      const results = await Promise.all(
        chunk.map(({ rule, promise }) => awaitRuleResult(rule, promise))
      );
      for (const result of results) {
        if (Array.isArray(result)) findings.push(...result);
      }
    }
  }

  // Sort by severity (unknown severities sort with 'info')
  const severityOrder = { critical: 0, high: 1, medium: 2, low: 3, info: 4 };
  findings.sort((a, b) => (severityOrder[a.severity] || 4) - (severityOrder[b.severity] || 4));

  // Attach profile data if requested (slowest rules first)
  if (profile && profileData) {
    findings._profile = profileData.sort((a, b) => b.ms - a.ms);
  }

  return findings;
}
|
|
656
|
+
|
|
657
|
+
/**
 * Run rules in parallel using worker threads.
 * Splits rules across CPU cores for ~4-8x speedup on multi-core machines.
 *
 * The `rules` argument is only used by the serial fallback paths; each
 * worker receives workerIndex/totalWorkers and presumably loads and
 * partitions the rule set itself in rule-worker.js — confirm there.
 * Serial fallback triggers when: fewer than 2 cores, no files, or any
 * worker spawn/Promise error.
 */
export async function runRulesParallel(rules, context) {
  const numCores = Math.min(cpus().length, 8); // Cap at 8 workers
  if (numCores <= 1 || !context.files || context.files.size === 0) {
    return runRules(rules, context); // Fallback to serial
  }

  // Worker script lives next to this module.
  const workerPath = join(dirname(fileURLToPath(import.meta.url)), 'rule-worker.js');

  // Serialize files Map to array for transfer
  const filesData = [...context.files.entries()];
  const stackData = context.stack || {};
  const category = context._categoryFilter || null;
  const _detectedLangs = context._detectedLangs ? [...context._detectedLangs] : null;

  // Spawn workers
  const workers = [];
  for (let i = 0; i < numCores; i++) {
    workers.push(new Promise((resolve, reject) => {
      const worker = new Worker(workerPath, {
        workerData: {
          filesData,
          stackData,
          category,
          _detectedLangs,
          workerIndex: i,
          totalWorkers: numCores,
        },
      });
      // The worker's first message is taken as its complete findings array;
      // a later resolve/reject from 'exit' is a harmless no-op.
      worker.on('message', resolve);
      worker.on('error', reject);
      worker.on('exit', (code) => {
        if (code !== 0) resolve([]); // Don't crash on worker failure
      });
    }));
  }

  try {
    const results = await Promise.all(workers);
    const findings = results.flat();

    // Sort by severity (same ordering as the serial path)
    const severityOrder = { critical: 0, high: 1, medium: 2, low: 3, info: 4 };
    findings.sort((a, b) => (severityOrder[a.severity] || 4) - (severityOrder[b.severity] || 4));

    return findings;
  } catch {
    // Fallback to serial on any error
    return runRules(rules, context);
  }
}
|