@lbroth/rothunter 1.0.0-rc.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +141 -0
- package/dist/adapters/llm.d.ts +68 -0
- package/dist/adapters/llm.d.ts.map +1 -0
- package/dist/adapters/llm.js +189 -0
- package/dist/adapters/llm.js.map +1 -0
- package/dist/config.d.ts +37 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/config.js +81 -0
- package/dist/config.js.map +1 -0
- package/dist/detector-registry.d.ts +32 -0
- package/dist/detector-registry.d.ts.map +1 -0
- package/dist/detector-registry.js +74 -0
- package/dist/detector-registry.js.map +1 -0
- package/dist/detectors/api-race.d.ts +6 -0
- package/dist/detectors/api-race.d.ts.map +1 -0
- package/dist/detectors/api-race.js +222 -0
- package/dist/detectors/api-race.js.map +1 -0
- package/dist/detectors/bad-config.d.ts +6 -0
- package/dist/detectors/bad-config.d.ts.map +1 -0
- package/dist/detectors/bad-config.js +529 -0
- package/dist/detectors/bad-config.js.map +1 -0
- package/dist/detectors/console-log-prod.d.ts +6 -0
- package/dist/detectors/console-log-prod.d.ts.map +1 -0
- package/dist/detectors/console-log-prod.js +72 -0
- package/dist/detectors/console-log-prod.js.map +1 -0
- package/dist/detectors/dead-api.d.ts +10 -0
- package/dist/detectors/dead-api.d.ts.map +1 -0
- package/dist/detectors/dead-api.js +115 -0
- package/dist/detectors/dead-api.js.map +1 -0
- package/dist/detectors/dead-export.d.ts +12 -0
- package/dist/detectors/dead-export.d.ts.map +1 -0
- package/dist/detectors/dead-export.js +140 -0
- package/dist/detectors/dead-export.js.map +1 -0
- package/dist/detectors/dead-handler.d.ts +12 -0
- package/dist/detectors/dead-handler.d.ts.map +1 -0
- package/dist/detectors/dead-handler.js +40 -0
- package/dist/detectors/dead-handler.js.map +1 -0
- package/dist/detectors/dead-module.d.ts +14 -0
- package/dist/detectors/dead-module.d.ts.map +1 -0
- package/dist/detectors/dead-module.js +50 -0
- package/dist/detectors/dead-module.js.map +1 -0
- package/dist/detectors/deep-nesting.d.ts +12 -0
- package/dist/detectors/deep-nesting.d.ts.map +1 -0
- package/dist/detectors/deep-nesting.js +133 -0
- package/dist/detectors/deep-nesting.js.map +1 -0
- package/dist/detectors/duplicate-function.d.ts +9 -0
- package/dist/detectors/duplicate-function.d.ts.map +1 -0
- package/dist/detectors/duplicate-function.js +199 -0
- package/dist/detectors/duplicate-function.js.map +1 -0
- package/dist/detectors/duplicate-type.d.ts +9 -0
- package/dist/detectors/duplicate-type.d.ts.map +1 -0
- package/dist/detectors/duplicate-type.js +166 -0
- package/dist/detectors/duplicate-type.js.map +1 -0
- package/dist/detectors/hot-hub-file.d.ts +11 -0
- package/dist/detectors/hot-hub-file.d.ts.map +1 -0
- package/dist/detectors/hot-hub-file.js +42 -0
- package/dist/detectors/hot-hub-file.js.map +1 -0
- package/dist/detectors/long-file.d.ts +12 -0
- package/dist/detectors/long-file.d.ts.map +1 -0
- package/dist/detectors/long-file.js +82 -0
- package/dist/detectors/long-file.js.map +1 -0
- package/dist/detectors/long-function.d.ts +12 -0
- package/dist/detectors/long-function.d.ts.map +1 -0
- package/dist/detectors/long-function.js +45 -0
- package/dist/detectors/long-function.js.map +1 -0
- package/dist/detectors/magic-numbers.d.ts +10 -0
- package/dist/detectors/magic-numbers.d.ts.map +1 -0
- package/dist/detectors/magic-numbers.js +332 -0
- package/dist/detectors/magic-numbers.js.map +1 -0
- package/dist/detectors/mutable-globals.d.ts +6 -0
- package/dist/detectors/mutable-globals.d.ts.map +1 -0
- package/dist/detectors/mutable-globals.js +95 -0
- package/dist/detectors/mutable-globals.js.map +1 -0
- package/dist/detectors/mutation.d.ts +11 -0
- package/dist/detectors/mutation.d.ts.map +1 -0
- package/dist/detectors/mutation.js +397 -0
- package/dist/detectors/mutation.js.map +1 -0
- package/dist/detectors/public-any.d.ts +6 -0
- package/dist/detectors/public-any.d.ts.map +1 -0
- package/dist/detectors/public-any.js +52 -0
- package/dist/detectors/public-any.js.map +1 -0
- package/dist/detectors/race-condition.d.ts +6 -0
- package/dist/detectors/race-condition.d.ts.map +1 -0
- package/dist/detectors/race-condition.js +608 -0
- package/dist/detectors/race-condition.js.map +1 -0
- package/dist/detectors/shared-db-write.d.ts +6 -0
- package/dist/detectors/shared-db-write.d.ts.map +1 -0
- package/dist/detectors/shared-db-write.js +656 -0
- package/dist/detectors/shared-db-write.js.map +1 -0
- package/dist/detectors/silent-catch.d.ts +6 -0
- package/dist/detectors/silent-catch.d.ts.map +1 -0
- package/dist/detectors/silent-catch.js +167 -0
- package/dist/detectors/silent-catch.js.map +1 -0
- package/dist/detectors/similar-functions.d.ts +15 -0
- package/dist/detectors/similar-functions.d.ts.map +1 -0
- package/dist/detectors/similar-functions.js +334 -0
- package/dist/detectors/similar-functions.js.map +1 -0
- package/dist/detectors/skip-tests.d.ts +6 -0
- package/dist/detectors/skip-tests.d.ts.map +1 -0
- package/dist/detectors/skip-tests.js +69 -0
- package/dist/detectors/skip-tests.js.map +1 -0
- package/dist/detectors/todo-comments.d.ts +29 -0
- package/dist/detectors/todo-comments.d.ts.map +1 -0
- package/dist/detectors/todo-comments.js +154 -0
- package/dist/detectors/todo-comments.js.map +1 -0
- package/dist/detectors/unused-deps.d.ts +8 -0
- package/dist/detectors/unused-deps.d.ts.map +1 -0
- package/dist/detectors/unused-deps.js +115 -0
- package/dist/detectors/unused-deps.js.map +1 -0
- package/dist/extraction/api-race-confirmer.d.ts +31 -0
- package/dist/extraction/api-race-confirmer.d.ts.map +1 -0
- package/dist/extraction/api-race-confirmer.js +110 -0
- package/dist/extraction/api-race-confirmer.js.map +1 -0
- package/dist/extraction/llm-confirmer.d.ts +25 -0
- package/dist/extraction/llm-confirmer.d.ts.map +1 -0
- package/dist/extraction/llm-confirmer.js +118 -0
- package/dist/extraction/llm-confirmer.js.map +1 -0
- package/dist/extraction/mutation-confirmer.d.ts +30 -0
- package/dist/extraction/mutation-confirmer.d.ts.map +1 -0
- package/dist/extraction/mutation-confirmer.js +73 -0
- package/dist/extraction/mutation-confirmer.js.map +1 -0
- package/dist/extraction/prompt-chunking.d.ts +37 -0
- package/dist/extraction/prompt-chunking.d.ts.map +1 -0
- package/dist/extraction/prompt-chunking.js +61 -0
- package/dist/extraction/prompt-chunking.js.map +1 -0
- package/dist/extraction/race-confirmer.d.ts +28 -0
- package/dist/extraction/race-confirmer.d.ts.map +1 -0
- package/dist/extraction/race-confirmer.js +68 -0
- package/dist/extraction/race-confirmer.js.map +1 -0
- package/dist/extraction/shared-db-write-confirmer.d.ts +31 -0
- package/dist/extraction/shared-db-write-confirmer.d.ts.map +1 -0
- package/dist/extraction/shared-db-write-confirmer.js +141 -0
- package/dist/extraction/shared-db-write-confirmer.js.map +1 -0
- package/dist/extraction/triage-confirmer.d.ts +59 -0
- package/dist/extraction/triage-confirmer.d.ts.map +1 -0
- package/dist/extraction/triage-confirmer.js +104 -0
- package/dist/extraction/triage-confirmer.js.map +1 -0
- package/dist/graph/cfg.d.ts +45 -0
- package/dist/graph/cfg.d.ts.map +1 -0
- package/dist/graph/cfg.js +198 -0
- package/dist/graph/cfg.js.map +1 -0
- package/dist/graph/decorator-entries.d.ts +2 -0
- package/dist/graph/decorator-entries.d.ts.map +1 -0
- package/dist/graph/decorator-entries.js +89 -0
- package/dist/graph/decorator-entries.js.map +1 -0
- package/dist/graph/entry-points.d.ts +12 -0
- package/dist/graph/entry-points.d.ts.map +1 -0
- package/dist/graph/entry-points.js +282 -0
- package/dist/graph/entry-points.js.map +1 -0
- package/dist/graph/handler-conventions.d.ts +2 -0
- package/dist/graph/handler-conventions.d.ts.map +1 -0
- package/dist/graph/handler-conventions.js +26 -0
- package/dist/graph/handler-conventions.js.map +1 -0
- package/dist/graph/iac-entries.d.ts +2 -0
- package/dist/graph/iac-entries.d.ts.map +1 -0
- package/dist/graph/iac-entries.js +123 -0
- package/dist/graph/iac-entries.js.map +1 -0
- package/dist/graph/import-graph.d.ts +48 -0
- package/dist/graph/import-graph.d.ts.map +1 -0
- package/dist/graph/import-graph.js +86 -0
- package/dist/graph/import-graph.js.map +1 -0
- package/dist/graph/monorepo-detect.d.ts +3 -0
- package/dist/graph/monorepo-detect.d.ts.map +1 -0
- package/dist/graph/monorepo-detect.js +166 -0
- package/dist/graph/monorepo-detect.js.map +1 -0
- package/dist/graph/tsconfig-paths.d.ts +23 -0
- package/dist/graph/tsconfig-paths.d.ts.map +1 -0
- package/dist/graph/tsconfig-paths.js +217 -0
- package/dist/graph/tsconfig-paths.js.map +1 -0
- package/dist/multi-workspace-scanner.d.ts +13 -0
- package/dist/multi-workspace-scanner.d.ts.map +1 -0
- package/dist/multi-workspace-scanner.js +130 -0
- package/dist/multi-workspace-scanner.js.map +1 -0
- package/dist/normalizers/type-normalizer.d.ts +16 -0
- package/dist/normalizers/type-normalizer.d.ts.map +1 -0
- package/dist/normalizers/type-normalizer.js +189 -0
- package/dist/normalizers/type-normalizer.js.map +1 -0
- package/dist/parsers/typescript-parser.d.ts +57 -0
- package/dist/parsers/typescript-parser.d.ts.map +1 -0
- package/dist/parsers/typescript-parser.js +502 -0
- package/dist/parsers/typescript-parser.js.map +1 -0
- package/dist/reporter/json-reporter.d.ts +12 -0
- package/dist/reporter/json-reporter.d.ts.map +1 -0
- package/dist/reporter/json-reporter.js +28 -0
- package/dist/reporter/json-reporter.js.map +1 -0
- package/dist/reporter/markdown-reporter.d.ts +11 -0
- package/dist/reporter/markdown-reporter.d.ts.map +1 -0
- package/dist/reporter/markdown-reporter.js +77 -0
- package/dist/reporter/markdown-reporter.js.map +1 -0
- package/dist/rothunter.d.ts +125 -0
- package/dist/rothunter.d.ts.map +1 -0
- package/dist/rothunter.js +1038 -0
- package/dist/rothunter.js.map +1 -0
- package/dist/server/false-positives.d.ts +34 -0
- package/dist/server/false-positives.d.ts.map +1 -0
- package/dist/server/false-positives.js +85 -0
- package/dist/server/false-positives.js.map +1 -0
- package/dist/server/index.d.ts +2 -0
- package/dist/server/index.d.ts.map +1 -0
- package/dist/server/index.js +1529 -0
- package/dist/server/index.js.map +1 -0
- package/dist/server/marked-to-fix.d.ts +16 -0
- package/dist/server/marked-to-fix.d.ts.map +1 -0
- package/dist/server/marked-to-fix.js +36 -0
- package/dist/server/marked-to-fix.js.map +1 -0
- package/dist/server/scan-store.d.ts +147 -0
- package/dist/server/scan-store.d.ts.map +1 -0
- package/dist/server/scan-store.js +291 -0
- package/dist/server/scan-store.js.map +1 -0
- package/dist/server/settings-store.d.ts +28 -0
- package/dist/server/settings-store.d.ts.map +1 -0
- package/dist/server/settings-store.js +46 -0
- package/dist/server/settings-store.js.map +1 -0
- package/dist/server/workspace-store.d.ts +39 -0
- package/dist/server/workspace-store.d.ts.map +1 -0
- package/dist/server/workspace-store.js +108 -0
- package/dist/server/workspace-store.js.map +1 -0
- package/dist/types/detector-input.d.ts +37 -0
- package/dist/types/detector-input.d.ts.map +1 -0
- package/dist/types/detector-input.js +2 -0
- package/dist/types/detector-input.js.map +1 -0
- package/dist/types.d.ts +110 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +2 -0
- package/dist/types.js.map +1 -0
- package/dist/utils/clustering.d.ts +14 -0
- package/dist/utils/clustering.d.ts.map +1 -0
- package/dist/utils/clustering.js +56 -0
- package/dist/utils/clustering.js.map +1 -0
- package/dist/utils/gitignore.d.ts +32 -0
- package/dist/utils/gitignore.d.ts.map +1 -0
- package/dist/utils/gitignore.js +122 -0
- package/dist/utils/gitignore.js.map +1 -0
- package/dist/utils/hash.d.ts +11 -0
- package/dist/utils/hash.d.ts.map +1 -0
- package/dist/utils/hash.js +14 -0
- package/dist/utils/hash.js.map +1 -0
- package/dist/utils/ignore-annotation.d.ts +28 -0
- package/dist/utils/ignore-annotation.d.ts.map +1 -0
- package/dist/utils/ignore-annotation.js +46 -0
- package/dist/utils/ignore-annotation.js.map +1 -0
- package/dist/utils/llm-json.d.ts +2 -0
- package/dist/utils/llm-json.d.ts.map +1 -0
- package/dist/utils/llm-json.js +53 -0
- package/dist/utils/llm-json.js.map +1 -0
- package/dist/utils/logger.d.ts +3 -0
- package/dist/utils/logger.d.ts.map +1 -0
- package/dist/utils/logger.js +4 -0
- package/dist/utils/logger.js.map +1 -0
- package/dist/utils/project-conventions.d.ts +2 -0
- package/dist/utils/project-conventions.d.ts.map +1 -0
- package/dist/utils/project-conventions.js +108 -0
- package/dist/utils/project-conventions.js.map +1 -0
- package/dist/utils/regex.d.ts +9 -0
- package/dist/utils/regex.d.ts.map +1 -0
- package/dist/utils/regex.js +11 -0
- package/dist/utils/regex.js.map +1 -0
- package/dist/utils/snippet.d.ts +20 -0
- package/dist/utils/snippet.d.ts.map +1 -0
- package/dist/utils/snippet.js +28 -0
- package/dist/utils/snippet.js.map +1 -0
- package/dist/utils/source-reader.d.ts +19 -0
- package/dist/utils/source-reader.d.ts.map +1 -0
- package/dist/utils/source-reader.js +32 -0
- package/dist/utils/source-reader.js.map +1 -0
- package/logo.png +0 -0
- package/package.json +92 -0
- package/scripts/start-llm.mjs +161 -0
|
@@ -0,0 +1,1038 @@
|
|
|
1
|
+
import * as path from 'node:path';
|
|
2
|
+
import * as fs from 'node:fs';
|
|
3
|
+
import { logger } from './utils/logger.js';
|
|
4
|
+
import { DuplicateTypeDetector } from './detectors/duplicate-type.js';
|
|
5
|
+
import { DuplicateFunctionDetector } from './detectors/duplicate-function.js';
|
|
6
|
+
import { detectDeadModules } from './detectors/dead-module.js';
|
|
7
|
+
import { detectDeadExports } from './detectors/dead-export.js';
|
|
8
|
+
import { detectDeadApis } from './detectors/dead-api.js';
|
|
9
|
+
import { detectDeadHandlers } from './detectors/dead-handler.js';
|
|
10
|
+
import { detectMutations } from './detectors/mutation.js';
|
|
11
|
+
import { detectRaceConditions } from './detectors/race-condition.js';
|
|
12
|
+
import { detectSharedDbWrites } from './detectors/shared-db-write.js';
|
|
13
|
+
import { detectApiRaces } from './detectors/api-race.js';
|
|
14
|
+
import { detectBadConfig } from './detectors/bad-config.js';
|
|
15
|
+
import { detectSilentCatches } from './detectors/silent-catch.js';
|
|
16
|
+
import { detectSkipTests } from './detectors/skip-tests.js';
|
|
17
|
+
import { detectLongFiles } from './detectors/long-file.js';
|
|
18
|
+
import { detectLongFunctions } from './detectors/long-function.js';
|
|
19
|
+
import { detectConsoleLogsInProd } from './detectors/console-log-prod.js';
|
|
20
|
+
import { detectMagicNumbers } from './detectors/magic-numbers.js';
|
|
21
|
+
import { detectDeepNesting } from './detectors/deep-nesting.js';
|
|
22
|
+
import { detectPublicAny } from './detectors/public-any.js';
|
|
23
|
+
import { detectMutableGlobals } from './detectors/mutable-globals.js';
|
|
24
|
+
import { detectUnusedDeps } from './detectors/unused-deps.js';
|
|
25
|
+
import { detectHotHubFiles } from './detectors/hot-hub-file.js';
|
|
26
|
+
import { detectSimilarFunctions } from './detectors/similar-functions.js';
|
|
27
|
+
import { detectTodoComments } from './detectors/todo-comments.js';
|
|
28
|
+
import { TypeScriptParser } from './parsers/typescript-parser.js';
|
|
29
|
+
import { TypeNormalizer } from './normalizers/type-normalizer.js';
|
|
30
|
+
import { buildImportGraph, reachableFrom } from './graph/import-graph.js';
|
|
31
|
+
import { discoverEntryPoints, isPublishedLibrary } from './graph/entry-points.js';
|
|
32
|
+
import { readProjectConventions } from './utils/project-conventions.js';
|
|
33
|
+
import { resolveIacEntryFiles } from './graph/iac-entries.js';
|
|
34
|
+
import { resolveDecoratorEntryFiles } from './graph/decorator-entries.js';
|
|
35
|
+
import { loadRotHunterConfig } from './config.js';
|
|
36
|
+
import { Project } from 'ts-morph';
|
|
37
|
+
import { scanWorkspaces } from './multi-workspace-scanner.js';
|
|
38
|
+
export class RotHunter {
|
|
39
|
+
parser = new TypeScriptParser();
|
|
40
|
+
normalizer = new TypeNormalizer();
|
|
41
|
+
detectors = [
|
|
42
|
+
new DuplicateTypeDetector(),
|
|
43
|
+
new DuplicateFunctionDetector(),
|
|
44
|
+
];
|
|
45
|
+
async run(opts) {
|
|
46
|
+
const startedAt = Date.now();
|
|
47
|
+
// Multi-workspace mode: if a rothunter.config.json exists at the workspace
|
|
48
|
+
// root, parse every linked workspace in a single pass and run the same
|
|
49
|
+
// detectors over the merged graph. dead-api is the cross-repo-only
|
|
50
|
+
// detector and only emits findings in this mode.
|
|
51
|
+
const emit = (event) => {
|
|
52
|
+
if (!opts.onProgress)
|
|
53
|
+
return;
|
|
54
|
+
try {
|
|
55
|
+
opts.onProgress(event);
|
|
56
|
+
}
|
|
57
|
+
catch (err) {
|
|
58
|
+
logger.warn({ err: err.message }, 'onProgress callback threw');
|
|
59
|
+
}
|
|
60
|
+
};
|
|
61
|
+
emit({ state: 'parsing' });
|
|
62
|
+
const config = loadRotHunterConfig(opts.workspaceRoot);
|
|
63
|
+
let isMulti = false;
|
|
64
|
+
let parsed;
|
|
65
|
+
if (config) {
|
|
66
|
+
logger.info({ configPath: config.configPath, workspaces: config.workspaces.map((w) => w.name) }, 'RotHunter: parsing multi-workspace group');
|
|
67
|
+
const multi = await scanWorkspaces(config);
|
|
68
|
+
parsed = { symbols: multi.symbols, imports: multi.imports, files: multi.files };
|
|
69
|
+
isMulti = true;
|
|
70
|
+
}
|
|
71
|
+
else {
|
|
72
|
+
logger.info({ workspaceRoot: opts.workspaceRoot }, 'RotHunter: parsing workspace');
|
|
73
|
+
parsed = await this.parser.parseWorkspaceFull(opts);
|
|
74
|
+
}
|
|
75
|
+
emit({ state: 'parsing', files: parsed.files.length, symbols: parsed.symbols.length });
|
|
76
|
+
logger.info({ count: parsed.symbols.length }, 'RotHunter: normalizing symbols');
|
|
77
|
+
const symbols = this.normalizer.normalizeAll(parsed.symbols);
|
|
78
|
+
const findings = [];
|
|
79
|
+
for (const detector of this.detectors) {
|
|
80
|
+
logger.info({ detector: detector.id }, 'RotHunter: running detector');
|
|
81
|
+
emit({ state: 'detecting', detector: detector.id });
|
|
82
|
+
const detectorFindings = await detector.run(symbols);
|
|
83
|
+
findings.push(...detectorFindings);
|
|
84
|
+
}
|
|
85
|
+
// Dead-module detection runs at file granularity, not symbol granularity,
|
|
86
|
+
// so it has its own input shape. Build the import graph once and reuse it
|
|
87
|
+
// for any future graph-based detectors (call graph, cross-repo lookups).
|
|
88
|
+
const fileSet = new Set(parsed.files);
|
|
89
|
+
const importGraph = buildImportGraph(parsed.imports);
|
|
90
|
+
const entryPoints = discoverEntryPoints(opts.workspaceRoot, fileSet);
|
|
91
|
+
// CDK / SST / Serverless-framework constructs reference handler files by
|
|
92
|
+
// string path. Resolve those strings and add them to the entry set so
|
|
93
|
+
// dead-module/dead-export don't flag lambda handlers as orphans. The
|
|
94
|
+
// resolved set is also used by the dead-handler detector below to decide
|
|
95
|
+
// whether a handler-convention file is actually wired.
|
|
96
|
+
const iacEntries = isMulti
|
|
97
|
+
? new Set()
|
|
98
|
+
: resolveIacEntryFiles(opts.workspaceRoot, parsed.files);
|
|
99
|
+
for (const f of iacEntries)
|
|
100
|
+
entryPoints.add(f);
|
|
101
|
+
// Framework-decorated classes (NestJS controllers, Angular components,
|
|
102
|
+
// TypeORM entities, ...) are discovered by the framework at runtime —
|
|
103
|
+
// never statically imported. Protect their files from dead-module.
|
|
104
|
+
const decoratorEntries = isMulti
|
|
105
|
+
? new Set()
|
|
106
|
+
: resolveDecoratorEntryFiles(opts.workspaceRoot, parsed.files);
|
|
107
|
+
for (const f of decoratorEntries)
|
|
108
|
+
entryPoints.add(f);
|
|
109
|
+
const reachable = reachableFrom(importGraph, entryPoints);
|
|
110
|
+
logger.info({ entries: entryPoints.size, reachable: reachable.size, total: parsed.files.length }, 'RotHunter: running detector dead-module');
|
|
111
|
+
findings.push(...detectDeadModules({ files: parsed.files, graph: importGraph, entryPoints, reachable }));
|
|
112
|
+
logger.info({ symbols: symbols.length }, 'RotHunter: running detector dead-export');
|
|
113
|
+
findings.push(...detectDeadExports({ symbols, imports: parsed.imports, entryPoints }));
|
|
114
|
+
if (isMulti) {
|
|
115
|
+
logger.info({ symbols: symbols.length }, 'RotHunter: running detector dead-api');
|
|
116
|
+
findings.push(...detectDeadApis({ symbols, imports: parsed.imports }));
|
|
117
|
+
}
|
|
118
|
+
// Symbol/graph-only detectors — safe in both modes (no fs reads, no git,
|
|
119
|
+
// no per-workspace state). File-walking + git-touched + ts-morph-Project
|
|
120
|
+
// detectors stay under the `!isMulti` gate below because their input
|
|
121
|
+
// shape doesn't survive the workspace-name-prefixed paths emitted by
|
|
122
|
+
// multi-workspace-scanner.
|
|
123
|
+
logger.info({ symbols: symbols.length }, 'RotHunter: running detector long-function');
|
|
124
|
+
emit({ state: 'detecting', detector: 'long-function' });
|
|
125
|
+
findings.push(...detectLongFunctions({ symbols }));
|
|
126
|
+
logger.info({ symbols: symbols.length }, 'RotHunter: running detector deep-nesting');
|
|
127
|
+
emit({ state: 'detecting', detector: 'deep-nesting' });
|
|
128
|
+
findings.push(...detectDeepNesting({ symbols }));
|
|
129
|
+
logger.info({ symbols: symbols.length }, 'RotHunter: running detector public-any');
|
|
130
|
+
emit({ state: 'detecting', detector: 'public-any' });
|
|
131
|
+
findings.push(...detectPublicAny({ symbols }));
|
|
132
|
+
logger.info({ files: parsed.files.length }, 'RotHunter: running detector hot-hub-file');
|
|
133
|
+
emit({ state: 'detecting', detector: 'hot-hub-file' });
|
|
134
|
+
findings.push(...detectHotHubFiles({ graph: importGraph }));
|
|
135
|
+
if (!isMulti) {
|
|
136
|
+
// Single-workspace path: paths are already real workspace-relative.
|
|
137
|
+
const local = await runWorkspaceLocalDetectors({
|
|
138
|
+
workspaceRoot: opts.workspaceRoot,
|
|
139
|
+
files: parsed.files,
|
|
140
|
+
imports: parsed.imports,
|
|
141
|
+
symbols,
|
|
142
|
+
iacEntries,
|
|
143
|
+
emit,
|
|
144
|
+
});
|
|
145
|
+
findings.push(...local);
|
|
146
|
+
}
|
|
147
|
+
else {
|
|
148
|
+
// Multi-workspace: each detector that needs real workspace-relative
|
|
149
|
+
// paths (file-walking, git-based, fs-walking) runs once per linked
|
|
150
|
+
// workspace, with paths de-prefixed before invocation and re-prefixed
|
|
151
|
+
// on the way out so findings still point at globally-unique files +
|
|
152
|
+
// workspace-namespaced fingerprints (no cross-workspace collisions).
|
|
153
|
+
if (!config) {
|
|
154
|
+
// Defensive — isMulti is only true when config was set above.
|
|
155
|
+
logger.error('RotHunter: isMulti without config — skipping local detectors');
|
|
156
|
+
}
|
|
157
|
+
else {
|
|
158
|
+
for (const ws of config.workspaces) {
|
|
159
|
+
const wsPrefix = `${ws.name}/`;
|
|
160
|
+
const wsFiles = parsed.files
|
|
161
|
+
.filter((f) => f.startsWith(wsPrefix))
|
|
162
|
+
.map((f) => f.slice(wsPrefix.length));
|
|
163
|
+
const wsSymbols = symbols
|
|
164
|
+
.filter((s) => s.workspace === ws.name)
|
|
165
|
+
.map((s) => ({ ...s, file: stripPrefix(s.file, wsPrefix) }));
|
|
166
|
+
const wsImports = parsed.imports
|
|
167
|
+
.filter((i) => i.sourceWorkspace === ws.name)
|
|
168
|
+
.map((i) => ({
|
|
169
|
+
...i,
|
|
170
|
+
source: stripPrefix(i.source, wsPrefix),
|
|
171
|
+
target: i.target && i.targetWorkspace === ws.name ? stripPrefix(i.target, wsPrefix) : null,
|
|
172
|
+
}));
|
|
173
|
+
const wsIacEntries = resolveIacEntryFiles(ws.rootAbs, wsFiles);
|
|
174
|
+
logger.info({ workspace: ws.name, files: wsFiles.length, symbols: wsSymbols.length }, 'RotHunter: running workspace-local detectors');
|
|
175
|
+
const wsFindings = await runWorkspaceLocalDetectors({
|
|
176
|
+
workspaceRoot: ws.rootAbs,
|
|
177
|
+
files: wsFiles,
|
|
178
|
+
imports: wsImports,
|
|
179
|
+
symbols: wsSymbols,
|
|
180
|
+
iacEntries: wsIacEntries,
|
|
181
|
+
emit,
|
|
182
|
+
});
|
|
183
|
+
for (const f of wsFindings) {
|
|
184
|
+
for (const ev of f.evidence)
|
|
185
|
+
ev.file = `${wsPrefix}${ev.file}`;
|
|
186
|
+
// Namespace the fingerprint by workspace so two workspaces with
|
|
187
|
+
// identically-named files don't collide in the FP store.
|
|
188
|
+
f.fingerprint = `${ws.name}:${f.fingerprint}`;
|
|
189
|
+
}
|
|
190
|
+
findings.push(...wsFindings);
|
|
191
|
+
}
|
|
192
|
+
// Cross-workspace race-condition pass. shared-db-write +
|
|
193
|
+
// api-race fire when ≥ 2 distinct files write the same DB
|
|
194
|
+
// column / hit the same API endpoint — exactly the cross-
|
|
195
|
+
// service race shape that lives between packages in a
|
|
196
|
+
// monorepo (billing-service writes user.tier in one repo,
|
|
197
|
+
// account-service writes it from another). Running these
|
|
198
|
+
// per-workspace misses every cross-service race because each
|
|
199
|
+
// package has only one writer locally.
|
|
200
|
+
const crossFindings = await runCrossWorkspaceRaceDetectors(config.workspaces, emit);
|
|
201
|
+
findings.push(...crossFindings);
|
|
202
|
+
}
|
|
203
|
+
}
|
|
204
|
+
// Drop findings the caller has explicitly de-selected BEFORE the LLM
|
|
205
|
+
// pass — they would be filtered out post-LLM anyway and the verdict cost
|
|
206
|
+
// is wasted otherwise. Most relevant on big repos where the user runs a
|
|
207
|
+
// narrow `--detectors race-condition,shared-db-write,api-race` scan: the
|
|
208
|
+
// dup-type / dup-function / mutation candidates can otherwise dominate
|
|
209
|
+
// LLM cost (e.g. Outline: 715 LLM candidates, ~95 % dup-type).
|
|
210
|
+
if (opts.detectorsAllow || opts.detectorsDeny) {
|
|
211
|
+
const allow = opts.detectorsAllow;
|
|
212
|
+
const deny = opts.detectorsDeny;
|
|
213
|
+
const before = findings.length;
|
|
214
|
+
const kept = findings.filter((f) => {
|
|
215
|
+
if (allow && !allow.has(f.detectorId))
|
|
216
|
+
return false;
|
|
217
|
+
if (deny && deny.has(f.detectorId))
|
|
218
|
+
return false;
|
|
219
|
+
return true;
|
|
220
|
+
});
|
|
221
|
+
findings.length = 0;
|
|
222
|
+
findings.push(...kept);
|
|
223
|
+
if (kept.length !== before) {
|
|
224
|
+
logger.info({ kept: kept.length, dropped: before - kept.length }, 'RotHunter: applied detector allow/deny filter before LLM pass');
|
|
225
|
+
}
|
|
226
|
+
}
|
|
227
|
+
const threshold = opts.llmRejectionThreshold ?? 0.7;
|
|
228
|
+
const envConc = Number(process.env.ROTHUNTER_LLM_CONCURRENCY);
|
|
229
|
+
const llmConcurrency = Math.max(1, Math.min(16, Math.floor(opts.llmConcurrency ?? (Number.isFinite(envConc) && envConc > 0 ? envConc : 1))));
|
|
230
|
+
await this.runLlmConfirmation(findings, symbols, threshold, opts.llm, emit, llmConcurrency, opts.abortSignal, opts.workspaceRoot, opts.llmAutoFpThreshold);
|
|
231
|
+
const durationMs = Date.now() - startedAt;
|
|
232
|
+
emit({ state: 'done', findings: findings.length, durationMs });
|
|
233
|
+
return {
|
|
234
|
+
symbols,
|
|
235
|
+
findings,
|
|
236
|
+
durationMs,
|
|
237
|
+
};
|
|
238
|
+
}
|
|
239
|
+
async runLlmConfirmation(findings, symbols, threshold, injectedLlm, emit, concurrency = 1, abortSignal, workspaceRoot, llmAutoFpThreshold) {
|
|
240
|
+
const autoFpThreshold = llmAutoFpThreshold ?? LLM_FP_THRESHOLD;
|
|
241
|
+
const { LlmConfirmer } = await import('./extraction/llm-confirmer.js');
|
|
242
|
+
const { MutationConfirmer } = await import('./extraction/mutation-confirmer.js');
|
|
243
|
+
const { RaceConfirmer } = await import('./extraction/race-confirmer.js');
|
|
244
|
+
const { SharedDbWriteConfirmer } = await import('./extraction/shared-db-write-confirmer.js');
|
|
245
|
+
const { ApiRaceConfirmer } = await import('./extraction/api-race-confirmer.js');
|
|
246
|
+
const { TriageConfirmer } = await import('./extraction/triage-confirmer.js');
|
|
247
|
+
const { createDefaultLlmClient } = await import('./adapters/llm.js');
|
|
248
|
+
const symbolById = new Map(symbols.map((s) => [s.id, s]));
|
|
249
|
+
const candidates = findings.filter((f) => requiresLlmConfirmation(f, symbolById));
|
|
250
|
+
if (candidates.length === 0)
|
|
251
|
+
return;
|
|
252
|
+
const llm = injectedLlm ?? createDefaultLlmClient();
|
|
253
|
+
logger.info({ count: candidates.length }, 'RotHunter: warming up LLM');
|
|
254
|
+
const llmReady = await llm.warmup();
|
|
255
|
+
if (!llmReady) {
|
|
256
|
+
// No LLM reachable — skip the confirmation pass entirely so we
|
|
257
|
+
// don't burn N × verdict-timeout on a scan that has no oracle.
|
|
258
|
+
// Findings stay at their deterministic severity / confidence.
|
|
259
|
+
logger.warn({ count: candidates.length }, 'RotHunter: LLM warmup failed; skipping confirmation pass');
|
|
260
|
+
emit?.({ state: 'llm-start', total: 0 });
|
|
261
|
+
return;
|
|
262
|
+
}
|
|
263
|
+
const dupConfirmer = new LlmConfirmer(llm);
|
|
264
|
+
const mutationConfirmer = new MutationConfirmer(llm);
|
|
265
|
+
const raceConfirmer = new RaceConfirmer(llm);
|
|
266
|
+
const sharedDbConfirmer = new SharedDbWriteConfirmer(llm);
|
|
267
|
+
const apiRaceConfirmer = new ApiRaceConfirmer(llm);
|
|
268
|
+
const triageConfirmer = new TriageConfirmer(llm);
|
|
269
|
+
logger.info({ count: candidates.length }, 'RotHunter: LLM confirmation pass');
|
|
270
|
+
emit?.({ state: 'llm-start', total: candidates.length });
|
|
271
|
+
let llmDone = 0;
|
|
272
|
+
const reportVerdict = (finding, race, confidence, reason, latencyMs) => {
|
|
273
|
+
llmDone += 1;
|
|
274
|
+
emit?.({
|
|
275
|
+
state: 'llm-verdict',
|
|
276
|
+
done: llmDone,
|
|
277
|
+
total: candidates.length,
|
|
278
|
+
detectorId: finding.detectorId,
|
|
279
|
+
race,
|
|
280
|
+
confidence,
|
|
281
|
+
reason: reason.slice(0, 120),
|
|
282
|
+
latencyMs,
|
|
283
|
+
cluster: clusterLabel(finding),
|
|
284
|
+
});
|
|
285
|
+
};
|
|
286
|
+
const processOne = async (finding) => {
|
|
287
|
+
const verdictStart = Date.now();
|
|
288
|
+
if (finding.detectorId === 'duplicate-type' || finding.detectorId === 'duplicate-function') {
|
|
289
|
+
const ids = finding.evidence
|
|
290
|
+
.map((ev) => findSymbolId(symbols, ev.file, ev.range.startLine))
|
|
291
|
+
.filter((id) => Boolean(id));
|
|
292
|
+
if (ids.length < 2)
|
|
293
|
+
return;
|
|
294
|
+
const a = symbolById.get(ids[0]);
|
|
295
|
+
const b = symbolById.get(ids[1]);
|
|
296
|
+
if (!a || !b)
|
|
297
|
+
return;
|
|
298
|
+
const projectConv = workspaceRoot
|
|
299
|
+
? readProjectConventions(workspaceRoot, a.file)
|
|
300
|
+
: undefined;
|
|
301
|
+
const result = await dupConfirmer.confirmSameConcept(a, b, projectConv);
|
|
302
|
+
if (!result)
|
|
303
|
+
return;
|
|
304
|
+
if (result.same_concept) {
|
|
305
|
+
finding.confidence = Math.min(0.97, Math.max(finding.confidence, result.confidence));
|
|
306
|
+
finding.description += `\n\n**LLM confirmation:** ${result.reason} (confidence ${result.confidence.toFixed(2)})`;
|
|
307
|
+
finding.layer = 3;
|
|
308
|
+
}
|
|
309
|
+
else {
|
|
310
|
+
finding.confidence = Math.min(finding.confidence, 1 - result.confidence) * 0.7;
|
|
311
|
+
finding.description += `\n\n**LLM rejection:** ${result.reason} — not considered a domain duplicate.`;
|
|
312
|
+
if (finding.confidence < threshold) {
|
|
313
|
+
finding.severity = 'low';
|
|
314
|
+
}
|
|
315
|
+
// Same auto-FP routing as TriageConfirmer-driven detectors:
|
|
316
|
+
// a high-confidence negative verdict means the LLM is sure
|
|
317
|
+
// these are not the same concept (framework idiom, env-helper
|
|
318
|
+
// symmetry that project conventions endorse, …) — moving
|
|
319
|
+
// them out of the open list matches the user's expectation.
|
|
320
|
+
if (result.confidence >= autoFpThreshold) {
|
|
321
|
+
finding.llmFalsePositive = {
|
|
322
|
+
confidence: result.confidence,
|
|
323
|
+
reason: result.reason,
|
|
324
|
+
};
|
|
325
|
+
}
|
|
326
|
+
}
|
|
327
|
+
reportVerdict(finding, result.same_concept, result.confidence, result.reason, Date.now() - verdictStart);
|
|
328
|
+
}
|
|
329
|
+
else if (finding.detectorId === 'api-race') {
|
|
330
|
+
// Cluster meta lives in evidence[].note as JSON (emitted by the
|
|
331
|
+
// detector). Title is human-facing only — never re-parse it.
|
|
332
|
+
const first = parseEvidenceNote(finding.evidence[0]);
|
|
333
|
+
const method = first.method ?? '';
|
|
334
|
+
const pathPattern = first.pathPattern ?? '';
|
|
335
|
+
if (!method || !pathPattern)
|
|
336
|
+
return;
|
|
337
|
+
const clientSet = new Set();
|
|
338
|
+
for (const ev of finding.evidence) {
|
|
339
|
+
const meta = parseEvidenceNote(ev);
|
|
340
|
+
if (meta.client)
|
|
341
|
+
clientSet.add(meta.client);
|
|
342
|
+
}
|
|
343
|
+
const clients = clientSet.size > 0 ? [...clientSet].join('+') : 'unknown';
|
|
344
|
+
const sites = finding.evidence.slice(0, 8).map((ev) => {
|
|
345
|
+
const meta = parseEvidenceNote(ev);
|
|
346
|
+
return {
|
|
347
|
+
file: ev.file,
|
|
348
|
+
line: ev.range.startLine,
|
|
349
|
+
enclosingName: meta.enclosingName?.trim() || undefined,
|
|
350
|
+
enclosingSource: ev.snippet,
|
|
351
|
+
};
|
|
352
|
+
});
|
|
353
|
+
const verdict = await apiRaceConfirmer.confirm({
|
|
354
|
+
method,
|
|
355
|
+
pathPattern,
|
|
356
|
+
clients,
|
|
357
|
+
sites,
|
|
358
|
+
});
|
|
359
|
+
if (!verdict)
|
|
360
|
+
return;
|
|
361
|
+
applyClusterVerdict(finding, { positive: verdict.race, confidence: verdict.confidence, reason: verdict.reason }, { threshold, positiveLabel: 'real cross-flow API race', negativeLabel: 'safe', autoFpThreshold });
|
|
362
|
+
reportVerdict(finding, verdict.race, verdict.confidence, verdict.reason, Date.now() - verdictStart);
|
|
363
|
+
}
|
|
364
|
+
else if (finding.detectorId === 'shared-db-write') {
|
|
365
|
+
// Cluster meta lives in evidence[].note as JSON (emitted by the
|
|
366
|
+
// detector). Title is human-facing only — never re-parse it.
|
|
367
|
+
const first = parseEvidenceNote(finding.evidence[0]);
|
|
368
|
+
const entity = first.entity ?? '';
|
|
369
|
+
const column = first.column ?? '';
|
|
370
|
+
if (!entity || !column)
|
|
371
|
+
return;
|
|
372
|
+
const adapterSet = new Set();
|
|
373
|
+
for (const ev of finding.evidence) {
|
|
374
|
+
const meta = parseEvidenceNote(ev);
|
|
375
|
+
if (meta.adapter)
|
|
376
|
+
adapterSet.add(meta.adapter);
|
|
377
|
+
}
|
|
378
|
+
const adapters = adapterSet.size > 0 ? [...adapterSet].join('+') : 'unknown';
|
|
379
|
+
const sites = finding.evidence.slice(0, 8).map((ev) => {
|
|
380
|
+
const meta = parseEvidenceNote(ev);
|
|
381
|
+
return {
|
|
382
|
+
file: ev.file,
|
|
383
|
+
line: ev.range.startLine,
|
|
384
|
+
enclosingName: meta.enclosingName?.trim() || undefined,
|
|
385
|
+
enclosingSource: ev.snippet,
|
|
386
|
+
};
|
|
387
|
+
});
|
|
388
|
+
const verdict = await sharedDbConfirmer.confirm({
|
|
389
|
+
entity,
|
|
390
|
+
column,
|
|
391
|
+
adapters,
|
|
392
|
+
sites,
|
|
393
|
+
});
|
|
394
|
+
if (!verdict)
|
|
395
|
+
return;
|
|
396
|
+
applyClusterVerdict(finding, { positive: verdict.race, confidence: verdict.confidence, reason: verdict.reason }, { threshold, positiveLabel: 'real cross-flow race', negativeLabel: 'safe', autoFpThreshold });
|
|
397
|
+
reportVerdict(finding, verdict.race, verdict.confidence, verdict.reason, Date.now() - verdictStart);
|
|
398
|
+
}
|
|
399
|
+
else if (finding.detectorId === 'race-condition') {
|
|
400
|
+
const ev = finding.evidence[0];
|
|
401
|
+
if (!ev || !ev.note)
|
|
402
|
+
return;
|
|
403
|
+
let meta;
|
|
404
|
+
try {
|
|
405
|
+
meta = JSON.parse(ev.note);
|
|
406
|
+
}
|
|
407
|
+
catch {
|
|
408
|
+
return;
|
|
409
|
+
}
|
|
410
|
+
const isPromiseAll = finding.fingerprint.startsWith('race:promise-all');
|
|
411
|
+
const pattern = isPromiseAll
|
|
412
|
+
? 'promise-all'
|
|
413
|
+
: /emitter/i.test(meta.enclosingName ?? '')
|
|
414
|
+
? 'emitter-handler'
|
|
415
|
+
: 'read-modify-write';
|
|
416
|
+
const verdict = await raceConfirmer.confirm({
|
|
417
|
+
file: ev.file,
|
|
418
|
+
line: ev.range.startLine,
|
|
419
|
+
pattern,
|
|
420
|
+
target: meta.target ?? 'unknown',
|
|
421
|
+
enclosingSource: ev.snippet,
|
|
422
|
+
enclosingName: meta.enclosingName || undefined,
|
|
423
|
+
});
|
|
424
|
+
if (!verdict)
|
|
425
|
+
return;
|
|
426
|
+
applyClusterVerdict(finding, { positive: verdict.race, confidence: verdict.confidence, reason: verdict.reason }, { threshold, positiveLabel: 'real race', negativeLabel: 'safe', autoFpThreshold });
|
|
427
|
+
reportVerdict(finding, verdict.race, verdict.confidence, verdict.reason, Date.now() - verdictStart);
|
|
428
|
+
}
|
|
429
|
+
else if (finding.detectorId === 'mutation') {
|
|
430
|
+
const ev = finding.evidence[0];
|
|
431
|
+
if (!ev || !ev.note)
|
|
432
|
+
return;
|
|
433
|
+
let meta;
|
|
434
|
+
try {
|
|
435
|
+
meta = JSON.parse(ev.note);
|
|
436
|
+
}
|
|
437
|
+
catch {
|
|
438
|
+
return;
|
|
439
|
+
}
|
|
440
|
+
const verdict = await mutationConfirmer.confirm({
|
|
441
|
+
file: ev.file,
|
|
442
|
+
line: ev.range.startLine,
|
|
443
|
+
pattern: meta.pattern ?? 'mutation',
|
|
444
|
+
escapes: Boolean(meta.escapes),
|
|
445
|
+
snippet: ev.snippet,
|
|
446
|
+
enclosingSource: meta.enclosingSource ?? ev.snippet,
|
|
447
|
+
enclosingName: meta.enclosingName || undefined,
|
|
448
|
+
});
|
|
449
|
+
if (!verdict)
|
|
450
|
+
return;
|
|
451
|
+
// Mutation maps to the shared shape: positive = !intentional
|
|
452
|
+
// (bug-shaped). One subtle difference from the other three: the
|
|
453
|
+
// severity bump fires on `severity === 'medium'` regardless of
|
|
454
|
+
// confidence (the original code didn't gate on 0.85). We preserve
|
|
455
|
+
// that by passing positiveLabel/negativeLabel and relying on the
|
|
456
|
+
// shared helper's gate — which is acceptably equivalent in
|
|
457
|
+
// practice because the mutation confirmer rarely emits bug-shaped
|
|
458
|
+
// with confidence < 0.85.
|
|
459
|
+
applyClusterVerdict(finding, { positive: !verdict.intentional, confidence: verdict.confidence, reason: verdict.reason }, { threshold, positiveLabel: 'potential bug', negativeLabel: 'intentional', autoFpThreshold });
|
|
460
|
+
reportVerdict(finding, !verdict.intentional, verdict.confidence, verdict.reason, Date.now() - verdictStart);
|
|
461
|
+
}
|
|
462
|
+
else if (TRIAGE_DETECTORS.has(finding.detectorId)) {
|
|
463
|
+
// Generic real-vs-FP triage for detectors with no cluster
|
|
464
|
+
// confirmer of their own. For reachability + hub detectors we
|
|
465
|
+
// ALSO pass structural context (sibling signatures, file role)
|
|
466
|
+
// so the LLM can answer "is this used through a type surface
|
|
467
|
+
// or framework convention" without guessing from the snippet.
|
|
468
|
+
const ev = finding.evidence[0];
|
|
469
|
+
if (!ev)
|
|
470
|
+
return;
|
|
471
|
+
const verdict = await triageConfirmer.confirm({
|
|
472
|
+
detectorId: finding.detectorId,
|
|
473
|
+
severity: finding.severity,
|
|
474
|
+
title: finding.title,
|
|
475
|
+
description: finding.description,
|
|
476
|
+
suggestion: finding.suggestion,
|
|
477
|
+
evidenceFile: ev.file,
|
|
478
|
+
evidenceStartLine: ev.range.startLine,
|
|
479
|
+
evidenceEndLine: ev.range.endLine,
|
|
480
|
+
evidenceSnippet: ev.snippet,
|
|
481
|
+
extraContext: buildTriageContext(finding, symbolById, workspaceRoot),
|
|
482
|
+
});
|
|
483
|
+
if (!verdict)
|
|
484
|
+
return;
|
|
485
|
+
applyClusterVerdict(finding, { positive: verdict.real, confidence: verdict.confidence, reason: verdict.reason }, { threshold, positiveLabel: 'real defect', negativeLabel: 'intentional pattern', autoFpThreshold });
|
|
486
|
+
reportVerdict(finding, verdict.real, verdict.confidence, verdict.reason, Date.now() - verdictStart);
|
|
487
|
+
}
|
|
488
|
+
};
|
|
489
|
+
// Run with a small worker pool. Each "worker" pulls the next finding
|
|
490
|
+
// off the shared cursor and awaits its verdict — the LLM backend
|
|
491
|
+
// dictates real throughput (llama.cpp `--parallel N -cb`, vLLM dynamic
|
|
492
|
+
// batching). Concurrency 1 reproduces the original sequential flow.
|
|
493
|
+
//
|
|
494
|
+
// Cancellation: workers re-check `abortSignal.aborted` before
|
|
495
|
+
// every verdict task. This is the only reliable abort path — the
|
|
496
|
+
// old "throw inside onProgress" trick was swallowed by `emit()`'s
|
|
497
|
+
// catch and never reached the pool, so cancelled scans kept
|
|
498
|
+
// burning LLM calls (and blocking the queue) until they ran out
|
|
499
|
+
// of findings.
|
|
500
|
+
logger.info({ concurrency }, 'RotHunter: LLM concurrency');
|
|
501
|
+
let cursor = 0;
|
|
502
|
+
const workers = Array.from({ length: concurrency }, async () => {
|
|
503
|
+
while (true) {
|
|
504
|
+
if (abortSignal?.aborted)
|
|
505
|
+
return;
|
|
506
|
+
const idx = cursor++;
|
|
507
|
+
if (idx >= candidates.length)
|
|
508
|
+
return;
|
|
509
|
+
try {
|
|
510
|
+
await processOne(candidates[idx]);
|
|
511
|
+
}
|
|
512
|
+
catch (err) {
|
|
513
|
+
// A single bad finding must not poison the whole pool. The
|
|
514
|
+
// verdict is already accounted for in reportVerdict; log + move on.
|
|
515
|
+
logger.warn({ err: err.message, detector: candidates[idx].detectorId }, 'LLM verdict task threw');
|
|
516
|
+
}
|
|
517
|
+
}
|
|
518
|
+
});
|
|
519
|
+
await Promise.all(workers);
|
|
520
|
+
if (abortSignal?.aborted) {
|
|
521
|
+
throw new Error('scan cancelled by operator');
|
|
522
|
+
}
|
|
523
|
+
}
|
|
524
|
+
}
|
|
525
|
+
/**
 * Look up the id of the symbol declared at an exact file + start line.
 *
 * @param {Array} symbols - Symbol records exposing `id`, `file` and `range.startLine`.
 * @param {string} file - File path to match against `symbol.file`.
 * @param {number} startLine - Line to match against `symbol.range.startLine`.
 * @returns The `id` of the first matching symbol, or `undefined` when none matches.
 */
function findSymbolId(symbols, file, startLine) {
    const declaredHere = (symbol) => {
        if (symbol.file !== file) {
            return false;
        }
        return symbol.range.startLine === startLine;
    };
    const hit = symbols.find(declaredHere);
    return hit == null ? undefined : hit.id;
}
|
|
528
|
+
/**
 * Remove `prefix` from the front of `file` when it is present.
 *
 * @param {string} file - Path (or any string) to trim.
 * @param {string} prefix - Leading text to drop.
 * @returns {string} `file` without the leading `prefix`, or `file` unchanged
 *   when it does not start with `prefix`.
 */
function stripPrefix(file, prefix) {
    if (!file.startsWith(prefix)) {
        return file;
    }
    return file.slice(prefix.length);
}
|
|
531
|
+
/**
 * Monorepo-wide pass for the shared-db-write and api-race detectors.
 *
 * Every workspace's sources are loaded into ONE ts-morph project so that
 * writers / callers living in different packages end up in the same
 * detector run. Findings whose evidence stays inside a single workspace
 * are discarded here (the per-workspace pass reports those); the rest get
 * their fingerprint re-tagged via `tagCross`.
 *
 * Detectors are invoked with the workspaces' common ancestor directory as
 * `workspaceRoot`.
 *
 * @param {Array<{name: string, rootAbs: string}>} workspaces - Workspaces to merge.
 * @param {Function} emit - Progress callback; receives `{ state, detector }`.
 * @returns {Promise<Array>} Cross-workspace findings; `[]` when fewer than
 *   two workspaces are given.
 */
async function runCrossWorkspaceRaceDetectors(workspaces, emit) {
    // A cross-workspace race needs at least two workspaces.
    if (workspaces.length < 2) {
        return [];
    }
    const project = new Project({
        skipAddingFilesFromTsConfig: true,
        skipFileDependencyResolution: true,
        compilerOptions: { allowJs: true, jsx: 4 /* preserve */ },
    });
    for (const { rootAbs } of workspaces) {
        project.addSourceFilesAtPaths([
            `${rootAbs}/**/*.{ts,tsx,mts,cts,js,jsx,mjs,cjs}`,
            `!${rootAbs}/**/node_modules/**`,
            `!${rootAbs}/**/dist/**`,
            `!${rootAbs}/**/build/**`,
        ]);
    }
    // Shared root handed to the detectors as their workspace root.
    const root = commonAncestor(workspaces.map((w) => w.rootAbs));
    // root-relative workspace directory -> workspace name; '.' stands for a
    // workspace that IS the common root. Used to bucket evidence per
    // workspace when filtering out single-workspace findings.
    const wsByRelRoot = new Map(workspaces.map((ws) => {
        const rel = path.relative(root, ws.rootAbs);
        return [rel === '' ? '.' : rel, ws.name];
    }));
    const crossPasses = [
        ['cross-shared-db-write', detectSharedDbWrites],
        ['cross-api-race', detectApiRaces],
    ];
    const out = [];
    for (const [detector, detect] of crossPasses) {
        emit({ state: 'detecting', detector });
        const crossOnly = detect({ workspaceRoot: root, files: [], project })
            .filter((f) => spansMultipleWorkspaces(f, wsByRelRoot))
            .map(tagCross);
        out.push(...crossOnly);
    }
    return out;
}
|
|
581
|
+
/**
 * True when the finding's evidence covers ≥ 2 distinct workspaces.
 *
 * Used to keep the cross-workspace pass from re-emitting findings the
 * per-workspace pass already produced — those have all their evidence
 * under a single workspace name.
 *
 * Each evidence file is attributed to the MOST SPECIFIC workspace root
 * that contains it (longest matching prefix). Attribution therefore does
 * not depend on the iteration order of `wsByRelRoot`: a workspace rooted
 * at `'.'` no longer swallows files that belong to a nested workspace, and
 * `packages/a/nested` wins over `packages/a`.
 *
 * @param {{evidence: Array<{file: string}>}} finding - Finding to inspect.
 * @param {Map<string, string>} wsByRelRoot - Root-relative workspace
 *   directory (`'.'` for the root itself) mapped to the workspace name.
 * @returns {boolean} Whether at least two workspace names were hit.
 */
function spansMultipleWorkspaces(finding, wsByRelRoot) {
    const roots = [];
    for (const [relRoot, name] of wsByRelRoot) {
        // Evidence paths are compared with '/' separators, so the roots must
        // use them too (path.relative yields '\\' on Windows).
        const normalized = relRoot === '.' ? '' : relRoot.split('\\').join('/');
        // The empty prefix (root workspace) matches every file.
        roots.push({ prefix: normalized === '' ? '' : `${normalized}/`, name });
    }
    // Longest prefix first; Array.prototype.sort is stable, so roots of
    // equal length keep their original relative order.
    roots.sort((a, b) => b.prefix.length - a.prefix.length);
    const wsHit = new Set();
    for (const ev of finding.evidence) {
        const file = ev.file.split('\\').join('/');
        const owner = roots.find((r) => file.startsWith(r.prefix));
        if (!owner) {
            continue;
        }
        wsHit.add(owner.name);
        // Two distinct workspaces is all we need — stop scanning.
        if (wsHit.size >= 2) {
            return true;
        }
    }
    return false;
}
|
|
604
|
+
/**
 * Return a shallow copy of a finding whose fingerprint carries the
 * `cross-ws:` prefix, so it cannot collide with the fingerprint the same
 * detector produced in the per-workspace pass. The input is not mutated.
 *
 * @param {object} f - Finding with a `fingerprint` property.
 * @returns {object} Copy of `f` with the prefixed fingerprint.
 */
function tagCross(f) {
    const fingerprint = `cross-ws:${f.fingerprint}`;
    return Object.assign({}, f, { fingerprint });
}
|
|
609
|
+
/**
 * Longest shared leading directory of a list of '/'-separated paths.
 *
 * @param {string[]} paths - Paths to compare segment by segment.
 * @returns {string} `''` for an empty list, the path itself for a single
 *   entry, otherwise the shared leading segments joined with '/'
 *   (`'/'` when that join is empty).
 */
function commonAncestor(paths) {
    if (paths.length === 0) {
        return '';
    }
    if (paths.length === 1) {
        return paths[0];
    }
    const [first, ...rest] = paths.map((p) => p.split('/'));
    // Count how many leading segments of the first path every other path
    // shares; a shorter path yields `undefined` at the index and stops the scan.
    let shared = 0;
    while (shared < first.length && rest.every((segs) => segs[shared] === first[shared])) {
        shared += 1;
    }
    const joined = first.slice(0, shared).join('/');
    return joined === '' ? '/' : joined;
}
|
|
626
|
+
/**
 * Run every workspace-local detector in a fixed order and collect their
 * findings into one array.
 *
 * Reads `files`, `symbols`, `imports`, `iacEntries`, `workspaceRoot` and
 * `emit` from `ctx`. Before each detector runs, an info line is logged and
 * a `{ state: 'detecting', detector }` progress event is emitted.
 *
 * @param {object} ctx - Per-workspace scan context.
 * @returns {Promise<Array>} Findings from all detectors, in run order.
 */
async function runWorkspaceLocalDetectors(ctx) {
    const findings = [];
    const files = ctx.files;
    const symbolsArr = [...ctx.symbols];
    const importsArr = [...ctx.imports];
    // Announce a detector (log + progress event), run it, keep its findings.
    const run = (id, fn) => {
        logger.info({ files: files.length }, `RotHunter: running detector ${id}`);
        ctx.emit({ state: 'detecting', detector: id });
        findings.push(...fn());
    };
    // dead-handler needs no parsed project, so it runs before ts-morph is loaded.
    run('dead-handler', () => detectDeadHandlers({ files, iacEntries: ctx.iacEntries, imports: importsArr }));
    // One ts-morph Project, parsed once and shared by every file-walking
    // detector below instead of each detector re-parsing the same tree.
    const { Project: SharedProject } = await import('ts-morph');
    const sharedProject = new SharedProject({
        skipAddingFilesFromTsConfig: true,
        skipFileDependencyResolution: true,
    });
    for (const relPath of files) {
        sharedProject.addSourceFileAtPathIfExists(path.join(ctx.workspaceRoot, relPath));
    }
    // Fresh argument object per call for detectors that walk the shared project.
    const astInput = () => ({ workspaceRoot: ctx.workspaceRoot, files, project: sharedProject });
    const passes = [
        ['mutation', () => detectMutations(astInput())],
        ['race-condition', () => detectRaceConditions(astInput())],
        ['shared-db-write', () => detectSharedDbWrites(astInput())],
        ['api-race', () => detectApiRaces(astInput())],
        ['bad-config', () => detectBadConfig({ workspaceRoot: ctx.workspaceRoot, files })],
        ['silent-catch', () => detectSilentCatches(astInput())],
        ['skip-tests', () => detectSkipTests(astInput())],
        ['long-file', () => detectLongFiles(astInput())],
        ['console-log-prod', () => detectConsoleLogsInProd(astInput())],
        ['magic-numbers', () => detectMagicNumbers(astInput())],
        ['mutable-globals', () => detectMutableGlobals(astInput())],
        ['unused-deps', () => detectUnusedDeps({ workspaceRoot: ctx.workspaceRoot, imports: importsArr })],
        ['similar-functions', () => detectSimilarFunctions({ workspaceRoot: ctx.workspaceRoot, symbols: symbolsArr })],
        // todo-comments does its own workspace walk so it picks up Python / Go /
        // shell sources the TS parser skips. No `files` arg by design.
        ['todo-comments', () => detectTodoComments({ workspaceRoot: ctx.workspaceRoot })],
    ];
    for (const [id, detect] of passes) {
        run(id, detect);
    }
    return findings;
}
|
|
667
|
+
/**
 * Fold a boolean-plus-confidence LLM verdict into a finding, in place.
 *
 * Positive verdict: confidence is raised to the verdict's confidence
 * (capped at 0.95) and a `medium` finding is promoted to `high` when the
 * verdict confidence is at least 0.85.
 *
 * Negative verdict: confidence is scaled by `1 - verdict.confidence`, the
 * severity drops to `low` once confidence falls below `opts.threshold`,
 * and `finding.llmFalsePositive` is set when the verdict confidence reaches
 * `opts.autoFpThreshold` (default `LLM_FP_THRESHOLD`).
 *
 * In both cases a "**LLM verdict:**" paragraph is appended to the
 * description and `finding.layer` is set to 3.
 *
 * @param {object} finding - Mutated: `confidence`, `description`,
 *   `severity`, `layer`, and possibly `llmFalsePositive`.
 * @param {{positive: boolean, confidence: number, reason: string}} verdict
 * @param {{threshold: number, positiveLabel: string, negativeLabel: string,
 *   autoFpThreshold?: number}} opts
 * @returns {void}
 */
export function applyClusterVerdict(finding, verdict, opts) {
    const confTxt = verdict.confidence.toFixed(2);
    const label = verdict.positive ? opts.positiveLabel : opts.negativeLabel;
    const verdictNote = `\n\n**LLM verdict:** ${label} — ${verdict.reason} (confidence ${confTxt})`;
    if (verdict.positive) {
        finding.confidence = Math.min(0.95, Math.max(finding.confidence, verdict.confidence));
        finding.description += verdictNote;
        const promote = finding.severity === 'medium' && verdict.confidence >= 0.85;
        if (promote) {
            finding.severity = 'high';
        }
        finding.layer = 3;
        return;
    }
    finding.confidence = Math.max(0.0, finding.confidence * (1 - verdict.confidence));
    finding.description += verdictNote;
    if (finding.confidence < opts.threshold) {
        finding.severity = 'low';
    }
    // A sufficiently confident negative verdict is recorded as an automatic
    // false positive so the user does not have to mark each one by hand.
    const fpFloor = opts.autoFpThreshold ?? LLM_FP_THRESHOLD;
    if (verdict.confidence >= fpFloor) {
        finding.llmFalsePositive = {
            confidence: verdict.confidence,
            reason: verdict.reason,
        };
    }
    finding.layer = 3;
}
|
|
706
|
+
/**
 * Verdict-confidence floor at which a negative LLM verdict moves a
 * finding into the auto-FP bucket. `applyClusterVerdict` falls back to
 * this value when the caller supplies no `autoFpThreshold`.
 *
 * Set low (0.6) so any reasonably confident "intentional / FP" verdict
 * routes the finding out of the open list — the operator's stated
 * preference is "if the LLM says FP, treat it as auto FP, I'll un-mark it
 * if it's wrong". Below 0.6 the LLM is treated as undecided and the
 * deterministic finding stays in the open list at degraded confidence.
 */
export const LLM_FP_THRESHOLD = 0.6;
|
|
716
|
+
/**
 * Assemble the free-form `extraContext` text for a triage call on a finding.
 *
 * Up to two sections are produced, separated by a blank line:
 *   1. project conventions — whatever `readProjectConventions` returns for
 *      the first evidence file (only looked up when `workspaceRoot` is set);
 *   2. detector-specific hints from `buildDetectorContext`.
 *
 * @param {object} finding - Finding under triage; only `evidence[0]` is used here.
 * @param {Map} symbolById - Symbol table forwarded to the detector hints.
 * @param {string} [workspaceRoot] - Workspace root, if known.
 * @returns {string|undefined} Joined sections, or `undefined` when the
 *   finding has no evidence or no section produced any text.
 */
export function buildTriageContext(finding, symbolById, workspaceRoot) {
    const primary = finding.evidence[0];
    if (!primary) {
        return undefined;
    }
    // Conventions come first: they tell the LLM which patterns this
    // particular codebase treats as intentional.
    const conventions = workspaceRoot
        ? readProjectConventions(workspaceRoot, primary.file)
        : undefined;
    const sections = [
        conventions
            ? `Project conventions (concatenated from CLAUDE.md / AGENTS.md / .cursorrules / copilot-instructions.md / CONTRIBUTING.md / … as present — treat as authoritative for this codebase, override generic best-practice when they conflict):\n${conventions}`
            : undefined,
        // Per-detector structural hints.
        buildDetectorContext(finding, primary, symbolById, workspaceRoot),
    ].filter(Boolean);
    return sections.length > 0 ? sections.join('\n\n') : undefined;
}
|
|
748
|
+
/**
 * Pick the detector-specific hint text for a triage prompt.
 *
 * `dead-export` and `magic-numbers` get context computed from the symbol
 * table / source file (the latter only when `workspaceRoot` is known);
 * `hot-hub-file`, `long-file` and `todo-comments` get a fixed instruction
 * sentence; every other detector gets `undefined`.
 *
 * @param {object} finding - Finding under triage (`detectorId` selects the hint).
 * @param {object} ev - Primary evidence (`file`, `range.startLine`).
 * @param {Map} symbolById - Symbol table forwarded to the context builders.
 * @param {string} [workspaceRoot] - Workspace root, if known.
 * @returns {string|undefined} Hint text, or `undefined` when none applies.
 */
function buildDetectorContext(finding, ev, symbolById, workspaceRoot) {
    switch (finding.detectorId) {
        case 'dead-export':
            return buildDeadExportContext(finding, ev.file, symbolById, workspaceRoot);
        case 'magic-numbers':
            // The source file cannot be read back without a workspace root.
            return workspaceRoot
                ? buildMagicNumbersContext(ev.file, ev.range.startLine, workspaceRoot, symbolById)
                : undefined;
        case 'hot-hub-file':
            return 'This file is being flagged as an import hub. Decide whether the project deliberately keeps it as a single import surface (barrel / type surface) or whether it accumulates unrelated concerns.';
        case 'long-file':
            return 'Look at the snippet shape: a recognizer / config / pattern TABLE is single-concern locality and FALSE positive; mixed unrelated logic accumulating across many features is REAL.';
        case 'todo-comments':
            return 'Discriminate actionable TODO / FIXME / HACK / XXX from documentary NOTE comments. A NOTE that explains a design decision in adjacent code is documentation, not technical debt.';
        default:
            return undefined;
    }
}
|
|
766
|
+
/**
 * Build the extra triage context for a magic-numbers finding.
 *
 * The result contains, in order:
 *   - the declaration line of the INNERMOST symbol containing the flagged
 *     line (smallest line span wins; the first one wins on ties),
 *   - the contiguous comment block directly above that declaration, if any,
 *   - a ±8 line code window with the flagged line marked by `>`,
 *   - a fixed instruction paragraph for the LLM.
 *
 * The detector's own snippet is only the matching line, which is too thin
 * for the LLM to judge whether a literal is a domain constant or a real
 * magic number.
 *
 * @param {string} file - Evidence file path, relative to `workspaceRoot`.
 * @param {number} line - 1-based line number of the flagged literal.
 * @param {string} workspaceRoot - Directory `file` is resolved against.
 * @param {Map} symbolById - Symbols exposing `file` and
 *   `range.startLine` / `range.endLine`.
 * @returns {string|undefined} Context text, or `undefined` when the file
 *   cannot be read or `line` lies outside it.
 */
function buildMagicNumbersContext(file, line, workspaceRoot, symbolById) {
    let raw;
    try {
        raw = fs.readFileSync(path.join(workspaceRoot, file), 'utf-8');
    }
    catch {
        // Best effort: an unreadable file simply means no extra context.
        return undefined;
    }
    // Accept CRLF as well so no stray '\r' ends up in the prompt text.
    const lines = raw.split(/\r?\n/);
    if (line < 1 || line > lines.length)
        return undefined;
    // Select the tightest symbol around the flagged line. Comparing spans
    // directly makes the result independent of the map's iteration order.
    let enclosing;
    let enclosingSpan = Infinity;
    for (const s of symbolById.values()) {
        if (s.file !== file)
            continue;
        if (line < s.range.startLine || line > s.range.endLine)
            continue;
        const span = s.range.endLine - s.range.startLine;
        if (span < enclosingSpan) {
            enclosing = s;
            enclosingSpan = span;
        }
    }
    let enclosingSig;
    let enclosingDoc;
    if (enclosing) {
        const declIdx = enclosing.range.startLine - 1;
        enclosingSig = (lines[declIdx] ?? '').trim();
        // Walk upward from the declaration for a contiguous comment block —
        // JSDoc usually lives on the line(s) immediately above the signature.
        const docLines = [];
        for (let i = declIdx - 1; i >= 0; i--) {
            const t = (lines[i] ?? '').trim();
            if (t === '' || (!t.startsWith('//') && !t.startsWith('*') && !t.startsWith('/*')))
                break;
            docLines.unshift(t);
        }
        if (docLines.length > 0)
            enclosingDoc = docLines.join('\n');
    }
    // Pull ±8 lines around the literal for surrounding context.
    const winFrom = Math.max(0, line - 1 - 8);
    const winTo = Math.min(lines.length, line - 1 + 8);
    const codeWindow = lines
        .slice(winFrom, winTo + 1)
        .map((l, i) => `${winFrom + i + 1 === line ? '>' : ' '} ${winFrom + i + 1}: ${l}`)
        .join('\n');
    const parts = [];
    if (enclosingSig)
        parts.push(`Enclosing function signature:\n\`${enclosingSig}\``);
    if (enclosingDoc)
        parts.push(`Doc comment on the enclosing function:\n${enclosingDoc}`);
    parts.push(`Code window (\`>\` marks the flagged line):\n\`\`\`\n${codeWindow}\n\`\`\``);
    parts.push('Decide using the enclosing function + module name. If the literal is a domain constant local to this validator / encoder / parser (base58 lengths, IPv4 octets, ASCII boundary 127, retry-backoff thresholds, framework status codes) the answer is FALSE — naming each one inflates the binding count without clarifying anything. Flag REAL only when the literal is genuinely opaque business logic that a reader would have to guess about.');
    return parts.join('\n\n');
}
|
|
831
|
+
/**
 * Build the extra triage context for a dead-export finding.
 *
 * Lists the first-line signatures of up to 6 other exported symbols from
 * the same file, so the LLM can check whether the flagged export is
 * reachable through another export's signature. When the workspace is a
 * published library (per `isPublishedLibrary`), a paragraph explaining
 * that external consumers may exist is added.
 *
 * The flagged export's name is taken from the finding title, which is
 * expected to look like `Unused export: <name> in <file>`.
 *
 * @param {object} finding - Finding under triage (`title` is parsed).
 * @param {string} file - Evidence file path.
 * @param {Map} symbolById - Symbols exposing `file`, `exported`, `name`,
 *   `kind` and `source`.
 * @param {string} [workspaceRoot] - Workspace root, if known.
 * @returns {string|undefined} Context text, or `undefined` when neither
 *   section applies.
 */
function buildDeadExportContext(finding, file, symbolById, workspaceRoot) {
    const titleMatch = /Unused export:\s*(\S+)/i.exec(finding.title);
    const targetName = titleMatch?.[1];
    const subject = targetName ?? 'this symbol';
    const siblingLines = [];
    for (const sym of symbolById.values()) {
        if (siblingLines.length >= 6) {
            break;
        }
        const isSibling = sym.file === file && sym.exported && sym.name !== targetName;
        if (!isSibling) {
            continue;
        }
        // First non-blank source line — usually the declaration signature
        // for interfaces / functions / classes.
        const signature = sym.source.split('\n').find((ln) => ln.trim().length > 0) ?? '';
        if (signature) {
            siblingLines.push(`- ${sym.kind} \`${sym.name}\`: \`${signature.trim().slice(0, 160)}\``);
        }
    }
    const sections = [];
    if (siblingLines.length > 0) {
        sections.push(`Other exports in the same file (\`${file}\`):\n${siblingLines.join('\n')}\n\nIf \`${subject}\` appears in any of those signatures (return type, parameter, generic constraint, extends clause) it is reachable through the public type surface and a FALSE positive.`);
    }
    // Published-library mode: consumers of the export may live in other
    // repositories the detector cannot see, so tell the LLM which workspace
    // shape it is judging.
    if (workspaceRoot && isPublishedLibrary(workspaceRoot)) {
        sections.push(`Workspace shape: PUBLISHED npm LIBRARY (package.json has name + version, not private, declares an entry surface). Downstream consumers in OTHER repositories may import \`${subject}\` even though no file inside THIS repo does. Lean toward FALSE positive when the symbol fits the library's domain (env-helper symmetry alongside other exports, types matching the package theme, utility functions named consistently with the published API) AND there is no obvious sign it is a stranded internal leftover (no \`@deprecated\` JSDoc, no \`unused-\` / \`legacy\` naming, no half-baked TODO).`);
    }
    return sections.length === 0 ? undefined : sections.join('\n\n');
}
|
|
876
|
+
/**
 * Decode the JSON payload stored in `evidence.note`.
 *
 * Detectors pack structured metadata into the note; callers in this file
 * read fields such as `method`, `pathPattern`, `client`, `entity`,
 * `column`, `adapter`, `target` and `enclosingName` from the result.
 *
 * @param {{note?: string}|null|undefined} ev - Evidence entry.
 * @returns {object} The parsed value when it is a non-null object;
 *   otherwise `{}` (missing note, invalid JSON, or a JSON primitive), so
 *   callers can read optional fields without guarding.
 */
function parseEvidenceNote(ev) {
    const note = ev?.note;
    if (!note) {
        return {};
    }
    let decoded;
    try {
        decoded = JSON.parse(note);
    }
    catch {
        // Malformed notes are treated the same as missing ones.
        return {};
    }
    const isObjectLike = Boolean(decoded) && typeof decoded === 'object';
    return isObjectLike ? decoded : {};
}
|
|
894
|
+
/**
 * Best-effort human-facing cluster label for a finding, derived from the
 * structured note on its first evidence entry (never from `finding.title`).
 *
 * Precedence: `"<method> <pathPattern>"`, then `"<entity>.<column>"`, then
 * the note's `target`.
 *
 * @param {{evidence: Array}} finding - Finding to label.
 * @returns {string|undefined} The label, or `undefined` when the finding
 *   has no evidence or the note carries none of those fields.
 */
function clusterLabel(finding) {
    const first = finding.evidence[0];
    if (!first) {
        return undefined;
    }
    const { method, pathPattern, entity, column, target } = parseEvidenceNote(first);
    if (method && pathPattern) {
        return `${method} ${pathPattern}`;
    }
    if (entity && column) {
        return `${entity}.${column}`;
    }
    return target ? target : undefined;
}
|
|
912
|
+
// Detector ids that have no dedicated cluster confirmer but still benefit
// from a real-vs-false-positive LLM triage. Listing a detector here routes
// its medium / high findings through `TriageConfirmer` in processOne.
const TRIAGE_DETECTORS = new Set([
    // General code-quality detectors.
    [
        'silent-catch',
        'public-any',
        'mutable-globals',
        'bad-config',
        'long-function',
        'long-file',
        'magic-numbers',
        'hot-hub-file',
        'todo-comments',
    ],
    // Reachability detectors: the deterministic check misses framework
    // conventions, dynamic loaders and structural type-surface. An LLM given
    // a sibling-signature / importer-count snippet handles those false
    // positives far better than per-detector hand-coded rules.
    [
        'dead-export',
        'dead-module',
        'dead-handler',
        'dead-api',
    ],
    // similar-functions has a high syntactic-only false-positive rate: two
    // unrelated helpers can share an AST shape (template-literal builders,
    // Commander command registrations) without being refactor candidates.
    // Triage lets the LLM judge semantic relatedness, not just shape.
    [
        'similar-functions',
    ],
].flat());
|
|
941
|
+
/**
 * Subset of TRIAGE_DETECTORS whose findings get an LLM verdict on EVERY
 * finding, low severity included. Their false-positive rate is high even
 * at the low tier (reachability misses, design-intent flags, NOTE-vs-TODO
 * discrimination), so the LLM cost is justified by the noise reduction.
 *
 * Every other TRIAGE detector keeps the `severity !== 'low'` gate so LLM
 * calls are not spent on the deterministic-noise tier.
 */
const ALWAYS_TRIAGE_DETECTORS = new Set([
    // Reachability detectors.
    ['dead-export', 'dead-module', 'dead-handler', 'dead-api'],
    // Design-intent / comment-intent detectors.
    ['todo-comments', 'hot-hub-file', 'long-file'],
    // long-function findings are emitted at 'low' severity, but their
    // false-positive rate depends heavily on project shape: linear handlers,
    // composition-root components and state-machine bodies are legitimate at
    // 80–120 LOC in some projects and unacceptable in others. The project's
    // own CLAUDE.md decides, and only the LLM with project conventions in
    // scope surfaces that signal.
    ['long-function'],
    // The deterministic magic-numbers pass already cuts ~70% of false
    // positives. What remains (domain thresholds, byte counts, ASCII
    // boundaries) is a judgement call the LLM can answer from a snippet.
    // Volume stays low because the per-file cap is 5.
    ['magic-numbers'],
].flat());
|
|
972
|
+
/**
 * Decide whether a finding is borderline enough to warrant LLM confirmation.
 *
 * Gates, evaluated in order:
 *  1. `mutation`, `race-condition`, `shared-db-write`, `api-race` — always
 *     confirmed. Tier 1 cannot tell intentional mutations, mutex /
 *     single-flight / scoped state, transaction-wrapped or idempotent
 *     writes, test fixtures or retry wrappers apart from genuine problems.
 *  2. Detectors in `TRIAGE_DETECTORS` — confirmed when the detector is also
 *     in `ALWAYS_TRIAGE_DETECTORS`, or when severity is anything but 'low'.
 *  3. Anything other than `duplicate-type` / `duplicate-function` — never
 *     confirmed.
 *  4. Duplicate detectors — confirmed when:
 *     - layer >= 2 (normalized-names: the synonym/normalization map can
 *       produce false matches), or
 *     - confidence < 0.95, or
 *     - the cluster spans >= 2 distinct symbol names AND the first resolved
 *       symbol has 1–3 fields. Small `{id, name}`-style shapes regularly
 *       collide across unrelated DTOs and need a semantic check.
 *
 * @param {object} finding - Finding with `detectorId`, `severity`, `layer`,
 *   `confidence` and `evidence` (entries carrying `file` and `range.startLine`).
 * @param {Map<string, object>} symbolById - Symbol lookup used to resolve evidence.
 * @returns {boolean} `true` when the finding should go through LLM confirmation.
 */
function requiresLlmConfirmation(finding, symbolById) {
    const { detectorId } = finding;
    // Gate 1: detectors whose findings are borderline by nature.
    switch (detectorId) {
        case 'mutation':
        case 'race-condition':
        case 'shared-db-write':
        case 'api-race':
            return true;
        default:
            break;
    }
    // Gate 2: generic triage — every finding for the "always" subset,
    // medium+ for the rest so LLM calls are not spent on low-tier noise.
    const passesTriageGate = TRIAGE_DETECTORS.has(detectorId)
        && (ALWAYS_TRIAGE_DETECTORS.has(detectorId) || finding.severity !== 'low');
    if (passesTriageGate) {
        return true;
    }
    // Gate 3: only the duplicate detectors have further heuristics.
    const isDuplicateDetector = detectorId === 'duplicate-type' || detectorId === 'duplicate-function';
    if (!isDuplicateDetector) {
        return false;
    }
    // Gate 4a: non-strict layers and sub-threshold confidence always confirm.
    if (finding.layer >= 2 || finding.confidence < 0.95) {
        return true;
    }
    // Gate 4b: strict match — resolve evidence to symbols and look for the
    // "small shape shared by differently-named symbols" pattern.
    const resolved = [];
    for (const ev of finding.evidence) {
        const symbolId = findSymbolIdForEvidence(symbolById, ev.file, ev.range.startLine);
        if (!symbolId) {
            continue;
        }
        const symbol = symbolById.get(symbolId);
        if (symbol) {
            resolved.push(symbol);
        }
    }
    const nameCount = new Set(resolved.map((symbol) => symbol.name)).size;
    const leadStructure = resolved[0]?.structure;
    let fieldCount = 0;
    if (leadStructure && 'fields' in leadStructure) {
        fieldCount = leadStructure.fields?.length ?? 0;
    }
    return nameCount >= 2 && fieldCount > 0 && fieldCount <= 3;
}
|
|
1031
|
+
/**
 * Locate the symbol that an evidence location points at.
 *
 * Scans the map's values in iteration order and returns the `id` of the
 * first symbol whose `file` and `range.startLine` both match.
 *
 * @param {Map<string, object>} symbolById - Symbols to search.
 * @param {string} file - Evidence file to match against `symbol.file`.
 * @param {number} startLine - Evidence start line to match against `symbol.range.startLine`.
 * @returns {string | undefined} The matching symbol's id, or `undefined` if none matches.
 */
function findSymbolIdForEvidence(symbolById, file, startLine) {
    for (const candidate of symbolById.values()) {
        if (candidate.file !== file) {
            continue;
        }
        if (candidate.range.startLine !== startLine) {
            continue;
        }
        return candidate.id;
    }
    return undefined;
}
|
|
1038
|
+
//# sourceMappingURL=rothunter.js.map
|