@lbroth/rothunter 1.0.0-rc.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (269) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +141 -0
  3. package/dist/adapters/llm.d.ts +68 -0
  4. package/dist/adapters/llm.d.ts.map +1 -0
  5. package/dist/adapters/llm.js +189 -0
  6. package/dist/adapters/llm.js.map +1 -0
  7. package/dist/config.d.ts +37 -0
  8. package/dist/config.d.ts.map +1 -0
  9. package/dist/config.js +81 -0
  10. package/dist/config.js.map +1 -0
  11. package/dist/detector-registry.d.ts +32 -0
  12. package/dist/detector-registry.d.ts.map +1 -0
  13. package/dist/detector-registry.js +74 -0
  14. package/dist/detector-registry.js.map +1 -0
  15. package/dist/detectors/api-race.d.ts +6 -0
  16. package/dist/detectors/api-race.d.ts.map +1 -0
  17. package/dist/detectors/api-race.js +222 -0
  18. package/dist/detectors/api-race.js.map +1 -0
  19. package/dist/detectors/bad-config.d.ts +6 -0
  20. package/dist/detectors/bad-config.d.ts.map +1 -0
  21. package/dist/detectors/bad-config.js +529 -0
  22. package/dist/detectors/bad-config.js.map +1 -0
  23. package/dist/detectors/console-log-prod.d.ts +6 -0
  24. package/dist/detectors/console-log-prod.d.ts.map +1 -0
  25. package/dist/detectors/console-log-prod.js +72 -0
  26. package/dist/detectors/console-log-prod.js.map +1 -0
  27. package/dist/detectors/dead-api.d.ts +10 -0
  28. package/dist/detectors/dead-api.d.ts.map +1 -0
  29. package/dist/detectors/dead-api.js +115 -0
  30. package/dist/detectors/dead-api.js.map +1 -0
  31. package/dist/detectors/dead-export.d.ts +12 -0
  32. package/dist/detectors/dead-export.d.ts.map +1 -0
  33. package/dist/detectors/dead-export.js +140 -0
  34. package/dist/detectors/dead-export.js.map +1 -0
  35. package/dist/detectors/dead-handler.d.ts +12 -0
  36. package/dist/detectors/dead-handler.d.ts.map +1 -0
  37. package/dist/detectors/dead-handler.js +40 -0
  38. package/dist/detectors/dead-handler.js.map +1 -0
  39. package/dist/detectors/dead-module.d.ts +14 -0
  40. package/dist/detectors/dead-module.d.ts.map +1 -0
  41. package/dist/detectors/dead-module.js +50 -0
  42. package/dist/detectors/dead-module.js.map +1 -0
  43. package/dist/detectors/deep-nesting.d.ts +12 -0
  44. package/dist/detectors/deep-nesting.d.ts.map +1 -0
  45. package/dist/detectors/deep-nesting.js +133 -0
  46. package/dist/detectors/deep-nesting.js.map +1 -0
  47. package/dist/detectors/duplicate-function.d.ts +9 -0
  48. package/dist/detectors/duplicate-function.d.ts.map +1 -0
  49. package/dist/detectors/duplicate-function.js +199 -0
  50. package/dist/detectors/duplicate-function.js.map +1 -0
  51. package/dist/detectors/duplicate-type.d.ts +9 -0
  52. package/dist/detectors/duplicate-type.d.ts.map +1 -0
  53. package/dist/detectors/duplicate-type.js +166 -0
  54. package/dist/detectors/duplicate-type.js.map +1 -0
  55. package/dist/detectors/hot-hub-file.d.ts +11 -0
  56. package/dist/detectors/hot-hub-file.d.ts.map +1 -0
  57. package/dist/detectors/hot-hub-file.js +42 -0
  58. package/dist/detectors/hot-hub-file.js.map +1 -0
  59. package/dist/detectors/long-file.d.ts +12 -0
  60. package/dist/detectors/long-file.d.ts.map +1 -0
  61. package/dist/detectors/long-file.js +82 -0
  62. package/dist/detectors/long-file.js.map +1 -0
  63. package/dist/detectors/long-function.d.ts +12 -0
  64. package/dist/detectors/long-function.d.ts.map +1 -0
  65. package/dist/detectors/long-function.js +45 -0
  66. package/dist/detectors/long-function.js.map +1 -0
  67. package/dist/detectors/magic-numbers.d.ts +10 -0
  68. package/dist/detectors/magic-numbers.d.ts.map +1 -0
  69. package/dist/detectors/magic-numbers.js +332 -0
  70. package/dist/detectors/magic-numbers.js.map +1 -0
  71. package/dist/detectors/mutable-globals.d.ts +6 -0
  72. package/dist/detectors/mutable-globals.d.ts.map +1 -0
  73. package/dist/detectors/mutable-globals.js +95 -0
  74. package/dist/detectors/mutable-globals.js.map +1 -0
  75. package/dist/detectors/mutation.d.ts +11 -0
  76. package/dist/detectors/mutation.d.ts.map +1 -0
  77. package/dist/detectors/mutation.js +397 -0
  78. package/dist/detectors/mutation.js.map +1 -0
  79. package/dist/detectors/public-any.d.ts +6 -0
  80. package/dist/detectors/public-any.d.ts.map +1 -0
  81. package/dist/detectors/public-any.js +52 -0
  82. package/dist/detectors/public-any.js.map +1 -0
  83. package/dist/detectors/race-condition.d.ts +6 -0
  84. package/dist/detectors/race-condition.d.ts.map +1 -0
  85. package/dist/detectors/race-condition.js +608 -0
  86. package/dist/detectors/race-condition.js.map +1 -0
  87. package/dist/detectors/shared-db-write.d.ts +6 -0
  88. package/dist/detectors/shared-db-write.d.ts.map +1 -0
  89. package/dist/detectors/shared-db-write.js +656 -0
  90. package/dist/detectors/shared-db-write.js.map +1 -0
  91. package/dist/detectors/silent-catch.d.ts +6 -0
  92. package/dist/detectors/silent-catch.d.ts.map +1 -0
  93. package/dist/detectors/silent-catch.js +167 -0
  94. package/dist/detectors/silent-catch.js.map +1 -0
  95. package/dist/detectors/similar-functions.d.ts +15 -0
  96. package/dist/detectors/similar-functions.d.ts.map +1 -0
  97. package/dist/detectors/similar-functions.js +334 -0
  98. package/dist/detectors/similar-functions.js.map +1 -0
  99. package/dist/detectors/skip-tests.d.ts +6 -0
  100. package/dist/detectors/skip-tests.d.ts.map +1 -0
  101. package/dist/detectors/skip-tests.js +69 -0
  102. package/dist/detectors/skip-tests.js.map +1 -0
  103. package/dist/detectors/todo-comments.d.ts +29 -0
  104. package/dist/detectors/todo-comments.d.ts.map +1 -0
  105. package/dist/detectors/todo-comments.js +154 -0
  106. package/dist/detectors/todo-comments.js.map +1 -0
  107. package/dist/detectors/unused-deps.d.ts +8 -0
  108. package/dist/detectors/unused-deps.d.ts.map +1 -0
  109. package/dist/detectors/unused-deps.js +115 -0
  110. package/dist/detectors/unused-deps.js.map +1 -0
  111. package/dist/extraction/api-race-confirmer.d.ts +31 -0
  112. package/dist/extraction/api-race-confirmer.d.ts.map +1 -0
  113. package/dist/extraction/api-race-confirmer.js +110 -0
  114. package/dist/extraction/api-race-confirmer.js.map +1 -0
  115. package/dist/extraction/llm-confirmer.d.ts +25 -0
  116. package/dist/extraction/llm-confirmer.d.ts.map +1 -0
  117. package/dist/extraction/llm-confirmer.js +118 -0
  118. package/dist/extraction/llm-confirmer.js.map +1 -0
  119. package/dist/extraction/mutation-confirmer.d.ts +30 -0
  120. package/dist/extraction/mutation-confirmer.d.ts.map +1 -0
  121. package/dist/extraction/mutation-confirmer.js +73 -0
  122. package/dist/extraction/mutation-confirmer.js.map +1 -0
  123. package/dist/extraction/prompt-chunking.d.ts +37 -0
  124. package/dist/extraction/prompt-chunking.d.ts.map +1 -0
  125. package/dist/extraction/prompt-chunking.js +61 -0
  126. package/dist/extraction/prompt-chunking.js.map +1 -0
  127. package/dist/extraction/race-confirmer.d.ts +28 -0
  128. package/dist/extraction/race-confirmer.d.ts.map +1 -0
  129. package/dist/extraction/race-confirmer.js +68 -0
  130. package/dist/extraction/race-confirmer.js.map +1 -0
  131. package/dist/extraction/shared-db-write-confirmer.d.ts +31 -0
  132. package/dist/extraction/shared-db-write-confirmer.d.ts.map +1 -0
  133. package/dist/extraction/shared-db-write-confirmer.js +141 -0
  134. package/dist/extraction/shared-db-write-confirmer.js.map +1 -0
  135. package/dist/extraction/triage-confirmer.d.ts +59 -0
  136. package/dist/extraction/triage-confirmer.d.ts.map +1 -0
  137. package/dist/extraction/triage-confirmer.js +104 -0
  138. package/dist/extraction/triage-confirmer.js.map +1 -0
  139. package/dist/graph/cfg.d.ts +45 -0
  140. package/dist/graph/cfg.d.ts.map +1 -0
  141. package/dist/graph/cfg.js +198 -0
  142. package/dist/graph/cfg.js.map +1 -0
  143. package/dist/graph/decorator-entries.d.ts +2 -0
  144. package/dist/graph/decorator-entries.d.ts.map +1 -0
  145. package/dist/graph/decorator-entries.js +89 -0
  146. package/dist/graph/decorator-entries.js.map +1 -0
  147. package/dist/graph/entry-points.d.ts +12 -0
  148. package/dist/graph/entry-points.d.ts.map +1 -0
  149. package/dist/graph/entry-points.js +282 -0
  150. package/dist/graph/entry-points.js.map +1 -0
  151. package/dist/graph/handler-conventions.d.ts +2 -0
  152. package/dist/graph/handler-conventions.d.ts.map +1 -0
  153. package/dist/graph/handler-conventions.js +26 -0
  154. package/dist/graph/handler-conventions.js.map +1 -0
  155. package/dist/graph/iac-entries.d.ts +2 -0
  156. package/dist/graph/iac-entries.d.ts.map +1 -0
  157. package/dist/graph/iac-entries.js +123 -0
  158. package/dist/graph/iac-entries.js.map +1 -0
  159. package/dist/graph/import-graph.d.ts +48 -0
  160. package/dist/graph/import-graph.d.ts.map +1 -0
  161. package/dist/graph/import-graph.js +86 -0
  162. package/dist/graph/import-graph.js.map +1 -0
  163. package/dist/graph/monorepo-detect.d.ts +3 -0
  164. package/dist/graph/monorepo-detect.d.ts.map +1 -0
  165. package/dist/graph/monorepo-detect.js +166 -0
  166. package/dist/graph/monorepo-detect.js.map +1 -0
  167. package/dist/graph/tsconfig-paths.d.ts +23 -0
  168. package/dist/graph/tsconfig-paths.d.ts.map +1 -0
  169. package/dist/graph/tsconfig-paths.js +217 -0
  170. package/dist/graph/tsconfig-paths.js.map +1 -0
  171. package/dist/multi-workspace-scanner.d.ts +13 -0
  172. package/dist/multi-workspace-scanner.d.ts.map +1 -0
  173. package/dist/multi-workspace-scanner.js +130 -0
  174. package/dist/multi-workspace-scanner.js.map +1 -0
  175. package/dist/normalizers/type-normalizer.d.ts +16 -0
  176. package/dist/normalizers/type-normalizer.d.ts.map +1 -0
  177. package/dist/normalizers/type-normalizer.js +189 -0
  178. package/dist/normalizers/type-normalizer.js.map +1 -0
  179. package/dist/parsers/typescript-parser.d.ts +57 -0
  180. package/dist/parsers/typescript-parser.d.ts.map +1 -0
  181. package/dist/parsers/typescript-parser.js +502 -0
  182. package/dist/parsers/typescript-parser.js.map +1 -0
  183. package/dist/reporter/json-reporter.d.ts +12 -0
  184. package/dist/reporter/json-reporter.d.ts.map +1 -0
  185. package/dist/reporter/json-reporter.js +28 -0
  186. package/dist/reporter/json-reporter.js.map +1 -0
  187. package/dist/reporter/markdown-reporter.d.ts +11 -0
  188. package/dist/reporter/markdown-reporter.d.ts.map +1 -0
  189. package/dist/reporter/markdown-reporter.js +77 -0
  190. package/dist/reporter/markdown-reporter.js.map +1 -0
  191. package/dist/rothunter.d.ts +125 -0
  192. package/dist/rothunter.d.ts.map +1 -0
  193. package/dist/rothunter.js +1038 -0
  194. package/dist/rothunter.js.map +1 -0
  195. package/dist/server/false-positives.d.ts +34 -0
  196. package/dist/server/false-positives.d.ts.map +1 -0
  197. package/dist/server/false-positives.js +85 -0
  198. package/dist/server/false-positives.js.map +1 -0
  199. package/dist/server/index.d.ts +2 -0
  200. package/dist/server/index.d.ts.map +1 -0
  201. package/dist/server/index.js +1529 -0
  202. package/dist/server/index.js.map +1 -0
  203. package/dist/server/marked-to-fix.d.ts +16 -0
  204. package/dist/server/marked-to-fix.d.ts.map +1 -0
  205. package/dist/server/marked-to-fix.js +36 -0
  206. package/dist/server/marked-to-fix.js.map +1 -0
  207. package/dist/server/scan-store.d.ts +147 -0
  208. package/dist/server/scan-store.d.ts.map +1 -0
  209. package/dist/server/scan-store.js +291 -0
  210. package/dist/server/scan-store.js.map +1 -0
  211. package/dist/server/settings-store.d.ts +28 -0
  212. package/dist/server/settings-store.d.ts.map +1 -0
  213. package/dist/server/settings-store.js +46 -0
  214. package/dist/server/settings-store.js.map +1 -0
  215. package/dist/server/workspace-store.d.ts +39 -0
  216. package/dist/server/workspace-store.d.ts.map +1 -0
  217. package/dist/server/workspace-store.js +108 -0
  218. package/dist/server/workspace-store.js.map +1 -0
  219. package/dist/types/detector-input.d.ts +37 -0
  220. package/dist/types/detector-input.d.ts.map +1 -0
  221. package/dist/types/detector-input.js +2 -0
  222. package/dist/types/detector-input.js.map +1 -0
  223. package/dist/types.d.ts +110 -0
  224. package/dist/types.d.ts.map +1 -0
  225. package/dist/types.js +2 -0
  226. package/dist/types.js.map +1 -0
  227. package/dist/utils/clustering.d.ts +14 -0
  228. package/dist/utils/clustering.d.ts.map +1 -0
  229. package/dist/utils/clustering.js +56 -0
  230. package/dist/utils/clustering.js.map +1 -0
  231. package/dist/utils/gitignore.d.ts +32 -0
  232. package/dist/utils/gitignore.d.ts.map +1 -0
  233. package/dist/utils/gitignore.js +122 -0
  234. package/dist/utils/gitignore.js.map +1 -0
  235. package/dist/utils/hash.d.ts +11 -0
  236. package/dist/utils/hash.d.ts.map +1 -0
  237. package/dist/utils/hash.js +14 -0
  238. package/dist/utils/hash.js.map +1 -0
  239. package/dist/utils/ignore-annotation.d.ts +28 -0
  240. package/dist/utils/ignore-annotation.d.ts.map +1 -0
  241. package/dist/utils/ignore-annotation.js +46 -0
  242. package/dist/utils/ignore-annotation.js.map +1 -0
  243. package/dist/utils/llm-json.d.ts +2 -0
  244. package/dist/utils/llm-json.d.ts.map +1 -0
  245. package/dist/utils/llm-json.js +53 -0
  246. package/dist/utils/llm-json.js.map +1 -0
  247. package/dist/utils/logger.d.ts +3 -0
  248. package/dist/utils/logger.d.ts.map +1 -0
  249. package/dist/utils/logger.js +4 -0
  250. package/dist/utils/logger.js.map +1 -0
  251. package/dist/utils/project-conventions.d.ts +2 -0
  252. package/dist/utils/project-conventions.d.ts.map +1 -0
  253. package/dist/utils/project-conventions.js +108 -0
  254. package/dist/utils/project-conventions.js.map +1 -0
  255. package/dist/utils/regex.d.ts +9 -0
  256. package/dist/utils/regex.d.ts.map +1 -0
  257. package/dist/utils/regex.js +11 -0
  258. package/dist/utils/regex.js.map +1 -0
  259. package/dist/utils/snippet.d.ts +20 -0
  260. package/dist/utils/snippet.d.ts.map +1 -0
  261. package/dist/utils/snippet.js +28 -0
  262. package/dist/utils/snippet.js.map +1 -0
  263. package/dist/utils/source-reader.d.ts +19 -0
  264. package/dist/utils/source-reader.d.ts.map +1 -0
  265. package/dist/utils/source-reader.js +32 -0
  266. package/dist/utils/source-reader.js.map +1 -0
  267. package/logo.png +0 -0
  268. package/package.json +92 -0
  269. package/scripts/start-llm.mjs +161 -0
@@ -0,0 +1,1038 @@
1
+ import * as path from 'node:path';
2
+ import * as fs from 'node:fs';
3
+ import { logger } from './utils/logger.js';
4
+ import { DuplicateTypeDetector } from './detectors/duplicate-type.js';
5
+ import { DuplicateFunctionDetector } from './detectors/duplicate-function.js';
6
+ import { detectDeadModules } from './detectors/dead-module.js';
7
+ import { detectDeadExports } from './detectors/dead-export.js';
8
+ import { detectDeadApis } from './detectors/dead-api.js';
9
+ import { detectDeadHandlers } from './detectors/dead-handler.js';
10
+ import { detectMutations } from './detectors/mutation.js';
11
+ import { detectRaceConditions } from './detectors/race-condition.js';
12
+ import { detectSharedDbWrites } from './detectors/shared-db-write.js';
13
+ import { detectApiRaces } from './detectors/api-race.js';
14
+ import { detectBadConfig } from './detectors/bad-config.js';
15
+ import { detectSilentCatches } from './detectors/silent-catch.js';
16
+ import { detectSkipTests } from './detectors/skip-tests.js';
17
+ import { detectLongFiles } from './detectors/long-file.js';
18
+ import { detectLongFunctions } from './detectors/long-function.js';
19
+ import { detectConsoleLogsInProd } from './detectors/console-log-prod.js';
20
+ import { detectMagicNumbers } from './detectors/magic-numbers.js';
21
+ import { detectDeepNesting } from './detectors/deep-nesting.js';
22
+ import { detectPublicAny } from './detectors/public-any.js';
23
+ import { detectMutableGlobals } from './detectors/mutable-globals.js';
24
+ import { detectUnusedDeps } from './detectors/unused-deps.js';
25
+ import { detectHotHubFiles } from './detectors/hot-hub-file.js';
26
+ import { detectSimilarFunctions } from './detectors/similar-functions.js';
27
+ import { detectTodoComments } from './detectors/todo-comments.js';
28
+ import { TypeScriptParser } from './parsers/typescript-parser.js';
29
+ import { TypeNormalizer } from './normalizers/type-normalizer.js';
30
+ import { buildImportGraph, reachableFrom } from './graph/import-graph.js';
31
+ import { discoverEntryPoints, isPublishedLibrary } from './graph/entry-points.js';
32
+ import { readProjectConventions } from './utils/project-conventions.js';
33
+ import { resolveIacEntryFiles } from './graph/iac-entries.js';
34
+ import { resolveDecoratorEntryFiles } from './graph/decorator-entries.js';
35
+ import { loadRotHunterConfig } from './config.js';
36
+ import { Project } from 'ts-morph';
37
+ import { scanWorkspaces } from './multi-workspace-scanner.js';
38
+ export class RotHunter {
39
+ parser = new TypeScriptParser();
40
+ normalizer = new TypeNormalizer();
41
+ detectors = [
42
+ new DuplicateTypeDetector(),
43
+ new DuplicateFunctionDetector(),
44
+ ];
45
+ async run(opts) {
46
+ const startedAt = Date.now();
47
+ // Multi-workspace mode: if a rothunter.config.json exists at the workspace
48
+ // root, parse every linked workspace in a single pass and run the same
49
+ // detectors over the merged graph. dead-api is the cross-repo-only
50
+ // detector and only emits findings in this mode.
51
+ const emit = (event) => {
52
+ if (!opts.onProgress)
53
+ return;
54
+ try {
55
+ opts.onProgress(event);
56
+ }
57
+ catch (err) {
58
+ logger.warn({ err: err.message }, 'onProgress callback threw');
59
+ }
60
+ };
61
+ emit({ state: 'parsing' });
62
+ const config = loadRotHunterConfig(opts.workspaceRoot);
63
+ let isMulti = false;
64
+ let parsed;
65
+ if (config) {
66
+ logger.info({ configPath: config.configPath, workspaces: config.workspaces.map((w) => w.name) }, 'RotHunter: parsing multi-workspace group');
67
+ const multi = await scanWorkspaces(config);
68
+ parsed = { symbols: multi.symbols, imports: multi.imports, files: multi.files };
69
+ isMulti = true;
70
+ }
71
+ else {
72
+ logger.info({ workspaceRoot: opts.workspaceRoot }, 'RotHunter: parsing workspace');
73
+ parsed = await this.parser.parseWorkspaceFull(opts);
74
+ }
75
+ emit({ state: 'parsing', files: parsed.files.length, symbols: parsed.symbols.length });
76
+ logger.info({ count: parsed.symbols.length }, 'RotHunter: normalizing symbols');
77
+ const symbols = this.normalizer.normalizeAll(parsed.symbols);
78
+ const findings = [];
79
+ for (const detector of this.detectors) {
80
+ logger.info({ detector: detector.id }, 'RotHunter: running detector');
81
+ emit({ state: 'detecting', detector: detector.id });
82
+ const detectorFindings = await detector.run(symbols);
83
+ findings.push(...detectorFindings);
84
+ }
85
+ // Dead-module detection runs at file granularity, not symbol granularity,
86
+ // so it has its own input shape. Build the import graph once and reuse it
87
+ // for any future graph-based detectors (call graph, cross-repo lookups).
88
+ const fileSet = new Set(parsed.files);
89
+ const importGraph = buildImportGraph(parsed.imports);
90
+ const entryPoints = discoverEntryPoints(opts.workspaceRoot, fileSet);
91
+ // CDK / SST / Serverless-framework constructs reference handler files by
92
+ // string path. Resolve those strings and add them to the entry set so
93
+ // dead-module/dead-export don't flag lambda handlers as orphans. The
94
+ // resolved set is also used by the dead-handler detector below to decide
95
+ // whether a handler-convention file is actually wired.
96
+ const iacEntries = isMulti
97
+ ? new Set()
98
+ : resolveIacEntryFiles(opts.workspaceRoot, parsed.files);
99
+ for (const f of iacEntries)
100
+ entryPoints.add(f);
101
+ // Framework-decorated classes (NestJS controllers, Angular components,
102
+ // TypeORM entities, ...) are discovered by the framework at runtime —
103
+ // never statically imported. Protect their files from dead-module.
104
+ const decoratorEntries = isMulti
105
+ ? new Set()
106
+ : resolveDecoratorEntryFiles(opts.workspaceRoot, parsed.files);
107
+ for (const f of decoratorEntries)
108
+ entryPoints.add(f);
109
+ const reachable = reachableFrom(importGraph, entryPoints);
110
+ logger.info({ entries: entryPoints.size, reachable: reachable.size, total: parsed.files.length }, 'RotHunter: running detector dead-module');
111
+ findings.push(...detectDeadModules({ files: parsed.files, graph: importGraph, entryPoints, reachable }));
112
+ logger.info({ symbols: symbols.length }, 'RotHunter: running detector dead-export');
113
+ findings.push(...detectDeadExports({ symbols, imports: parsed.imports, entryPoints }));
114
+ if (isMulti) {
115
+ logger.info({ symbols: symbols.length }, 'RotHunter: running detector dead-api');
116
+ findings.push(...detectDeadApis({ symbols, imports: parsed.imports }));
117
+ }
118
+ // Symbol/graph-only detectors — safe in both modes (no fs reads, no git,
119
+ // no per-workspace state). File-walking + git-touched + ts-morph-Project
120
+ // detectors stay under the `!isMulti` gate below because their input
121
+ // shape doesn't survive the workspace-name-prefixed paths emitted by
122
+ // multi-workspace-scanner.
123
+ logger.info({ symbols: symbols.length }, 'RotHunter: running detector long-function');
124
+ emit({ state: 'detecting', detector: 'long-function' });
125
+ findings.push(...detectLongFunctions({ symbols }));
126
+ logger.info({ symbols: symbols.length }, 'RotHunter: running detector deep-nesting');
127
+ emit({ state: 'detecting', detector: 'deep-nesting' });
128
+ findings.push(...detectDeepNesting({ symbols }));
129
+ logger.info({ symbols: symbols.length }, 'RotHunter: running detector public-any');
130
+ emit({ state: 'detecting', detector: 'public-any' });
131
+ findings.push(...detectPublicAny({ symbols }));
132
+ logger.info({ files: parsed.files.length }, 'RotHunter: running detector hot-hub-file');
133
+ emit({ state: 'detecting', detector: 'hot-hub-file' });
134
+ findings.push(...detectHotHubFiles({ graph: importGraph }));
135
+ if (!isMulti) {
136
+ // Single-workspace path: paths are already real workspace-relative.
137
+ const local = await runWorkspaceLocalDetectors({
138
+ workspaceRoot: opts.workspaceRoot,
139
+ files: parsed.files,
140
+ imports: parsed.imports,
141
+ symbols,
142
+ iacEntries,
143
+ emit,
144
+ });
145
+ findings.push(...local);
146
+ }
147
+ else {
148
+ // Multi-workspace: each detector that needs real workspace-relative
149
+ // paths (file-walking, git-based, fs-walking) runs once per linked
150
+ // workspace, with paths de-prefixed before invocation and re-prefixed
151
+ // on the way out so findings still point at globally-unique files +
152
+ // workspace-namespaced fingerprints (no cross-workspace collisions).
153
+ if (!config) {
154
+ // Defensive — isMulti is only true when config was set above.
155
+ logger.error('RotHunter: isMulti without config — skipping local detectors');
156
+ }
157
+ else {
158
+ for (const ws of config.workspaces) {
159
+ const wsPrefix = `${ws.name}/`;
160
+ const wsFiles = parsed.files
161
+ .filter((f) => f.startsWith(wsPrefix))
162
+ .map((f) => f.slice(wsPrefix.length));
163
+ const wsSymbols = symbols
164
+ .filter((s) => s.workspace === ws.name)
165
+ .map((s) => ({ ...s, file: stripPrefix(s.file, wsPrefix) }));
166
+ const wsImports = parsed.imports
167
+ .filter((i) => i.sourceWorkspace === ws.name)
168
+ .map((i) => ({
169
+ ...i,
170
+ source: stripPrefix(i.source, wsPrefix),
171
+ target: i.target && i.targetWorkspace === ws.name ? stripPrefix(i.target, wsPrefix) : null,
172
+ }));
173
+ const wsIacEntries = resolveIacEntryFiles(ws.rootAbs, wsFiles);
174
+ logger.info({ workspace: ws.name, files: wsFiles.length, symbols: wsSymbols.length }, 'RotHunter: running workspace-local detectors');
175
+ const wsFindings = await runWorkspaceLocalDetectors({
176
+ workspaceRoot: ws.rootAbs,
177
+ files: wsFiles,
178
+ imports: wsImports,
179
+ symbols: wsSymbols,
180
+ iacEntries: wsIacEntries,
181
+ emit,
182
+ });
183
+ for (const f of wsFindings) {
184
+ for (const ev of f.evidence)
185
+ ev.file = `${wsPrefix}${ev.file}`;
186
+ // Namespace the fingerprint by workspace so two workspaces with
187
+ // identically-named files don't collide in the FP store.
188
+ f.fingerprint = `${ws.name}:${f.fingerprint}`;
189
+ }
190
+ findings.push(...wsFindings);
191
+ }
192
+ // Cross-workspace race-condition pass. shared-db-write +
193
+ // api-race fire when ≥ 2 distinct files write the same DB
194
+ // column / hit the same API endpoint — exactly the cross-
195
+ // service race shape that lives between packages in a
196
+ // monorepo (billing-service writes user.tier in one repo,
197
+ // account-service writes it from another). Running these
198
+ // per-workspace misses every cross-service race because each
199
+ // package has only one writer locally.
200
+ const crossFindings = await runCrossWorkspaceRaceDetectors(config.workspaces, emit);
201
+ findings.push(...crossFindings);
202
+ }
203
+ }
204
+ // Drop findings the caller has explicitly de-selected BEFORE the LLM
205
+ // pass — they would be filtered out post-LLM anyway and the verdict cost
206
+ // is wasted otherwise. Most relevant on big repos where the user runs a
207
+ // narrow `--detectors race-condition,shared-db-write,api-race` scan: the
208
+ // dup-type / dup-function / mutation candidates can otherwise dominate
209
+ // LLM cost (e.g. Outline: 715 LLM candidates, ~95 % dup-type).
210
+ if (opts.detectorsAllow || opts.detectorsDeny) {
211
+ const allow = opts.detectorsAllow;
212
+ const deny = opts.detectorsDeny;
213
+ const before = findings.length;
214
+ const kept = findings.filter((f) => {
215
+ if (allow && !allow.has(f.detectorId))
216
+ return false;
217
+ if (deny && deny.has(f.detectorId))
218
+ return false;
219
+ return true;
220
+ });
221
+ findings.length = 0;
222
+ findings.push(...kept);
223
+ if (kept.length !== before) {
224
+ logger.info({ kept: kept.length, dropped: before - kept.length }, 'RotHunter: applied detector allow/deny filter before LLM pass');
225
+ }
226
+ }
227
+ const threshold = opts.llmRejectionThreshold ?? 0.7;
228
+ const envConc = Number(process.env.ROTHUNTER_LLM_CONCURRENCY);
229
+ const llmConcurrency = Math.max(1, Math.min(16, Math.floor(opts.llmConcurrency ?? (Number.isFinite(envConc) && envConc > 0 ? envConc : 1))));
230
+ await this.runLlmConfirmation(findings, symbols, threshold, opts.llm, emit, llmConcurrency, opts.abortSignal, opts.workspaceRoot, opts.llmAutoFpThreshold);
231
+ const durationMs = Date.now() - startedAt;
232
+ emit({ state: 'done', findings: findings.length, durationMs });
233
+ return {
234
+ symbols,
235
+ findings,
236
+ durationMs,
237
+ };
238
+ }
239
+ async runLlmConfirmation(findings, symbols, threshold, injectedLlm, emit, concurrency = 1, abortSignal, workspaceRoot, llmAutoFpThreshold) {
240
+ const autoFpThreshold = llmAutoFpThreshold ?? LLM_FP_THRESHOLD;
241
+ const { LlmConfirmer } = await import('./extraction/llm-confirmer.js');
242
+ const { MutationConfirmer } = await import('./extraction/mutation-confirmer.js');
243
+ const { RaceConfirmer } = await import('./extraction/race-confirmer.js');
244
+ const { SharedDbWriteConfirmer } = await import('./extraction/shared-db-write-confirmer.js');
245
+ const { ApiRaceConfirmer } = await import('./extraction/api-race-confirmer.js');
246
+ const { TriageConfirmer } = await import('./extraction/triage-confirmer.js');
247
+ const { createDefaultLlmClient } = await import('./adapters/llm.js');
248
+ const symbolById = new Map(symbols.map((s) => [s.id, s]));
249
+ const candidates = findings.filter((f) => requiresLlmConfirmation(f, symbolById));
250
+ if (candidates.length === 0)
251
+ return;
252
+ const llm = injectedLlm ?? createDefaultLlmClient();
253
+ logger.info({ count: candidates.length }, 'RotHunter: warming up LLM');
254
+ const llmReady = await llm.warmup();
255
+ if (!llmReady) {
256
+ // No LLM reachable — skip the confirmation pass entirely so we
257
+ // don't burn N × verdict-timeout on a scan that has no oracle.
258
+ // Findings stay at their deterministic severity / confidence.
259
+ logger.warn({ count: candidates.length }, 'RotHunter: LLM warmup failed; skipping confirmation pass');
260
+ emit?.({ state: 'llm-start', total: 0 });
261
+ return;
262
+ }
263
+ const dupConfirmer = new LlmConfirmer(llm);
264
+ const mutationConfirmer = new MutationConfirmer(llm);
265
+ const raceConfirmer = new RaceConfirmer(llm);
266
+ const sharedDbConfirmer = new SharedDbWriteConfirmer(llm);
267
+ const apiRaceConfirmer = new ApiRaceConfirmer(llm);
268
+ const triageConfirmer = new TriageConfirmer(llm);
269
+ logger.info({ count: candidates.length }, 'RotHunter: LLM confirmation pass');
270
+ emit?.({ state: 'llm-start', total: candidates.length });
271
+ let llmDone = 0;
272
+ const reportVerdict = (finding, race, confidence, reason, latencyMs) => {
273
+ llmDone += 1;
274
+ emit?.({
275
+ state: 'llm-verdict',
276
+ done: llmDone,
277
+ total: candidates.length,
278
+ detectorId: finding.detectorId,
279
+ race,
280
+ confidence,
281
+ reason: reason.slice(0, 120),
282
+ latencyMs,
283
+ cluster: clusterLabel(finding),
284
+ });
285
+ };
286
+ const processOne = async (finding) => {
287
+ const verdictStart = Date.now();
288
+ if (finding.detectorId === 'duplicate-type' || finding.detectorId === 'duplicate-function') {
289
+ const ids = finding.evidence
290
+ .map((ev) => findSymbolId(symbols, ev.file, ev.range.startLine))
291
+ .filter((id) => Boolean(id));
292
+ if (ids.length < 2)
293
+ return;
294
+ const a = symbolById.get(ids[0]);
295
+ const b = symbolById.get(ids[1]);
296
+ if (!a || !b)
297
+ return;
298
+ const projectConv = workspaceRoot
299
+ ? readProjectConventions(workspaceRoot, a.file)
300
+ : undefined;
301
+ const result = await dupConfirmer.confirmSameConcept(a, b, projectConv);
302
+ if (!result)
303
+ return;
304
+ if (result.same_concept) {
305
+ finding.confidence = Math.min(0.97, Math.max(finding.confidence, result.confidence));
306
+ finding.description += `\n\n**LLM confirmation:** ${result.reason} (confidence ${result.confidence.toFixed(2)})`;
307
+ finding.layer = 3;
308
+ }
309
+ else {
310
+ finding.confidence = Math.min(finding.confidence, 1 - result.confidence) * 0.7;
311
+ finding.description += `\n\n**LLM rejection:** ${result.reason} — not considered a domain duplicate.`;
312
+ if (finding.confidence < threshold) {
313
+ finding.severity = 'low';
314
+ }
315
+ // Same auto-FP routing as TriageConfirmer-driven detectors:
316
+ // a high-confidence negative verdict means the LLM is sure
317
+ // these are not the same concept (framework idiom, env-helper
318
+ // symmetry that project conventions endorse, …) — moving
319
+ // them out of the open list matches the user's expectation.
320
+ if (result.confidence >= autoFpThreshold) {
321
+ finding.llmFalsePositive = {
322
+ confidence: result.confidence,
323
+ reason: result.reason,
324
+ };
325
+ }
326
+ }
327
+ reportVerdict(finding, result.same_concept, result.confidence, result.reason, Date.now() - verdictStart);
328
+ }
329
+ else if (finding.detectorId === 'api-race') {
330
+ // Cluster meta lives in evidence[].note as JSON (emitted by the
331
+ // detector). Title is human-facing only — never re-parse it.
332
+ const first = parseEvidenceNote(finding.evidence[0]);
333
+ const method = first.method ?? '';
334
+ const pathPattern = first.pathPattern ?? '';
335
+ if (!method || !pathPattern)
336
+ return;
337
+ const clientSet = new Set();
338
+ for (const ev of finding.evidence) {
339
+ const meta = parseEvidenceNote(ev);
340
+ if (meta.client)
341
+ clientSet.add(meta.client);
342
+ }
343
+ const clients = clientSet.size > 0 ? [...clientSet].join('+') : 'unknown';
344
+ const sites = finding.evidence.slice(0, 8).map((ev) => {
345
+ const meta = parseEvidenceNote(ev);
346
+ return {
347
+ file: ev.file,
348
+ line: ev.range.startLine,
349
+ enclosingName: meta.enclosingName?.trim() || undefined,
350
+ enclosingSource: ev.snippet,
351
+ };
352
+ });
353
+ const verdict = await apiRaceConfirmer.confirm({
354
+ method,
355
+ pathPattern,
356
+ clients,
357
+ sites,
358
+ });
359
+ if (!verdict)
360
+ return;
361
+ applyClusterVerdict(finding, { positive: verdict.race, confidence: verdict.confidence, reason: verdict.reason }, { threshold, positiveLabel: 'real cross-flow API race', negativeLabel: 'safe', autoFpThreshold });
362
+ reportVerdict(finding, verdict.race, verdict.confidence, verdict.reason, Date.now() - verdictStart);
363
+ }
364
+ else if (finding.detectorId === 'shared-db-write') {
365
+ // Cluster meta lives in evidence[].note as JSON (emitted by the
366
+ // detector). Title is human-facing only — never re-parse it.
367
+ const first = parseEvidenceNote(finding.evidence[0]);
368
+ const entity = first.entity ?? '';
369
+ const column = first.column ?? '';
370
+ if (!entity || !column)
371
+ return;
372
+ const adapterSet = new Set();
373
+ for (const ev of finding.evidence) {
374
+ const meta = parseEvidenceNote(ev);
375
+ if (meta.adapter)
376
+ adapterSet.add(meta.adapter);
377
+ }
378
+ const adapters = adapterSet.size > 0 ? [...adapterSet].join('+') : 'unknown';
379
+ const sites = finding.evidence.slice(0, 8).map((ev) => {
380
+ const meta = parseEvidenceNote(ev);
381
+ return {
382
+ file: ev.file,
383
+ line: ev.range.startLine,
384
+ enclosingName: meta.enclosingName?.trim() || undefined,
385
+ enclosingSource: ev.snippet,
386
+ };
387
+ });
388
+ const verdict = await sharedDbConfirmer.confirm({
389
+ entity,
390
+ column,
391
+ adapters,
392
+ sites,
393
+ });
394
+ if (!verdict)
395
+ return;
396
+ applyClusterVerdict(finding, { positive: verdict.race, confidence: verdict.confidence, reason: verdict.reason }, { threshold, positiveLabel: 'real cross-flow race', negativeLabel: 'safe', autoFpThreshold });
397
+ reportVerdict(finding, verdict.race, verdict.confidence, verdict.reason, Date.now() - verdictStart);
398
+ }
399
+ else if (finding.detectorId === 'race-condition') {
400
+ const ev = finding.evidence[0];
401
+ if (!ev || !ev.note)
402
+ return;
403
+ let meta;
404
+ try {
405
+ meta = JSON.parse(ev.note);
406
+ }
407
+ catch {
408
+ return;
409
+ }
410
+ const isPromiseAll = finding.fingerprint.startsWith('race:promise-all');
411
+ const pattern = isPromiseAll
412
+ ? 'promise-all'
413
+ : /emitter/i.test(meta.enclosingName ?? '')
414
+ ? 'emitter-handler'
415
+ : 'read-modify-write';
416
+ const verdict = await raceConfirmer.confirm({
417
+ file: ev.file,
418
+ line: ev.range.startLine,
419
+ pattern,
420
+ target: meta.target ?? 'unknown',
421
+ enclosingSource: ev.snippet,
422
+ enclosingName: meta.enclosingName || undefined,
423
+ });
424
+ if (!verdict)
425
+ return;
426
+ applyClusterVerdict(finding, { positive: verdict.race, confidence: verdict.confidence, reason: verdict.reason }, { threshold, positiveLabel: 'real race', negativeLabel: 'safe', autoFpThreshold });
427
+ reportVerdict(finding, verdict.race, verdict.confidence, verdict.reason, Date.now() - verdictStart);
428
+ }
429
+ else if (finding.detectorId === 'mutation') {
430
+ const ev = finding.evidence[0];
431
+ if (!ev || !ev.note)
432
+ return;
433
+ let meta;
434
+ try {
435
+ meta = JSON.parse(ev.note);
436
+ }
437
+ catch {
438
+ return;
439
+ }
440
+ const verdict = await mutationConfirmer.confirm({
441
+ file: ev.file,
442
+ line: ev.range.startLine,
443
+ pattern: meta.pattern ?? 'mutation',
444
+ escapes: Boolean(meta.escapes),
445
+ snippet: ev.snippet,
446
+ enclosingSource: meta.enclosingSource ?? ev.snippet,
447
+ enclosingName: meta.enclosingName || undefined,
448
+ });
449
+ if (!verdict)
450
+ return;
451
+ // Mutation maps to the shared shape: positive = !intentional
452
+ // (bug-shaped). One subtle difference from the other three: the
453
+ // severity bump fires on `severity === 'medium'` regardless of
454
+ // confidence (the original code didn't gate on 0.85). We preserve
455
+ // that by passing positiveLabel/negativeLabel and relying on the
456
+ // shared helper's gate — which is acceptably equivalent in
457
+ // practice because the mutation confirmer rarely emits bug-shaped
458
+ // with confidence < 0.85.
459
+ applyClusterVerdict(finding, { positive: !verdict.intentional, confidence: verdict.confidence, reason: verdict.reason }, { threshold, positiveLabel: 'potential bug', negativeLabel: 'intentional', autoFpThreshold });
460
+ reportVerdict(finding, !verdict.intentional, verdict.confidence, verdict.reason, Date.now() - verdictStart);
461
+ }
462
+ else if (TRIAGE_DETECTORS.has(finding.detectorId)) {
463
+ // Generic real-vs-FP triage for detectors with no cluster
464
+ // confirmer of their own. For reachability + hub detectors we
465
+ // ALSO pass structural context (sibling signatures, file role)
466
+ // so the LLM can answer "is this used through a type surface
467
+ // or framework convention" without guessing from the snippet.
468
+ const ev = finding.evidence[0];
469
+ if (!ev)
470
+ return;
471
+ const verdict = await triageConfirmer.confirm({
472
+ detectorId: finding.detectorId,
473
+ severity: finding.severity,
474
+ title: finding.title,
475
+ description: finding.description,
476
+ suggestion: finding.suggestion,
477
+ evidenceFile: ev.file,
478
+ evidenceStartLine: ev.range.startLine,
479
+ evidenceEndLine: ev.range.endLine,
480
+ evidenceSnippet: ev.snippet,
481
+ extraContext: buildTriageContext(finding, symbolById, workspaceRoot),
482
+ });
483
+ if (!verdict)
484
+ return;
485
+ applyClusterVerdict(finding, { positive: verdict.real, confidence: verdict.confidence, reason: verdict.reason }, { threshold, positiveLabel: 'real defect', negativeLabel: 'intentional pattern', autoFpThreshold });
486
+ reportVerdict(finding, verdict.real, verdict.confidence, verdict.reason, Date.now() - verdictStart);
487
+ }
488
+ };
489
+ // Run with a small worker pool. Each "worker" pulls the next finding
490
+ // off the shared cursor and awaits its verdict — the LLM backend
491
+ // dictates real throughput (llama.cpp `--parallel N -cb`, vLLM dynamic
492
+ // batching). Concurrency 1 reproduces the original sequential flow.
493
+ //
494
+ // Cancellation: workers re-check `abortSignal.aborted` before
495
+ // every verdict task. This is the only reliable abort path — the
496
+ // old "throw inside onProgress" trick was swallowed by `emit()`'s
497
+ // catch and never reached the pool, so cancelled scans kept
498
+ // burning LLM calls (and blocking the queue) until they ran out
499
+ // of findings.
500
+ logger.info({ concurrency }, 'RotHunter: LLM concurrency');
501
+ let cursor = 0;
502
+ const workers = Array.from({ length: concurrency }, async () => {
503
+ while (true) {
504
+ if (abortSignal?.aborted)
505
+ return;
506
+ const idx = cursor++;
507
+ if (idx >= candidates.length)
508
+ return;
509
+ try {
510
+ await processOne(candidates[idx]);
511
+ }
512
+ catch (err) {
513
+ // A single bad finding must not poison the whole pool. The
514
+ // verdict is already accounted for in reportVerdict; log + move on.
515
+ logger.warn({ err: err.message, detector: candidates[idx].detectorId }, 'LLM verdict task threw');
516
+ }
517
+ }
518
+ });
519
+ await Promise.all(workers);
520
+ if (abortSignal?.aborted) {
521
+ throw new Error('scan cancelled by operator');
522
+ }
523
+ }
524
+ }
525
/**
 * Look up the id of the symbol declared at `startLine` of `file`.
 *
 * @param symbols   List of symbol records (`file`, `range.startLine`, `id`).
 * @param file      File path to match exactly.
 * @param startLine Start line to match exactly.
 * @returns The `id` of the first matching symbol, or `undefined` when none matches.
 */
function findSymbolId(symbols, file, startLine) {
    for (const candidate of symbols) {
        const sameSpot = candidate.file === file && candidate.range.startLine === startLine;
        if (sameSpot) {
            return candidate.id;
        }
    }
    return undefined;
}
528
/**
 * Remove `prefix` from the start of `file` when it is present.
 *
 * @returns `file` without the leading `prefix`, or `file` unchanged when it
 *          does not start with `prefix`.
 */
function stripPrefix(file, prefix) {
    if (!file.startsWith(prefix)) {
        return file;
    }
    return file.slice(prefix.length);
}
531
/**
 * Cross-workspace pass for the shared-db-write and api-race detectors.
 *
 * All workspaces are loaded into ONE ts-morph project so that writers /
 * callers living in different packages end up in the same analysis set.
 * Each detector is run once against that merged project, rooted at the
 * common ancestor directory of all workspace roots, so evidence paths are
 * relative to that ancestor rather than absolute.
 *
 * Findings whose evidence stays inside a single workspace are dropped
 * (see `spansMultipleWorkspaces`); the survivors are re-fingerprinted by
 * `tagCross`.
 *
 * @param workspaces Array of `{ rootAbs, name }` workspace descriptors.
 * @param emit       Progress callback, called with `{ state, detector }`
 *                   before each detector runs.
 * @returns Cross-workspace findings; an empty array when fewer than two
 *          workspaces are given (nothing is parsed in that case).
 */
async function runCrossWorkspaceRaceDetectors(workspaces, emit) {
    if (workspaces.length < 2) {
        return [];
    }
    const project = new Project({
        skipAddingFilesFromTsConfig: true,
        skipFileDependencyResolution: true,
        compilerOptions: { allowJs: true, jsx: 4 /* preserve */ },
    });
    for (const { rootAbs } of workspaces) {
        project.addSourceFilesAtPaths([
            `${rootAbs}/**/*.{ts,tsx,mts,cts,js,jsx,mjs,cjs}`,
            `!${rootAbs}/**/node_modules/**`,
            `!${rootAbs}/**/dist/**`,
            `!${rootAbs}/**/build/**`,
        ]);
    }
    // Shared root: detectors emit evidence paths relative to it.
    const root = commonAncestor(workspaces.map((ws) => ws.rootAbs));
    // root-relative workspace directory -> workspace name. Lets us tell which
    // workspace each evidence file belongs to when filtering below.
    const wsByRelRoot = new Map();
    for (const { rootAbs, name } of workspaces) {
        const rel = path.relative(root, rootAbs);
        wsByRelRoot.set(rel === '' ? '.' : rel, name);
    }
    // Keep only findings that genuinely cross a workspace boundary, then give
    // them the distinct cross-workspace fingerprint.
    const crossOnly = (findings) => findings
        .filter((f) => spansMultipleWorkspaces(f, wsByRelRoot))
        .map(tagCross);
    emit({ state: 'detecting', detector: 'cross-shared-db-write' });
    const dbFindings = crossOnly(detectSharedDbWrites({ workspaceRoot: root, files: [], project }));
    emit({ state: 'detecting', detector: 'cross-api-race' });
    const apiFindings = crossOnly(detectApiRaces({ workspaceRoot: root, files: [], project }));
    return [...dbFindings, ...apiFindings];
}
581
/**
 * True when the finding's evidence covers at least two distinct workspaces.
 *
 * Each evidence file is attributed to the workspace whose root-relative
 * directory is the LONGEST matching prefix of the file path, so a root
 * workspace (`.`) or a parent directory never swallows files that belong
 * to a more specific nested workspace. Both the evidence path and the
 * workspace roots are compared with `/` separators, so keys that contain
 * backslashes (as a Windows-style relative path would) still match the
 * normalised evidence paths.
 *
 * @param finding     Finding with an `evidence` array of `{ file }` entries.
 * @param wsByRelRoot Map of root-relative workspace directory (`'.'` for the
 *                    root itself) to workspace name.
 * @returns `true` as soon as evidence from a second workspace is seen,
 *          otherwise `false`. Evidence matching no workspace is ignored.
 */
function spansMultipleWorkspaces(finding, wsByRelRoot) {
    const toPosix = (p) => p.split('\\').join('/');
    // Most specific (longest) prefixes first so `find` below returns the
    // tightest owning workspace. The '.' root becomes the empty prefix,
    // which matches every file and therefore sorts last.
    const roots = [...wsByRelRoot]
        .map(([relRoot, name]) => {
            const rel = toPosix(relRoot);
            return { prefix: rel === '.' ? '' : `${rel}/`, name };
        })
        .sort((a, b) => b.prefix.length - a.prefix.length);
    const wsHit = new Set();
    for (const ev of finding.evidence) {
        const file = toPosix(ev.file);
        const owner = roots.find((r) => file.startsWith(r.prefix));
        if (!owner) {
            continue;
        }
        wsHit.add(owner.name);
        if (wsHit.size >= 2) {
            return true;
        }
    }
    return false;
}
604
/**
 * Return a shallow copy of finding `f` whose fingerprint carries the
 * `cross-ws:` prefix, keeping cross-workspace findings distinguishable
 * from findings the per-workspace pass produced with the same detector.
 * The input object is not modified.
 */
function tagCross(f) {
    const fingerprint = `cross-ws:${f.fingerprint}`;
    return { ...f, fingerprint };
}
609
/**
 * Longest shared leading directory of a list of `/`-separated paths.
 *
 * Comparison is per path segment, not per character, so `/a/bc` and
 * `/a/bd` share `/a`.
 *
 * @param paths Array of path strings.
 * @returns `''` for an empty list, the path itself for a single-entry list,
 *          otherwise the shared prefix joined with `/` (or `'/'` when the
 *          joined prefix is empty).
 */
function commonAncestor(paths) {
    if (paths.length === 0) {
        return '';
    }
    if (paths.length === 1) {
        return paths[0];
    }
    const [head, ...rest] = paths.map((p) => p.split('/'));
    const limit = Math.min(head.length, ...rest.map((segs) => segs.length));
    // Count how many leading segments every path agrees on.
    let shared = 0;
    while (shared < limit && rest.every((segs) => segs[shared] === head[shared])) {
        shared += 1;
    }
    return head.slice(0, shared).join('/') || '/';
}
626
/**
 * Run the workspace-local detectors for one workspace and return all of
 * their findings in a single array, in detector order.
 *
 * `dead-handler` runs first, before any parsing. Afterwards one ts-morph
 * project is created from `ctx.files` and handed to every detector that
 * accepts a `project`, so the files are added to a project only once.
 * Before each detector runs, a log line is written and `ctx.emit` is
 * called with `{ state: 'detecting', detector: <id> }`.
 *
 * @param ctx Workspace context; this function reads `files`, `symbols`,
 *            `imports`, `iacEntries`, `workspaceRoot` and `emit`.
 * @returns Array of findings from all detectors.
 */
async function runWorkspaceLocalDetectors(ctx) {
    const collected = [];
    const files = ctx.files;
    const symbolList = [...ctx.symbols];
    const importList = [...ctx.imports];
    // Log + progress event shared by every detector, dead-handler included.
    const announce = (id) => {
        logger.info({ files: files.length }, `RotHunter: running detector ${id}`);
        ctx.emit({ state: 'detecting', detector: id });
    };
    announce('dead-handler');
    collected.push(...detectDeadHandlers({ files, iacEntries: ctx.iacEntries, imports: importList }));
    // One shared ts-morph project, reused by every project-based detector below.
    const { Project: SharedProject } = await import('ts-morph');
    const sharedProject = new SharedProject({
        skipAddingFilesFromTsConfig: true,
        skipFileDependencyResolution: true,
    });
    for (const rel of files) {
        sharedProject.addSourceFileAtPathIfExists(path.join(ctx.workspaceRoot, rel));
    }
    // Fresh argument object per call so detectors never share one.
    const withProject = () => ({ workspaceRoot: ctx.workspaceRoot, files, project: sharedProject });
    const detectors = [
        ['mutation', () => detectMutations(withProject())],
        ['race-condition', () => detectRaceConditions(withProject())],
        ['shared-db-write', () => detectSharedDbWrites(withProject())],
        ['api-race', () => detectApiRaces(withProject())],
        ['bad-config', () => detectBadConfig({ workspaceRoot: ctx.workspaceRoot, files })],
        ['silent-catch', () => detectSilentCatches(withProject())],
        ['skip-tests', () => detectSkipTests(withProject())],
        ['long-file', () => detectLongFiles(withProject())],
        ['console-log-prod', () => detectConsoleLogsInProd(withProject())],
        ['magic-numbers', () => detectMagicNumbers(withProject())],
        ['mutable-globals', () => detectMutableGlobals(withProject())],
        ['unused-deps', () => detectUnusedDeps({ workspaceRoot: ctx.workspaceRoot, imports: importList })],
        ['similar-functions', () => detectSimilarFunctions({ workspaceRoot: ctx.workspaceRoot, symbols: symbolList })],
        // todo-comments is deliberately given no `files`: it only receives the
        // workspace root.
        ['todo-comments', () => detectTodoComments({ workspaceRoot: ctx.workspaceRoot })],
    ];
    for (const [id, detect] of detectors) {
        announce(id);
        collected.push(...detect());
    }
    return collected;
}
667
/**
 * Fold a positive/negative LLM verdict into a finding, mutating it in place.
 *
 * Positive verdict:
 *   - confidence becomes max(current, verdict confidence), capped at 0.95;
 *   - a `medium` severity is raised to `high` when the verdict confidence
 *     is at least 0.85.
 * Negative verdict:
 *   - confidence is scaled by `(1 - verdict confidence)`, floored at 0;
 *   - severity drops to `low` when the new confidence is below
 *     `opts.threshold`;
 *   - `finding.llmFalsePositive` is set when the verdict confidence reaches
 *     `opts.autoFpThreshold` (falling back to `LLM_FP_THRESHOLD`).
 * In both cases an "LLM verdict" paragraph is appended to the description
 * and `finding.layer` is set to 3.
 *
 * @param finding Finding to update (`confidence`, `description`, `severity`).
 * @param verdict `{ positive, confidence, reason }`.
 * @param opts    `{ threshold, positiveLabel, negativeLabel, autoFpThreshold? }`.
 */
export function applyClusterVerdict(finding, verdict, opts) {
    const { positive, confidence: llmConfidence, reason } = verdict;
    const tail = ` — ${reason} (confidence ${llmConfidence.toFixed(2)})`;
    if (positive) {
        finding.confidence = Math.min(0.95, Math.max(finding.confidence, llmConfidence));
        finding.description += `\n\n**LLM verdict:** ${opts.positiveLabel}${tail}`;
        const shouldEscalate = finding.severity === 'medium' && llmConfidence >= 0.85;
        if (shouldEscalate) {
            finding.severity = 'high';
        }
        finding.layer = 3;
        return;
    }
    finding.confidence = Math.max(0.0, finding.confidence * (1 - llmConfidence));
    finding.description += `\n\n**LLM verdict:** ${opts.negativeLabel}${tail}`;
    if (finding.confidence < opts.threshold) {
        finding.severity = 'low';
    }
    // A sufficiently confident negative verdict marks the finding as an
    // LLM-judged false positive, recording the confidence and reason.
    const fpFloor = opts.autoFpThreshold ?? LLM_FP_THRESHOLD;
    if (llmConfidence >= fpFloor) {
        finding.llmFalsePositive = {
            confidence: llmConfidence,
            reason,
        };
    }
    finding.layer = 3;
}
706
+ /**
707
+ * Verdict-confidence floor at which a negative LLM verdict moves a
708
+ * finding into the auto-FP bucket. Set low (0.6) so any reasonably
709
+ * confident "intentional / FP" verdict routes the finding out of the
710
+ * open list — the operator's stated preference is "if the LLM says
711
+ * FP, treat it as auto FP, I'll un-mark if it's wrong". Below 0.6 the
712
+ * LLM is genuinely undecided and the deterministic finding stays in
713
+ * the open list at degraded confidence.
714
+ */
715
+ export const LLM_FP_THRESHOLD = 0.6;
716
/**
 * Assemble the free-form extra context text for a triage call on `finding`.
 *
 * Up to two sections are produced, separated by a blank line:
 *   1. a project-conventions block, when `workspaceRoot` is given and
 *      `readProjectConventions` returns something for the evidence file;
 *   2. detector-specific hints from `buildDetectorContext`.
 *
 * @param finding       Finding; only its first evidence entry is used.
 * @param symbolById    Map of symbol id to symbol record, forwarded to the
 *                      per-detector context builder.
 * @param workspaceRoot Workspace root directory, or a falsy value to skip
 *                      the conventions lookup.
 * @returns The joined context text, or `undefined` when the finding has no
 *          evidence or neither section yields any text.
 */
export function buildTriageContext(finding, symbolById, workspaceRoot) {
    const primary = finding.evidence[0];
    if (!primary) {
        return undefined;
    }
    const sections = [];
    // Conventions go first so they precede the detector-specific hints.
    const conventions = workspaceRoot
        ? readProjectConventions(workspaceRoot, primary.file)
        : undefined;
    if (conventions) {
        sections.push(`Project conventions (concatenated from CLAUDE.md / AGENTS.md / .cursorrules / copilot-instructions.md / CONTRIBUTING.md / … as present — treat as authoritative for this codebase, override generic best-practice when they conflict):\n${conventions}`);
    }
    const hints = buildDetectorContext(finding, primary, symbolById, workspaceRoot);
    if (hints) {
        sections.push(hints);
    }
    return sections.length > 0 ? sections.join('\n\n') : undefined;
}
748
/**
 * Detector-specific hint text for a triage call.
 *
 * `dead-export` and `magic-numbers` delegate to their dedicated builders
 * (`magic-numbers` only when a workspace root is available, because its
 * builder reads the file from disk). `hot-hub-file`, `long-file` and
 * `todo-comments` return a fixed instruction string.
 *
 * @returns Hint text, or `undefined` for any other detector.
 */
function buildDetectorContext(finding, ev, symbolById, workspaceRoot) {
    switch (finding.detectorId) {
        case 'dead-export':
            return buildDeadExportContext(finding, ev.file, symbolById, workspaceRoot);
        case 'magic-numbers':
            return workspaceRoot
                ? buildMagicNumbersContext(ev.file, ev.range.startLine, workspaceRoot, symbolById)
                : undefined;
        case 'hot-hub-file':
            return 'This file is being flagged as an import hub. Decide whether the project deliberately keeps it as a single import surface (barrel / type surface) or whether it accumulates unrelated concerns.';
        case 'long-file':
            return 'Look at the snippet shape: a recognizer / config / pattern TABLE is single-concern locality and FALSE positive; mixed unrelated logic accumulating across many features is REAL.';
        case 'todo-comments':
            return 'Discriminate actionable TODO / FIXME / HACK / XXX from documentary NOTE comments. A NOTE that explains a design decision in adjacent code is documentation, not technical debt.';
        default:
            return undefined;
    }
}
766
/**
 * Build extra context for a magic-numbers finding.
 *
 * The result contains, in order (sections separated by a blank line):
 *   - the declaration line of the tightest symbol enclosing `line`
 *     (smallest `endLine - startLine` span among symbols of `file` that
 *     contain the line), when one exists;
 *   - the contiguous comment block directly above that declaration
 *     (lines starting with `//`, `*` or `/*`), when present;
 *   - a code window of up to 8 lines on either side of the flagged line,
 *     with `>` marking the flagged line;
 *   - a fixed instruction paragraph for the LLM.
 *
 * The tightest symbol is chosen independently of the iteration order of
 * `symbolById`, and the doc comment always belongs to the same symbol as
 * the reported signature. Lines are split on `\n` or `\r\n`.
 *
 * @param file          File path relative to `workspaceRoot`.
 * @param line          1-based line number of the flagged literal.
 * @param workspaceRoot Directory the file path is resolved against.
 * @param symbolById    Map whose values are symbol records with `file` and
 *                      `range.startLine` / `range.endLine` (1-based).
 * @returns The context text, or `undefined` when the file cannot be read
 *          or `line` is outside the file.
 */
function buildMagicNumbersContext(file, line, workspaceRoot, symbolById) {
    let raw;
    try {
        raw = fs.readFileSync(path.join(workspaceRoot, file), 'utf-8');
    }
    catch {
        // Unreadable file: the caller simply gets no extra context.
        return undefined;
    }
    const lines = raw.split(/\r?\n/);
    if (line < 1 || line > lines.length)
        return undefined;
    // Pick the enclosing symbol with the smallest line span (deepest nesting).
    let tightest;
    let tightestSpan = Infinity;
    for (const s of symbolById.values()) {
        if (s.file !== file)
            continue;
        if (line < s.range.startLine || line > s.range.endLine)
            continue;
        const span = s.range.endLine - s.range.startLine;
        if (span < tightestSpan) {
            tightest = s;
            tightestSpan = span;
        }
    }
    let enclosingSig;
    let enclosingDoc;
    if (tightest) {
        const declIdx = tightest.range.startLine - 1;
        enclosingSig = (lines[declIdx] ?? '').trim();
        // Walk upward from the declaration collecting a contiguous comment
        // block; stop at the first blank or non-comment line.
        const docLines = [];
        for (let i = declIdx - 1; i >= 0; i--) {
            const t = (lines[i] ?? '').trim();
            if (t === '' || (!t.startsWith('//') && !t.startsWith('*') && !t.startsWith('/*')))
                break;
            docLines.unshift(t);
        }
        if (docLines.length > 0)
            enclosingDoc = docLines.join('\n');
    }
    // Up to 8 lines before and after the flagged line (0-based indices,
    // `winTo` inclusive; `slice` clamps at the end of the file).
    const winFrom = Math.max(0, line - 1 - 8);
    const winTo = Math.min(lines.length, line - 1 + 8);
    const window = lines
        .slice(winFrom, winTo + 1)
        .map((l, i) => {
            const lineNo = winFrom + i + 1;
            return `${lineNo === line ? '>' : ' '} ${lineNo}: ${l}`;
        })
        .join('\n');
    const parts = [];
    if (enclosingSig)
        parts.push(`Enclosing function signature:\n\`${enclosingSig}\``);
    if (enclosingDoc)
        parts.push(`Doc comment on the enclosing function:\n${enclosingDoc}`);
    parts.push(`Code window (\`>\` marks the flagged line):\n\`\`\`\n${window}\n\`\`\``);
    parts.push('Decide using the enclosing function + module name. If the literal is a domain constant local to this validator / encoder / parser (base58 lengths, IPv4 octets, ASCII boundary 127, retry-backoff thresholds, framework status codes) the answer is FALSE — naming each one inflates the binding count without clarifying anything. Flag REAL only when the literal is genuinely opaque business logic that a reader would have to guess about.');
    return parts.join('\n\n');
}
831
/**
 * Build extra context for a dead-export finding.
 *
 * Section 1 lists up to 6 other exported symbols of the same file, each
 * rendered as kind, name and the first non-blank line of its source
 * (truncated to 160 characters), followed by an instruction explaining
 * that appearing in one of those signatures makes the export reachable.
 * Section 2 is added when `workspaceRoot` is given and
 * `isPublishedLibrary(workspaceRoot)` is true.
 *
 * The export name is taken from the finding title via the pattern
 * `Unused export: <name>`; when the title does not match, the text falls
 * back to "this symbol" and no sibling is excluded by name.
 *
 * @returns The joined sections, or `undefined` when neither applies.
 */
function buildDeadExportContext(finding, file, symbolById, workspaceRoot) {
    const targetName = /Unused export:\s*(\S+)/i.exec(finding.title)?.[1];
    const shownName = targetName ?? 'this symbol';
    const siblingLines = [];
    for (const sym of symbolById.values()) {
        if (siblingLines.length >= 6) {
            break;
        }
        const isSibling = sym.file === file && sym.exported && sym.name !== targetName;
        if (!isSibling) {
            continue;
        }
        // First non-blank source line — taken as the declaration signature.
        const signature = sym.source.split('\n').find((ln) => ln.trim().length > 0) ?? '';
        if (signature) {
            siblingLines.push(`- ${sym.kind} \`${sym.name}\`: \`${signature.trim().slice(0, 160)}\``);
        }
    }
    const sections = [];
    if (siblingLines.length > 0) {
        sections.push(`Other exports in the same file (\`${file}\`):\n${siblingLines.join('\n')}\n\nIf \`${shownName}\` appears in any of those signatures (return type, parameter, generic constraint, extends clause) it is reachable through the public type surface and a FALSE positive.`);
    }
    // Consumers of a published package are not visible from inside this
    // workspace, so the LLM is told which workspace shape it is judging.
    if (workspaceRoot && isPublishedLibrary(workspaceRoot)) {
        sections.push(`Workspace shape: PUBLISHED npm LIBRARY (package.json has name + version, not private, declares an entry surface). Downstream consumers in OTHER repositories may import \`${shownName}\` even though no file inside THIS repo does. Lean toward FALSE positive when the symbol fits the library's domain (env-helper symmetry alongside other exports, types matching the package theme, utility functions named consistently with the published API) AND there is no obvious sign it is a stranded internal leftover (no \`@deprecated\` JSDoc, no \`unused-\` / \`legacy\` naming, no half-baked TODO).`);
    }
    return sections.length > 0 ? sections.join('\n\n') : undefined;
}
876
/**
 * Decode the JSON payload stored in an evidence entry's `note` field.
 *
 * @param ev Evidence entry (may be `null` / `undefined`).
 * @returns The parsed value when it is a non-null object (arrays included);
 *          otherwise `{}` — this covers a missing entry, a missing or empty
 *          note, invalid JSON, and JSON that decodes to a primitive or
 *          `null`. Callers can therefore read optional fields directly.
 */
function parseEvidenceNote(ev) {
    const note = ev?.note;
    if (!note) {
        return {};
    }
    let decoded;
    try {
        decoded = JSON.parse(note);
    }
    catch {
        return {};
    }
    const isObject = decoded !== null && typeof decoded === 'object';
    return isObject ? decoded : {};
}
894
/**
 * Short human-readable label for a finding's cluster, derived from the
 * structured note on its first evidence entry (the title is never parsed).
 *
 * Precedence:
 *   1. `<method> <pathPattern>` when both are present;
 *   2. `<entity>.<column>` when both are present;
 *   3. `target` when present.
 *
 * @returns The label, or `undefined` when the finding has no evidence or
 *          the note carries none of the fields above.
 */
function clusterLabel(finding) {
    const [head] = finding.evidence;
    if (!head) {
        return undefined;
    }
    const { method, pathPattern, entity, column, target } = parseEvidenceNote(head);
    if (method && pathPattern) {
        return `${method} ${pathPattern}`;
    }
    if (entity && column) {
        return `${entity}.${column}`;
    }
    return target ? target : undefined;
}
912
/**
 * Detector ids handled by the generic real-vs-false-positive triage
 * branch: detectors with no dedicated cluster confirmer of their own.
 * Membership here is what sends a finding down the `TriageConfirmer`
 * path; which severities qualify is decided by `requiresLlmConfirmation`.
 */
const TRIAGE_DETECTORS = new Set([
    // General code-quality detectors.
    ...[
        'silent-catch',
        'public-any',
        'mutable-globals',
        'bad-config',
        'long-function',
        'long-file',
        'magic-numbers',
        'hot-hub-file',
        'todo-comments',
    ],
    // Reachability detectors: the deterministic check misses framework
    // conventions, dynamic loaders and structural type surfaces.
    ...[
        'dead-export',
        'dead-module',
        'dead-handler',
        'dead-api',
    ],
    // Shape-only similarity: unrelated helpers can share an AST shape, so
    // the LLM judges semantic relatedness.
    ...[
        'similar-functions',
    ],
]);
941
/**
 * Detector ids whose findings are triaged by the LLM at EVERY severity,
 * `low` included. Every id listed here also appears in TRIAGE_DETECTORS.
 *
 * Triage detectors that are not listed here are only triaged when the
 * finding's severity is not `low` (see `requiresLlmConfirmation`).
 */
const ALWAYS_TRIAGE_DETECTORS = new Set([
    // Reachability detectors.
    ...[
        'dead-export',
        'dead-module',
        'dead-handler',
        'dead-api',
    ],
    // Design-intent / documentation-vs-debt judgement calls.
    ...[
        'todo-comments',
        'hot-hub-file',
        'long-file',
    ],
    // Emitted at 'low' severity, but whether a long function is acceptable
    // depends on the project's own conventions.
    ...[
        'long-function',
    ],
    // What survives the deterministic pass is mostly domain thresholds,
    // byte counts and similar judgement calls.
    ...[
        'magic-numbers',
    ],
]);
972
/**
 * Decide whether a finding should be sent to the LLM for confirmation.
 *
 * Rules, in order:
 *   1. `mutation`, `race-condition`, `shared-db-write` and `api-race`
 *      findings are always confirmed.
 *   2. A detector in TRIAGE_DETECTORS is confirmed when it is also in
 *      ALWAYS_TRIAGE_DETECTORS, or when the finding's severity is not `low`.
 *   3. Anything that is not `duplicate-type` / `duplicate-function` is not
 *      confirmed.
 *   4. Duplicate findings are confirmed when `layer >= 2`, when
 *      `confidence < 0.95`, or when the evidence resolves to symbols with at
 *      least two distinct names and the first symbol's structure has between
 *      1 and 3 fields (small shapes that can collide across unrelated types).
 *
 * @param finding    Finding under consideration.
 * @param symbolById Map of symbol id to symbol record, used to resolve the
 *                   finding's evidence locations back to symbols.
 * @returns `true` when the finding needs an LLM verdict.
 */
function requiresLlmConfirmation(finding, symbolById) {
    const { detectorId } = finding;
    switch (detectorId) {
        case 'mutation':
        case 'race-condition':
        case 'shared-db-write':
        case 'api-race':
            return true;
        default:
            break;
    }
    // Two-tier triage gate: always-triage detectors at any severity, the
    // remaining triage detectors only above the low tier.
    const triaged = TRIAGE_DETECTORS.has(detectorId);
    if (triaged && (ALWAYS_TRIAGE_DETECTORS.has(detectorId) || finding.severity !== 'low')) {
        return true;
    }
    const isDuplicate = detectorId === 'duplicate-type' || detectorId === 'duplicate-function';
    if (!isDuplicate) {
        return false;
    }
    if (finding.layer >= 2 || finding.confidence < 0.95) {
        return true;
    }
    // Resolve each evidence location to its symbol, skipping unresolved ones.
    const resolved = [];
    for (const ev of finding.evidence) {
        const id = findSymbolIdForEvidence(symbolById, ev.file, ev.range.startLine);
        if (!id) {
            continue;
        }
        const sym = symbolById.get(id);
        if (sym) {
            resolved.push(sym);
        }
    }
    const distinctNames = new Set(resolved.map((sym) => sym.name)).size;
    const structure = resolved[0]?.structure;
    let fieldCount = 0;
    if (structure && 'fields' in structure) {
        fieldCount = structure.fields?.length ?? 0;
    }
    return distinctNames >= 2 && fieldCount > 0 && fieldCount <= 3;
}
1031
/**
 * Find the id of the symbol declared at `startLine` of `file` by scanning
 * the values of `symbolById` in iteration order.
 *
 * @returns The first matching symbol's `id`, or `undefined` when no symbol
 *          matches.
 */
function findSymbolIdForEvidence(symbolById, file, startLine) {
    const cursor = symbolById.values();
    for (let step = cursor.next(); !step.done; step = cursor.next()) {
        const sym = step.value;
        const matches = sym.file === file && sym.range.startLine === startLine;
        if (matches) {
            return sym.id;
        }
    }
    return undefined;
}
1038
+ //# sourceMappingURL=rothunter.js.map