scip-query 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (330) hide show
  1. package/IMPROVEMENTS.md +143 -0
  2. package/PLAN.md +320 -0
  3. package/README.md +1213 -0
  4. package/dist/chunk-2QZ23IBN.js +55 -0
  5. package/dist/chunk-2QZ23IBN.js.map +1 -0
  6. package/dist/chunk-36OMT7ZJ.js +144 -0
  7. package/dist/chunk-36OMT7ZJ.js.map +1 -0
  8. package/dist/chunk-3E2X7RIE.js +101 -0
  9. package/dist/chunk-3E2X7RIE.js.map +1 -0
  10. package/dist/chunk-3UOUTZQT.js +45 -0
  11. package/dist/chunk-3UOUTZQT.js.map +1 -0
  12. package/dist/chunk-3ZZJVBIO.js +88 -0
  13. package/dist/chunk-3ZZJVBIO.js.map +1 -0
  14. package/dist/chunk-4TYLS5XX.js +10 -0
  15. package/dist/chunk-4TYLS5XX.js.map +1 -0
  16. package/dist/chunk-5FGUEU7N.js +101 -0
  17. package/dist/chunk-5FGUEU7N.js.map +1 -0
  18. package/dist/chunk-5WTJAXY2.js +61 -0
  19. package/dist/chunk-5WTJAXY2.js.map +1 -0
  20. package/dist/chunk-6NBLIDF4.js +24 -0
  21. package/dist/chunk-6NBLIDF4.js.map +1 -0
  22. package/dist/chunk-6SXADWLW.js +43 -0
  23. package/dist/chunk-6SXADWLW.js.map +1 -0
  24. package/dist/chunk-6VJ6Q7IE.js +65 -0
  25. package/dist/chunk-6VJ6Q7IE.js.map +1 -0
  26. package/dist/chunk-7OZPA5OO.js +258 -0
  27. package/dist/chunk-7OZPA5OO.js.map +1 -0
  28. package/dist/chunk-BEPIEVLR.js +76 -0
  29. package/dist/chunk-BEPIEVLR.js.map +1 -0
  30. package/dist/chunk-BFSCMC22.js +42 -0
  31. package/dist/chunk-BFSCMC22.js.map +1 -0
  32. package/dist/chunk-BP2ATLK2.js +110 -0
  33. package/dist/chunk-BP2ATLK2.js.map +1 -0
  34. package/dist/chunk-CM454WL3.js +114 -0
  35. package/dist/chunk-CM454WL3.js.map +1 -0
  36. package/dist/chunk-DCKMSTJ4.js +74 -0
  37. package/dist/chunk-DCKMSTJ4.js.map +1 -0
  38. package/dist/chunk-DEZKCZXD.js +40 -0
  39. package/dist/chunk-DEZKCZXD.js.map +1 -0
  40. package/dist/chunk-DVWGWHFW.js +99 -0
  41. package/dist/chunk-DVWGWHFW.js.map +1 -0
  42. package/dist/chunk-EMDQWNYR.js +102 -0
  43. package/dist/chunk-EMDQWNYR.js.map +1 -0
  44. package/dist/chunk-FFSWWE5O.js +33 -0
  45. package/dist/chunk-FFSWWE5O.js.map +1 -0
  46. package/dist/chunk-FGXRVW7G.js +73 -0
  47. package/dist/chunk-FGXRVW7G.js.map +1 -0
  48. package/dist/chunk-FUHJCHS4.js +158 -0
  49. package/dist/chunk-FUHJCHS4.js.map +1 -0
  50. package/dist/chunk-GJFURBEW.js +64 -0
  51. package/dist/chunk-GJFURBEW.js.map +1 -0
  52. package/dist/chunk-GTILYBH6.js +102 -0
  53. package/dist/chunk-GTILYBH6.js.map +1 -0
  54. package/dist/chunk-JJP7KQND.js +1 -0
  55. package/dist/chunk-JJP7KQND.js.map +1 -0
  56. package/dist/chunk-JKP5GH6T.js +213 -0
  57. package/dist/chunk-JKP5GH6T.js.map +1 -0
  58. package/dist/chunk-KCBMVQL5.js +38 -0
  59. package/dist/chunk-KCBMVQL5.js.map +1 -0
  60. package/dist/chunk-KVSW5KYP.js +78 -0
  61. package/dist/chunk-KVSW5KYP.js.map +1 -0
  62. package/dist/chunk-LAWMH22O.js +172 -0
  63. package/dist/chunk-LAWMH22O.js.map +1 -0
  64. package/dist/chunk-LB7OS35Q.js +72 -0
  65. package/dist/chunk-LB7OS35Q.js.map +1 -0
  66. package/dist/chunk-LUSIFBXO.js +57 -0
  67. package/dist/chunk-LUSIFBXO.js.map +1 -0
  68. package/dist/chunk-MBVNHJVN.js +44 -0
  69. package/dist/chunk-MBVNHJVN.js.map +1 -0
  70. package/dist/chunk-MGNMHKX3.js +15 -0
  71. package/dist/chunk-MGNMHKX3.js.map +1 -0
  72. package/dist/chunk-N5KEREIA.js +41 -0
  73. package/dist/chunk-N5KEREIA.js.map +1 -0
  74. package/dist/chunk-NDSQYIWT.js +71 -0
  75. package/dist/chunk-NDSQYIWT.js.map +1 -0
  76. package/dist/chunk-NUZ4OMU3.js +28 -0
  77. package/dist/chunk-NUZ4OMU3.js.map +1 -0
  78. package/dist/chunk-QOV2R2WT.js +170 -0
  79. package/dist/chunk-QOV2R2WT.js.map +1 -0
  80. package/dist/chunk-SEFSL2GF.js +78 -0
  81. package/dist/chunk-SEFSL2GF.js.map +1 -0
  82. package/dist/chunk-T6ARFSBZ.js +103 -0
  83. package/dist/chunk-T6ARFSBZ.js.map +1 -0
  84. package/dist/chunk-TBP6BICL.js +46 -0
  85. package/dist/chunk-TBP6BICL.js.map +1 -0
  86. package/dist/chunk-TDNNOR6D.js +97 -0
  87. package/dist/chunk-TDNNOR6D.js.map +1 -0
  88. package/dist/chunk-TSPZOMHC.js +195 -0
  89. package/dist/chunk-TSPZOMHC.js.map +1 -0
  90. package/dist/chunk-UNTPVD36.js +55 -0
  91. package/dist/chunk-UNTPVD36.js.map +1 -0
  92. package/dist/chunk-VRUJH4BO.js +88 -0
  93. package/dist/chunk-VRUJH4BO.js.map +1 -0
  94. package/dist/chunk-VZ7AMAFL.js +76 -0
  95. package/dist/chunk-VZ7AMAFL.js.map +1 -0
  96. package/dist/chunk-XFXDXEUN.js +24 -0
  97. package/dist/chunk-XFXDXEUN.js.map +1 -0
  98. package/dist/chunk-YZAA4LYG.js +169 -0
  99. package/dist/chunk-YZAA4LYG.js.map +1 -0
  100. package/dist/chunk-Z73NYSBZ.js +92 -0
  101. package/dist/chunk-Z73NYSBZ.js.map +1 -0
  102. package/dist/chunk-ZJRYBOEE.js +125 -0
  103. package/dist/chunk-ZJRYBOEE.js.map +1 -0
  104. package/dist/cli.js +5798 -0
  105. package/dist/cli.js.map +1 -0
  106. package/dist/db-BxaevAyc.d.ts +683 -0
  107. package/dist/index.d.ts +254 -0
  108. package/dist/index.js +1271 -0
  109. package/dist/index.js.map +1 -0
  110. package/dist/postinstall.js +167 -0
  111. package/dist/postinstall.js.map +1 -0
  112. package/dist/queries/affected.d.ts +14 -0
  113. package/dist/queries/affected.js +9 -0
  114. package/dist/queries/affected.js.map +1 -0
  115. package/dist/queries/bottlenecks.d.ts +18 -0
  116. package/dist/queries/bottlenecks.js +8 -0
  117. package/dist/queries/bottlenecks.js.map +1 -0
  118. package/dist/queries/by-kind.d.ts +20 -0
  119. package/dist/queries/by-kind.js +10 -0
  120. package/dist/queries/by-kind.js.map +1 -0
  121. package/dist/queries/call-graph.d.ts +13 -0
  122. package/dist/queries/call-graph.js +9 -0
  123. package/dist/queries/call-graph.js.map +1 -0
  124. package/dist/queries/change-surface.d.ts +10 -0
  125. package/dist/queries/change-surface.js +9 -0
  126. package/dist/queries/change-surface.js.map +1 -0
  127. package/dist/queries/clean-signature.d.ts +9 -0
  128. package/dist/queries/clean-signature.js +7 -0
  129. package/dist/queries/clean-signature.js.map +1 -0
  130. package/dist/queries/code.d.ts +17 -0
  131. package/dist/queries/code.js +9 -0
  132. package/dist/queries/code.js.map +1 -0
  133. package/dist/queries/complexity-hotspots.d.ts +19 -0
  134. package/dist/queries/complexity-hotspots.js +9 -0
  135. package/dist/queries/complexity-hotspots.js.map +1 -0
  136. package/dist/queries/complexity.d.ts +13 -0
  137. package/dist/queries/complexity.js +9 -0
  138. package/dist/queries/complexity.js.map +1 -0
  139. package/dist/queries/convergence.d.ts +11 -0
  140. package/dist/queries/convergence.js +9 -0
  141. package/dist/queries/convergence.js.map +1 -0
  142. package/dist/queries/coupling.d.ts +17 -0
  143. package/dist/queries/coupling.js +9 -0
  144. package/dist/queries/coupling.js.map +1 -0
  145. package/dist/queries/cycles.d.ts +16 -0
  146. package/dist/queries/cycles.js +8 -0
  147. package/dist/queries/cycles.js.map +1 -0
  148. package/dist/queries/dataflow.d.ts +19 -0
  149. package/dist/queries/dataflow.js +9 -0
  150. package/dist/queries/dataflow.js.map +1 -0
  151. package/dist/queries/dead.d.ts +10 -0
  152. package/dist/queries/dead.js +9 -0
  153. package/dist/queries/dead.js.map +1 -0
  154. package/dist/queries/deep-chains.d.ts +16 -0
  155. package/dist/queries/deep-chains.js +8 -0
  156. package/dist/queries/deep-chains.js.map +1 -0
  157. package/dist/queries/deps.d.ts +9 -0
  158. package/dist/queries/deps.js +9 -0
  159. package/dist/queries/deps.js.map +1 -0
  160. package/dist/queries/diff-impact.d.ts +13 -0
  161. package/dist/queries/diff-impact.js +9 -0
  162. package/dist/queries/diff-impact.js.map +1 -0
  163. package/dist/queries/doc-coverage.d.ts +14 -0
  164. package/dist/queries/doc-coverage.js +8 -0
  165. package/dist/queries/doc-coverage.js.map +1 -0
  166. package/dist/queries/drift.d.ts +25 -0
  167. package/dist/queries/drift.js +8 -0
  168. package/dist/queries/drift.js.map +1 -0
  169. package/dist/queries/extract-candidates.d.ts +25 -0
  170. package/dist/queries/extract-candidates.js +9 -0
  171. package/dist/queries/extract-candidates.js.map +1 -0
  172. package/dist/queries/fan.d.ts +29 -0
  173. package/dist/queries/fan.js +14 -0
  174. package/dist/queries/fan.js.map +1 -0
  175. package/dist/queries/files.d.ts +6 -0
  176. package/dist/queries/files.js +7 -0
  177. package/dist/queries/files.js.map +1 -0
  178. package/dist/queries/health.d.ts +18 -0
  179. package/dist/queries/health.js +21 -0
  180. package/dist/queries/health.js.map +1 -0
  181. package/dist/queries/hierarchy.d.ts +13 -0
  182. package/dist/queries/hierarchy.js +8 -0
  183. package/dist/queries/hierarchy.js.map +1 -0
  184. package/dist/queries/hotspots.d.ts +13 -0
  185. package/dist/queries/hotspots.js +8 -0
  186. package/dist/queries/hotspots.js.map +1 -0
  187. package/dist/queries/imports.d.ts +19 -0
  188. package/dist/queries/imports.js +12 -0
  189. package/dist/queries/imports.js.map +1 -0
  190. package/dist/queries/index.d.ts +47 -0
  191. package/dist/queries/index.js +207 -0
  192. package/dist/queries/index.js.map +1 -0
  193. package/dist/queries/isolated.d.ts +14 -0
  194. package/dist/queries/isolated.js +9 -0
  195. package/dist/queries/isolated.js.map +1 -0
  196. package/dist/queries/members.d.ts +10 -0
  197. package/dist/queries/members.js +8 -0
  198. package/dist/queries/members.js.map +1 -0
  199. package/dist/queries/methods.d.ts +6 -0
  200. package/dist/queries/methods.js +8 -0
  201. package/dist/queries/methods.js.map +1 -0
  202. package/dist/queries/outline.d.ts +10 -0
  203. package/dist/queries/outline.js +8 -0
  204. package/dist/queries/outline.js.map +1 -0
  205. package/dist/queries/passthrough-candidates.d.ts +18 -0
  206. package/dist/queries/passthrough-candidates.js +9 -0
  207. package/dist/queries/passthrough-candidates.js.map +1 -0
  208. package/dist/queries/redundant-reexports.d.ts +22 -0
  209. package/dist/queries/redundant-reexports.js +8 -0
  210. package/dist/queries/redundant-reexports.js.map +1 -0
  211. package/dist/queries/refs.d.ts +6 -0
  212. package/dist/queries/refs.js +7 -0
  213. package/dist/queries/refs.js.map +1 -0
  214. package/dist/queries/similar-chains.d.ts +29 -0
  215. package/dist/queries/similar-chains.js +8 -0
  216. package/dist/queries/similar-chains.js.map +1 -0
  217. package/dist/queries/similar-files.d.ts +19 -0
  218. package/dist/queries/similar-files.js +8 -0
  219. package/dist/queries/similar-files.js.map +1 -0
  220. package/dist/queries/similar-signatures.d.ts +21 -0
  221. package/dist/queries/similar-signatures.js +8 -0
  222. package/dist/queries/similar-signatures.js.map +1 -0
  223. package/dist/queries/similar.d.ts +34 -0
  224. package/dist/queries/similar.js +11 -0
  225. package/dist/queries/similar.js.map +1 -0
  226. package/dist/queries/slice.d.ts +21 -0
  227. package/dist/queries/slice.js +9 -0
  228. package/dist/queries/slice.js.map +1 -0
  229. package/dist/queries/stale-abstractions.d.ts +18 -0
  230. package/dist/queries/stale-abstractions.js +9 -0
  231. package/dist/queries/stale-abstractions.js.map +1 -0
  232. package/dist/queries/stats.d.ts +6 -0
  233. package/dist/queries/stats.js +7 -0
  234. package/dist/queries/stats.js.map +1 -0
  235. package/dist/queries/surface.d.ts +7 -0
  236. package/dist/queries/surface.js +8 -0
  237. package/dist/queries/surface.js.map +1 -0
  238. package/dist/queries/symbols.d.ts +6 -0
  239. package/dist/queries/symbols.js +9 -0
  240. package/dist/queries/symbols.js.map +1 -0
  241. package/dist/queries/system.d.ts +7 -0
  242. package/dist/queries/system.js +9 -0
  243. package/dist/queries/system.js.map +1 -0
  244. package/dist/queries/test-coverage.d.ts +22 -0
  245. package/dist/queries/test-coverage.js +11 -0
  246. package/dist/queries/test-coverage.js.map +1 -0
  247. package/dist/queries/trace.d.ts +6 -0
  248. package/dist/queries/trace.js +8 -0
  249. package/dist/queries/trace.js.map +1 -0
  250. package/dist/queries/wrapper-candidates.d.ts +17 -0
  251. package/dist/queries/wrapper-candidates.js +9 -0
  252. package/dist/queries/wrapper-candidates.js.map +1 -0
  253. package/dist/reindex-worker.js +368 -0
  254. package/dist/reindex-worker.js.map +1 -0
  255. package/docs/AGENT_GUIDE.md +359 -0
  256. package/package.json +70 -0
  257. package/reports/debloat/2026-04-10-scip-query-self-audit.md +161 -0
  258. package/skills/concrete-plan/SKILL.md +318 -0
  259. package/skills/scip-debloat/SKILL.md +413 -0
  260. package/skills/scip-explore/SKILL.md +235 -0
  261. package/skills/scip-verify/SKILL.md +323 -0
  262. package/src/cli.ts +1480 -0
  263. package/src/config.ts +117 -0
  264. package/src/db.ts +127 -0
  265. package/src/gitignore-filter.ts +143 -0
  266. package/src/index.ts +11 -0
  267. package/src/postinstall.ts +8 -0
  268. package/src/queries/affected.ts +86 -0
  269. package/src/queries/bottlenecks.ts +67 -0
  270. package/src/queries/by-kind.ts +204 -0
  271. package/src/queries/call-graph.ts +66 -0
  272. package/src/queries/change-surface.ts +110 -0
  273. package/src/queries/clean-signature.ts +22 -0
  274. package/src/queries/code.ts +101 -0
  275. package/src/queries/complexity-hotspots.ts +119 -0
  276. package/src/queries/complexity.ts +152 -0
  277. package/src/queries/convergence.ts +82 -0
  278. package/src/queries/coupling.ts +99 -0
  279. package/src/queries/cycles.ts +78 -0
  280. package/src/queries/dataflow.ts +128 -0
  281. package/src/queries/dead.ts +122 -0
  282. package/src/queries/deep-chains.ts +59 -0
  283. package/src/queries/deps.ts +46 -0
  284. package/src/queries/diff-impact.ts +204 -0
  285. package/src/queries/doc-coverage.ts +86 -0
  286. package/src/queries/drift.ts +224 -0
  287. package/src/queries/extract-candidates.ts +167 -0
  288. package/src/queries/fan.ts +148 -0
  289. package/src/queries/files.ts +16 -0
  290. package/src/queries/health.ts +324 -0
  291. package/src/queries/hierarchy.ts +49 -0
  292. package/src/queries/hotspots.ts +53 -0
  293. package/src/queries/imports.ts +95 -0
  294. package/src/queries/index.ts +45 -0
  295. package/src/queries/isolated.ts +67 -0
  296. package/src/queries/members.ts +54 -0
  297. package/src/queries/methods.ts +27 -0
  298. package/src/queries/outline.ts +52 -0
  299. package/src/queries/passthrough-candidates.ts +94 -0
  300. package/src/queries/redundant-reexports.ts +170 -0
  301. package/src/queries/refs.ts +27 -0
  302. package/src/queries/similar-chains.ts +314 -0
  303. package/src/queries/similar-files.ts +140 -0
  304. package/src/queries/similar-signatures.ts +151 -0
  305. package/src/queries/similar.ts +305 -0
  306. package/src/queries/slice.ts +154 -0
  307. package/src/queries/stale-abstractions.ts +82 -0
  308. package/src/queries/stats.ts +22 -0
  309. package/src/queries/surface.ts +34 -0
  310. package/src/queries/symbols.ts +39 -0
  311. package/src/queries/system.ts +86 -0
  312. package/src/queries/test-coverage.ts +106 -0
  313. package/src/queries/trace.ts +55 -0
  314. package/src/queries/wrapper-candidates.ts +112 -0
  315. package/src/query-support.ts +226 -0
  316. package/src/reindex/detect.ts +58 -0
  317. package/src/reindex/index.ts +153 -0
  318. package/src/reindex/indexers.ts +220 -0
  319. package/src/reindex/install.ts +125 -0
  320. package/src/reindex-worker.ts +35 -0
  321. package/src/setup.ts +202 -0
  322. package/src/symbol-parser.ts +278 -0
  323. package/src/types.ts +654 -0
  324. package/src/watch.ts +274 -0
  325. package/tests/gitignore-filter.test.ts +48 -0
  326. package/tests/queries.test.ts +300 -0
  327. package/tests/symbol-parser.test.ts +157 -0
  328. package/tsconfig.json +20 -0
  329. package/tsup.config.ts +40 -0
  330. package/vitest.config.ts +7 -0
@@ -0,0 +1,167 @@
1
+ import type { ScipDatabase } from '../db.js';
2
+ import { getCalleeRowsForSymbol } from '../query-support.js';
3
+ import type { ExtractCandidate } from '../types.js';
4
+ import { shortenSymbol } from '../symbol-parser.js';
5
+
6
/**
 * Find functions with natural extraction seams.
 *
 * A large function that references two distinct groups of symbols —
 * where group A's symbols are never co-referenced with group B's —
 * has a natural extraction boundary. The isolated cluster can likely
 * be pulled into its own function.
 *
 * We detect this by:
 * 1. Finding all callees of a function
 * 2. Building a co-occurrence graph (which callees appear in the same chunk)
 * 3. Finding connected components — disconnected components = extraction seams
 * 4. Scoring each cluster by how isolated it is from the rest
 *
 * @param db - Database handle used for the symbol query and ignore checks.
 * @param opts.scope - Optional substring; only definitions whose relative
 *   path contains it are considered. Bound as a SQL parameter, so quotes in
 *   the value cannot alter the query.
 * @param opts.minLoc - Minimum enclosing-range length in lines (default 10).
 * @param opts.minCallees - Minimum number of distinct callees (default 6).
 * @param opts.limit - Maximum number of candidates returned (default 20).
 * @returns Candidates ordered by number of reported clusters, then by
 *   length in lines (both descending), truncated to `limit`.
 */
export function extractCandidates(
  db: ScipDatabase,
  opts: { scope?: string; minLoc?: number; minCallees?: number; limit?: number } = {},
): ExtractCandidate[] {
  const { scope, minLoc = 10, minCallees = 6, limit = 20 } = opts;

  // Placeholders are bound in textual order: the LOC threshold first, then
  // the optional scope pattern.
  const scopeFilter = scope ? 'AND d.relative_path LIKE ?' : '';
  const params: Array<string | number> = [minLoc];
  if (scope) params.push(`%${scope}%`);

  // Find functions large enough to consider
  const symbols = db.all<{
    id: number;
    symbol: string;
    document_id: number;
    start_line: number;
    end_line: number;
    relative_path: string;
  }>(
    `SELECT gs.id, gs.symbol, der.document_id, der.start_line, der.end_line, d.relative_path
     FROM global_symbols gs
     JOIN defn_enclosing_ranges der ON gs.id = der.symbol_id
     JOIN documents d ON der.document_id = d.id
     WHERE 1 = 1
       ${db.pathExclusionsFor('d')}
       ${db.symbolNoiseFor('gs')}
       AND (der.end_line - der.start_line + 1) >= ?
       ${scopeFilter}
     ORDER BY (der.end_line - der.start_line + 1) DESC`,
    ...params,
  );

  const results: ExtractCandidate[] = [];

  for (const sym of symbols) {
    if (db.isIgnored(sym.relative_path)) continue;

    // Skip files whose basename contains "types" — "callees" in a type file
    // are just type references, not function calls. Splitting type files is
    // a cosmetic choice, not an extraction opportunity.
    const basename = sym.relative_path.split('/').pop() ?? '';
    if (basename.includes('types')) continue;

    // Get callees with their chunk locations (to build co-occurrence)
    const calleeChunks = getCalleeRowsForSymbol(db, {
      documentId: sym.document_id,
      startLine: sym.start_line,
      endLine: sym.end_line,
      symbolId: sym.id,
    });

    // Collect unique callees
    const calleeSet = new Set(calleeChunks.map((c) => c.symbol));
    if (calleeSet.size < minCallees) continue;

    const cooccurrence = buildCooccurrenceGraph(calleeSet, calleeChunks);
    const clusters = findConnectedComponents(calleeSet, cooccurrence);

    // Only interesting if there are multiple clusters (= extraction seams exist)
    if (clusters.length < 2) continue;

    // Score each cluster by isolation:
    //   isolation = 1 - (edges to other clusters / total possible edges to other clusters)
    // NOTE(review): clusters are connected components of `cooccurrence`, so a
    // neighbor of a cluster member is always in the same cluster. crossEdges
    // is therefore 0 and isolation is 1 for every cluster reaching this point.
    const scoredClusters = clusters
      .filter((c) => c.size >= 2) // single-callee clusters aren't interesting
      .map((cluster) => {
        // Clusters partition calleeSet and every neighbor is a callee, so
        // "belongs to another cluster" is the same as "not in this cluster".
        let crossEdges = 0;
        for (const callee of cluster) {
          for (const neighbor of cooccurrence.get(callee) ?? []) {
            if (!cluster.has(neighbor)) crossEdges++;
          }
        }

        const maxCrossEdges = cluster.size * (calleeSet.size - cluster.size);
        const isolation = maxCrossEdges > 0 ? 1 - crossEdges / maxCrossEdges : 1;

        return {
          callees: [...cluster].map(shortenSymbol),
          isolation,
        };
      })
      .filter((c) => c.isolation > 0.5) // Only report well-isolated clusters
      .sort((a, b) => b.isolation - a.isolation);

    if (scoredClusters.length > 0) {
      results.push({
        symbol: sym.symbol,
        shortName: shortenSymbol(sym.symbol),
        relativePath: sym.relative_path,
        startLine: sym.start_line,
        endLine: sym.end_line,
        loc: sym.end_line - sym.start_line + 1,
        totalCallees: calleeSet.size,
        clusters: scoredClusters,
      });
    }
  }

  results.sort((a, b) => b.clusters.length - a.clusters.length || b.loc - a.loc);
  return results.slice(0, limit);
}

/**
 * Build an undirected co-occurrence graph: two callees are connected when
 * at least one chunk mentions both of them.
 */
function buildCooccurrenceGraph(
  callees: ReadonlySet<string>,
  calleeRows: ReadonlyArray<{ symbol: string; chunkId: number }>,
): Map<string, Set<string>> {
  // Every callee gets a node, even if it never shares a chunk with another.
  const graph = new Map<string, Set<string>>();
  for (const callee of callees) {
    graph.set(callee, new Set());
  }

  // Group callees by the chunk they were mentioned in.
  const chunkToCallees = new Map<number, Set<string>>();
  for (const row of calleeRows) {
    let members = chunkToCallees.get(row.chunkId);
    if (!members) {
      members = new Set();
      chunkToCallees.set(row.chunkId, members);
    }
    members.add(row.symbol);
  }

  // Connect every pair of callees that share a chunk.
  for (const members of chunkToCallees.values()) {
    const arr = [...members];
    for (let i = 0; i < arr.length; i++) {
      for (let j = i + 1; j < arr.length; j++) {
        graph.get(arr[i]!)!.add(arr[j]!);
        graph.get(arr[j]!)!.add(arr[i]!);
      }
    }
  }

  return graph;
}

/**
 * Partition `nodes` into connected components of `adjacency` using an
 * iterative, stack-based (depth-first) traversal.
 */
function findConnectedComponents(
  nodes: Iterable<string>,
  adjacency: ReadonlyMap<string, ReadonlySet<string>>,
): Set<string>[] {
  const visited = new Set<string>();
  const components: Set<string>[] = [];

  for (const start of nodes) {
    if (visited.has(start)) continue;
    const component = new Set<string>();
    const stack = [start];
    while (stack.length > 0) {
      const current = stack.pop()!;
      // A node can be pushed more than once before it is first visited.
      if (visited.has(current)) continue;
      visited.add(current);
      component.add(current);
      for (const neighbor of adjacency.get(current) ?? []) {
        if (!visited.has(neighbor)) stack.push(neighbor);
      }
    }
    components.push(component);
  }

  return components;
}
@@ -0,0 +1,148 @@
1
+ import type { ScipDatabase } from '../db.js';
2
+ import type { FanResult } from '../types.js';
3
+ import { shortenSymbol } from '../symbol-parser.js';
4
+
5
/**
 * Fan-in: the number of distinct files that mention a symbol.
 * A high count means the symbol is widely depended upon, so changing it
 * has a large blast radius.
 *
 * @param db - Database handle to query.
 * @param symbolPattern - Substring matched against the full symbol string
 *   (wrapped in `%…%` and bound as a LIKE parameter).
 * @returns One entry per matching symbol, highest file count first; `name`
 *   is the shortened symbol.
 */
export function fanIn(
  db: ScipDatabase,
  symbolPattern: string,
): FanResult[] {
  const likePattern = `%${symbolPattern}%`;

  // Only mentions with role = 0 are counted.
  // NOTE(review): presumably role 0 marks plain references — confirm against the schema.
  const sql = `SELECT gs.symbol, COUNT(DISTINCT c.document_id) AS file_count
     FROM mentions m
     JOIN chunks c ON m.chunk_id = c.id
     JOIN global_symbols gs ON m.symbol_id = gs.id
     WHERE gs.symbol LIKE ?
       AND m.role = 0
     GROUP BY gs.id
     ORDER BY file_count DESC`;

  const matches = db.all<{ symbol: string; file_count: number }>(sql, likePattern);

  const results: FanResult[] = [];
  for (const { symbol, file_count: fileCount } of matches) {
    results.push({ name: shortenSymbol(symbol), count: fileCount });
  }
  return results;
}
33
+
34
/**
 * Fan-out: the number of distinct symbols a file mentions that are defined
 * in a different file. A high count means the file depends on many things
 * and is fragile to upstream changes.
 *
 * @param db - Database handle to query; also supplies the ignore check.
 * @param filePattern - Substring matched against the file's relative path
 *   (wrapped in `%…%` and bound as a LIKE parameter).
 * @returns One entry per matching, non-ignored file, highest symbol count
 *   first; `name` is the relative path.
 */
export function fanOut(
  db: ScipDatabase,
  filePattern: string,
): FanResult[] {
  // `def_d.id != d.id` keeps only symbols defined outside the mentioning file.
  const sql = `SELECT d.relative_path, COUNT(DISTINCT gs.id) AS symbol_count
     FROM mentions m
     JOIN chunks c ON m.chunk_id = c.id
     JOIN documents d ON c.document_id = d.id
     JOIN global_symbols gs ON m.symbol_id = gs.id
     JOIN defn_enclosing_ranges der ON gs.id = der.symbol_id
     JOIN documents def_d ON der.document_id = def_d.id
     WHERE d.relative_path LIKE ?
       AND m.role = 0
       AND def_d.id != d.id
     GROUP BY d.id
     ORDER BY symbol_count DESC`;

  const perFile = db.all<{ relative_path: string; symbol_count: number }>(
    sql,
    `%${filePattern}%`,
  );

  const results: FanResult[] = [];
  for (const { relative_path: path, symbol_count: count } of perFile) {
    if (db.isIgnored(path)) continue;
    results.push({ name: path, count });
  }
  return results;
}
68
+
69
/**
 * Top fan-in across the whole codebase — the most depended-on symbols.
 *
 * Only symbols mentioned from more than one file are reported.
 *
 * @param db - Database handle to query; also supplies the path-exclusion
 *   and symbol-noise SQL fragments.
 * @param opts.limit - Maximum number of rows returned (default 30).
 * @param opts.scope - Optional substring; only symbols whose defining file's
 *   relative path contains it are considered. Bound as a SQL parameter, so
 *   quotes in the value cannot alter the query.
 * @returns Entries ordered by file count, highest first; `name` is the
 *   shortened symbol.
 */
export function topFanIn(
  db: ScipDatabase,
  opts: { limit?: number; scope?: string } = {},
): FanResult[] {
  const { limit = 30, scope } = opts;

  // Placeholders are bound in textual order: the optional scope pattern
  // comes before LIMIT in the statement.
  const scopeFilter = scope ? 'AND def_d.relative_path LIKE ?' : '';
  const params: Array<string | number> = scope ? [`%${scope}%`, limit] : [limit];

  const rows = db.all<{
    symbol: string;
    file_count: number;
  }>(
    `SELECT gs.symbol, COUNT(DISTINCT c.document_id) AS file_count
     FROM mentions m
     JOIN chunks c ON m.chunk_id = c.id
     JOIN global_symbols gs ON m.symbol_id = gs.id
     JOIN defn_enclosing_ranges der ON gs.id = der.symbol_id
     JOIN documents def_d ON der.document_id = def_d.id
     WHERE m.role = 0
       ${db.pathExclusionsFor('def_d')}
       ${db.symbolNoiseFor('gs')}
       ${scopeFilter}
     GROUP BY gs.id
     HAVING file_count > 1
     ORDER BY file_count DESC
     LIMIT ?`,
    ...params,
  );

  return rows.map((r) => ({
    name: shortenSymbol(r.symbol),
    count: r.file_count,
  }));
}
107
+
108
/**
 * Top fan-out across the whole codebase — files that depend on the most external symbols.
 *
 * @param db - Database handle to query; also supplies the path-exclusion
 *   and symbol-noise SQL fragments and the ignore check.
 * @param opts.limit - Maximum number of rows fetched (default 30).
 * @param opts.scope - Optional substring; only files whose relative path
 *   contains it are considered. Bound as a SQL parameter, so quotes in the
 *   value cannot alter the query.
 * @returns Entries ordered by symbol count, highest first; `name` is the
 *   relative path. Ignored files are dropped after the SQL LIMIT is applied,
 *   so fewer than `limit` entries may be returned.
 */
export function topFanOut(
  db: ScipDatabase,
  opts: { limit?: number; scope?: string } = {},
): FanResult[] {
  const { limit = 30, scope } = opts;

  // Placeholders are bound in textual order: the optional scope pattern
  // comes before LIMIT in the statement.
  const scopeFilter = scope ? 'AND d.relative_path LIKE ?' : '';
  const params: Array<string | number> = scope ? [`%${scope}%`, limit] : [limit];

  const rows = db.all<{
    relative_path: string;
    symbol_count: number;
  }>(
    `SELECT d.relative_path, COUNT(DISTINCT gs.id) AS symbol_count
     FROM mentions m
     JOIN chunks c ON m.chunk_id = c.id
     JOIN documents d ON c.document_id = d.id
     JOIN global_symbols gs ON m.symbol_id = gs.id
     JOIN defn_enclosing_ranges der ON gs.id = der.symbol_id
     JOIN documents def_d ON der.document_id = def_d.id
     WHERE m.role = 0
       AND def_d.id != d.id
       ${db.pathExclusionsFor('d')}
       ${db.symbolNoiseFor('gs')}
       ${scopeFilter}
     GROUP BY d.id
     ORDER BY symbol_count DESC
     LIMIT ?`,
    ...params,
  );

  return rows
    .filter((r) => !db.isIgnored(r.relative_path))
    .map((r) => ({
      name: r.relative_path,
      count: r.symbol_count,
    }));
}
@@ -0,0 +1,16 @@
1
+ import type { ScipDatabase } from '../db.js';
2
+ import type { FileResult } from '../types.js';
3
+
4
/**
 * List indexed documents whose relative path contains `pattern`.
 *
 * @param db - Database handle to query; also supplies the ignore check.
 * @param pattern - Substring matched against the relative path (wrapped in
 *   `%…%` and bound as a LIKE parameter).
 * @returns Matching, non-ignored documents ordered by relative path.
 */
export function files(db: ScipDatabase, pattern: string): FileResult[] {
  const sql = `SELECT relative_path FROM documents
     WHERE relative_path LIKE ?
     ORDER BY relative_path`;
  const matched = db.all<{ relative_path: string }>(sql, `%${pattern}%`);

  // Drop anything the gitignore filter rejects.
  const visible: FileResult[] = [];
  for (const { relative_path: relativePath } of matched) {
    if (!db.isIgnored(relativePath)) {
      visible.push({ relativePath });
    }
  }
  return visible;
}
@@ -0,0 +1,324 @@
1
+ import type { ScipDatabase } from '../db.js';
2
+ import { dead } from './dead.js';
3
+ import { isolated } from './isolated.js';
4
+ import { cycles } from './cycles.js';
5
+ import { similarAll } from './similar.js';
6
+ import { extractCandidates } from './extract-candidates.js';
7
+ import { wrapperCandidates } from './wrapper-candidates.js';
8
+ import { passthroughCandidates } from './passthrough-candidates.js';
9
+ import { staleAbstractions } from './stale-abstractions.js';
10
+ import { drift } from './drift.js';
11
+ import { complexityHotspots } from './complexity-hotspots.js';
12
+ import { testCoverageSummary } from './test-coverage.js';
13
+ import { stats } from './stats.js';
14
+ import type { HealthAction, HealthReport } from '../types.js';
15
+
16
/**
 * Path suffixes that mark a file as an entry point or barrel. Patterns that
 * start with `/` must match a whole file name; the others may also be the
 * trailing segment of a compound name (see `isEntryPoint`).
 */
const ENTRY_POINT_PATTERNS = [
  '/index.ts',
  '/index.js',
  'cli.ts',
  'worker.ts',
  'postinstall.ts',
  '/mod.rs',
  '__init__.py',
  'main.ts',
  'main.rs',
  'main.go',
  'main.py',
];

/**
 * Decide whether `path` looks like an entry point / barrel file.
 *
 * A plain `endsWith` check is too loose: `src/domain.ts` ends with `main.ts`
 * and `webworker.ts` ends with `worker.ts`. A pattern therefore only matches
 * when it starts at a name boundary: the beginning of the path, or right after
 * one of `/`, `\`, `.`, `-`, `_`. Slash-prefixed patterns additionally match
 * the same file name at the repository root (e.g. a top-level `index.ts`).
 */
function isEntryPoint(path: string): boolean {
  return ENTRY_POINT_PATTERNS.some((pattern) => {
    if (pattern.startsWith('/')) {
      return path.endsWith(pattern) || path === pattern.slice(1);
    }
    if (!path.endsWith(pattern)) return false;
    const start = path.length - pattern.length;
    return start === 0 || '/\\.-_'.includes(path[start - 1]);
  });
}

/**
 * Single composite health report that runs all de-bloat analyses
 * and produces a prioritized action list.
 *
 * The scoring formula accounts for common false positives:
 * - Entry points (CLI, workers, barrels) appearing as "dead code"
 * - Typed result interfaces with 1 consumer (normal for APIs)
 * - Consistent import patterns across sibling modules (not duplication)
 * - Barrel and orchestrator files deviating from sibling patterns (expected)
 *
 * @param db - Index database the individual analyses are run against.
 * @param opts.scope - Optional scope forwarded unchanged to every analysis
 *   that accepts one.
 * @returns The 0-100 score, index overview, filtered finding counts, the
 *   action list sorted by impact x effort, and the five most complex symbols.
 */
export function health(
  db: ScipDatabase,
  opts: { scope?: string } = {},
): HealthReport {
  const { scope } = opts;

  // Run all analyses
  const indexStats = stats(db);
  const deadResult = dead(db, { scope, minLoc: 3, skipBarrels: false });
  const isolatedResult = isolated(db, { scope, minLoc: 3 });
  const cycleResult = cycles(db, { scope });
  const similarResult = similarAll(db, { scope, minSimilarity: 0.6, limit: 50, minCallees: 4 });
  const extractResult = extractCandidates(db, { scope, minLoc: 15, minCallees: 5, limit: 50 });
  const wrapperResult = wrapperCandidates(db, { scope, maxLoc: 15, limit: 50 });
  const passthroughResult = passthroughCandidates(db, { scope, maxLoc: 15, limit: 50 });
  const staleResult = staleAbstractions(db, { scope, minLoc: 3, limit: 50 });
  const driftResult = drift(db, { scope });
  const complexResult = complexityHotspots(db, { scope, minLoc: 10, limit: 10 });
  const testResult = testCoverageSummary(db, { scope, minLoc: 3 });

  // ── False-positive filtering ─────────────────────────────

  // Entry points and barrels appear as dead/isolated because nothing imports
  // them, so they are kept out of the counts below.

  // Dead code: only count truly dead symbols (zero refs anywhere),
  // excluding entry points AND file-internal helpers (which are fine).
  const trueDeadSymbols = deadResult.symbols.filter(
    (sym) => !isEntryPoint(sym.relativePath) && sym.kind === 'dead-code',
  );
  const trueDeadCount = trueDeadSymbols.length;
  const trueDeadLoc = trueDeadSymbols.reduce((sum, sym) => sum + sym.loc, 0);

  // Isolated: same entry-point filtering, computed once and reused for the
  // action entry, the score and the findings.
  const trueIsolatedSymbols = isolatedResult.filter(
    (sym) => !isEntryPoint(sym.relativePath),
  );
  const trueIsolatedCount = trueIsolatedSymbols.length;
  const trueIsolatedLoc = trueIsolatedSymbols.reduce((sum, sym) => sum + sym.loc, 0);

  // Stale abstractions: the command filters out types.ts single-consumer types.
  // Also filter out 0-consumer types in files that export functions — these are
  // likely parameter/return types consumed through function signatures, which
  // the SCIP index can't track as direct mentions.
  const filesWithFunctions = new Set(
    db.all<{ relative_path: string }>(
      `SELECT DISTINCT d.relative_path
       FROM global_symbols gs
       JOIN defn_enclosing_ranges der ON gs.id = der.symbol_id
       JOIN documents d ON der.document_id = d.id
       WHERE gs.symbol LIKE '%().'
       ${db.pathExclusionsFor('d')}`,
    ).map((r) => r.relative_path),
  );
  const trueStaleSymbols = staleResult.filter(
    (t) => !(t.consumers === 0 && filesWithFunctions.has(t.file)),
  );
  const trueStaleCount = trueStaleSymbols.length;

  // Drift: now uses usage-based detection (unused imports, layer violations, pattern deviations)
  // The drift command already filters structural roles internally.
  const trueDriftCount = driftResult.results.length;

  // Similar pairs: the similar command now uses TF-IDF weighted cosine
  // similarity which automatically discounts infrastructure callees.
  // The sharedCallees list only contains significant (above-median IDF) callees.
  // We can trust the count directly.
  const trueSimilarCount = similarResult.length;

  // ── Build prioritized action list ────────────────────────

  const actions: HealthAction[] = [];

  if (trueDeadCount > 0) {
    actions.push({
      category: 'Dead code',
      description: `${trueDeadCount} symbols with zero references anywhere — safe to delete`,
      effort: 'low',
      impact: 'high',
      count: trueDeadCount,
      locRecoverable: trueDeadLoc,
    });
  }

  if (testResult.percent < 50) {
    actions.push({
      category: 'Test coverage',
      description: `${testResult.percent}% of symbols referenced by tests (${testResult.uncovered} uncovered)`,
      effort: 'high',
      impact: 'high',
      count: testResult.uncovered,
      locRecoverable: 0,
    });
  }

  if (trueIsolatedCount > 0) {
    actions.push({
      category: 'Isolated symbols',
      description: `${trueIsolatedCount} symbols completely disconnected from the codebase graph`,
      effort: 'low',
      impact: 'medium',
      count: trueIsolatedCount,
      locRecoverable: trueIsolatedLoc,
    });
  }

  if (cycleResult.length > 0) {
    actions.push({
      category: 'Circular dependencies',
      description: `${cycleResult.length} cycle(s) — break with dependency inversion or module restructuring`,
      effort: 'medium',
      impact: 'high',
      count: cycleResult.length,
      locRecoverable: 0,
    });
  }

  if (trueSimilarCount > 0) {
    actions.push({
      category: 'Similar functions',
      description: `${trueSimilarCount} pairs with real logic overlap (beyond shared imports) — consolidation candidates`,
      effort: 'medium',
      impact: 'medium',
      count: trueSimilarCount,
      locRecoverable: 0,
    });
  }

  if (extractResult.length > 0) {
    actions.push({
      category: 'Extraction candidates',
      description: `${extractResult.length} large functions with isolated callee clusters — extract method opportunities`,
      effort: 'medium',
      impact: 'medium',
      count: extractResult.length,
      locRecoverable: 0,
    });
  }

  if (wrapperResult.length > 0) {
    actions.push({
      category: 'Wrapper functions',
      description: `${wrapperResult.length} single-consumer symbols that could be inlined`,
      effort: 'low',
      impact: 'low',
      count: wrapperResult.length,
      locRecoverable: wrapperResult.reduce((sum, r) => sum + r.loc, 0),
    });
  }

  if (passthroughResult.length > 0) {
    actions.push({
      category: 'Passthrough functions',
      description: `${passthroughResult.length} functions that just forward to one callee — unnecessary indirection`,
      effort: 'low',
      impact: 'low',
      count: passthroughResult.length,
      locRecoverable: passthroughResult.reduce((sum, r) => sum + r.loc, 0),
    });
  }

  if (trueStaleCount > 0) {
    const unused = trueStaleSymbols.filter((t) => t.consumers === 0).length;
    const singleUse = trueStaleCount - unused;
    const parts: string[] = [];
    if (unused > 0) parts.push(`${unused} unused`);
    if (singleUse > 0) parts.push(`${singleUse} single-consumer (not in types file)`);
    actions.push({
      category: 'Stale abstractions',
      description: `${parts.join(', ')} — premature abstraction`,
      effort: 'low',
      impact: 'medium',
      count: trueStaleCount,
      // Sum LOC over the same filtered set that `count` describes, so lines of
      // symbols discarded as false positives are not reported as recoverable.
      locRecoverable: trueStaleSymbols.reduce((sum, t) => sum + t.loc, 0),
    });
  }

  if (trueDriftCount > 0) {
    const parts: string[] = [];
    if (driftResult.unusedImports > 0) parts.push(`${driftResult.unusedImports} unused imports`);
    if (driftResult.layerViolations > 0) parts.push(`${driftResult.layerViolations} layer violations`);
    if (driftResult.patternDeviations > 0) parts.push(`${driftResult.patternDeviations} unique deps`);
    actions.push({
      category: 'Structural drift',
      description: parts.join(', '),
      effort: driftResult.layerViolations > 0 ? 'medium' : 'low',
      impact: driftResult.layerViolations > 0 ? 'medium' : 'low',
      count: trueDriftCount,
      locRecoverable: 0,
    });
  }

  // Sort: high impact + low effort first
  const impactWeight = { high: 3, medium: 2, low: 1 };
  const effortWeight = { low: 3, medium: 2, high: 1 };
  actions.sort((a, b) => {
    const scoreA = impactWeight[a.impact] * effortWeight[a.effort];
    const scoreB = impactWeight[b.impact] * effortWeight[b.effort];
    return scoreB - scoreA;
  });

  // ── Compute health score (0-100) ─────────────────────────
  //
  // Uses filtered counts (false positives removed).
  // Deductions scale with codebase size so a 10-file project
  // and a 1000-file project aren't penalized the same way.
  // The Math.max(..., 1) guards avoid division by zero on an empty index.
  const fileCount = Math.max(indexStats.documents, 1);
  const symbolCount = Math.max(indexStats.symbols, 1);

  let score = 100;

  // Dead code: deduct based on % of symbols that are dead, not raw count
  const deadPercent = trueDeadCount / symbolCount;
  score -= Math.min(20, Math.round(deadPercent * 200));

  // Isolated: same percentage-based
  const isolatedPercent = trueIsolatedCount / symbolCount;
  score -= Math.min(10, Math.round(isolatedPercent * 200));

  // Cycles: these are always bad, flat penalty
  score -= Math.min(15, cycleResult.length * 5);

  // Similar pairs: only count true logic overlap, not boilerplate
  score -= Math.min(10, trueSimilarCount * 2);

  // Extract candidates: mild penalty
  score -= Math.min(5, extractResult.length * 2);

  // Wrappers: mild
  score -= Math.min(3, wrapperResult.length);

  // Passthroughs: mild
  score -= Math.min(3, passthroughResult.length);

  // Stale abstractions: percentage-based with filtered count
  const stalePercent = trueStaleCount / Math.max(symbolCount * 0.1, 1);
  score -= Math.min(8, Math.round(stalePercent * 10));

  // Drift: percentage of files that deviate
  const driftPercent = trueDriftCount / fileCount;
  score -= Math.min(5, Math.round(driftPercent * 50));

  // Complexity: only penalize extreme outliers
  const extremeComplexity = complexResult.filter((r) => r.score > 50).length;
  score -= Math.min(5, extremeComplexity * 2);

  // Test coverage: significant penalty for low coverage
  // 0% = -15, 25% = -11, 50% = -8, 75% = -4, 100% = 0
  const coverageDeduction = Math.round(15 * (1 - testResult.percent / 100));
  score -= coverageDeduction;

  score = Math.max(0, Math.min(100, score));

  return {
    score,
    overview: {
      documents: indexStats.documents,
      symbols: indexStats.symbols,
      indexSizeBytes: indexStats.indexSizeBytes,
    },
    findings: {
      deadSymbols: trueDeadCount,
      deadLoc: trueDeadLoc,
      isolatedSymbols: trueIsolatedCount,
      isolatedLoc: trueIsolatedLoc,
      cycles: cycleResult.length,
      similarPairs: trueSimilarCount,
      extractionCandidates: extractResult.length,
      wrappers: wrapperResult.length,
      passthroughs: passthroughResult.length,
      staleTypes: trueStaleCount,
      driftedFiles: trueDriftCount,
      complexityHotspotCount: complexResult.length,
      testCoveragePercent: testResult.percent,
    },
    actions,
    topComplexity: complexResult.slice(0, 5).map((r) => ({
      symbol: r.shortName,
      score: r.score,
    })),
  };
}