scip-query 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (330) hide show
  1. package/IMPROVEMENTS.md +143 -0
  2. package/PLAN.md +320 -0
  3. package/README.md +1213 -0
  4. package/dist/chunk-2QZ23IBN.js +55 -0
  5. package/dist/chunk-2QZ23IBN.js.map +1 -0
  6. package/dist/chunk-36OMT7ZJ.js +144 -0
  7. package/dist/chunk-36OMT7ZJ.js.map +1 -0
  8. package/dist/chunk-3E2X7RIE.js +101 -0
  9. package/dist/chunk-3E2X7RIE.js.map +1 -0
  10. package/dist/chunk-3UOUTZQT.js +45 -0
  11. package/dist/chunk-3UOUTZQT.js.map +1 -0
  12. package/dist/chunk-3ZZJVBIO.js +88 -0
  13. package/dist/chunk-3ZZJVBIO.js.map +1 -0
  14. package/dist/chunk-4TYLS5XX.js +10 -0
  15. package/dist/chunk-4TYLS5XX.js.map +1 -0
  16. package/dist/chunk-5FGUEU7N.js +101 -0
  17. package/dist/chunk-5FGUEU7N.js.map +1 -0
  18. package/dist/chunk-5WTJAXY2.js +61 -0
  19. package/dist/chunk-5WTJAXY2.js.map +1 -0
  20. package/dist/chunk-6NBLIDF4.js +24 -0
  21. package/dist/chunk-6NBLIDF4.js.map +1 -0
  22. package/dist/chunk-6SXADWLW.js +43 -0
  23. package/dist/chunk-6SXADWLW.js.map +1 -0
  24. package/dist/chunk-6VJ6Q7IE.js +65 -0
  25. package/dist/chunk-6VJ6Q7IE.js.map +1 -0
  26. package/dist/chunk-7OZPA5OO.js +258 -0
  27. package/dist/chunk-7OZPA5OO.js.map +1 -0
  28. package/dist/chunk-BEPIEVLR.js +76 -0
  29. package/dist/chunk-BEPIEVLR.js.map +1 -0
  30. package/dist/chunk-BFSCMC22.js +42 -0
  31. package/dist/chunk-BFSCMC22.js.map +1 -0
  32. package/dist/chunk-BP2ATLK2.js +110 -0
  33. package/dist/chunk-BP2ATLK2.js.map +1 -0
  34. package/dist/chunk-CM454WL3.js +114 -0
  35. package/dist/chunk-CM454WL3.js.map +1 -0
  36. package/dist/chunk-DCKMSTJ4.js +74 -0
  37. package/dist/chunk-DCKMSTJ4.js.map +1 -0
  38. package/dist/chunk-DEZKCZXD.js +40 -0
  39. package/dist/chunk-DEZKCZXD.js.map +1 -0
  40. package/dist/chunk-DVWGWHFW.js +99 -0
  41. package/dist/chunk-DVWGWHFW.js.map +1 -0
  42. package/dist/chunk-EMDQWNYR.js +102 -0
  43. package/dist/chunk-EMDQWNYR.js.map +1 -0
  44. package/dist/chunk-FFSWWE5O.js +33 -0
  45. package/dist/chunk-FFSWWE5O.js.map +1 -0
  46. package/dist/chunk-FGXRVW7G.js +73 -0
  47. package/dist/chunk-FGXRVW7G.js.map +1 -0
  48. package/dist/chunk-FUHJCHS4.js +158 -0
  49. package/dist/chunk-FUHJCHS4.js.map +1 -0
  50. package/dist/chunk-GJFURBEW.js +64 -0
  51. package/dist/chunk-GJFURBEW.js.map +1 -0
  52. package/dist/chunk-GTILYBH6.js +102 -0
  53. package/dist/chunk-GTILYBH6.js.map +1 -0
  54. package/dist/chunk-JJP7KQND.js +1 -0
  55. package/dist/chunk-JJP7KQND.js.map +1 -0
  56. package/dist/chunk-JKP5GH6T.js +213 -0
  57. package/dist/chunk-JKP5GH6T.js.map +1 -0
  58. package/dist/chunk-KCBMVQL5.js +38 -0
  59. package/dist/chunk-KCBMVQL5.js.map +1 -0
  60. package/dist/chunk-KVSW5KYP.js +78 -0
  61. package/dist/chunk-KVSW5KYP.js.map +1 -0
  62. package/dist/chunk-LAWMH22O.js +172 -0
  63. package/dist/chunk-LAWMH22O.js.map +1 -0
  64. package/dist/chunk-LB7OS35Q.js +72 -0
  65. package/dist/chunk-LB7OS35Q.js.map +1 -0
  66. package/dist/chunk-LUSIFBXO.js +57 -0
  67. package/dist/chunk-LUSIFBXO.js.map +1 -0
  68. package/dist/chunk-MBVNHJVN.js +44 -0
  69. package/dist/chunk-MBVNHJVN.js.map +1 -0
  70. package/dist/chunk-MGNMHKX3.js +15 -0
  71. package/dist/chunk-MGNMHKX3.js.map +1 -0
  72. package/dist/chunk-N5KEREIA.js +41 -0
  73. package/dist/chunk-N5KEREIA.js.map +1 -0
  74. package/dist/chunk-NDSQYIWT.js +71 -0
  75. package/dist/chunk-NDSQYIWT.js.map +1 -0
  76. package/dist/chunk-NUZ4OMU3.js +28 -0
  77. package/dist/chunk-NUZ4OMU3.js.map +1 -0
  78. package/dist/chunk-QOV2R2WT.js +170 -0
  79. package/dist/chunk-QOV2R2WT.js.map +1 -0
  80. package/dist/chunk-SEFSL2GF.js +78 -0
  81. package/dist/chunk-SEFSL2GF.js.map +1 -0
  82. package/dist/chunk-T6ARFSBZ.js +103 -0
  83. package/dist/chunk-T6ARFSBZ.js.map +1 -0
  84. package/dist/chunk-TBP6BICL.js +46 -0
  85. package/dist/chunk-TBP6BICL.js.map +1 -0
  86. package/dist/chunk-TDNNOR6D.js +97 -0
  87. package/dist/chunk-TDNNOR6D.js.map +1 -0
  88. package/dist/chunk-TSPZOMHC.js +195 -0
  89. package/dist/chunk-TSPZOMHC.js.map +1 -0
  90. package/dist/chunk-UNTPVD36.js +55 -0
  91. package/dist/chunk-UNTPVD36.js.map +1 -0
  92. package/dist/chunk-VRUJH4BO.js +88 -0
  93. package/dist/chunk-VRUJH4BO.js.map +1 -0
  94. package/dist/chunk-VZ7AMAFL.js +76 -0
  95. package/dist/chunk-VZ7AMAFL.js.map +1 -0
  96. package/dist/chunk-XFXDXEUN.js +24 -0
  97. package/dist/chunk-XFXDXEUN.js.map +1 -0
  98. package/dist/chunk-YZAA4LYG.js +169 -0
  99. package/dist/chunk-YZAA4LYG.js.map +1 -0
  100. package/dist/chunk-Z73NYSBZ.js +92 -0
  101. package/dist/chunk-Z73NYSBZ.js.map +1 -0
  102. package/dist/chunk-ZJRYBOEE.js +125 -0
  103. package/dist/chunk-ZJRYBOEE.js.map +1 -0
  104. package/dist/cli.js +5798 -0
  105. package/dist/cli.js.map +1 -0
  106. package/dist/db-BxaevAyc.d.ts +683 -0
  107. package/dist/index.d.ts +254 -0
  108. package/dist/index.js +1271 -0
  109. package/dist/index.js.map +1 -0
  110. package/dist/postinstall.js +167 -0
  111. package/dist/postinstall.js.map +1 -0
  112. package/dist/queries/affected.d.ts +14 -0
  113. package/dist/queries/affected.js +9 -0
  114. package/dist/queries/affected.js.map +1 -0
  115. package/dist/queries/bottlenecks.d.ts +18 -0
  116. package/dist/queries/bottlenecks.js +8 -0
  117. package/dist/queries/bottlenecks.js.map +1 -0
  118. package/dist/queries/by-kind.d.ts +20 -0
  119. package/dist/queries/by-kind.js +10 -0
  120. package/dist/queries/by-kind.js.map +1 -0
  121. package/dist/queries/call-graph.d.ts +13 -0
  122. package/dist/queries/call-graph.js +9 -0
  123. package/dist/queries/call-graph.js.map +1 -0
  124. package/dist/queries/change-surface.d.ts +10 -0
  125. package/dist/queries/change-surface.js +9 -0
  126. package/dist/queries/change-surface.js.map +1 -0
  127. package/dist/queries/clean-signature.d.ts +9 -0
  128. package/dist/queries/clean-signature.js +7 -0
  129. package/dist/queries/clean-signature.js.map +1 -0
  130. package/dist/queries/code.d.ts +17 -0
  131. package/dist/queries/code.js +9 -0
  132. package/dist/queries/code.js.map +1 -0
  133. package/dist/queries/complexity-hotspots.d.ts +19 -0
  134. package/dist/queries/complexity-hotspots.js +9 -0
  135. package/dist/queries/complexity-hotspots.js.map +1 -0
  136. package/dist/queries/complexity.d.ts +13 -0
  137. package/dist/queries/complexity.js +9 -0
  138. package/dist/queries/complexity.js.map +1 -0
  139. package/dist/queries/convergence.d.ts +11 -0
  140. package/dist/queries/convergence.js +9 -0
  141. package/dist/queries/convergence.js.map +1 -0
  142. package/dist/queries/coupling.d.ts +17 -0
  143. package/dist/queries/coupling.js +9 -0
  144. package/dist/queries/coupling.js.map +1 -0
  145. package/dist/queries/cycles.d.ts +16 -0
  146. package/dist/queries/cycles.js +8 -0
  147. package/dist/queries/cycles.js.map +1 -0
  148. package/dist/queries/dataflow.d.ts +19 -0
  149. package/dist/queries/dataflow.js +9 -0
  150. package/dist/queries/dataflow.js.map +1 -0
  151. package/dist/queries/dead.d.ts +10 -0
  152. package/dist/queries/dead.js +9 -0
  153. package/dist/queries/dead.js.map +1 -0
  154. package/dist/queries/deep-chains.d.ts +16 -0
  155. package/dist/queries/deep-chains.js +8 -0
  156. package/dist/queries/deep-chains.js.map +1 -0
  157. package/dist/queries/deps.d.ts +9 -0
  158. package/dist/queries/deps.js +9 -0
  159. package/dist/queries/deps.js.map +1 -0
  160. package/dist/queries/diff-impact.d.ts +13 -0
  161. package/dist/queries/diff-impact.js +9 -0
  162. package/dist/queries/diff-impact.js.map +1 -0
  163. package/dist/queries/doc-coverage.d.ts +14 -0
  164. package/dist/queries/doc-coverage.js +8 -0
  165. package/dist/queries/doc-coverage.js.map +1 -0
  166. package/dist/queries/drift.d.ts +25 -0
  167. package/dist/queries/drift.js +8 -0
  168. package/dist/queries/drift.js.map +1 -0
  169. package/dist/queries/extract-candidates.d.ts +25 -0
  170. package/dist/queries/extract-candidates.js +9 -0
  171. package/dist/queries/extract-candidates.js.map +1 -0
  172. package/dist/queries/fan.d.ts +29 -0
  173. package/dist/queries/fan.js +14 -0
  174. package/dist/queries/fan.js.map +1 -0
  175. package/dist/queries/files.d.ts +6 -0
  176. package/dist/queries/files.js +7 -0
  177. package/dist/queries/files.js.map +1 -0
  178. package/dist/queries/health.d.ts +18 -0
  179. package/dist/queries/health.js +21 -0
  180. package/dist/queries/health.js.map +1 -0
  181. package/dist/queries/hierarchy.d.ts +13 -0
  182. package/dist/queries/hierarchy.js +8 -0
  183. package/dist/queries/hierarchy.js.map +1 -0
  184. package/dist/queries/hotspots.d.ts +13 -0
  185. package/dist/queries/hotspots.js +8 -0
  186. package/dist/queries/hotspots.js.map +1 -0
  187. package/dist/queries/imports.d.ts +19 -0
  188. package/dist/queries/imports.js +12 -0
  189. package/dist/queries/imports.js.map +1 -0
  190. package/dist/queries/index.d.ts +47 -0
  191. package/dist/queries/index.js +207 -0
  192. package/dist/queries/index.js.map +1 -0
  193. package/dist/queries/isolated.d.ts +14 -0
  194. package/dist/queries/isolated.js +9 -0
  195. package/dist/queries/isolated.js.map +1 -0
  196. package/dist/queries/members.d.ts +10 -0
  197. package/dist/queries/members.js +8 -0
  198. package/dist/queries/members.js.map +1 -0
  199. package/dist/queries/methods.d.ts +6 -0
  200. package/dist/queries/methods.js +8 -0
  201. package/dist/queries/methods.js.map +1 -0
  202. package/dist/queries/outline.d.ts +10 -0
  203. package/dist/queries/outline.js +8 -0
  204. package/dist/queries/outline.js.map +1 -0
  205. package/dist/queries/passthrough-candidates.d.ts +18 -0
  206. package/dist/queries/passthrough-candidates.js +9 -0
  207. package/dist/queries/passthrough-candidates.js.map +1 -0
  208. package/dist/queries/redundant-reexports.d.ts +22 -0
  209. package/dist/queries/redundant-reexports.js +8 -0
  210. package/dist/queries/redundant-reexports.js.map +1 -0
  211. package/dist/queries/refs.d.ts +6 -0
  212. package/dist/queries/refs.js +7 -0
  213. package/dist/queries/refs.js.map +1 -0
  214. package/dist/queries/similar-chains.d.ts +29 -0
  215. package/dist/queries/similar-chains.js +8 -0
  216. package/dist/queries/similar-chains.js.map +1 -0
  217. package/dist/queries/similar-files.d.ts +19 -0
  218. package/dist/queries/similar-files.js +8 -0
  219. package/dist/queries/similar-files.js.map +1 -0
  220. package/dist/queries/similar-signatures.d.ts +21 -0
  221. package/dist/queries/similar-signatures.js +8 -0
  222. package/dist/queries/similar-signatures.js.map +1 -0
  223. package/dist/queries/similar.d.ts +34 -0
  224. package/dist/queries/similar.js +11 -0
  225. package/dist/queries/similar.js.map +1 -0
  226. package/dist/queries/slice.d.ts +21 -0
  227. package/dist/queries/slice.js +9 -0
  228. package/dist/queries/slice.js.map +1 -0
  229. package/dist/queries/stale-abstractions.d.ts +18 -0
  230. package/dist/queries/stale-abstractions.js +9 -0
  231. package/dist/queries/stale-abstractions.js.map +1 -0
  232. package/dist/queries/stats.d.ts +6 -0
  233. package/dist/queries/stats.js +7 -0
  234. package/dist/queries/stats.js.map +1 -0
  235. package/dist/queries/surface.d.ts +7 -0
  236. package/dist/queries/surface.js +8 -0
  237. package/dist/queries/surface.js.map +1 -0
  238. package/dist/queries/symbols.d.ts +6 -0
  239. package/dist/queries/symbols.js +9 -0
  240. package/dist/queries/symbols.js.map +1 -0
  241. package/dist/queries/system.d.ts +7 -0
  242. package/dist/queries/system.js +9 -0
  243. package/dist/queries/system.js.map +1 -0
  244. package/dist/queries/test-coverage.d.ts +22 -0
  245. package/dist/queries/test-coverage.js +11 -0
  246. package/dist/queries/test-coverage.js.map +1 -0
  247. package/dist/queries/trace.d.ts +6 -0
  248. package/dist/queries/trace.js +8 -0
  249. package/dist/queries/trace.js.map +1 -0
  250. package/dist/queries/wrapper-candidates.d.ts +17 -0
  251. package/dist/queries/wrapper-candidates.js +9 -0
  252. package/dist/queries/wrapper-candidates.js.map +1 -0
  253. package/dist/reindex-worker.js +368 -0
  254. package/dist/reindex-worker.js.map +1 -0
  255. package/docs/AGENT_GUIDE.md +359 -0
  256. package/package.json +70 -0
  257. package/reports/debloat/2026-04-10-scip-query-self-audit.md +161 -0
  258. package/skills/concrete-plan/SKILL.md +318 -0
  259. package/skills/scip-debloat/SKILL.md +413 -0
  260. package/skills/scip-explore/SKILL.md +235 -0
  261. package/skills/scip-verify/SKILL.md +323 -0
  262. package/src/cli.ts +1480 -0
  263. package/src/config.ts +117 -0
  264. package/src/db.ts +127 -0
  265. package/src/gitignore-filter.ts +143 -0
  266. package/src/index.ts +11 -0
  267. package/src/postinstall.ts +8 -0
  268. package/src/queries/affected.ts +86 -0
  269. package/src/queries/bottlenecks.ts +67 -0
  270. package/src/queries/by-kind.ts +204 -0
  271. package/src/queries/call-graph.ts +66 -0
  272. package/src/queries/change-surface.ts +110 -0
  273. package/src/queries/clean-signature.ts +22 -0
  274. package/src/queries/code.ts +101 -0
  275. package/src/queries/complexity-hotspots.ts +119 -0
  276. package/src/queries/complexity.ts +152 -0
  277. package/src/queries/convergence.ts +82 -0
  278. package/src/queries/coupling.ts +99 -0
  279. package/src/queries/cycles.ts +78 -0
  280. package/src/queries/dataflow.ts +128 -0
  281. package/src/queries/dead.ts +122 -0
  282. package/src/queries/deep-chains.ts +59 -0
  283. package/src/queries/deps.ts +46 -0
  284. package/src/queries/diff-impact.ts +204 -0
  285. package/src/queries/doc-coverage.ts +86 -0
  286. package/src/queries/drift.ts +224 -0
  287. package/src/queries/extract-candidates.ts +167 -0
  288. package/src/queries/fan.ts +148 -0
  289. package/src/queries/files.ts +16 -0
  290. package/src/queries/health.ts +324 -0
  291. package/src/queries/hierarchy.ts +49 -0
  292. package/src/queries/hotspots.ts +53 -0
  293. package/src/queries/imports.ts +95 -0
  294. package/src/queries/index.ts +45 -0
  295. package/src/queries/isolated.ts +67 -0
  296. package/src/queries/members.ts +54 -0
  297. package/src/queries/methods.ts +27 -0
  298. package/src/queries/outline.ts +52 -0
  299. package/src/queries/passthrough-candidates.ts +94 -0
  300. package/src/queries/redundant-reexports.ts +170 -0
  301. package/src/queries/refs.ts +27 -0
  302. package/src/queries/similar-chains.ts +314 -0
  303. package/src/queries/similar-files.ts +140 -0
  304. package/src/queries/similar-signatures.ts +151 -0
  305. package/src/queries/similar.ts +305 -0
  306. package/src/queries/slice.ts +154 -0
  307. package/src/queries/stale-abstractions.ts +82 -0
  308. package/src/queries/stats.ts +22 -0
  309. package/src/queries/surface.ts +34 -0
  310. package/src/queries/symbols.ts +39 -0
  311. package/src/queries/system.ts +86 -0
  312. package/src/queries/test-coverage.ts +106 -0
  313. package/src/queries/trace.ts +55 -0
  314. package/src/queries/wrapper-candidates.ts +112 -0
  315. package/src/query-support.ts +226 -0
  316. package/src/reindex/detect.ts +58 -0
  317. package/src/reindex/index.ts +153 -0
  318. package/src/reindex/indexers.ts +220 -0
  319. package/src/reindex/install.ts +125 -0
  320. package/src/reindex-worker.ts +35 -0
  321. package/src/setup.ts +202 -0
  322. package/src/symbol-parser.ts +278 -0
  323. package/src/types.ts +654 -0
  324. package/src/watch.ts +274 -0
  325. package/tests/gitignore-filter.test.ts +48 -0
  326. package/tests/queries.test.ts +300 -0
  327. package/tests/symbol-parser.test.ts +157 -0
  328. package/tsconfig.json +20 -0
  329. package/tsup.config.ts +40 -0
  330. package/vitest.config.ts +7 -0
@@ -0,0 +1,151 @@
1
+ import type { ScipDatabase } from '../db.js';
2
+ import type { SimilarSignatureGroup } from '../types.js';
3
+ import { shortenSymbol } from '../symbol-parser.js';
4
+
5
/**
 * Find functions with near-identical type signatures (same parameter types
 * and return type) but different names. These are "same shape" functions
 * that may be doing similar work even if their implementations differ.
 *
 * The signature text is taken from the part of the `documentation` column
 * that follows the first `|`. Each one is normalized with
 * `normalizeSignature` (function name, whitespace and case removed) and the
 * symbols are grouped by the normalized form.
 *
 * @param db - Index database to query.
 * @param opts.scope - Optional substring matched against `relative_path`
 *   with SQL `LIKE` (so `%` and `_` keep their wildcard meaning).
 * @param opts.minLoc - Minimum definition length in lines, inclusive
 *   (default 1).
 * @param opts.limit - Maximum number of groups to return; when omitted or 0
 *   every group is returned.
 * @returns Groups of two or more symbols sharing a signature, ordered by
 *   group size descending and then by total lines of code descending.
 */
export function similarSignatures(
  db: ScipDatabase,
  opts: { scope?: string; minLoc?: number; limit?: number } = {},
): SimilarSignatureGroup[] {
  const { scope, minLoc = 1, limit } = opts;

  // `scope` is spliced into the SQL text as a string literal, so single
  // quotes must be doubled; otherwise a scope such as "it's" produces a
  // malformed (or attacker-shaped) statement.
  const scopeFilter = scope
    ? `AND d.relative_path LIKE '%${scope.replace(/'/g, "''")}%'`
    : '';

  // Fetch every symbol whose documentation carries a '|' delimited signature
  // and whose definition is at least `minLoc` lines long. Newlines inside the
  // signature are flattened to spaces. Whether the signature is actually a
  // callable (contains '(') is decided later by normalizeSignature.
  const rows = db.all<{
    symbol: string;
    relative_path: string;
    start_line: number;
    end_line: number;
    loc: number;
    sig: string;
  }>(
    `SELECT
       gs.symbol,
       d.relative_path,
       der.start_line,
       der.end_line,
       (der.end_line - der.start_line + 1) AS loc,
       REPLACE(SUBSTR(gs.documentation, INSTR(gs.documentation, '|') + 1), char(10), ' ') AS sig
     FROM global_symbols gs
     JOIN defn_enclosing_ranges der ON gs.id = der.symbol_id
     JOIN documents d ON der.document_id = d.id
     WHERE gs.documentation IS NOT NULL
       AND gs.documentation != ''
       AND INSTR(gs.documentation, '|') > 0
       AND (der.end_line - der.start_line + 1) >= ?
       ${db.pathExclusionsFor('d')}
       ${db.symbolNoiseFor('gs')}
       ${scopeFilter}
     ORDER BY d.relative_path, der.start_line`,
    minLoc,
  );

  // Bucket the rows by normalized signature, preserving query order.
  const sigGroups = new Map<string, Array<{
    symbol: string;
    shortName: string;
    file: string;
    startLine: number;
    endLine: number;
    loc: number;
  }>>();

  for (const row of rows) {
    if (db.isIgnored(row.relative_path)) continue;

    const normalized = normalizeSignature(row.sig);
    if (!normalized) continue; // not a callable, or too generic to compare

    const entry = {
      symbol: row.symbol,
      shortName: shortenSymbol(row.symbol),
      file: row.relative_path,
      startLine: row.start_line,
      endLine: row.end_line,
      loc: row.loc,
    };

    const bucket = sigGroups.get(normalized);
    if (bucket) {
      bucket.push(entry);
    } else {
      sigGroups.set(normalized, [entry]);
    }
  }

  // Only signatures shared by at least two symbols are interesting.
  const results: SimilarSignatureGroup[] = [];
  for (const [signature, functions] of sigGroups) {
    if (functions.length < 2) continue;
    results.push({ signature, functions });
  }

  // Largest groups first (most duplication); ties broken by total LOC.
  results.sort((a, b) => {
    const sizeDiff = b.functions.length - a.functions.length;
    if (sizeDiff !== 0) return sizeDiff;
    const locA = a.functions.reduce((sum, f) => sum + f.loc, 0);
    const locB = b.functions.reduce((sum, f) => sum + f.loc, 0);
    return locB - locA;
  });

  return limit ? results.slice(0, limit) : results;
}
111
+
112
/**
 * One or more leading hover "kind" markers such as `(method)` or
 * `(type alias)`, each followed by optional whitespace.
 */
const SIGNATURE_KIND_PREFIX =
  /^(?:\((?:method|property|function|class|interface|enum|type alias|const|var)\)\s*)+/;

/**
 * Normalize a signature for comparison:
 *   1. Trim, then remove a surrounding markdown fence and leading kind
 *      markers (`(method)`, `(function)`, ...).
 *   2. Drop everything before the first '(' (removes the function name).
 *   3. Remove all whitespace and lowercase the rest.
 *
 * @param raw - Signature text as extracted from the documentation column.
 * @returns The normalized parameter/return signature, or null when the text
 *   is empty, has no '(' (not a callable form), or normalizes to fewer than
 *   three characters (e.g. a bare "()", which is too generic to compare).
 */
function normalizeSignature(raw: string): string | null {
  if (!raw) return null;

  // Trim BEFORE applying the anchored patterns: the text after the '|'
  // delimiter may start or end with whitespace, which would otherwise stop
  // `^` / `$` from matching and leave the fence or kind marker (and hence
  // the function name) inside the grouping key.
  const trimmed = raw.trim();
  if (!trimmed) return null;

  const cleaned = trimmed
    .replace(/^```\w*\s*/, '')
    .replace(/\s*```$/, '')
    .replace(SIGNATURE_KIND_PREFIX, '')
    .trim();

  // Everything from the first '(' onwards is the parameter/return signature.
  const parenIdx = cleaned.indexOf('(');
  if (parenIdx === -1) return null;

  const normalized = cleaned.slice(parenIdx).replace(/\s+/g, '').toLowerCase();

  // "()" alone carries no type information worth grouping on.
  return normalized.length < 3 ? null : normalized;
}
@@ -0,0 +1,305 @@
1
+ import type { ScipDatabase } from '../db.js';
2
+ import { findFirstSymbolMatch, getCalleeRowsForSymbol } from '../query-support.js';
3
+ import type { SimilarSymbolResult } from '../types.js';
4
+ import { shortenSymbol } from '../symbol-parser.js';
5
+
6
/**
 * Rank other symbols by how closely their callee sets resemble the callee
 * set of the first symbol matching `symbolPattern`.
 *
 * Similarity is an IDF-weighted cosine: a callee shared by few functions
 * contributes a lot, a callee used almost everywhere contributes little, so
 * common infrastructure calls do not inflate the score the way plain
 * Jaccard overlap would.
 *
 * @param db - Index database to query.
 * @param symbolPattern - Pattern used to locate the target symbol.
 * @param opts.minSimilarity - Minimum score a candidate must reach (default 0.4).
 * @param opts.limit - Maximum number of results (default 20).
 * @returns Matches sorted by similarity, highest first; empty when the
 *   target is not found or has no callees.
 */
export function similar(
  db: ScipDatabase,
  symbolPattern: string,
  opts: { minSimilarity?: number; limit?: number } = {},
): SimilarSymbolResult[] {
  const { minSimilarity = 0.4, limit = 20 } = opts;

  const target = findCallees(db, symbolPattern);
  if (!target || target.callees.size === 0) return [];

  const candidates = getAllCalleeFingerprints(db, {
    minCallees: 3,
    excludeSymbol: target.symbol,
  });

  // IDF is computed over the candidates plus the target itself.
  const idfWeights = computeIdf([target, ...candidates]);

  const ranked = candidates.flatMap((candidate): SimilarSymbolResult[] => {
    if (candidate.callees.size < 3) return [];

    const scored = weightedSimilarity(target.callees, candidate.callees, idfWeights);

    if (scored.similarity < minSimilarity) return [];
    if (scored.significantShared.length < 1) return []; // no real overlap

    const onlyInTarget = difference(target.callees, candidate.callees);
    const onlyInCandidate = difference(candidate.callees, target.callees);

    return [
      {
        symbolA: target.symbol,
        shortNameA: shortenSymbol(target.symbol),
        fileA: target.file,
        symbolB: candidate.symbol,
        shortNameB: shortenSymbol(candidate.symbol),
        fileB: candidate.file,
        similarity: scored.similarity,
        sharedCallees: scored.significantShared.map(shortenSymbol),
        uniqueToA: [...onlyInTarget].map(shortenSymbol),
        uniqueToB: [...onlyInCandidate].map(shortenSymbol),
      },
    ];
  });

  ranked.sort((left, right) => right.similarity - left.similarity);
  return ranked.slice(0, limit);
}
69
+
70
/**
 * Compare every pair of fingerprinted symbols in the codebase and report
 * the pairs whose IDF-weighted callee similarity is high enough.
 *
 * Pairs defined in the same file are skipped, and a pair needs at least two
 * significant shared callees. The scan stops early once more than
 * `limit * 5` pairs have been collected (checked after each outer symbol),
 * so the result is the best `limit` of the pairs seen up to that point.
 *
 * @param db - Index database to query.
 * @param opts.minSimilarity - Minimum score for a pair (default 0.5).
 * @param opts.limit - Maximum number of pairs returned (default 20).
 * @param opts.scope - Optional path filter forwarded to the fingerprint query.
 * @param opts.minCallees - Minimum distinct callees per symbol (default 4).
 * @returns Pairs sorted by similarity, highest first.
 */
export function similarAll(
  db: ScipDatabase,
  opts: { minSimilarity?: number; limit?: number; scope?: string; minCallees?: number } = {},
): SimilarSymbolResult[] {
  const { minSimilarity = 0.5, limit = 20, scope, minCallees = 4 } = opts;

  const fingerprints = getAllCalleeFingerprints(db, { minCallees, scope });
  const idfWeights = computeIdf(fingerprints);
  const count = fingerprints.length;

  const pairs: SimilarSymbolResult[] = [];

  for (let i = 0; i < count; i++) {
    const a = fingerprints[i]!;

    for (let j = i + 1; j < count; j++) {
      const b = fingerprints[j]!;

      if (a.file === b.file) continue;

      const scored = weightedSimilarity(a.callees, b.callees, idfWeights);

      if (scored.similarity < minSimilarity) continue;
      if (scored.significantShared.length < 2) continue;

      pairs.push({
        symbolA: a.symbol,
        shortNameA: shortenSymbol(a.symbol),
        fileA: a.file,
        symbolB: b.symbol,
        shortNameB: shortenSymbol(b.symbol),
        fileB: b.file,
        similarity: scored.similarity,
        sharedCallees: scored.significantShared.map(shortenSymbol),
        uniqueToA: [...difference(a.callees, b.callees)].map(shortenSymbol),
        uniqueToB: [...difference(b.callees, a.callees)].map(shortenSymbol),
      });
    }

    // Early exit: enough raw pairs gathered to pick `limit` from.
    if (pairs.length > limit * 5) break;
  }

  pairs.sort((left, right) => right.similarity - left.similarity);
  return pairs.slice(0, limit);
}
119
+
120
+ // ── TF-IDF Engine ──────────────────────────────────────────
121
+
122
/**
 * Build the inverse-document-frequency table for a set of fingerprints.
 *
 * Each fingerprint counts as one "document". For a callee referenced by
 * `df` of the `N` fingerprints the weight is `ln(N / df)`: rare callees get
 * a high weight, a callee referenced by every fingerprint gets 0.
 *
 * @param fingerprints - Fingerprints to derive the weights from.
 * @returns Map from callee symbol to its IDF weight; empty for empty input.
 */
function computeIdf(fingerprints: SymbolFingerprint[]): Map<string, number> {
  const weights = new Map<string, number>();
  const total = fingerprints.length;
  if (total === 0) return weights;

  // Document frequency: number of fingerprints mentioning each callee.
  const seenIn = new Map<string, number>();
  for (const { callees } of fingerprints) {
    callees.forEach((callee) => {
      seenIn.set(callee, (seenIn.get(callee) ?? 0) + 1);
    });
  }

  seenIn.forEach((frequency, callee) => {
    weights.set(callee, Math.log(total / frequency));
  });

  return weights;
}
150
+
151
/**
 * Median IDF per weight table, memoised so the table is copied and sorted
 * once rather than once per compared pair. Relies on a table not being
 * mutated after its first use here.
 */
const medianIdfCache = new WeakMap<Map<string, number>, number>();

/**
 * Compute the IDF-weighted cosine similarity between two callee sets.
 *
 * Every callee is one dimension whose coordinate is its IDF weight when the
 * callee is present in the set and 0 otherwise (callees missing from `idf`
 * weigh 0). The cosine of the two vectors therefore ignores ubiquitous
 * callees and emphasises rare shared ones.
 *
 * @param a - Callees of the first symbol.
 * @param b - Callees of the second symbol.
 * @param idf - IDF weight per callee.
 * @returns `similarity` (0 when nothing is shared or either vector has zero
 *   magnitude), `significantShared` (shared callees whose weight is at or
 *   above the median weight of `idf`, most distinctive first) and
 *   `trivialShared` (the remaining shared callees).
 */
function weightedSimilarity(
  a: Set<string>,
  b: Set<string>,
  idf: Map<string, number>,
): { similarity: number; significantShared: string[]; trivialShared: string[] } {
  const shared = intersection(a, b);
  if (shared.size === 0) return { similarity: 0, significantShared: [], trivialShared: [] };

  // Weighted dot product and squared magnitudes over the union of callees.
  let dotProduct = 0;
  let magA = 0;
  let magB = 0;

  const allCallees = new Set([...a, ...b]);
  for (const callee of allCallees) {
    const weight = idf.get(callee) ?? 0;
    const inA = a.has(callee) ? weight : 0;
    const inB = b.has(callee) ? weight : 0;
    dotProduct += inA * inB;
    magA += inA * inA;
    magB += inB * inB;
  }

  const magnitude = Math.sqrt(magA) * Math.sqrt(magB);
  const similarity = magnitude > 0 ? dotProduct / magnitude : 0;

  // The median depends only on the table, so reuse it across pairs.
  let medianIdf = medianIdfCache.get(idf);
  if (medianIdf === undefined) {
    medianIdf = getMedianIdf(idf);
    medianIdfCache.set(idf, medianIdf);
  }

  // Split shared callees into significant (>= median) and trivial (< median).
  const significantShared: string[] = [];
  const trivialShared: string[] = [];

  for (const callee of shared) {
    const weight = idf.get(callee) ?? 0;
    if (weight >= medianIdf) {
      significantShared.push(callee);
    } else {
      trivialShared.push(callee);
    }
  }

  // Most distinctive shared callees first.
  significantShared.sort((x, y) => (idf.get(y) ?? 0) - (idf.get(x) ?? 0));

  return { similarity, significantShared, trivialShared };
}
206
+
207
/**
 * Median of the weights stored in an IDF table.
 *
 * @param idf - IDF weight per callee.
 * @returns The middle weight for an odd count, the mean of the two middle
 *   weights for an even count, and 0 for an empty table.
 */
function getMedianIdf(idf: Map<string, number>): number {
  const ordered = Array.from(idf.values());
  ordered.sort((x, y) => x - y);

  const count = ordered.length;
  if (count === 0) return 0;

  const upper = Math.floor(count / 2);
  if (count % 2 !== 0) return ordered[upper]!;

  return (ordered[upper - 1]! + ordered[upper]!) / 2;
}
215
+
216
+ // ── Internal helpers ───────────────────────────────────────
217
+
218
+ interface SymbolFingerprint { // One symbol definition together with the set of callees used to compare it against other symbols.
219
+ symbol: string; // Symbol string as stored in the index; shortened with shortenSymbol only when results are built.
220
+ file: string; // Relative path of the document that contains the definition.
221
+ callees: Set<string>; // Distinct `symbol` values of the rows getCalleeRowsForSymbol returns for this definition.
222
+ }
223
+
224
/**
 * Resolve `symbolPattern` to its first matching symbol and collect that
 * symbol's callees into a fingerprint.
 *
 * @param db - Index database to query.
 * @param symbolPattern - Pattern handed to findFirstSymbolMatch.
 * @returns The fingerprint of the matched symbol, or null when nothing matches.
 */
function findCallees(
  db: ScipDatabase,
  symbolPattern: string,
): SymbolFingerprint | null {
  const match = findFirstSymbolMatch(db, symbolPattern);
  if (!match) return null;

  const calleeSymbols = getCalleeRowsForSymbol(db, match).map((row) => row.symbol);

  return {
    symbol: match.symbol,
    file: match.relativePath,
    callees: new Set(calleeSymbols),
  };
}
240
+
241
/**
 * Build a callee fingerprint for every symbol definition that spans at
 * least five lines, passes the database's path/symbol exclusions and is not
 * ignored, keeping only those with at least `minCallees` distinct callees.
 *
 * @param db - Index database to query.
 * @param opts.minCallees - Minimum number of distinct callees a symbol needs.
 * @param opts.scope - Optional substring matched against `relative_path`
 *   with SQL `LIKE` (so `%` and `_` keep their wildcard meaning).
 * @param opts.excludeSymbol - Optional symbol string to leave out.
 * @returns Fingerprints ordered by relative path.
 */
function getAllCalleeFingerprints(
  db: ScipDatabase,
  opts: { minCallees: number; scope?: string; excludeSymbol?: string },
): SymbolFingerprint[] {
  const { minCallees, scope, excludeSymbol } = opts;

  // Both values are spliced into the SQL text as string literals, so single
  // quotes are doubled for each of them (previously only excludeSymbol was).
  const escapeSqlLiteral = (value: string): string => value.replace(/'/g, "''");

  const scopeFilter = scope ? `AND d.relative_path LIKE '%${escapeSqlLiteral(scope)}%'` : '';
  const excludeFilter = excludeSymbol ? `AND gs.symbol != '${escapeSqlLiteral(excludeSymbol)}'` : '';

  const symbols = db.all<{
    id: number;
    symbol: string;
    document_id: number;
    start_line: number;
    end_line: number;
    relative_path: string;
  }>(
    `SELECT gs.id, gs.symbol, der.document_id, der.start_line, der.end_line, d.relative_path
     FROM global_symbols gs
     JOIN defn_enclosing_ranges der ON gs.id = der.symbol_id
     JOIN documents d ON der.document_id = d.id
     WHERE 1 = 1
       ${db.pathExclusionsFor('d')}
       ${db.symbolNoiseFor('gs')}
       AND (der.end_line - der.start_line + 1) >= 5
       ${scopeFilter}
       ${excludeFilter}
     ORDER BY d.relative_path`,
  );

  const fingerprints: SymbolFingerprint[] = [];

  for (const sym of symbols) {
    if (db.isIgnored(sym.relative_path)) continue;

    const calleeRows = getCalleeRowsForSymbol(db, {
      documentId: sym.document_id,
      startLine: sym.start_line,
      endLine: sym.end_line,
      symbolId: sym.id,
    });

    // Deduplicate: a callee referenced several times counts once.
    const callees = new Set(calleeRows.map((r) => r.symbol));
    if (callees.size >= minCallees) {
      fingerprints.push({ symbol: sym.symbol, file: sym.relative_path, callees });
    }
  }

  return fingerprints;
}
290
+
291
/**
 * Elements present in both sets.
 *
 * @returns A new set holding the members of `a` that `b` also contains, in
 *   `a`'s iteration order; neither input is modified.
 */
function intersection<T>(a: Set<T>, b: Set<T>): Set<T> {
  return new Set([...a].filter((member) => b.has(member)));
}
298
+
299
/**
 * Elements of the first set that are absent from the second.
 *
 * @returns A new set holding the members of `a` that `b` does not contain,
 *   in `a`'s iteration order; neither input is modified.
 */
function difference<T>(a: Set<T>, b: Set<T>): Set<T> {
  return new Set([...a].filter((member) => !b.has(member)));
}
@@ -0,0 +1,154 @@
1
+ import type { ScipDatabase } from '../db.js';
2
+ import { findFirstSymbolMatch, getCalleeRowsForSymbol, type SymbolMatch } from '../query-support.js';
3
+ import type { SliceResult } from '../types.js';
4
+ import { shortenSymbol } from '../symbol-parser.js';
5
+
6
/**
 * Reference-level program slice around the first symbol matching
 * `symbolPattern`.
 *
 * - `backward` (the default): "what feeds into this?" — symbols referenced
 *   inside the target's own definition, plus nearby definitions that precede
 *   it in the same file.
 * - `forward`: "what does this feed into?" — for every definition that
 *   references the target, the other symbols referenced alongside it.
 *
 * Works purely on the SCIP index, so it is language-agnostic.
 *
 * @param db - Index database to query.
 * @param symbolPattern - Pattern handed to `findFirstSymbolMatch`.
 * @param opts - `direction` selects the slice kind; defaults to `'backward'`.
 * @returns The slice, or `null` when no symbol matches the pattern.
 */
export function slice(
  db: ScipDatabase,
  symbolPattern: string,
  opts: { direction?: 'backward' | 'forward' } = {},
): SliceResult | null {
  const requested = opts.direction;

  const target = findFirstSymbolMatch(db, symbolPattern);
  if (!target) return null;

  // An omitted direction means a backward slice.
  if (requested === undefined || requested === 'backward') {
    return backwardSlice(db, target);
  }
  return forwardSlice(db, target);
}
35
+
36
+
37
/**
 * Builds the backward slice of `match`: the symbols that feed into it.
 *
 * Two sources are merged, de-duplicated by symbol string, callees first:
 *  1. every row returned by `getCalleeRowsForSymbol` for the target;
 *  2. up to 15 other definitions in the target's document whose enclosing
 *     range ends before the target starts, nearest first. Rows whose file
 *     `db.isIgnored` rejects are dropped.
 */
function backwardSlice(db: ScipDatabase, match: SymbolMatch): SliceResult {
  // Inputs: whatever the target's definition body references.
  const calleeRows = getCalleeRowsForSymbol(db, match);

  // Neighbouring definitions that end strictly above the target's first
  // line, ordered from closest to farthest.
  const predecessorRows = db.all<{ symbol: string; file: string }>(
    `SELECT DISTINCT gs.symbol, d.relative_path AS file
     FROM defn_enclosing_ranges der
     JOIN global_symbols gs ON der.symbol_id = gs.id
     JOIN documents d ON der.document_id = d.id
     WHERE der.document_id = ?
       AND der.end_line < ?
       AND gs.id != ?
       ${db.symbolNoiseFor('gs')}
     ORDER BY der.start_line DESC
     LIMIT 15`,
    match.documentId, match.startLine, match.symbolId,
  );

  const connectedSymbols: SliceResult['connectedSymbols'] = [];
  const emitted = new Set<string>();

  for (const { symbol, file } of calleeRows) {
    if (!emitted.has(symbol)) {
      emitted.add(symbol);
      connectedSymbols.push({
        symbol,
        shortName: shortenSymbol(symbol),
        file,
        relationship: 'referenced within definition (callee)',
      });
    }
  }

  for (const { symbol, file } of predecessorRows) {
    // Skip anything already reported as a callee, and anything in an ignored file.
    if (emitted.has(symbol) || db.isIgnored(file)) continue;
    emitted.add(symbol);
    connectedSymbols.push({
      symbol,
      shortName: shortenSymbol(symbol),
      file,
      relationship: 'defined before target in same file (local predecessor)',
    });
  }

  return {
    symbol: match.symbol,
    shortName: shortenSymbol(match.symbol),
    direction: 'backward',
    connectedSymbols,
  };
}
90
+
91
/**
 * Builds the forward slice of `match`: the symbols it is used together with.
 *
 * For each reference to the target, the query finds the definition range
 * enclosing that reference and then every other symbol referenced inside the
 * same range, keeping only symbols that have a definition range in a
 * different document from the reference site. At most 30 rows are fetched;
 * results are then de-duplicated by symbol string, and rows whose file
 * `db.isIgnored` rejects are dropped.
 */
function forwardSlice(db: ScipDatabase, match: SymbolMatch): SliceResult {
  const coUsageRows = db.all<{
    enclosing_symbol: string;
    enclosing_file: string;
    output_symbol: string;
    output_file: string;
  }>(
    `SELECT DISTINCT
       enc_gs.symbol AS enclosing_symbol,
       enc_d.relative_path AS enclosing_file,
       out_gs.symbol AS output_symbol,
       out_d.relative_path AS output_file
     FROM mentions ref_m
     JOIN chunks ref_c ON ref_m.chunk_id = ref_c.id
     JOIN documents ref_d ON ref_c.document_id = ref_d.id
     -- Find enclosing function at each reference site
     JOIN defn_enclosing_ranges enc_der
       ON enc_der.document_id = ref_d.id
       AND enc_der.start_line <= ref_c.start_line
       AND enc_der.end_line >= ref_c.end_line
     JOIN global_symbols enc_gs ON enc_der.symbol_id = enc_gs.id
     JOIN documents enc_d ON enc_der.document_id = enc_d.id
     -- Find other symbols referenced within that enclosing function
     JOIN mentions out_m ON out_m.role = 0
     JOIN chunks out_c ON out_m.chunk_id = out_c.id
       AND out_c.document_id = enc_der.document_id
       AND out_c.start_line >= enc_der.start_line
       AND out_c.end_line <= enc_der.end_line
     JOIN global_symbols out_gs ON out_m.symbol_id = out_gs.id
     JOIN defn_enclosing_ranges out_der ON out_gs.id = out_der.symbol_id
     JOIN documents out_d ON out_der.document_id = out_d.id
     WHERE ref_m.symbol_id = ? AND ref_m.role = 0
       AND out_gs.id != ? AND out_gs.id != enc_gs.id
       AND out_d.id != ref_d.id
       ${db.symbolNoiseFor('out_gs')}
       ${db.pathExclusionsFor('out_d')}
     ORDER BY out_d.relative_path
     LIMIT 30`,
    // The target id is bound twice: once to select its references, once to
    // keep the target itself out of the results.
    match.symbolId, match.symbolId,
  );

  const connectedSymbols: SliceResult['connectedSymbols'] = [];
  const emitted = new Set<string>();

  for (const row of coUsageRows) {
    const { output_symbol: symbol, output_file: file } = row;
    // The first enclosing definition seen for a symbol wins; later duplicates are skipped.
    const skip = emitted.has(symbol) || db.isIgnored(file);
    if (skip) continue;

    emitted.add(symbol);
    connectedSymbols.push({
      symbol,
      shortName: shortenSymbol(symbol),
      file,
      relationship: `used alongside target in ${shortenSymbol(row.enclosing_symbol)}`,
    });
  }

  return {
    symbol: match.symbol,
    shortName: shortenSymbol(match.symbol),
    direction: 'forward',
    connectedSymbols,
  };
}
@@ -0,0 +1,82 @@
1
+ import type { ScipDatabase } from '../db.js';
2
+ import { testFileExclusionSql } from '../query-support.js';
3
+ import type { StaleAbstraction } from '../types.js';
4
+ import { shortenSymbol } from '../symbol-parser.js';
5
+
6
/**
 * Find stale abstractions: type-level symbols (classes, interfaces, type
 * aliases) that have 0 or 1 cross-file consumers.
 *
 * A type that only one file uses is over-abstracted — it was designed
 * for reuse that never materialized. Large single-consumer types are
 * the strongest signal of wasted abstraction.
 *
 * @param db - Index database to query.
 * @param opts - Optional filters:
 *   - `scope`: substring that the defining file's relative path must contain
 *     (matched with SQL `LIKE`, so `%` and `_` act as wildcards);
 *   - `minLoc`: minimum size of the definition in lines (default 3);
 *   - `limit`: maximum number of rows fetched (default 30).
 * @returns Matching types, largest first. The row limit is applied in SQL
 *   before the ignore-list and type-file filters run, so fewer than `limit`
 *   entries may be returned even when more candidates exist.
 */
export function staleAbstractions(
  db: ScipDatabase,
  opts?: { scope?: string; minLoc?: number; limit?: number },
): StaleAbstraction[] {
  const { scope, minLoc = 3, limit = 30 } = opts ?? {};

  // `scope` is spliced into a single-quoted SQL literal, so embedded quotes
  // must be doubled; otherwise a scope such as "o'brien" would terminate the
  // literal early and break (or alter) the statement.
  const scopeFilter = scope
    ? `AND d.relative_path LIKE '%${scope.replace(/'/g, "''")}%'`
    : '';

  const rows = db.all<{
    symbol: string;
    file: string;
    start_line: number;
    end_line: number;
    loc: number;
    consumers: number;
  }>(
    `SELECT * FROM (
       SELECT
         gs.symbol,
         d.relative_path AS file,
         der.start_line,
         der.end_line,
         (der.end_line - der.start_line + 1) AS loc,
         (SELECT COUNT(DISTINCT ref_c.document_id)
          FROM mentions ref_m
          JOIN chunks ref_c ON ref_m.chunk_id = ref_c.id
          WHERE ref_m.symbol_id = gs.id
            AND ref_m.role = 0
            AND ref_c.document_id != der.document_id
         ) AS consumers
       FROM global_symbols gs
       JOIN defn_enclosing_ranges der ON gs.id = der.symbol_id
       JOIN documents d ON der.document_id = d.id
       WHERE 1 = 1
         ${db.pathExclusionsFor('d')}
         AND ${testFileExclusionSql('d')}
         ${db.symbolNoiseFor('gs')}
         -- Top-level type symbols: ends with # but does not contain nested #
         AND gs.symbol LIKE '%#'
         AND gs.symbol NOT LIKE '%#%#%'
         AND (der.end_line - der.start_line + 1) >= ?
         ${scopeFilter}
     ) WHERE consumers <= 1
     ORDER BY loc DESC
     LIMIT ?`,
    minLoc, limit,
  );

  return rows
    .filter((r) => !db.isIgnored(r.file))
    // Exclude types defined in dedicated type files (types.ts, types/, etc.)
    // These are intentional public API types, not premature abstractions.
    .filter((r) => {
      const basename = r.file.split('/').pop() ?? '';
      const isTypeFile = basename.includes('types') || r.file.includes('/types/');
      // Types in type files with 1 consumer are normal API types — skip them.
      // Types in type files with 0 consumers are genuinely unused — keep them.
      return !(isTypeFile && r.consumers > 0);
    })
    .map((r) => ({
      symbol: r.symbol,
      shortName: shortenSymbol(r.symbol),
      file: r.file,
      startLine: r.start_line,
      endLine: r.end_line,
      loc: r.loc,
      consumers: r.consumers,
    }));
}