scip-query 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/IMPROVEMENTS.md +143 -0
- package/PLAN.md +320 -0
- package/README.md +1213 -0
- package/dist/chunk-2QZ23IBN.js +55 -0
- package/dist/chunk-2QZ23IBN.js.map +1 -0
- package/dist/chunk-36OMT7ZJ.js +144 -0
- package/dist/chunk-36OMT7ZJ.js.map +1 -0
- package/dist/chunk-3E2X7RIE.js +101 -0
- package/dist/chunk-3E2X7RIE.js.map +1 -0
- package/dist/chunk-3UOUTZQT.js +45 -0
- package/dist/chunk-3UOUTZQT.js.map +1 -0
- package/dist/chunk-3ZZJVBIO.js +88 -0
- package/dist/chunk-3ZZJVBIO.js.map +1 -0
- package/dist/chunk-4TYLS5XX.js +10 -0
- package/dist/chunk-4TYLS5XX.js.map +1 -0
- package/dist/chunk-5FGUEU7N.js +101 -0
- package/dist/chunk-5FGUEU7N.js.map +1 -0
- package/dist/chunk-5WTJAXY2.js +61 -0
- package/dist/chunk-5WTJAXY2.js.map +1 -0
- package/dist/chunk-6NBLIDF4.js +24 -0
- package/dist/chunk-6NBLIDF4.js.map +1 -0
- package/dist/chunk-6SXADWLW.js +43 -0
- package/dist/chunk-6SXADWLW.js.map +1 -0
- package/dist/chunk-6VJ6Q7IE.js +65 -0
- package/dist/chunk-6VJ6Q7IE.js.map +1 -0
- package/dist/chunk-7OZPA5OO.js +258 -0
- package/dist/chunk-7OZPA5OO.js.map +1 -0
- package/dist/chunk-BEPIEVLR.js +76 -0
- package/dist/chunk-BEPIEVLR.js.map +1 -0
- package/dist/chunk-BFSCMC22.js +42 -0
- package/dist/chunk-BFSCMC22.js.map +1 -0
- package/dist/chunk-BP2ATLK2.js +110 -0
- package/dist/chunk-BP2ATLK2.js.map +1 -0
- package/dist/chunk-CM454WL3.js +114 -0
- package/dist/chunk-CM454WL3.js.map +1 -0
- package/dist/chunk-DCKMSTJ4.js +74 -0
- package/dist/chunk-DCKMSTJ4.js.map +1 -0
- package/dist/chunk-DEZKCZXD.js +40 -0
- package/dist/chunk-DEZKCZXD.js.map +1 -0
- package/dist/chunk-DVWGWHFW.js +99 -0
- package/dist/chunk-DVWGWHFW.js.map +1 -0
- package/dist/chunk-EMDQWNYR.js +102 -0
- package/dist/chunk-EMDQWNYR.js.map +1 -0
- package/dist/chunk-FFSWWE5O.js +33 -0
- package/dist/chunk-FFSWWE5O.js.map +1 -0
- package/dist/chunk-FGXRVW7G.js +73 -0
- package/dist/chunk-FGXRVW7G.js.map +1 -0
- package/dist/chunk-FUHJCHS4.js +158 -0
- package/dist/chunk-FUHJCHS4.js.map +1 -0
- package/dist/chunk-GJFURBEW.js +64 -0
- package/dist/chunk-GJFURBEW.js.map +1 -0
- package/dist/chunk-GTILYBH6.js +102 -0
- package/dist/chunk-GTILYBH6.js.map +1 -0
- package/dist/chunk-JJP7KQND.js +1 -0
- package/dist/chunk-JJP7KQND.js.map +1 -0
- package/dist/chunk-JKP5GH6T.js +213 -0
- package/dist/chunk-JKP5GH6T.js.map +1 -0
- package/dist/chunk-KCBMVQL5.js +38 -0
- package/dist/chunk-KCBMVQL5.js.map +1 -0
- package/dist/chunk-KVSW5KYP.js +78 -0
- package/dist/chunk-KVSW5KYP.js.map +1 -0
- package/dist/chunk-LAWMH22O.js +172 -0
- package/dist/chunk-LAWMH22O.js.map +1 -0
- package/dist/chunk-LB7OS35Q.js +72 -0
- package/dist/chunk-LB7OS35Q.js.map +1 -0
- package/dist/chunk-LUSIFBXO.js +57 -0
- package/dist/chunk-LUSIFBXO.js.map +1 -0
- package/dist/chunk-MBVNHJVN.js +44 -0
- package/dist/chunk-MBVNHJVN.js.map +1 -0
- package/dist/chunk-MGNMHKX3.js +15 -0
- package/dist/chunk-MGNMHKX3.js.map +1 -0
- package/dist/chunk-N5KEREIA.js +41 -0
- package/dist/chunk-N5KEREIA.js.map +1 -0
- package/dist/chunk-NDSQYIWT.js +71 -0
- package/dist/chunk-NDSQYIWT.js.map +1 -0
- package/dist/chunk-NUZ4OMU3.js +28 -0
- package/dist/chunk-NUZ4OMU3.js.map +1 -0
- package/dist/chunk-QOV2R2WT.js +170 -0
- package/dist/chunk-QOV2R2WT.js.map +1 -0
- package/dist/chunk-SEFSL2GF.js +78 -0
- package/dist/chunk-SEFSL2GF.js.map +1 -0
- package/dist/chunk-T6ARFSBZ.js +103 -0
- package/dist/chunk-T6ARFSBZ.js.map +1 -0
- package/dist/chunk-TBP6BICL.js +46 -0
- package/dist/chunk-TBP6BICL.js.map +1 -0
- package/dist/chunk-TDNNOR6D.js +97 -0
- package/dist/chunk-TDNNOR6D.js.map +1 -0
- package/dist/chunk-TSPZOMHC.js +195 -0
- package/dist/chunk-TSPZOMHC.js.map +1 -0
- package/dist/chunk-UNTPVD36.js +55 -0
- package/dist/chunk-UNTPVD36.js.map +1 -0
- package/dist/chunk-VRUJH4BO.js +88 -0
- package/dist/chunk-VRUJH4BO.js.map +1 -0
- package/dist/chunk-VZ7AMAFL.js +76 -0
- package/dist/chunk-VZ7AMAFL.js.map +1 -0
- package/dist/chunk-XFXDXEUN.js +24 -0
- package/dist/chunk-XFXDXEUN.js.map +1 -0
- package/dist/chunk-YZAA4LYG.js +169 -0
- package/dist/chunk-YZAA4LYG.js.map +1 -0
- package/dist/chunk-Z73NYSBZ.js +92 -0
- package/dist/chunk-Z73NYSBZ.js.map +1 -0
- package/dist/chunk-ZJRYBOEE.js +125 -0
- package/dist/chunk-ZJRYBOEE.js.map +1 -0
- package/dist/cli.js +5798 -0
- package/dist/cli.js.map +1 -0
- package/dist/db-BxaevAyc.d.ts +683 -0
- package/dist/index.d.ts +254 -0
- package/dist/index.js +1271 -0
- package/dist/index.js.map +1 -0
- package/dist/postinstall.js +167 -0
- package/dist/postinstall.js.map +1 -0
- package/dist/queries/affected.d.ts +14 -0
- package/dist/queries/affected.js +9 -0
- package/dist/queries/affected.js.map +1 -0
- package/dist/queries/bottlenecks.d.ts +18 -0
- package/dist/queries/bottlenecks.js +8 -0
- package/dist/queries/bottlenecks.js.map +1 -0
- package/dist/queries/by-kind.d.ts +20 -0
- package/dist/queries/by-kind.js +10 -0
- package/dist/queries/by-kind.js.map +1 -0
- package/dist/queries/call-graph.d.ts +13 -0
- package/dist/queries/call-graph.js +9 -0
- package/dist/queries/call-graph.js.map +1 -0
- package/dist/queries/change-surface.d.ts +10 -0
- package/dist/queries/change-surface.js +9 -0
- package/dist/queries/change-surface.js.map +1 -0
- package/dist/queries/clean-signature.d.ts +9 -0
- package/dist/queries/clean-signature.js +7 -0
- package/dist/queries/clean-signature.js.map +1 -0
- package/dist/queries/code.d.ts +17 -0
- package/dist/queries/code.js +9 -0
- package/dist/queries/code.js.map +1 -0
- package/dist/queries/complexity-hotspots.d.ts +19 -0
- package/dist/queries/complexity-hotspots.js +9 -0
- package/dist/queries/complexity-hotspots.js.map +1 -0
- package/dist/queries/complexity.d.ts +13 -0
- package/dist/queries/complexity.js +9 -0
- package/dist/queries/complexity.js.map +1 -0
- package/dist/queries/convergence.d.ts +11 -0
- package/dist/queries/convergence.js +9 -0
- package/dist/queries/convergence.js.map +1 -0
- package/dist/queries/coupling.d.ts +17 -0
- package/dist/queries/coupling.js +9 -0
- package/dist/queries/coupling.js.map +1 -0
- package/dist/queries/cycles.d.ts +16 -0
- package/dist/queries/cycles.js +8 -0
- package/dist/queries/cycles.js.map +1 -0
- package/dist/queries/dataflow.d.ts +19 -0
- package/dist/queries/dataflow.js +9 -0
- package/dist/queries/dataflow.js.map +1 -0
- package/dist/queries/dead.d.ts +10 -0
- package/dist/queries/dead.js +9 -0
- package/dist/queries/dead.js.map +1 -0
- package/dist/queries/deep-chains.d.ts +16 -0
- package/dist/queries/deep-chains.js +8 -0
- package/dist/queries/deep-chains.js.map +1 -0
- package/dist/queries/deps.d.ts +9 -0
- package/dist/queries/deps.js +9 -0
- package/dist/queries/deps.js.map +1 -0
- package/dist/queries/diff-impact.d.ts +13 -0
- package/dist/queries/diff-impact.js +9 -0
- package/dist/queries/diff-impact.js.map +1 -0
- package/dist/queries/doc-coverage.d.ts +14 -0
- package/dist/queries/doc-coverage.js +8 -0
- package/dist/queries/doc-coverage.js.map +1 -0
- package/dist/queries/drift.d.ts +25 -0
- package/dist/queries/drift.js +8 -0
- package/dist/queries/drift.js.map +1 -0
- package/dist/queries/extract-candidates.d.ts +25 -0
- package/dist/queries/extract-candidates.js +9 -0
- package/dist/queries/extract-candidates.js.map +1 -0
- package/dist/queries/fan.d.ts +29 -0
- package/dist/queries/fan.js +14 -0
- package/dist/queries/fan.js.map +1 -0
- package/dist/queries/files.d.ts +6 -0
- package/dist/queries/files.js +7 -0
- package/dist/queries/files.js.map +1 -0
- package/dist/queries/health.d.ts +18 -0
- package/dist/queries/health.js +21 -0
- package/dist/queries/health.js.map +1 -0
- package/dist/queries/hierarchy.d.ts +13 -0
- package/dist/queries/hierarchy.js +8 -0
- package/dist/queries/hierarchy.js.map +1 -0
- package/dist/queries/hotspots.d.ts +13 -0
- package/dist/queries/hotspots.js +8 -0
- package/dist/queries/hotspots.js.map +1 -0
- package/dist/queries/imports.d.ts +19 -0
- package/dist/queries/imports.js +12 -0
- package/dist/queries/imports.js.map +1 -0
- package/dist/queries/index.d.ts +47 -0
- package/dist/queries/index.js +207 -0
- package/dist/queries/index.js.map +1 -0
- package/dist/queries/isolated.d.ts +14 -0
- package/dist/queries/isolated.js +9 -0
- package/dist/queries/isolated.js.map +1 -0
- package/dist/queries/members.d.ts +10 -0
- package/dist/queries/members.js +8 -0
- package/dist/queries/members.js.map +1 -0
- package/dist/queries/methods.d.ts +6 -0
- package/dist/queries/methods.js +8 -0
- package/dist/queries/methods.js.map +1 -0
- package/dist/queries/outline.d.ts +10 -0
- package/dist/queries/outline.js +8 -0
- package/dist/queries/outline.js.map +1 -0
- package/dist/queries/passthrough-candidates.d.ts +18 -0
- package/dist/queries/passthrough-candidates.js +9 -0
- package/dist/queries/passthrough-candidates.js.map +1 -0
- package/dist/queries/redundant-reexports.d.ts +22 -0
- package/dist/queries/redundant-reexports.js +8 -0
- package/dist/queries/redundant-reexports.js.map +1 -0
- package/dist/queries/refs.d.ts +6 -0
- package/dist/queries/refs.js +7 -0
- package/dist/queries/refs.js.map +1 -0
- package/dist/queries/similar-chains.d.ts +29 -0
- package/dist/queries/similar-chains.js +8 -0
- package/dist/queries/similar-chains.js.map +1 -0
- package/dist/queries/similar-files.d.ts +19 -0
- package/dist/queries/similar-files.js +8 -0
- package/dist/queries/similar-files.js.map +1 -0
- package/dist/queries/similar-signatures.d.ts +21 -0
- package/dist/queries/similar-signatures.js +8 -0
- package/dist/queries/similar-signatures.js.map +1 -0
- package/dist/queries/similar.d.ts +34 -0
- package/dist/queries/similar.js +11 -0
- package/dist/queries/similar.js.map +1 -0
- package/dist/queries/slice.d.ts +21 -0
- package/dist/queries/slice.js +9 -0
- package/dist/queries/slice.js.map +1 -0
- package/dist/queries/stale-abstractions.d.ts +18 -0
- package/dist/queries/stale-abstractions.js +9 -0
- package/dist/queries/stale-abstractions.js.map +1 -0
- package/dist/queries/stats.d.ts +6 -0
- package/dist/queries/stats.js +7 -0
- package/dist/queries/stats.js.map +1 -0
- package/dist/queries/surface.d.ts +7 -0
- package/dist/queries/surface.js +8 -0
- package/dist/queries/surface.js.map +1 -0
- package/dist/queries/symbols.d.ts +6 -0
- package/dist/queries/symbols.js +9 -0
- package/dist/queries/symbols.js.map +1 -0
- package/dist/queries/system.d.ts +7 -0
- package/dist/queries/system.js +9 -0
- package/dist/queries/system.js.map +1 -0
- package/dist/queries/test-coverage.d.ts +22 -0
- package/dist/queries/test-coverage.js +11 -0
- package/dist/queries/test-coverage.js.map +1 -0
- package/dist/queries/trace.d.ts +6 -0
- package/dist/queries/trace.js +8 -0
- package/dist/queries/trace.js.map +1 -0
- package/dist/queries/wrapper-candidates.d.ts +17 -0
- package/dist/queries/wrapper-candidates.js +9 -0
- package/dist/queries/wrapper-candidates.js.map +1 -0
- package/dist/reindex-worker.js +368 -0
- package/dist/reindex-worker.js.map +1 -0
- package/docs/AGENT_GUIDE.md +359 -0
- package/package.json +70 -0
- package/reports/debloat/2026-04-10-scip-query-self-audit.md +161 -0
- package/skills/concrete-plan/SKILL.md +318 -0
- package/skills/scip-debloat/SKILL.md +413 -0
- package/skills/scip-explore/SKILL.md +235 -0
- package/skills/scip-verify/SKILL.md +323 -0
- package/src/cli.ts +1480 -0
- package/src/config.ts +117 -0
- package/src/db.ts +127 -0
- package/src/gitignore-filter.ts +143 -0
- package/src/index.ts +11 -0
- package/src/postinstall.ts +8 -0
- package/src/queries/affected.ts +86 -0
- package/src/queries/bottlenecks.ts +67 -0
- package/src/queries/by-kind.ts +204 -0
- package/src/queries/call-graph.ts +66 -0
- package/src/queries/change-surface.ts +110 -0
- package/src/queries/clean-signature.ts +22 -0
- package/src/queries/code.ts +101 -0
- package/src/queries/complexity-hotspots.ts +119 -0
- package/src/queries/complexity.ts +152 -0
- package/src/queries/convergence.ts +82 -0
- package/src/queries/coupling.ts +99 -0
- package/src/queries/cycles.ts +78 -0
- package/src/queries/dataflow.ts +128 -0
- package/src/queries/dead.ts +122 -0
- package/src/queries/deep-chains.ts +59 -0
- package/src/queries/deps.ts +46 -0
- package/src/queries/diff-impact.ts +204 -0
- package/src/queries/doc-coverage.ts +86 -0
- package/src/queries/drift.ts +224 -0
- package/src/queries/extract-candidates.ts +167 -0
- package/src/queries/fan.ts +148 -0
- package/src/queries/files.ts +16 -0
- package/src/queries/health.ts +324 -0
- package/src/queries/hierarchy.ts +49 -0
- package/src/queries/hotspots.ts +53 -0
- package/src/queries/imports.ts +95 -0
- package/src/queries/index.ts +45 -0
- package/src/queries/isolated.ts +67 -0
- package/src/queries/members.ts +54 -0
- package/src/queries/methods.ts +27 -0
- package/src/queries/outline.ts +52 -0
- package/src/queries/passthrough-candidates.ts +94 -0
- package/src/queries/redundant-reexports.ts +170 -0
- package/src/queries/refs.ts +27 -0
- package/src/queries/similar-chains.ts +314 -0
- package/src/queries/similar-files.ts +140 -0
- package/src/queries/similar-signatures.ts +151 -0
- package/src/queries/similar.ts +305 -0
- package/src/queries/slice.ts +154 -0
- package/src/queries/stale-abstractions.ts +82 -0
- package/src/queries/stats.ts +22 -0
- package/src/queries/surface.ts +34 -0
- package/src/queries/symbols.ts +39 -0
- package/src/queries/system.ts +86 -0
- package/src/queries/test-coverage.ts +106 -0
- package/src/queries/trace.ts +55 -0
- package/src/queries/wrapper-candidates.ts +112 -0
- package/src/query-support.ts +226 -0
- package/src/reindex/detect.ts +58 -0
- package/src/reindex/index.ts +153 -0
- package/src/reindex/indexers.ts +220 -0
- package/src/reindex/install.ts +125 -0
- package/src/reindex-worker.ts +35 -0
- package/src/setup.ts +202 -0
- package/src/symbol-parser.ts +278 -0
- package/src/types.ts +654 -0
- package/src/watch.ts +274 -0
- package/tests/gitignore-filter.test.ts +48 -0
- package/tests/queries.test.ts +300 -0
- package/tests/symbol-parser.test.ts +157 -0
- package/tsconfig.json +20 -0
- package/tsup.config.ts +40 -0
- package/vitest.config.ts +7 -0
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
import type { ScipDatabase } from '../db.js';
|
|
2
|
+
import type { SimilarSignatureGroup } from '../types.js';
|
|
3
|
+
import { shortenSymbol } from '../symbol-parser.js';
|
|
4
|
+
|
|
5
|
+
/**
 * Find functions with near-identical type signatures (same parameter types
 * and return type) but different names. These are "same shape" functions
 * that may be doing similar work even if their internal implementation differs.
 *
 * The SCIP `documentation` field often contains the full type signature
 * after a `|` delimiter. The query extracts everything after the first `|`,
 * each signature is normalized (function name, whitespace, and case removed),
 * and definitions are grouped by the normalized form.
 *
 * Groups with 2+ functions = same-shape candidates.
 *
 * @param db   Index database to query.
 * @param opts Optional filters:
 *   - `scope`: substring that the definition's relative path must contain.
 *   - `minLoc`: minimum size of the enclosing range in lines (default 1).
 *   - `limit`: maximum number of groups returned; omitted or 0 returns all.
 * @returns Groups ordered by member count (largest first), ties broken by the
 *          combined line count of the group (largest first).
 */
export function similarSignatures(
  db: ScipDatabase,
  opts: { scope?: string; minLoc?: number; limit?: number } = {},
): SimilarSignatureGroup[] {
  const { scope, minLoc = 1, limit } = opts;

  // `scope` is spliced into a SQL string literal, so single quotes must be
  // doubled; otherwise a scope such as "it's" terminates the literal early and
  // breaks (or alters) the query.
  const scopeFilter = scope
    ? `AND d.relative_path LIKE '%${scope.replace(/'/g, "''")}%'`
    : '';

  // Get all function-level symbols with their documentation/signature strings.
  // Only symbols whose documentation contains '|' (the signature delimiter) are
  // selected; whether the signature is callable (contains '(') is decided later
  // by normalizeSignature.
  const rows = db.all<{
    symbol: string;
    relative_path: string;
    start_line: number;
    end_line: number;
    loc: number;
    sig: string;
  }>(
    `SELECT
       gs.symbol,
       d.relative_path,
       der.start_line,
       der.end_line,
       (der.end_line - der.start_line + 1) AS loc,
       REPLACE(SUBSTR(gs.documentation, INSTR(gs.documentation, '|') + 1), char(10), ' ') AS sig
     FROM global_symbols gs
     JOIN defn_enclosing_ranges der ON gs.id = der.symbol_id
     JOIN documents d ON der.document_id = d.id
     WHERE gs.documentation IS NOT NULL
       AND gs.documentation != ''
       AND INSTR(gs.documentation, '|') > 0
       AND (der.end_line - der.start_line + 1) >= ?
       ${db.pathExclusionsFor('d')}
       ${db.symbolNoiseFor('gs')}
       ${scopeFilter}
     ORDER BY d.relative_path, der.start_line`,
    minLoc,
  );

  // Group by normalized signature
  const sigGroups = new Map<string, Array<{
    symbol: string;
    shortName: string;
    file: string;
    startLine: number;
    endLine: number;
    loc: number;
  }>>();

  for (const row of rows) {
    // Second, per-path filter applied on top of the SQL-level exclusions.
    if (db.isIgnored(row.relative_path)) continue;

    // null means "not a callable signature" (or too generic to be useful).
    const normalized = normalizeSignature(row.sig);
    if (!normalized) continue;

    const entry = {
      symbol: row.symbol,
      shortName: shortenSymbol(row.symbol),
      file: row.relative_path,
      startLine: row.start_line,
      endLine: row.end_line,
      loc: row.loc,
    };

    const existing = sigGroups.get(normalized);
    if (existing) {
      existing.push(entry);
    } else {
      sigGroups.set(normalized, [entry]);
    }
  }

  // Collect groups with 2+ functions
  const results: SimilarSignatureGroup[] = [];

  for (const [signature, functions] of sigGroups) {
    if (functions.length < 2) continue;

    results.push({ signature, functions });
  }

  // Sort by group size descending (largest groups = most duplication),
  // then by total LOC in the group
  results.sort((a, b) => {
    const sizeDiff = b.functions.length - a.functions.length;
    if (sizeDiff !== 0) return sizeDiff;
    const locA = a.functions.reduce((sum, f) => sum + f.loc, 0);
    const locB = b.functions.reduce((sum, f) => sum + f.loc, 0);
    return locB - locA;
  });

  return limit ? results.slice(0, limit) : results;
}
|
|
111
|
+
|
|
112
|
+
/**
 * Reduce a raw signature string to a name-independent "shape" key.
 *
 * Steps, in order:
 *   1. Remove a leading/trailing markdown code fence and one leading SCIP kind
 *      tag of each type (`(method)`, `(function)`, ...), then trim.
 *   2. Drop everything before the first `(`, which removes the function name.
 *   3. Remove all whitespace and lowercase the remainder.
 *
 * @param raw Signature text as extracted from the symbol documentation.
 * @returns The normalized shape, or null when the input is blank, contains no
 *          `(`, or normalizes to fewer than 3 characters (e.g. a bare "()").
 */
function normalizeSignature(raw: string): string | null {
  if (!raw || !raw.trim()) return null;

  // Applied one after another, so the order of this table matters.
  const decorations: RegExp[] = [
    /^```\w*\s*/,
    /\s*```$/,
    /^\(method\)\s*/,
    /^\(property\)\s*/,
    /^\(function\)\s*/,
    /^\(class\)\s*/,
    /^\(interface\)\s*/,
    /^\(enum\)\s*/,
    /^\(type alias\)\s*/,
    /^\(const\)\s*/,
    /^\(var\)\s*/,
  ];

  let cleaned = raw;
  for (const decoration of decorations) {
    cleaned = cleaned.replace(decoration, '');
  }
  cleaned = cleaned.trim();

  // Everything from the first '(' onward is the parameter/return part.
  const openParen = cleaned.indexOf('(');
  if (openParen === -1) return null;

  const shape = cleaned.slice(openParen).replace(/\s+/g, '').toLowerCase();

  // "()" alone is too generic to group on.
  return shape.length < 3 ? null : shape;
}
|
|
@@ -0,0 +1,305 @@
|
|
|
1
|
+
import type { ScipDatabase } from '../db.js';
|
|
2
|
+
import { findFirstSymbolMatch, getCalleeRowsForSymbol } from '../query-support.js';
|
|
3
|
+
import type { SimilarSymbolResult } from '../types.js';
|
|
4
|
+
import { shortenSymbol } from '../symbol-parser.js';
|
|
5
|
+
|
|
6
|
+
/**
 * Rank other functions by how closely their callee sets resemble the callee
 * set of the first symbol matching `symbolPattern`, using IDF-weighted cosine
 * similarity.
 *
 * Unweighted set overlap over-rewards functions that merely share ubiquitous
 * infrastructure callees. Weighting each callee by its rarity corrects that:
 *
 *   - a callee referenced by few functions carries a high weight (signal)
 *   - a callee referenced by many functions carries a low weight (noise)
 *
 * Candidates need at least 3 distinct callees and at least one shared callee
 * classified as significant by `weightedSimilarity`.
 *
 * @param db            Index database to query.
 * @param symbolPattern Pattern used to locate the target symbol.
 * @param opts          `minSimilarity` (default 0.4) and `limit` (default 20).
 * @returns Matches sorted by similarity, highest first, capped at `limit`.
 *          Empty when the target is not found or has no callees.
 */
export function similar(
  db: ScipDatabase,
  symbolPattern: string,
  opts: { minSimilarity?: number; limit?: number } = {},
): SimilarSymbolResult[] {
  const { minSimilarity = 0.4, limit = 20 } = opts;

  const target = findCallees(db, symbolPattern);
  if (!target || target.callees.size === 0) return [];

  const candidates = getAllCalleeFingerprints(db, {
    minCallees: 3,
    excludeSymbol: target.symbol,
  });

  // IDF is computed over the candidates plus the target itself.
  const idfWeights = computeIdf([target, ...candidates]);

  const matches = candidates.flatMap((candidate): SimilarSymbolResult[] => {
    if (candidate.callees.size < 3) return [];

    const { similarity, significantShared } = weightedSimilarity(
      target.callees,
      candidate.callees,
      idfWeights,
    );

    if (similarity < minSimilarity) return [];
    if (significantShared.length < 1) return []; // no real overlap

    return [
      {
        symbolA: target.symbol,
        shortNameA: shortenSymbol(target.symbol),
        fileA: target.file,
        symbolB: candidate.symbol,
        shortNameB: shortenSymbol(candidate.symbol),
        fileB: candidate.file,
        similarity,
        sharedCallees: significantShared.map(shortenSymbol),
        uniqueToA: [...difference(target.callees, candidate.callees)].map(shortenSymbol),
        uniqueToB: [...difference(candidate.callees, target.callees)].map(shortenSymbol),
      },
    ];
  });

  matches.sort((x, y) => y.similarity - x.similarity);
  return matches.slice(0, limit);
}
|
|
69
|
+
|
|
70
|
+
/**
 * Compare every pair of fingerprinted functions and report the most similar
 * cross-file pairs, using IDF-weighted similarity to suppress infrastructure
 * noise.
 *
 * Pairs defined in the same file are skipped, and a pair must share at least
 * two callees classified as significant by `weightedSimilarity`.
 *
 * NOTE: the pairwise scan stops early once more than `limit * 5` pairs have
 * been collected, so on large indexes the result is the best of the pairs seen
 * so far rather than a guaranteed global top-`limit`.
 *
 * @param db   Index database to query.
 * @param opts `minSimilarity` (default 0.5), `limit` (default 20), optional
 *             path `scope`, and `minCallees` (default 4).
 * @returns Pairs sorted by similarity, highest first, capped at `limit`.
 */
export function similarAll(
  db: ScipDatabase,
  opts: { minSimilarity?: number; limit?: number; scope?: string; minCallees?: number } = {},
): SimilarSymbolResult[] {
  const { minSimilarity = 0.5, limit = 20, scope, minCallees = 4 } = opts;

  const fingerprints = getAllCalleeFingerprints(db, { minCallees, scope });
  const idfWeights = computeIdf(fingerprints);

  const pairs: SimilarSymbolResult[] = [];

  for (let i = 0; i < fingerprints.length; i++) {
    const left = fingerprints[i]!;

    for (let j = i + 1; j < fingerprints.length; j++) {
      const right = fingerprints[j]!;

      if (left.file === right.file) continue;

      const { similarity, significantShared } = weightedSimilarity(
        left.callees,
        right.callees,
        idfWeights,
      );

      if (similarity < minSimilarity) continue;
      if (significantShared.length < 2) continue;

      pairs.push({
        symbolA: left.symbol,
        shortNameA: shortenSymbol(left.symbol),
        fileA: left.file,
        symbolB: right.symbol,
        shortNameB: shortenSymbol(right.symbol),
        fileB: right.file,
        similarity,
        sharedCallees: significantShared.map(shortenSymbol),
        uniqueToA: [...difference(left.callees, right.callees)].map(shortenSymbol),
        uniqueToB: [...difference(right.callees, left.callees)].map(shortenSymbol),
      });
    }

    // Early cut-off, checked once per outer iteration (see NOTE above).
    if (pairs.length > limit * 5) break;
  }

  pairs.sort((x, y) => y.similarity - x.similarity);
  return pairs.slice(0, limit);
}
|
|
119
|
+
|
|
120
|
+
// ── TF-IDF Engine ──────────────────────────────────────────
|
|
121
|
+
|
|
122
|
+
/**
 * Build the inverse-document-frequency weight of every callee.
 *
 *   IDF(callee) = ln(N / df(callee))
 *
 * where N is the number of fingerprints and df is how many of them contain
 * the callee. A callee present in every fingerprint therefore weighs 0.
 *
 * High IDF = rare callee = strong similarity signal.
 * Low IDF  = ubiquitous callee = noise.
 *
 * @param fingerprints Fingerprints forming the "corpus".
 * @returns Map from callee symbol to its IDF weight; empty for an empty corpus.
 */
function computeIdf(fingerprints: SymbolFingerprint[]): Map<string, number> {
  const total = fingerprints.length;
  if (total === 0) return new Map();

  // Document frequency: in how many fingerprints each callee appears.
  const docFreq = new Map<string, number>();
  for (const { callees } of fingerprints) {
    for (const callee of callees) {
      const seen = docFreq.get(callee) ?? 0;
      docFreq.set(callee, seen + 1);
    }
  }

  return new Map(
    [...docFreq].map(([callee, df]): [string, number] => [callee, Math.log(total / df)]),
  );
}
|
|
150
|
+
|
|
151
|
+
/**
 * Median IDF per weight map, memoized.
 *
 * The median only depends on the weight map, yet the similarity function is
 * called once per candidate pair; recomputing it would copy and sort the whole
 * map on every call. Entries are keyed weakly so a map can be collected once
 * the query that built it finishes. Assumes a weight map is not mutated after
 * its first use here.
 */
const medianIdfCache = new WeakMap<Map<string, number>, number>();

/**
 * Compute IDF-weighted cosine similarity between two callee sets.
 *
 * Each callee is a dimension whose weight is its IDF score (0 when the callee
 * is missing from `idf`). The cosine of the two weighted vectors de-emphasizes
 * ubiquitous callees and emphasizes rare shared ones.
 *
 * Shared callees are additionally split into "significant" (IDF at or above
 * the median IDF of the whole map) and "trivial" (below the median).
 *
 * @param a   Callee set of the first function.
 * @param b   Callee set of the second function.
 * @param idf IDF weight per callee.
 * @returns `similarity` (0 when nothing is shared or either vector has zero
 *          magnitude), `significantShared` sorted by IDF descending, and
 *          `trivialShared` in the iteration order of `a`.
 */
function weightedSimilarity(
  a: Set<string>,
  b: Set<string>,
  idf: Map<string, number>,
): { similarity: number; significantShared: string[]; trivialShared: string[] } {
  const shared = intersection(a, b);
  if (shared.size === 0) return { similarity: 0, significantShared: [], trivialShared: [] };

  // Compute weighted dot product and magnitudes
  let dotProduct = 0;
  let magA = 0;
  let magB = 0;

  const allCallees = new Set([...a, ...b]);
  for (const callee of allCallees) {
    const weight = idf.get(callee) ?? 0;
    const inA = a.has(callee) ? weight : 0;
    const inB = b.has(callee) ? weight : 0;
    dotProduct += inA * inB;
    magA += inA * inA;
    magB += inB * inB;
  }

  const magnitude = Math.sqrt(magA) * Math.sqrt(magB);
  const similarity = magnitude > 0 ? dotProduct / magnitude : 0;

  // Split shared callees into significant (high IDF) and trivial (low IDF)
  let medianIdf = medianIdfCache.get(idf);
  if (medianIdf === undefined) {
    medianIdf = getMedianIdf(idf);
    medianIdfCache.set(idf, medianIdf);
  }

  const significantShared: string[] = [];
  const trivialShared: string[] = [];

  for (const callee of shared) {
    const weight = idf.get(callee) ?? 0;
    if (weight >= medianIdf) {
      significantShared.push(callee);
    } else {
      trivialShared.push(callee);
    }
  }

  // Sort significant callees by IDF descending (most distinctive first)
  significantShared.sort((x, y) => (idf.get(y) ?? 0) - (idf.get(x) ?? 0));

  return { similarity, significantShared, trivialShared };
}
|
|
206
|
+
|
|
207
|
+
/**
 * Median of all IDF weights in the map.
 *
 * @param idf IDF weight per callee.
 * @returns 0 for an empty map; the middle value for an odd count; the mean of
 *          the two middle values for an even count.
 */
function getMedianIdf(idf: Map<string, number>): number {
  if (idf.size === 0) return 0;

  const ascending = Array.from(idf.values());
  ascending.sort((x, y) => x - y);

  const upperMid = Math.floor(ascending.length / 2);
  if (ascending.length % 2 !== 0) {
    return ascending[upperMid]!;
  }
  return (ascending[upperMid - 1]! + ascending[upperMid]!) / 2;
}
|
|
215
|
+
|
|
216
|
+
// ── Internal helpers ───────────────────────────────────────
|
|
217
|
+
|
|
218
|
+
/** Callee "fingerprint" of one symbol definition, the unit compared by the similarity queries. */
interface SymbolFingerprint {
  /** Full symbol string of the definition. */
  symbol: string;
  /** Relative path of the document that contains the definition. */
  file: string;
  /** Distinct symbol strings of the callees found for the definition. */
  callees: Set<string>;
}
|
|
223
|
+
|
|
224
|
+
/**
 * Resolve `symbolPattern` to its first matching symbol and collect that
 * symbol's callees into a fingerprint.
 *
 * @param db            Index database to query.
 * @param symbolPattern Pattern handed to `findFirstSymbolMatch`.
 * @returns The fingerprint of the matched symbol, or null when nothing matches.
 */
function findCallees(
  db: ScipDatabase,
  symbolPattern: string,
): SymbolFingerprint | null {
  const match = findFirstSymbolMatch(db, symbolPattern);
  if (!match) return null;

  const rows = getCalleeRowsForSymbol(db, match);
  const callees = new Set(rows.map((row) => row.symbol));

  return { symbol: match.symbol, file: match.relativePath, callees };
}
|
|
240
|
+
|
|
241
|
+
/**
 * Build a callee fingerprint for every definition that passes the filters.
 *
 * Definitions are taken from `defn_enclosing_ranges`, restricted to ranges of
 * at least 5 lines, with the database's path exclusions and symbol-noise
 * filters applied. Callees are then looked up one definition at a time.
 *
 * @param db   Index database to query.
 * @param opts
 *   - `minCallees`: minimum number of distinct callees a definition must have
 *     to be included.
 *   - `scope`: optional substring the definition's relative path must contain.
 *   - `excludeSymbol`: optional full symbol string to leave out.
 * @returns Fingerprints ordered by relative path.
 */
function getAllCalleeFingerprints(
  db: ScipDatabase,
  opts: { minCallees: number; scope?: string; excludeSymbol?: string },
): SymbolFingerprint[] {
  const { minCallees, scope, excludeSymbol } = opts;

  // Both values are spliced into SQL string literals, so single quotes must be
  // doubled in each of them; an unescaped quote in `scope` would otherwise
  // terminate the literal and break (or alter) the query.
  const escapeSqlLiteral = (value: string): string => value.replace(/'/g, "''");
  const scopeFilter = scope ? `AND d.relative_path LIKE '%${escapeSqlLiteral(scope)}%'` : '';
  const excludeFilter = excludeSymbol ? `AND gs.symbol != '${escapeSqlLiteral(excludeSymbol)}'` : '';

  const symbols = db.all<{
    id: number;
    symbol: string;
    document_id: number;
    start_line: number;
    end_line: number;
    relative_path: string;
  }>(
    `SELECT gs.id, gs.symbol, der.document_id, der.start_line, der.end_line, d.relative_path
     FROM global_symbols gs
     JOIN defn_enclosing_ranges der ON gs.id = der.symbol_id
     JOIN documents d ON der.document_id = d.id
     WHERE 1 = 1
       ${db.pathExclusionsFor('d')}
       ${db.symbolNoiseFor('gs')}
       AND (der.end_line - der.start_line + 1) >= 5
       ${scopeFilter}
       ${excludeFilter}
     ORDER BY d.relative_path`,
  );

  const fingerprints: SymbolFingerprint[] = [];

  for (const sym of symbols) {
    // Second, per-path filter applied on top of the SQL-level exclusions.
    if (db.isIgnored(sym.relative_path)) continue;

    const calleeRows = getCalleeRowsForSymbol(db, {
      documentId: sym.document_id,
      startLine: sym.start_line,
      endLine: sym.end_line,
      symbolId: sym.id,
    });

    // The Set collapses repeated calls to the same callee.
    const callees = new Set(calleeRows.map((r) => r.symbol));
    if (callees.size >= minCallees) {
      fingerprints.push({ symbol: sym.symbol, file: sym.relative_path, callees });
    }
  }

  return fingerprints;
}
|
|
290
|
+
|
|
291
|
+
/**
 * Set intersection: the elements of `a` that are also in `b`.
 *
 * The result is a new set whose iteration order follows `a`; neither input
 * is modified.
 */
function intersection<T>(a: Set<T>, b: Set<T>): Set<T> {
  const shared = Array.from(a).filter((member) => b.has(member));
  return new Set<T>(shared);
}
|
|
298
|
+
|
|
299
|
+
/**
 * Set difference: the elements of `a` that are not in `b`.
 *
 * The result is a new set whose iteration order follows `a`; neither input
 * is modified.
 */
function difference<T>(a: Set<T>, b: Set<T>): Set<T> {
  const onlyInA = Array.from(a).filter((member) => !b.has(member));
  return new Set<T>(onlyInA);
}
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
import type { ScipDatabase } from '../db.js';
|
|
2
|
+
import { findFirstSymbolMatch, getCalleeRowsForSymbol, type SymbolMatch } from '../query-support.js';
|
|
3
|
+
import type { SliceResult } from '../types.js';
|
|
4
|
+
import { shortenSymbol } from '../symbol-parser.js';
|
|
5
|
+
|
|
6
|
+
/**
 * Reference-level program slice for the first symbol matching a pattern.
 *
 * - `backward` (default): "what feeds into this?" — symbols referenced
 *   inside the target's definition, plus definitions that end before the
 *   target in the same file.
 * - `forward`: "what does this feed into?" — for each definition that
 *   encloses a reference to the target, the other symbols referenced inside
 *   that definition.
 *
 * Language-agnostic: works with any SCIP index.
 *
 * @param db - SCIP index database to query.
 * @param symbolPattern - Pattern resolved through `findFirstSymbolMatch`.
 * @param opts - `direction` selects the slice; defaults to `'backward'`.
 * @returns The slice, or `null` when no symbol matches the pattern.
 */
export function slice(
  db: ScipDatabase,
  symbolPattern: string,
  opts: { direction?: 'backward' | 'forward' } = {},
): SliceResult | null {
  const { direction = 'backward' } = opts;

  const target = findFirstSymbolMatch(db, symbolPattern);
  if (!target) return null;

  // Any value other than 'backward' selects the forward slice.
  return direction === 'backward' ? backwardSlice(db, target) : forwardSlice(db, target);
}
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
/**
 * Backward slice: gather the symbols that feed into `match`.
 *
 * Two sources are merged, callees first, de-duplicated by symbol string:
 *  1. the rows `getCalleeRowsForSymbol` returns for the target;
 *  2. up to 15 other definitions in the same document whose enclosing range
 *     ends before the target's start line, nearest first. Entries whose file
 *     is ignored by `db.isIgnored` are dropped.
 */
function backwardSlice(db: ScipDatabase, match: SymbolMatch): SliceResult {
  // Inputs referenced from inside the target's own definition range.
  const calleeRows = getCalleeRowsForSymbol(db, match);

  // Definitions in the same file that finish before the target begins.
  const precedingDefs = db.all<{ symbol: string; file: string }>(
    `SELECT DISTINCT gs.symbol, d.relative_path AS file
     FROM defn_enclosing_ranges der
     JOIN global_symbols gs ON der.symbol_id = gs.id
     JOIN documents d ON der.document_id = d.id
     WHERE der.document_id = ?
       AND der.end_line < ?
       AND gs.id != ?
       ${db.symbolNoiseFor('gs')}
     ORDER BY der.start_line DESC
     LIMIT 15`,
    match.documentId, match.startLine, match.symbolId,
  );

  const emitted = new Set<string>();
  const connectedSymbols: SliceResult['connectedSymbols'] = [];

  // Remember the symbol and append its entry in one step.
  const record = (entry: SliceResult['connectedSymbols'][number]): void => {
    emitted.add(entry.symbol);
    connectedSymbols.push(entry);
  };

  for (const callee of calleeRows) {
    if (!emitted.has(callee.symbol)) {
      record({
        symbol: callee.symbol,
        shortName: shortenSymbol(callee.symbol),
        file: callee.file,
        relationship: 'referenced within definition (callee)',
      });
    }
  }

  for (const pred of precedingDefs) {
    if (!emitted.has(pred.symbol) && !db.isIgnored(pred.file)) {
      record({
        symbol: pred.symbol,
        shortName: shortenSymbol(pred.symbol),
        file: pred.file,
        relationship: 'defined before target in same file (local predecessor)',
      });
    }
  }

  return {
    symbol: match.symbol,
    shortName: shortenSymbol(match.symbol),
    direction: 'backward',
    connectedSymbols,
  };
}
|
|
90
|
+
|
|
91
|
+
/**
 * Forward slice: gather the symbols that `match` is used alongside.
 *
 * For every reference to the target (mention role 0), the query locates each
 * definition range enclosing the reference and lists the other symbols
 * referenced inside that range. A listed symbol must have a definition in a
 * document other than the one holding the reference, and must be neither the
 * target nor the enclosing symbol itself. At most 30 rows are fetched,
 * ordered by the listed symbol's file; the result is then de-duplicated by
 * symbol string and stripped of files ignored by `db.isIgnored`.
 */
function forwardSlice(db: ScipDatabase, match: SymbolMatch): SliceResult {
  const coReferenced = db.all<{
    enclosing_symbol: string;
    enclosing_file: string;
    output_symbol: string;
    output_file: string;
  }>(
    `SELECT DISTINCT
       enc_gs.symbol AS enclosing_symbol,
       enc_d.relative_path AS enclosing_file,
       out_gs.symbol AS output_symbol,
       out_d.relative_path AS output_file
     FROM mentions ref_m
     JOIN chunks ref_c ON ref_m.chunk_id = ref_c.id
     JOIN documents ref_d ON ref_c.document_id = ref_d.id
     -- Find enclosing function at each reference site
     JOIN defn_enclosing_ranges enc_der
       ON enc_der.document_id = ref_d.id
       AND enc_der.start_line <= ref_c.start_line
       AND enc_der.end_line >= ref_c.end_line
     JOIN global_symbols enc_gs ON enc_der.symbol_id = enc_gs.id
     JOIN documents enc_d ON enc_der.document_id = enc_d.id
     -- Find other symbols referenced within that enclosing function
     JOIN mentions out_m ON out_m.role = 0
     JOIN chunks out_c ON out_m.chunk_id = out_c.id
       AND out_c.document_id = enc_der.document_id
       AND out_c.start_line >= enc_der.start_line
       AND out_c.end_line <= enc_der.end_line
     JOIN global_symbols out_gs ON out_m.symbol_id = out_gs.id
     JOIN defn_enclosing_ranges out_der ON out_gs.id = out_der.symbol_id
     JOIN documents out_d ON out_der.document_id = out_d.id
     WHERE ref_m.symbol_id = ? AND ref_m.role = 0
       AND out_gs.id != ? AND out_gs.id != enc_gs.id
       AND out_d.id != ref_d.id
       ${db.symbolNoiseFor('out_gs')}
       ${db.pathExclusionsFor('out_d')}
     ORDER BY out_d.relative_path
     LIMIT 30`,
    match.symbolId, match.symbolId,
  );

  const emitted = new Set<string>();
  const connectedSymbols: SliceResult['connectedSymbols'] = [];

  for (const row of coReferenced) {
    // Keep only the first row per output symbol, and only from non-ignored files.
    if (!emitted.has(row.output_symbol) && !db.isIgnored(row.output_file)) {
      emitted.add(row.output_symbol);
      connectedSymbols.push({
        symbol: row.output_symbol,
        shortName: shortenSymbol(row.output_symbol),
        file: row.output_file,
        relationship: `used alongside target in ${shortenSymbol(row.enclosing_symbol)}`,
      });
    }
  }

  return {
    symbol: match.symbol,
    shortName: shortenSymbol(match.symbol),
    direction: 'forward',
    connectedSymbols,
  };
}
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
import type { ScipDatabase } from '../db.js';
|
|
2
|
+
import { testFileExclusionSql } from '../query-support.js';
|
|
3
|
+
import type { StaleAbstraction } from '../types.js';
|
|
4
|
+
import { shortenSymbol } from '../symbol-parser.js';
|
|
5
|
+
|
|
6
|
+
/**
 * Find stale abstractions: type-level symbols (classes, interfaces, type
 * aliases) that have 0 or 1 cross-file consumers.
 *
 * A type that only one file uses is over-abstracted — it was designed
 * for reuse that never materialized. Large single-consumer types are
 * the strongest signal of wasted abstraction.
 *
 * Candidates are symbols whose string ends with `#` and contains no nested
 * `#`, defined outside excluded/test paths, spanning at least `minLoc` lines.
 * A consumer is a distinct document, other than the defining one, that
 * references the symbol (mention role 0).
 *
 * Note: `limit` is applied in SQL, before the ignore and type-file filters
 * run, so fewer than `limit` entries may be returned.
 *
 * @param db - SCIP index database to query.
 * @param opts.scope - Optional substring the defining file's relative path
 *   must contain (matched with SQL `LIKE`, so `%` and `_` act as wildcards).
 * @param opts.minLoc - Minimum definition size in lines (default 3).
 * @param opts.limit - Maximum rows fetched, largest definitions first
 *   (default 30).
 * @returns Matching abstractions ordered by descending size.
 */
export function staleAbstractions(
  db: ScipDatabase,
  opts?: { scope?: string; minLoc?: number; limit?: number },
): StaleAbstraction[] {
  const { scope, minLoc = 3, limit = 30 } = opts ?? {};

  // `scope` is bound as a parameter rather than interpolated, so quotes in it
  // can neither break the statement nor inject SQL. The parameter list must
  // follow placeholder order in the query: minLoc, [scope pattern], limit.
  const scopeFilter = scope ? 'AND d.relative_path LIKE ?' : '';
  const params: Array<string | number> = scope
    ? [minLoc, `%${scope}%`, limit]
    : [minLoc, limit];

  const rows = db.all<{
    symbol: string;
    file: string;
    start_line: number;
    end_line: number;
    loc: number;
    consumers: number;
  }>(
    `SELECT * FROM (
       SELECT
         gs.symbol,
         d.relative_path AS file,
         der.start_line,
         der.end_line,
         (der.end_line - der.start_line + 1) AS loc,
         (SELECT COUNT(DISTINCT ref_c.document_id)
          FROM mentions ref_m
          JOIN chunks ref_c ON ref_m.chunk_id = ref_c.id
          WHERE ref_m.symbol_id = gs.id
            AND ref_m.role = 0
            AND ref_c.document_id != der.document_id
         ) AS consumers
       FROM global_symbols gs
       JOIN defn_enclosing_ranges der ON gs.id = der.symbol_id
       JOIN documents d ON der.document_id = d.id
       WHERE 1 = 1
         ${db.pathExclusionsFor('d')}
         AND ${testFileExclusionSql('d')}
         ${db.symbolNoiseFor('gs')}
         -- Top-level type symbols: ends with # but does not contain nested #
         AND gs.symbol LIKE '%#'
         AND gs.symbol NOT LIKE '%#%#%'
         AND (der.end_line - der.start_line + 1) >= ?
         ${scopeFilter}
     ) WHERE consumers <= 1
     ORDER BY loc DESC
     LIMIT ?`,
    ...params,
  );

  return rows
    .filter((r) => !db.isIgnored(r.file))
    // Exclude types defined in dedicated type files (types.ts, types/, etc.)
    // These are intentional public API types, not premature abstractions.
    .filter((r) => {
      const basename = r.file.split('/').pop() ?? '';
      const isTypeFile = basename.includes('types') || r.file.includes('/types/');
      // Types in type files with 1 consumer are normal API types — skip them.
      // Types in type files with 0 consumers are genuinely unused — keep them.
      return !(isTypeFile && r.consumers > 0);
    })
    .map((r) => ({
      symbol: r.symbol,
      shortName: shortenSymbol(r.symbol),
      file: r.file,
      startLine: r.start_line,
      endLine: r.end_line,
      loc: r.loc,
      consumers: r.consumers,
    }));
}
|