scip-query 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/IMPROVEMENTS.md +143 -0
- package/PLAN.md +320 -0
- package/README.md +1213 -0
- package/dist/chunk-2QZ23IBN.js +55 -0
- package/dist/chunk-2QZ23IBN.js.map +1 -0
- package/dist/chunk-36OMT7ZJ.js +144 -0
- package/dist/chunk-36OMT7ZJ.js.map +1 -0
- package/dist/chunk-3E2X7RIE.js +101 -0
- package/dist/chunk-3E2X7RIE.js.map +1 -0
- package/dist/chunk-3UOUTZQT.js +45 -0
- package/dist/chunk-3UOUTZQT.js.map +1 -0
- package/dist/chunk-3ZZJVBIO.js +88 -0
- package/dist/chunk-3ZZJVBIO.js.map +1 -0
- package/dist/chunk-4TYLS5XX.js +10 -0
- package/dist/chunk-4TYLS5XX.js.map +1 -0
- package/dist/chunk-5FGUEU7N.js +101 -0
- package/dist/chunk-5FGUEU7N.js.map +1 -0
- package/dist/chunk-5WTJAXY2.js +61 -0
- package/dist/chunk-5WTJAXY2.js.map +1 -0
- package/dist/chunk-6NBLIDF4.js +24 -0
- package/dist/chunk-6NBLIDF4.js.map +1 -0
- package/dist/chunk-6SXADWLW.js +43 -0
- package/dist/chunk-6SXADWLW.js.map +1 -0
- package/dist/chunk-6VJ6Q7IE.js +65 -0
- package/dist/chunk-6VJ6Q7IE.js.map +1 -0
- package/dist/chunk-7OZPA5OO.js +258 -0
- package/dist/chunk-7OZPA5OO.js.map +1 -0
- package/dist/chunk-BEPIEVLR.js +76 -0
- package/dist/chunk-BEPIEVLR.js.map +1 -0
- package/dist/chunk-BFSCMC22.js +42 -0
- package/dist/chunk-BFSCMC22.js.map +1 -0
- package/dist/chunk-BP2ATLK2.js +110 -0
- package/dist/chunk-BP2ATLK2.js.map +1 -0
- package/dist/chunk-CM454WL3.js +114 -0
- package/dist/chunk-CM454WL3.js.map +1 -0
- package/dist/chunk-DCKMSTJ4.js +74 -0
- package/dist/chunk-DCKMSTJ4.js.map +1 -0
- package/dist/chunk-DEZKCZXD.js +40 -0
- package/dist/chunk-DEZKCZXD.js.map +1 -0
- package/dist/chunk-DVWGWHFW.js +99 -0
- package/dist/chunk-DVWGWHFW.js.map +1 -0
- package/dist/chunk-EMDQWNYR.js +102 -0
- package/dist/chunk-EMDQWNYR.js.map +1 -0
- package/dist/chunk-FFSWWE5O.js +33 -0
- package/dist/chunk-FFSWWE5O.js.map +1 -0
- package/dist/chunk-FGXRVW7G.js +73 -0
- package/dist/chunk-FGXRVW7G.js.map +1 -0
- package/dist/chunk-FUHJCHS4.js +158 -0
- package/dist/chunk-FUHJCHS4.js.map +1 -0
- package/dist/chunk-GJFURBEW.js +64 -0
- package/dist/chunk-GJFURBEW.js.map +1 -0
- package/dist/chunk-GTILYBH6.js +102 -0
- package/dist/chunk-GTILYBH6.js.map +1 -0
- package/dist/chunk-JJP7KQND.js +1 -0
- package/dist/chunk-JJP7KQND.js.map +1 -0
- package/dist/chunk-JKP5GH6T.js +213 -0
- package/dist/chunk-JKP5GH6T.js.map +1 -0
- package/dist/chunk-KCBMVQL5.js +38 -0
- package/dist/chunk-KCBMVQL5.js.map +1 -0
- package/dist/chunk-KVSW5KYP.js +78 -0
- package/dist/chunk-KVSW5KYP.js.map +1 -0
- package/dist/chunk-LAWMH22O.js +172 -0
- package/dist/chunk-LAWMH22O.js.map +1 -0
- package/dist/chunk-LB7OS35Q.js +72 -0
- package/dist/chunk-LB7OS35Q.js.map +1 -0
- package/dist/chunk-LUSIFBXO.js +57 -0
- package/dist/chunk-LUSIFBXO.js.map +1 -0
- package/dist/chunk-MBVNHJVN.js +44 -0
- package/dist/chunk-MBVNHJVN.js.map +1 -0
- package/dist/chunk-MGNMHKX3.js +15 -0
- package/dist/chunk-MGNMHKX3.js.map +1 -0
- package/dist/chunk-N5KEREIA.js +41 -0
- package/dist/chunk-N5KEREIA.js.map +1 -0
- package/dist/chunk-NDSQYIWT.js +71 -0
- package/dist/chunk-NDSQYIWT.js.map +1 -0
- package/dist/chunk-NUZ4OMU3.js +28 -0
- package/dist/chunk-NUZ4OMU3.js.map +1 -0
- package/dist/chunk-QOV2R2WT.js +170 -0
- package/dist/chunk-QOV2R2WT.js.map +1 -0
- package/dist/chunk-SEFSL2GF.js +78 -0
- package/dist/chunk-SEFSL2GF.js.map +1 -0
- package/dist/chunk-T6ARFSBZ.js +103 -0
- package/dist/chunk-T6ARFSBZ.js.map +1 -0
- package/dist/chunk-TBP6BICL.js +46 -0
- package/dist/chunk-TBP6BICL.js.map +1 -0
- package/dist/chunk-TDNNOR6D.js +97 -0
- package/dist/chunk-TDNNOR6D.js.map +1 -0
- package/dist/chunk-TSPZOMHC.js +195 -0
- package/dist/chunk-TSPZOMHC.js.map +1 -0
- package/dist/chunk-UNTPVD36.js +55 -0
- package/dist/chunk-UNTPVD36.js.map +1 -0
- package/dist/chunk-VRUJH4BO.js +88 -0
- package/dist/chunk-VRUJH4BO.js.map +1 -0
- package/dist/chunk-VZ7AMAFL.js +76 -0
- package/dist/chunk-VZ7AMAFL.js.map +1 -0
- package/dist/chunk-XFXDXEUN.js +24 -0
- package/dist/chunk-XFXDXEUN.js.map +1 -0
- package/dist/chunk-YZAA4LYG.js +169 -0
- package/dist/chunk-YZAA4LYG.js.map +1 -0
- package/dist/chunk-Z73NYSBZ.js +92 -0
- package/dist/chunk-Z73NYSBZ.js.map +1 -0
- package/dist/chunk-ZJRYBOEE.js +125 -0
- package/dist/chunk-ZJRYBOEE.js.map +1 -0
- package/dist/cli.js +5798 -0
- package/dist/cli.js.map +1 -0
- package/dist/db-BxaevAyc.d.ts +683 -0
- package/dist/index.d.ts +254 -0
- package/dist/index.js +1271 -0
- package/dist/index.js.map +1 -0
- package/dist/postinstall.js +167 -0
- package/dist/postinstall.js.map +1 -0
- package/dist/queries/affected.d.ts +14 -0
- package/dist/queries/affected.js +9 -0
- package/dist/queries/affected.js.map +1 -0
- package/dist/queries/bottlenecks.d.ts +18 -0
- package/dist/queries/bottlenecks.js +8 -0
- package/dist/queries/bottlenecks.js.map +1 -0
- package/dist/queries/by-kind.d.ts +20 -0
- package/dist/queries/by-kind.js +10 -0
- package/dist/queries/by-kind.js.map +1 -0
- package/dist/queries/call-graph.d.ts +13 -0
- package/dist/queries/call-graph.js +9 -0
- package/dist/queries/call-graph.js.map +1 -0
- package/dist/queries/change-surface.d.ts +10 -0
- package/dist/queries/change-surface.js +9 -0
- package/dist/queries/change-surface.js.map +1 -0
- package/dist/queries/clean-signature.d.ts +9 -0
- package/dist/queries/clean-signature.js +7 -0
- package/dist/queries/clean-signature.js.map +1 -0
- package/dist/queries/code.d.ts +17 -0
- package/dist/queries/code.js +9 -0
- package/dist/queries/code.js.map +1 -0
- package/dist/queries/complexity-hotspots.d.ts +19 -0
- package/dist/queries/complexity-hotspots.js +9 -0
- package/dist/queries/complexity-hotspots.js.map +1 -0
- package/dist/queries/complexity.d.ts +13 -0
- package/dist/queries/complexity.js +9 -0
- package/dist/queries/complexity.js.map +1 -0
- package/dist/queries/convergence.d.ts +11 -0
- package/dist/queries/convergence.js +9 -0
- package/dist/queries/convergence.js.map +1 -0
- package/dist/queries/coupling.d.ts +17 -0
- package/dist/queries/coupling.js +9 -0
- package/dist/queries/coupling.js.map +1 -0
- package/dist/queries/cycles.d.ts +16 -0
- package/dist/queries/cycles.js +8 -0
- package/dist/queries/cycles.js.map +1 -0
- package/dist/queries/dataflow.d.ts +19 -0
- package/dist/queries/dataflow.js +9 -0
- package/dist/queries/dataflow.js.map +1 -0
- package/dist/queries/dead.d.ts +10 -0
- package/dist/queries/dead.js +9 -0
- package/dist/queries/dead.js.map +1 -0
- package/dist/queries/deep-chains.d.ts +16 -0
- package/dist/queries/deep-chains.js +8 -0
- package/dist/queries/deep-chains.js.map +1 -0
- package/dist/queries/deps.d.ts +9 -0
- package/dist/queries/deps.js +9 -0
- package/dist/queries/deps.js.map +1 -0
- package/dist/queries/diff-impact.d.ts +13 -0
- package/dist/queries/diff-impact.js +9 -0
- package/dist/queries/diff-impact.js.map +1 -0
- package/dist/queries/doc-coverage.d.ts +14 -0
- package/dist/queries/doc-coverage.js +8 -0
- package/dist/queries/doc-coverage.js.map +1 -0
- package/dist/queries/drift.d.ts +25 -0
- package/dist/queries/drift.js +8 -0
- package/dist/queries/drift.js.map +1 -0
- package/dist/queries/extract-candidates.d.ts +25 -0
- package/dist/queries/extract-candidates.js +9 -0
- package/dist/queries/extract-candidates.js.map +1 -0
- package/dist/queries/fan.d.ts +29 -0
- package/dist/queries/fan.js +14 -0
- package/dist/queries/fan.js.map +1 -0
- package/dist/queries/files.d.ts +6 -0
- package/dist/queries/files.js +7 -0
- package/dist/queries/files.js.map +1 -0
- package/dist/queries/health.d.ts +18 -0
- package/dist/queries/health.js +21 -0
- package/dist/queries/health.js.map +1 -0
- package/dist/queries/hierarchy.d.ts +13 -0
- package/dist/queries/hierarchy.js +8 -0
- package/dist/queries/hierarchy.js.map +1 -0
- package/dist/queries/hotspots.d.ts +13 -0
- package/dist/queries/hotspots.js +8 -0
- package/dist/queries/hotspots.js.map +1 -0
- package/dist/queries/imports.d.ts +19 -0
- package/dist/queries/imports.js +12 -0
- package/dist/queries/imports.js.map +1 -0
- package/dist/queries/index.d.ts +47 -0
- package/dist/queries/index.js +207 -0
- package/dist/queries/index.js.map +1 -0
- package/dist/queries/isolated.d.ts +14 -0
- package/dist/queries/isolated.js +9 -0
- package/dist/queries/isolated.js.map +1 -0
- package/dist/queries/members.d.ts +10 -0
- package/dist/queries/members.js +8 -0
- package/dist/queries/members.js.map +1 -0
- package/dist/queries/methods.d.ts +6 -0
- package/dist/queries/methods.js +8 -0
- package/dist/queries/methods.js.map +1 -0
- package/dist/queries/outline.d.ts +10 -0
- package/dist/queries/outline.js +8 -0
- package/dist/queries/outline.js.map +1 -0
- package/dist/queries/passthrough-candidates.d.ts +18 -0
- package/dist/queries/passthrough-candidates.js +9 -0
- package/dist/queries/passthrough-candidates.js.map +1 -0
- package/dist/queries/redundant-reexports.d.ts +22 -0
- package/dist/queries/redundant-reexports.js +8 -0
- package/dist/queries/redundant-reexports.js.map +1 -0
- package/dist/queries/refs.d.ts +6 -0
- package/dist/queries/refs.js +7 -0
- package/dist/queries/refs.js.map +1 -0
- package/dist/queries/similar-chains.d.ts +29 -0
- package/dist/queries/similar-chains.js +8 -0
- package/dist/queries/similar-chains.js.map +1 -0
- package/dist/queries/similar-files.d.ts +19 -0
- package/dist/queries/similar-files.js +8 -0
- package/dist/queries/similar-files.js.map +1 -0
- package/dist/queries/similar-signatures.d.ts +21 -0
- package/dist/queries/similar-signatures.js +8 -0
- package/dist/queries/similar-signatures.js.map +1 -0
- package/dist/queries/similar.d.ts +34 -0
- package/dist/queries/similar.js +11 -0
- package/dist/queries/similar.js.map +1 -0
- package/dist/queries/slice.d.ts +21 -0
- package/dist/queries/slice.js +9 -0
- package/dist/queries/slice.js.map +1 -0
- package/dist/queries/stale-abstractions.d.ts +18 -0
- package/dist/queries/stale-abstractions.js +9 -0
- package/dist/queries/stale-abstractions.js.map +1 -0
- package/dist/queries/stats.d.ts +6 -0
- package/dist/queries/stats.js +7 -0
- package/dist/queries/stats.js.map +1 -0
- package/dist/queries/surface.d.ts +7 -0
- package/dist/queries/surface.js +8 -0
- package/dist/queries/surface.js.map +1 -0
- package/dist/queries/symbols.d.ts +6 -0
- package/dist/queries/symbols.js +9 -0
- package/dist/queries/symbols.js.map +1 -0
- package/dist/queries/system.d.ts +7 -0
- package/dist/queries/system.js +9 -0
- package/dist/queries/system.js.map +1 -0
- package/dist/queries/test-coverage.d.ts +22 -0
- package/dist/queries/test-coverage.js +11 -0
- package/dist/queries/test-coverage.js.map +1 -0
- package/dist/queries/trace.d.ts +6 -0
- package/dist/queries/trace.js +8 -0
- package/dist/queries/trace.js.map +1 -0
- package/dist/queries/wrapper-candidates.d.ts +17 -0
- package/dist/queries/wrapper-candidates.js +9 -0
- package/dist/queries/wrapper-candidates.js.map +1 -0
- package/dist/reindex-worker.js +368 -0
- package/dist/reindex-worker.js.map +1 -0
- package/docs/AGENT_GUIDE.md +359 -0
- package/package.json +70 -0
- package/reports/debloat/2026-04-10-scip-query-self-audit.md +161 -0
- package/skills/concrete-plan/SKILL.md +318 -0
- package/skills/scip-debloat/SKILL.md +413 -0
- package/skills/scip-explore/SKILL.md +235 -0
- package/skills/scip-verify/SKILL.md +323 -0
- package/src/cli.ts +1480 -0
- package/src/config.ts +117 -0
- package/src/db.ts +127 -0
- package/src/gitignore-filter.ts +143 -0
- package/src/index.ts +11 -0
- package/src/postinstall.ts +8 -0
- package/src/queries/affected.ts +86 -0
- package/src/queries/bottlenecks.ts +67 -0
- package/src/queries/by-kind.ts +204 -0
- package/src/queries/call-graph.ts +66 -0
- package/src/queries/change-surface.ts +110 -0
- package/src/queries/clean-signature.ts +22 -0
- package/src/queries/code.ts +101 -0
- package/src/queries/complexity-hotspots.ts +119 -0
- package/src/queries/complexity.ts +152 -0
- package/src/queries/convergence.ts +82 -0
- package/src/queries/coupling.ts +99 -0
- package/src/queries/cycles.ts +78 -0
- package/src/queries/dataflow.ts +128 -0
- package/src/queries/dead.ts +122 -0
- package/src/queries/deep-chains.ts +59 -0
- package/src/queries/deps.ts +46 -0
- package/src/queries/diff-impact.ts +204 -0
- package/src/queries/doc-coverage.ts +86 -0
- package/src/queries/drift.ts +224 -0
- package/src/queries/extract-candidates.ts +167 -0
- package/src/queries/fan.ts +148 -0
- package/src/queries/files.ts +16 -0
- package/src/queries/health.ts +324 -0
- package/src/queries/hierarchy.ts +49 -0
- package/src/queries/hotspots.ts +53 -0
- package/src/queries/imports.ts +95 -0
- package/src/queries/index.ts +45 -0
- package/src/queries/isolated.ts +67 -0
- package/src/queries/members.ts +54 -0
- package/src/queries/methods.ts +27 -0
- package/src/queries/outline.ts +52 -0
- package/src/queries/passthrough-candidates.ts +94 -0
- package/src/queries/redundant-reexports.ts +170 -0
- package/src/queries/refs.ts +27 -0
- package/src/queries/similar-chains.ts +314 -0
- package/src/queries/similar-files.ts +140 -0
- package/src/queries/similar-signatures.ts +151 -0
- package/src/queries/similar.ts +305 -0
- package/src/queries/slice.ts +154 -0
- package/src/queries/stale-abstractions.ts +82 -0
- package/src/queries/stats.ts +22 -0
- package/src/queries/surface.ts +34 -0
- package/src/queries/symbols.ts +39 -0
- package/src/queries/system.ts +86 -0
- package/src/queries/test-coverage.ts +106 -0
- package/src/queries/trace.ts +55 -0
- package/src/queries/wrapper-candidates.ts +112 -0
- package/src/query-support.ts +226 -0
- package/src/reindex/detect.ts +58 -0
- package/src/reindex/index.ts +153 -0
- package/src/reindex/indexers.ts +220 -0
- package/src/reindex/install.ts +125 -0
- package/src/reindex-worker.ts +35 -0
- package/src/setup.ts +202 -0
- package/src/symbol-parser.ts +278 -0
- package/src/types.ts +654 -0
- package/src/watch.ts +274 -0
- package/tests/gitignore-filter.test.ts +48 -0
- package/tests/queries.test.ts +300 -0
- package/tests/symbol-parser.test.ts +157 -0
- package/tsconfig.json +20 -0
- package/tsup.config.ts +40 -0
- package/vitest.config.ts +7 -0
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
import type { ScipDatabase } from '../db.js';
|
|
2
|
+
import type { RedundantReexport } from '../types.js';
|
|
3
|
+
import { shortenSymbol } from '../symbol-parser.js';
|
|
4
|
+
|
|
5
|
+
/**
 * Find barrel re-exports of functions that no other indexed file references.
 *
 * A "barrel" is any document whose path is `index.ts` / `index.js`, either at
 * the root or inside any directory. A re-export candidate is a function-level
 * symbol (SCIP symbol ending in `().`) that is mentioned with role=0 inside a
 * barrel while its definition range lives in a different document.
 *
 * For each candidate, the documents that reference the symbol with role=0
 * (excluding the barrel and the defining file) are counted and split into:
 *   - barrel consumers: documents that also reference, with role=0, at least
 *     one symbol that the barrel references with role=0
 *   - direct consumers: every other referencing document
 *
 * SCIP resolves a reference to the same global symbol regardless of the
 * import path, so import provenance is not available. In particular, a
 * namespace import of the barrel is indistinguishable from a direct import.
 * For that reason only candidates with zero consumers of either kind are
 * reported; anything with at least one consumer is skipped as undecidable.
 *
 * @param db - Index database used for all queries and ignore checks.
 * @param opts - Optional filters.
 *   `scope` is a substring matched against the barrel path with SQL `LIKE`
 *   (bound as a query parameter, never spliced into the SQL text).
 *   `limit` caps the number of results; omitted or non-positive means no cap.
 * @returns Reported re-exports sorted by barrel path, then short symbol name.
 *   `barrelConsumers` and `directConsumers` are both 0 for every entry.
 */
export function redundantReexports(
  db: ScipDatabase,
  opts: { scope?: string; limit?: number } = {},
): RedundantReexport[] {
  const { scope, limit } = opts;

  // Bind the scope as a parameter: a quote or other SQL metacharacter in
  // `scope` must not be able to alter the statement.
  const scopeFilter = scope ? 'AND barrel_d.relative_path LIKE ?' : '';
  const scopeParams: string[] = scope ? [`%${scope}%`] : [];

  // Candidate re-exports: role=0 mentions inside a barrel whose symbol is
  // defined (defn_enclosing_ranges) in a different document.
  const reexportRows = db.all<{
    barrel_doc_id: number;
    barrel_path: string;
    symbol_id: number;
    symbol: string;
    original_doc_id: number;
    original_path: string;
  }>(
    `SELECT DISTINCT
       barrel_d.id AS barrel_doc_id,
       barrel_d.relative_path AS barrel_path,
       gs.id AS symbol_id,
       gs.symbol AS symbol,
       orig_d.id AS original_doc_id,
       orig_d.relative_path AS original_path
     FROM mentions m
     JOIN chunks c ON m.chunk_id = c.id
     JOIN documents barrel_d ON c.document_id = barrel_d.id
     JOIN global_symbols gs ON m.symbol_id = gs.id
     JOIN defn_enclosing_ranges der ON gs.id = der.symbol_id
     JOIN documents orig_d ON der.document_id = orig_d.id
     WHERE m.role = 0
       AND (barrel_d.relative_path LIKE '%/index.ts'
         OR barrel_d.relative_path LIKE '%/index.js'
         OR barrel_d.relative_path = 'index.ts'
         OR barrel_d.relative_path = 'index.js')
       AND orig_d.id != barrel_d.id
       ${db.pathExclusionsFor('barrel_d', 'orig_d')}
       ${db.symbolNoiseFor('gs')}
       -- Only function-level symbols (ending with ().), not module-level
       AND gs.symbol LIKE '%().'
       ${scopeFilter}
     ORDER BY barrel_d.relative_path, gs.symbol`,
    ...scopeParams,
  );

  // The statement text is identical for every candidate, so build it once.
  // Placeholders, in order: barrel doc id (inner subquery), symbol id,
  // barrel doc id (excluded consumer), original doc id (excluded consumer).
  //
  // NOTE(review): the candidate symbol is itself mentioned with role=0 in the
  // barrel, so the EXISTS below looks like it is always true for any consumer
  // (making direct_consumers always 0). This is harmless today because only
  // the "no consumers at all" case is reported, but confirm before relying on
  // the barrel/direct split.
  const consumerCountSql = `SELECT
       SUM(CASE WHEN uses_barrel = 1 THEN 1 ELSE 0 END) AS barrel_consumers,
       SUM(CASE WHEN uses_barrel = 0 THEN 1 ELSE 0 END) AS direct_consumers
     FROM (
       SELECT
         consumer_d.id AS consumer_doc_id,
         MAX(CASE WHEN EXISTS (
           SELECT 1
           FROM mentions barrel_m
           JOIN chunks barrel_c ON barrel_m.chunk_id = barrel_c.id
           WHERE barrel_c.document_id = consumer_d.id
             AND barrel_m.role = 0
             AND barrel_m.symbol_id IN (
               SELECT m2.symbol_id
               FROM mentions m2
               JOIN chunks c2 ON m2.chunk_id = c2.id
               WHERE c2.document_id = ?
                 AND m2.role = 0
             )
         ) THEN 1 ELSE 0 END) AS uses_barrel
       FROM mentions ref_m
       JOIN chunks ref_c ON ref_m.chunk_id = ref_c.id
       JOIN documents consumer_d ON ref_c.document_id = consumer_d.id
       WHERE ref_m.symbol_id = ?
         AND ref_m.role = 0
         AND consumer_d.id != ?
         AND consumer_d.id != ?
         ${db.pathExclusionsFor('consumer_d')}
       GROUP BY consumer_d.id
     )`;

  const results: RedundantReexport[] = [];

  for (const row of reexportRows) {
    if (db.isIgnored(row.barrel_path) || db.isIgnored(row.original_path)) continue;

    const consumerCounts = db.get<{
      barrel_consumers: number;
      direct_consumers: number;
    }>(
      consumerCountSql,
      row.barrel_doc_id, // barrel whose role=0 symbols define "uses the barrel"
      row.symbol_id, // the re-exported symbol
      row.barrel_doc_id, // exclude the barrel itself
      row.original_doc_id, // exclude the defining file
    );

    // SUM over zero consumer rows yields NULL, hence the `?? 0`.
    const barrelConsumers = consumerCounts?.barrel_consumers ?? 0;
    const directConsumers = consumerCounts?.direct_consumers ?? 0;

    // Any consumer at all might be going through the barrel (e.g. via a
    // namespace import), so only fully unreferenced re-exports are reported.
    if (barrelConsumers === 0 && directConsumers === 0) {
      results.push({
        barrelFile: row.barrel_path,
        symbol: row.symbol,
        shortName: shortenSymbol(row.symbol),
        originalFile: row.original_path,
        barrelConsumers,
        directConsumers,
      });
    }
  }

  // Every reported entry has zero consumers, so consumer counts cannot order
  // anything; sort by barrel path, then short name, for stable output.
  results.sort(
    (a, b) =>
      a.barrelFile.localeCompare(b.barrelFile) || a.shortName.localeCompare(b.shortName),
  );

  return limit !== undefined && limit > 0 ? results.slice(0, limit) : results;
}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import type { ScipDatabase } from '../db.js';
|
|
2
|
+
import type { RefResult } from '../types.js';
|
|
3
|
+
|
|
4
|
+
/**
 * List the places where symbols matching a pattern are referenced.
 *
 * Looks up role=0 mentions of every global symbol whose SCIP symbol string
 * contains `symbolPattern` (SQL `LIKE '%pattern%'`, bound as a parameter),
 * additionally constrained by `db.localSymbolPredicate`, and drops hits in
 * paths that `db.isIgnored` rejects.
 *
 * @param db - Index database to query.
 * @param symbolPattern - Substring of the symbol to search for.
 * @returns One entry per distinct (file, chunk start line) pair, ordered by
 *   path and then line. `line` is the start line of the chunk containing the
 *   mention, as stored in `chunks.start_line`.
 */
export function refs(db: ScipDatabase, symbolPattern: string): RefResult[] {
  const referenceSql = `SELECT DISTINCT d.relative_path, c.start_line
     FROM mentions m
     JOIN chunks c ON m.chunk_id = c.id
     JOIN documents d ON c.document_id = d.id
     JOIN global_symbols gs ON m.symbol_id = gs.id
     WHERE gs.symbol LIKE ?
       AND ${db.localSymbolPredicate}
       AND m.role = 0
     ORDER BY d.relative_path, c.start_line`;

  const hits = db.all<{
    relative_path: string;
    start_line: number;
  }>(referenceSql, `%${symbolPattern}%`);

  const found: RefResult[] = [];
  for (const hit of hits) {
    if (db.isIgnored(hit.relative_path)) continue;
    found.push({ relativePath: hit.relative_path, line: hit.start_line });
  }
  return found;
}
|
|
@@ -0,0 +1,314 @@
|
|
|
1
|
+
import type { ScipDatabase } from '../db.js';
|
|
2
|
+
import { buildFileDepGraph } from '../query-support.js';
|
|
3
|
+
import type { SimilarChainResult } from '../types.js';
|
|
4
|
+
|
|
5
|
+
/**
 * Find pairs of file-level dependency chains that are almost the same once
 * shared infrastructure is stripped away — a hint that two pipelines were
 * duplicated and could be consolidated.
 *
 * Steps:
 *   1. Build the file dependency graph (optionally restricted by `scope`) and
 *      enumerate chains between `minChainLength` and `maxChainLength` nodes.
 *   2. Count, per node, how many chains contain it and how often it is one of
 *      a chain's last two nodes.
 *   3. Treat as infrastructure: nodes present in more than 40% of chains,
 *      tail nodes present in more than 30% of chain tails, and files whose
 *      basename is a structural name (index/cli/main/health).
 *   4. Remove infrastructure nodes; keep chains with at least 3 nodes left.
 *   5. Compare every remaining pair by edit distance. A pair is reported when
 *      it shares at least one node, reaches `minSimilarity`, is not identical,
 *      and its alignment has at least one substitution and two matches.
 *   6. Sort by similarity (differences of at most 0.01 count as a tie, broken
 *      by fewer divergence points), skip pairs for which `isSubChain` holds
 *      on both sides against an already accepted pair, and stop at `limit`.
 *
 * Similarity and divergence points are computed on the filtered chains; the
 * reported chains, common prefix and common suffix use the original chains.
 *
 * @param db - Index database the dependency graph is built from.
 * @param opts - `minSimilarity` (default 0.5), `limit` (default 15), `scope`,
 *   `minChainLength` (default 3), `maxChainLength` (default 8).
 * @returns Up to `limit` similar chain pairs, best first.
 */
export function similarChains(
  db: ScipDatabase,
  opts: {
    minSimilarity?: number;
    limit?: number;
    scope?: string;
    minChainLength?: number;
    maxChainLength?: number;
  } = {},
): SimilarChainResult[] {
  const {
    minSimilarity = 0.5,
    limit = 15,
    scope,
    minChainLength = 3,
    maxChainLength = 8,
  } = opts;

  const chains = generateChains(buildFileDepGraph(db, scope), minChainLength, maxChainLength);
  if (chains.length === 0) return [];

  // Per-node counters: chains containing the node (each chain counted once)
  // and occurrences among a chain's last two positions.
  const chainHits = new Map<string, number>();
  const tailHits = new Map<string, number>();
  const bump = (counter: Map<string, number>, key: string): void => {
    counter.set(key, (counter.get(key) ?? 0) + 1);
  };
  for (const chain of chains) {
    for (const node of new Set(chain)) bump(chainHits, node);
    for (const node of chain.slice(-2)) bump(tailHits, node);
  }

  // Infrastructure: nodes in >40% of chains OR tail nodes in >30% of chains.
  const infra = new Set<string>();
  const chainCutoff = chains.length * 0.4;
  const tailCutoff = chains.length * 0.3;
  chainHits.forEach((count, node) => {
    if (count > chainCutoff) infra.add(node);
  });
  tailHits.forEach((count, node) => {
    if (count > tailCutoff) infra.add(node);
  });

  // Entry points, barrels and orchestrators are never meaningful pipeline
  // steps, whatever their frequency.
  const structuralNames = ['index.ts', 'index.js', 'cli.ts', 'main.ts', 'health.ts', 'health.js'];
  for (const node of chainHits.keys()) {
    const basename = node.slice(node.lastIndexOf('/') + 1);
    if (structuralNames.includes(basename)) infra.add(node);
  }

  // Strip infrastructure. Chains left with only 1-2 nodes are the ordinary
  // "query → infra" shape and carry no consolidation signal.
  const candidates = chains
    .map((original) => ({ original, filtered: original.filter((n) => !infra.has(n)) }))
    .filter((entry) => entry.filtered.length >= 3);

  if (candidates.length < 2) return [];

  const results: SimilarChainResult[] = [];

  for (let i = 0; i < candidates.length; i++) {
    const a = candidates[i]!;
    const nodesOfA = new Set(a.filtered);

    for (let j = i + 1; j < candidates.length; j++) {
      const b = candidates[j]!;

      // Quick reject: the filtered chains must share at least one node.
      if (!b.filtered.some((n) => nodesOfA.has(n))) continue;

      const { distance, ops } = editDistance(a.filtered, b.filtered);
      const longest = Math.max(a.filtered.length, b.filtered.length);
      if (longest === 0) continue;

      const similarity = 1 - distance / longest;
      // Below threshold, or identical after filtering (nothing to report).
      if (similarity < minSimilarity || distance === 0) continue;

      // One pass over the alignment: substitutions become divergence points,
      // matches are only counted.
      const divergencePoints: { index: number; nodeA: string; nodeB: string }[] = [];
      let matchCount = 0;
      for (const op of ops) {
        if (op.type === 'substitute') {
          divergencePoints.push({
            index: op.indexA,
            nodeA: a.filtered[op.indexA]!,
            nodeB: b.filtered[op.indexB]!,
          });
        } else if (op.type === 'match') {
          matchCount++;
        }
      }

      // Need a real divergence and at least two agreeing nodes; one match
      // plus one divergence is just two files sharing a dependency.
      if (divergencePoints.length === 0 || matchCount < 2) continue;

      results.push({
        chainA: a.original,
        chainB: b.original,
        similarity,
        editDistance: distance,
        divergencePoints,
        commonPrefix: getCommonPrefix(a.original, b.original),
        commonSuffix: getCommonSuffix(a.original, b.original),
      });
    }

    // Stop collecting once there is ample material for the final top-`limit`.
    if (results.length > limit * 10) break;
  }

  // Similarity descending; near-ties resolved by fewer divergence points.
  results.sort((x, y) => {
    const gap = y.similarity - x.similarity;
    return Math.abs(gap) > 0.01 ? gap : x.divergencePoints.length - y.divergencePoints.length;
  });

  // Skip pairs already covered by an accepted pair on both sides.
  const accepted: SimilarChainResult[] = [];
  for (const pair of results) {
    const covered = accepted.some(
      (kept) => isSubChain(pair.chainA, kept.chainA) && isSubChain(pair.chainB, kept.chainB),
    );
    if (!covered) accepted.push(pair);
    if (accepted.length >= limit) break;
  }

  return accepted;
}
|
|
178
|
+
|
|
179
|
+
// ── Chain generation ───────────────────────────────────────
|
|
180
|
+
|
|
181
|
+
/**
 * Enumerate dependency chains by running a DFS from every node that has an
 * entry in the graph.
 *
 * @param graph - Adjacency map: node → set of nodes it points to.
 * @param minLen - Minimum number of nodes a chain needs to be recorded.
 * @param maxLen - Maximum number of nodes per chain.
 * @returns The collected chains; collection stops once 500 have been found.
 */
function generateChains(
  graph: Map<string, Set<string>>,
  minLen: number,
  maxLen: number,
): string[][] {
  // Hard cap on the total number of chains across all start nodes.
  const chainCap = 500;
  const collected: string[][] = [];

  for (const [origin] of graph) {
    if (collected.length >= chainCap) break;
    dfsChains(graph, origin, [origin], new Set([origin]), minLen, maxLen, collected, chainCap);
  }

  return collected;
}
|
|
196
|
+
|
|
197
|
+
/**
 * Depth-first extension of `path` from `node`, appending finished chains to
 * `results`.
 *
 * A chain is finished (and recorded if it has at least `minLen` nodes) when:
 *  - it has reached `maxLen` nodes,
 *  - `node` has no outgoing edges in `graph`, or
 *  - every outgoing edge leads to a node already on the current path.
 *
 * `path` and `visited` are mutated while recursing and restored before
 * returning; `results` receives copies, never the live `path` array.
 *
 * @param graph      Adjacency map: node → set of successor nodes.
 * @param node       Node at the end of `path`.
 * @param path       Current chain, ending in `node`.
 * @param visited    Nodes currently on `path` (used to avoid cycles).
 * @param minLen     Minimum chain length worth recording.
 * @param maxLen     Length at which extension stops.
 * @param results    Output accumulator.
 * @param maxResults Stop exploring once `results` holds this many chains.
 */
function dfsChains(
  graph: Map<string, Set<string>>,
  node: string,
  path: string[],
  visited: Set<string>,
  minLen: number,
  maxLen: number,
  results: string[][],
  maxResults: number,
): void {
  if (results.length >= maxResults) return;

  const successors = graph.get(node);

  // Terminal cases: length limit reached, or nothing to follow from here.
  if (path.length >= maxLen || successors === undefined || successors.size === 0) {
    if (path.length >= minLen) results.push(path.slice());
    return;
  }

  let advanced = false;
  for (const successor of successors) {
    if (visited.has(successor)) continue; // would close a cycle
    advanced = true;

    visited.add(successor);
    path.push(successor);
    dfsChains(graph, successor, path, visited, minLen, maxLen, results, maxResults);
    path.pop();
    visited.delete(successor);

    // Budget exhausted somewhere below: unwind without exploring siblings.
    if (results.length >= maxResults) return;
  }

  // Every successor was already on the path, so this chain ends here.
  if (!advanced && path.length >= minLen) {
    results.push(path.slice());
  }
}
|
|
235
|
+
|
|
236
|
+
// ── Edit distance ──────────────────────────────────────────
|
|
237
|
+
|
|
238
|
+
/**
 * One step of an alignment between two sequences `a` and `b`.
 *
 * - `match` / `substitute`: `indexA` and `indexB` are the aligned positions.
 * - `insert`: `indexB` is the element of `b` being inserted; `indexA` is the
 *   position in `a` in front of which it lands.
 * - `delete`: `indexA` is the element of `a` being removed; `indexB` is the
 *   number of elements of `b` consumed so far.
 */
interface EditOp {
  type: 'match' | 'substitute' | 'insert' | 'delete';
  indexA: number;
  indexB: number;
}

/**
 * Levenshtein distance between two string sequences (unit cost for insert,
 * delete and substitute), together with one edit script that achieves it.
 *
 * When several optimal scripts exist, the traceback prefers, in order:
 * match, substitute, insert, delete.
 *
 * @param a Source sequence.
 * @param b Target sequence.
 * @returns `distance` and the `ops` transforming `a` into `b`, in
 *          left-to-right order.
 */
function editDistance(a: string[], b: string[]): { distance: number; ops: EditOp[] } {
  const rows = a.length;
  const cols = b.length;

  // table[r][c] = distance between the first r items of a and first c of b.
  const table: number[][] = [];
  for (let r = 0; r <= rows; r++) {
    const row = new Array<number>(cols + 1).fill(0);
    row[0] = r; // r deletions turn a prefix of a into the empty sequence
    table.push(row);
  }
  for (let c = 0; c <= cols; c++) table[0]![c] = c; // c insertions from empty

  for (let r = 1; r <= rows; r++) {
    const above = table[r - 1]!;
    const current = table[r]!;
    for (let c = 1; c <= cols; c++) {
      current[c] =
        a[r - 1] === b[c - 1]
          ? above[c - 1]!
          : 1 + Math.min(above[c]!, current[c - 1]!, above[c - 1]!);
    }
  }

  // Walk back from the bottom-right corner; steps are gathered last-to-first
  // and flipped once at the end.
  const script: EditOp[] = [];
  let r = rows;
  let c = cols;
  while (r > 0 || c > 0) {
    const onDiagonal = r > 0 && c > 0;
    const cost = table[r]![c];

    if (onDiagonal && a[r - 1] === b[c - 1]) {
      script.push({ type: 'match', indexA: r - 1, indexB: c - 1 });
      r--;
      c--;
    } else if (onDiagonal && cost === table[r - 1]![c - 1]! + 1) {
      script.push({ type: 'substitute', indexA: r - 1, indexB: c - 1 });
      r--;
      c--;
    } else if (c > 0 && cost === table[r]![c - 1]! + 1) {
      script.push({ type: 'insert', indexA: r, indexB: c - 1 });
      c--;
    } else {
      script.push({ type: 'delete', indexA: r - 1, indexB: c });
      r--;
    }
  }
  script.reverse();

  return { distance: table[rows]![cols]!, ops: script };
}
|
|
286
|
+
|
|
287
|
+
// ── Utility ────────────────────────────────────────────────
|
|
288
|
+
|
|
289
|
+
/**
 * Longest run of leading elements that `a` and `b` have in common.
 *
 * @returns A new array holding the shared leading elements (empty when the
 *          first elements differ or either input is empty).
 */
function getCommonPrefix(a: string[], b: string[]): string[] {
  const bound = Math.min(a.length, b.length);

  // Advance until the first position where the two sequences disagree.
  let shared = 0;
  while (shared < bound && a[shared] === b[shared]) shared++;

  return a.slice(0, shared);
}
|
|
297
|
+
|
|
298
|
+
/**
 * Longest run of trailing elements that `a` and `b` have in common.
 *
 * @returns A new array holding the shared trailing elements in their original
 *          order (empty when the last elements differ or either input is empty).
 */
function getCommonSuffix(a: string[], b: string[]): string[] {
  const bound = Math.min(a.length, b.length);

  // Count matching elements while stepping backwards from both ends.
  let shared = 0;
  while (shared < bound && a[a.length - 1 - shared] === b[b.length - 1 - shared]) {
    shared++;
  }

  return a.slice(a.length - shared);
}
|
|
308
|
+
|
|
309
|
+
/**
 * Whether `sub` occurs inside `full` as a contiguous run of whole elements.
 *
 * Elements are compared one by one rather than by joining both chains into
 * strings: a joined-string substring test also matches across element
 * boundaries (e.g. `['a.ts', 'b.ts']` inside `['src/a.ts', 'b.ts']`), which
 * would make unrelated chains look like sub-chains of each other.
 *
 * @param sub  Candidate sub-chain. An empty chain is contained in any chain.
 * @param full Chain to search in.
 * @returns `true` when some window of `full` equals `sub` element for element.
 */
function isSubChain(sub: string[], full: string[]): boolean {
  if (sub.length > full.length) return false;

  // Try every start offset at which `sub` still fits inside `full`.
  const lastStart = full.length - sub.length;
  for (let start = 0; start <= lastStart; start++) {
    if (sub.every((node, offset) => full[start + offset] === node)) return true;
  }
  return false;
}
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
import type { ScipDatabase } from '../db.js';
|
|
2
|
+
import { buildFileDepGraph } from '../query-support.js';
|
|
3
|
+
import type { SimilarFileResult } from '../types.js';
|
|
4
|
+
|
|
5
|
+
/**
 * Rank pairs of files by how strongly their dependency sets overlap.
 *
 * Each file is reduced to the set of files it depends on; pairs are scored by
 * Jaccard similarity of those sets. A high score suggests copy-paste variants
 * or candidates for consolidation.
 *
 * Modes:
 *  - With `filePattern`: the first profile whose path contains the pattern is
 *    compared against every other profile. Returns `[]` if no profile matches.
 *  - Without it: all pairs are compared. Scanning stops after the row in which
 *    the number of hits first exceeds `limit * 5`, so the result is the best of
 *    the pairs examined up to that point.
 *
 * @param db   Index database the dependency graph is built from.
 * @param opts `minSimilarity` (default 0.5), `limit` (default 20), `scope`,
 *             `minDeps` (default 3) and `filePattern`.
 * @returns At most `limit` results, highest similarity first.
 */
export function similarFiles(
  db: ScipDatabase,
  opts: {
    minSimilarity?: number;
    limit?: number;
    scope?: string;
    minDeps?: number;
    filePattern?: string;
  } = {},
): SimilarFileResult[] {
  const { filePattern, scope, limit = 20, minDeps = 3, minSimilarity = 0.5 } = opts;

  const profiles = buildFileProfiles(db, { scope, minDeps });
  const matches: SimilarFileResult[] = [];

  if (filePattern) {
    // One-against-all: anchor on the first file whose path contains the pattern.
    const anchor = profiles.find((profile) => profile.file.includes(filePattern));
    if (anchor === undefined) return [];

    for (const other of profiles) {
      if (other.file === anchor.file) continue;
      const hit = compareProfiles(anchor, other, minSimilarity);
      if (hit) matches.push(hit);
    }
  } else {
    // All-pairs: each unordered pair is visited exactly once.
    const cap = limit * 5;
    for (const [index, left] of profiles.entries()) {
      for (let k = index + 1; k < profiles.length; k++) {
        const hit = compareProfiles(left, profiles[k]!, minSimilarity);
        if (hit) matches.push(hit);
      }
      // Checked once per row, after the row has been fully compared.
      if (matches.length > cap) break;
    }
  }

  matches.sort((x, y) => y.similarity - x.similarity);
  return matches.slice(0, limit);
}
|
|
53
|
+
|
|
54
|
+
// ── Internal ───────────────────────────────────────────────
|
|
55
|
+
|
|
56
|
+
/** A file together with the dependencies used to compare it against others. */
interface FileProfile {
  /** File identifier as it appears as a key of the dependency graph. */
  file: string;
  /** Dependencies of `file`, with universal dependencies removed. */
  deps: Set<string>;
}

/**
 * Build one comparison profile per file of the dependency graph.
 *
 * Dependencies that `findUniversalDependencies` flags are stripped from every
 * profile first, and only then is `minDeps` applied. Checking the threshold
 * against the stripped set matters: it is the stripped set that is later fed
 * into the similarity score, and a profile left with only one or two
 * dependencies would otherwise yield trivially high scores — exactly what
 * `minDeps` exists to prevent.
 *
 * @param db   Index database passed through to `buildFileDepGraph`.
 * @param opts `scope` is forwarded to `buildFileDepGraph`; `minDeps` is the
 *             minimum number of non-universal dependencies a file needs.
 * @returns Profiles in the iteration order of the dependency graph.
 */
function buildFileProfiles(
  db: ScipDatabase,
  opts: { scope?: string; minDeps: number },
): FileProfile[] {
  const { scope, minDeps } = opts;
  const depMap = buildFileDepGraph(db, scope);
  const universalDeps = findUniversalDependencies(depMap);

  const profiles: FileProfile[] = [];
  for (const [file, deps] of depMap) {
    const distinctive = new Set([...deps].filter((dep) => !universalDeps.has(dep)));
    // Threshold is on what will actually be compared, not on the raw count.
    if (distinctive.size >= minDeps) {
      profiles.push({ file, deps: distinctive });
    }
  }

  return profiles;
}
|
|
82
|
+
|
|
83
|
+
/**
 * Dependencies that appear in the dependency set of more than half of the
 * files in `depMap`.
 *
 * @param depMap File → set of its dependencies.
 * @returns The widespread dependencies, in order of first appearance while
 *          iterating `depMap`; empty when `depMap` is empty.
 */
function findUniversalDependencies(
  depMap: Map<string, Set<string>>,
): Set<string> {
  const totalFiles = depMap.size;
  if (totalFiles === 0) return new Set<string>();

  // For each dependency, count how many files list it.
  const importerTally = new Map<string, number>();
  depMap.forEach((deps) => {
    deps.forEach((dep) => {
      importerTally.set(dep, (importerTally.get(dep) ?? 0) + 1);
    });
  });

  const widespread = [...importerTally]
    .filter(([, importers]) => importers / totalFiles > 0.5)
    .map(([dep]) => dep);

  return new Set<string>(widespread);
}
|
|
105
|
+
|
|
106
|
+
/**
 * Score two file profiles by the Jaccard similarity of their dependency sets
 * (shared dependencies divided by the size of the union).
 *
 * @param a             First profile.
 * @param b             Second profile.
 * @param minSimilarity Scores below this value are discarded.
 * @returns The comparison, or `null` when the profiles share no dependency or
 *          score below `minSimilarity`. `sharedDeps` and `uniqueToA` follow the
 *          iteration order of `a.deps`; `uniqueToB` follows that of `b.deps`.
 */
function compareProfiles(
  a: FileProfile,
  b: FileProfile,
  minSimilarity: number,
): SimilarFileResult | null {
  // Single pass over a's deps splits them into "also in b" and "only in a".
  const common: string[] = [];
  const onlyA: string[] = [];
  for (const dep of a.deps) {
    (b.deps.has(dep) ? common : onlyA).push(dep);
  }

  if (common.length === 0) return null;

  // |A ∪ B| = |A| + |B| − |A ∩ B|
  const unionSize = a.deps.size + b.deps.size - common.length;
  const similarity = common.length / unionSize;
  if (similarity < minSimilarity) return null;

  const onlyB = [...b.deps].filter((dep) => !a.deps.has(dep));

  return {
    fileA: a.file,
    fileB: b.file,
    similarity,
    sharedDeps: common,
    uniqueToA: onlyA,
    uniqueToB: onlyB,
  };
}
|