@mishasinitcyn/betterrank 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/graph.js ADDED
@@ -0,0 +1,311 @@
1
+ import graphology from 'graphology';
2
+ const { MultiDirectedGraph } = graphology;
3
+ import pagerankModule from 'graphology-metrics/centrality/pagerank.js';
4
+ const pagerank = pagerankModule.default || pagerankModule;
5
+ import { writeFile, readFile, mkdir } from 'fs/promises';
6
+ import { dirname } from 'path';
7
+
/*
 * NOTE: this comment documents `buildGraph` (defined further down in this
 * file), not the AMBIGUITY_CAP constant that immediately follows it.
 *
 * Build a multi-directed graph from parsed symbol data.
 *
 * Uses MultiDirectedGraph so that DEFINES and REFERENCES edges can coexist
 * between the same (file, symbol) pair, which is what makes same-file caller
 * tracking work. Dedup Sets prevent duplicate REFERENCES/IMPORTS edges when
 * the same symbol is referenced repeatedly within a single file.
 *
 * @param {Array<{file: string, definitions: Array, references: Array}>} allSymbols
 * @returns {MultiDirectedGraph}
 */
/**
 * Upper bound on how many definitions may share a name before cross-file
 * wiring for that name is skipped altogether.
 *
 * Very common names (main, run, get, close, ...) are defined in so many
 * places that linking a reference to all of them adds noise rather than
 * useful structural signal.
 */
const AMBIGUITY_CAP = 5;
23
+
/**
 * Decide which candidate definitions a reference should be wired to.
 *
 * Resolution order:
 *   1. Exactly one candidate: return the candidates unchanged.
 *   2. One or more candidates live in the referencing file: return only those
 *      (cross-file candidates are ignored).
 *   3. No same-file candidate and more than AMBIGUITY_CAP candidates: return
 *      an empty list (too noisy to wire).
 *   4. Otherwise: return every candidate.
 *
 * @param {string[]} targets - symbol keys that define the referenced name
 * @param {string} sourceFile - file containing the reference
 * @param {object} graph - graph queried through `getNodeAttribute(key, 'file')`
 * @returns {string[]} the symbol keys to wire the reference to
 */
function disambiguateTargets(targets, sourceFile, graph) {
  const isUnambiguous = targets.length === 1;
  if (isUnambiguous) {
    return targets;
  }

  // Collect candidates defined in the referencing file. A candidate whose
  // attribute lookup throws is treated as "not in this file".
  const localMatches = [];
  for (const candidate of targets) {
    let owningFile;
    try {
      owningFile = graph.getNodeAttribute(candidate, 'file');
    } catch {
      continue;
    }
    if (owningFile === sourceFile) {
      localMatches.push(candidate);
    }
  }

  if (localMatches.length !== 0) {
    return localMatches;
  }

  // Cross-file only: keep the candidates unless the name is too ambiguous.
  return targets.length > AMBIGUITY_CAP ? [] : targets;
}
47
+
/**
 * Build a fresh multi-directed graph from parsed symbol data.
 *
 * Nodes: one per file (`type: 'file'`) and one per definition
 * (`type: 'symbol'`, keyed as `<file>::<name>`).
 * Edges: `DEFINES` (file to its symbol), `REFERENCES` (file to a referenced
 * symbol) and `IMPORTS` (file to the file owning a referenced symbol). A
 * multi graph is used so DEFINES and REFERENCES can coexist between the same
 * pair of nodes. REFERENCES and IMPORTS are added at most once per
 * (source, target) pair.
 *
 * @param {Array<{file: string, definitions: Array, references: Array}>} allSymbols
 * @returns {MultiDirectedGraph}
 */
function buildGraph(allSymbols) {
  const graph = new MultiDirectedGraph({ allowSelfLoops: false });

  // name -> list of symbol keys defining that name
  const keysByName = new Map();

  // Pass 1: register every file and definition before wiring any reference,
  // so references can resolve to symbols from any file.
  for (const { file, definitions } of allSymbols) {
    graph.mergeNode(file, { type: 'file', symbolCount: definitions.length });

    for (const def of definitions) {
      const symbolKey = `${file}::${def.name}`;
      graph.mergeNode(symbolKey, {
        type: 'symbol',
        kind: def.kind,
        name: def.name,
        file,
        lineStart: def.lineStart,
        lineEnd: def.lineEnd,
        signature: def.signature,
      });
      graph.addEdge(file, symbolKey, { type: 'DEFINES' });

      const bucket = keysByName.get(def.name);
      if (bucket) {
        bucket.push(symbolKey);
      } else {
        keysByName.set(def.name, [symbolKey]);
      }
    }
  }

  // Pass 2: wire references, emitting each (source, target) pair only once.
  const seenRefs = new Set();
  const seenImports = new Set();

  for (const { file, references } of allSymbols) {
    for (const ref of references) {
      const candidates = keysByName.get(ref.name);
      if (candidates === undefined) continue;

      for (const target of disambiguateTargets(candidates, file, graph)) {
        const targetFile = graph.getNodeAttribute(target, 'file');

        const refKey = `${file}\0${target}`;
        if (!seenRefs.has(refKey)) {
          seenRefs.add(refKey);
          graph.addEdge(file, target, { type: 'REFERENCES' });
        }

        // Same-file references never produce an IMPORTS edge.
        if (targetFile === file) continue;

        const importKey = `${file}\0${targetFile}`;
        if (!seenImports.has(importKey)) {
          seenImports.add(importKey);
          graph.addEdge(file, targetFile, { type: 'IMPORTS' });
        }
      }
    }
  }

  return graph;
}
113
+
/**
 * Incrementally update an existing graph in place.
 *
 * Every node belonging to a file in `removedFiles` is dropped via
 * `removeFileNodes`, then the entries in `newSymbols` are added back using the
 * same wiring rules as `buildGraph`.
 *
 * Definitions of ALL new files are registered before any reference is wired.
 * Wiring each file's references immediately after its own definitions would
 * silently miss references to symbols defined by a later `newSymbols` entry.
 *
 * NOTE(review): only references listed in `newSymbols` are wired here. Edges
 * that untouched files had into a removed file's symbols are presumably
 * dropped together with those nodes (graphology's `dropNode` removes attached
 * edges) and are not recreated; confirm whether callers re-parse dependents.
 *
 * @param {object} graph - graph instance to mutate
 * @param {Iterable<string>} removedFiles - files whose nodes are dropped first
 * @param {Array<{file: string, definitions: Array, references: Array}>} newSymbols
 *   fresh parse results to add
 * @returns {void}
 */
function updateGraphFiles(graph, removedFiles, newSymbols) {
  for (const filePath of removedFiles) {
    removeFileNodes(graph, filePath);
  }

  // Seed the name -> symbolKey index with the symbols that survived removal.
  const defIndex = new Map();
  graph.forEachNode((node, attrs) => {
    if (attrs.type === 'symbol') {
      if (!defIndex.has(attrs.name)) defIndex.set(attrs.name, []);
      defIndex.get(attrs.name).push(node);
    }
  });

  // Pass 1: add every new file and definition, extending the index, so that
  // pass 2 can resolve references regardless of the order of `newSymbols`.
  for (const { file, definitions } of newSymbols) {
    graph.mergeNode(file, { type: 'file', symbolCount: definitions.length });

    for (const def of definitions) {
      const symbolKey = `${file}::${def.name}`;
      graph.mergeNode(symbolKey, {
        type: 'symbol',
        kind: def.kind,
        name: def.name,
        file,
        lineStart: def.lineStart,
        lineEnd: def.lineEnd,
        signature: def.signature,
      });
      graph.addEdge(file, symbolKey, { type: 'DEFINES' });

      if (!defIndex.has(def.name)) defIndex.set(def.name, []);
      defIndex.get(def.name).push(symbolKey);
    }
  }

  // Pass 2: wire references. One REFERENCES edge and one IMPORTS edge per
  // unique (source, target) pair.
  const addedRefs = new Set();
  const addedImports = new Set();

  for (const { file, references } of newSymbols) {
    for (const ref of references) {
      const targets = defIndex.get(ref.name);
      if (!targets) continue;

      const resolvedTargets = disambiguateTargets(targets, file, graph);

      for (const target of resolvedTargets) {
        const targetFile = graph.getNodeAttribute(target, 'file');

        const refKey = `${file}\0${target}`;
        if (!addedRefs.has(refKey)) {
          addedRefs.add(refKey);
          graph.addEdge(file, target, { type: 'REFERENCES' });
        }

        // Same-file references never produce an IMPORTS edge.
        if (targetFile !== file) {
          const impKey = `${file}\0${targetFile}`;
          if (!addedImports.has(impKey)) {
            addedImports.add(impKey);
            graph.addEdge(file, targetFile, { type: 'IMPORTS' });
          }
        }
      }
    }
  }
}
184
+
/**
 * Drop a file node and every node whose `file` attribute points at it.
 *
 * Matching keys are collected first and dropped afterwards, so the graph is
 * not mutated while it is being iterated.
 *
 * @param {object} graph - graph exposing `forEachNode` and `dropNode`
 * @param {string} filePath - key of the file whose nodes should be removed
 * @returns {void}
 */
function removeFileNodes(graph, filePath) {
  const doomed = [];

  graph.forEachNode((key, attributes) => {
    const isFileNode = key === filePath;
    if (isFileNode || attributes.file === filePath) {
      doomed.push(key);
    }
  });

  doomed.forEach((key) => {
    graph.dropNode(key);
  });
}
196
+
// Path-tier dampening table.
//
// Files outside the core source directories have their PageRank scores
// multiplied by a fraction, so that scripts, tests and temp files do not
// dominate the map output over actual source code.
//
// Configurable via .code-index/config.json:
//   { "pathTiers": { "scripts/": 0.3, "tests/": 0.2 } }
//
// Entries are checked in order and the first match wins, so more specific
// prefixes must be listed before more general ones.
const DEFAULT_PATH_TIERS = [
  ['temp_qa/', 0.1],
  ['qa/temp_', 0.1],
  ['qa/', 0.2],
  ['tests/', 0.2],
  ['test/', 0.2],
  ['scripts/', 0.3],
  ['deploy/', 0.3],
].map(([pattern, weight]) => ({ pattern, weight }));
213
+
/**
 * Look up the dampening multiplier for a file path.
 *
 * Tiers are checked in order and the first match wins. A tier matches when
 * its pattern is a prefix of the path or appears right after a `/` anywhere
 * in the path (so `tests/` matches both `tests/a.js` and `pkg/tests/a.js`,
 * but not `contests/a.js`).
 *
 * `pathTiers` may be either an iterable of `{ pattern, weight }` entries or a
 * plain object map of pattern to weight (the shape shown for `pathTiers` in
 * the config example). Backslash separators in the path and in patterns are
 * treated as `/`, so Windows-style paths are matched too.
 *
 * @param {string} filePath - path of the file to weigh
 * @param {Iterable<{pattern: string, weight: number}>|Object<string, number>} [pathTiers]
 *   tier table; defaults to DEFAULT_PATH_TIERS
 * @returns {number} the weight of the first matching tier, or 1.0 if none match
 */
function getPathWeight(filePath, pathTiers = DEFAULT_PATH_TIERS) {
  const toPosix = (value) =>
    typeof value === 'string' ? value.replace(/\\/g, '/') : value;

  // Iterables are used as-is; a plain object map is expanded into entries,
  // keeping its key order so "first match wins" still holds.
  const tiers =
    typeof pathTiers[Symbol.iterator] === 'function'
      ? pathTiers
      : Object.entries(pathTiers).map(([pattern, weight]) => ({ pattern, weight }));

  const normalizedPath = toPosix(filePath);

  for (const { pattern, weight } of tiers) {
    const prefix = toPosix(pattern);
    if (normalizedPath.startsWith(prefix) || normalizedPath.includes(`/${prefix}`)) {
      return weight;
    }
  }
  return 1.0;
}
222
+
/**
 * Rank symbol nodes by PageRank, optionally biased toward `focusFiles`.
 *
 * PageRank runs on a copy of the graph. When focus files are given, a virtual
 * `__focus__` node is added to the copy with a weight-10 edge to each focus
 * file that exists in the graph. Only nodes that exist in the original graph
 * with `type === 'symbol'` are returned, and each score is multiplied by the
 * path-tier weight of the symbol's file.
 *
 * @param {object} graph - graph to rank; never mutated
 * @param {string[]} [focusFiles] - file keys to bias the ranking toward
 * @param {Array<{pattern: string, weight: number}>} [pathTiers] - dampening table
 * @returns {Array<[string, number]>} `[symbolKey, score]` pairs, highest first
 */
function rankedSymbols(graph, focusFiles = [], pathTiers = DEFAULT_PATH_TIERS) {
  if (graph.order === 0) {
    return [];
  }

  const working = graph.copy();

  if (focusFiles.length > 0) {
    working.mergeNode('__focus__', { type: 'virtual' });
    for (const focusFile of focusFiles) {
      if (!working.hasNode(focusFile)) continue;
      working.addEdge('__focus__', focusFile, { weight: 10.0 });
    }
  }

  const scores = pagerank(working, {
    alpha: 0.85,
    maxIterations: 100,
    tolerance: 1e-6,
    getEdgeWeight: 'weight',
  });

  // The virtual node must never appear in the results.
  delete scores['__focus__'];

  const ranked = [];
  for (const [key, score] of Object.entries(scores)) {
    // Keep only symbol nodes of the original graph; a failing lookup means
    // the entry is skipped.
    let isSymbol;
    try {
      isSymbol = graph.hasNode(key) && graph.getNodeAttribute(key, 'type') === 'symbol';
    } catch {
      isSymbol = false;
    }
    if (!isSymbol) continue;

    // Apply path-tier dampening; if the weight cannot be computed the raw
    // score is kept.
    let dampened = score;
    try {
      const file = graph.getNodeAttribute(key, 'file');
      dampened = score * getPathWeight(file, pathTiers);
    } catch {
      dampened = score;
    }
    ranked.push([key, dampened]);
  }

  ranked.sort((left, right) => right[1] - left[1]);
  return ranked;
}
270
+
/**
 * Persist the graph and the file mtime map as a single JSON cache file.
 *
 * The parent directory of `cachePath` is created (recursively) if needed.
 * The payload has the shape `{ version: 2, graph, mtimes }`, where `graph` is
 * the result of `graph.export()` and `mtimes` is the map converted to a plain
 * object.
 *
 * @param {object} graph - graph exposing `export()`
 * @param {Map<string, *>} mtimeMap - per-file modification times
 * @param {string} cachePath - destination file path
 * @returns {Promise<void>}
 */
async function saveGraph(graph, mtimeMap, cachePath) {
  const cacheDir = dirname(cachePath);
  await mkdir(cacheDir, { recursive: true });

  const payload = JSON.stringify({
    version: 2,
    graph: graph.export(),
    mtimes: Object.fromEntries(mtimeMap),
  });

  await writeFile(cachePath, payload);
}
283
+
/**
 * Load the graph and mtime map from a JSON cache file.
 *
 * Resolves to `null` when the file cannot be read or parsed, when its
 * `version` is neither 1 nor 2, or when rebuilding the graph or the mtime map
 * throws. Loading is best-effort and never rejects.
 *
 * @param {string} cachePath - path of the cache file
 * @returns {Promise<{graph: MultiDirectedGraph, mtimes: Map<string, *>}|null>}
 */
async function loadGraph(cachePath) {
  const supportedVersions = [1, 2];

  try {
    const text = await readFile(cachePath, 'utf-8');
    const cached = JSON.parse(text);

    if (!supportedVersions.includes(cached.version)) {
      return null;
    }

    const restored = new MultiDirectedGraph({ allowSelfLoops: false });
    restored.import(cached.graph);

    return {
      graph: restored,
      mtimes: new Map(Object.entries(cached.mtimes)),
    };
  } catch {
    // Missing, unreadable or malformed cache: treat it as "no cache".
    return null;
  }
}
299
+
300
+ export {
301
+ AMBIGUITY_CAP,
302
+ DEFAULT_PATH_TIERS,
303
+ disambiguateTargets,
304
+ getPathWeight,
305
+ buildGraph,
306
+ updateGraphFiles,
307
+ removeFileNodes,
308
+ rankedSymbols,
309
+ saveGraph,
310
+ loadGraph,
311
+ };