@mishasinitcyn/betterrank 0.2.3 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +26 -2
- package/package.json +1 -1
- package/src/cli.js +171 -1
- package/src/compare.js +288 -0
- package/src/graph.js +6 -0
- package/src/index.js +464 -2
- package/src/parser.js +120 -1
package/README.md
CHANGED
|
@@ -31,9 +31,12 @@ betterrank callers authenticateUser --root /path/to/project --context
|
|
|
31
31
|
# Everything about a function: source, types, deps, callers
|
|
32
32
|
betterrank context calculate_bid --root /path/to/project
|
|
33
33
|
|
|
34
|
-
# Trace the full call chain from entry point to function
|
|
34
|
+
# Trace the full call chain from entry point to function (upward)
|
|
35
35
|
betterrank trace calculate_bid --root /path/to/project
|
|
36
36
|
|
|
37
|
+
# What does a function call, recursively? (downward)
|
|
38
|
+
betterrank callees calculate_bid --root /path/to/project
|
|
39
|
+
|
|
37
40
|
# What symbols changed and what might break?
|
|
38
41
|
betterrank diff --root /path/to/project
|
|
39
42
|
|
|
@@ -211,7 +214,7 @@ calculate_bid (src/engine/bidding.py:489-718)
|
|
|
211
214
|
5d236d3 2026-02-06 feat: wire ad_position to ValuePredictor
|
|
212
215
|
```
|
|
213
216
|
|
|
214
|
-
### `trace` — Recursive caller chain
|
|
217
|
+
### `trace` — Recursive caller chain (upward)
|
|
215
218
|
|
|
216
219
|
Walk UP the call graph from a symbol to see the full path from entry points to your function. At each hop, resolves which function in the caller file contains the call site.
|
|
217
220
|
|
|
@@ -228,6 +231,26 @@ calculate_bid (src/engine/bidding.py:489)
|
|
|
228
231
|
← app (src/main.py:45)
|
|
229
232
|
```
|
|
230
233
|
|
|
234
|
+
### `callees` — Recursive callee chain (downward)
|
|
235
|
+
|
|
236
|
+
Walk DOWN the call graph from a symbol to see everything it calls, transitively. The mirror of `trace`. Use before refactoring to understand downstream dependencies.
|
|
237
|
+
|
|
238
|
+
```bash
|
|
239
|
+
betterrank callees calculate_bid --root /path/to/project
|
|
240
|
+
betterrank callees calculate_bid --root /path/to/project --depth 5
|
|
241
|
+
```
|
|
242
|
+
|
|
243
|
+
**Example output:**
|
|
244
|
+
```
|
|
245
|
+
calculate_bid (src/engine/bidding.py:489)
|
|
246
|
+
→ from_microdollars (src/core/currency.py:108)
|
|
247
|
+
→ get_config (src/engine/predictor/config.py:316)
|
|
248
|
+
→ load_yaml (src/core/config.py:22)
|
|
249
|
+
→ get_value_predictor (src/engine/predictor/persistence.py:123)
|
|
250
|
+
```
|
|
251
|
+
|
|
252
|
+
Use both together for a full "sandwich view" of a function — who calls it (upstream) and what it touches (downstream).
|
|
253
|
+
|
|
231
254
|
### `diff` — Git-aware blast radius
|
|
232
255
|
|
|
233
256
|
Shows which symbols changed in the working tree and how many external files call each changed symbol. Compares current disk state against a git ref.
|
|
@@ -315,6 +338,7 @@ const idx = new CodeIndex('/path/to/project');
|
|
|
315
338
|
const map = await idx.map({ limit: 100, focusFiles: ['src/main.ts'] });
|
|
316
339
|
const results = await idx.search({ query: 'auth', kind: 'function', limit: 10 });
|
|
317
340
|
const callers = await idx.callers({ symbol: 'authenticate', context: 2 });
|
|
341
|
+
const tree = await idx.callees({ symbol: 'authenticate', depth: 3 });
|
|
318
342
|
const counts = await idx.getCallerCounts('src/auth.ts');
|
|
319
343
|
const deps = await idx.dependencies({ file: 'src/auth.ts' });
|
|
320
344
|
const dependents = await idx.dependents({ file: 'src/auth.ts' });
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@mishasinitcyn/betterrank",
|
|
3
|
-
"version": "0.2.3",
|
|
3
|
+
"version": "0.2.5",
|
|
4
4
|
"description": "Structural code index with PageRank-ranked repo maps, symbol search, call-graph queries, and dependency analysis. Built on tree-sitter and graphology.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "src/index.js",
|
package/src/cli.js
CHANGED
|
@@ -20,12 +20,14 @@ Commands:
|
|
|
20
20
|
callers <symbol> [--file path] [--context] All call sites (ranked, with context lines)
|
|
21
21
|
context <symbol> [--file path] Full context: source, deps, types, callers
|
|
22
22
|
history <symbol> [--file path] Git history of a specific function
|
|
23
|
-
trace <symbol> [--depth N] Recursive caller chain (
|
|
23
|
+
trace <symbol> [--depth N] Recursive caller chain (upward)
|
|
24
|
+
callees <symbol> [--depth N] Recursive callee chain (downward)
|
|
24
25
|
diff [--ref <commit>] Git-aware blast radius (changed symbols + callers)
|
|
25
26
|
deps <file> What this file imports (ranked)
|
|
26
27
|
dependents <file> What imports this file (ranked)
|
|
27
28
|
neighborhood <file> [--hops N] [--max-files N] Local subgraph (ranked by PageRank)
|
|
28
29
|
orphans [--level file|symbol] [--kind type] Find disconnected files/symbols
|
|
30
|
+
compare <pathA> <pathB> Structural diff between two files/dirs
|
|
29
31
|
reindex Force full rebuild
|
|
30
32
|
stats Index statistics
|
|
31
33
|
|
|
@@ -269,6 +271,27 @@ Examples:
|
|
|
269
271
|
betterrank orphans --level symbol --kind function --root .
|
|
270
272
|
betterrank orphans --count --root .`,
|
|
271
273
|
|
|
274
|
+
compare: `betterrank compare <pathA> <pathB> [--kind type] [--include-tests]
|
|
275
|
+
|
|
276
|
+
Structural diff between two files or directories. Shows which symbols
|
|
277
|
+
exist in both, which are unique to each side, and how their signatures
|
|
278
|
+
and dependencies differ. No scores — just deterministic structural facts.
|
|
279
|
+
|
|
280
|
+
For directories: also shows file-level overlap (shared basenames).
|
|
281
|
+
By default filters out test files and generic names (get, set, __init__, etc.)
|
|
282
|
+
to focus on meaningful structural overlap.
|
|
283
|
+
|
|
284
|
+
Options:
|
|
285
|
+
--kind <type> Filter to: function, class, type, variable
|
|
286
|
+
--include-tests Include test files and test_ functions
|
|
287
|
+
--limit N Max items per section (default: 30)
|
|
288
|
+
|
|
289
|
+
Examples:
|
|
290
|
+
betterrank compare src/auth.py lib/auth.py
|
|
291
|
+
betterrank compare ./repo-a ./repo-b
|
|
292
|
+
betterrank compare ./repo-a ./repo-b --kind function
|
|
293
|
+
betterrank compare flask/app.py bottle/bottle.py --kind class`,
|
|
294
|
+
|
|
272
295
|
reindex: `betterrank reindex [--root <path>]
|
|
273
296
|
|
|
274
297
|
Force a full rebuild of the index. Use after branch switches, large merges,
|
|
@@ -328,6 +351,126 @@ async function main() {
|
|
|
328
351
|
return; // Keep process alive (server is listening)
|
|
329
352
|
}
|
|
330
353
|
|
|
354
|
+
// Compare command — standalone, doesn't need CodeIndex
|
|
355
|
+
if (command === 'compare') {
|
|
356
|
+
const pathA = flags._positional[0];
|
|
357
|
+
const pathB = flags._positional[1];
|
|
358
|
+
if (!pathA || !pathB) {
|
|
359
|
+
console.error('Usage: betterrank compare <pathA> <pathB> [--kind type]');
|
|
360
|
+
process.exit(1);
|
|
361
|
+
}
|
|
362
|
+
const absA = resolve(pathA);
|
|
363
|
+
const absB = resolve(pathB);
|
|
364
|
+
const { compare } = await import('./compare.js');
|
|
365
|
+
const includeTests = flags['include-tests'] === true;
|
|
366
|
+
const countMode = flags.count === true;
|
|
367
|
+
|
|
368
|
+
let result;
|
|
369
|
+
try {
|
|
370
|
+
result = await compare(absA, absB, { kind: flags.kind, includeTests });
|
|
371
|
+
} catch (err) {
|
|
372
|
+
console.error(err.message);
|
|
373
|
+
process.exit(1);
|
|
374
|
+
}
|
|
375
|
+
|
|
376
|
+
// --count mode: just print totals
|
|
377
|
+
if (countMode) {
|
|
378
|
+
const sm = result.summary;
|
|
379
|
+
console.log(`shared: ${sm.sharedNames}`);
|
|
380
|
+
console.log(`only_a: ${sm.onlyACount}`);
|
|
381
|
+
console.log(`only_b: ${sm.onlyBCount}`);
|
|
382
|
+
console.log(`total_a: ${sm.totalA}`);
|
|
383
|
+
console.log(`total_b: ${sm.totalB}`);
|
|
384
|
+
return;
|
|
385
|
+
}
|
|
386
|
+
|
|
387
|
+
const limit = flags.limit !== undefined ? parseInt(flags.limit, 10) : DEFAULT_LIMIT;
|
|
388
|
+
const off = flags.offset !== undefined ? parseInt(flags.offset, 10) : 0;
|
|
389
|
+
|
|
390
|
+
// Helper: paginate a list and print a range header
|
|
391
|
+
const paginateSection = (items, label) => {
|
|
392
|
+
const total = items.length;
|
|
393
|
+
if (total === 0) return [];
|
|
394
|
+
const shown = items.slice(off, off + limit);
|
|
395
|
+
if (shown.length === 0) {
|
|
396
|
+
console.log(`\n── ${label} (${total}) ── (offset ${off} exceeds ${total} results)`);
|
|
397
|
+
return [];
|
|
398
|
+
}
|
|
399
|
+
const rangeStr = total > limit || off > 0 ? `, showing ${off + 1}-${off + shown.length}` : '';
|
|
400
|
+
console.log(`\n── ${label} (${total}${rangeStr}) ──`);
|
|
401
|
+
return shown;
|
|
402
|
+
};
|
|
403
|
+
|
|
404
|
+
// Warn if either side had zero symbols
|
|
405
|
+
if (result.summary.totalA === 0) {
|
|
406
|
+
process.stderr.write(`⚠ No parseable symbols found in A: ${result.labelA}\n`);
|
|
407
|
+
}
|
|
408
|
+
if (result.summary.totalB === 0) {
|
|
409
|
+
process.stderr.write(`⚠ No parseable symbols found in B: ${result.labelB}\n`);
|
|
410
|
+
}
|
|
411
|
+
|
|
412
|
+
// Summary first (most useful at a glance)
|
|
413
|
+
const sm = result.summary;
|
|
414
|
+
console.log(`── Summary ──`);
|
|
415
|
+
console.log(` A: ${result.labelA} (${sm.totalA} symbols)`);
|
|
416
|
+
console.log(` B: ${result.labelB} (${sm.totalB} symbols)`);
|
|
417
|
+
console.log(` Shared names: ${sm.sharedNames} | Only A: ${sm.onlyACount} | Only B: ${sm.onlyBCount}`);
|
|
418
|
+
|
|
419
|
+
// File-level overlap (directory mode)
|
|
420
|
+
if (result.isDirectoryMode) {
|
|
421
|
+
console.log(`\n── Files ──`);
|
|
422
|
+
console.log(` A: ${result.files.totalA} files | B: ${result.files.totalB} files`);
|
|
423
|
+
if (result.files.shared.length > 0) {
|
|
424
|
+
const fileList = result.files.shared.length > 15
|
|
425
|
+
? result.files.shared.slice(0, 15).join(', ') + ` (+${result.files.shared.length - 15} more)`
|
|
426
|
+
: result.files.shared.join(', ');
|
|
427
|
+
console.log(` Shared basenames (${result.files.shared.length}): ${fileList}`);
|
|
428
|
+
}
|
|
429
|
+
}
|
|
430
|
+
|
|
431
|
+
// Shared symbols — compact grouped format, sorted by sharedRefs
|
|
432
|
+
const sharedShown = paginateSection(result.shared, 'Shared symbols');
|
|
433
|
+
for (const s of sharedShown) {
|
|
434
|
+
const kinds = new Set([...s.inA.map(d => d.kind), ...s.inB.map(d => d.kind)]);
|
|
435
|
+
const kindStr = [...kinds].join('/');
|
|
436
|
+
const refTag = s.sharedRefs.length > 0 ? ` ${s.sharedRefs.length} shared refs` : '';
|
|
437
|
+
console.log(` ${s.name} [${kindStr}] A:${s.inA.length} def${s.inA.length > 1 ? 's' : ''} B:${s.inB.length} def${s.inB.length > 1 ? 's' : ''}${refTag}`);
|
|
438
|
+
for (const d of s.inA.slice(0, 2)) {
|
|
439
|
+
console.log(` A: ${d.file}:${d.line} ${d.signature}`);
|
|
440
|
+
}
|
|
441
|
+
if (s.inA.length > 2) console.log(` A: ... and ${s.inA.length - 2} more`);
|
|
442
|
+
for (const d of s.inB.slice(0, 2)) {
|
|
443
|
+
console.log(` B: ${d.file}:${d.line} ${d.signature}`);
|
|
444
|
+
}
|
|
445
|
+
if (s.inB.length > 2) console.log(` B: ... and ${s.inB.length - 2} more`);
|
|
446
|
+
if (s.sharedRefs.length > 0) {
|
|
447
|
+
console.log(` Shared refs: ${s.sharedRefs.slice(0, 10).join(', ')}${s.sharedRefs.length > 10 ? ` (+${s.sharedRefs.length - 10} more)` : ''}`);
|
|
448
|
+
}
|
|
449
|
+
}
|
|
450
|
+
if (result.shared.length > off + limit) {
|
|
451
|
+
console.log(` (use --offset ${off + limit} to see more)`);
|
|
452
|
+
}
|
|
453
|
+
|
|
454
|
+
// Only in A
|
|
455
|
+
const onlyAShown = paginateSection(result.onlyA, 'Only in A');
|
|
456
|
+
for (const s of onlyAShown) {
|
|
457
|
+
console.log(` [${s.kind}] ${s.file}:${s.line} ${s.signature}`);
|
|
458
|
+
}
|
|
459
|
+
if (result.onlyA.length > off + limit) {
|
|
460
|
+
console.log(` (use --offset ${off + limit} to see more)`);
|
|
461
|
+
}
|
|
462
|
+
|
|
463
|
+
// Only in B
|
|
464
|
+
const onlyBShown = paginateSection(result.onlyB, 'Only in B');
|
|
465
|
+
for (const s of onlyBShown) {
|
|
466
|
+
console.log(` [${s.kind}] ${s.file}:${s.line} ${s.signature}`);
|
|
467
|
+
}
|
|
468
|
+
if (result.onlyB.length > off + limit) {
|
|
469
|
+
console.log(` (use --offset ${off + limit} to see more)`);
|
|
470
|
+
}
|
|
471
|
+
return;
|
|
472
|
+
}
|
|
473
|
+
|
|
331
474
|
// Outline command — standalone by default, needs CodeIndex for --annotate
|
|
332
475
|
if (command === 'outline') {
|
|
333
476
|
const filePath = flags._positional[0];
|
|
@@ -661,6 +804,27 @@ async function main() {
|
|
|
661
804
|
break;
|
|
662
805
|
}
|
|
663
806
|
|
|
807
|
+
case 'callees': {
|
|
808
|
+
const symbol = flags._positional[0];
|
|
809
|
+
if (!symbol) { console.error('Usage: betterrank callees <symbol> [--depth N]'); process.exit(1); }
|
|
810
|
+
const calleesDepth = flags.depth ? parseInt(flags.depth, 10) : 3;
|
|
811
|
+
const tree = await idx.callees({ symbol, file: normalizeFilePath(flags.file), depth: calleesDepth });
|
|
812
|
+
if (!tree) {
|
|
813
|
+
console.log(`(symbol "${symbol}" not found)`);
|
|
814
|
+
} else {
|
|
815
|
+
const printNode = (node, depth) => {
|
|
816
|
+
const indent = depth === 0 ? '' : ' '.repeat(depth) + '→ ';
|
|
817
|
+
const loc = `(${node.file}:${node.line || '?'})`;
|
|
818
|
+
console.log(`${indent}${node.name} ${loc}`);
|
|
819
|
+
for (const callee of node.callees) {
|
|
820
|
+
printNode(callee, depth + 1);
|
|
821
|
+
}
|
|
822
|
+
};
|
|
823
|
+
printNode(tree, 0);
|
|
824
|
+
}
|
|
825
|
+
break;
|
|
826
|
+
}
|
|
827
|
+
|
|
664
828
|
case 'diff': {
|
|
665
829
|
const result = await idx.diff({ ref: flags.ref || 'HEAD' });
|
|
666
830
|
if (result.error) {
|
|
@@ -838,6 +1002,12 @@ async function main() {
|
|
|
838
1002
|
break;
|
|
839
1003
|
}
|
|
840
1004
|
|
|
1005
|
+
case 'similar': {
|
|
1006
|
+
console.error('The "similar" command has been replaced by "compare".');
|
|
1007
|
+
console.error('Usage: betterrank compare <pathA> <pathB>');
|
|
1008
|
+
process.exit(1);
|
|
1009
|
+
}
|
|
1010
|
+
|
|
841
1011
|
case 'reindex': {
|
|
842
1012
|
const t0 = Date.now();
|
|
843
1013
|
const result = await idx.reindex();
|
package/src/compare.js
ADDED
|
@@ -0,0 +1,288 @@
|
|
|
1
|
+
import { readFile, stat as fsStat } from 'fs/promises';
|
|
2
|
+
import { glob } from 'glob';
|
|
3
|
+
import { join, relative, basename } from 'path';
|
|
4
|
+
import { parseFile, SUPPORTED_EXTENSIONS } from './parser.js';
|
|
5
|
+
|
|
6
|
+
// Glob patterns excluded from directory scans; handed to glob() as its
// `ignore` option by scanAndParse().
const IGNORE_PATTERNS = [
  '**/node_modules/**',
  '**/.npm/**',
  '**/.yarn/**',
  '**/.pnp.*',
  '**/bower_components/**',
  '**/*.min.js',
  '**/*.bundle.js',
  '**/*.map',
  '**/__pycache__/**',
  '**/.venv/**',
  '**/venv/**',
  '**/env/**',
  '**/.env/**',
  '**/.virtualenvs/**',
  '**/site-packages/**',
  '**/*.egg-info/**',
  '**/.eggs/**',
  '**/dist/**',
  '**/build/**',
  '**/.git/**',
  '**/.svn/**',
  '**/.hg/**',
  '**/vendor/**',
  '**/tmp/**',
  '**/temp/**',
  '**/.idea/**',
  '**/.vscode/**',
  '**/.DS_Store',
  '**/Pods/**',
  '**/DerivedData/**',
];
|
|
17
|
+
|
|
18
|
+
// Symbol names that are too generic to count as a meaningful match between
// two codebases. Membership is tested by exact name only, so `processEvent`
// is kept while a bare `process` is discarded.
const NOISE_NAMES = new Set([
  // Very common function names
  'get', 'set', 'run', 'main',
  'init', 'setup', 'start', 'stop',
  'open', 'close', 'read', 'write',
  'delete', 'update', 'create',
  'add', 'remove', 'clear', 'reset',
  'test', 'check', 'load',
  'toString', 'toJSON', 'valueOf',
  'hash', 'eq', 'repr', 'str',
  'copy', 'keys', 'values', 'items',
  'pop', 'push', 'append',
  'default', 'setdefault',
  'apply', 'call', 'bind',
  'map', 'filter', 'reduce',
  'format', 'parse', 'validate',
  'serialize', 'deserialize',
  'configure', 'connect',

  // Python dunder methods
  '__init__', '__repr__', '__str__', '__eq__', '__hash__',
  '__enter__', '__exit__',
  '__iter__', '__next__', '__len__',
  '__getitem__', '__setitem__', '__delitem__', '__contains__',
  '__call__', '__bool__',
  '__getattr__', '__setattr__', '__delattr__',
  '__get__', '__set__', '__delete__',

  // Common JavaScript names
  'constructor', 'render', 'process', 'handle', 'execute',

  // Single-character names
  'a', 'b', 'c', 'd', 'e', 'f',
  'x', 'y', 'n',
  'i', 'j', 'k',

  // Names typical of test fixtures and scaffolding
  'foo', 'bar', 'baz',
  'wrapper', 'decorator', 'callback',
  'index', 'app', 'client', 'response', 'request',
]);
|
|
45
|
+
|
|
46
|
+
/**
 * Find every file under `dirPath` whose extension is listed in
 * SUPPORTED_EXTENSIONS (minus anything matched by IGNORE_PATTERNS) and run
 * each one through parseFile().
 *
 * Files are parsed under their path relative to `dirPath`. A file that
 * cannot be read or parsed is skipped on purpose, so one bad file does not
 * abort the whole scan; falsy parse results are dropped as well.
 *
 * @param {string} dirPath - Directory to scan.
 * @returns {Promise<object[]>} The truthy parseFile() results.
 */
async function scanAndParse(dirPath) {
  const extensionGroup = SUPPORTED_EXTENSIONS.join(',');
  const matches = await glob(`**/*{${extensionGroup}}`, {
    cwd: dirPath,
    ignore: IGNORE_PATTERNS,
    absolute: true,
    nodir: true,
  });

  const parsed = [];
  for (const absolutePath of matches) {
    let fileResult;
    try {
      const text = await readFile(absolutePath, 'utf-8');
      fileResult = parseFile(relative(dirPath, absolutePath), text);
    } catch {
      // Deliberate best-effort: unreadable or unparseable files are skipped.
      continue;
    }
    if (fileResult) {
      parsed.push(fileResult);
    }
  }
  return parsed;
}
|
|
68
|
+
|
|
69
|
+
/**
 * Read one file and parse it under its basename.
 *
 * Read errors are not caught here; they reject the returned promise.
 *
 * @param {string} filePath - Path of the file to parse.
 * @returns {Promise<object[]>} A one-element array holding the parseFile()
 *   result, or an empty array when parseFile() returns a falsy value.
 */
async function parseSingleFile(filePath) {
  const text = await readFile(filePath, 'utf-8');
  const parsed = parseFile(basename(filePath), text);
  if (!parsed) {
    return [];
  }
  return [parsed];
}
|
|
74
|
+
|
|
75
|
+
/**
 * Flatten per-file parse results into one list of symbol records.
 *
 * Each definition becomes a record carrying its name, kind, owning file,
 * line span and signature. Missing `paramNames` / `localRefs` default to
 * empty arrays, and `bodyLines` is lineEnd - lineStart with a missing bound
 * counted as 0.
 *
 * @param {Iterable<{file: string, definitions: Iterable<object>}>} parseResults
 * @returns {object[]} Symbol records in file order, then definition order.
 */
function extractSymbols(parseResults) {
  const collected = [];
  for (const { file, definitions } of parseResults) {
    for (const def of definitions) {
      const { name, kind, lineStart, lineEnd, signature } = def;
      const paramNames = def.paramNames || [];
      const localRefs = def.localRefs || [];
      collected.push({
        name,
        kind,
        file,
        lineStart,
        lineEnd,
        signature,
        paramCount: paramNames.length,
        paramNames,
        localRefs,
        bodyLines: (lineEnd || 0) - (lineStart || 0),
      });
    }
  }
  return collected;
}
|
|
95
|
+
|
|
96
|
+
// Test file detection
|
|
97
|
+
// Markers that flag a path as test code. Entries ending in '/' name a
// directory and must match a whole path segment; other entries match as a
// substring anywhere in the lower-cased path.
const TEST_SEGMENTS = ['test/', 'tests/', '__tests__/', 'spec/', 'specs/', 'conftest'];

/**
 * Decide whether a path points at test code.
 *
 * A path counts as a test file when its file name starts with `test_`
 * (case-sensitive), when any of its directories is named after a directory
 * marker in TEST_SEGMENTS (case-insensitive), or when the lower-cased path
 * contains a non-directory marker such as `conftest`.
 *
 * Directory markers are compared against whole path segments, so
 * `latest/` or `contest/` are not mistaken for `test/`. Backslashes are
 * treated as separators so Windows-style relative paths are recognised too.
 *
 * @param {string} file - File path, relative or absolute.
 * @returns {boolean} True when the path looks like test code.
 */
function isTestFile(file) {
  const normalized = file.replace(/\\/g, '/');
  const fileName = normalized.slice(normalized.lastIndexOf('/') + 1);
  if (fileName.startsWith('test_')) {
    return true;
  }

  const lower = normalized.toLowerCase();
  // Every segment except the last one is a directory name.
  const directories = new Set(lower.split('/').slice(0, -1));
  return TEST_SEGMENTS.some((marker) =>
    marker.endsWith('/') ? directories.has(marker.slice(0, -1)) : lower.includes(marker),
  );
}
|
|
102
|
+
|
|
103
|
+
/**
 * Structurally compare two codebases, each given as a file or a directory.
 *
 * Both sides are parsed, optionally narrowed to one symbol `kind`, stripped
 * of noise (NOISE_NAMES always; `test_*` / `Test*` symbols and test files
 * unless `includeTests` is set), de-duplicated, and grouped by symbol name.
 *
 * The result lists names present on both sides (`shared`), symbols unique to
 * each side (`onlyA` / `onlyB`), a basename-level file comparison and summary
 * counts. Shared entries are ordered by how many local references the two
 * sides have in common, then by matching kind, then by matching parameter
 * count, then alphabetically.
 *
 * @param {string} pathA - First file or directory.
 * @param {string} pathB - Second file or directory.
 * @param {object} [options]
 * @param {string} [options.kind] - Keep only symbols of this kind.
 * @param {boolean} [options.includeTests=false] - Keep test symbols and files.
 * @returns {Promise<object>} Comparison result.
 * @throws {Error} When either path cannot be stat'ed.
 */
async function compare(pathA, pathB, { kind, includeTests = false } = {}) {
  // Validate both paths up front; A is checked before B.
  const statOrThrow = async (label, target) => {
    try {
      return await fsStat(target);
    } catch {
      throw new Error(`Path ${label} does not exist: ${target}`);
    }
  };
  const statA = await statOrThrow('A', pathA);
  const statB = await statOrThrow('B', pathB);
  const aIsDir = statA.isDirectory();
  const bIsDir = statB.isDirectory();

  const load = (isDir, target) => (isDir ? scanAndParse(target) : parseSingleFile(target));
  const parseResultsA = await load(aIsDir, pathA);
  const parseResultsB = await load(bIsDir, pathB);

  // Noise names are always dropped; test symbols/files only when tests are excluded.
  const isSignificant = (sym) => {
    if (NOISE_NAMES.has(sym.name)) return false;
    if (includeTests) return true;
    return !(
      sym.name.startsWith('test_') ||
      sym.name.startsWith('Test') ||
      isTestFile(sym.file)
    );
  };

  // Kind filter -> noise filter -> de-duplication, applied in one pass.
  // The same name+file+line can appear more than once from overlapping
  // tree-sitter captures; only the first surviving occurrence is kept.
  const refine = (parseResults) => {
    const seenKeys = new Set();
    const kept = [];
    for (const sym of extractSymbols(parseResults)) {
      if (kind && sym.kind !== kind) continue;
      if (!isSignificant(sym)) continue;
      const key = `${sym.name}::${sym.file}::${sym.lineStart}`;
      if (seenKeys.has(key)) continue;
      seenKeys.add(key);
      kept.push(sym);
    }
    return kept;
  };
  const symbolsA = refine(parseResultsA);
  const symbolsB = refine(parseResultsB);

  const groupByName = (syms) => {
    const groups = new Map();
    for (const sym of syms) {
      const bucket = groups.get(sym.name);
      if (bucket) {
        bucket.push(sym);
      } else {
        groups.set(sym.name, [sym]);
      }
    }
    return groups;
  };
  const byNameA = groupByName(symbolsA);
  const byNameB = groupByName(symbolsB);

  // Union of local references over every definition sharing a name.
  const refsOf = (defs) => {
    const refs = new Set();
    for (const def of defs) {
      for (const ref of def.localRefs) refs.add(ref);
    }
    return refs;
  };
  const describe = (s) => ({
    kind: s.kind,
    file: s.file,
    line: s.lineStart,
    signature: s.signature,
    paramCount: s.paramCount,
    bodyLines: s.bodyLines,
  });

  // Names defined on both sides, with all definitions grouped per name.
  const shared = [];
  for (const [name, defsA] of byNameA) {
    const defsB = byNameB.get(name);
    if (!byNameB.has(name)) continue;

    const refsB = refsOf(defsB);
    const sharedRefs = [...refsOf(defsA)].filter((ref) => refsB.has(ref));

    shared.push({
      name,
      inA: defsA.map(describe),
      inB: defsB.map(describe),
      sharedRefs,
      // True when at least one A/B pair agrees on kind / parameter count.
      sameKind: defsA.some((a) => defsB.some((b) => a.kind === b.kind)),
      sameParamCount: defsA.some((a) => defsB.some((b) => a.paramCount === b.paramCount)),
    });
  }

  // Most shared refs first, then same-kind, then same-param-count, then A-Z.
  shared.sort((left, right) => {
    const byRefs = right.sharedRefs.length - left.sharedRefs.length;
    if (byRefs !== 0) return byRefs;
    if (left.sameKind !== right.sameKind) return left.sameKind ? -1 : 1;
    if (left.sameParamCount !== right.sameParamCount) return left.sameParamCount ? -1 : 1;
    return left.name.localeCompare(right.name);
  });

  // Public names first, `_`-prefixed names last, alphabetical inside each group.
  const privateLast = (left, right) => {
    const leftPrivate = left.name.startsWith('_');
    const rightPrivate = right.name.startsWith('_');
    if (leftPrivate === rightPrivate) return left.name.localeCompare(right.name);
    return leftPrivate ? 1 : -1;
  };
  const exclusiveTo = (own, other) => {
    const rows = [];
    for (const [name, defs] of own) {
      if (other.has(name)) continue;
      for (const s of defs) {
        rows.push({ name, kind: s.kind, file: s.file, line: s.lineStart, signature: s.signature });
      }
    }
    rows.sort(privateLast);
    return rows;
  };
  const onlyA = exclusiveTo(byNameA, byNameB);
  const onlyB = exclusiveTo(byNameB, byNameA);

  // File-level comparison by basename. Basenames too generic to be a
  // meaningful match are left out of the shared list only.
  const NOISE_BASENAMES = new Set([
    '__init__.py', 'conftest.py', 'conf.py', 'setup.py', 'setup.cfg',
    'index.js', 'index.ts', 'index.tsx', 'main.py', 'main.go', 'main.rs',
    'app.py', 'app.js', 'app.ts', 'mod.rs', 'lib.rs',
    'utils.py', 'utils.js', 'utils.ts', 'helpers.py', 'helpers.js',
    'types.ts', 'types.py', 'config.py', 'config.js', 'config.ts',
    'constants.py', 'constants.js', 'constants.ts',
  ]);
  const filesA = parseResultsA.map((r) => r.file);
  const filesB = parseResultsB.map((r) => r.file);
  const basenamesA = new Set(filesA.map((f) => basename(f)));
  const basenamesB = new Set(filesB.map((f) => basename(f)));

  return {
    labelA: aIsDir ? pathA : basename(pathA),
    labelB: bIsDir ? pathB : basename(pathB),
    isDirectoryMode: aIsDir || bIsDir,
    shared,
    onlyA,
    onlyB,
    files: {
      shared: [...basenamesA].filter((f) => basenamesB.has(f) && !NOISE_BASENAMES.has(f)),
      onlyA: [...basenamesA].filter((f) => !basenamesB.has(f)),
      onlyB: [...basenamesB].filter((f) => !basenamesA.has(f)),
      totalA: filesA.length,
      totalB: filesB.length,
    },
    summary: {
      totalA: symbolsA.length,
      totalB: symbolsB.length,
      sharedNames: shared.length,
      onlyACount: onlyA.length,
      onlyBCount: onlyB.length,
    },
  };
}
|
|
287
|
+
|
|
288
|
+
export { compare };
|
package/src/graph.js
CHANGED
|
@@ -61,6 +61,9 @@ function buildGraph(allSymbols) {
|
|
|
61
61
|
lineStart: def.lineStart,
|
|
62
62
|
lineEnd: def.lineEnd,
|
|
63
63
|
signature: def.signature,
|
|
64
|
+
astProfile: def.astProfile || null,
|
|
65
|
+
paramNames: def.paramNames || null,
|
|
66
|
+
localRefs: def.localRefs || null,
|
|
64
67
|
});
|
|
65
68
|
graph.addEdge(file, symbolKey, { type: 'DEFINES' });
|
|
66
69
|
}
|
|
@@ -148,6 +151,9 @@ function updateGraphFiles(graph, removedFiles, newSymbols) {
|
|
|
148
151
|
lineStart: def.lineStart,
|
|
149
152
|
lineEnd: def.lineEnd,
|
|
150
153
|
signature: def.signature,
|
|
154
|
+
astProfile: def.astProfile || null,
|
|
155
|
+
paramNames: def.paramNames || null,
|
|
156
|
+
localRefs: def.localRefs || null,
|
|
151
157
|
});
|
|
152
158
|
graph.addEdge(file, symbolKey, { type: 'DEFINES' });
|
|
153
159
|
|
package/src/index.js
CHANGED
|
@@ -4,6 +4,73 @@ import { CodeIndexCache } from './cache.js';
|
|
|
4
4
|
import { rankedSymbols } from './graph.js';
|
|
5
5
|
import { parseFile } from './parser.js';
|
|
6
6
|
|
|
7
|
+
/**
 * Collapse unchanged context lines in `git log -L` diff output.
 *
 * The text is split into per-commit sections (each starting with "commit ").
 * Within a section, everything up to and including the first `@@` hunk
 * header is kept verbatim. After that, only `+`/`-` lines, `ctx` lines of
 * context on either side of them, and any later `@@` hunk headers are kept;
 * every run of other lines is replaced by a single " ..." marker.
 *
 * Blank lines at the end of a section (the separator before the next
 * commit) are not diff content: they are set aside before collapsing and
 * restored afterwards, so they never show up as a misleading " ..." marker.
 *
 * Sections without a "diff --git" line, or without any hunk header, are
 * passed through unchanged.
 *
 * @param {string} raw - Output of `git log -L ...`.
 * @param {number} [ctx=2] - Context lines to keep around each changed line.
 * @returns {string} The collapsed text.
 */
function _collapseDiffContext(raw, ctx = 2) {
  const output = [];
  // Zero-width split keeps the "commit " line at the start of each section.
  const sections = raw.split(/^(?=commit )/m);

  for (const section of sections) {
    if (!section.trim()) continue;

    const lines = section.split('\n');
    const diffStart = lines.findIndex((l) => l.startsWith('diff --git'));
    if (diffStart === -1) {
      // No diff in this section (e.g., initial commit with just +++ lines)
      output.push(section);
      continue;
    }

    // Commit header: everything before "diff --git".
    output.push(lines.slice(0, diffStart).join('\n'));

    const diffLines = lines.slice(diffStart);
    const hunkStart = diffLines.findIndex((l) => l.startsWith('@@'));
    if (hunkStart === -1) {
      output.push(diffLines.join('\n'));
      continue;
    }

    // Diff headers (diff --git, ---, +++) and the first @@ line.
    output.push(diffLines.slice(0, hunkStart + 1).join('\n'));

    const content = diffLines.slice(hunkStart + 1);

    // Trailing empty strings come from the newline(s) that end the section.
    // n of them stand for n newlines; the final join('\n') supplies one, so
    // n - 1 are re-appended after collapsing to reproduce the separator.
    let bodyEnd = content.length;
    while (bodyEnd > 0 && content[bodyEnd - 1] === '') bodyEnd--;
    const body = content.slice(0, bodyEnd);
    const separator = content.slice(bodyEnd + 1);

    // Mark lines worth showing: changes plus `ctx` neighbours, and any
    // later hunk header (it carries the line numbers for what follows).
    const show = new Array(body.length).fill(false);
    for (let i = 0; i < body.length; i++) {
      const line = body[i];
      if (line.startsWith('@@')) {
        show[i] = true;
        continue;
      }
      if (!line.startsWith('+') && !line.startsWith('-')) continue;
      const from = Math.max(0, i - ctx);
      const to = Math.min(body.length - 1, i + ctx);
      for (let j = from; j <= to; j++) {
        show[j] = true;
      }
    }

    // Emit shown lines; each hidden run becomes one " ..." marker.
    const collapsed = [];
    let inEllipsis = false;
    for (let i = 0; i < body.length; i++) {
      if (show[i]) {
        inEllipsis = false;
        collapsed.push(body[i]);
      } else if (!inEllipsis) {
        inEllipsis = true;
        collapsed.push(' ...');
      }
    }

    output.push([...collapsed, ...separator].join('\n'));
  }

  return output.join('\n');
}
|
|
73
|
+
|
|
7
74
|
// ── Orphan false-positive filters ──────────────────────────────────────────
|
|
8
75
|
//
|
|
9
76
|
// Orphan detection finds files/symbols with no cross-file connections.
|
|
@@ -1194,12 +1261,13 @@ class CodeIndex {
|
|
|
1194
1261
|
const { execSync } = await import('child_process');
|
|
1195
1262
|
try {
|
|
1196
1263
|
if (patch) {
|
|
1197
|
-
// Full output with diffs — return raw text
|
|
1198
1264
|
const raw = execSync(
|
|
1199
1265
|
`git log -L ${target.lineStart},${target.lineEnd}:${target.file} --skip=${offset} -n ${limit}`,
|
|
1200
1266
|
{ cwd: this.projectRoot, encoding: 'utf-8', stdio: ['pipe', 'pipe', 'pipe'], timeout: 30000 }
|
|
1201
1267
|
).trim();
|
|
1202
|
-
|
|
1268
|
+
// Collapse unchanged context lines in diffs — keep 2 lines around changes
|
|
1269
|
+
const collapsed = _collapseDiffContext(raw, 2);
|
|
1270
|
+
return { definition: target, commits: [], raw: collapsed };
|
|
1203
1271
|
}
|
|
1204
1272
|
// Summary only
|
|
1205
1273
|
const output = execSync(
|
|
@@ -1347,6 +1415,133 @@ class CodeIndex {
|
|
|
1347
1415
|
return buildNode(rootAttrs.name, rootAttrs.file, rootAttrs.lineStart, 0);
|
|
1348
1416
|
}
|
|
1349
1417
|
|
|
1418
|
+
/**
|
|
1419
|
+
* Recursive callee chain — walk DOWN the call graph.
|
|
1420
|
+
* Mirror of trace(): shows what a function calls, transitively.
|
|
1421
|
+
*
|
|
1422
|
+
* @param {string} opts.symbol - Symbol name
|
|
1423
|
+
* @param {string} [opts.file] - Disambiguate by file
|
|
1424
|
+
* @param {number} [opts.depth=3] - Max hops downward
|
|
1425
|
+
* @returns {object} Tree root node with .callees[]
|
|
1426
|
+
*/
|
|
1427
|
+
async callees({ symbol, file, depth = 3 }) {
|
|
1428
|
+
await this._ensureReady();
|
|
1429
|
+
const graph = this.cache.getGraph();
|
|
1430
|
+
if (!graph) return null;
|
|
1431
|
+
|
|
1432
|
+
// Find the target symbol node(s)
|
|
1433
|
+
const targetKeys = [];
|
|
1434
|
+
graph.forEachNode((node, attrs) => {
|
|
1435
|
+
if (attrs.type !== 'symbol') return;
|
|
1436
|
+
if (attrs.name !== symbol) return;
|
|
1437
|
+
if (file && attrs.file !== file) return;
|
|
1438
|
+
targetKeys.push(node);
|
|
1439
|
+
});
|
|
1440
|
+
|
|
1441
|
+
if (targetKeys.length === 0) return null;
|
|
1442
|
+
|
|
1443
|
+
// Use the first match (highest PageRank if multiple)
|
|
1444
|
+
const ranked = this._getRanked();
|
|
1445
|
+
const scoreMap = new Map(ranked);
|
|
1446
|
+
targetKeys.sort((a, b) => (scoreMap.get(b) || 0) - (scoreMap.get(a) || 0));
|
|
1447
|
+
const rootKey = targetKeys[0];
|
|
1448
|
+
const rootAttrs = graph.getNodeAttributes(rootKey);
|
|
1449
|
+
|
|
1450
|
+
// Build a map: symbolKey -> set of symbol keys it references (outgoing REFERENCES)
|
|
1451
|
+
// For each file node that has an outgoing REFERENCES edge to a symbol,
|
|
1452
|
+
// we need to resolve which function in that file makes the call.
|
|
1453
|
+
// Approach: for each symbol, find what other symbols it references
|
|
1454
|
+
// by looking at REFERENCES edges from the symbol's file to other symbols,
|
|
1455
|
+
// then filtering to references that occur within the symbol's line range.
|
|
1456
|
+
|
|
1457
|
+
// Cache of file -> definitions
|
|
1458
|
+
const defCache = new Map();
|
|
1459
|
+
const getFileDefs = async (filePath) => {
|
|
1460
|
+
if (defCache.has(filePath)) return defCache.get(filePath);
|
|
1461
|
+
try {
|
|
1462
|
+
const absPath = join(this.projectRoot, filePath);
|
|
1463
|
+
const source = await readFile(absPath, 'utf-8');
|
|
1464
|
+
const parsed = parseFile(filePath, source);
|
|
1465
|
+
const defs = parsed ? parsed.definitions.sort((a, b) => a.lineStart - b.lineStart) : [];
|
|
1466
|
+
defCache.set(filePath, defs);
|
|
1467
|
+
return defs;
|
|
1468
|
+
} catch {
|
|
1469
|
+
defCache.set(filePath, []);
|
|
1470
|
+
return [];
|
|
1471
|
+
}
|
|
1472
|
+
};
|
|
1473
|
+
|
|
1474
|
+
const visited = new Set();
|
|
1475
|
+
|
|
1476
|
+
const buildNode = async (symbolName, symbolFile, symbolLine, currentDepth) => {
|
|
1477
|
+
const nodeKey = `${symbolFile}::${symbolName}`;
|
|
1478
|
+
const node = { name: symbolName, file: symbolFile, line: symbolLine, callees: [] };
|
|
1479
|
+
|
|
1480
|
+
if (currentDepth >= depth) return node;
|
|
1481
|
+
if (visited.has(nodeKey)) return node;
|
|
1482
|
+
visited.add(nodeKey);
|
|
1483
|
+
|
|
1484
|
+
// Find the definition's line range so we know which references belong to it
|
|
1485
|
+
const defs = await getFileDefs(symbolFile);
|
|
1486
|
+
const thisDef = defs.find(d => d.name === symbolName && d.lineStart === symbolLine)
|
|
1487
|
+
|| defs.find(d => d.name === symbolName);
|
|
1488
|
+
if (!thisDef) return node;
|
|
1489
|
+
|
|
1490
|
+
// Get the source to find call sites within this function's body
|
|
1491
|
+
let sourceLines;
|
|
1492
|
+
try {
|
|
1493
|
+
const absPath = join(this.projectRoot, symbolFile);
|
|
1494
|
+
const source = await readFile(absPath, 'utf-8');
|
|
1495
|
+
sourceLines = source.split('\n');
|
|
1496
|
+
} catch {
|
|
1497
|
+
return node;
|
|
1498
|
+
}
|
|
1499
|
+
|
|
1500
|
+
// Find the file node in the graph for this symbol's file
|
|
1501
|
+
const fileNodeKey = symbolFile;
|
|
1502
|
+
|
|
1503
|
+
// Collect all symbols that this file references (outgoing REFERENCES from file node)
|
|
1504
|
+
const referencedSymbols = new Map(); // name -> {file, line, name}
|
|
1505
|
+
if (graph.hasNode(fileNodeKey)) {
|
|
1506
|
+
graph.forEachOutEdge(fileNodeKey, (_edge, attrs, _source, target) => {
|
|
1507
|
+
if (attrs.type !== 'REFERENCES') return;
|
|
1508
|
+
const targetAttrs = graph.getNodeAttributes(target);
|
|
1509
|
+
if (targetAttrs.type !== 'symbol') return;
|
|
1510
|
+
// Skip self-references
|
|
1511
|
+
if (targetAttrs.name === symbolName && targetAttrs.file === symbolFile) return;
|
|
1512
|
+
referencedSymbols.set(`${targetAttrs.file}::${targetAttrs.name}`, {
|
|
1513
|
+
name: targetAttrs.name,
|
|
1514
|
+
file: targetAttrs.file,
|
|
1515
|
+
line: targetAttrs.lineStart
|
|
1516
|
+
});
|
|
1517
|
+
});
|
|
1518
|
+
}
|
|
1519
|
+
|
|
1520
|
+
// Filter to references that appear within this function's line range
|
|
1521
|
+
for (const [key, ref] of referencedSymbols) {
|
|
1522
|
+
const callPattern = new RegExp(
|
|
1523
|
+
`(?<![a-zA-Z0-9_])${ref.name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}\\s*\\(`
|
|
1524
|
+
);
|
|
1525
|
+
|
|
1526
|
+
let found = false;
|
|
1527
|
+
for (let i = thisDef.lineStart - 1; i < Math.min(thisDef.lineEnd, sourceLines.length); i++) {
|
|
1528
|
+
if (callPattern.test(sourceLines[i])) { found = true; break; }
|
|
1529
|
+
}
|
|
1530
|
+
|
|
1531
|
+
if (found) {
|
|
1532
|
+
const calleeNode = await buildNode(ref.name, ref.file, ref.line, currentDepth + 1);
|
|
1533
|
+
node.callees.push(calleeNode);
|
|
1534
|
+
}
|
|
1535
|
+
}
|
|
1536
|
+
|
|
1537
|
+
// Sort callees by file then name for deterministic output
|
|
1538
|
+
node.callees.sort((a, b) => a.file.localeCompare(b.file) || a.name.localeCompare(b.name));
|
|
1539
|
+
return node;
|
|
1540
|
+
};
|
|
1541
|
+
|
|
1542
|
+
return buildNode(rootAttrs.name, rootAttrs.file, rootAttrs.lineStart, 0);
|
|
1543
|
+
}
|
|
1544
|
+
|
|
1350
1545
|
/**
|
|
1351
1546
|
* Git-aware blast radius — what symbols changed and who calls them.
|
|
1352
1547
|
*
|
|
@@ -1491,6 +1686,273 @@ class CodeIndex {
|
|
|
1491
1686
|
return files.size;
|
|
1492
1687
|
}
|
|
1493
1688
|
|
|
1689
|
+
/**
|
|
1690
|
+
* Find structurally similar functions/classes across the codebase.
|
|
1691
|
+
*
|
|
1692
|
+
* Similarity is computed from multiple signals:
|
|
1693
|
+
* - AST shape profile (node-type frequency vector) — captures structural patterns
|
|
1694
|
+
* - Reference overlap — functions that call the same things do similar work
|
|
1695
|
+
* - Parameter name overlap — shared param names suggest shared purpose
|
|
1696
|
+
* - Name similarity — tokenized name overlap (camelCase/snake_case aware)
|
|
1697
|
+
*
|
|
1698
|
+
* @param {object} opts
|
|
1699
|
+
* @param {string} [opts.symbol] - Find symbols similar to this one
|
|
1700
|
+
* @param {string} [opts.file] - Disambiguate symbol by file, or find similar symbols across this file
|
|
1701
|
+
* @param {string} [opts.kind] - Filter candidates to this kind (function, class, type)
|
|
1702
|
+
* @param {number} [opts.threshold=0.4] - Minimum similarity score (0-1)
|
|
1703
|
+
* @param {number} [opts.offset] - Skip first N results
|
|
1704
|
+
* @param {number} [opts.limit=20] - Max results to return
|
|
1705
|
+
* @param {boolean} [opts.count=false] - If true, return only { total }
|
|
1706
|
+
* @returns {Array<{symbol, file, line, signature, score, breakdown}>|{total: number}}
|
|
1707
|
+
*/
|
|
1708
|
+
async similar({ symbol, file, kind, threshold = 0.4, offset, limit = 20, count = false } = {}) {
|
|
1709
|
+
await this._ensureReady();
|
|
1710
|
+
const graph = this.cache.getGraph();
|
|
1711
|
+
if (!graph) return count ? { total: 0 } : [];
|
|
1712
|
+
|
|
1713
|
+
// Collect all symbol nodes with their attributes
|
|
1714
|
+
// Filter out trivial symbols that match everything due to lack of structure
|
|
1715
|
+
const allSymbols = [];
|
|
1716
|
+
graph.forEachNode((key, attrs) => {
|
|
1717
|
+
if (attrs.type !== 'symbol') return;
|
|
1718
|
+
if (kind && attrs.kind !== kind) return;
|
|
1719
|
+
allSymbols.push({ key, ...attrs });
|
|
1720
|
+
});
|
|
1721
|
+
|
|
1722
|
+
// Identify which symbols are "non-trivial" (enough structure to be meaningful)
|
|
1723
|
+
const isNonTrivial = (attrs) => {
|
|
1724
|
+
const profile = attrs.astProfile;
|
|
1725
|
+
if (!profile) return false;
|
|
1726
|
+
const bodyLines = (attrs.lineEnd || 0) - (attrs.lineStart || 0);
|
|
1727
|
+
const structuralNodes = Object.entries(profile)
|
|
1728
|
+
.filter(([k]) => k !== '_totalNodes')
|
|
1729
|
+
.reduce((sum, [, v]) => sum + v, 0);
|
|
1730
|
+
// At least 3 structural nodes (ifs, calls, returns, etc.) or 5+ lines
|
|
1731
|
+
return structuralNodes >= 3 || bodyLines >= 5;
|
|
1732
|
+
};
|
|
1733
|
+
|
|
1734
|
+
// Build reference sets per symbol from localRefs (per-function scoped refs from parser)
|
|
1735
|
+
const refSets = new Map();
|
|
1736
|
+
for (const sym of allSymbols) {
|
|
1737
|
+
refSets.set(sym.key, new Set(sym.localRefs || []));
|
|
1738
|
+
}
|
|
1739
|
+
|
|
1740
|
+
// Compute IDF weights for AST node types (rare types are more discriminative)
|
|
1741
|
+
const nodeTypeDocFreq = new Map(); // nodeType -> count of symbols that have it
|
|
1742
|
+
for (const sym of allSymbols) {
|
|
1743
|
+
if (!sym.astProfile) continue;
|
|
1744
|
+
for (const [k, v] of Object.entries(sym.astProfile)) {
|
|
1745
|
+
if (k === '_totalNodes' || v === 0) continue;
|
|
1746
|
+
nodeTypeDocFreq.set(k, (nodeTypeDocFreq.get(k) || 0) + 1);
|
|
1747
|
+
}
|
|
1748
|
+
}
|
|
1749
|
+
const totalDocs = allSymbols.length;
|
|
1750
|
+
const idfWeights = new Map();
|
|
1751
|
+
for (const [nodeType, docFreq] of nodeTypeDocFreq) {
|
|
1752
|
+
// IDF: log(N / df) — rare types get higher weight
|
|
1753
|
+
idfWeights.set(nodeType, Math.log(totalDocs / docFreq));
|
|
1754
|
+
}
|
|
1755
|
+
|
|
1756
|
+
// Find the target symbol(s) to compare against
|
|
1757
|
+
let targets;
|
|
1758
|
+
if (symbol) {
|
|
1759
|
+
targets = allSymbols.filter(s => {
|
|
1760
|
+
if (s.name !== symbol) return false;
|
|
1761
|
+
if (file && s.file !== file) return false;
|
|
1762
|
+
return true;
|
|
1763
|
+
});
|
|
1764
|
+
if (targets.length === 0) return count ? { total: 0 } : [];
|
|
1765
|
+
} else if (file) {
|
|
1766
|
+
// Compare all symbols in this file against the rest
|
|
1767
|
+
targets = allSymbols.filter(s => s.file === file);
|
|
1768
|
+
if (targets.length === 0) return count ? { total: 0 } : [];
|
|
1769
|
+
} else {
|
|
1770
|
+
return count ? { total: 0 } : [];
|
|
1771
|
+
}
|
|
1772
|
+
|
|
1773
|
+
// Compute similarity for each candidate against each target
|
|
1774
|
+
const results = [];
|
|
1775
|
+
for (const candidate of allSymbols) {
|
|
1776
|
+
// Skip self-matches
|
|
1777
|
+
if (targets.some(t => t.key === candidate.key)) continue;
|
|
1778
|
+
// Skip trivial candidates — they match everything and are noise
|
|
1779
|
+
if (!isNonTrivial(candidate)) continue;
|
|
1780
|
+
|
|
1781
|
+
let bestScore = 0;
|
|
1782
|
+
let bestBreakdown = null;
|
|
1783
|
+
let bestTarget = null;
|
|
1784
|
+
|
|
1785
|
+
for (const target of targets) {
|
|
1786
|
+
const breakdown = this._computeSimilarity(target, candidate, refSets, idfWeights);
|
|
1787
|
+
if (breakdown.total > bestScore) {
|
|
1788
|
+
bestScore = breakdown.total;
|
|
1789
|
+
bestBreakdown = breakdown;
|
|
1790
|
+
bestTarget = target;
|
|
1791
|
+
}
|
|
1792
|
+
}
|
|
1793
|
+
|
|
1794
|
+
if (bestScore >= threshold) {
|
|
1795
|
+
results.push({
|
|
1796
|
+
symbol: candidate.name,
|
|
1797
|
+
file: candidate.file,
|
|
1798
|
+
line: candidate.lineStart,
|
|
1799
|
+
signature: candidate.signature,
|
|
1800
|
+
score: Math.round(bestScore * 100) / 100,
|
|
1801
|
+
matchedWith: bestTarget ? `${bestTarget.file}::${bestTarget.name}` : null,
|
|
1802
|
+
breakdown: bestBreakdown,
|
|
1803
|
+
});
|
|
1804
|
+
}
|
|
1805
|
+
}
|
|
1806
|
+
|
|
1807
|
+
// Sort by score descending
|
|
1808
|
+
results.sort((a, b) => b.score - a.score);
|
|
1809
|
+
|
|
1810
|
+
if (count) return { total: results.length };
|
|
1811
|
+
return paginate(results, { offset, limit }).items;
|
|
1812
|
+
}
|
|
1813
|
+
|
|
1814
|
+
/**
|
|
1815
|
+
* Compute multi-signal similarity between two symbols.
|
|
1816
|
+
* Returns { total, astShape, refOverlap, paramOverlap, nameScore }
|
|
1817
|
+
*/
|
|
1818
|
+
_computeSimilarity(a, b, refSets, idfWeights = null) {
|
|
1819
|
+
const astShape = this._astProfileSimilarity(a.astProfile, b.astProfile, idfWeights);
|
|
1820
|
+
const refOverlap = this._setOverlap(refSets.get(a.key), refSets.get(b.key), 2);
|
|
1821
|
+
const paramOverlap = this._paramSimilarity(a.paramNames, b.paramNames);
|
|
1822
|
+
const nameScore = this._nameSimilarity(a.name, b.name);
|
|
1823
|
+
|
|
1824
|
+
// Weighted combination — AST shape is most important, refs second
|
|
1825
|
+
const total = (
|
|
1826
|
+
astShape * 0.40 +
|
|
1827
|
+
refOverlap * 0.30 +
|
|
1828
|
+
paramOverlap * 0.15 +
|
|
1829
|
+
nameScore * 0.15
|
|
1830
|
+
);
|
|
1831
|
+
|
|
1832
|
+
return {
|
|
1833
|
+
total: Math.round(total * 100) / 100,
|
|
1834
|
+
astShape: Math.round(astShape * 100) / 100,
|
|
1835
|
+
refOverlap: Math.round(refOverlap * 100) / 100,
|
|
1836
|
+
paramOverlap: Math.round(paramOverlap * 100) / 100,
|
|
1837
|
+
nameScore: Math.round(nameScore * 100) / 100,
|
|
1838
|
+
};
|
|
1839
|
+
}
|
|
1840
|
+
|
|
1841
|
+
/**
|
|
1842
|
+
* Cosine similarity between two AST profile vectors.
|
|
1843
|
+
* Ignores _totalNodes (used separately for size gating).
|
|
1844
|
+
*/
|
|
1845
|
+
_astProfileSimilarity(a, b, idfWeights = null) {
|
|
1846
|
+
if (!a || !b) return 0;
|
|
1847
|
+
|
|
1848
|
+
// Collect all keys (excluding _totalNodes)
|
|
1849
|
+
const keys = new Set([
|
|
1850
|
+
...Object.keys(a).filter(k => k !== '_totalNodes'),
|
|
1851
|
+
...Object.keys(b).filter(k => k !== '_totalNodes'),
|
|
1852
|
+
]);
|
|
1853
|
+
if (keys.size === 0) return 0;
|
|
1854
|
+
|
|
1855
|
+
// Size ratio penalty: very different sized functions get dampened
|
|
1856
|
+
const sizeA = a._totalNodes || 1;
|
|
1857
|
+
const sizeB = b._totalNodes || 1;
|
|
1858
|
+
const sizeRatio = Math.min(sizeA, sizeB) / Math.max(sizeA, sizeB);
|
|
1859
|
+
// Only penalize extreme size differences (>10x)
|
|
1860
|
+
const sizePenalty = sizeRatio < 0.1 ? sizeRatio * 2 : 1;
|
|
1861
|
+
|
|
1862
|
+
// If both profiles have very few distinct node types, similarity is unreliable.
|
|
1863
|
+
// Two functions both having {call_expression: 1, return_statement: 1} is not meaningful.
|
|
1864
|
+
const distinctA = Object.keys(a).filter(k => k !== '_totalNodes').length;
|
|
1865
|
+
const distinctB = Object.keys(b).filter(k => k !== '_totalNodes').length;
|
|
1866
|
+
const minDistinct = Math.min(distinctA, distinctB);
|
|
1867
|
+
// Penalize low-diversity profiles
|
|
1868
|
+
const diversityPenalty = minDistinct <= 2 ? 0.4 : minDistinct <= 3 ? 0.7 : 1.0;
|
|
1869
|
+
|
|
1870
|
+
// Normalize counts to proportions, then apply IDF weighting.
|
|
1871
|
+
// IDF makes rare node types (try_statement, list_comprehension) more
|
|
1872
|
+
// discriminative than ubiquitous ones (call, return_statement).
|
|
1873
|
+
const normalize = (profile) => {
|
|
1874
|
+
const total = Object.entries(profile)
|
|
1875
|
+
.filter(([k]) => k !== '_totalNodes')
|
|
1876
|
+
.reduce((sum, [, v]) => sum + v, 0) || 1;
|
|
1877
|
+
const result = {};
|
|
1878
|
+
for (const k of keys) {
|
|
1879
|
+
const proportion = (profile[k] || 0) / total;
|
|
1880
|
+
const idf = (idfWeights && idfWeights.has(k)) ? idfWeights.get(k) : 1;
|
|
1881
|
+
result[k] = proportion * idf;
|
|
1882
|
+
}
|
|
1883
|
+
return result;
|
|
1884
|
+
};
|
|
1885
|
+
|
|
1886
|
+
const na = normalize(a);
|
|
1887
|
+
const nb = normalize(b);
|
|
1888
|
+
|
|
1889
|
+
// Cosine similarity
|
|
1890
|
+
let dot = 0, magA = 0, magB = 0;
|
|
1891
|
+
for (const k of keys) {
|
|
1892
|
+
dot += na[k] * nb[k];
|
|
1893
|
+
magA += na[k] * na[k];
|
|
1894
|
+
magB += nb[k] * nb[k];
|
|
1895
|
+
}
|
|
1896
|
+
const denom = Math.sqrt(magA) * Math.sqrt(magB);
|
|
1897
|
+
const cosine = denom > 0 ? dot / denom : 0;
|
|
1898
|
+
|
|
1899
|
+
return cosine * sizePenalty * diversityPenalty;
|
|
1900
|
+
}
|
|
1901
|
+
|
|
1902
|
+
/**
|
|
1903
|
+
* Jaccard similarity between two sets.
|
|
1904
|
+
* @param {number} [minSize=0] - Minimum set size for overlap to count
|
|
1905
|
+
*/
|
|
1906
|
+
_setOverlap(a, b, minSize = 0) {
|
|
1907
|
+
if (!a || !b || a.size === 0 || b.size === 0) return 0;
|
|
1908
|
+
if (a.size < minSize && b.size < minSize) return 0;
|
|
1909
|
+
let intersection = 0;
|
|
1910
|
+
for (const item of a) {
|
|
1911
|
+
if (b.has(item)) intersection++;
|
|
1912
|
+
}
|
|
1913
|
+
const union = a.size + b.size - intersection;
|
|
1914
|
+
return union > 0 ? intersection / union : 0;
|
|
1915
|
+
}
|
|
1916
|
+
|
|
1917
|
+
/**
|
|
1918
|
+
* Parameter name similarity: Jaccard overlap on lowercased param names.
|
|
1919
|
+
*/
|
|
1920
|
+
_paramSimilarity(a, b) {
|
|
1921
|
+
if (!a || !b || a.length === 0 || b.length === 0) return 0;
|
|
1922
|
+
const setA = new Set(a.map(p => p.toLowerCase()));
|
|
1923
|
+
const setB = new Set(b.map(p => p.toLowerCase()));
|
|
1924
|
+
// Remove 'self', 'cls', 'this' — they're noise
|
|
1925
|
+
for (const noise of ['self', 'cls', 'this']) {
|
|
1926
|
+
setA.delete(noise);
|
|
1927
|
+
setB.delete(noise);
|
|
1928
|
+
}
|
|
1929
|
+
if (setA.size === 0 || setB.size === 0) return 0;
|
|
1930
|
+
return this._setOverlap(setA, setB);
|
|
1931
|
+
}
|
|
1932
|
+
|
|
1933
|
+
/**
|
|
1934
|
+
* Name similarity: tokenize camelCase/snake_case names, compute Jaccard overlap.
|
|
1935
|
+
*/
|
|
1936
|
+
_nameSimilarity(a, b) {
|
|
1937
|
+
if (!a || !b) return 0;
|
|
1938
|
+
if (a === b) return 1;
|
|
1939
|
+
|
|
1940
|
+
const tokenize = (name) => {
|
|
1941
|
+
// Split on _ and camelCase boundaries, lowercase
|
|
1942
|
+
return name
|
|
1943
|
+
.replace(/([a-z])([A-Z])/g, '$1_$2')
|
|
1944
|
+
.toLowerCase()
|
|
1945
|
+
.split(/[_\s]+/)
|
|
1946
|
+
.filter(t => t.length > 1); // drop single-char tokens
|
|
1947
|
+
};
|
|
1948
|
+
|
|
1949
|
+
const tokA = new Set(tokenize(a));
|
|
1950
|
+
const tokB = new Set(tokenize(b));
|
|
1951
|
+
if (tokA.size === 0 || tokB.size === 0) return 0;
|
|
1952
|
+
|
|
1953
|
+
return this._setOverlap(tokA, tokB);
|
|
1954
|
+
}
|
|
1955
|
+
|
|
1494
1956
|
/**
|
|
1495
1957
|
* Force a full rebuild.
|
|
1496
1958
|
*/
|
package/src/parser.js
CHANGED
|
@@ -246,6 +246,99 @@ const KIND_MAP = {
|
|
|
246
246
|
decorated_definition: 'function',
|
|
247
247
|
};
|
|
248
248
|
|
|
249
|
+
/**
 * Node types that reveal the structural "shape" of a definition.
 * Only these types are counted by buildAstProfile(); everything else just
 * contributes to the total node count.
 */
const STRUCTURAL_NODE_TYPES = new Set([
  // Control flow
  'if_statement', 'if_expression', 'elif_clause', 'else_clause',
  'for_statement', 'for_in_statement', 'for_expression',
  'while_statement', 'loop_expression',
  'match_statement', 'match_expression', 'switch_statement', 'case_clause',
  'try_statement', 'try_expression', 'except_clause', 'catch_clause', 'finally_clause',
  'with_statement',
  // Returns / yields
  'return_statement', 'yield', 'yield_expression', 'await_expression',
  // Calls & access
  'call_expression', 'call', 'method_call_expression',
  'member_expression', 'attribute', 'subscript_expression', 'subscript',
  // Assignments
  'assignment', 'assignment_expression', 'augmented_assignment',
  // Data structures
  'list', 'list_comprehension', 'dictionary', 'dictionary_comprehension',
  'array', 'object', 'tuple',
  // Assertions / raises
  'assert_statement', 'raise_statement', 'throw_statement',
  // Boolean logic
  'boolean_operator', 'binary_expression', 'comparison_operator', 'not_operator',
  // Conditionals
  'conditional_expression', 'ternary_expression',
  // String operations
  'string', 'f_string', 'template_string',
  // Decorators
  'decorator',
]);

/**
 * Count the structural node types found in an AST subtree.
 *
 * Visits the given node and all of its named descendants in pre-order.
 * The result is a flat object such as
 * `{ if_statement: 3, for_statement: 1, call_expression: 7, _totalNodes: 42 }`.
 * This is intentionally coarse: it captures "shape", not identity.
 *
 * @param {object} node - Subtree root exposing `type`, `namedChildCount` and `namedChild(i)`
 * @returns {object} Counts per structural node type, plus `_totalNodes` with
 *   the number of named nodes visited (structural or not)
 */
function buildAstProfile(node) {
  const profile = {};
  let visitedCount = 0;

  // Explicit stack instead of recursion. Children are pushed in reverse so
  // they are popped left-to-right, keeping pre-order and therefore the order
  // in which keys are first added to the profile.
  const pending = [node];
  while (pending.length > 0) {
    const current = pending.pop();
    visitedCount += 1;

    if (STRUCTURAL_NODE_TYPES.has(current.type)) {
      profile[current.type] = (profile[current.type] || 0) + 1;
    }

    for (let i = current.namedChildCount - 1; i >= 0; i--) {
      pending.push(current.namedChild(i));
    }
  }

  profile._totalNodes = visitedCount;
  return profile;
}
|
|
302
|
+
|
|
303
|
+
/**
 * Extract parameter names from a definition's tree-sitter node.
 *
 * Parameter lists ('parameters', 'formal_parameters', 'parameter_list') are
 * looked for among the node's named children and, one level deeper, among its
 * grandchildren, which covers wrappers such as
 * variable_declarator -> arrow_function.
 *
 * For each entry of a list, the name is the identifier held in its `name` or
 * `pattern` field, or the entry itself when it is a bare identifier. Entries
 * with any other shape are skipped.
 *
 * @param {object} node - Definition node
 * @returns {string[]} Parameter names in source order (may be empty)
 */
function extractParamNames(node) {
  const isParamList = ({ type }) =>
    type === 'parameters' || type === 'formal_parameters' || type === 'parameter_list';
  const namedChildrenOf = (parent) =>
    Array.from({ length: parent.namedChildCount }, (_, i) => parent.namedChild(i));

  // Gather parameter lists: each direct child first, then that child's children
  const paramLists = [];
  for (const child of namedChildrenOf(node)) {
    if (isParamList(child)) paramLists.push(child);
    paramLists.push(...namedChildrenOf(child).filter(isParamList));
  }

  const names = [];
  for (const list of paramLists) {
    for (const param of namedChildrenOf(list)) {
      const named = param.childForFieldName('name') || param.childForFieldName('pattern');
      if (named && named.type === 'identifier') {
        names.push(named.text);
      } else if (param.type === 'identifier') {
        names.push(param.text);
      }
    }
  }
  return names;
}
|
|
341
|
+
|
|
249
342
|
/**
|
|
250
343
|
* Find the body/block node of a definition, drilling into wrappers like
|
|
251
344
|
* lexical_declaration → variable_declarator → arrow_function → body.
|
|
@@ -340,6 +433,11 @@ function parseFile(filePath, source) {
|
|
|
340
433
|
bodyStartLine = bodyRow === defRow ? bodyRow + 2 : bodyRow + 1; // 1-indexed
|
|
341
434
|
}
|
|
342
435
|
|
|
436
|
+
// Build AST profile from function body (or whole node if no body)
|
|
437
|
+
const profileNode = bodyNode || defNode.node;
|
|
438
|
+
const astProfile = buildAstProfile(profileNode);
|
|
439
|
+
const paramNames = extractParamNames(defNode.node);
|
|
440
|
+
|
|
343
441
|
definitions.push({
|
|
344
442
|
name: nameCapture.node.text,
|
|
345
443
|
kind: nodeKind(defNode.node.type),
|
|
@@ -348,6 +446,8 @@ function parseFile(filePath, source) {
|
|
|
348
446
|
lineEnd: defNode.node.endPosition.row + 1,
|
|
349
447
|
signature: extractSignature(defNode.node, langName),
|
|
350
448
|
bodyStartLine,
|
|
449
|
+
astProfile,
|
|
450
|
+
paramNames,
|
|
351
451
|
});
|
|
352
452
|
}
|
|
353
453
|
} catch (e) {
|
|
@@ -373,9 +473,28 @@ function parseFile(filePath, source) {
|
|
|
373
473
|
}
|
|
374
474
|
}
|
|
375
475
|
|
|
476
|
+
// Associate each reference with its enclosing definition (by line range).
|
|
477
|
+
// This gives us per-function reference sets for similarity analysis.
|
|
478
|
+
// Sort definitions by lineStart for binary search.
|
|
479
|
+
const sortedDefs = [...definitions].sort((a, b) => a.lineStart - b.lineStart);
|
|
480
|
+
for (const ref of references) {
|
|
481
|
+
// Find the innermost enclosing definition
|
|
482
|
+
let enclosing = null;
|
|
483
|
+
for (const def of sortedDefs) {
|
|
484
|
+
if (ref.line >= def.lineStart && ref.line <= def.lineEnd) {
|
|
485
|
+
// Pick innermost (last matching, since sorted by start and nested defs start later)
|
|
486
|
+
enclosing = def;
|
|
487
|
+
}
|
|
488
|
+
}
|
|
489
|
+
if (enclosing) {
|
|
490
|
+
if (!enclosing.localRefs) enclosing.localRefs = [];
|
|
491
|
+
enclosing.localRefs.push(ref.name);
|
|
492
|
+
}
|
|
493
|
+
}
|
|
494
|
+
|
|
376
495
|
// No tree.delete()/parser.delete() needed — native GC handles cleanup
|
|
377
496
|
|
|
378
497
|
return { file: filePath, definitions, references };
|
|
379
498
|
}
|
|
380
499
|
|
|
381
|
-
export { parseFile, SUPPORTED_EXTENSIONS, LANG_MAP };
|
|
500
|
+
export { parseFile, buildAstProfile, extractParamNames, SUPPORTED_EXTENSIONS, LANG_MAP };
|