npm - @mishasinitcyn/betterrank - Versions diffs - 0.2.3 → 0.2.5 - Mend

@mishasinitcyn/betterrank 0.2.3 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/README.md CHANGED Viewed

@@ -31,9 +31,12 @@ betterrank callers authenticateUser --root /path/to/project --context
 # Everything about a function: source, types, deps, callers
 betterrank context calculate_bid --root /path/to/project
-# Trace the full call chain from entry point to function
+# Trace the full call chain from entry point to function (upward)
 betterrank trace calculate_bid --root /path/to/project
+# What does a function call, recursively? (downward)
+betterrank callees calculate_bid --root /path/to/project
 # What symbols changed and what might break?
 betterrank diff --root /path/to/project
@@ -211,7 +214,7 @@ calculate_bid (src/engine/bidding.py:489-718)
   5d236d3  2026-02-06  feat: wire ad_position to ValuePredictor
 ```
-### `trace` — Recursive caller chain
+### `trace` — Recursive caller chain (upward)
 Walk UP the call graph from a symbol to see the full path from entry points to your function. At each hop, resolves which function in the caller file contains the call site.
@@ -228,6 +231,26 @@ calculate_bid (src/engine/bidding.py:489)
       ← app (src/main.py:45)
 ```
+### `callees` — Recursive callee chain (downward)
+Walk DOWN the call graph from a symbol to see everything it calls, transitively. The mirror of `trace`. Use before refactoring to understand downstream dependencies.
+```bash
+betterrank callees calculate_bid --root /path/to/project
+betterrank callees calculate_bid --root /path/to/project --depth 5
+```
+**Example output:**
+```
+calculate_bid (src/engine/bidding.py:489)
+  → from_microdollars (src/core/currency.py:108)
+  → get_config (src/engine/predictor/config.py:316)
+    → load_yaml (src/core/config.py:22)
+  → get_value_predictor (src/engine/predictor/persistence.py:123)
+```
+Use `trace` and `callees` together for a full "sandwich view" of a function — who calls it (upstream) and what it touches (downstream).
 ### `diff` — Git-aware blast radius
 Shows which symbols changed in the working tree and how many external files call each changed symbol. Compares current disk state against a git ref.
@@ -315,6 +338,7 @@ const idx = new CodeIndex('/path/to/project');
 const map = await idx.map({ limit: 100, focusFiles: ['src/main.ts'] });
 const results = await idx.search({ query: 'auth', kind: 'function', limit: 10 });
 const callers = await idx.callers({ symbol: 'authenticate', context: 2 });
+const tree = await idx.callees({ symbol: 'authenticate', depth: 3 });
 const counts = await idx.getCallerCounts('src/auth.ts');
 const deps = await idx.dependencies({ file: 'src/auth.ts' });
 const dependents = await idx.dependents({ file: 'src/auth.ts' });

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@mishasinitcyn/betterrank",
-  "version": "0.2.3",
+  "version": "0.2.5",
   "description": "Structural code index with PageRank-ranked repo maps, symbol search, call-graph queries, and dependency analysis. Built on tree-sitter and graphology.",
   "type": "module",
   "main": "src/index.js",

package/src/cli.js CHANGED Viewed

@@ -20,12 +20,14 @@ Commands:
   callers     <symbol> [--file path] [--context]     All call sites (ranked, with context lines)
   context     <symbol> [--file path]                 Full context: source, deps, types, callers
   history     <symbol> [--file path]                Git history of a specific function
-  trace       <symbol> [--depth N]                  Recursive caller chain (call tree)
+  trace       <symbol> [--depth N]                  Recursive caller chain (upward)
+  callees     <symbol> [--depth N]                  Recursive callee chain (downward)
   diff        [--ref <commit>]                      Git-aware blast radius (changed symbols + callers)
   deps        <file>                                What this file imports (ranked)
   dependents  <file>                                What imports this file (ranked)
   neighborhood <file> [--hops N] [--max-files N]    Local subgraph (ranked by PageRank)
   orphans     [--level file|symbol] [--kind type]   Find disconnected files/symbols
+  compare     <pathA> <pathB>                        Structural diff between two files/dirs
   reindex                                           Force full rebuild
   stats                                             Index statistics
@@ -269,6 +271,27 @@ Examples:
   betterrank orphans --level symbol --kind function --root .
   betterrank orphans --count --root .`,
+  compare: `betterrank compare <pathA> <pathB> [--kind type] [--include-tests]
+Structural diff between two files or directories. Shows which symbols
+exist in both, which are unique to each side, and how their signatures
+and dependencies differ. No scores — just deterministic structural facts.
+For directories: also shows file-level overlap (shared basenames).
+By default filters out test files and generic names (get, set, __init__, etc.)
+to focus on meaningful structural overlap.
+Options:
+  --kind <type>         Filter to: function, class, type, variable
+  --include-tests       Include test files and test_ functions
+  --limit N             Max items per section (default: 30)
+Examples:
+  betterrank compare src/auth.py lib/auth.py
+  betterrank compare ./repo-a ./repo-b
+  betterrank compare ./repo-a ./repo-b --kind function
+  betterrank compare flask/app.py bottle/bottle.py --kind class`,
   reindex: `betterrank reindex [--root <path>]
 Force a full rebuild of the index. Use after branch switches, large merges,
@@ -328,6 +351,126 @@ async function main() {
     return; // Keep process alive (server is listening)
   }
+  // Compare command — standalone, doesn't need CodeIndex
+  if (command === 'compare') {
+    const pathA = flags._positional[0];
+    const pathB = flags._positional[1];
+    if (!pathA || !pathB) {
+      console.error('Usage: betterrank compare <pathA> <pathB> [--kind type]');
+      process.exit(1);
+    }
+    const absA = resolve(pathA);
+    const absB = resolve(pathB);
+    const { compare } = await import('./compare.js');
+    const includeTests = flags['include-tests'] === true;
+    const countMode = flags.count === true;
+    let result;
+    try {
+      result = await compare(absA, absB, { kind: flags.kind, includeTests });
+    } catch (err) {
+      console.error(err.message);
+      process.exit(1);
+    }
+    // --count mode: just print totals
+    if (countMode) {
+      const sm = result.summary;
+      console.log(`shared: ${sm.sharedNames}`);
+      console.log(`only_a: ${sm.onlyACount}`);
+      console.log(`only_b: ${sm.onlyBCount}`);
+      console.log(`total_a: ${sm.totalA}`);
+      console.log(`total_b: ${sm.totalB}`);
+      return;
+    }
+    const limit = flags.limit !== undefined ? parseInt(flags.limit, 10) : DEFAULT_LIMIT;
+    const off = flags.offset !== undefined ? parseInt(flags.offset, 10) : 0;
+    // Helper: paginate a list and print a range header
+    const paginateSection = (items, label) => {
+      const total = items.length;
+      if (total === 0) return [];
+      const shown = items.slice(off, off + limit);
+      if (shown.length === 0) {
+        console.log(`\n── ${label} (${total}) ── (offset ${off} exceeds ${total} results)`);
+        return [];
+      }
+      const rangeStr = total > limit || off > 0 ? `, showing ${off + 1}-${off + shown.length}` : '';
+      console.log(`\n── ${label} (${total}${rangeStr}) ──`);
+      return shown;
+    };
+    // Warn if either side had zero symbols
+    if (result.summary.totalA === 0) {
+      process.stderr.write(`⚠ No parseable symbols found in A: ${result.labelA}\n`);
+    }
+    if (result.summary.totalB === 0) {
+      process.stderr.write(`⚠ No parseable symbols found in B: ${result.labelB}\n`);
+    }
+    // Summary first (most useful at a glance)
+    const sm = result.summary;
+    console.log(`── Summary ──`);
+    console.log(`  A: ${result.labelA} (${sm.totalA} symbols)`);
+    console.log(`  B: ${result.labelB} (${sm.totalB} symbols)`);
+    console.log(`  Shared names: ${sm.sharedNames}  |  Only A: ${sm.onlyACount}  |  Only B: ${sm.onlyBCount}`);
+    // File-level overlap (directory mode)
+    if (result.isDirectoryMode) {
+      console.log(`\n── Files ──`);
+      console.log(`  A: ${result.files.totalA} files  |  B: ${result.files.totalB} files`);
+      if (result.files.shared.length > 0) {
+        const fileList = result.files.shared.length > 15
+          ? result.files.shared.slice(0, 15).join(', ') + ` (+${result.files.shared.length - 15} more)`
+          : result.files.shared.join(', ');
+        console.log(`  Shared basenames (${result.files.shared.length}): ${fileList}`);
+      }
+    }
+    // Shared symbols — compact grouped format, sorted by sharedRefs
+    const sharedShown = paginateSection(result.shared, 'Shared symbols');
+    for (const s of sharedShown) {
+      const kinds = new Set([...s.inA.map(d => d.kind), ...s.inB.map(d => d.kind)]);
+      const kindStr = [...kinds].join('/');
+      const refTag = s.sharedRefs.length > 0 ? `  ${s.sharedRefs.length} shared refs` : '';
+      console.log(`  ${s.name}  [${kindStr}]  A:${s.inA.length} def${s.inA.length > 1 ? 's' : ''}  B:${s.inB.length} def${s.inB.length > 1 ? 's' : ''}${refTag}`);
+      for (const d of s.inA.slice(0, 2)) {
+        console.log(`    A: ${d.file}:${d.line}  ${d.signature}`);
+      }
+      if (s.inA.length > 2) console.log(`    A: ... and ${s.inA.length - 2} more`);
+      for (const d of s.inB.slice(0, 2)) {
+        console.log(`    B: ${d.file}:${d.line}  ${d.signature}`);
+      }
+      if (s.inB.length > 2) console.log(`    B: ... and ${s.inB.length - 2} more`);
+      if (s.sharedRefs.length > 0) {
+        console.log(`    Shared refs: ${s.sharedRefs.slice(0, 10).join(', ')}${s.sharedRefs.length > 10 ? ` (+${s.sharedRefs.length - 10} more)` : ''}`);
+      }
+    }
+    if (result.shared.length > off + limit) {
+      console.log(`  (use --offset ${off + limit} to see more)`);
+    }
+    // Only in A
+    const onlyAShown = paginateSection(result.onlyA, 'Only in A');
+    for (const s of onlyAShown) {
+      console.log(`  [${s.kind}] ${s.file}:${s.line}  ${s.signature}`);
+    }
+    if (result.onlyA.length > off + limit) {
+      console.log(`  (use --offset ${off + limit} to see more)`);
+    }
+    // Only in B
+    const onlyBShown = paginateSection(result.onlyB, 'Only in B');
+    for (const s of onlyBShown) {
+      console.log(`  [${s.kind}] ${s.file}:${s.line}  ${s.signature}`);
+    }
+    if (result.onlyB.length > off + limit) {
+      console.log(`  (use --offset ${off + limit} to see more)`);
+    }
+    return;
+  }
   // Outline command — standalone by default, needs CodeIndex for --annotate
   if (command === 'outline') {
     const filePath = flags._positional[0];
@@ -661,6 +804,27 @@ async function main() {
       break;
     }
+    case 'callees': {
+      const symbol = flags._positional[0];
+      if (!symbol) { console.error('Usage: betterrank callees <symbol> [--depth N]'); process.exit(1); }
+      const calleesDepth = flags.depth ? parseInt(flags.depth, 10) : 3;
+      const tree = await idx.callees({ symbol, file: normalizeFilePath(flags.file), depth: calleesDepth });
+      if (!tree) {
+        console.log(`(symbol "${symbol}" not found)`);
+      } else {
+        const printNode = (node, depth) => {
+          const indent = depth === 0 ? '' : '  '.repeat(depth) + '→ ';
+          const loc = `(${node.file}:${node.line || '?'})`;
+          console.log(`${indent}${node.name} ${loc}`);
+          for (const callee of node.callees) {
+            printNode(callee, depth + 1);
+          }
+        };
+        printNode(tree, 0);
+      }
+      break;
+    }
     case 'diff': {
       const result = await idx.diff({ ref: flags.ref || 'HEAD' });
       if (result.error) {
@@ -838,6 +1002,12 @@ async function main() {
       break;
     }
+    case 'similar': {
+      console.error('The "similar" command has been replaced by "compare".');
+      console.error('Usage: betterrank compare <pathA> <pathB>');
+      process.exit(1);
+    }
     case 'reindex': {
       const t0 = Date.now();
       const result = await idx.reindex();

package/src/compare.js ADDED Viewed

@@ -0,0 +1,288 @@
+import { readFile, stat as fsStat } from 'fs/promises';
+import { glob } from 'glob';
+import { join, relative, basename } from 'path';
+import { parseFile, SUPPORTED_EXTENSIONS } from './parser.js';
+const IGNORE_PATTERNS = [
+  '**/node_modules/**', '**/.npm/**', '**/.yarn/**', '**/.pnp.*',
+  '**/bower_components/**', '**/*.min.js', '**/*.bundle.js', '**/*.map',
+  '**/__pycache__/**', '**/.venv/**', '**/venv/**', '**/env/**',
+  '**/.env/**', '**/.virtualenvs/**', '**/site-packages/**',
+  '**/*.egg-info/**', '**/.eggs/**', '**/dist/**', '**/build/**',
+  '**/.git/**', '**/.svn/**', '**/.hg/**',
+  '**/vendor/**', '**/tmp/**', '**/temp/**',
+  '**/.idea/**', '**/.vscode/**', '**/.DS_Store',
+  '**/Pods/**', '**/DerivedData/**',
+];
+// Names too generic to be meaningful matches across codebases.
+// These are filtered on EXACT name match only — `processEvent` survives,
+// only bare `process` is dropped.
+const NOISE_NAMES = new Set([
+  // Ultra-common function names
+  'get', 'set', 'run', 'main', 'init', 'setup', 'start', 'stop',
+  'open', 'close', 'read', 'write', 'delete', 'update', 'create',
+  'add', 'remove', 'clear', 'reset', 'test', 'check', 'load',
+  'toString', 'toJSON', 'valueOf', 'hash', 'eq', 'repr', 'str',
+  'copy', 'keys', 'values', 'items', 'pop', 'push', 'append',
+  'default', 'setdefault', 'apply', 'call', 'bind',
+  'map', 'filter', 'reduce', 'format', 'parse', 'validate',
+  'serialize', 'deserialize', 'configure', 'connect',
+  // Python dunders
+  '__init__', '__repr__', '__str__', '__eq__', '__hash__',
+  '__enter__', '__exit__', '__iter__', '__next__', '__len__',
+  '__getitem__', '__setitem__', '__delitem__', '__contains__',
+  '__call__', '__bool__', '__getattr__', '__setattr__', '__delattr__',
+  '__get__', '__set__', '__delete__',
+  // JS common
+  'constructor', 'render', 'process', 'handle', 'execute',
+  // Single-char and trivially short names
+  'a', 'b', 'c', 'd', 'e', 'f', 'x', 'y', 'n', 'i', 'j', 'k',
+  // Common placeholder/fixture names and generic object names
+  'foo', 'bar', 'baz', 'wrapper', 'decorator', 'callback',
+  'index', 'app', 'client', 'response', 'request',
+]);
+async function scanAndParse(dirPath) {
+  const pattern = `**/*{${SUPPORTED_EXTENSIONS.join(',')}}`;
+  const files = await glob(pattern, {
+    cwd: dirPath,
+    ignore: IGNORE_PATTERNS,
+    absolute: true,
+    nodir: true,
+  });
+  const results = [];
+  for (const absPath of files) {
+    const relPath = relative(dirPath, absPath);
+    try {
+      const source = await readFile(absPath, 'utf-8');
+      const result = parseFile(relPath, source);
+      if (result) results.push(result);
+    } catch {
+      // skip unparseable files
+    }
+  }
+  return results;
+}
+async function parseSingleFile(filePath) {
+  const source = await readFile(filePath, 'utf-8');
+  const result = parseFile(basename(filePath), source);
+  return result ? [result] : [];
+}
+function extractSymbols(parseResults) {
+  const symbols = [];
+  for (const fileResult of parseResults) {
+    for (const def of fileResult.definitions) {
+      symbols.push({
+        name: def.name,
+        kind: def.kind,
+        file: fileResult.file,
+        lineStart: def.lineStart,
+        lineEnd: def.lineEnd,
+        signature: def.signature,
+        paramCount: (def.paramNames || []).length,
+        paramNames: def.paramNames || [],
+        localRefs: def.localRefs || [],
+        bodyLines: (def.lineEnd || 0) - (def.lineStart || 0),
+      });
+    }
+  }
+  return symbols;
+}
+// Test file detection
+const TEST_SEGMENTS = ['test/', 'tests/', '__tests__/', 'spec/', 'specs/', 'conftest'];
+function isTestFile(file) {
+  const lower = file.toLowerCase();
+  return TEST_SEGMENTS.some(s => lower.includes(s)) || basename(file).startsWith('test_');
+}
+/**
+ * Compare two codebases (files or directories).
+ *
+ * Returns deterministic structural facts grouped by symbol name.
+ * Filters out noise: generic names and common dunders are always dropped; test_/Test-prefixed symbols and symbols in test files are dropped unless includeTests is set.
+ *
+ * Shared symbols are ranked by sharedRefs count (how many internal
+ * function calls they have in common — the strongest signal for
+ * "these are likely doing the same thing").
+ */
+async function compare(pathA, pathB, { kind, includeTests = false } = {}) {
+  // Validate paths exist
+  let statA, statB;
+  try {
+    statA = await fsStat(pathA);
+  } catch {
+    throw new Error(`Path A does not exist: ${pathA}`);
+  }
+  try {
+    statB = await fsStat(pathB);
+  } catch {
+    throw new Error(`Path B does not exist: ${pathB}`);
+  }
+  const parseResultsA = statA.isDirectory()
+    ? await scanAndParse(pathA)
+    : await parseSingleFile(pathA);
+  const parseResultsB = statB.isDirectory()
+    ? await scanAndParse(pathB)
+    : await parseSingleFile(pathB);
+  let symbolsA = extractSymbols(parseResultsA);
+  let symbolsB = extractSymbols(parseResultsB);
+  // Apply kind filter
+  if (kind) {
+    symbolsA = symbolsA.filter(s => s.kind === kind);
+    symbolsB = symbolsB.filter(s => s.kind === kind);
+  }
+  // Drop test functions and test files unless includeTests is set; noise names are always dropped
+  const isSignificant = (s) => {
+    if (!includeTests && s.name.startsWith('test_')) return false;
+    if (!includeTests && s.name.startsWith('Test')) return false;
+    if (!includeTests && isTestFile(s.file)) return false;
+    if (NOISE_NAMES.has(s.name)) return false;
+    return true;
+  };
+  symbolsA = symbolsA.filter(isSignificant);
+  symbolsB = symbolsB.filter(isSignificant);
+  // Deduplicate (same name+file+line can appear from overlapping tree-sitter captures)
+  const dedup = (syms) => {
+    const seen = new Set();
+    return syms.filter(s => {
+      const key = `${s.name}::${s.file}::${s.lineStart}`;
+      if (seen.has(key)) return false;
+      seen.add(key);
+      return true;
+    });
+  };
+  symbolsA = dedup(symbolsA);
+  symbolsB = dedup(symbolsB);
+  // Group by name
+  const byNameA = new Map();
+  for (const s of symbolsA) {
+    if (!byNameA.has(s.name)) byNameA.set(s.name, []);
+    byNameA.get(s.name).push(s);
+  }
+  const byNameB = new Map();
+  for (const s of symbolsB) {
+    if (!byNameB.has(s.name)) byNameB.set(s.name, []);
+    byNameB.get(s.name).push(s);
+  }
+  // Shared: names that exist in both. Group all definitions under one entry.
+  const shared = [];
+  for (const [name, symsA] of byNameA) {
+    if (!byNameB.has(name)) continue;
+    const symsB = byNameB.get(name);
+    // Collect all local refs across all definitions of this name
+    const allRefsA = new Set();
+    const allRefsB = new Set();
+    for (const s of symsA) for (const r of s.localRefs) allRefsA.add(r);
+    for (const s of symsB) for (const r of s.localRefs) allRefsB.add(r);
+    const sharedRefs = [...allRefsA].filter(r => allRefsB.has(r));
+    // Check whether any A/B pair has the same kind, and (independently) whether any pair has the same param count
+    const sameKind = symsA.some(a => symsB.some(b => a.kind === b.kind));
+    const sameParamCount = symsA.some(a => symsB.some(b => a.paramCount === b.paramCount));
+    shared.push({
+      name,
+      inA: symsA.map(s => ({ kind: s.kind, file: s.file, line: s.lineStart, signature: s.signature, paramCount: s.paramCount, bodyLines: s.bodyLines })),
+      inB: symsB.map(s => ({ kind: s.kind, file: s.file, line: s.lineStart, signature: s.signature, paramCount: s.paramCount, bodyLines: s.bodyLines })),
+      sharedRefs,
+      sameKind,
+      sameParamCount,
+    });
+  }
+  // Sort shared by sharedRefs count (strongest consolidation signal),
+  // then by whether kind/params match, then alphabetically
+  shared.sort((a, b) => {
+    const refDiff = b.sharedRefs.length - a.sharedRefs.length;
+    if (refDiff !== 0) return refDiff;
+    // Prefer same-kind matches
+    if (a.sameKind !== b.sameKind) return a.sameKind ? -1 : 1;
+    // Prefer same-param-count matches
+    if (a.sameParamCount !== b.sameParamCount) return a.sameParamCount ? -1 : 1;
+    // Alphabetical tiebreak
+    return a.name.localeCompare(b.name);
+  });
+  // Only in A / Only in B — one entry per definition; public names sort before _private ones
+  const onlyA = [];
+  for (const [name, syms] of byNameA) {
+    if (byNameB.has(name)) continue;
+    for (const s of syms) {
+      onlyA.push({ name, kind: s.kind, file: s.file, line: s.lineStart, signature: s.signature });
+    }
+  }
+  // Sort: public names first, then _private, alphabetical within each group
+  const privateLast = (a, b) => {
+    const aPrivate = a.name.startsWith('_');
+    const bPrivate = b.name.startsWith('_');
+    if (aPrivate !== bPrivate) return aPrivate ? 1 : -1;
+    return a.name.localeCompare(b.name);
+  };
+  onlyA.sort(privateLast);
+  const onlyB = [];
+  for (const [name, syms] of byNameB) {
+    if (byNameA.has(name)) continue;
+    for (const s of syms) {
+      onlyB.push({ name, kind: s.kind, file: s.file, line: s.lineStart, signature: s.signature });
+    }
+  }
+  onlyB.sort(privateLast);
+  // File-level comparison (basename matching for directory mode)
+  // Filter out basenames that are too generic to be meaningful matches
+  const NOISE_BASENAMES = new Set([
+    '__init__.py', 'conftest.py', 'conf.py', 'setup.py', 'setup.cfg',
+    'index.js', 'index.ts', 'index.tsx', 'main.py', 'main.go', 'main.rs',
+    'app.py', 'app.js', 'app.ts', 'mod.rs', 'lib.rs',
+    'utils.py', 'utils.js', 'utils.ts', 'helpers.py', 'helpers.js',
+    'types.ts', 'types.py', 'config.py', 'config.js', 'config.ts',
+    'constants.py', 'constants.js', 'constants.ts',
+  ]);
+  const filesA = parseResultsA.map(r => r.file);
+  const filesB = parseResultsB.map(r => r.file);
+  const basenamesA = new Set(filesA.map(f => basename(f)));
+  const basenamesB = new Set(filesB.map(f => basename(f)));
+  const sharedFiles = [...basenamesA].filter(f => basenamesB.has(f) && !NOISE_BASENAMES.has(f));
+  const onlyFilesA = [...basenamesA].filter(f => !basenamesB.has(f));
+  const onlyFilesB = [...basenamesB].filter(f => !basenamesA.has(f));
+  return {
+    labelA: statA.isDirectory() ? pathA : basename(pathA),
+    labelB: statB.isDirectory() ? pathB : basename(pathB),
+    isDirectoryMode: statA.isDirectory() || statB.isDirectory(),
+    shared,
+    onlyA,
+    onlyB,
+    files: {
+      shared: sharedFiles,
+      onlyA: onlyFilesA,
+      onlyB: onlyFilesB,
+      totalA: filesA.length,
+      totalB: filesB.length,
+    },
+    summary: {
+      totalA: symbolsA.length,
+      totalB: symbolsB.length,
+      sharedNames: shared.length,
+      onlyACount: onlyA.length,
+      onlyBCount: onlyB.length,
+    },
+  };
+}
+export { compare };

package/src/graph.js CHANGED Viewed

@@ -61,6 +61,9 @@ function buildGraph(allSymbols) {
         lineStart: def.lineStart,
         lineEnd: def.lineEnd,
         signature: def.signature,
+        astProfile: def.astProfile || null,
+        paramNames: def.paramNames || null,
+        localRefs: def.localRefs || null,
       });
       graph.addEdge(file, symbolKey, { type: 'DEFINES' });
     }
@@ -148,6 +151,9 @@ function updateGraphFiles(graph, removedFiles, newSymbols) {
         lineStart: def.lineStart,
         lineEnd: def.lineEnd,
         signature: def.signature,
+        astProfile: def.astProfile || null,
+        paramNames: def.paramNames || null,
+        localRefs: def.localRefs || null,
       });
       graph.addEdge(file, symbolKey, { type: 'DEFINES' });

package/src/index.js CHANGED Viewed

@@ -4,6 +4,73 @@ import { CodeIndexCache } from './cache.js';
 import { rankedSymbols } from './graph.js';
 import { parseFile } from './parser.js';
+/**
+ * Collapse unchanged context lines in git log -L diff output.
+ * Keeps `ctx` lines of context around each +/- change and collapses
+ * every run of lines outside that window into a single "..." marker line.
+ */
+function _collapseDiffContext(raw, ctx = 2) {
+  const output = [];
+  // Split into per-commit sections (each starts with "commit ")
+  const sections = raw.split(/^(?=commit )/m);
+  for (const section of sections) {
+    if (!section.trim()) continue;
+    const lines = section.split('\n');
+    // Find the diff start (line starting with "diff --git")
+    const diffStart = lines.findIndex(l => l.startsWith('diff --git'));
+    if (diffStart === -1) {
+      // No diff in this section (e.g., initial commit with just +++ lines)
+      output.push(section);
+      continue;
+    }
+    // Keep the commit header (everything before "diff --git")
+    output.push(lines.slice(0, diffStart).join('\n'));
+    // Process the diff portion
+    const diffLines = lines.slice(diffStart);
+    // Find lines that are actual diff content (after the @@ hunk header)
+    const hunkStart = diffLines.findIndex(l => l.startsWith('@@'));
+    if (hunkStart === -1) {
+      output.push(diffLines.join('\n'));
+      continue;
+    }
+    // Keep diff headers (diff --git, ---, +++, @@)
+    output.push(diffLines.slice(0, hunkStart + 1).join('\n'));
+    const content = diffLines.slice(hunkStart + 1);
+    // Mark which lines are "interesting" (changed or near a change)
+    const isChange = content.map(l => l.startsWith('+') || l.startsWith('-'));
+    const show = new Array(content.length).fill(false);
+    for (let i = 0; i < content.length; i++) {
+      if (isChange[i]) {
+        for (let j = Math.max(0, i - ctx); j <= Math.min(content.length - 1, i + ctx); j++) {
+          show[j] = true;
+        }
+      }
+    }
+    // Build collapsed output
+    let inEllipsis = false;
+    const collapsed = [];
+    for (let i = 0; i < content.length; i++) {
+      if (show[i]) {
+        inEllipsis = false;
+        collapsed.push(content[i]);
+      } else if (!inEllipsis) {
+        inEllipsis = true;
+        collapsed.push('   ...');
+      }
+    }
+    output.push(collapsed.join('\n'));
+  }
+  return output.join('\n');
+}
 // ── Orphan false-positive filters ──────────────────────────────────────────
 //
 // Orphan detection finds files/symbols with no cross-file connections.
@@ -1194,12 +1261,13 @@ class CodeIndex {
     const { execSync } = await import('child_process');
     try {
       if (patch) {
-        // Full output with diffs — return raw text
         const raw = execSync(
           `git log -L ${target.lineStart},${target.lineEnd}:${target.file} --skip=${offset} -n ${limit}`,
           { cwd: this.projectRoot, encoding: 'utf-8', stdio: ['pipe', 'pipe', 'pipe'], timeout: 30000 }
         ).trim();
-        return { definition: target, commits: [], raw };
+        // Collapse unchanged context lines in diffs — keep 2 lines around changes
+        const collapsed = _collapseDiffContext(raw, 2);
+        return { definition: target, commits: [], raw: collapsed };
       }
       // Summary only
       const output = execSync(
@@ -1347,6 +1415,133 @@ class CodeIndex {
     return buildNode(rootAttrs.name, rootAttrs.file, rootAttrs.lineStart, 0);
   }
+  /**
+   * Recursive callee chain — walk DOWN the call graph.
+   * Mirror of trace(): shows what a function calls, transitively.
+   *
+   * @param {string} opts.symbol - Symbol name
+   * @param {string} [opts.file] - Disambiguate by file
+   * @param {number} [opts.depth=3] - Max hops downward
+   * @returns {Promise<object|null>} Tree root node with .callees[]; null if the graph is unavailable or no matching symbol is found
+   */
+  async callees({ symbol, file, depth = 3 }) {
+    await this._ensureReady();
+    const graph = this.cache.getGraph();
+    if (!graph) return null;
+    // Find the target symbol node(s)
+    const targetKeys = [];
+    graph.forEachNode((node, attrs) => {
+      if (attrs.type !== 'symbol') return;
+      if (attrs.name !== symbol) return;
+      if (file && attrs.file !== file) return;
+      targetKeys.push(node);
+    });
+    if (targetKeys.length === 0) return null;
+    // Use the first match (highest PageRank if multiple)
+    const ranked = this._getRanked();
+    const scoreMap = new Map(ranked);
+    targetKeys.sort((a, b) => (scoreMap.get(b) || 0) - (scoreMap.get(a) || 0));
+    const rootKey = targetKeys[0];
+    const rootAttrs = graph.getNodeAttributes(rootKey);
+    // Resolving callees: REFERENCES edges are read from the symbol's *file* node,
+    // so we still have to work out which function in that file makes each call.
+    // Approach: for each symbol, take the REFERENCES edges going from its file
+    // to other symbols, then keep only the references whose call pattern
+    // (`name(`) appears on a source line inside the symbol's own line range.
+    // Cache of file -> definitions
+    const defCache = new Map();
+    const getFileDefs = async (filePath) => {
+      if (defCache.has(filePath)) return defCache.get(filePath);
+      try {
+        const absPath = join(this.projectRoot, filePath);
+        const source = await readFile(absPath, 'utf-8');
+        const parsed = parseFile(filePath, source);
+        const defs = parsed ? parsed.definitions.sort((a, b) => a.lineStart - b.lineStart) : [];
+        defCache.set(filePath, defs);
+        return defs;
+      } catch {
+        defCache.set(filePath, []);
+        return [];
+      }
+    };
+    const visited = new Set();
+    const buildNode = async (symbolName, symbolFile, symbolLine, currentDepth) => {
+      const nodeKey = `${symbolFile}::${symbolName}`;
+      const node = { name: symbolName, file: symbolFile, line: symbolLine, callees: [] };
+      if (currentDepth >= depth) return node;
+      if (visited.has(nodeKey)) return node;
+      visited.add(nodeKey);
+      // Find the definition's line range so we know which references belong to it
+      const defs = await getFileDefs(symbolFile);
+      const thisDef = defs.find(d => d.name === symbolName && d.lineStart === symbolLine)
+        || defs.find(d => d.name === symbolName);
+      if (!thisDef) return node;
+      // Get the source to find call sites within this function's body
+      let sourceLines;
+      try {
+        const absPath = join(this.projectRoot, symbolFile);
+        const source = await readFile(absPath, 'utf-8');
+        sourceLines = source.split('\n');
+      } catch {
+        return node;
+      }
+      // Find the file node in the graph for this symbol's file
+      const fileNodeKey = symbolFile;
+      // Collect all symbols that this file references (outgoing REFERENCES from file node)
+      const referencedSymbols = new Map(); // name -> {file, line, name}
+      if (graph.hasNode(fileNodeKey)) {
+        graph.forEachOutEdge(fileNodeKey, (_edge, attrs, _source, target) => {
+          if (attrs.type !== 'REFERENCES') return;
+          const targetAttrs = graph.getNodeAttributes(target);
+          if (targetAttrs.type !== 'symbol') return;
+          // Skip self-references
+          if (targetAttrs.name === symbolName && targetAttrs.file === symbolFile) return;
+          referencedSymbols.set(`${targetAttrs.file}::${targetAttrs.name}`, {
+            name: targetAttrs.name,
+            file: targetAttrs.file,
+            line: targetAttrs.lineStart
+          });
+        });
+      }
+      // Filter to references that appear within this function's line range
+      for (const [key, ref] of referencedSymbols) {
+        const callPattern = new RegExp(
+          `(?<![a-zA-Z0-9_])${ref.name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}\\s*\\(`
+        );
+        let found = false;
+        for (let i = thisDef.lineStart - 1; i < Math.min(thisDef.lineEnd, sourceLines.length); i++) {
+          if (callPattern.test(sourceLines[i])) { found = true; break; }
+        }
+        if (found) {
+          const calleeNode = await buildNode(ref.name, ref.file, ref.line, currentDepth + 1);
+          node.callees.push(calleeNode);
+        }
+      }
+      // Sort callees by file then name for deterministic output
+      node.callees.sort((a, b) => a.file.localeCompare(b.file) || a.name.localeCompare(b.name));
+      return node;
+    };
+    return buildNode(rootAttrs.name, rootAttrs.file, rootAttrs.lineStart, 0);
+  }
   /**
    * Git-aware blast radius — what symbols changed and who calls them.
    *
@@ -1491,6 +1686,273 @@ class CodeIndex {
     return files.size;
   }
+  /**
+   * Find structurally similar functions/classes across the codebase.
+   *
+   * Similarity is computed from multiple signals:
+   * - AST shape profile (node-type frequency vector) — captures structural patterns
+   * - Reference overlap — functions that call the same things do similar work
+   * - Parameter name overlap — shared param names suggest shared purpose
+   * - Name similarity — tokenized name overlap (camelCase/snake_case aware)
+   *
+   * Targets are chosen by `symbol` (optionally narrowed by `file`) or, when only
+   * `file` is given, every symbol in that file. With neither, the result is empty.
+   * Each remaining non-trivial symbol is scored against every target and reported
+   * together with its best-scoring target.
+   *
+   * @param {object} opts
+   * @param {string} [opts.symbol] - Find symbols similar to this one
+   * @param {string} [opts.file] - Disambiguate symbol by file, or find similar symbols across this file
+   * @param {string} [opts.kind] - Restrict the comparison to this kind (function, class, type).
+   *   Applied before target lookup, so the target symbol(s) must be of this kind too.
+   * @param {number} [opts.threshold=0.4] - Minimum similarity score (0-1)
+   * @param {number} [opts.offset] - Skip first N results
+   * @param {number} [opts.limit=20] - Max results to return
+   * @param {boolean} [opts.count=false] - If true, return only { total }
+   * @returns {Array<{symbol, file, line, signature, score, matchedWith, breakdown}>|{total: number}}
+   */
+  async similar({ symbol, file, kind, threshold = 0.4, offset, limit = 20, count = false } = {}) {
+    await this._ensureReady();
+    const graph = this.cache.getGraph();
+    // Shape of "nothing found" depends on whether the caller asked for a count.
+    const empty = () => (count ? { total: 0 } : []);
+    if (!graph) return empty();
+    // Collect all symbol nodes with their attributes (optionally restricted to `kind`)
+    const allSymbols = [];
+    graph.forEachNode((key, attrs) => {
+      if (attrs.type !== 'symbol') return;
+      if (kind && attrs.kind !== kind) return;
+      allSymbols.push({ key, ...attrs });
+    });
+    // Identify which symbols are "non-trivial" (enough structure to be meaningful).
+    // Trivial symbols match everything due to lack of structure, so they are
+    // excluded as candidates further down.
+    const isNonTrivial = (attrs) => {
+      const profile = attrs.astProfile;
+      if (!profile) return false;
+      const bodyLines = (attrs.lineEnd || 0) - (attrs.lineStart || 0);
+      const structuralNodes = Object.entries(profile)
+        .filter(([k]) => k !== '_totalNodes')
+        .reduce((sum, [, v]) => sum + v, 0);
+      // At least 3 structural nodes (ifs, calls, returns, etc.) or a span of 5+ lines
+      return structuralNodes >= 3 || bodyLines >= 5;
+    };
+    // Build reference sets per symbol from localRefs (per-function scoped refs from parser)
+    const refSets = new Map();
+    for (const sym of allSymbols) {
+      refSets.set(sym.key, new Set(sym.localRefs || []));
+    }
+    // Compute IDF weights for AST node types (rare types are more discriminative)
+    const nodeTypeDocFreq = new Map(); // nodeType -> count of symbols that have it
+    for (const sym of allSymbols) {
+      if (!sym.astProfile) continue;
+      for (const [k, v] of Object.entries(sym.astProfile)) {
+        if (k === '_totalNodes' || v === 0) continue;
+        nodeTypeDocFreq.set(k, (nodeTypeDocFreq.get(k) || 0) + 1);
+      }
+    }
+    const totalDocs = allSymbols.length;
+    const idfWeights = new Map();
+    for (const [nodeType, docFreq] of nodeTypeDocFreq) {
+      // IDF: log(N / df) — rare types get higher weight; a type present in
+      // every symbol gets weight 0.
+      idfWeights.set(nodeType, Math.log(totalDocs / docFreq));
+    }
+    // Find the target symbol(s) to compare against
+    let targets;
+    if (symbol) {
+      targets = allSymbols.filter(s => s.name === symbol && (!file || s.file === file));
+    } else if (file) {
+      // Compare all symbols in this file against the rest
+      targets = allSymbols.filter(s => s.file === file);
+    } else {
+      return empty();
+    }
+    if (targets.length === 0) return empty();
+    // Key lookup set so the self-match check is O(1) per candidate.
+    const targetKeys = new Set(targets.map(t => t.key));
+    // Compute similarity for each candidate against each target
+    const results = [];
+    for (const candidate of allSymbols) {
+      // Skip self-matches
+      if (targetKeys.has(candidate.key)) continue;
+      // Skip trivial candidates — they match everything and are noise
+      if (!isNonTrivial(candidate)) continue;
+      // Start below any real score so the first target always becomes the
+      // provisional best; this guarantees breakdown/matchedWith are populated
+      // even when every score is 0 and the caller passed threshold <= 0.
+      let bestScore = -Infinity;
+      let bestBreakdown = null;
+      let bestTarget = null;
+      for (const target of targets) {
+        const breakdown = this._computeSimilarity(target, candidate, refSets, idfWeights);
+        if (breakdown.total > bestScore) {
+          bestScore = breakdown.total;
+          bestBreakdown = breakdown;
+          bestTarget = target;
+        }
+      }
+      if (bestScore >= threshold) {
+        results.push({
+          symbol: candidate.name,
+          file: candidate.file,
+          line: candidate.lineStart,
+          signature: candidate.signature,
+          score: Math.round(bestScore * 100) / 100,
+          matchedWith: bestTarget ? `${bestTarget.file}::${bestTarget.name}` : null,
+          breakdown: bestBreakdown,
+        });
+      }
+    }
+    // Sort by score descending
+    results.sort((a, b) => b.score - a.score);
+    if (count) return { total: results.length };
+    return paginate(results, { offset, limit }).items;
+  }
+  /**
+   * Score how alike two symbols are by blending four independent signals.
+   *
+   * Weights: AST shape 0.40, reference overlap 0.30, parameter overlap 0.15,
+   * name similarity 0.15. Every figure in the result is rounded to 2 decimals.
+   *
+   * @param {object} a - First symbol (reads key, astProfile, paramNames, name)
+   * @param {object} b - Second symbol (same fields as `a`)
+   * @param {Map} refSets - Symbol key -> Set of referenced names
+   * @param {Map|null} [idfWeights=null] - Optional per-node-type IDF weights
+   * @returns {{total: number, astShape: number, refOverlap: number, paramOverlap: number, nameScore: number}}
+   */
+  _computeSimilarity(a, b, refSets, idfWeights = null) {
+    const toHundredths = (value) => Math.round(value * 100) / 100;
+    const shape = this._astProfileSimilarity(a.astProfile, b.astProfile, idfWeights);
+    // Reference overlap is ignored when both symbols have fewer than 2 refs.
+    const refs = this._setOverlap(refSets.get(a.key), refSets.get(b.key), 2);
+    const params = this._paramSimilarity(a.paramNames, b.paramNames);
+    const names = this._nameSimilarity(a.name, b.name);
+    // Weighted combination — AST shape is most important, refs second
+    const blended = shape * 0.40 + refs * 0.30 + params * 0.15 + names * 0.15;
+    return {
+      total: toHundredths(blended),
+      astShape: toHundredths(shape),
+      refOverlap: toHundredths(refs),
+      paramOverlap: toHundredths(params),
+      nameScore: toHundredths(names),
+    };
+  }
+  /**
+   * Cosine similarity between two AST profile vectors, damped by two penalties.
+   *
+   * The vectors are built from node-type counts normalized to proportions and
+   * optionally scaled by IDF weights. `_totalNodes` is never part of the vector;
+   * it is only used to compute the size penalty.
+   *
+   * @param {object} a - First profile (node type -> count, plus `_totalNodes`)
+   * @param {object} b - Second profile
+   * @param {Map|null} [idfWeights=null] - Node type -> weight; missing types weigh 1
+   * @returns {number} cosine * sizePenalty * diversityPenalty, or 0 when either
+   *   profile is missing or neither has any node types
+   */
+  _astProfileSimilarity(a, b, idfWeights = null) {
+    if (!a || !b) return 0;
+    const nodeTypesOf = (profile) => Object.keys(profile).filter(k => k !== '_totalNodes');
+    const typesA = nodeTypesOf(a);
+    const typesB = nodeTypesOf(b);
+    // Union of node types across both profiles, in first-seen order
+    const keys = new Set([...typesA, ...typesB]);
+    if (keys.size === 0) return 0;
+    // Size penalty: only extreme size differences (>10x) are dampened
+    const sizeA = a._totalNodes || 1;
+    const sizeB = b._totalNodes || 1;
+    const sizeRatio = Math.min(sizeA, sizeB) / Math.max(sizeA, sizeB);
+    let sizePenalty = 1;
+    if (sizeRatio < 0.1) sizePenalty = sizeRatio * 2;
+    // Diversity penalty: with very few distinct node types on either side the
+    // match is unreliable — two functions both being
+    // {call_expression: 1, return_statement: 1} says little.
+    const minDistinct = Math.min(typesA.length, typesB.length);
+    let diversityPenalty = 1.0;
+    if (minDistinct <= 2) {
+      diversityPenalty = 0.4;
+    } else if (minDistinct <= 3) {
+      diversityPenalty = 0.7;
+    }
+    // Turn counts into proportions, then apply IDF so rare node types
+    // (try_statement, list_comprehension) discriminate more than ubiquitous
+    // ones (call, return_statement).
+    const toWeightedVector = (profile, ownTypes) => {
+      const total = ownTypes.reduce((sum, k) => sum + profile[k], 0) || 1;
+      const vector = [];
+      for (const k of keys) {
+        const proportion = (profile[k] || 0) / total;
+        const idf = (idfWeights && idfWeights.has(k)) ? idfWeights.get(k) : 1;
+        vector.push(proportion * idf);
+      }
+      return vector;
+    };
+    const vecA = toWeightedVector(a, typesA);
+    const vecB = toWeightedVector(b, typesB);
+    // Cosine similarity
+    let dot = 0;
+    let magA = 0;
+    let magB = 0;
+    for (let i = 0; i < vecA.length; i++) {
+      dot += vecA[i] * vecB[i];
+      magA += vecA[i] * vecA[i];
+      magB += vecB[i] * vecB[i];
+    }
+    const denom = Math.sqrt(magA) * Math.sqrt(magB);
+    const cosine = denom > 0 ? dot / denom : 0;
+    return cosine * sizePenalty * diversityPenalty;
+  }
+  /**
+   * Jaccard similarity (|A ∩ B| / |A ∪ B|) between two sets.
+   *
+   * @param {Set} a
+   * @param {Set} b
+   * @param {number} [minSize=0] - When BOTH sets are smaller than this, the
+   *   overlap is treated as 0
+   * @returns {number} Value in [0, 1]; 0 if either set is missing or empty
+   */
+  _setOverlap(a, b, minSize = 0) {
+    const missingOrEmpty = !a || !b || a.size === 0 || b.size === 0;
+    if (missingOrEmpty) return 0;
+    const bothTooSmall = a.size < minSize && b.size < minSize;
+    if (bothTooSmall) return 0;
+    const shared = [...a].filter(item => b.has(item)).length;
+    const union = a.size + b.size - shared;
+    return union > 0 ? shared / union : 0;
+  }
+  /**
+   * Parameter name similarity: Jaccard overlap on lowercased param names,
+   * after discarding receiver-style names that carry no signal.
+   *
+   * @param {string[]} a - Parameter names of the first symbol
+   * @param {string[]} b - Parameter names of the second symbol
+   * @returns {number} 0 when either list is missing, empty, or only noise
+   */
+  _paramSimilarity(a, b) {
+    if (!a || !b || a.length === 0 || b.length === 0) return 0;
+    // 'self', 'cls', 'this' — they're noise
+    const noise = new Set(['self', 'cls', 'this']);
+    const meaningfulNames = (params) =>
+      new Set(params.map(p => p.toLowerCase()).filter(p => !noise.has(p)));
+    const namesA = meaningfulNames(a);
+    const namesB = meaningfulNames(b);
+    if (namesA.size === 0 || namesB.size === 0) return 0;
+    return this._setOverlap(namesA, namesB);
+  }
+  /**
+   * Name similarity: Jaccard overlap of the word tokens in two identifiers.
+   *
+   * Identifiers are split at lower→upper camelCase boundaries, underscores and
+   * whitespace, then lowercased; single-character tokens are discarded.
+   *
+   * @param {string} a
+   * @param {string} b
+   * @returns {number} 1 for identical names, 0 if either name is empty or
+   *   yields no tokens, otherwise the Jaccard overlap
+   */
+  _nameSimilarity(a, b) {
+    if (!a || !b) return 0;
+    if (a === b) return 1;
+    const toTokenSet = (name) => {
+      // Insert '_' at camelCase boundaries so one split handles both styles
+      const snake = name.replace(/([a-z])([A-Z])/g, '$1_$2').toLowerCase();
+      const parts = snake.split(/[_\s]+/);
+      // drop single-char tokens
+      return new Set(parts.filter(part => part.length > 1));
+    };
+    const tokensA = toTokenSet(a);
+    const tokensB = toTokenSet(b);
+    if (tokensA.size === 0 || tokensB.size === 0) return 0;
+    return this._setOverlap(tokensA, tokensB);
+  }
   /**
    * Force a full rebuild.
    */

package/src/parser.js CHANGED Viewed

@@ -246,6 +246,99 @@ const KIND_MAP = {
   decorated_definition: 'function',
 };
+/**
+ * AST node types that count toward a definition's structural "shape".
+ * Only these types are tallied by buildAstProfile; everything else is ignored
+ * apart from contributing to the total node count.
+ * This is intentionally coarse — we want "shape" not identity.
+ */
+const STRUCTURAL_NODE_TYPES = new Set([
+  // Control flow
+  'if_statement', 'if_expression', 'elif_clause', 'else_clause',
+  'for_statement', 'for_in_statement', 'for_expression',
+  'while_statement', 'loop_expression',
+  'match_statement', 'match_expression', 'switch_statement', 'case_clause',
+  'try_statement', 'try_expression', 'except_clause', 'catch_clause', 'finally_clause',
+  'with_statement',
+  // Returns / yields
+  'return_statement', 'yield', 'yield_expression', 'await_expression',
+  // Calls & access
+  'call_expression', 'call', 'method_call_expression',
+  'member_expression', 'attribute', 'subscript_expression', 'subscript',
+  // Assignments
+  'assignment', 'assignment_expression', 'augmented_assignment',
+  // Data structures
+  'list', 'list_comprehension', 'dictionary', 'dictionary_comprehension',
+  'array', 'object', 'tuple',
+  // Assertions / raises
+  'assert_statement', 'raise_statement', 'throw_statement',
+  // Boolean logic
+  'boolean_operator', 'binary_expression', 'comparison_operator', 'not_operator',
+  // Conditionals
+  'conditional_expression', 'ternary_expression',
+  // String operations
+  'string', 'f_string', 'template_string',
+  // Decorators
+  'decorator',
+]);
+/**
+ * Walk an AST subtree and count node types that reveal structural shape.
+ *
+ * @param {object} node - Root of the subtree; must expose `type`,
+ *   `namedChildCount` and `namedChild(i)`
+ * @returns {object} Flat object like
+ *   { if_statement: 3, for_statement: 1, call_expression: 7, ..., _totalNodes: 42 }
+ *   where `_totalNodes` counts every named node visited, structural or not
+ */
+function buildAstProfile(node) {
+  const profile = {};
+  let totalNodes = 0;
+  // Explicit stack instead of recursion so a very deeply nested tree cannot
+  // exhaust the JavaScript call stack.
+  const pending = [node];
+  while (pending.length > 0) {
+    const current = pending.pop();
+    if (STRUCTURAL_NODE_TYPES.has(current.type)) {
+      profile[current.type] = (profile[current.type] || 0) + 1;
+    }
+    totalNodes++;
+    // Push children in reverse so they are popped in source order (pre-order),
+    // keeping the key insertion order of `profile` the same as a recursive walk.
+    for (let i = current.namedChildCount - 1; i >= 0; i--) {
+      pending.push(current.namedChild(i));
+    }
+  }
+  profile._totalNodes = totalNodes;
+  return profile;
+}
+/**
+ * Collect the parameter names declared by a function's tree-sitter node.
+ *
+ * Parameter lists are looked for among the node's named children and
+ * grandchildren (the extra level covers wrappers such as
+ * variable_declarator -> arrow_function). Only parameters that resolve to a
+ * plain identifier contribute a name.
+ *
+ * @param {object} node - Definition node to inspect
+ * @returns {string[]} Parameter names in the order they were found
+ */
+function extractParamNames(node) {
+  const paramListTypes = new Set(['parameters', 'formal_parameters', 'parameter_list']);
+  const isParamList = (candidate) => paramListTypes.has(candidate.type);
+  // Gather parameter-list nodes: each direct child first, then its children
+  const paramLists = [];
+  for (let i = 0; i < node.namedChildCount; i++) {
+    const child = node.namedChild(i);
+    if (isParamList(child)) paramLists.push(child);
+    for (let j = 0; j < child.namedChildCount; j++) {
+      const grandchild = child.namedChild(j);
+      if (isParamList(grandchild)) paramLists.push(grandchild);
+    }
+  }
+  const names = [];
+  for (const list of paramLists) {
+    for (let i = 0; i < list.namedChildCount; i++) {
+      const param = list.namedChild(i);
+      // Prefer the 'name' field, then 'pattern'; fall back to a bare identifier
+      const nameNode = param.childForFieldName('name') || param.childForFieldName('pattern');
+      if (nameNode && nameNode.type === 'identifier') {
+        names.push(nameNode.text);
+        continue;
+      }
+      if (param.type === 'identifier') {
+        names.push(param.text);
+      }
+    }
+  }
+  return names;
+}
 /**
  * Find the body/block node of a definition, drilling into wrappers like
  * lexical_declaration → variable_declarator → arrow_function → body.
@@ -340,6 +433,11 @@ function parseFile(filePath, source) {
           bodyStartLine = bodyRow === defRow ? bodyRow + 2 : bodyRow + 1; // 1-indexed
         }
+        // Build AST profile from function body (or whole node if no body)
+        const profileNode = bodyNode || defNode.node;
+        const astProfile = buildAstProfile(profileNode);
+        const paramNames = extractParamNames(defNode.node);
         definitions.push({
           name: nameCapture.node.text,
           kind: nodeKind(defNode.node.type),
@@ -348,6 +446,8 @@ function parseFile(filePath, source) {
           lineEnd: defNode.node.endPosition.row + 1,
           signature: extractSignature(defNode.node, langName),
           bodyStartLine,
+          astProfile,
+          paramNames,
         });
       }
     } catch (e) {
@@ -373,9 +473,28 @@ function parseFile(filePath, source) {
     }
   }
+  // Associate each reference with its enclosing definition (by line range).
+  // This gives us per-function reference sets for similarity analysis.
+  // Sort definitions by lineStart for binary search.
+  const sortedDefs = [...definitions].sort((a, b) => a.lineStart - b.lineStart);
+  for (const ref of references) {
+    // Find the innermost enclosing definition
+    let enclosing = null;
+    for (const def of sortedDefs) {
+      if (ref.line >= def.lineStart && ref.line <= def.lineEnd) {
+        // Pick innermost (last matching, since sorted by start and nested defs start later)
+        enclosing = def;
+      }
+    }
+    if (enclosing) {
+      if (!enclosing.localRefs) enclosing.localRefs = [];
+      enclosing.localRefs.push(ref.name);
+    }
+  }
   // No tree.delete()/parser.delete() needed — native GC handles cleanup
   return { file: filePath, definitions, references };
 }
-export { parseFile, SUPPORTED_EXTENSIONS, LANG_MAP };
+export { parseFile, buildAstProfile, extractParamNames, SUPPORTED_EXTENSIONS, LANG_MAP };