@mishasinitcyn/betterrank 0.2.8 → 0.2.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mishasinitcyn/betterrank",
3
- "version": "0.2.8",
3
+ "version": "0.2.10",
4
4
  "description": "Structural code index with PageRank-ranked repo maps, symbol search, call-graph queries, and dependency analysis. Built on tree-sitter and graphology.",
5
5
  "type": "module",
6
6
  "main": "src/index.js",
package/src/cli.js CHANGED
@@ -51,6 +51,10 @@ function/class signatures with bodies replaced by "... (N lines)".
51
51
  With symbol names (comma-separated): shows the full source of those
52
52
  specific functions/classes with line numbers.
53
53
 
54
+ Also works well for indexed JS/TS object members like router procedures:
55
+ use \`outline file.ts getTeamStatistics\` instead of expanding a whole giant
56
+ \`createTRPCRouter({ ... })\` blob.
57
+
54
58
  Options:
55
59
  --root <path> Resolve file path relative to this directory
56
60
  --annotate Show caller counts next to each function (requires --root)
@@ -67,7 +71,8 @@ Examples:
67
71
  Aider-style repo map: the most structurally important definitions ranked by PageRank.
68
72
 
69
73
  Options:
70
- --focus <files> Comma-separated files to bias ranking toward
74
+ --focus <files> Comma-separated files to strongly bias ranking toward the
75
+ local import neighborhood of those files
71
76
  --count Return total symbol count only
72
77
  --offset N Skip first N symbols
73
78
  --limit N Max symbols to return (default: ${DEFAULT_LIMIT})
@@ -83,7 +88,7 @@ Substring search on symbol names + full signatures (param names, types, defaults
83
88
  Results ranked by PageRank (most structurally important first).
84
89
 
85
90
  Options:
86
- --kind <type> Filter: function, class, type, variable, namespace, import
91
+ --kind <type> Filter: function, class, type, variable, property, namespace, import
87
92
  --count Return match count only
88
93
  --offset N Skip first N results
89
94
  --limit N Max results (default: ${DEFAULT_LIMIT})
@@ -92,6 +97,7 @@ Tips:
92
97
  Use short substrings (3-5 chars) — PageRank ranking handles noise.
93
98
  "imp" finds encrypt_imp_payload, increment_impression, etc.
94
99
  Searches match against both symbol names AND full signatures (param names, types).
100
+ JS/TS router procedures and other indexed object members appear as [property].
95
101
 
96
102
  Examples:
97
103
  betterrank search resolve --root ./backend
@@ -115,19 +121,23 @@ Results ranked by PageRank (most structurally important first).
115
121
 
116
122
  Options:
117
123
  --file <path> Filter to a specific file (relative to --root)
118
- --kind <type> Filter: function, class, type, variable, namespace, import
124
+ --kind <type> Filter: function, class, type, variable, property, namespace, import
119
125
  --count Return count only
120
126
  --offset N Skip first N results
121
127
  --limit N Max results (default: ${DEFAULT_LIMIT})
122
128
 
123
129
  Examples:
124
130
  betterrank symbols --file src/auth/handlers.ts --root ./backend
125
- betterrank symbols --kind class --root . --limit 20`,
131
+ betterrank symbols --kind class --root . --limit 20
132
+ betterrank symbols --file src/server/api/routers/project.ts --kind property --root .`,
126
133
 
127
134
  callers: `betterrank callers <symbol> [--file path] [--context [N]] [--root <path>]
128
135
 
129
136
  Find all files that reference a symbol. Ranked by file-level PageRank.
130
137
 
138
+ For React/TS apps this includes JSX render sites like <Providers /> and
139
+ property access sites like api.project.getTeamStatistics.useQuery().
140
+
131
141
  Options:
132
142
  --file <path> Disambiguate when multiple symbols share a name
133
143
  --context [N] Show N lines of context around each call site (default: 2)
@@ -142,7 +152,7 @@ Examples:
142
152
 
143
153
  context: `betterrank context <symbol> [--file path] [--root <path>]
144
154
 
145
- Everything you need to understand a function in one shot.
155
+ Everything you need to understand a function or indexed property in one shot.
146
156
 
147
157
  Shows: the function's source, signatures of all functions/types it references,
148
158
  expanded type definitions from the signature, and a callers summary.
@@ -155,7 +165,8 @@ Options:
155
165
 
156
166
  Examples:
157
167
  betterrank context calculate_bid --root .
158
- betterrank context Router --file src/llm.py --root .`,
168
+ betterrank context Router --file src/llm.py --root .
169
+ betterrank context getTeamStatistics --file src/server/api/routers/project.ts --root .`,
159
170
 
160
171
  history: `betterrank history <symbol> [--file path] [--patch] [--limit N] [--root <path>]
161
172
 
@@ -212,6 +223,8 @@ Examples:
212
223
 
213
224
  What this file imports / depends on. Ranked by PageRank.
214
225
 
226
+ Resolves explicit import paths including TS/JS aliases and Python relative imports.
227
+
215
228
  Options:
216
229
  --count Return count only
217
230
  --offset N Skip first N results
@@ -224,6 +237,9 @@ Examples:
224
237
 
225
238
  What files import this file. Ranked by PageRank.
226
239
 
240
+ Resolves explicit import paths including side-effect imports, TS/JS aliases,
241
+ and Python relative imports.
242
+
227
243
  Options:
228
244
  --count Return count only
229
245
  --offset N Skip first N results
package/src/graph.js CHANGED
@@ -25,6 +25,13 @@ const IMPORT_RESOLVE_EXTENSIONS = [
25
25
  '.py', '.rs', '.go', '.rb', '.java',
26
26
  '.c', '.h', '.cpp', '.hpp', '.cc', '.cs', '.php',
27
27
  ];
28
// Tuning values for focus-biased ranking: a symbol's score is multiplied by a
// weight picked from its file's import-graph hop distance to a focus file.

/** Multiplier for files that have no usable entry in the distance table. */
const FOCUS_DEFAULT_WEIGHT = 0.15;

/** Default cap on how many IMPORTS hops are explored from a focus file. */
const FOCUS_MAX_HOPS = 2;

/** Multiplier keyed by hop distance (0 = the focus file itself). */
const FOCUS_DISTANCE_WEIGHTS = new Map()
  .set(0, 250)
  .set(1, 12)
  .set(2, 2.5);
28
35
 
29
36
  /**
30
37
  * Disambiguate which targets a reference should wire to.
@@ -77,6 +84,37 @@ function buildFileLookup(graph) {
77
84
  return fileLookup;
78
85
  }
79
86
 
87
/**
 * Report whether a path names a Python source file.
 *
 * @param {string} filePath - Path to inspect; it is normalized before the check.
 * @returns {boolean} True when the normalized path ends with `.py`.
 */
function isPythonFile(filePath) {
  const normalizedPath = normalizeFilePath(filePath);
  return normalizedPath.endsWith('.py');
}
90
+
91
/**
 * Translate a Python import specifier into an extension-less path that the
 * caller can probe for a matching file.
 *
 * Relative specifiers (`.mod`, `..pkg.mod`, `.`) are resolved against the
 * directory of the importing file: one leading dot means that directory and
 * every additional dot climbs one level. Plain dotted module names
 * (`pkg.sub.mod`) are turned into `pkg/sub/mod`.
 *
 * @param {string} sourceFile - Path of the importing file (POSIX separators).
 * @param {string} specifier - Module text as written in the import statement.
 * @returns {string|null} The candidate base path, or null when the source is
 *   not a Python file, the specifier is empty or malformed, or a relative
 *   import climbs above the top of the path.
 */
function resolvePythonImportBase(sourceFile, specifier) {
  if (!isPythonFile(sourceFile)) return null;
  if (!specifier) return null;

  if (specifier.startsWith('.')) {
    const match = /^(\.+)(.*)$/.exec(specifier);
    if (!match) return null;

    // The greedy first group swallows every leading dot, so the remainder
    // never starts with one and needs no further trimming.
    const [, leadingDots, modulePath] = match;

    let baseDir = posix.dirname(sourceFile);
    for (let level = 1; level < leadingDots.length; level++) {
      const parentDir = posix.dirname(baseDir);
      // dirname() stops changing at '.' and '/'. Reaching that point with dots
      // still left means the import points above the top of the tree; treat it
      // as unresolvable instead of silently resolving against the root.
      if (parentDir === baseDir) return null;
      baseDir = parentDir;
    }

    if (!modulePath) return baseDir;

    return posix.normalize(posix.join(baseDir, modulePath.replace(/\./g, '/')));
  }

  // Absolute import: accept only a well-formed dotted identifier chain.
  if (/^[A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)*$/.test(specifier)) {
    return specifier.replace(/\./g, '/');
  }

  return null;
}
117
+
80
118
  function resolveImportBase(sourceFile, specifier) {
81
119
  const normalizedSource = normalizeFilePath(sourceFile);
82
120
  const normalizedSpecifier = normalizeFilePath(specifier);
@@ -98,6 +136,9 @@ function resolveImportBase(sourceFile, specifier) {
98
136
  return posix.normalize(normalizedSpecifier);
99
137
  }
100
138
 
139
+ const pythonBase = resolvePythonImportBase(normalizedSource, normalizedSpecifier);
140
+ if (pythonBase) return pythonBase;
141
+
101
142
  return null;
102
143
  }
103
144
 
@@ -114,8 +155,11 @@ function resolveImportTargetFile(sourceFile, specifier, fileLookup) {
114
155
  for (const ext of IMPORT_RESOLVE_EXTENSIONS) {
115
156
  candidates.push(`${importBase}${ext}`);
116
157
  }
158
+ const packageEntryNames = isPythonFile(sourceFile) ? ['__init__'] : ['index'];
117
159
  for (const ext of IMPORT_RESOLVE_EXTENSIONS) {
118
- candidates.push(posix.join(importBase, `index${ext}`));
160
+ for (const entryName of packageEntryNames) {
161
+ candidates.push(posix.join(importBase, `${entryName}${ext}`));
162
+ }
119
163
  }
120
164
  }
121
165
 
@@ -133,7 +177,14 @@ function wireSymbolReferences(records, graph, defIndex, addedRefs, addedImports)
133
177
  const targets = defIndex.get(ref.name);
134
178
  if (!targets) continue;
135
179
 
136
- const resolvedTargets = disambiguateTargets(targets, file, graph);
180
+ const filteredTargets = ref.kind === 'property'
181
+ ? targets.filter(target => {
182
+ try { return graph.getNodeAttribute(target, 'kind') === 'property'; } catch { return false; }
183
+ })
184
+ : targets;
185
+ if (filteredTargets.length === 0) continue;
186
+
187
+ const resolvedTargets = disambiguateTargets(filteredTargets, file, graph);
137
188
 
138
189
  for (const target of resolvedTargets) {
139
190
  const targetFile = graph.getNodeAttribute(target, 'file');
@@ -222,6 +273,10 @@ function updateGraphFiles(graph, removedFiles, newSymbols) {
222
273
  continue;
223
274
  }
224
275
 
276
+ if (!graph.hasNode(filePath)) {
277
+ continue;
278
+ }
279
+
225
280
  const outgoingEdges = [];
226
281
  graph.forEachOutEdge(filePath, edge => {
227
282
  outgoingEdges.push(edge);
@@ -263,6 +318,56 @@ function removeFileNodes(graph, filePath) {
263
318
  }
264
319
  }
265
320
 
321
/**
 * Breadth-first walk over IMPORTS edges (followed in both directions) that
 * records how many hops each file node is from the nearest focus file.
 *
 * @param {object} graph - Graph exposing hasNode, getNodeAttributes,
 *   forEachOutEdge and forEachInEdge.
 * @param {Iterable<string>} focusFiles - Node keys to start from; keys that
 *   are missing from the graph or are not `type: 'file'` nodes are ignored.
 * @param {number} [maxHops=FOCUS_MAX_HOPS] - Files at this distance are
 *   recorded but not expanded further.
 * @returns {Map<string, number>} File node key -> hop distance (0 for a focus file).
 */
function buildFocusDistanceMap(graph, focusFiles, maxHops = FOCUS_MAX_HOPS) {
  const isFileNode = key => graph.hasNode(key) && graph.getNodeAttributes(key).type === 'file';

  const hopsByFile = new Map();
  const pending = [];

  for (const seed of focusFiles) {
    if (!isFileNode(seed)) continue;
    hopsByFile.set(seed, 0);
    pending.push(seed);
  }

  // `pending` is used as a FIFO queue; the cursor avoids repeated shift() calls.
  let cursor = 0;
  while (cursor < pending.length) {
    const file = pending[cursor];
    cursor += 1;

    const hops = hopsByFile.get(file);
    if (hops >= maxHops) continue;

    // Gather import neighbours: files this one imports first, then its importers.
    const importNeighbors = [];
    graph.forEachOutEdge(file, (_edge, attrs, _source, target) => {
      if (attrs.type === 'IMPORTS') importNeighbors.push(target);
    });
    graph.forEachInEdge(file, (_edge, attrs, source) => {
      if (attrs.type === 'IMPORTS') importNeighbors.push(source);
    });

    const reachedAt = hops + 1;
    for (const neighbor of importNeighbors) {
      if (!isFileNode(neighbor)) continue;

      const known = hopsByFile.get(neighbor);
      if (known !== undefined && known <= reachedAt) continue;

      hopsByFile.set(neighbor, reachedAt);
      pending.push(neighbor);
    }
  }

  return hopsByFile;
}
363
+
364
/**
 * Look up the ranking multiplier for a file given the focus distance map.
 *
 * @param {string} filePath - File whose weight is wanted.
 * @param {Map<string, number>|null|undefined} focusDistances - Hop distance
 *   per file, or a nullish/empty map when no focus is active.
 * @returns {number} 1.0 when no focus is active; otherwise the weight
 *   configured for the file's hop distance, or FOCUS_DEFAULT_WEIGHT when the
 *   file has no distance or that distance has no configured weight.
 */
function getFocusWeight(filePath, focusDistances) {
  // No focus requested: leave scores untouched.
  if (!focusDistances || focusDistances.size === 0) return 1.0;

  const distance = focusDistances.get(filePath);
  if (distance === undefined) return FOCUS_DEFAULT_WEIGHT;

  // `??` rather than `||`: a configured weight of 0 is a legitimate value and
  // must not be replaced by the default.
  return FOCUS_DISTANCE_WEIGHTS.get(distance) ?? FOCUS_DEFAULT_WEIGHT;
}
370
+
266
371
  // Path-tier dampening: files outside core source directories get their
267
372
  // PageRank scores multiplied by a fraction. This prevents scripts, tests,
268
373
  // and temp files from dominating the map output over actual source code.
@@ -298,6 +403,7 @@ function rankedSymbols(graph, focusFiles = [], pathTiers = DEFAULT_PATH_TIERS) {
298
403
  if (graph.order === 0) return [];
299
404
 
300
405
  const g = graph.copy();
406
+ const focusDistances = focusFiles.length > 0 ? buildFocusDistanceMap(graph, focusFiles) : null;
301
407
 
302
408
  if (focusFiles.length > 0) {
303
409
  g.mergeNode('__focus__', { type: 'virtual' });
@@ -329,7 +435,7 @@ function rankedSymbols(graph, focusFiles = [], pathTiers = DEFAULT_PATH_TIERS) {
329
435
  .map(([key, score]) => {
330
436
  try {
331
437
  const file = graph.getNodeAttribute(key, 'file');
332
- return [key, score * getPathWeight(file, pathTiers)];
438
+ return [key, score * getPathWeight(file, pathTiers) * getFocusWeight(file, focusDistances)];
333
439
  } catch {
334
440
  return [key, score];
335
441
  }
package/src/index.js CHANGED
@@ -232,6 +232,26 @@ function paginate(arr, { offset = 0, limit } = {}) {
232
232
  return { items, total };
233
233
  }
234
234
 
235
/**
 * Symbol names that are skipped when collecting the symbols a definition
 * references for the `context` view.
 */
const CONTEXT_NOISE_NAMES = new Set([
  'get', 'set', 'put', 'post', 'delete', 'head', 'patch',
  'start', 'stop', 'run', 'main', 'init', 'setup', 'close',
  'dict', 'list', 'str', 'int', 'bool', 'float', 'type',
  'key', 'value', 'name', 'data', 'config', 'result', 'error',
  'test', 'self', 'cls', 'app', 'log', 'logger',
  'enabled', 'default', 'constructor', 'length', 'size',
  'fetch', 'send', 'table', 'one', 'append', 'write', 'read',
  'update', 'create', 'find', 'add', 'remove', 'index', 'map',
  'filter', 'sort', 'join', 'split', 'trim', 'replace',
  'push', 'pop', 'shift', 'reduce', 'keys', 'values', 'items',
  'search', 'match', 'query', 'count', 'call', 'apply', 'bind',
]);

/**
 * Sort rank per symbol kind for the `context` view (lower sorts first).
 * Kinds missing from this table are ranked by the caller's own fallback.
 */
const CONTEXT_KIND_ORDER = {
  function: 0,
  class: 1,
  type: 2,
  property: 3,
  variable: 4,
};
250
+
251
/**
 * Backslash-escape every regular-expression metacharacter in a string so the
 * result can be embedded in a RegExp source and match the text literally.
 *
 * @param {string} value - Raw text.
 * @returns {string} The text with each metacharacter prefixed by a backslash.
 */
function escapeRegExp(value) {
  const metacharacters = /[.*+?^${}()|[\]\\]/g;
  // `$&` in the replacement stands for the matched character itself.
  return value.replace(metacharacters, '\\$&');
}
254
+
235
255
  class CodeIndex {
236
256
  constructor(projectRoot, opts = {}) {
237
257
  this.projectRoot = projectRoot;
@@ -579,6 +599,8 @@ class CodeIndex {
579
599
  const escaped = symbol.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
580
600
  const callPattern = new RegExp(`(?<![a-zA-Z0-9_])${escaped}\\s*\\(`);
581
601
  const jsxPattern = new RegExp(`<\\s*${escaped}(?=[\\s>/.]|$)`);
602
+ const memberPattern = new RegExp(`\\.\\s*${escaped}\\b`);
603
+ const bracketPattern = new RegExp(`\\[\\s*['"\`]${escaped}['"\`]\\s*\\]`);
582
604
  // Fallback: import/from lines that reference the symbol
583
605
  const importPattern = new RegExp(`(?:import|from)\\s.*\\b${escaped}\\b`);
584
606
 
@@ -605,7 +627,13 @@ class CodeIndex {
605
627
  if (inDef) continue;
606
628
 
607
629
  const line = lines[i];
608
- if (!callPattern.test(line) && !jsxPattern.test(line) && !importPattern.test(line)) continue;
630
+ if (
631
+ !callPattern.test(line) &&
632
+ !jsxPattern.test(line) &&
633
+ !memberPattern.test(line) &&
634
+ !bracketPattern.test(line) &&
635
+ !importPattern.test(line)
636
+ ) continue;
609
637
 
610
638
  const start = Math.max(0, i - context);
611
639
  const end = Math.min(lines.length - 1, i + context);
@@ -1109,55 +1137,60 @@ class CodeIndex {
1109
1137
  const bodyLines = lines.slice(target.lineStart - 1, target.lineEnd);
1110
1138
  const bodyText = bodyLines.join('\n');
1111
1139
 
1112
- // Build a set of all symbol names in the graph (for matching)
1113
- const allSymbols = new Map(); // name -> [{ file, kind, signature, lineStart }]
1140
+ // Build a set of all symbol names in the graph.
1141
+ const allSymbols = new Map(); // name -> [{ file, kind, signature, lineStart, score }]
1114
1142
  graph.forEachNode((node, attrs) => {
1115
1143
  if (attrs.type !== 'symbol') return;
1116
1144
  if (attrs.file === target.file && attrs.name === target.name) return; // skip self
1117
1145
  if (!allSymbols.has(attrs.name)) allSymbols.set(attrs.name, []);
1118
1146
  allSymbols.get(attrs.name).push({
1147
+ key: node,
1119
1148
  file: attrs.file,
1120
1149
  kind: attrs.kind,
1121
1150
  signature: attrs.signature,
1122
1151
  lineStart: attrs.lineStart,
1123
1152
  lineEnd: attrs.lineEnd,
1153
+ score: scoreMap.get(node) || 0,
1124
1154
  });
1125
1155
  });
1126
1156
 
1127
- // Find symbols referenced in the body
1128
- // Use word-boundary matching for each known symbol name
1129
- // Skip very common names that cause false positives
1130
- const NOISE_NAMES = new Set([
1131
- 'get', 'set', 'put', 'post', 'delete', 'head', 'patch',
1132
- 'start', 'stop', 'run', 'main', 'init', 'setup', 'close',
1133
- 'dict', 'list', 'str', 'int', 'bool', 'float', 'type',
1134
- 'key', 'value', 'name', 'data', 'config', 'result', 'error',
1135
- 'test', 'self', 'cls', 'app', 'log', 'logger',
1136
- 'enabled', 'default', 'constructor', 'length', 'size',
1137
- 'fetch', 'send', 'table', 'one', 'append', 'write', 'read',
1138
- 'update', 'create', 'find', 'add', 'remove', 'index', 'map',
1139
- 'filter', 'sort', 'join', 'split', 'trim', 'replace',
1140
- 'push', 'pop', 'shift', 'reduce', 'keys', 'values', 'items',
1141
- 'search', 'match', 'query', 'count', 'call', 'apply', 'bind',
1142
- ]);
1143
- const usedSymbols = [];
1144
- const seen = new Set();
1145
- for (const [name, defs] of allSymbols) {
1146
- if (name.length < 3) continue; // skip very short names
1147
- if (NOISE_NAMES.has(name)) continue;
1148
- if (seen.has(name)) continue;
1149
- const pattern = new RegExp(`(?<![a-zA-Z0-9_])${name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}(?![a-zA-Z0-9_])`);
1150
- if (pattern.test(bodyText)) {
1151
- seen.add(name);
1152
- // Pick the best definition (same-file first, then highest PageRank)
1153
- const sameFile = defs.find(d => d.file === target.file);
1154
- const best = sameFile || defs[0];
1155
- usedSymbols.push({ name, ...best });
1157
+ for (const defs of allSymbols.values()) {
1158
+ defs.sort((a, b) =>
1159
+ Number(b.file === target.file) - Number(a.file === target.file)
1160
+ || (b.score || 0) - (a.score || 0)
1161
+ || a.file.localeCompare(b.file)
1162
+ || a.lineStart - b.lineStart,
1163
+ );
1164
+ }
1165
+
1166
+ // Find symbols referenced in the body, preferring parser-scoped local refs.
1167
+ const referenceNames = new Set();
1168
+ for (const name of target.localRefs || []) {
1169
+ if (!name || name.length < 3) continue;
1170
+ if (CONTEXT_NOISE_NAMES.has(name)) continue;
1171
+ if (!allSymbols.has(name)) continue;
1172
+ referenceNames.add(name);
1173
+ }
1174
+
1175
+ // Fallback for older caches or definitions without scoped refs.
1176
+ if (referenceNames.size === 0) {
1177
+ for (const name of allSymbols.keys()) {
1178
+ if (name.length < 3) continue;
1179
+ if (CONTEXT_NOISE_NAMES.has(name)) continue;
1180
+ const pattern = new RegExp(`(?<![a-zA-Z0-9_])${escapeRegExp(name)}(?![a-zA-Z0-9_])`);
1181
+ if (pattern.test(bodyText)) {
1182
+ referenceNames.add(name);
1183
+ }
1156
1184
  }
1157
1185
  }
1158
- // Sort: functions first, then types, then by name
1159
- const kindOrder = { function: 0, class: 1, type: 2, variable: 3 };
1160
- usedSymbols.sort((a, b) => (kindOrder[a.kind] ?? 9) - (kindOrder[b.kind] ?? 9) || a.name.localeCompare(b.name));
1186
+
1187
+ const usedSymbols = [];
1188
+ for (const name of referenceNames) {
1189
+ const defs = allSymbols.get(name);
1190
+ if (!defs || defs.length === 0) continue;
1191
+ usedSymbols.push({ name, ...defs[0] });
1192
+ }
1193
+ usedSymbols.sort((a, b) => (CONTEXT_KIND_ORDER[a.kind] ?? 9) - (CONTEXT_KIND_ORDER[b.kind] ?? 9) || a.name.localeCompare(b.name));
1161
1194
 
1162
1195
  // Resolve type annotations in the signature
1163
1196
  // Extract type-like tokens from the signature (capitalized words, common patterns)
@@ -1173,7 +1206,7 @@ class CodeIndex {
1173
1206
  const typeDefs = allSymbols.get(typeName);
1174
1207
  if (!typeDefs) continue;
1175
1208
  // Find the type definition and get its fields (expand its body)
1176
- const best = typeDefs.find(d => d.file === target.file) || typeDefs[0];
1209
+ const best = typeDefs[0];
1177
1210
  if (best.kind === 'class' || best.kind === 'type') {
1178
1211
  let fields = null;
1179
1212
  try {
package/src/parser.js CHANGED
@@ -188,6 +188,7 @@ const REF_QUERIES = {
188
188
  (call_expression function: (identifier) @ref)
189
189
  (import_specifier name: (identifier) @ref)
190
190
  (import_clause (identifier) @ref)
191
+ (member_expression property: (property_identifier) @prop_ref)
191
192
  (jsx_opening_element (identifier) @jsx_ref)
192
193
  (jsx_self_closing_element (identifier) @jsx_ref)
193
194
  (jsx_opening_element (member_expression (identifier) @jsx_ref))
@@ -198,6 +199,7 @@ const REF_QUERIES = {
198
199
  (call_expression function: (identifier) @ref)
199
200
  (import_specifier name: (identifier) @ref)
200
201
  (import_clause (identifier) @ref)
202
+ (member_expression property: (property_identifier) @prop_ref)
201
203
  (type_identifier) @ref
202
204
  `,
203
205
 
@@ -205,6 +207,7 @@ const REF_QUERIES = {
205
207
  (call_expression function: (identifier) @ref)
206
208
  (import_specifier name: (identifier) @ref)
207
209
  (import_clause (identifier) @ref)
210
+ (member_expression property: (property_identifier) @prop_ref)
208
211
  (type_identifier) @ref
209
212
  (jsx_opening_element (identifier) @jsx_ref)
210
213
  (jsx_self_closing_element (identifier) @jsx_ref)
@@ -287,8 +290,12 @@ const KIND_MAP = {
287
290
  };
288
291
 
289
292
  const OUTLINE_EXTRA_LANGUAGES = new Set(['javascript', 'typescript', 'tsx']);
293
/** Languages for which object members are additionally indexed as definitions. */
const INDEX_EXTRA_LANGUAGES = new Set([
  'javascript',
  'typescript',
  'tsx',
]);

/** AST node types listed as outline body containers. */
const OUTLINE_BODY_TYPES = new Set([
  'statement_block',
  'class_body',
  'object',
  'array',
]);

/** AST node types listed as call-like expressions for outlining. */
const OUTLINE_CALL_TYPES = new Set([
  'call_expression',
  'new_expression',
]);

/** Variable-name suffixes (case-insensitive) that mark a container whose members are worth indexing. */
const SEMANTIC_CONTAINER_NAME_RE = /(router|routes|handlers|actions|reducers|selectors|queries|mutations|registry|registries|map|maps|config|configs|options|endpoints|procedures)$/i;

/** Variable-name suffixes (case-insensitive) that veto member indexing for a container. */
const NON_SEMANTIC_CONTAINER_NAME_RE = /(schema|shape|validator|payload|params?|props?|input|output|response|request|dto)$/i;

/** Callee names (whole-name match, case-insensitive) that never qualify a call for member indexing. */
const NON_SEMANTIC_CALLEE_NAME_RE = /^(object|array|enum|union|literal|record|tuple|pick|omit|extend|merge|intersection|partial|strictObject|looseObject)$/i;
292
299
 
293
300
  /**
294
301
  * Walk an AST subtree and count node types that reveal structural shape.
@@ -488,6 +495,111 @@ function hasMultilinePairChildren(node) {
488
495
  return false;
489
496
  }
490
497
 
498
/**
 * Check that a node is a `variable_declarator` inside a `lexical_declaration`
 * whose parent is either the program root or an `export_statement`.
 *
 * @param {object|null|undefined} node - AST node exposing `type` and `parent`.
 * @returns {boolean} True only when the whole parent chain matches.
 */
function isTopLevelVariableDeclarator(node) {
  if (node?.type !== 'variable_declarator') return false;

  const declaration = node.parent;
  if (declaration?.type !== 'lexical_declaration') return false;

  const owner = declaration.parent;
  if (!owner) return false;

  return owner.type === 'program' || owner.type === 'export_statement';
}
505
+
506
/**
 * Read the bound name of a declarator when it is a plain identifier.
 *
 * @param {object|null|undefined} node - AST node exposing `childForFieldName`.
 * @returns {string|null} The identifier text, or null when the node is
 *   missing or its `name` field is absent or not an `identifier` node.
 */
function extractDeclaratorName(node) {
  if (node == null) return null;

  const identifier = node.childForFieldName('name');
  if (identifier == null || identifier.type !== 'identifier') return null;

  return identifier.text;
}
509
+ }
510
+
511
/**
 * Return the right-most name of a callee expression: the text of an
 * identifier-like node, or for a member expression the leaf reached by
 * following its `property` field (falling back to its last named child).
 *
 * @param {object|null|undefined} node - AST node for the callee.
 * @returns {string|null} The leaf name, or null for any other node shape.
 */
function extractCalleeLeafName(node) {
  let current = node;

  // Descend through member expressions until a leaf (or a dead end) is reached.
  while (current) {
    switch (current.type) {
      case 'identifier':
      case 'property_identifier':
      case 'private_property_identifier':
      case 'type_identifier':
        return current.text;

      case 'member_expression':
        current = current.childForFieldName('property')
          || current.namedChild(current.namedChildCount - 1);
        break;

      default:
        return null;
    }
  }

  return null;
}
526
+
527
/**
 * Name the thing being invoked by a call or `new` expression.
 *
 * @param {object|null|undefined} node - AST node to inspect.
 * @returns {string|null} Leaf name of the callee, or null when the node is
 *   not a `call_expression`/`new_expression` or the callee has no leaf name.
 */
function extractCallLikeCalleeName(node) {
  const isCallLike = Boolean(node)
    && (node.type === 'call_expression' || node.type === 'new_expression');
  if (!isCallLike) return null;

  // Try the `function` field, then `constructor`, then the first named child.
  let callee = node.childForFieldName('function');
  if (!callee) callee = node.childForFieldName('constructor');
  if (!callee) callee = node.namedChild(0);

  return extractCalleeLeafName(callee);
}
535
+
536
/**
 * Decide whether the members of a declarator's object value should be
 * indexed as definitions.
 *
 * Only top-level declarators qualify. An object-literal value qualifies when
 * the variable name matches SEMANTIC_CONTAINER_NAME_RE and not
 * NON_SEMANTIC_CONTAINER_NAME_RE. A call or `new` expression value qualifies
 * on the same name test, or when its callee name does not match
 * NON_SEMANTIC_CALLEE_NAME_RE.
 *
 * @param {object} node - Candidate `variable_declarator` AST node.
 * @returns {boolean} True when the declarator's members should be indexed.
 */
function shouldIndexSemanticObjectMembers(node) {
  if (!isTopLevelVariableDeclarator(node)) return false;

  const valueNode = node.childForFieldName('value');
  const valueType = valueNode?.type;
  const isObjectLiteral = valueType === 'object';
  const isCallLike = valueType === 'call_expression' || valueType === 'new_expression';
  if (!isObjectLiteral && !isCallLike) return false;

  const variableName = extractDeclaratorName(node);
  const nameLooksSemantic = Boolean(variableName)
    && SEMANTIC_CONTAINER_NAME_RE.test(variableName)
    && !NON_SEMANTIC_CONTAINER_NAME_RE.test(variableName);

  // Object literals are judged by the variable name alone.
  if (isObjectLiteral) return nameLooksSemantic;
  if (nameLooksSemantic) return true;

  // Call-like values get a second chance through the callee's name.
  const calleeName = extractCallLikeCalleeName(valueNode);
  return Boolean(calleeName) && !NON_SEMANTIC_CALLEE_NAME_RE.test(calleeName);
}
558
+
559
/**
 * Turn the named, multi-line `pair` children of an object literal into
 * `property` definitions. Only direct children are considered.
 *
 * @param {object|null|undefined} node - AST node; anything other than an
 *   `object` node yields an empty list.
 * @param {string} filePath - File the node belongs to.
 * @param {string} langName - Language name forwarded to createDefinition.
 * @returns {Array} One definition per qualifying pair, in source order.
 */
function collectDirectPairDefinitions(node, filePath, langName) {
  if (node?.type !== 'object') return [];

  const found = [];
  for (let index = 0; index < node.namedChildCount; index += 1) {
    const member = node.namedChild(index);
    if (member.type !== 'pair') continue;

    const memberName = extractPairName(member);
    if (!memberName) continue;

    // Pairs that start and end on the same row are skipped.
    const { startPosition, endPosition } = member;
    if (endPosition.row <= startPosition.row) continue;

    const options = {
      // Row + 2 is the row after the pair's first row, offset by one more.
      // NOTE(review): presumably rows are 0-based and this is the 1-based
      // line following the key line; confirm against createDefinition.
      bodyStartLine: startPosition.row + 2,
      kind: 'property',
    };
    found.push(createDefinition(memberName, member, filePath, langName, options));
  }

  return found;
}
579
+
580
/**
 * Walk a syntax tree and collect `property` definitions for the object
 * members of every declarator accepted by shouldIndexSemanticObjectMembers.
 *
 * The walk does not descend into an accepted declarator, so members of
 * objects nested inside it are not collected.
 *
 * @param {object} rootNode - Root AST node of the parsed file.
 * @param {string} filePath - File being parsed.
 * @param {string} langName - Language name; languages outside
 *   INDEX_EXTRA_LANGUAGES yield an empty list.
 * @returns {Array} Definitions in document order.
 */
function collectIndexDefinitions(rootNode, filePath, langName) {
  if (!INDEX_EXTRA_LANGUAGES.has(langName)) return [];

  const collected = [];
  // Explicit stack; children are pushed in reverse so they pop in source order.
  const stack = [rootNode];

  while (stack.length > 0) {
    const node = stack.pop();

    if (node.type === 'variable_declarator' && shouldIndexSemanticObjectMembers(node)) {
      const body = findBodyNode(node);
      if (body?.type === 'object') {
        collected.push(...collectDirectPairDefinitions(body, filePath, langName));
      }
      continue;
    }

    for (let index = node.namedChildCount - 1; index >= 0; index -= 1) {
      stack.push(node.namedChild(index));
    }
  }

  return collected;
}
602
+
491
603
  function collectOutlineDefinitions(rootNode, filePath, langName) {
492
604
  if (!OUTLINE_EXTRA_LANGUAGES.has(langName)) return [];
493
605
 
@@ -581,6 +693,52 @@ function normalizeImportPathCapture(capture) {
581
693
  return text.replace(/^['"`]|['"`]$/g, '');
582
694
  }
583
695
 
696
/**
 * Build an import record from a module path and the AST node it came from.
 *
 * @param {string} path - Import specifier text.
 * @param {string} filePath - File containing the import.
 * @param {object} node - AST node whose start row locates the import.
 * @returns {{path: string, file: string, line: number}|null} The record, with
 *   `line` set to the node's start row plus one, or null when any argument is
 *   missing.
 */
function createImportEntry(path, filePath, node) {
  const hasAllInputs = Boolean(path) && Boolean(filePath) && Boolean(node);
  if (!hasAllInputs) return null;

  const line = node.startPosition.row + 1;
  return { path, file: filePath, line };
}
704
+
705
/**
 * Walk a Python syntax tree and record the module named by every `import`
 * and `from ... import` statement, including statements nested inside other
 * nodes.
 *
 * For `import a.b, c as d` each `dotted_name` is recorded (for an aliased
 * import, its first named child). For `from X import ...` the `module_name`
 * field's text is recorded.
 *
 * @param {object} rootNode - Root AST node of the parsed file.
 * @param {string} filePath - File being parsed; stored on every entry.
 * @returns {Array<{path: string, file: string, line: number}>} Import entries
 *   in document order.
 */
function collectPythonImports(rootNode, filePath) {
  const found = [];

  const record = (modulePath, atNode) => {
    const entry = createImportEntry(modulePath, filePath, atNode);
    if (entry) found.push(entry);
  };

  const walk = node => {
    switch (node.type) {
      case 'import_statement':
        for (let index = 0; index < node.namedChildCount; index += 1) {
          const clause = node.namedChild(index);
          // An aliased import wraps the module name; look inside it.
          const moduleNode = clause.type === 'aliased_import' ? clause.namedChild(0) : clause;
          if (moduleNode?.type === 'dotted_name') record(moduleNode.text, moduleNode);
        }
        break;

      case 'import_from_statement': {
        const moduleNode = node.childForFieldName('module_name');
        if (moduleNode) record(moduleNode.text, moduleNode);
        break;
      }

      default:
        break;
    }

    // Keep descending in every case so nested imports are found too.
    for (let index = 0; index < node.namedChildCount; index += 1) {
      walk(node.namedChild(index));
    }
  };

  walk(rootNode);
  return found;
}
741
+
584
742
  /**
585
743
  * Parse a single source file and extract definitions, references, and imports.
586
744
  * Returns null if the language is unsupported.
@@ -618,6 +776,8 @@ function parseFile(filePath, source, { includeOutlineDefinitions = false } = {})
618
776
  }
619
777
  }
620
778
 
779
+ definitions.push(...collectIndexDefinitions(tree.rootNode, filePath, langName));
780
+
621
781
  if (includeOutlineDefinitions) {
622
782
  definitions.push(...collectOutlineDefinitions(tree.rootNode, filePath, langName));
623
783
  }
@@ -628,11 +788,12 @@ function parseFile(filePath, source, { includeOutlineDefinitions = false } = {})
628
788
  const refQuery = new Parser.Query(lang, refQueryStr);
629
789
  for (const match of refQuery.matches(tree.rootNode)) {
630
790
  for (const capture of match.captures) {
631
- if (capture.name !== 'ref' && capture.name !== 'jsx_ref') continue;
791
+ if (capture.name !== 'ref' && capture.name !== 'jsx_ref' && capture.name !== 'prop_ref') continue;
632
792
  const name = normalizeReferenceCapture(capture);
633
793
  if (!name) continue;
634
794
  references.push({
635
795
  name,
796
+ kind: capture.name === 'prop_ref' ? 'property' : 'symbol',
636
797
  file: filePath,
637
798
  line: capture.node.startPosition.row + 1,
638
799
  });
@@ -666,6 +827,10 @@ function parseFile(filePath, source, { includeOutlineDefinitions = false } = {})
666
827
  }
667
828
  }
668
829
 
830
+ if (langName === 'python') {
831
+ imports.push(...collectPythonImports(tree.rootNode, filePath));
832
+ }
833
+
669
834
  // Associate each reference with its enclosing definition (by line range).
670
835
  // This gives us per-function reference sets for similarity analysis.
671
836
  // Sort definitions by lineStart for binary search.