@colbymchenry/codegraph 0.6.6 → 0.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (221)
  1. package/README.md +180 -502
  2. package/dist/bin/codegraph.d.ts +0 -5
  3. package/dist/bin/codegraph.d.ts.map +1 -1
  4. package/dist/bin/codegraph.js +217 -263
  5. package/dist/bin/codegraph.js.map +1 -1
  6. package/dist/bin/uninstall.d.ts +0 -1
  7. package/dist/bin/uninstall.d.ts.map +1 -1
  8. package/dist/bin/uninstall.js +3 -29
  9. package/dist/bin/uninstall.js.map +1 -1
  10. package/dist/config.d.ts.map +1 -1
  11. package/dist/config.js +0 -3
  12. package/dist/config.js.map +1 -1
  13. package/dist/context/index.d.ts +3 -5
  14. package/dist/context/index.d.ts.map +1 -1
  15. package/dist/context/index.js +497 -46
  16. package/dist/context/index.js.map +1 -1
  17. package/dist/db/migrations.d.ts +1 -1
  18. package/dist/db/migrations.d.ts.map +1 -1
  19. package/dist/db/migrations.js +10 -1
  20. package/dist/db/migrations.js.map +1 -1
  21. package/dist/db/queries.d.ts +53 -0
  22. package/dist/db/queries.d.ts.map +1 -1
  23. package/dist/db/queries.js +244 -24
  24. package/dist/db/queries.js.map +1 -1
  25. package/dist/db/schema.sql +1 -16
  26. package/dist/errors.d.ts +1 -1
  27. package/dist/errors.d.ts.map +1 -1
  28. package/dist/errors.js +1 -7
  29. package/dist/errors.js.map +1 -1
  30. package/dist/extraction/dfm-extractor.d.ts +31 -0
  31. package/dist/extraction/dfm-extractor.d.ts.map +1 -0
  32. package/dist/extraction/dfm-extractor.js +151 -0
  33. package/dist/extraction/dfm-extractor.js.map +1 -0
  34. package/dist/extraction/grammars.d.ts +9 -1
  35. package/dist/extraction/grammars.d.ts.map +1 -1
  36. package/dist/extraction/grammars.js +34 -2
  37. package/dist/extraction/grammars.js.map +1 -1
  38. package/dist/extraction/index.d.ts +7 -1
  39. package/dist/extraction/index.d.ts.map +1 -1
  40. package/dist/extraction/index.js +373 -29
  41. package/dist/extraction/index.js.map +1 -1
  42. package/dist/extraction/languages/c-cpp.d.ts +4 -0
  43. package/dist/extraction/languages/c-cpp.d.ts.map +1 -0
  44. package/dist/extraction/languages/c-cpp.js +126 -0
  45. package/dist/extraction/languages/c-cpp.js.map +1 -0
  46. package/dist/extraction/languages/csharp.d.ts +3 -0
  47. package/dist/extraction/languages/csharp.d.ts.map +1 -0
  48. package/dist/extraction/languages/csharp.js +72 -0
  49. package/dist/extraction/languages/csharp.js.map +1 -0
  50. package/dist/extraction/languages/dart.d.ts +3 -0
  51. package/dist/extraction/languages/dart.d.ts.map +1 -0
  52. package/dist/extraction/languages/dart.js +192 -0
  53. package/dist/extraction/languages/dart.js.map +1 -0
  54. package/dist/extraction/languages/go.d.ts +3 -0
  55. package/dist/extraction/languages/go.d.ts.map +1 -0
  56. package/dist/extraction/languages/go.js +58 -0
  57. package/dist/extraction/languages/go.js.map +1 -0
  58. package/dist/extraction/languages/index.d.ts +10 -0
  59. package/dist/extraction/languages/index.d.ts.map +1 -0
  60. package/dist/extraction/languages/index.js +43 -0
  61. package/dist/extraction/languages/index.js.map +1 -0
  62. package/dist/extraction/languages/java.d.ts +3 -0
  63. package/dist/extraction/languages/java.d.ts.map +1 -0
  64. package/dist/extraction/languages/java.js +64 -0
  65. package/dist/extraction/languages/java.js.map +1 -0
  66. package/dist/extraction/languages/javascript.d.ts +3 -0
  67. package/dist/extraction/languages/javascript.d.ts.map +1 -0
  68. package/dist/extraction/languages/javascript.js +90 -0
  69. package/dist/extraction/languages/javascript.js.map +1 -0
  70. package/dist/extraction/languages/kotlin.d.ts +3 -0
  71. package/dist/extraction/languages/kotlin.d.ts.map +1 -0
  72. package/dist/extraction/languages/kotlin.js +253 -0
  73. package/dist/extraction/languages/kotlin.js.map +1 -0
  74. package/dist/extraction/languages/pascal.d.ts +3 -0
  75. package/dist/extraction/languages/pascal.d.ts.map +1 -0
  76. package/dist/extraction/languages/pascal.js +66 -0
  77. package/dist/extraction/languages/pascal.js.map +1 -0
  78. package/dist/extraction/languages/php.d.ts +3 -0
  79. package/dist/extraction/languages/php.d.ts.map +1 -0
  80. package/dist/extraction/languages/php.js +107 -0
  81. package/dist/extraction/languages/php.js.map +1 -0
  82. package/dist/extraction/languages/python.d.ts +3 -0
  83. package/dist/extraction/languages/python.d.ts.map +1 -0
  84. package/dist/extraction/languages/python.js +56 -0
  85. package/dist/extraction/languages/python.js.map +1 -0
  86. package/dist/extraction/languages/ruby.d.ts +3 -0
  87. package/dist/extraction/languages/ruby.d.ts.map +1 -0
  88. package/dist/extraction/languages/ruby.js +114 -0
  89. package/dist/extraction/languages/ruby.js.map +1 -0
  90. package/dist/extraction/languages/rust.d.ts +3 -0
  91. package/dist/extraction/languages/rust.d.ts.map +1 -0
  92. package/dist/extraction/languages/rust.js +109 -0
  93. package/dist/extraction/languages/rust.js.map +1 -0
  94. package/dist/extraction/languages/swift.d.ts +3 -0
  95. package/dist/extraction/languages/swift.d.ts.map +1 -0
  96. package/dist/extraction/languages/swift.js +91 -0
  97. package/dist/extraction/languages/swift.js.map +1 -0
  98. package/dist/extraction/languages/typescript.d.ts +3 -0
  99. package/dist/extraction/languages/typescript.d.ts.map +1 -0
  100. package/dist/extraction/languages/typescript.js +129 -0
  101. package/dist/extraction/languages/typescript.js.map +1 -0
  102. package/dist/extraction/liquid-extractor.d.ts +52 -0
  103. package/dist/extraction/liquid-extractor.d.ts.map +1 -0
  104. package/dist/extraction/liquid-extractor.js +313 -0
  105. package/dist/extraction/liquid-extractor.js.map +1 -0
  106. package/dist/extraction/parse-worker.d.ts +8 -0
  107. package/dist/extraction/parse-worker.d.ts.map +1 -0
  108. package/dist/extraction/parse-worker.js +57 -0
  109. package/dist/extraction/parse-worker.js.map +1 -0
  110. package/dist/extraction/svelte-extractor.d.ts +47 -0
  111. package/dist/extraction/svelte-extractor.d.ts.map +1 -0
  112. package/dist/extraction/svelte-extractor.js +230 -0
  113. package/dist/extraction/svelte-extractor.js.map +1 -0
  114. package/dist/extraction/tree-sitter-helpers.d.ts +28 -0
  115. package/dist/extraction/tree-sitter-helpers.d.ts.map +1 -0
  116. package/dist/extraction/tree-sitter-helpers.js +103 -0
  117. package/dist/extraction/tree-sitter-helpers.js.map +1 -0
  118. package/dist/extraction/tree-sitter-types.d.ts +179 -0
  119. package/dist/extraction/tree-sitter-types.d.ts.map +1 -0
  120. package/dist/extraction/tree-sitter-types.js +10 -0
  121. package/dist/extraction/tree-sitter-types.js.map +1 -0
  122. package/dist/extraction/tree-sitter.d.ts +67 -125
  123. package/dist/extraction/tree-sitter.d.ts.map +1 -1
  124. package/dist/extraction/tree-sitter.js +1052 -1860
  125. package/dist/extraction/tree-sitter.js.map +1 -1
  126. package/dist/graph/traversal.d.ts.map +1 -1
  127. package/dist/graph/traversal.js +20 -2
  128. package/dist/graph/traversal.js.map +1 -1
  129. package/dist/index.d.ts +29 -53
  130. package/dist/index.d.ts.map +1 -1
  131. package/dist/index.js +88 -117
  132. package/dist/index.js.map +1 -1
  133. package/dist/installer/claude-md-template.d.ts +1 -1
  134. package/dist/installer/claude-md-template.d.ts.map +1 -1
  135. package/dist/installer/claude-md-template.js +15 -15
  136. package/dist/installer/config-writer.d.ts +2 -13
  137. package/dist/installer/config-writer.d.ts.map +1 -1
  138. package/dist/installer/config-writer.js +4 -87
  139. package/dist/installer/config-writer.js.map +1 -1
  140. package/dist/installer/index.d.ts +3 -4
  141. package/dist/installer/index.d.ts.map +1 -1
  142. package/dist/installer/index.js +118 -127
  143. package/dist/installer/index.js.map +1 -1
  144. package/dist/mcp/index.d.ts +5 -0
  145. package/dist/mcp/index.d.ts.map +1 -1
  146. package/dist/mcp/index.js +25 -4
  147. package/dist/mcp/index.js.map +1 -1
  148. package/dist/mcp/tools.d.ts +33 -0
  149. package/dist/mcp/tools.d.ts.map +1 -1
  150. package/dist/mcp/tools.js +405 -26
  151. package/dist/mcp/tools.js.map +1 -1
  152. package/dist/mcp/transport.d.ts.map +1 -1
  153. package/dist/mcp/transport.js +0 -2
  154. package/dist/mcp/transport.js.map +1 -1
  155. package/dist/resolution/frameworks/csharp.js +29 -84
  156. package/dist/resolution/frameworks/csharp.js.map +1 -1
  157. package/dist/resolution/frameworks/express.js +44 -48
  158. package/dist/resolution/frameworks/express.js.map +1 -1
  159. package/dist/resolution/frameworks/go.js +34 -70
  160. package/dist/resolution/frameworks/go.js.map +1 -1
  161. package/dist/resolution/frameworks/java.js +29 -87
  162. package/dist/resolution/frameworks/java.js.map +1 -1
  163. package/dist/resolution/frameworks/laravel.js +6 -6
  164. package/dist/resolution/frameworks/laravel.js.map +1 -1
  165. package/dist/resolution/frameworks/python.js +33 -98
  166. package/dist/resolution/frameworks/python.js.map +1 -1
  167. package/dist/resolution/frameworks/react.js +53 -76
  168. package/dist/resolution/frameworks/react.js.map +1 -1
  169. package/dist/resolution/frameworks/ruby.js +12 -24
  170. package/dist/resolution/frameworks/ruby.js.map +1 -1
  171. package/dist/resolution/frameworks/rust.js +26 -66
  172. package/dist/resolution/frameworks/rust.js.map +1 -1
  173. package/dist/resolution/frameworks/svelte.js +11 -31
  174. package/dist/resolution/frameworks/svelte.js.map +1 -1
  175. package/dist/resolution/frameworks/swift.js +42 -160
  176. package/dist/resolution/frameworks/swift.js.map +1 -1
  177. package/dist/resolution/index.d.ts +19 -6
  178. package/dist/resolution/index.d.ts.map +1 -1
  179. package/dist/resolution/index.js +300 -144
  180. package/dist/resolution/index.js.map +1 -1
  181. package/dist/resolution/name-matcher.d.ts +5 -0
  182. package/dist/resolution/name-matcher.d.ts.map +1 -1
  183. package/dist/resolution/name-matcher.js +148 -8
  184. package/dist/resolution/name-matcher.js.map +1 -1
  185. package/dist/resolution/types.d.ts +1 -1
  186. package/dist/resolution/types.d.ts.map +1 -1
  187. package/dist/search/query-utils.d.ts +26 -1
  188. package/dist/search/query-utils.d.ts.map +1 -1
  189. package/dist/search/query-utils.js +209 -9
  190. package/dist/search/query-utils.js.map +1 -1
  191. package/dist/sync/index.d.ts +2 -4
  192. package/dist/sync/index.d.ts.map +1 -1
  193. package/dist/sync/index.js +4 -3
  194. package/dist/sync/index.js.map +1 -1
  195. package/dist/sync/watcher.d.ts +81 -0
  196. package/dist/sync/watcher.d.ts.map +1 -0
  197. package/dist/sync/watcher.js +184 -0
  198. package/dist/sync/watcher.js.map +1 -0
  199. package/dist/types.d.ts +2 -2
  200. package/dist/types.d.ts.map +1 -1
  201. package/dist/types.js +0 -1
  202. package/dist/types.js.map +1 -1
  203. package/dist/ui/shimmer-progress.d.ts +11 -0
  204. package/dist/ui/shimmer-progress.d.ts.map +1 -0
  205. package/dist/ui/shimmer-progress.js +90 -0
  206. package/dist/ui/shimmer-progress.js.map +1 -0
  207. package/dist/ui/shimmer-worker.d.ts +2 -0
  208. package/dist/ui/shimmer-worker.d.ts.map +1 -0
  209. package/dist/ui/shimmer-worker.js +112 -0
  210. package/dist/ui/shimmer-worker.js.map +1 -0
  211. package/dist/ui/types.d.ts +17 -0
  212. package/dist/ui/types.d.ts.map +1 -0
  213. package/dist/ui/types.js +3 -0
  214. package/dist/ui/types.js.map +1 -0
  215. package/dist/vectors/embedder.js +1 -1
  216. package/dist/vectors/embedder.js.map +1 -1
  217. package/dist/visualizer/server.d.ts.map +1 -1
  218. package/dist/visualizer/server.js +3 -11
  219. package/dist/visualizer/server.js.map +1 -1
  220. package/package.json +7 -12
  221. package/scripts/postinstall.js +0 -68
@@ -2,7 +2,7 @@
2
2
  /**
3
3
  * Context Builder
4
4
  *
5
- * Builds rich context for tasks by combining semantic search with graph traversal.
5
+ * Builds rich context for tasks by combining FTS search with graph traversal.
6
6
  * Outputs structured context ready to inject into Claude.
7
7
  */
8
8
  var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
@@ -42,9 +42,11 @@ Object.defineProperty(exports, "__esModule", { value: true });
42
42
  exports.formatContextAsJson = exports.formatContextAsMarkdown = exports.ContextBuilder = void 0;
43
43
  exports.createContextBuilder = createContextBuilder;
44
44
  const fs = __importStar(require("fs"));
45
+ const path = __importStar(require("path"));
45
46
  const formatter_1 = require("./formatter");
46
47
  const errors_1 = require("../errors");
47
48
  const utils_1 = require("../utils");
49
+ const query_utils_1 = require("../search/query-utils");
48
50
  /**
49
51
  * Extract likely symbol names from a natural language query
50
52
  *
@@ -82,6 +84,13 @@ function extractSymbolsFromQuery(query) {
82
84
  symbols.add(match[1]);
83
85
  }
84
86
  }
87
+ // Extract ALL_CAPS acronyms (2+ chars, e.g., REST, HTTP, LRU, API)
88
+ const acronymPattern = /\b([A-Z]{2,})\b/g;
89
+ while ((match = acronymPattern.exec(query)) !== null) {
90
+ if (match[1]) {
91
+ symbols.add(match[1]);
92
+ }
93
+ }
85
94
  // Extract dot.notation and split into parts (e.g., "app.isPackaged" -> ["app", "isPackaged"])
86
95
  const dotPattern = /\b([a-zA-Z][a-zA-Z0-9]*(?:\.[a-zA-Z][a-zA-Z0-9]*)+)\b/g;
87
96
  while ((match = dotPattern.exec(query)) !== null) {
@@ -96,13 +105,42 @@ function extractSymbolsFromQuery(query) {
96
105
  }
97
106
  }
98
107
  }
99
- // Filter out common English words that might match patterns
108
+ // Extract plain lowercase identifiers (3+ chars, not already matched)
109
+ // Catches symbol names like "undo", "redo", "history", "render", "parse"
110
+ const lowercasePattern = /\b([a-z][a-z0-9]{2,})\b/g;
111
+ while ((match = lowercasePattern.exec(query)) !== null) {
112
+ if (match[1]) {
113
+ symbols.add(match[1]);
114
+ }
115
+ }
116
+ // Filter out common English words that aren't likely symbol names
100
117
  const commonWords = new Set([
101
118
  'the', 'and', 'for', 'with', 'from', 'this', 'that', 'have', 'been',
102
119
  'will', 'would', 'could', 'should', 'does', 'done', 'make', 'made',
103
120
  'use', 'used', 'using', 'work', 'works', 'find', 'found', 'show',
104
121
  'call', 'called', 'calling', 'get', 'set', 'add', 'all', 'any',
105
- 'how', 'what', 'when', 'where', 'which', 'who', 'why'
122
+ 'how', 'what', 'when', 'where', 'which', 'who', 'why',
123
+ 'not', 'but', 'are', 'was', 'were', 'has', 'had', 'its',
124
+ 'can', 'did', 'may', 'also', 'into', 'than', 'then', 'them',
125
+ 'each', 'other', 'some', 'such', 'only', 'same', 'about',
126
+ 'after', 'before', 'between', 'through', 'during', 'without',
127
+ 'again', 'further', 'once', 'here', 'there', 'both', 'just',
128
+ 'more', 'most', 'very', 'being', 'having', 'doing',
129
+ 'system', 'need', 'needs', 'want', 'wants', 'like', 'look',
130
+ 'change', 'changes', 'changed', 'changing',
131
+ // Common English nouns/verbs that match thousands of unrelated code symbols
132
+ 'layer', 'handle', 'handles', 'handling', 'incoming', 'outgoing',
133
+ 'data', 'flow', 'flows', 'level', 'levels', 'request', 'requests',
134
+ 'response', 'responses', 'implement', 'implements', 'implementation',
135
+ 'interface', 'interfaces', 'class', 'classes', 'method', 'methods',
136
+ 'trigger', 'triggers', 'affected', 'affect', 'affects',
137
+ 'else', 'code', 'failing', 'failed', 'silently', 'decide', 'decides',
138
+ 'connect', 'connection', 'connections',
139
+ 'return', 'returns', 'returned', 'take', 'takes', 'taken',
140
+ 'send', 'sends', 'receive', 'receives', 'process', 'processes',
141
+ 'check', 'checks', 'checked', 'create', 'creates', 'created',
142
+ 'read', 'reads', 'write', 'writes', 'written',
143
+ 'start', 'starts', 'stop', 'stops', 'run', 'runs', 'running',
106
144
  ]);
107
145
  return Array.from(symbols).filter(s => !commonWords.has(s.toLowerCase()));
108
146
  }
@@ -154,12 +192,10 @@ class ContextBuilder {
154
192
  projectRoot;
155
193
  queries;
156
194
  traverser;
157
- vectorManager;
158
- constructor(projectRoot, queries, traverser, vectorManager) {
195
+ constructor(projectRoot, queries, traverser) {
159
196
  this.projectRoot = projectRoot;
160
197
  this.queries = queries;
161
198
  this.traverser = traverser;
162
- this.vectorManager = vectorManager;
163
199
  }
164
200
  /**
165
201
  * Build context for a task
@@ -254,45 +290,136 @@ class ContextBuilder {
254
290
  let exactMatches = [];
255
291
  if (symbolsFromQuery.length > 0) {
256
292
  try {
293
+ // Get more results so we can apply co-location boosting before trimming
257
294
  exactMatches = this.queries.findNodesByExactName(symbolsFromQuery, {
258
- limit: Math.ceil(opts.searchLimit * 2), // Get more since we'll merge
295
+ limit: Math.ceil(opts.searchLimit * 5),
259
296
  kinds: opts.nodeKinds && opts.nodeKinds.length > 0 ? opts.nodeKinds : undefined,
260
297
  });
298
+ // Co-location boost: when multiple extracted symbols appear in the same file,
299
+ // those results are much more likely to be what the user is looking for.
300
+ // E.g., "scrapeLoop" + "run" both in scrape/scrape.go → boost both.
301
+ if (exactMatches.length > 1) {
302
+ // Build a map of files → how many distinct symbol names matched in that file
303
+ const fileSymbolCounts = new Map();
304
+ for (const r of exactMatches) {
305
+ const names = fileSymbolCounts.get(r.node.filePath) || new Set();
306
+ names.add(r.node.name.toLowerCase());
307
+ fileSymbolCounts.set(r.node.filePath, names);
308
+ }
309
+ // Boost results in files where multiple query symbols co-occur
310
+ exactMatches = exactMatches.map(r => {
311
+ const symbolCount = fileSymbolCounts.get(r.node.filePath)?.size || 1;
312
+ return {
313
+ ...r,
314
+ score: symbolCount > 1 ? r.score + (symbolCount - 1) * 20 : r.score,
315
+ };
316
+ });
317
+ exactMatches.sort((a, b) => b.score - a.score);
318
+ }
319
+ // Trim back to reasonable size
320
+ exactMatches = exactMatches.slice(0, Math.ceil(opts.searchLimit * 2));
261
321
  (0, errors_1.logDebug)('Exact symbol matches', { count: exactMatches.length });
262
322
  }
263
323
  catch (error) {
264
324
  (0, errors_1.logDebug)('Exact symbol lookup failed', { error: String(error) });
265
325
  }
266
326
  }
267
- // Step 3: Try semantic search if vector manager is available
268
- let semanticResults = [];
269
- if (this.vectorManager && this.vectorManager.isInitialized()) {
270
- try {
271
- semanticResults = await this.vectorManager.search(query, {
272
- limit: opts.searchLimit,
273
- kinds: opts.nodeKinds && opts.nodeKinds.length > 0 ? opts.nodeKinds : undefined,
274
- });
275
- (0, errors_1.logDebug)('Semantic search results', { count: semanticResults.length });
276
- }
277
- catch (error) {
278
- (0, errors_1.logDebug)('Semantic search failed, falling back to text search', { query, error: String(error) });
327
+ // Step 2b: Search for extracted symbols as definition (class/interface) prefixes.
328
+ // When the user writes "REST", "bulk", or "allocation", they usually mean classes
329
+ // like RestController, BulkRequest, AllocationService — not nodes named exactly that.
330
+ // Also tries stem variants: "caching" → "cache" finds Cache, CacheBuilder.
331
+ if (symbolsFromQuery.length > 0) {
332
+ const definitionKinds = ['class', 'interface', 'struct', 'trait',
333
+ 'protocol', 'enum', 'type_alias'];
334
+ // Expand symbols with stem variants for broader definition matching
335
+ const expandedSymbols = new Set(symbolsFromQuery);
336
+ for (const sym of symbolsFromQuery) {
337
+ for (const variant of (0, query_utils_1.getStemVariants)(sym)) {
338
+ expandedSymbols.add(variant);
339
+ }
279
340
  }
280
- }
281
- // Step 4: Fall back to text search if no semantic results
282
- if (semanticResults.length === 0 && exactMatches.length === 0) {
283
- try {
284
- const textResults = this.queries.searchNodes(query, {
285
- limit: opts.searchLimit,
286
- kinds: opts.nodeKinds && opts.nodeKinds.length > 0 ? opts.nodeKinds : undefined,
341
+ for (const sym of expandedSymbols) {
342
+ // Title-case the symbol: "REST" → "Rest", "bulk" → "Bulk", "allocation" → "Allocation"
343
+ const titleCased = sym.charAt(0).toUpperCase() + sym.slice(1).toLowerCase();
344
+ if (titleCased === sym)
345
+ continue; // already title-case (e.g., "Engine") — handled by exact match
346
+ // Fetch more results since popular prefixes have many matches
347
+ const prefixResults = this.queries.searchNodes(titleCased, {
348
+ limit: 30,
349
+ kinds: definitionKinds,
287
350
  });
288
- semanticResults = textResults;
351
+ const matched = [];
352
+ for (const r of prefixResults) {
353
+ if (r.node.name.toLowerCase().startsWith(titleCased.toLowerCase())) {
354
+ // Favor shorter names: "AllocationService" (18 chars) over
355
+ // "AllocationBalancingRoundMetrics" (31 chars). Core classes tend
356
+ // to have concise names; test/helper classes are verbose.
357
+ const brevityBonus = Math.max(0, 10 - (r.node.name.length - titleCased.length) / 3);
358
+ matched.push({ ...r, score: r.score + 15 + brevityBonus });
359
+ }
360
+ }
361
+ matched.sort((a, b) => b.score - a.score);
362
+ for (const r of matched.slice(0, Math.ceil(opts.searchLimit))) {
363
+ const existing = exactMatches.find(e => e.node.id === r.node.id);
364
+ if (!existing) {
365
+ exactMatches.push(r);
366
+ }
367
+ }
289
368
  }
290
- catch (error) {
291
- (0, errors_1.logWarn)('Text search failed', { query, error: String(error) });
292
- // Return empty results
369
+ exactMatches.sort((a, b) => b.score - a.score);
370
+ exactMatches = exactMatches.slice(0, Math.ceil(opts.searchLimit * 3));
371
+ }
372
+ // Step 3: Run text search for natural language term matching
373
+ // This catches file-name and node-name matches that exact-symbol lookup may miss,
374
+ // which is critical for template-heavy codebases (e.g., Liquid/Shopify themes)
375
+ // where file names are the primary identifiers.
376
+ let textResults = [];
377
+ try {
378
+ const searchTerms = (0, query_utils_1.extractSearchTerms)(query);
379
+ if (searchTerms.length > 0) {
380
+ // Search each term individually to get broader coverage,
381
+ // then boost results that match multiple terms
382
+ const termResultsMap = new Map();
383
+ // When no explicit kind filter is set, exclude imports — they flood FTS
384
+ // results with qualified name matches (e.g., "REST" matches 445K import paths)
385
+ // but are almost never what exploration queries want.
386
+ const searchKinds = opts.nodeKinds && opts.nodeKinds.length > 0
387
+ ? opts.nodeKinds
388
+ : ['file', 'module', 'class', 'struct', 'interface', 'trait', 'protocol',
389
+ 'function', 'method', 'property', 'field', 'variable', 'constant',
390
+ 'enum', 'enum_member', 'type_alias', 'namespace', 'export',
391
+ 'route', 'component'];
392
+ for (const term of searchTerms) {
393
+ const termResults = this.queries.searchNodes(term, {
394
+ limit: opts.searchLimit * 2,
395
+ kinds: searchKinds,
396
+ });
397
+ for (const r of termResults) {
398
+ const existing = termResultsMap.get(r.node.id);
399
+ if (existing) {
400
+ existing.termHits++;
401
+ existing.result.score = Math.max(existing.result.score, r.score);
402
+ }
403
+ else {
404
+ termResultsMap.set(r.node.id, { result: r, termHits: 1 });
405
+ }
406
+ }
407
+ }
408
+ // Boost results matching multiple terms and sort
409
+ textResults = Array.from(termResultsMap.values())
410
+ .map(({ result, termHits }) => ({
411
+ ...result,
412
+ score: result.score + (termHits - 1) * 5,
413
+ }))
414
+ .sort((a, b) => b.score - a.score)
415
+ .slice(0, opts.searchLimit * 2);
293
416
  }
417
+ (0, errors_1.logDebug)('Text search results', { count: textResults.length });
418
+ }
419
+ catch (error) {
420
+ (0, errors_1.logDebug)('Text search failed', { query, error: String(error) });
294
421
  }
295
- // Step 5: Merge results, prioritizing exact matches
422
+ // Step 4: Merge results, prioritizing exact matches, then text (path-boosted)
296
423
  const seenIds = new Set();
297
424
  let searchResults = [];
298
425
  // Add exact matches first (highest priority)
@@ -302,15 +429,217 @@ class ContextBuilder {
302
429
  searchResults.push(result);
303
430
  }
304
431
  }
305
- // Add semantic/text results
306
- for (const result of semanticResults) {
432
+ // Add text search results (includes path relevance scoring from searchNodes)
433
+ for (const result of textResults) {
307
434
  if (!seenIds.has(result.node.id)) {
308
435
  seenIds.add(result.node.id);
309
436
  searchResults.push(result);
310
437
  }
311
438
  }
312
- // Limit total results
313
- searchResults = searchResults.slice(0, opts.searchLimit * 2);
439
+ const queryLower = query.toLowerCase();
440
+ const isTestQuery = queryLower.includes('test') || queryLower.includes('spec');
441
+ // Deprioritize test files early so they don't take multi-term boost slots
442
+ if (!isTestQuery) {
443
+ for (const result of searchResults) {
444
+ if ((0, query_utils_1.isTestFile)(result.node.filePath)) {
445
+ result.score *= 0.3;
446
+ }
447
+ }
448
+ }
449
+ // Step 5a: Multi-term co-occurrence re-ranking (applied BEFORE truncation).
450
+ // For multi-word queries like "search execution from request to shard",
451
+ // nodes matching 2+ query terms in their name or path are far more relevant
452
+ // than nodes matching just one generic term. Without this, "ExecutionUtils"
453
+ // (matches only "execution") fills budget slots meant for "ShardSearchRequest"
454
+ // (matches "shard" + "search" + "request").
455
+ const queryTermsForBoost = (0, query_utils_1.extractSearchTerms)(query);
456
+ if (queryTermsForBoost.length >= 2) {
457
+ // Group terms that are substrings of each other (stem variants of the same
458
+ // root word). "indexed", "indexe", "index" should count as ONE concept match,
459
+ // not three. Without this, stem variants inflate matchCount and give false
460
+ // multi-term boosts to symbols matching one root word multiple times.
461
+ const termGroups = [];
462
+ const sorted = [...queryTermsForBoost].sort((a, b) => b.length - a.length);
463
+ const assigned = new Set();
464
+ for (const term of sorted) {
465
+ if (assigned.has(term))
466
+ continue;
467
+ const group = [term];
468
+ assigned.add(term);
469
+ for (const other of sorted) {
470
+ if (assigned.has(other))
471
+ continue;
472
+ if (term.includes(other) || other.includes(term)) {
473
+ group.push(other);
474
+ assigned.add(other);
475
+ }
476
+ }
477
+ termGroups.push(group);
478
+ }
479
+ for (const result of searchResults) {
480
+ // Check term matches in name (substring) and path DIRECTORIES (exact).
481
+ // Directory segments must match exactly — "search" matches directory
482
+ // "search/" but NOT "elasticsearch/". The class name is checked
483
+ // separately via substring match on the node name.
484
+ const nameLower = result.node.name.toLowerCase();
485
+ const dirSegments = path.dirname(result.node.filePath).toLowerCase().split('/');
486
+ let matchCount = 0;
487
+ for (const group of termGroups) {
488
+ const groupMatches = group.some(term => {
489
+ const inName = nameLower.includes(term);
490
+ const inDir = dirSegments.some(seg => seg === term);
491
+ return inName || inDir;
492
+ });
493
+ if (groupMatches)
494
+ matchCount++;
495
+ }
496
+ if (matchCount >= 2) {
497
+ // Multiplicative boost — 2 terms → 2x, 3 terms → 2.5x
498
+ result.score *= 1 + matchCount * 0.5;
499
+ }
500
+ else {
501
+ // Dampen single-term matches — they matched a generic word
502
+ // (e.g., "Execution" or "Shard" alone) not the compound concept
503
+ result.score *= 0.3;
504
+ }
505
+ }
506
+ searchResults.sort((a, b) => b.score - a.score);
507
+ }
508
+ // Step 5b: CamelCase-boundary matching via LIKE query.
509
+ // FTS can't find "Search" inside "TransportSearchAction" (one FTS token).
510
+ // LIKE reliably finds these substring matches. Results are appended with
511
+ // guaranteed slots so they don't compete with higher-scoring prefix matches.
512
+ if (symbolsFromQuery.length > 0) {
513
+ const camelDefinitionKinds = ['class', 'interface', 'struct', 'trait',
514
+ 'protocol', 'enum', 'type_alias'];
515
+ const camelSearchedTerms = new Set();
516
+ const searchIdSet = new Set(searchResults.map(r => r.node.id));
517
+ // Track per-node term hits for multi-term boosting
518
+ const camelNodeTerms = new Map();
519
+ const maxCamelPerTerm = Math.ceil(opts.searchLimit / 2);
520
+ for (const sym of symbolsFromQuery) {
521
+ const titleCased = sym.charAt(0).toUpperCase() + sym.slice(1).toLowerCase();
522
+ if (titleCased.length < 3)
523
+ continue;
524
+ const termKey = titleCased.toLowerCase();
525
+ if (camelSearchedTerms.has(termKey))
526
+ continue;
527
+ camelSearchedTerms.add(termKey);
528
+ // Fetch a large batch — popular terms like "Search" in Elasticsearch
529
+ // have hundreds of substring matches. The LIKE scan cost is the same
530
+ // regardless of LIMIT (SQLite scans all matches to sort), so we fetch
531
+ // generously and let path-relevance scoring pick the best ones.
532
+ const likeResults = this.queries.findNodesByNameSubstring(titleCased, {
533
+ limit: 200,
534
+ kinds: camelDefinitionKinds,
535
+ excludePrefix: true,
536
+ });
537
+ // Filter to CamelCase boundaries, score by path relevance, and take top N
538
+ const termCandidates = [];
539
+ for (const r of likeResults) {
540
+ const name = r.node.name;
541
+ const idx = name.indexOf(titleCased);
542
+ if (idx <= 0)
543
+ continue;
544
+ if (!/[a-z]/.test(name.charAt(idx - 1)))
545
+ continue;
546
+ if (searchIdSet.has(r.node.id))
547
+ continue;
548
+ if ((0, query_utils_1.isTestFile)(r.node.filePath) && !isTestQuery)
549
+ continue;
550
+ const pathScore = (0, query_utils_1.scorePathRelevance)(r.node.filePath, query);
551
+ const brevityBonus = Math.max(0, 6 - (name.length - titleCased.length) / 4);
552
+ termCandidates.push({ node: r.node, score: 8 + brevityBonus + pathScore });
553
+ }
554
+ termCandidates.sort((a, b) => b.score - a.score);
555
+ // Widen the per-term pool for accumulation so multi-term co-occurrences
556
+ // can be discovered. A class matching 3 query terms at CamelCase boundaries
557
+ // is far more relevant than one matching just 1, but it needs to survive
558
+ // the per-term cut for EACH term to accumulate its count.
559
+ const accumPerTerm = maxCamelPerTerm * 4;
560
+ for (const r of termCandidates.slice(0, accumPerTerm)) {
561
+ const existing = camelNodeTerms.get(r.node.id);
562
+ if (existing) {
563
+ existing.termCount++;
564
+ }
565
+ else {
566
+ camelNodeTerms.set(r.node.id, {
567
+ result: r,
568
+ termCount: 1,
569
+ });
570
+ }
571
+ }
572
+ }
573
+ // Append CamelCase matches with multi-term boost (guaranteed slots)
574
+ const camelResults = [];
575
+ for (const [, info] of camelNodeTerms) {
576
+ info.result.score += (info.termCount - 1) * 15;
577
+ camelResults.push(info.result);
578
+ }
579
+ camelResults.sort((a, b) => b.score - a.score);
580
+ const maxCamelTotal = Math.ceil(opts.searchLimit / 2);
581
+ for (const r of camelResults.slice(0, maxCamelTotal)) {
582
+ searchResults.push(r);
583
+ searchIdSet.add(r.node.id);
584
+ }
585
+ // Step 5c: Compound term matching — find classes whose name contains 2+
586
+ // query terms at ANY position (not just CamelCase boundaries).
587
+ // The CamelCase step above requires idx > 0, which misses classes that
588
+ // START with a query term (e.g., "SearchShardsRequest" starts with "Search").
589
+ // For multi-word queries, a class matching multiple query terms in its name
590
+ // is almost certainly relevant regardless of position.
591
+ if (symbolsFromQuery.length >= 2) {
592
+ // Collect ALL LIKE results per term (reusing findNodesByNameSubstring)
593
+ // but without the CamelCase boundary or prefix exclusion filters.
594
+ const compoundTermMap = new Map();
595
+ for (const sym of symbolsFromQuery) {
596
+ const titleCased = sym.charAt(0).toUpperCase() + sym.slice(1).toLowerCase();
597
+ if (titleCased.length < 3)
598
+ continue;
599
+ const likeResults = this.queries.findNodesByNameSubstring(titleCased, {
600
+ limit: 200,
601
+ kinds: camelDefinitionKinds,
602
+ excludePrefix: false,
603
+ });
604
+ for (const r of likeResults) {
605
+ if (searchIdSet.has(r.node.id))
606
+ continue;
607
+ if ((0, query_utils_1.isTestFile)(r.node.filePath) && !isTestQuery)
608
+ continue;
609
+ const entry = compoundTermMap.get(r.node.id);
610
+ if (entry) {
611
+ entry.terms.add(titleCased);
612
+ }
613
+ else {
614
+ compoundTermMap.set(r.node.id, { node: r.node, terms: new Set([titleCased]) });
615
+ }
616
+ }
617
+ }
618
+ // Keep only nodes matching 2+ distinct terms
619
+ const compoundResults = [];
620
+ for (const [, entry] of compoundTermMap) {
621
+ if (entry.terms.size >= 2) {
622
+ const pathScore = (0, query_utils_1.scorePathRelevance)(entry.node.filePath, query);
623
+ const brevityBonus = Math.max(0, 6 - entry.node.name.length / 8);
624
+ compoundResults.push({
625
+ node: entry.node,
626
+ score: 10 + (entry.terms.size - 1) * 20 + pathScore + brevityBonus,
627
+ });
628
+ }
629
+ }
630
+ compoundResults.sort((a, b) => b.score - a.score);
631
+ const maxCompound = Math.ceil(opts.searchLimit / 2);
632
+ for (const r of compoundResults.slice(0, maxCompound)) {
633
+ searchResults.push(r);
634
+ searchIdSet.add(r.node.id);
635
+ }
636
+ }
637
+ }
638
+ // Final sort and truncation — all search channels (exact, text, CamelCase,
639
+ // compound) have now contributed. Sort by score so multi-term matches from
640
+ // later steps can outrank dampened single-term matches from earlier steps.
641
+ searchResults.sort((a, b) => b.score - a.score);
642
+ searchResults = searchResults.slice(0, opts.searchLimit * 3);
314
643
  // Filter by minimum score
315
644
  let filteredResults = searchResults.filter((r) => r.score >= opts.minScore);
316
645
  // Resolve imports/exports to their actual definitions
@@ -322,6 +651,57 @@ class ContextBuilder {
322
651
  nodes.set(result.node.id, result.node);
323
652
  roots.push(result.node.id);
324
653
  }
654
+ // Expand type hierarchy for class/interface entry points.
655
+ // BFS often exhausts its per-entry-point budget on contained methods
656
+ // before reaching extends/implements neighbors. This dedicated step
657
+ // ensures subclasses and superclasses always appear in results.
658
+ // Budget: up to maxNodes/4 hierarchy nodes to avoid flooding.
659
+ const typeHierarchyKinds = new Set(['class', 'interface', 'struct', 'trait', 'protocol']);
660
+ const maxHierarchyNodes = Math.ceil(opts.maxNodes / 4);
661
+ let hierarchyNodesAdded = 0;
662
+ for (const result of filteredResults) {
663
+ if (hierarchyNodesAdded >= maxHierarchyNodes)
664
+ break;
665
+ if (typeHierarchyKinds.has(result.node.kind)) {
666
+ const hierarchy = this.traverser.getTypeHierarchy(result.node.id);
667
+ for (const [id, node] of hierarchy.nodes) {
668
+ if (!nodes.has(id)) {
669
+ nodes.set(id, node);
670
+ hierarchyNodesAdded++;
671
+ }
672
+ }
673
+ for (const edge of hierarchy.edges) {
674
+ const exists = edges.some((e) => e.source === edge.source && e.target === edge.target && e.kind === edge.kind);
675
+ if (!exists) {
676
+ edges.push(edge);
677
+ }
678
+ }
679
+ }
680
+ }
681
+ // Pass 2: expand hierarchy of newly-discovered parent types to find siblings.
682
+ // E.g., InternalEngine → Engine (parent, from pass 1) → ReadOnlyEngine (sibling).
683
+ if (hierarchyNodesAdded > 0) {
684
+ const pass2Candidates = [...nodes.values()].filter(n => typeHierarchyKinds.has(n.kind) && !roots.includes(n.id));
685
+ for (const candidate of pass2Candidates) {
686
+ if (hierarchyNodesAdded >= maxHierarchyNodes)
687
+ break;
688
+ const siblingHierarchy = this.traverser.getTypeHierarchy(candidate.id);
689
+ for (const [id, node] of siblingHierarchy.nodes) {
690
+ if (!nodes.has(id) && hierarchyNodesAdded < maxHierarchyNodes) {
691
+ nodes.set(id, node);
692
+ hierarchyNodesAdded++;
693
+ }
694
+ }
695
+ for (const edge of siblingHierarchy.edges) {
696
+ if (nodes.has(edge.source) && nodes.has(edge.target)) {
697
+ const exists = edges.some((e) => e.source === edge.source && e.target === edge.target && e.kind === edge.kind);
698
+ if (!exists) {
699
+ edges.push(edge);
700
+ }
701
+ }
702
+ }
703
+ }
704
+ }
325
705
  // Traverse from each entry point
326
706
  for (const result of filteredResults) {
327
707
  const traversalResult = this.traverser.traverseBFS(result.node.id, {
@@ -346,6 +726,8 @@ class ContextBuilder {
346
726
  }
347
727
  }
348
728
  // Trim to max nodes if needed
729
+ let finalNodes = nodes;
730
+ let finalEdges = edges;
349
731
  if (nodes.size > opts.maxNodes) {
350
732
  // Prioritize entry points and their direct neighbors
351
733
  const priorityIds = new Set(roots);
@@ -358,26 +740,95 @@ class ContextBuilder {
358
740
  }
359
741
  }
360
742
  // Keep priority nodes, then fill remaining slots
361
- const trimmedNodes = new Map();
743
+ finalNodes = new Map();
362
744
  for (const id of priorityIds) {
363
745
  const node = nodes.get(id);
364
- if (node && trimmedNodes.size < opts.maxNodes) {
365
- trimmedNodes.set(id, node);
746
+ if (node && finalNodes.size < opts.maxNodes) {
747
+ finalNodes.set(id, node);
366
748
  }
367
749
  }
368
750
  // Fill remaining from other nodes
369
751
  for (const [id, node] of nodes) {
370
- if (trimmedNodes.size >= opts.maxNodes)
752
+ if (finalNodes.size >= opts.maxNodes)
371
753
  break;
372
- if (!trimmedNodes.has(id)) {
373
- trimmedNodes.set(id, node);
754
+ if (!finalNodes.has(id)) {
755
+ finalNodes.set(id, node);
374
756
  }
375
757
  }
376
758
  // Filter edges to only include kept nodes
377
- const trimmedEdges = edges.filter((e) => trimmedNodes.has(e.source) && trimmedNodes.has(e.target));
378
- return { nodes: trimmedNodes, edges: trimmedEdges, roots };
759
+ finalEdges = edges.filter((e) => finalNodes.has(e.source) && finalNodes.has(e.target));
760
+ }
761
+ // Per-file diversity cap: prevent any single file from monopolizing the
762
+ // node budget. When BFS traverses from a method, it follows `contains`
763
+ // to the parent class, then back down to all sibling methods. With
764
+ // multiple entry points in the same class, one file can consume 30-40%
765
+ // of maxNodes. Cap each file to ~20% to ensure cross-file diversity.
766
+ const maxPerFile = Math.max(5, Math.ceil(opts.maxNodes * 0.2));
767
+ const fileCounts = new Map();
768
+ for (const [id, node] of finalNodes) {
769
+ const ids = fileCounts.get(node.filePath) || [];
770
+ ids.push(id);
771
+ fileCounts.set(node.filePath, ids);
772
+ }
773
+ const rootSet = new Set(roots);
774
+ for (const [, nodeIds] of fileCounts) {
775
+ if (nodeIds.length <= maxPerFile)
776
+ continue;
777
+ // Sort: entry points first, then classes/interfaces, then others
778
+ const kindPriority = {
779
+ class: 3, interface: 3, struct: 3, trait: 3, protocol: 3, enum: 3,
780
+ method: 1, function: 1, property: 0, field: 0, variable: 0,
781
+ };
782
+ nodeIds.sort((a, b) => {
783
+ const aRoot = rootSet.has(a) ? 10 : 0;
784
+ const bRoot = rootSet.has(b) ? 10 : 0;
785
+ const aKind = kindPriority[finalNodes.get(a).kind] ?? 0;
786
+ const bKind = kindPriority[finalNodes.get(b).kind] ?? 0;
787
+ return (bRoot + bKind) - (aRoot + aKind);
788
+ });
789
+ // Remove excess nodes (keep the highest-priority ones)
790
+ for (const id of nodeIds.slice(maxPerFile)) {
791
+ finalNodes.delete(id);
792
+ }
793
+ }
794
+ // Non-production node cap: limit test/sample/integration/example files to
795
+ // at most 15% of the budget. Many codebases have dozens of near-identical
796
+ // test implementations (e.g., 6 Guard classes in integration tests) that
797
+ // individually survive score dampening but collectively flood the result.
798
+ // Test entry points are NOT exempt — they should be evicted too.
799
+ if (!isTestQuery) {
800
+ const maxNonProd = Math.max(3, Math.ceil(opts.maxNodes * 0.15));
801
+ const nonProdIds = [];
802
+ for (const [id, node] of finalNodes) {
803
+ if ((0, query_utils_1.isTestFile)(node.filePath)) {
804
+ nonProdIds.push(id);
805
+ }
806
+ }
807
+ if (nonProdIds.length > maxNonProd) {
808
+ for (const id of nonProdIds.slice(maxNonProd)) {
809
+ finalNodes.delete(id);
810
+ // Also remove from roots — test file entry points shouldn't anchor results
811
+ const rootIdx = roots.indexOf(id);
812
+ if (rootIdx !== -1)
813
+ roots.splice(rootIdx, 1);
814
+ }
815
+ }
816
+ }
817
+ // Re-filter edges after per-file and non-production caps
818
+ finalEdges = finalEdges.filter((e) => finalNodes.has(e.source) && finalNodes.has(e.target));
819
+ // Edge recovery: BFS with many entry points leaves most nodes disconnected.
820
+ // Discover edges between already-selected nodes to recover connectivity.
821
+ const recoveryKinds = ['calls', 'extends', 'implements', 'references', 'overrides'];
822
+ const recoveredEdges = this.queries.findEdgesBetweenNodes([...finalNodes.keys()], recoveryKinds);
823
+ const existingEdgeKeys = new Set(finalEdges.map((e) => `${e.source}:${e.target}:${e.kind}`));
824
+ for (const edge of recoveredEdges) {
825
+ const key = `${edge.source}:${edge.target}:${edge.kind}`;
826
+ if (!existingEdgeKeys.has(key)) {
827
+ finalEdges.push(edge);
828
+ existingEdgeKeys.add(key);
829
+ }
379
830
  }
380
- return { nodes, edges, roots };
831
+ return { nodes: finalNodes, edges: finalEdges, roots };
381
832
  }
382
833
  /**
383
834
  * Get the source code for a node
@@ -559,8 +1010,8 @@ exports.ContextBuilder = ContextBuilder;
559
1010
  /**
560
1011
  * Create a context builder
  *
  * Thin factory: forwards its arguments unchanged to the ContextBuilder
  * constructor and returns the new instance.
  *
  * In this hunk the removed (`-`) form took a fourth `vectorManager`
  * argument and passed it through; the added (`+`) form takes only
  * `projectRoot`, `queries` and `traverser`.
561
1012
  */
562
- function createContextBuilder(projectRoot, queries, traverser, vectorManager) {
563
- return new ContextBuilder(projectRoot, queries, traverser, vectorManager);
1013
+ function createContextBuilder(projectRoot, queries, traverser) {
1014
+ return new ContextBuilder(projectRoot, queries, traverser);
564
1015
  }
565
1016
  // Re-export formatter
566
1017
  var formatter_2 = require("./formatter");