purecontext-mcp 1.2.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (177) hide show
  1. package/AGENT_INSTRUCTIONS.md +110 -784
  2. package/AGENT_REFERENCE.md +561 -0
  3. package/CHANGELOG.md +177 -6
  4. package/FRAMEWORK-ADAPTERS.md +351 -0
  5. package/LANGUAGE-SUPPORT.md +144 -0
  6. package/README.md +92 -12
  7. package/USER-GUIDE.md +8 -0
  8. package/dist/cli/hooks.d.ts +28 -0
  9. package/dist/cli/hooks.d.ts.map +1 -0
  10. package/dist/cli/hooks.js +570 -0
  11. package/dist/cli/hooks.js.map +1 -0
  12. package/dist/cli/install-detect.d.ts +16 -0
  13. package/dist/cli/install-detect.d.ts.map +1 -0
  14. package/dist/cli/install-detect.js +70 -0
  15. package/dist/cli/install-detect.js.map +1 -0
  16. package/dist/cli/install-writers.d.ts +59 -0
  17. package/dist/cli/install-writers.d.ts.map +1 -0
  18. package/dist/cli/install-writers.js +292 -0
  19. package/dist/cli/install-writers.js.map +1 -0
  20. package/dist/cli/install.d.ts +14 -0
  21. package/dist/cli/install.d.ts.map +1 -0
  22. package/dist/cli/install.js +150 -0
  23. package/dist/cli/install.js.map +1 -0
  24. package/dist/config/config-loader.js +3 -0
  25. package/dist/config/config-loader.js.map +1 -1
  26. package/dist/config/config-schema.d.ts +11 -0
  27. package/dist/config/config-schema.d.ts.map +1 -1
  28. package/dist/config/config-schema.js +15 -0
  29. package/dist/config/config-schema.js.map +1 -1
  30. package/dist/core/db/symbol-store.d.ts +1 -0
  31. package/dist/core/db/symbol-store.d.ts.map +1 -1
  32. package/dist/core/db/symbol-store.js +120 -6
  33. package/dist/core/db/symbol-store.js.map +1 -1
  34. package/dist/core/file-discovery.d.ts +6 -0
  35. package/dist/core/file-discovery.d.ts.map +1 -1
  36. package/dist/core/file-discovery.js +20 -13
  37. package/dist/core/file-discovery.js.map +1 -1
  38. package/dist/core/file-processor.d.ts.map +1 -1
  39. package/dist/core/file-processor.js +26 -1
  40. package/dist/core/file-processor.js.map +1 -1
  41. package/dist/core/git-log-reader.d.ts.map +1 -1
  42. package/dist/core/git-log-reader.js +21 -0
  43. package/dist/core/git-log-reader.js.map +1 -1
  44. package/dist/core/index-manager.d.ts.map +1 -1
  45. package/dist/core/index-manager.js +21 -7
  46. package/dist/core/index-manager.js.map +1 -1
  47. package/dist/core/indexing-worker.d.ts.map +1 -1
  48. package/dist/core/indexing-worker.js +14 -0
  49. package/dist/core/indexing-worker.js.map +1 -1
  50. package/dist/core/parse-dispatcher.d.ts.map +1 -1
  51. package/dist/core/parse-dispatcher.js +20 -5
  52. package/dist/core/parse-dispatcher.js.map +1 -1
  53. package/dist/core/search/query-preprocessor.d.ts +69 -3
  54. package/dist/core/search/query-preprocessor.d.ts.map +1 -1
  55. package/dist/core/search/query-preprocessor.js +450 -17
  56. package/dist/core/search/query-preprocessor.js.map +1 -1
  57. package/dist/core/search/relevance-ranker.d.ts +60 -5
  58. package/dist/core/search/relevance-ranker.d.ts.map +1 -1
  59. package/dist/core/search/relevance-ranker.js +931 -33
  60. package/dist/core/search/relevance-ranker.js.map +1 -1
  61. package/dist/core/test-mapper.d.ts.map +1 -1
  62. package/dist/core/test-mapper.js +7 -1
  63. package/dist/core/test-mapper.js.map +1 -1
  64. package/dist/core/types.d.ts +28 -1
  65. package/dist/core/types.d.ts.map +1 -1
  66. package/dist/handlers/angular-html.d.ts +3 -0
  67. package/dist/handlers/angular-html.d.ts.map +1 -0
  68. package/dist/handlers/angular-html.js +215 -0
  69. package/dist/handlers/angular-html.js.map +1 -0
  70. package/dist/handlers/c.d.ts.map +1 -1
  71. package/dist/handlers/c.js +19 -0
  72. package/dist/handlers/c.js.map +1 -1
  73. package/dist/handlers/cpp-macro-registry.d.ts +21 -0
  74. package/dist/handlers/cpp-macro-registry.d.ts.map +1 -0
  75. package/dist/handlers/cpp-macro-registry.js +44 -0
  76. package/dist/handlers/cpp-macro-registry.js.map +1 -0
  77. package/dist/handlers/cpp.d.ts.map +1 -1
  78. package/dist/handlers/cpp.js +579 -10
  79. package/dist/handlers/cpp.js.map +1 -1
  80. package/dist/handlers/csharp.d.ts.map +1 -1
  81. package/dist/handlers/csharp.js +39 -2
  82. package/dist/handlers/csharp.js.map +1 -1
  83. package/dist/handlers/css.d.ts +3 -0
  84. package/dist/handlers/css.d.ts.map +1 -0
  85. package/dist/handlers/css.js +154 -0
  86. package/dist/handlers/css.js.map +1 -0
  87. package/dist/handlers/erlang.d.ts.map +1 -1
  88. package/dist/handlers/erlang.js +8 -1
  89. package/dist/handlers/erlang.js.map +1 -1
  90. package/dist/handlers/fortran.js +1 -1
  91. package/dist/handlers/fortran.js.map +1 -1
  92. package/dist/handlers/go.d.ts.map +1 -1
  93. package/dist/handlers/go.js +87 -2
  94. package/dist/handlers/go.js.map +1 -1
  95. package/dist/handlers/handler-registry.d.ts.map +1 -1
  96. package/dist/handlers/handler-registry.js +4 -0
  97. package/dist/handlers/handler-registry.js.map +1 -1
  98. package/dist/handlers/hcl.d.ts +3 -0
  99. package/dist/handlers/hcl.d.ts.map +1 -0
  100. package/dist/handlers/hcl.js +193 -0
  101. package/dist/handlers/hcl.js.map +1 -0
  102. package/dist/handlers/java.d.ts.map +1 -1
  103. package/dist/handlers/java.js +33 -16
  104. package/dist/handlers/java.js.map +1 -1
  105. package/dist/handlers/kotlin.d.ts.map +1 -1
  106. package/dist/handlers/kotlin.js +48 -3
  107. package/dist/handlers/kotlin.js.map +1 -1
  108. package/dist/handlers/less.d.ts +3 -0
  109. package/dist/handlers/less.d.ts.map +1 -0
  110. package/dist/handlers/less.js +255 -0
  111. package/dist/handlers/less.js.map +1 -0
  112. package/dist/handlers/objective-c.d.ts.map +1 -1
  113. package/dist/handlers/objective-c.js +122 -64
  114. package/dist/handlers/objective-c.js.map +1 -1
  115. package/dist/handlers/openapi.d.ts.map +1 -1
  116. package/dist/handlers/openapi.js +30 -5
  117. package/dist/handlers/openapi.js.map +1 -1
  118. package/dist/handlers/php.d.ts.map +1 -1
  119. package/dist/handlers/php.js +287 -41
  120. package/dist/handlers/php.js.map +1 -1
  121. package/dist/handlers/protobuf.d.ts.map +1 -1
  122. package/dist/handlers/protobuf.js +1 -0
  123. package/dist/handlers/protobuf.js.map +1 -1
  124. package/dist/handlers/python.d.ts.map +1 -1
  125. package/dist/handlers/python.js +1 -3
  126. package/dist/handlers/python.js.map +1 -1
  127. package/dist/handlers/ruby-dsl.d.ts +23 -0
  128. package/dist/handlers/ruby-dsl.d.ts.map +1 -0
  129. package/dist/handlers/ruby-dsl.js +251 -0
  130. package/dist/handlers/ruby-dsl.js.map +1 -0
  131. package/dist/handlers/ruby.d.ts.map +1 -1
  132. package/dist/handlers/ruby.js +29 -4
  133. package/dist/handlers/ruby.js.map +1 -1
  134. package/dist/handlers/rust.d.ts.map +1 -1
  135. package/dist/handlers/rust.js +98 -2
  136. package/dist/handlers/rust.js.map +1 -1
  137. package/dist/handlers/scss.d.ts +3 -0
  138. package/dist/handlers/scss.d.ts.map +1 -0
  139. package/dist/handlers/scss.js +290 -0
  140. package/dist/handlers/scss.js.map +1 -0
  141. package/dist/handlers/sql.d.ts.map +1 -1
  142. package/dist/handlers/sql.js +37 -18
  143. package/dist/handlers/sql.js.map +1 -1
  144. package/dist/handlers/typescript.d.ts.map +1 -1
  145. package/dist/handlers/typescript.js +65 -17
  146. package/dist/handlers/typescript.js.map +1 -1
  147. package/dist/handlers/xml.d.ts.map +1 -1
  148. package/dist/handlers/xml.js +35 -2
  149. package/dist/handlers/xml.js.map +1 -1
  150. package/dist/index.d.ts.map +1 -1
  151. package/dist/index.js +91 -0
  152. package/dist/index.js.map +1 -1
  153. package/dist/server/mcp-server.d.ts.map +1 -1
  154. package/dist/server/mcp-server.js +10 -0
  155. package/dist/server/mcp-server.js.map +1 -1
  156. package/dist/server/tools/detect-antipatterns.d.ts +1 -1
  157. package/dist/server/tools/get-architecture-snapshot.d.ts +1 -1
  158. package/dist/server/tools/get-entry-points.d.ts +1 -1
  159. package/dist/server/tools/get-lexical-scope-matches.d.ts +54 -0
  160. package/dist/server/tools/get-lexical-scope-matches.d.ts.map +1 -0
  161. package/dist/server/tools/get-lexical-scope-matches.js +470 -0
  162. package/dist/server/tools/get-lexical-scope-matches.js.map +1 -0
  163. package/dist/server/tools/search-symbols.d.ts +10 -0
  164. package/dist/server/tools/search-symbols.d.ts.map +1 -1
  165. package/dist/server/tools/search-symbols.js +353 -8
  166. package/dist/server/tools/search-symbols.js.map +1 -1
  167. package/dist/server/tools/trace-invocation-chain.d.ts +53 -0
  168. package/dist/server/tools/trace-invocation-chain.d.ts.map +1 -0
  169. package/dist/server/tools/trace-invocation-chain.js +280 -0
  170. package/dist/server/tools/trace-invocation-chain.js.map +1 -0
  171. package/dist/version.d.ts +1 -1
  172. package/dist/version.js +1 -1
  173. package/docs/02-installation.md +89 -17
  174. package/docs/05-cli-reference.md +89 -0
  175. package/docs/dev/benchmark-findings-eu-za-tebe.md +210 -0
  176. package/docs/dev/phase-35-coverage-audit.md +469 -0
  177. package/package.json +4 -1
@@ -7,17 +7,371 @@
7
7
  * scheme that promotes exact name matches to the top.
8
8
  *
9
9
  * Scoring (additive, higher = more relevant):
10
- * 100 — exact name match (case-insensitive)
10
+ * 100 — exact name match (case-insensitive, entire query = entire name)
11
11
  * 60 — name starts with query
12
12
  * 40 — name contains query as substring
13
- * 30 — all query words appear in name
14
- * 20 — any query word is exact name match
15
- * 10 — any query word appears in name (per word)
13
+ * 60 — identity-exact: any single query word exactly equals the symbol name (10–40 for data kinds, scaled by 40/queryWords.length); 2× when matched word appears twice in raw query
14
+ * 30 — all query words match a word-boundary name part (exact or stem)
15
+ * 20 — any query word matches a word-boundary name part (exact or stem)
16
+ * 15 — method verb bonus: first part of method name matches a query word
17
+ * 30 — kindBoost: method on a *Service class
18
+ * 20 — kindBoost: Rust trait (interface kind) when ALL trait name parts match query words
19
+ * 15 — kindBoost: method on a *Repository / *Manager / *Store / *_model / *Handler / *DB / *Client / *Activity / *Fragment / *Adapter / *ViewModel / *Processor / *Indexer / *Parser / *EventSubscriber / *Listener / *FormType class
20
+ * 10 — each query word with an exact word-boundary name-part match
21
+ * 8 — each query word with a stem-only word-boundary name-part match
16
22
  * 8 — query phrase in signature
17
23
  * 2 — any query word in signature (per word)
18
24
  * 5 — query phrase in summary
19
25
  * 1 — any query word in summary (per word)
26
+ * +15 — core path boost: symbol in /core/src/main/java/ or /main/java/ (Java/Groovy repos)
27
+ * +20 — frontend path boost: symbol in /apps/dashboard/ etc. in mixed monorepos, on hook/component queries
28
+ * +20 — use/hook bonus: symbol name starts with useXxx and query has React hook vocabulary
29
+ * +5 — path proximity boost per overlapping query token (applies when >= 3 symbols share name)
30
+ * -35 — library path penalty (system/, vendor/, third_party/, node_modules/, engine/, erts/, contrib/)
31
+ * -35 — Java plugin path penalty: /plugins/ or /plugin/ in Java/Groovy repos
32
+ *
33
+ * Query word extraction:
34
+ * - Hyphenated tokens split ("front-end" → "front", "end")
35
+ * - camelCase and snake_case tokens split into components
36
+ * - English stop words removed
37
+ * - Inflectional suffixes stripped to add stem variants:
38
+ * -s (plural) "models" → "model"
39
+ * -ing (gerund) "building" → "build"
40
+ * -ed (past) "updated" → "update" and "updat"
41
+ * -tion (nominal) "pagination" → "paginat"
20
42
  */
43
+ import { isStopWord, expandVerbSynonyms } from './query-preprocessor.js';
44
+ // ─── Library path detection ───────────────────────────────────────────────────
45
+ /**
46
+ * Directory names that always indicate third-party or low-priority library code,
47
+ * regardless of where they appear in the path. Symbols under these directories
48
+ * receive a -35 score penalty so application-level symbols rank above them.
49
+ *
50
+ * Covers:
51
+ * system/ CodeIgniter framework core
52
+ * vendor/ Composer packages (PHP) / generic vendor trees
53
+ * third_party/ Generic third-party library directories
54
+ * node_modules/ npm packages
55
+ * bower_components/ Bower packages
56
+ * engine/ Flutter C++ engine (pollutes Dart widget queries)
57
+ * erts/ Erlang/OTP BEAM VM C++ (pollutes Erlang stdlib)
58
+ * contrib/ Scientific computing legacy code
59
+ */
60
+ const LIBRARY_PATH_SEGMENTS = new Set([
61
+ 'system',
62
+ 'vendor',
63
+ 'third_party',
64
+ 'node_modules',
65
+ 'bower_components',
66
+ // Phase 71 additions:
67
+ 'engine',
68
+ 'erts',
69
+ 'contrib',
70
+ ]);
71
+ /**
72
+ * Multi-segment path substrings that identify library/low-priority code.
73
+ * Checked case-insensitively against the full lowercased path.
74
+ *
75
+ * /lib/wx/ Erlang/OTP wxWidgets C++ bindings
76
+ * /blas/ BLAS numerical library wrappers
77
+ * /lapack/ LAPACK numerical library wrappers
78
+ */
79
+ const LIBRARY_PATH_SUBSTRINGS = [
80
+ '/lib/wx/',
81
+ '/blas/',
82
+ '/lapack/',
83
+ ];
84
+ /**
85
+ * Return true when the symbol's file path contains a well-known library
86
+ * directory segment or multi-segment substring, indicating third-party /
87
+ * framework code.
88
+ *
89
+ * Uses forward-slash normalisation so paths work correctly on Windows and Unix.
90
+ * Checks are case-insensitive to handle /BLAS/, /Engine/, etc.
91
+ */
92
+ export function isLibraryPath(filePath) {
93
+ const normalized = filePath.replace(/\\/g, '/').toLowerCase();
94
+ if (normalized.split('/').some((seg) => LIBRARY_PATH_SEGMENTS.has(seg)))
95
+ return true;
96
+ // Prepend '/' so that repo-relative paths like 'lib/wx/...' match '/lib/wx/'
97
+ const withLeadingSlash = '/' + normalized;
98
+ return LIBRARY_PATH_SUBSTRINGS.some((sub) => withLeadingSlash.includes(sub));
99
+ }
100
+ /**
101
+ * For methods stored with bare names (Java post-Task 258, Rust post-Task 255),
102
+ * extract the class/type name from the signature prefix.
103
+ *
104
+ * Signature formats:
105
+ * Java: "ClassName.methodName: <raw sig>"
106
+ * Rust: "TypeName::methodName: <raw sig>"
107
+ *
108
+ * Returns the lower-cased class/type name, or '' when extraction fails.
109
+ * Guards against false positives by requiring the class part to be a simple
110
+ * identifier (no dots or colons in it).
111
+ */
112
+ function classFromSignature(name, signature) {
113
+ if (!signature)
114
+ return '';
115
+ // Dot notation — Java: "ClassName.methodName: ..."
116
+ const dotPat = '.' + name + ':';
117
+ const di = signature.indexOf(dotPat);
118
+ if (di > 0) {
119
+ const candidate = signature.slice(0, di);
120
+ if (!candidate.includes('.') && !candidate.includes(':')) {
121
+ return candidate.toLowerCase();
122
+ }
123
+ }
124
+ // Double-colon notation — Rust: "TypeName::methodName ..."
125
+ const colonPat = '::' + name;
126
+ const ci = signature.indexOf(colonPat);
127
+ if (ci > 0) {
128
+ const candidate = signature.slice(0, ci);
129
+ if (!candidate.includes('.') && !candidate.includes(':')) {
130
+ return candidate.toLowerCase();
131
+ }
132
+ }
133
+ return '';
134
+ }
135
+ // ─── Core path boost helpers (Task 414) ──────────────────────────────────────
136
+ /**
137
+ * Return true when the symbol is in the canonical source directory of a
138
+ * Java/Groovy project: /core/src/main/java/, /main/java/, or the Groovy variants.
139
+ * Only applied when domain === 'java' so TypeScript repos with similar paths
140
+ * are unaffected.
141
+ */
142
+ function isCoreJavaPath(filePath) {
143
+ const p = '/' + filePath.replace(/\\/g, '/');
144
+ return (p.includes('/core/src/main/java/') ||
145
+ p.includes('/core/src/main/groovy/') ||
146
+ p.includes('/main/java/') ||
147
+ p.includes('/main/groovy/'));
148
+ }
149
+ /**
150
+ * Return true when the symbol lives in a Jenkins-style plugin tree.
151
+ * Penalised for Java/Groovy repos to lift canonical core methods above plugin
152
+ * overrides.
153
+ */
154
+ function isJavaPluginPath(filePath) {
155
+ const p = '/' + filePath.replace(/\\/g, '/').toLowerCase();
156
+ return p.includes('/plugins/') || p.includes('/plugin/');
157
+ }
158
+ // ─── Frontend path boost helpers (Task 415) ──────────────────────────────────
159
+ /**
160
+ * Return true when the symbol lives in a frontend app directory of a mixed
161
+ * monorepo (e.g. novu apps/dashboard/, cal.com apps/web/).
162
+ */
163
+ function isFrontendAppPath(filePath) {
164
+ const p = '/' + filePath.replace(/\\/g, '/');
165
+ return (p.includes('/apps/dashboard/') ||
166
+ p.includes('/apps/web/') ||
167
+ p.includes('/apps/frontend/') ||
168
+ p.includes('/apps/client/'));
169
+ }
170
+ /**
171
+ * Return true when the query contains vocabulary strongly associated with
172
+ * React hooks or frontend components.
173
+ */
174
+ function hasFrontendVocab(queryWords) {
175
+ return queryWords.some((w) => (w.startsWith('use') && w.length >= 4) ||
176
+ w === 'hook' ||
177
+ w === 'hooks' ||
178
+ w === 'component' ||
179
+ w === 'react' ||
180
+ w === 'vue' ||
181
+ w === 'svelte');
182
+ }
183
+ // ─── Path proximity boost helpers (Task 417) ─────────────────────────────────
184
+ /**
185
+ * Common path segments that are too generic to signal query relevance.
186
+ * Excluded from the path-proximity overlap calculation.
187
+ */
188
+ const COMMON_PATH_SEGMENTS = new Set([
189
+ 'src', 'lib', 'app', 'apps', 'packages', 'core', 'main',
190
+ 'index', 'test', 'tests', 'dist', 'build',
191
+ ]);
192
+ /**
193
+ * Compute a path-proximity bonus for symbols whose file-path tokens overlap
194
+ * with query words. Only applied when ≥ 3 candidates share the exact same
195
+ * lowercase name (suppresses noise for unique symbols).
196
+ *
197
+ * Returns +5 per overlapping token.
198
+ */
199
+ function computePathProximityBoost(filePath, queryWords) {
200
+ const pathTokens = filePath
201
+ .replace(/\\/g, '/')
202
+ .split(/[/\\\-_.]/)
203
+ .filter((t) => t.length >= 3 && !COMMON_PATH_SEGMENTS.has(t.toLowerCase()))
204
+ .map((t) => t.toLowerCase());
205
+ const overlap = pathTokens.filter((t) => queryWords.has(t)).length;
206
+ return overlap * 5;
207
+ }
208
+ // ─── Phase 73 boost helpers ───────────────────────────────────────────────────
209
+ const ANGULAR_LIFECYCLE_METHODS = new Set([
210
+ 'ngOnInit', 'ngOnDestroy', 'ngAfterViewInit', 'ngAfterContentInit',
211
+ 'ngOnChanges', 'ngDoCheck', 'ngAfterViewChecked', 'ngAfterContentChecked',
212
+ ]);
213
+ const LIFECYCLE_QUERY_WORDS = new Set([
214
+ 'initialization', 'initialize', 'setup', 'teardown', 'destroy', 'cleanup',
215
+ 'mount', 'unmount', 'render',
216
+ ]);
217
+ /**
218
+ * Boost Angular lifecycle methods (+10) when the query contains lifecycle
219
+ * vocabulary ("initialization", "destroy", etc.). Only fires in Angular repos.
220
+ */
221
+ function computeAngularLifecycleBoost(symbol, queryWords, isAngularRepo) {
222
+ if (!isAngularRepo)
223
+ return 0;
224
+ const methodName = symbol.name.split('.').pop() ?? symbol.name;
225
+ if (!ANGULAR_LIFECYCLE_METHODS.has(methodName))
226
+ return 0;
227
+ for (const w of queryWords)
228
+ if (LIFECYCLE_QUERY_WORDS.has(w))
229
+ return 10;
230
+ return 0;
231
+ }
232
+ const RENDERING_VERBS = new Set(['render', 'draw', 'stroke', 'paint', 'plot', 'sketch']);
233
+ /**
234
+ * Boost rendering functions (+15) that share a verb (render/draw/etc.) AND at
235
+ * least one noun with the query. Only fires in rendering-domain repos.
236
+ *
237
+ * Differentiates "renderSelectionElement" from bare "render" on a query like
238
+ * "render canvas selection element" — only the compound function has the extra
239
+ * noun "selection" in its name.
240
+ */
241
+ function computeRenderingCompoundBoost(symbol, queryWords, domain) {
242
+ if (domain !== 'rendering')
243
+ return 0;
244
+ if (symbol.kind !== 'function')
245
+ return 0;
246
+ const nameWords = splitNameParts(symbol.name);
247
+ const hasVerb = nameWords.some((w) => RENDERING_VERBS.has(w)) &&
248
+ queryWords.some((w) => RENDERING_VERBS.has(w));
249
+ if (!hasVerb)
250
+ return 0;
251
+ const querySet = new Set(queryWords);
252
+ const nameNouns = nameWords.filter((w) => !RENDERING_VERBS.has(w));
253
+ const overlap = nameNouns.filter((w) => querySet.has(w)).length;
254
+ return overlap >= 1 ? 15 : 0;
255
+ }
256
+ const MUTATION_QUERY_VERBS = new Set(['create', 'update', 'delete', 'patch', 'remove', 'add']);
257
+ const QUERY_FILE_RE = /\b(queries|mutations|hooks)\b/i;
258
+ /**
259
+ * Boost React Query hooks (+25) that live in queries/mutations/hooks files and
260
+ * match a mutation verb in the query.
261
+ *
262
+ * useCreateSecretV3 in src/hooks/api/secrets/queries.ts should rank first for
263
+ * queries like "create secret api hook" ahead of schema type symbols.
264
+ */
265
+ function computeReactQueryHookBoost(symbol, queryWords) {
266
+ const methodName = symbol.name.split('.').pop() ?? symbol.name;
267
+ if (!/^use[A-Z]\w+$/.test(methodName))
268
+ return 0;
269
+ if (!QUERY_FILE_RE.test(symbol.filePath))
270
+ return 0;
271
+ for (const w of queryWords)
272
+ if (MUTATION_QUERY_VERBS.has(w))
273
+ return 25;
274
+ return 0;
275
+ }
276
+ const INTERCEPTOR_QUERY_WORDS = new Set([
277
+ 'interceptor', 'intercept', 'resolver', 'resolve', 'guard',
278
+ 'middleware', 'pipe', 'hook',
279
+ ]);
280
+ // Matches Angular/NestJS convention: tokenInterceptor, errorInterceptor,
281
+ // bankAccountResolve (Angular route resolver), authGuard, etc.
282
+ const INTERCEPTOR_NAME_RE = /(?:Interceptor|Resolver|Resolve|Guard|Middleware|Pipe)$/;
283
+ /**
284
+ * Boost HTTP interceptor / resolver / guard symbols (+30) when the query
285
+ * explicitly mentions those concepts.
286
+ */
287
+ function computeInterceptorBoost(symbol, queryWords) {
288
+ if (symbol.kind !== 'function' && symbol.kind !== 'class')
289
+ return 0;
290
+ const baseName = symbol.name.split('.').pop() ?? symbol.name;
291
+ if (!INTERCEPTOR_NAME_RE.test(baseName))
292
+ return 0;
293
+ for (const w of queryWords)
294
+ if (INTERCEPTOR_QUERY_WORDS.has(w))
295
+ return 30;
296
+ return 0;
297
+ }
298
+ // Generic MVC/framework namespace segments that appear in many package names
299
+ // but convey no discriminating signal — filtering prevents false matches like
300
+ // TestApp::Controller::Action matching "catalyst action controller" queries.
301
+ const PACKAGE_SEGMENT_STOPWORDS = new Set([
302
+ 'test', 'testapp', 'base', 'action', 'controller', 'model', 'view',
303
+ 'helper', 'plugin', 'role', 'app', 'core', 'type', 'class', 'package',
304
+ 'lib', 'util', 'utils', 'common', 'shared',
305
+ ]);
306
+ /**
307
+ * Boost Perl and R symbols (+4 for the first overlapping package token, +8 for each additional one) when query words
308
+ * overlap with the package name prefix.
309
+ *
310
+ * For "Mojolicious::Controller::render" matching "mojolicious render": the
311
+ * package tokens ["mojolicious", "controller"] overlap with "mojolicious" in
312
+ * the query → +4. Generic MVC segments (controller, action, model…) are excluded
313
+ * to prevent TestApp::Controller from spuriously matching catalyst queries.
314
+ */
315
+ function computePackageContextBoost(symbol, queryWords) {
316
+ const ext = symbol.filePath.split('.').pop()?.toLowerCase() ?? '';
317
+ if (!['pm', 'pl', 'r'].includes(ext))
318
+ return 0;
319
+ const parts = symbol.name.split(/[.:]+/);
320
+ if (parts.length < 2)
321
+ return 0;
322
+ const packageWords = parts
323
+ .slice(0, -1)
324
+ .flatMap((p) => splitNameParts(p))
325
+ .filter((w) => !PACKAGE_SEGMENT_STOPWORDS.has(w));
326
+ const querySet = new Set(queryWords);
327
+ let overlap = 0;
328
+ for (const pw of packageWords)
329
+ if (querySet.has(pw))
330
+ overlap++;
331
+ // +4 for first match, +8 for each additional — single-match boost is intentionally
332
+ // modest to avoid tipping rankings where the framework name appears as context
333
+ // (e.g. "dispatch through the Catalyst router" should not boost Catalyst::Request
334
+ // above the bare `dispatch` function that is the correct result).
335
+ return overlap >= 2 ? (overlap - 1) * 8 + 4 : overlap >= 1 ? 4 : 0;
336
+ }
337
+ const TRPC_PREFIX_RE = /^(createTRPC|ProcedureBuilder)/;
338
+ const TRPC_QUERY_WORDS = new Set(['trpc', 'procedure', 'builder', 'router', 'rpc']);
339
+ /**
340
+ * Boost tRPC factory functions and ProcedureBuilder symbols (+20) when the query
341
+ * contains tRPC vocabulary.
342
+ */
343
+ function computeTrpcPrefixBoost(symbol, queryWords) {
344
+ const baseName = symbol.name.split('.')[0] ?? symbol.name;
345
+ if (!TRPC_PREFIX_RE.test(baseName))
346
+ return 0;
347
+ for (const w of queryWords)
348
+ if (TRPC_QUERY_WORDS.has(w))
349
+ return 20;
350
+ return 0;
351
+ }
352
+ /**
353
+ * Single-token exact-name boost (+50 for full match, +40 for last dot-segment).
354
+ *
355
+ * Fires only when the query is a single bare token. Differentiates "Pod" from
356
+ * "PodSpec" when the user typed exactly the resource name, and handles
357
+ * dot-qualified names like "io.k8s.api.core.v1.Pod" via the last-segment check.
358
+ */
359
+ function computeSingleTokenExactBoost(symbol, queryLower) {
360
+ if (queryLower.includes(' '))
361
+ return 0;
362
+ const nameLower = symbol.name.toLowerCase();
363
+ if (queryLower === nameLower)
364
+ return 50;
365
+ // For dot-qualified names (e.g. OpenAPI / protobuf schemas): also match the
366
+ // last name segment so "pod" matches "io.k8s.api.core.v1.Pod".
367
+ const lastDot = nameLower.lastIndexOf('.');
368
+ if (lastDot >= 0) {
369
+ const lastSegment = nameLower.slice(lastDot + 1);
370
+ if (queryLower === lastSegment)
371
+ return 40;
372
+ }
373
+ return 0;
374
+ }
21
375
  // ─── Public API ───────────────────────────────────────────────────────────────
22
376
  /**
23
377
  * Score and sort a list of symbols by relevance to `query`.
@@ -27,16 +381,82 @@
27
381
  *
28
382
  * When `debug` is true, each result includes a `debugScore` breakdown.
29
383
  */
30
- export function rankSymbols(symbols, query, debug = false) {
384
+ export function rankSymbols(symbols, query, debug = false, domain, opts) {
31
385
  if (symbols.length === 0)
32
386
  return [];
33
387
  const queryLower = query.trim().toLowerCase();
34
- const queryWords = extractQueryWords(query.trim());
35
- const scored = symbols.map((symbol, originalIndex) => ({
36
- ...score(symbol, queryLower, queryWords),
37
- symbol,
38
- originalIndex,
39
- }));
388
+ const queryWords = extractQueryWords(query.trim(), domain);
389
+ // Count raw occurrences of each query word (before deduplication/stemming) so
390
+ // identityExact can award a 2× boost when the matched concept is repeated in the
391
+ // query (e.g. "base formula class ... formula files" has "formula" twice, signalling
392
+ // it is the primary search target rather than a generic modifier like "files").
393
+ const rawQueryFreq = new Map();
394
+ for (const tok of query.trim().toLowerCase().split(/[\s-]+/).filter(Boolean)) {
395
+ if (!isStopWord(tok) && tok.length >= 2) {
396
+ rawQueryFreq.set(tok, (rawQueryFreq.get(tok) ?? 0) + 1);
397
+ }
398
+ }
399
+ // ── FTS5 BM25 normalization ──────────────────────────────────────────────────
400
+ //
401
+ // BM25 scores are negative (more negative = better match). We normalize the
402
+ // candidate set's BM25 scores to a [0, 50] bonus so summary-only matches
403
+ // (e.g. a function whose name is unrelated to the query but whose docstring
404
+ // describes it perfectly) rise above generic name-noise matches.
405
+ //
406
+ // Normalization formula:
407
+ // bonus_i = (score_i - worst) / (best - worst) * 50
408
+ // where best = min(bm25 values) and worst = max(bm25 values).
409
+ // When all scores are equal (range = 0), every symbol gets 25.
410
+ const bm25Values = symbols.map((s) => s.ftsBm25 ?? 0).filter((v) => v !== 0);
411
+ let bm25Bonuses = symbols.map(() => 0);
412
+ if (bm25Values.length > 0) {
413
+ const best = Math.min(...bm25Values); // most negative = best match
414
+ const worst = Math.max(...bm25Values); // least negative = worst match
415
+ const range = best - worst; // always <= 0
416
+ bm25Bonuses = symbols.map((s) => {
417
+ if (s.ftsBm25 === undefined)
418
+ return 0;
419
+ return range !== 0 ? Math.max(0, ((s.ftsBm25 - worst) / range) * 50) : 25;
420
+ });
421
+ }
422
+ // First pass: compute base scores (without BM25) for all symbols.
423
+ const baseResults = symbols.map((symbol) => score(symbol, queryLower, queryWords, rawQueryFreq, domain, opts));
424
+ // Cap BM25 bonus to 30% of its computed value when any symbol already has a
425
+ // dominant name-match score (≥80). This prevents BM25 from overriding a clear
426
+ // winner (e.g. identityExact+namePrefix+kindBoost on a NestJS *Service method)
427
+ // while still letting it act as a tiebreaker between similarly-scored symbols.
428
+ // The 30% scale reduces the maximum BM25 contribution from 50 to 15 points,
429
+ // ensuring that a ≤2-point base score gap cannot be flipped by content ranking
430
+ // when the result set already contains a strong name match.
431
+ const topBaseScore = Math.max(...baseResults.map((r) => r.score));
432
+ const bm25Scale = topBaseScore >= 80 ? 0.3 : 1.0;
433
+ // ── Path proximity boost (Task 417) ─────────────────────────────────────────
434
+ // When ≥ 3 candidates share the exact same lowercase name, boost the one
435
+ // whose file-path tokens overlap with query words (+5 per overlapping token).
436
+ // Suppressed for unique names to avoid noise.
437
+ const nameFreq = new Map();
438
+ for (const s of symbols) {
439
+ const n = s.name.toLowerCase();
440
+ nameFreq.set(n, (nameFreq.get(n) ?? 0) + 1);
441
+ }
442
+ const queryWordsSet = new Set(queryWords);
443
+ const scored = symbols.map((symbol, originalIndex) => {
444
+ const baseScore = baseResults[originalIndex];
445
+ const ftsBm25Bonus = Math.round(bm25Bonuses[originalIndex] * bm25Scale);
446
+ let pathProximity = 0;
447
+ if ((nameFreq.get(symbol.name.toLowerCase()) ?? 0) >= 3) {
448
+ pathProximity = computePathProximityBoost(symbol.filePath, queryWordsSet);
449
+ }
450
+ baseScore.debugScore.pathProximityBoost = pathProximity;
451
+ baseScore.debugScore.ftsBm25Bonus = ftsBm25Bonus;
452
+ baseScore.debugScore.total += ftsBm25Bonus + pathProximity;
453
+ return {
454
+ ...baseScore,
455
+ score: baseScore.score + ftsBm25Bonus + pathProximity,
456
+ symbol,
457
+ originalIndex,
458
+ };
459
+ });
40
460
  scored.sort((a, b) => {
41
461
  if (b.score !== a.score)
42
462
  return b.score - a.score;
@@ -50,7 +470,7 @@ export function rankSymbols(symbols, query, debug = false) {
50
470
  }));
51
471
  }
52
472
  // ─── Scoring ─────────────────────────────────────────────────────────────────
53
- function score(symbol, queryLower, queryWords) {
473
+ function score(symbol, queryLower, queryWords, rawQueryFreq, domain, opts) {
54
474
  const nameLower = symbol.name.toLowerCase();
55
475
  const sigLower = symbol.signature.toLowerCase();
56
476
  const sumLower = symbol.summary.toLowerCase();
@@ -66,32 +486,180 @@ function score(symbol, queryLower, queryWords) {
66
486
  matchReason = 'exact_name';
67
487
  }
68
488
  else if (nameLower.startsWith(queryLower)) {
69
- namePrefix = 60;
70
- total += 60;
71
- matchReason = 'prefix_name';
489
+ // Only award the prefix bonus when the character immediately after the
490
+ // query string in the *original* (cased) symbol name is a word-boundary:
491
+ // uppercase letter (camelCase), a non-alpha separator (_, \, :, .), or
492
+ // end of string. A continuing lowercase letter means the query is merely
493
+ // a prefix of a longer word inside the name — not a name-level prefix.
494
+ // e.g. query "model" vs name "models\Article_base" → nextChar="s" (lowercase)
495
+ // → treated as substring match, not prefix match.
496
+ const nextChar = symbol.name[queryLower.length];
497
+ const isWordBoundary = !nextChar || /[^a-z]/.test(nextChar);
498
+ if (isWordBoundary) {
499
+ namePrefix = 60;
500
+ total += 60;
501
+ matchReason = 'prefix_name';
502
+ }
503
+ else {
504
+ // The query string is embedded inside a longer word — treat as substring.
505
+ nameFuzzy = 40;
506
+ total += 40;
507
+ matchReason = 'name_contains';
508
+ }
72
509
  }
73
510
  else if (nameLower.includes(queryLower)) {
74
511
  nameFuzzy = 40;
75
512
  total += 40;
76
513
  matchReason = 'name_contains';
77
514
  }
78
- // ── Word-overlap rules (always additive; update matchReason if not yet set) ─
515
+ // ── Identity-exact boost ─────────────────────────────────────────────────────
516
+ //
517
+ // Fires when any single query word exactly matches the symbol's bare name
518
+ // (case-insensitive). Equivalent to JC's Identity channel (weight=2.0).
519
+ //
520
+ // Primary use case: with Go/Rust bare method names (Phase 46), struct/class
521
+ // symbols like `Builder` or `Mutex` share name parts with their own methods.
522
+ // For a query like "Builder struct" or "Mutex lock", the struct symbol should
523
+ // rank above a method named `build` or `lock` on the same type.
524
+ //
525
+ // This differs from nameExact (100pt, entire query = name): identityExact fires
526
+ // for multi-word queries where ONE word is the symbol's full name.
527
+ //
528
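+ // Boost: +60 (×2 when the matched word occurs twice or more in the raw query) — large enough to overcome a method's kindBoost (+15–+30) advantage; data-definition kinds in multi-word queries get a scaled-down 40/N (minimum 10) instead.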
529
+ // For namespace-qualified C++ names like `folly::Future`, also check the
530
+ // bare local name (last segment after ::) for identity matching, since ground
531
+ // truth uses bare names while the index stores qualified names. Restricted to
532
+ // non-method symbols: methods with :: are class::method pairs (PHP, C++) where
533
+ // the bare method name is too generic to warrant a full identity boost.
534
+ const isNsQualified = nameLower.includes('::');
535
+ const bareLocalName = isNsQualified && symbol.kind !== 'method'
536
+ ? nameLower.split('::').pop()
537
+ : nameLower;
538
+ // For XML-disambiguated names (e.g. project@maven-cli), also check the bare
539
+ // tag name (part before @) for identity matching.
540
+ const isAtDisambiguated = nameLower.includes('@');
541
+ const bareTagName = isAtDisambiguated ? nameLower.split('@')[0] : nameLower;
542
+ // Find the query word that triggered identityExact (needed for the frequency lookup).
543
+ const identityMatchWord = queryWords.find((w) => w === nameLower)
544
+ ?? (bareLocalName !== nameLower ? queryWords.find((w) => w === bareLocalName) : undefined)
545
+ ?? (bareTagName !== nameLower ? queryWords.find((w) => w === bareTagName) : undefined);
546
+ let identityExact = 0;
547
+ if (queryWords.length > 0 && identityMatchWord !== undefined) {
548
+ // Data-definition symbols (const, type, interface, enum, property) named after
549
+ // a single concept (e.g. STRIPE, Subscribers) fire identityExact when that word
550
+ // appears anywhere in a multi-word query, even as incidental context rather than
551
+ // the actual search target. Scale the bonus proportionally so the signal weakens
552
+ // as the query grows: 40/N, minimum 10. Code-definition symbols (function, method,
553
+ // class, …) use a higher base of 60 with a frequency multiplier (capped at 2×):
554
+ // when the matched word appears twice in the raw query (e.g. "base formula class
555
+ // … formula files") that repetition signals it is the primary target, giving the
556
+ // correctly-named symbol enough margin to overcome BM25 noise from generically-
557
+ // named symbols that also match the repeated word.
558
+ const DATA_KINDS = new Set(['const', 'type', 'interface', 'enum', 'property']);
559
+ if (DATA_KINDS.has(symbol.kind) && queryWords.length > 1) {
560
+ identityExact = Math.max(10, Math.round(40 / queryWords.length));
561
+ }
562
+ else {
563
+ const rawFreq = rawQueryFreq.get(identityMatchWord) ?? 1;
564
+ identityExact = 60 * Math.min(rawFreq, 2);
565
+ }
566
+ total += identityExact;
567
+ }
568
+ // ── Compound underscore identity boost (Task 433) ─────────────────────────
569
+ // Fires when the symbol has a compound underscore name (e.g. payment_intent)
570
+ // and ALL underscore-separated parts appear as query words. Differentiates
571
+ // "payment_intent" from "payment_method" for query "create payment intent"
572
+ // where both share the "payment" part but only payment_intent matches all parts.
573
+ const queryWordsSet = new Set(queryWords);
574
+ let compoundUnderscoreBoost = 0;
575
+ if (identityExact === 0 && nameLower.includes('_')) {
576
+ const uParts = nameLower.split('_').filter((p) => p.length >= 2);
577
+ if (uParts.length >= 2 && uParts.every((p) => queryWordsSet.has(p))) {
578
+ compoundUnderscoreBoost = 30;
579
+ total += compoundUnderscoreBoost;
580
+ }
581
+ }
582
+ // ── Single-token exact-name boost (Task 434) ──────────────────────────────
583
+ // Fires only when the query is a single bare token. Lifts "Pod" above "PodSpec"
584
+ // for the query "Pod", and handles dot-qualified names via last-segment check.
585
+ const singleTokenExactBoost = computeSingleTokenExactBoost(symbol, queryLower);
586
+ total += singleTokenExactBoost;
587
+ // ── Word-overlap rules (word-boundary matching against split name parts) ────
588
+ //
589
+ // We split the symbol name into its constituent word-boundary parts
590
+ // (camelCase + snake_case + namespace separators) and check query words
591
+ // against those parts. Inflectional stem variants of each name part are
592
+ // also included so that query word "product" matches name part "products",
593
+ // "order" matches "orders", "review" matches "reviews", etc.
594
+ //
595
+ // This is more precise than substring matching: query word "model" matches
596
+ // the "model" part of "CIR_Model" but scores slightly less against the
597
+ // "models" namespace prefix in "models\\Article_base" (stem-only match).
79
598
  let wordOverlap = 0;
80
599
  if (queryWords.length > 0) {
81
- if (queryWords.every((w) => nameLower.includes(w))) {
82
- wordOverlap += 30;
600
+ const nameParts = splitNameParts(symbol.name);
601
+ // Add inflectional stems of each name part so that pluralized name parts
602
+ // match their singular query-word counterparts and vice-versa.
603
+ // e.g. "products" → "product", "orders" → "order", "reviews" → "review"
604
+ const namePartsSet = new Set(nameParts);
605
+ for (const p of nameParts) {
606
+ addStemsOf(p, namePartsSet);
607
+ }
608
+ // Two levels of matching used to differentiate exact vs stem-based hits:
609
+ // partStrict — query word appears verbatim in the split name parts
610
+ // partLoose — query word matches a stem variant of a name part
611
+ // This prevents a pluralised namespace prefix ("models") from scoring
612
+ // identically to an exact name-part match ("model" in "CIR_Model").
613
+ const partStrict = (w) => nameParts.some((p) => p === w);
614
+ const partLoose = (w) => namePartsSet.has(w);
615
+ if (queryWords.every(partLoose)) {
616
+ wordOverlap += 30; // all query words match name parts (exact or stem)
83
617
  }
84
- if (queryWords.some((w) => nameLower === w)) {
85
- wordOverlap += 20;
618
+ if (queryWords.some(partLoose)) {
619
+ wordOverlap += 20; // at least one query word matches
620
+ }
621
+ // Per-word bonus: exact part match earns +10; stem-only match earns +8.
622
+ for (const w of queryWords) {
623
+ if (partStrict(w))
624
+ wordOverlap += 10;
625
+ else if (partLoose(w))
626
+ wordOverlap += 8;
86
627
  }
87
- const wordsInName = queryWords.filter((w) => nameLower.includes(w)).length;
88
- wordOverlap += wordsInName * 10;
89
628
  if (wordOverlap > 0) {
90
629
  total += wordOverlap;
91
630
  if (matchReason === 'content_match')
92
631
  matchReason = 'word_overlap';
93
632
  }
94
633
  }
634
+ // ── Method verb bonus ────────────────────────────────────────────────────────
635
+ //
636
+ // For method symbols, give a +15 bonus when a query word exactly matches the
637
+ // FIRST split part of the method name — the "action verb" (e.g. "create" in
638
+ // "ProductsService.create", "get" in "OrdersService.getMyOrders").
639
+ //
640
+ // This differentiates application methods from helper/utility methods that
641
+ // happen to share other name parts with the query. Example:
642
+ // query "create product listing"
643
+ // ProductsService.create → verb "create" matches → +15 (total 89)
644
+ // buildProductListCacheKey → verb "build" ≠ "create" → no bonus (total 76)
645
+ //
646
+ // The bonus is intentionally limited to exact query-word matches on the first
647
+ // method part only — we do not use stems here because a stem match on the verb
648
+ // is too loose (e.g. "builds" → "build" would also match "get").
649
+ let methodVerbBonus = 0;
650
+ if (symbol.kind === 'method' && queryWords.length > 0) {
651
+ const dotIdx = symbol.name.indexOf('.');
652
+ const colonIdx = symbol.name.indexOf('::');
653
+ const sepIdx = dotIdx >= 0 ? dotIdx : colonIdx;
654
+ if (sepIdx > 0) {
655
+ const methodPart = symbol.name.slice(sepIdx + (colonIdx >= 0 && colonIdx === sepIdx ? 2 : 1));
656
+ const methodVerbParts = splitNameParts(methodPart);
657
+ if (methodVerbParts.length > 0 && queryWords.some((w) => w === methodVerbParts[0])) {
658
+ methodVerbBonus = 15;
659
+ }
660
+ }
661
+ }
662
+ total += methodVerbBonus;
95
663
  // ── Content rules (signature + summary) ────────────────────────────────────
96
664
  let signatureMatch = 0;
97
665
  let summaryMatch = 0;
@@ -102,42 +670,368 @@ function score(symbol, queryLower, queryWords) {
102
670
  summaryMatch += 5;
103
671
  summaryMatch += queryWords.filter((w) => sumLower.includes(w)).length * 1;
104
672
  total += signatureMatch + summaryMatch;
673
+ // ── Kind boost for application-layer methods and Rust traits ────────────────
674
+ //
675
+ // For natural-language "how to do X" queries, a method on a *Service class is
676
+ // almost always the correct answer — not the DTO, event type, schema const, or
677
+ // controller delegate that happens to share words with the query.
678
+ //
679
+ // Boost values (additive):
680
+ // +30 — method on a *Service class (e.g. AuthService.login)
681
+ // +20 — interface-kind symbol (e.g. a Rust trait) when every word-boundary part of its name is a query word
682
+ // +15 — method on a *Repository / *Manager / *Store class (data-access layer)
683
+ // +15 — method on a *_model / *Model or *_controller / *Controller class (PHP CodeIgniter model convention; PHP / NestJS controller actions)
684
+ // +15 — method on a *Handler / *DB / *Client class (Go HTTP handlers, DB layers, API clients)
685
+ // +15 — method on a *Activity / *Fragment / *Adapter / *ViewModel class (Android framework)
686
+ // +15 — method on a *Processor / *Indexer / *Parser class (Python application-layer patterns)
687
+ // +15 — method on a *EventSubscriber / *Listener / *FormType class (Symfony patterns)
688
+ //
689
+ // The method boost is applied to 'method' kind symbols. Class context is
690
+ // extracted from the symbol name (qualified: "ClassName.method" or
691
+ // "TypeName::method") or, for bare-name handlers (Java, Rust), from the
692
+ // signature prefix ("ClassName.method: <sig>" / "TypeName::method: <sig>").
693
+ // The interface boost applies to Rust traits (kind: 'interface') when every
693
+ // word-boundary part of the trait name appears among the query words.
695
+ let kindBoost = 0;
696
+ // Rust trait kindBoost: boost when ALL word-boundary parts of the trait name
697
+ // are matched by query words. This lifts Serialize/Deserialize above private
698
+ // helpers for queries like "serializable type" or "implement serialize",
699
+ // without boosting TypeScript option-bag interfaces (e.g. NuxtLinkOptions
700
+ // which has parts [nuxt, link, options] — "link" is rarely in the query).
701
+ if (symbol.kind === 'interface' && queryWords.length > 0) {
702
+ const traitParts = splitNameParts(symbol.name);
703
+ const hasQueryMatch = traitParts.length > 0 && traitParts.every((p) => queryWords.includes(p));
704
+ if (hasQueryMatch)
705
+ kindBoost = 20;
706
+ }
707
+ if (symbol.kind === 'method') {
708
+ const dotIdx = symbol.name.indexOf('.');
709
+ const colonIdx = symbol.name.indexOf('::');
710
+ const sepIdx = dotIdx >= 0 ? dotIdx : colonIdx;
711
+ // For bare names (Java/Rust handlers store bare methodName), fall back to
712
+ // extracting the class/type name from the signature prefix.
713
+ const classPart = sepIdx > 0
714
+ ? symbol.name.slice(0, sepIdx).toLowerCase()
715
+ : classFromSignature(symbol.name, symbol.signature);
716
+ if (classPart) {
717
+ if (classPart.endsWith('service')) {
718
+ kindBoost = 30;
719
+ }
720
+ else if (classPart.endsWith('repository') ||
721
+ classPart.endsWith('manager') ||
722
+ classPart.endsWith('store') ||
723
+ classPart.endsWith('_model') ||
724
+ (classPart.endsWith('model') && classPart.length > 5) ||
725
+ // PHP / NestJS controller action methods
726
+ classPart.endsWith('_controller') ||
727
+ (classPart.endsWith('controller') && classPart.length > 10) ||
728
+ // Go HTTP handlers, database layers, and API clients
729
+ classPart.endsWith('handler') ||
730
+ classPart.endsWith('db') ||
731
+ classPart.endsWith('client') ||
732
+ // Android framework classes (lifecycle methods, adapters)
733
+ classPart.endsWith('activity') ||
734
+ classPart.endsWith('fragment') ||
735
+ classPart.endsWith('adapter') ||
736
+ classPart.endsWith('viewmodel') ||
737
+ // Python application-layer class patterns
738
+ classPart.endsWith('processor') ||
739
+ classPart.endsWith('indexer') ||
740
+ classPart.endsWith('parser') ||
741
+ // Symfony event-driven and form patterns
742
+ classPart.endsWith('eventsubscriber') ||
743
+ classPart.endsWith('listener') ||
744
+ classPart.endsWith('formtype') ||
745
+ (classPart.endsWith('type') && classPart.length > 4)) {
746
+ kindBoost = 15;
747
+ }
748
+ }
749
+ }
750
+ total += kindBoost;
751
+ // ── Kind-hint boost ──────────────────────────────────────────────────────────
752
+ //
753
+ // When the query explicitly names a symbol kind ("class that ...", "callback
754
+ // interface", "enum of ..."), strongly prefer symbols of that exact kind.
755
+ // This prevents method/function symbols from outranking the class or interface
756
+ // the query is asking about.
757
+ //
758
+ // Boost value: +35 — enough to overcome the method kindBoost (+15–+30) and a
759
+ // multi-word overlap advantage that a method on a similarly-named class may have.
760
+ //
761
+ // Match rules (all case-insensitive, word-boundary in query words):
762
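+ // "class" / "cls" → prefer kind 'class' or 'struct'
763
+ // "interface" → prefer kind 'class', 'struct', or 'interface'
764
+ // "struct" → prefer kind 'struct'
765
+ // "enum" → prefer kind 'enum'
766
+ // "function" → prefer kind 'function' (NOTE(review): no branch below implements this rule; the hints above are checked in this order and only the first one present in the query is applied)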
767
+ let kindHintBoost = 0;
768
+ if (queryWords.includes('class') || queryWords.includes('cls')) {
769
+ if (symbol.kind === 'class' || symbol.kind === 'struct')
770
+ kindHintBoost = 35;
771
+ }
772
+ else if (queryWords.includes('interface')) {
773
+ if (symbol.kind === 'class' || symbol.kind === 'struct' || symbol.kind === 'interface')
774
+ kindHintBoost = 35;
775
+ }
776
+ else if (queryWords.includes('struct')) {
777
+ if (symbol.kind === 'struct')
778
+ kindHintBoost = 35;
779
+ }
780
+ else if (queryWords.includes('enum')) {
781
+ if (symbol.kind === 'enum')
782
+ kindHintBoost = 35;
783
+ }
784
+ total += kindHintBoost;
785
+ // ── Library path penalty ─────────────────────────────────────────────────────
786
+ //
787
+ // Symbols from well-known library/framework directories (CodeIgniter system/,
788
+ // Composer vendor/, npm node_modules/, etc.) are almost never the intended
789
+ // answer for a natural-language query about application behaviour. Applying a
790
+ // fixed penalty pushes them below application symbols that scored similarly on
791
+ // name and word-overlap rules, without excluding them entirely (they still
792
+ // appear when no application code matches).
793
+ //
794
+ // Penalty: -35 points — enough to overcome a 1-word lexical advantage that a
795
+ // library class may have over an application wrapper (e.g. Twig_Template::render
796
+ // matches "template" while the application Twig::render does not, but for
797
+ // realistic multi-word queries the application symbol still wins).
798
+ // Its magnitude sits between three and four per-word bonuses (10 × 3 = 30 < 35 < 40 = 10 × 4) so that a library
799
+ // symbol with 4+ extra matching words can still surface for explicit library
800
+ // lookups (e.g. query "CI_DB_driver execute").
801
+ let libraryPenalty = 0;
802
+ if (isLibraryPath(symbol.filePath)) {
803
+ libraryPenalty = -35;
804
+ total += libraryPenalty;
805
+ }
806
+ // Perl test-fixture penalty: symbols in t/lib/ (e.g. TestApp::Controller)
807
+ // are test stubs that should not outrank the actual library API symbols.
808
+ if (libraryPenalty === 0) {
809
+ const ext = symbol.filePath.split('.').pop()?.toLowerCase() ?? '';
810
+ if (ext === 'pm' || ext === 'pl') {
811
+ const normalizedFp = symbol.filePath.replace(/\\/g, '/').toLowerCase();
812
+ if (normalizedFp.startsWith('t/lib/') || normalizedFp.includes('/t/lib/')) {
813
+ libraryPenalty = -25;
814
+ total += libraryPenalty;
815
+ }
816
+ }
817
+ }
818
+ // ── Core path boost (Task 414) ────────────────────────────────────────────
819
+ // For Java/Groovy repos: boost canonical core source paths (+15) and penalise
820
+ // plugin implementations (-35) so core methods surface above plugin overrides.
821
+ let corePathBoost = 0;
822
+ if (domain === 'java' && libraryPenalty === 0) {
823
+ // Check plugin penalty BEFORE core-path boost: plugin dirs may also contain
824
+ // /main/java/ sub-paths, so ordering matters.
825
+ if (isJavaPluginPath(symbol.filePath)) {
826
+ corePathBoost = -35;
827
+ total += corePathBoost;
828
+ }
829
+ else if (isCoreJavaPath(symbol.filePath)) {
830
+ corePathBoost = 15;
831
+ total += corePathBoost;
832
+ }
833
+ }
834
+ // ── Frontend path boost (Task 415) ───────────────────────────────────────
835
+ // In mixed monorepos (frontend + backend apps/ subdirs), boost symbols from
836
+ // frontend paths when the query uses hook/component/use* vocabulary.
837
+ let frontendPathBoost = 0;
838
+ if (opts?.isMixedMonorepo && isFrontendAppPath(symbol.filePath) && hasFrontendVocab(queryWords)) {
839
+ frontendPathBoost = 20;
840
+ total += frontendPathBoost;
841
+ }
842
+ // ── Use/hook bonus (Task 416) ────────────────────────────────────────────
843
+ // When the query is asking for a React hook (use*/hook vocabulary) and the
844
+ // OR-fallback fired (indicated by opts.hasReactHookQuery), reward symbols
845
+ // whose names follow the React hook naming convention (use[A-Z]...).
846
+ let useHookBonus = 0;
847
+ if (opts?.hasReactHookQuery && /^use[A-Z]/.test(symbol.name)) {
848
+ useHookBonus = 20;
849
+ total += useHookBonus;
850
+ }
851
+ // ── Groovy source boost (Task 423) ───────────────────────────────────────
852
+ // In mixed Java+Groovy repos (e.g. gradle, groovy) Java methods dominate by
853
+ // sheer count. Give a +10 bonus to symbols in .groovy files so that Groovy
854
+ // `def` methods surface above equally-scored Java counterparts.
855
+ let groovySourceBoost = 0;
856
+ if (opts?.isJavaGroovyMixed && symbol.filePath.endsWith('.groovy')) {
857
+ groovySourceBoost = 10;
858
+ total += groovySourceBoost;
859
+ }
860
+ // ── Phase 73 boosts ───────────────────────────────────────────────────────
861
+ const p73AngularLifecycle = computeAngularLifecycleBoost(symbol, queryWords, opts?.isAngularRepo ?? false);
862
+ total += p73AngularLifecycle;
863
+ const p73RenderingCompound = computeRenderingCompoundBoost(symbol, queryWords, domain);
864
+ total += p73RenderingCompound;
865
+ const p73ReactQueryHook = computeReactQueryHookBoost(symbol, queryWords);
866
+ total += p73ReactQueryHook;
867
+ const p73Interceptor = computeInterceptorBoost(symbol, queryWords);
868
+ total += p73Interceptor;
869
+ const p73PackageContext = computePackageContextBoost(symbol, queryWords);
870
+ total += p73PackageContext;
871
+ const p73TrpcPrefix = computeTrpcPrefixBoost(symbol, queryWords);
872
+ total += p73TrpcPrefix;
105
873
  const debugScore = {
106
874
  total,
107
875
  nameExact,
108
876
  namePrefix,
109
877
  nameFuzzy,
878
+ identityExact,
879
+ compoundUnderscoreBoost,
880
+ singleTokenExactBoost,
110
881
  wordOverlap,
882
+ methodVerbBonus,
111
883
  signatureMatch,
112
884
  summaryMatch,
113
- kindBoost: 0,
885
+ kindBoost,
886
+ kindHintBoost,
887
+ libraryPenalty,
888
+ corePathBoost,
889
+ frontendPathBoost,
890
+ useHookBonus,
891
+ groovySourceBoost,
892
+ angularLifecycleBoost: p73AngularLifecycle,
893
+ renderingCompoundBoost: p73RenderingCompound,
894
+ reactQueryHookBoost: p73ReactQueryHook,
895
+ interceptorBoost: p73Interceptor,
896
+ packageContextBoost: p73PackageContext,
897
+ trpcPrefixBoost: p73TrpcPrefix,
898
+ pathProximityBoost: 0, // filled in by rankSymbols after the name-frequency pass
114
899
  recencyBoost: 0,
900
+ ftsBm25Bonus: 0,
115
901
  };
116
902
  return { score: total, matchReason, debugScore };
117
903
  }
118
904
  // ─── Helpers ─────────────────────────────────────────────────────────────────
905
+ /**
906
+ * Split a symbol name (string) into an array of lowercase word-boundary parts for precise matching.
907
+ *
908
+ * Handles namespace/qualifier separators (\\, ::, ., @), snake_case underscores, and
909
+ * camelCase/PascalCase boundaries. Parts are returned in order of appearance (duplicates kept); parts shorter than 2 characters are
910
+ * excluded.
911
+ *
912
+ * Examples:
913
+ * 'CIR_Model' → ['cir', 'model']
914
+ * 'Homepage_model::getSettings' → ['homepage', 'model', 'get', 'settings']
915
+ * 'models\\Article_base' → ['models', 'article', 'base']
916
+ * 'CIR_FrontController' → ['cir', 'front', 'controller']
917
+ * 'CI_DB_query_builder::_insert' → ['ci', 'db', 'query', 'builder', 'insert']
918
+ */
919
+ function splitNameParts(name) {
920
+ const parts = [];
921
+ // Split on namespace/method-call separators: \ (PHP/Python paths), : (PHP ::), . (TS dot notation), @ (XML disambiguation); runs of separators count as one
922
+ for (const segment of name.split(/[\\:.@]+/)) {
923
+ // Split each segment on underscores; filter(Boolean) drops the empty strings produced by leading or doubled underscores
924
+ for (const subSeg of segment.split('_').filter(Boolean)) {
925
+ // camelCase / PascalCase split within each snake_case segment: insert spaces at case boundaries, then split on them
926
+ const camelParts = subSeg
927
+ .replace(/([a-z\d])([A-Z])/g, '$1 $2') // lower-case letter or digit followed by upper-case: getSettings → get Settings
928
+ .replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2') // upper-case run followed by a capitalised word: HTTPServer → HTTP Server
929
+ .split(' ')
930
+ .map((p) => p.toLowerCase())
931
+ .filter((p) => p.length >= 2);
932
+ parts.push(...camelParts);
933
+ }
934
+ }
935
+ return parts;
936
+ }
937
+ /**
938
+ * Add inflectional suffix stems of `word` to `set` (the set is mutated in place; `word` itself is never added here and nothing is returned).
939
+ *
940
+ * Adding stems (rather than replacing the original word) means both the
941
+ * inflected form and the stem are available for matching — the inflected form
942
+ * matches index tokens that include the suffix; the stem matches code symbol
943
+ * parts that use the base form.
944
+ *
945
+ * Suffixes handled (the code tests them in the order -tion, -ing, -ed, -s; the first three return as soon as their length/suffix test passes, so at most one rule applies per word):
946
+ * -s (plural / 3rd-person) "models" → "model"
947
+ * -ing (gerund) "building" → "build"
948
+ * -ed (past tense) "updated" → "update" + "updat"
949
+ * "matched" → "matche" + "match"
950
+ * -tion (nominalisation) "pagination" → "paginat"
951
+ *
952
+ * For -ed we add both the e-drop form (strip only -d) and the regular form
953
+ * (strip -ed) so that both "update" (code symbol) and "match" are covered
954
+ * regardless of the inflection pattern.
955
+ */
956
+ function addStemsOf(word, set) {
957
+ // -tion: "pagination" → "paginat" (word must be at least 7 characters, so the stem keeps at least 3)
958
+ if (word.length > 6 && word.endsWith('tion')) {
959
+ const s = word.slice(0, -4);
960
+ if (s.length >= 2)
961
+ set.add(s);
962
+ return;
963
+ }
964
+ // -ing: "building" → "build" (word must be at least 6 characters, so short words such as "ring" are left alone)
965
+ if (word.length > 5 && word.endsWith('ing')) {
966
+ const s = word.slice(0, -3);
967
+ if (s.length >= 2)
968
+ set.add(s);
969
+ return;
970
+ }
971
+ // -ed: add both e-drop ("updated"→"update") and regular strip ("matched"→"match"); word must be at least 5 characters
972
+ // The e-drop form may produce noise ("matche"); the author's assumption is that such forms do not collide with real symbol parts.
973
+ if (word.length > 4 && word.endsWith('ed')) {
974
+ const dStem = word.slice(0, -1); // strip -d : "updated"→"update", "matched"→"matche"
975
+ const edStem = word.slice(0, -2); // strip -ed : "updated"→"updat", "matched"→"match"
976
+ if (dStem.length >= 2)
977
+ set.add(dStem);
978
+ if (edStem.length >= 2 && edStem !== dStem)
979
+ set.add(edStem);
980
+ return;
981
+ }
982
+ // -s: "models"→"model", "records"→"record" (word must be at least 4 characters; skip -ss endings like "class")
983
+ if (word.length > 3 && word.endsWith('s') && !word.endsWith('ss')) {
984
+ const s = word.slice(0, -1);
985
+ if (s.length >= 2)
986
+ set.add(s);
987
+ }
988
+ }
119
989
  /**
120
990
  * Extract a deduplicated set of lowercase search terms from the raw query.
121
991
  *
122
- * For camelCase/snake_case identifiers the component words are included so
123
- * scoring works on both the full token and its parts.
992
+ * Processing pipeline:
993
+ * 1. Split on whitespace AND hyphens ("front-end" → "front", "end").
994
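+ * 2. Discard English stop words and tokens shorter than 2 characters.
995
+ * 3. Add inflectional stem variants via addStemsOf; whole tokens additionally get code-domain synonyms (and their stems) via expandVerbSynonyms.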
996
+ * 4. For snake_case tokens split on underscores and repeat steps 2-3.
997
+ * 5. For camelCase tokens split on case boundaries and repeat steps 2-3.
124
998
  *
125
999
  * Examples:
126
- * 'indexFolder' → ['indexfolder', 'index', 'folder']
127
- * 'blast radius' → ['blast', 'radius']
128
- * 'get_symbol_source'→ ['get_symbol_source', 'get', 'symbol', 'source']
1000
+ * 'indexFolder' → ['indexfolder', 'index', 'folder']
1001
+ * 'blast radius' → ['blast', 'radius']
1002
+ * 'get_symbol_source' → ['get_symbol_source', 'get', 'symbol', 'source']
1003
+ * 'models' → ['models', 'model']
1004
+ * 'updated homepage' → ['updated', 'update', 'updat', 'homepage']
1005
+ * 'front-end controller' → ['front', 'end', 'controller']
129
1006
  */
130
- function extractQueryWords(raw) {
1007
+ function extractQueryWords(raw, domain) {
131
1008
  const words = new Set();
132
- for (const part of raw.split(/\s+/).filter(Boolean)) {
1009
+ // Split on both whitespace and hyphens so "front-end" → ["front", "end"]
1010
+ for (const part of raw.split(/[\s-]+/).filter(Boolean)) {
133
1011
  const lower = part.toLowerCase();
1012
+ // Skip English function words — they never appear in code symbol names and
1013
+ // inflate the "all words in name" (30-pt) and per-word (10-pt) scoring
1014
+ // rules, making the 30-pt bonus unachievable for any real symbol.
1015
+ if (isStopWord(lower))
1016
+ continue;
1017
+ if (lower.length < 2)
1018
+ continue;
134
1019
  words.add(lower);
1020
+ addStemsOf(lower, words);
1021
+ // Synonym expansion: add code-domain synonyms so the ranker rewards symbols
1022
+ // whose names use a synonym of the query word (e.g. "authenticate" → "login").
1023
+ for (const syn of expandVerbSynonyms(lower, domain)) {
1024
+ words.add(syn);
1025
+ addStemsOf(syn, words);
1026
+ }
135
1027
  if (part.includes('_')) {
136
1028
  // snake_case — split on underscores
137
1029
  for (const seg of part.split('_')) {
138
1030
  const s = seg.toLowerCase();
139
- if (s.length >= 2)
1031
+ if (s.length >= 2 && !isStopWord(s)) {
140
1032
  words.add(s);
1033
+ addStemsOf(s, words);
1034
+ }
141
1035
  }
142
1036
  }
143
1037
  else {
@@ -149,8 +1043,12 @@ function extractQueryWords(raw) {
149
1043
  .map((s) => s.toLowerCase())
150
1044
  .filter((s) => s.length >= 2);
151
1045
  if (parts.length > 1) {
152
- for (const s of parts)
153
- words.add(s);
1046
+ for (const s of parts) {
1047
+ if (!isStopWord(s)) {
1048
+ words.add(s);
1049
+ addStemsOf(s, words);
1050
+ }
1051
+ }
154
1052
  }
155
1053
  }
156
1054
  }