purecontext-mcp 1.2.0 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENT_INSTRUCTIONS.md +110 -784
- package/AGENT_REFERENCE.md +561 -0
- package/CHANGELOG.md +177 -6
- package/FRAMEWORK-ADAPTERS.md +351 -0
- package/LANGUAGE-SUPPORT.md +144 -0
- package/README.md +92 -12
- package/USER-GUIDE.md +8 -0
- package/dist/cli/hooks.d.ts +28 -0
- package/dist/cli/hooks.d.ts.map +1 -0
- package/dist/cli/hooks.js +570 -0
- package/dist/cli/hooks.js.map +1 -0
- package/dist/cli/install-detect.d.ts +16 -0
- package/dist/cli/install-detect.d.ts.map +1 -0
- package/dist/cli/install-detect.js +70 -0
- package/dist/cli/install-detect.js.map +1 -0
- package/dist/cli/install-writers.d.ts +59 -0
- package/dist/cli/install-writers.d.ts.map +1 -0
- package/dist/cli/install-writers.js +292 -0
- package/dist/cli/install-writers.js.map +1 -0
- package/dist/cli/install.d.ts +14 -0
- package/dist/cli/install.d.ts.map +1 -0
- package/dist/cli/install.js +150 -0
- package/dist/cli/install.js.map +1 -0
- package/dist/config/config-loader.js +3 -0
- package/dist/config/config-loader.js.map +1 -1
- package/dist/config/config-schema.d.ts +11 -0
- package/dist/config/config-schema.d.ts.map +1 -1
- package/dist/config/config-schema.js +15 -0
- package/dist/config/config-schema.js.map +1 -1
- package/dist/core/db/symbol-store.d.ts +1 -0
- package/dist/core/db/symbol-store.d.ts.map +1 -1
- package/dist/core/db/symbol-store.js +120 -6
- package/dist/core/db/symbol-store.js.map +1 -1
- package/dist/core/file-discovery.d.ts +6 -0
- package/dist/core/file-discovery.d.ts.map +1 -1
- package/dist/core/file-discovery.js +20 -13
- package/dist/core/file-discovery.js.map +1 -1
- package/dist/core/file-processor.d.ts.map +1 -1
- package/dist/core/file-processor.js +26 -1
- package/dist/core/file-processor.js.map +1 -1
- package/dist/core/git-log-reader.d.ts.map +1 -1
- package/dist/core/git-log-reader.js +21 -0
- package/dist/core/git-log-reader.js.map +1 -1
- package/dist/core/index-manager.d.ts.map +1 -1
- package/dist/core/index-manager.js +21 -7
- package/dist/core/index-manager.js.map +1 -1
- package/dist/core/indexing-worker.d.ts.map +1 -1
- package/dist/core/indexing-worker.js +14 -0
- package/dist/core/indexing-worker.js.map +1 -1
- package/dist/core/parse-dispatcher.d.ts.map +1 -1
- package/dist/core/parse-dispatcher.js +20 -5
- package/dist/core/parse-dispatcher.js.map +1 -1
- package/dist/core/search/query-preprocessor.d.ts +69 -3
- package/dist/core/search/query-preprocessor.d.ts.map +1 -1
- package/dist/core/search/query-preprocessor.js +450 -17
- package/dist/core/search/query-preprocessor.js.map +1 -1
- package/dist/core/search/relevance-ranker.d.ts +60 -5
- package/dist/core/search/relevance-ranker.d.ts.map +1 -1
- package/dist/core/search/relevance-ranker.js +931 -33
- package/dist/core/search/relevance-ranker.js.map +1 -1
- package/dist/core/test-mapper.d.ts.map +1 -1
- package/dist/core/test-mapper.js +7 -1
- package/dist/core/test-mapper.js.map +1 -1
- package/dist/core/types.d.ts +28 -1
- package/dist/core/types.d.ts.map +1 -1
- package/dist/handlers/angular-html.d.ts +3 -0
- package/dist/handlers/angular-html.d.ts.map +1 -0
- package/dist/handlers/angular-html.js +215 -0
- package/dist/handlers/angular-html.js.map +1 -0
- package/dist/handlers/c.d.ts.map +1 -1
- package/dist/handlers/c.js +19 -0
- package/dist/handlers/c.js.map +1 -1
- package/dist/handlers/cpp-macro-registry.d.ts +21 -0
- package/dist/handlers/cpp-macro-registry.d.ts.map +1 -0
- package/dist/handlers/cpp-macro-registry.js +44 -0
- package/dist/handlers/cpp-macro-registry.js.map +1 -0
- package/dist/handlers/cpp.d.ts.map +1 -1
- package/dist/handlers/cpp.js +579 -10
- package/dist/handlers/cpp.js.map +1 -1
- package/dist/handlers/csharp.d.ts.map +1 -1
- package/dist/handlers/csharp.js +39 -2
- package/dist/handlers/csharp.js.map +1 -1
- package/dist/handlers/css.d.ts +3 -0
- package/dist/handlers/css.d.ts.map +1 -0
- package/dist/handlers/css.js +154 -0
- package/dist/handlers/css.js.map +1 -0
- package/dist/handlers/erlang.d.ts.map +1 -1
- package/dist/handlers/erlang.js +8 -1
- package/dist/handlers/erlang.js.map +1 -1
- package/dist/handlers/fortran.js +1 -1
- package/dist/handlers/fortran.js.map +1 -1
- package/dist/handlers/go.d.ts.map +1 -1
- package/dist/handlers/go.js +87 -2
- package/dist/handlers/go.js.map +1 -1
- package/dist/handlers/handler-registry.d.ts.map +1 -1
- package/dist/handlers/handler-registry.js +4 -0
- package/dist/handlers/handler-registry.js.map +1 -1
- package/dist/handlers/hcl.d.ts +3 -0
- package/dist/handlers/hcl.d.ts.map +1 -0
- package/dist/handlers/hcl.js +193 -0
- package/dist/handlers/hcl.js.map +1 -0
- package/dist/handlers/java.d.ts.map +1 -1
- package/dist/handlers/java.js +33 -16
- package/dist/handlers/java.js.map +1 -1
- package/dist/handlers/kotlin.d.ts.map +1 -1
- package/dist/handlers/kotlin.js +48 -3
- package/dist/handlers/kotlin.js.map +1 -1
- package/dist/handlers/less.d.ts +3 -0
- package/dist/handlers/less.d.ts.map +1 -0
- package/dist/handlers/less.js +255 -0
- package/dist/handlers/less.js.map +1 -0
- package/dist/handlers/objective-c.d.ts.map +1 -1
- package/dist/handlers/objective-c.js +122 -64
- package/dist/handlers/objective-c.js.map +1 -1
- package/dist/handlers/openapi.d.ts.map +1 -1
- package/dist/handlers/openapi.js +30 -5
- package/dist/handlers/openapi.js.map +1 -1
- package/dist/handlers/php.d.ts.map +1 -1
- package/dist/handlers/php.js +287 -41
- package/dist/handlers/php.js.map +1 -1
- package/dist/handlers/protobuf.d.ts.map +1 -1
- package/dist/handlers/protobuf.js +1 -0
- package/dist/handlers/protobuf.js.map +1 -1
- package/dist/handlers/python.d.ts.map +1 -1
- package/dist/handlers/python.js +1 -3
- package/dist/handlers/python.js.map +1 -1
- package/dist/handlers/ruby-dsl.d.ts +23 -0
- package/dist/handlers/ruby-dsl.d.ts.map +1 -0
- package/dist/handlers/ruby-dsl.js +251 -0
- package/dist/handlers/ruby-dsl.js.map +1 -0
- package/dist/handlers/ruby.d.ts.map +1 -1
- package/dist/handlers/ruby.js +29 -4
- package/dist/handlers/ruby.js.map +1 -1
- package/dist/handlers/rust.d.ts.map +1 -1
- package/dist/handlers/rust.js +98 -2
- package/dist/handlers/rust.js.map +1 -1
- package/dist/handlers/scss.d.ts +3 -0
- package/dist/handlers/scss.d.ts.map +1 -0
- package/dist/handlers/scss.js +290 -0
- package/dist/handlers/scss.js.map +1 -0
- package/dist/handlers/sql.d.ts.map +1 -1
- package/dist/handlers/sql.js +37 -18
- package/dist/handlers/sql.js.map +1 -1
- package/dist/handlers/typescript.d.ts.map +1 -1
- package/dist/handlers/typescript.js +65 -17
- package/dist/handlers/typescript.js.map +1 -1
- package/dist/handlers/xml.d.ts.map +1 -1
- package/dist/handlers/xml.js +35 -2
- package/dist/handlers/xml.js.map +1 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +91 -0
- package/dist/index.js.map +1 -1
- package/dist/server/mcp-server.d.ts.map +1 -1
- package/dist/server/mcp-server.js +10 -0
- package/dist/server/mcp-server.js.map +1 -1
- package/dist/server/tools/detect-antipatterns.d.ts +1 -1
- package/dist/server/tools/get-architecture-snapshot.d.ts +1 -1
- package/dist/server/tools/get-entry-points.d.ts +1 -1
- package/dist/server/tools/get-lexical-scope-matches.d.ts +54 -0
- package/dist/server/tools/get-lexical-scope-matches.d.ts.map +1 -0
- package/dist/server/tools/get-lexical-scope-matches.js +470 -0
- package/dist/server/tools/get-lexical-scope-matches.js.map +1 -0
- package/dist/server/tools/search-symbols.d.ts +10 -0
- package/dist/server/tools/search-symbols.d.ts.map +1 -1
- package/dist/server/tools/search-symbols.js +353 -8
- package/dist/server/tools/search-symbols.js.map +1 -1
- package/dist/server/tools/trace-invocation-chain.d.ts +53 -0
- package/dist/server/tools/trace-invocation-chain.d.ts.map +1 -0
- package/dist/server/tools/trace-invocation-chain.js +280 -0
- package/dist/server/tools/trace-invocation-chain.js.map +1 -0
- package/dist/version.d.ts +1 -1
- package/dist/version.js +1 -1
- package/docs/02-installation.md +89 -17
- package/docs/05-cli-reference.md +89 -0
- package/docs/dev/benchmark-findings-eu-za-tebe.md +210 -0
- package/docs/dev/phase-35-coverage-audit.md +469 -0
- package/package.json +4 -1
|
@@ -7,17 +7,371 @@
|
|
|
7
7
|
* scheme that promotes exact name matches to the top.
|
|
8
8
|
*
|
|
9
9
|
* Scoring (additive, higher = more relevant):
|
|
10
|
-
* 100 — exact name match (case-insensitive)
|
|
10
|
+
* 100 — exact name match (case-insensitive, entire query = entire name)
|
|
11
11
|
* 60 — name starts with query
|
|
12
12
|
* 40 — name contains query as substring
|
|
13
|
-
*
|
|
14
|
-
*
|
|
15
|
-
*
|
|
13
|
+
* 60 — identity-exact: any single query word exactly equals the symbol name (10–40 for data kinds, scaled by 40/queryWords.length); 2× when matched word appears twice in raw query
|
|
14
|
+
* 30 — all query words match a word-boundary name part (exact or stem)
|
|
15
|
+
* 20 — any query word matches a word-boundary name part (exact or stem)
|
|
16
|
+
* 15 — method verb bonus: first part of method name matches a query word
|
|
17
|
+
* 30 — kindBoost: method on a *Service class
|
|
18
|
+
* 20 — kindBoost: Rust trait (interface kind) when ALL trait name parts match query words
|
|
19
|
+
* 15 — kindBoost: method on a *Repository / *Manager / *Store / *_model / *Handler / *DB / *Client / *Activity / *Fragment / *Adapter / *ViewModel / *Processor / *Indexer / *Parser / *EventSubscriber / *Listener / *FormType class
|
|
20
|
+
* 10 — each query word with an exact word-boundary name-part match
|
|
21
|
+
* 8 — each query word with a stem-only word-boundary name-part match
|
|
16
22
|
* 8 — query phrase in signature
|
|
17
23
|
* 2 — any query word in signature (per word)
|
|
18
24
|
* 5 — query phrase in summary
|
|
19
25
|
* 1 — any query word in summary (per word)
|
|
26
|
+
* +15 — core path boost: symbol in /core/src/main/java/ or /main/java/ (Java/Groovy repos)
|
|
27
|
+
* +20 — frontend path boost: symbol in /apps/dashboard/ etc. in mixed monorepos, on hook/component queries
|
|
28
|
+
* +20 — use/hook bonus: symbol name starts with useXxx and query has React hook vocabulary
|
|
29
|
+
* +5 — path proximity boost per overlapping query token (applies when >= 3 symbols share name)
|
|
30
|
+
* -35 — library path penalty (system/, vendor/, third_party/, node_modules/, bower_components/, engine/, erts/, contrib/, plus /lib/wx/, /blas/, /lapack/)
|
|
31
|
+
* -35 — Java plugin path penalty: /plugins/ or /plugin/ in Java/Groovy repos
|
|
32
|
+
*
|
|
33
|
+
* Query word extraction:
|
|
34
|
+
* - Hyphenated tokens split ("front-end" → "front", "end")
|
|
35
|
+
* - camelCase and snake_case tokens split into components
|
|
36
|
+
* - English stop words removed
|
|
37
|
+
* - Inflectional suffixes stripped to add stem variants:
|
|
38
|
+
* -s (plural) "models" → "model"
|
|
39
|
+
* -ing (gerund) "building" → "build"
|
|
40
|
+
* -ed (past) "updated" → "update" and "updat"
|
|
41
|
+
* -tion (nominal) "pagination" → "paginat"
|
|
20
42
|
*/
|
|
43
|
+
import { isStopWord, expandVerbSynonyms } from './query-preprocessor.js';
|
|
44
|
+
// ─── Library path detection ───────────────────────────────────────────────────
|
|
45
|
+
/**
|
|
46
|
+
* Directory names that always indicate third-party or low-priority library code,
|
|
47
|
+
* regardless of where they appear in the path. Symbols under these directories
|
|
48
|
+
* receive a -35 score penalty so application-level symbols rank above them.
|
|
49
|
+
*
|
|
50
|
+
* Covers:
|
|
51
|
+
* system/ CodeIgniter framework core
|
|
52
|
+
* vendor/ Composer packages (PHP) / generic vendor trees
|
|
53
|
+
* third_party/ Generic third-party library directories
|
|
54
|
+
* node_modules/ npm packages
|
|
55
|
+
* bower_components/ Bower packages
|
|
56
|
+
* engine/ Flutter C++ engine (pollutes Dart widget queries)
|
|
57
|
+
* erts/ Erlang/OTP BEAM VM C++ (pollutes Erlang stdlib)
|
|
58
|
+
* contrib/ Scientific computing legacy code
|
|
59
|
+
*/
|
|
60
|
+
const LIBRARY_PATH_SEGMENTS = new Set([
|
|
61
|
+
'system',
|
|
62
|
+
'vendor',
|
|
63
|
+
'third_party',
|
|
64
|
+
'node_modules',
|
|
65
|
+
'bower_components',
|
|
66
|
+
// Phase 71 additions:
|
|
67
|
+
'engine',
|
|
68
|
+
'erts',
|
|
69
|
+
'contrib',
|
|
70
|
+
]);
|
|
71
|
+
/**
|
|
72
|
+
* Multi-segment path substrings that identify library/low-priority code.
|
|
73
|
+
* Checked case-insensitively against the full lowercased path.
|
|
74
|
+
*
|
|
75
|
+
* /lib/wx/ Erlang/OTP wxWidgets C++ bindings
|
|
76
|
+
* /blas/ BLAS numerical library wrappers
|
|
77
|
+
* /lapack/ LAPACK numerical library wrappers
|
|
78
|
+
*/
|
|
79
|
+
const LIBRARY_PATH_SUBSTRINGS = [
|
|
80
|
+
'/lib/wx/',
|
|
81
|
+
'/blas/',
|
|
82
|
+
'/lapack/',
|
|
83
|
+
];
|
|
84
|
+
/**
|
|
85
|
+
* Return true when the symbol's file path contains a well-known library
|
|
86
|
+
* directory segment or multi-segment substring, indicating third-party /
|
|
87
|
+
* framework code.
|
|
88
|
+
*
|
|
89
|
+
* Uses forward-slash normalisation so paths work correctly on Windows and Unix.
|
|
90
|
+
* Checks are case-insensitive to handle /BLAS/, /Engine/, etc.
|
|
91
|
+
*/
|
|
92
|
+
export function isLibraryPath(filePath) {
|
|
93
|
+
const normalized = filePath.replace(/\\/g, '/').toLowerCase();
|
|
94
|
+
if (normalized.split('/').some((seg) => LIBRARY_PATH_SEGMENTS.has(seg)))
|
|
95
|
+
return true;
|
|
96
|
+
// Prepend '/' so that repo-relative paths like 'lib/wx/...' match '/lib/wx/'
|
|
97
|
+
const withLeadingSlash = '/' + normalized;
|
|
98
|
+
return LIBRARY_PATH_SUBSTRINGS.some((sub) => withLeadingSlash.includes(sub));
|
|
99
|
+
}
|
|
100
|
+
/**
|
|
101
|
+
* For methods stored with bare names (Java post-Task 258, Rust post-Task 255),
|
|
102
|
+
* extract the class/type name from the signature prefix.
|
|
103
|
+
*
|
|
104
|
+
* Signature formats:
|
|
105
|
+
* Java: "ClassName.methodName: <raw sig>"
|
|
106
|
+
* Rust: "TypeName::methodName: <raw sig>"
|
|
107
|
+
*
|
|
108
|
+
* Returns the lower-cased class/type name, or '' when extraction fails.
|
|
109
|
+
* Guards against false positives by requiring the class part to be a simple
|
|
110
|
+
* identifier (no dots or colons in it).
|
|
111
|
+
*/
|
|
112
|
+
function classFromSignature(name, signature) {
|
|
113
|
+
if (!signature)
|
|
114
|
+
return '';
|
|
115
|
+
// Dot notation — Java: "ClassName.methodName: ..."
|
|
116
|
+
const dotPat = '.' + name + ':';
|
|
117
|
+
const di = signature.indexOf(dotPat);
|
|
118
|
+
if (di > 0) {
|
|
119
|
+
const candidate = signature.slice(0, di);
|
|
120
|
+
if (!candidate.includes('.') && !candidate.includes(':')) {
|
|
121
|
+
return candidate.toLowerCase();
|
|
122
|
+
}
|
|
123
|
+
}
|
|
124
|
+
// Double-colon notation — Rust: "TypeName::methodName ..."
|
|
125
|
+
const colonPat = '::' + name;
|
|
126
|
+
const ci = signature.indexOf(colonPat);
|
|
127
|
+
if (ci > 0) {
|
|
128
|
+
const candidate = signature.slice(0, ci);
|
|
129
|
+
if (!candidate.includes('.') && !candidate.includes(':')) {
|
|
130
|
+
return candidate.toLowerCase();
|
|
131
|
+
}
|
|
132
|
+
}
|
|
133
|
+
return '';
|
|
134
|
+
}
|
|
135
|
+
// ─── Core path boost helpers (Task 414) ──────────────────────────────────────
|
|
136
|
+
/**
|
|
137
|
+
* Return true when the symbol is in the canonical source directory of a
|
|
138
|
+
* Java/Groovy project: /core/src/main/java/, /main/java/, or the Groovy variants.
|
|
139
|
+
* Only applied when domain === 'java' so TypeScript repos with similar paths
|
|
140
|
+
* are unaffected.
|
|
141
|
+
*/
|
|
142
|
+
function isCoreJavaPath(filePath) {
|
|
143
|
+
const p = '/' + filePath.replace(/\\/g, '/');
|
|
144
|
+
return (p.includes('/core/src/main/java/') ||
|
|
145
|
+
p.includes('/core/src/main/groovy/') ||
|
|
146
|
+
p.includes('/main/java/') ||
|
|
147
|
+
p.includes('/main/groovy/'));
|
|
148
|
+
}
|
|
149
|
+
/**
|
|
150
|
+
* Return true when the symbol lives in a Jenkins-style plugin tree.
|
|
151
|
+
* Penalised for Java/Groovy repos to lift canonical core methods above plugin
|
|
152
|
+
* overrides.
|
|
153
|
+
*/
|
|
154
|
+
function isJavaPluginPath(filePath) {
|
|
155
|
+
const p = '/' + filePath.replace(/\\/g, '/').toLowerCase();
|
|
156
|
+
return p.includes('/plugins/') || p.includes('/plugin/');
|
|
157
|
+
}
|
|
158
|
+
// ─── Frontend path boost helpers (Task 415) ──────────────────────────────────
|
|
159
|
+
/**
|
|
160
|
+
* Return true when the symbol lives in a frontend app directory of a mixed
|
|
161
|
+
* monorepo (e.g. novu apps/dashboard/, cal.com apps/web/).
|
|
162
|
+
*/
|
|
163
|
+
function isFrontendAppPath(filePath) {
|
|
164
|
+
const p = '/' + filePath.replace(/\\/g, '/');
|
|
165
|
+
return (p.includes('/apps/dashboard/') ||
|
|
166
|
+
p.includes('/apps/web/') ||
|
|
167
|
+
p.includes('/apps/frontend/') ||
|
|
168
|
+
p.includes('/apps/client/'));
|
|
169
|
+
}
|
|
170
|
+
/**
|
|
171
|
+
* Return true when the query contains vocabulary strongly associated with
|
|
172
|
+
* React hooks or frontend components.
|
|
173
|
+
*/
|
|
174
|
+
function hasFrontendVocab(queryWords) {
|
|
175
|
+
return queryWords.some((w) => (w.startsWith('use') && w.length >= 4) ||
|
|
176
|
+
w === 'hook' ||
|
|
177
|
+
w === 'hooks' ||
|
|
178
|
+
w === 'component' ||
|
|
179
|
+
w === 'react' ||
|
|
180
|
+
w === 'vue' ||
|
|
181
|
+
w === 'svelte');
|
|
182
|
+
}
|
|
183
|
+
// ─── Path proximity boost helpers (Task 417) ─────────────────────────────────
|
|
184
|
+
/**
|
|
185
|
+
* Common path segments that are too generic to signal query relevance.
|
|
186
|
+
* Excluded from the path-proximity overlap calculation.
|
|
187
|
+
*/
|
|
188
|
+
const COMMON_PATH_SEGMENTS = new Set([
|
|
189
|
+
'src', 'lib', 'app', 'apps', 'packages', 'core', 'main',
|
|
190
|
+
'index', 'test', 'tests', 'dist', 'build',
|
|
191
|
+
]);
|
|
192
|
+
/**
|
|
193
|
+
* Compute a path-proximity bonus for symbols whose file-path tokens overlap
|
|
194
|
+
* with query words. Only applied when ≥ 3 candidates share the exact same
|
|
195
|
+
* lowercase name (suppresses noise for unique symbols).
|
|
196
|
+
*
|
|
197
|
+
* Returns +5 per overlapping token.
|
|
198
|
+
*/
|
|
199
|
+
function computePathProximityBoost(filePath, queryWords) {
|
|
200
|
+
const pathTokens = filePath
|
|
201
|
+
.replace(/\\/g, '/')
|
|
202
|
+
.split(/[/\\\-_.]/)
|
|
203
|
+
.filter((t) => t.length >= 3 && !COMMON_PATH_SEGMENTS.has(t.toLowerCase()))
|
|
204
|
+
.map((t) => t.toLowerCase());
|
|
205
|
+
const overlap = pathTokens.filter((t) => queryWords.has(t)).length;
|
|
206
|
+
return overlap * 5;
|
|
207
|
+
}
|
|
208
|
+
// ─── Phase 73 boost helpers ───────────────────────────────────────────────────
|
|
209
|
+
const ANGULAR_LIFECYCLE_METHODS = new Set([
|
|
210
|
+
'ngOnInit', 'ngOnDestroy', 'ngAfterViewInit', 'ngAfterContentInit',
|
|
211
|
+
'ngOnChanges', 'ngDoCheck', 'ngAfterViewChecked', 'ngAfterContentChecked',
|
|
212
|
+
]);
|
|
213
|
+
const LIFECYCLE_QUERY_WORDS = new Set([
|
|
214
|
+
'initialization', 'initialize', 'setup', 'teardown', 'destroy', 'cleanup',
|
|
215
|
+
'mount', 'unmount', 'render',
|
|
216
|
+
]);
|
|
217
|
+
/**
|
|
218
|
+
* Boost Angular lifecycle methods (+10) when the query contains lifecycle
|
|
219
|
+
* vocabulary ("initialization", "destroy", etc.). Only fires in Angular repos.
|
|
220
|
+
*/
|
|
221
|
+
function computeAngularLifecycleBoost(symbol, queryWords, isAngularRepo) {
|
|
222
|
+
if (!isAngularRepo)
|
|
223
|
+
return 0;
|
|
224
|
+
const methodName = symbol.name.split('.').pop() ?? symbol.name;
|
|
225
|
+
if (!ANGULAR_LIFECYCLE_METHODS.has(methodName))
|
|
226
|
+
return 0;
|
|
227
|
+
for (const w of queryWords)
|
|
228
|
+
if (LIFECYCLE_QUERY_WORDS.has(w))
|
|
229
|
+
return 10;
|
|
230
|
+
return 0;
|
|
231
|
+
}
|
|
232
|
+
const RENDERING_VERBS = new Set(['render', 'draw', 'stroke', 'paint', 'plot', 'sketch']);
|
|
233
|
+
/**
|
|
234
|
+
* Boost rendering functions (+15) that share a verb (render/draw/etc.) AND at
|
|
235
|
+
* least one noun with the query. Only fires in rendering-domain repos.
|
|
236
|
+
*
|
|
237
|
+
* Differentiates "renderSelectionElement" from bare "render" on a query like
|
|
238
|
+
* "render canvas selection element" — only the compound function has the extra
|
|
239
|
+
* noun "selection" in its name.
|
|
240
|
+
*/
|
|
241
|
+
function computeRenderingCompoundBoost(symbol, queryWords, domain) {
|
|
242
|
+
if (domain !== 'rendering')
|
|
243
|
+
return 0;
|
|
244
|
+
if (symbol.kind !== 'function')
|
|
245
|
+
return 0;
|
|
246
|
+
const nameWords = splitNameParts(symbol.name);
|
|
247
|
+
const hasVerb = nameWords.some((w) => RENDERING_VERBS.has(w)) &&
|
|
248
|
+
queryWords.some((w) => RENDERING_VERBS.has(w));
|
|
249
|
+
if (!hasVerb)
|
|
250
|
+
return 0;
|
|
251
|
+
const querySet = new Set(queryWords);
|
|
252
|
+
const nameNouns = nameWords.filter((w) => !RENDERING_VERBS.has(w));
|
|
253
|
+
const overlap = nameNouns.filter((w) => querySet.has(w)).length;
|
|
254
|
+
return overlap >= 1 ? 15 : 0;
|
|
255
|
+
}
|
|
256
|
+
const MUTATION_QUERY_VERBS = new Set(['create', 'update', 'delete', 'patch', 'remove', 'add']);
|
|
257
|
+
const QUERY_FILE_RE = /\b(queries|mutations|hooks)\b/i;
|
|
258
|
+
/**
|
|
259
|
+
* Boost React Query hooks (+25) that live in queries/mutations/hooks files and
|
|
260
|
+
* match a mutation verb in the query.
|
|
261
|
+
*
|
|
262
|
+
* useCreateSecretV3 in src/hooks/api/secrets/queries.ts should rank first for
|
|
263
|
+
* queries like "create secret api hook" ahead of schema type symbols.
|
|
264
|
+
*/
|
|
265
|
+
function computeReactQueryHookBoost(symbol, queryWords) {
|
|
266
|
+
const methodName = symbol.name.split('.').pop() ?? symbol.name;
|
|
267
|
+
if (!/^use[A-Z]\w+$/.test(methodName))
|
|
268
|
+
return 0;
|
|
269
|
+
if (!QUERY_FILE_RE.test(symbol.filePath))
|
|
270
|
+
return 0;
|
|
271
|
+
for (const w of queryWords)
|
|
272
|
+
if (MUTATION_QUERY_VERBS.has(w))
|
|
273
|
+
return 25;
|
|
274
|
+
return 0;
|
|
275
|
+
}
|
|
276
|
+
const INTERCEPTOR_QUERY_WORDS = new Set([
|
|
277
|
+
'interceptor', 'intercept', 'resolver', 'resolve', 'guard',
|
|
278
|
+
'middleware', 'pipe', 'hook',
|
|
279
|
+
]);
|
|
280
|
+
// Matches Angular/NestJS convention: tokenInterceptor, errorInterceptor,
|
|
281
|
+
// bankAccountResolve (Angular route resolver), authGuard, etc.
|
|
282
|
+
const INTERCEPTOR_NAME_RE = /(?:Interceptor|Resolver|Resolve|Guard|Middleware|Pipe)$/;
|
|
283
|
+
/**
|
|
284
|
+
* Boost HTTP interceptor / resolver / guard symbols (+30) when the query
|
|
285
|
+
* explicitly mentions those concepts.
|
|
286
|
+
*/
|
|
287
|
+
function computeInterceptorBoost(symbol, queryWords) {
|
|
288
|
+
if (symbol.kind !== 'function' && symbol.kind !== 'class')
|
|
289
|
+
return 0;
|
|
290
|
+
const baseName = symbol.name.split('.').pop() ?? symbol.name;
|
|
291
|
+
if (!INTERCEPTOR_NAME_RE.test(baseName))
|
|
292
|
+
return 0;
|
|
293
|
+
for (const w of queryWords)
|
|
294
|
+
if (INTERCEPTOR_QUERY_WORDS.has(w))
|
|
295
|
+
return 30;
|
|
296
|
+
return 0;
|
|
297
|
+
}
|
|
298
|
+
// Generic MVC/framework namespace segments that appear in many package names
|
|
299
|
+
// but convey no discriminating signal — filtering prevents false matches like
|
|
300
|
+
// TestApp::Controller::Action matching "catalyst action controller" queries.
|
|
301
|
+
const PACKAGE_SEGMENT_STOPWORDS = new Set([
|
|
302
|
+
'test', 'testapp', 'base', 'action', 'controller', 'model', 'view',
|
|
303
|
+
'helper', 'plugin', 'role', 'app', 'core', 'type', 'class', 'package',
|
|
304
|
+
'lib', 'util', 'utils', 'common', 'shared',
|
|
305
|
+
]);
|
|
306
|
+
/**
|
|
307
|
+
* Boost Perl and R symbols (+4 for the first overlapping package token, +8 for each additional one) when query words
|
|
308
|
+
* overlap with the package name prefix.
|
|
309
|
+
*
|
|
310
|
+
* For "Mojolicious::Controller::render" matching "mojolicious render": the
|
|
311
|
+
* package tokens ["mojolicious", "controller"] overlap with "mojolicious" in
|
|
312
|
+
* the query → +4 (a single match scores +4; each additional match adds +8). Generic MVC segments (controller, action, model…) are excluded
|
|
313
|
+
* to prevent TestApp::Controller from spuriously matching catalyst queries.
|
|
314
|
+
*/
|
|
315
|
+
function computePackageContextBoost(symbol, queryWords) {
|
|
316
|
+
const ext = symbol.filePath.split('.').pop()?.toLowerCase() ?? '';
|
|
317
|
+
if (!['pm', 'pl', 'r'].includes(ext))
|
|
318
|
+
return 0;
|
|
319
|
+
const parts = symbol.name.split(/[.:]+/);
|
|
320
|
+
if (parts.length < 2)
|
|
321
|
+
return 0;
|
|
322
|
+
const packageWords = parts
|
|
323
|
+
.slice(0, -1)
|
|
324
|
+
.flatMap((p) => splitNameParts(p))
|
|
325
|
+
.filter((w) => !PACKAGE_SEGMENT_STOPWORDS.has(w));
|
|
326
|
+
const querySet = new Set(queryWords);
|
|
327
|
+
let overlap = 0;
|
|
328
|
+
for (const pw of packageWords)
|
|
329
|
+
if (querySet.has(pw))
|
|
330
|
+
overlap++;
|
|
331
|
+
// +4 for first match, +8 for each additional — single-match boost is intentionally
|
|
332
|
+
// modest to avoid tipping rankings where the framework name appears as context
|
|
333
|
+
// (e.g. "dispatch through the Catalyst router" should not boost Catalyst::Request
|
|
334
|
+
// above the bare `dispatch` function that is the correct result).
|
|
335
|
+
return overlap >= 2 ? (overlap - 1) * 8 + 4 : overlap >= 1 ? 4 : 0;
|
|
336
|
+
}
|
|
337
|
+
const TRPC_PREFIX_RE = /^(createTRPC|ProcedureBuilder)/;
|
|
338
|
+
const TRPC_QUERY_WORDS = new Set(['trpc', 'procedure', 'builder', 'router', 'rpc']);
|
|
339
|
+
/**
|
|
340
|
+
* Boost tRPC factory functions and ProcedureBuilder symbols (+20) when the query
|
|
341
|
+
* contains tRPC vocabulary.
|
|
342
|
+
*/
|
|
343
|
+
function computeTrpcPrefixBoost(symbol, queryWords) {
|
|
344
|
+
const baseName = symbol.name.split('.')[0] ?? symbol.name;
|
|
345
|
+
if (!TRPC_PREFIX_RE.test(baseName))
|
|
346
|
+
return 0;
|
|
347
|
+
for (const w of queryWords)
|
|
348
|
+
if (TRPC_QUERY_WORDS.has(w))
|
|
349
|
+
return 20;
|
|
350
|
+
return 0;
|
|
351
|
+
}
|
|
352
|
+
/**
|
|
353
|
+
* Single-token exact-name boost (+50 for full match, +40 for last dot-segment).
|
|
354
|
+
*
|
|
355
|
+
* Fires only when the query is a single bare token. Differentiates "Pod" from
|
|
356
|
+
* "PodSpec" when the user typed exactly the resource name, and handles
|
|
357
|
+
* dot-qualified names like "io.k8s.api.core.v1.Pod" via the last-segment check.
|
|
358
|
+
*/
|
|
359
|
+
function computeSingleTokenExactBoost(symbol, queryLower) {
|
|
360
|
+
if (queryLower.includes(' '))
|
|
361
|
+
return 0;
|
|
362
|
+
const nameLower = symbol.name.toLowerCase();
|
|
363
|
+
if (queryLower === nameLower)
|
|
364
|
+
return 50;
|
|
365
|
+
// For dot-qualified names (e.g. OpenAPI / protobuf schemas): also match the
|
|
366
|
+
// last name segment so "pod" matches "io.k8s.api.core.v1.Pod".
|
|
367
|
+
const lastDot = nameLower.lastIndexOf('.');
|
|
368
|
+
if (lastDot >= 0) {
|
|
369
|
+
const lastSegment = nameLower.slice(lastDot + 1);
|
|
370
|
+
if (queryLower === lastSegment)
|
|
371
|
+
return 40;
|
|
372
|
+
}
|
|
373
|
+
return 0;
|
|
374
|
+
}
|
|
21
375
|
// ─── Public API ───────────────────────────────────────────────────────────────
|
|
22
376
|
/**
|
|
23
377
|
* Score and sort a list of symbols by relevance to `query`.
|
|
@@ -27,16 +381,82 @@
|
|
|
27
381
|
*
|
|
28
382
|
* When `debug` is true, each result includes a `debugScore` breakdown.
|
|
29
383
|
*/
|
|
30
|
-
export function rankSymbols(symbols, query, debug = false) {
|
|
384
|
+
export function rankSymbols(symbols, query, debug = false, domain, opts) {
|
|
31
385
|
if (symbols.length === 0)
|
|
32
386
|
return [];
|
|
33
387
|
const queryLower = query.trim().toLowerCase();
|
|
34
|
-
const queryWords = extractQueryWords(query.trim());
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
388
|
+
const queryWords = extractQueryWords(query.trim(), domain);
|
|
389
|
+
// Count raw occurrences of each query word (before deduplication/stemming) so
|
|
390
|
+
// identityExact can award a 2× boost when the matched concept is repeated in the
|
|
391
|
+
// query (e.g. "base formula class ... formula files" has "formula" twice, signalling
|
|
392
|
+
// it is the primary search target rather than a generic modifier like "files").
|
|
393
|
+
const rawQueryFreq = new Map();
|
|
394
|
+
for (const tok of query.trim().toLowerCase().split(/[\s-]+/).filter(Boolean)) {
|
|
395
|
+
if (!isStopWord(tok) && tok.length >= 2) {
|
|
396
|
+
rawQueryFreq.set(tok, (rawQueryFreq.get(tok) ?? 0) + 1);
|
|
397
|
+
}
|
|
398
|
+
}
|
|
399
|
+
// ── FTS5 BM25 normalization ──────────────────────────────────────────────────
|
|
400
|
+
//
|
|
401
|
+
// BM25 scores are negative (more negative = better match). We normalize the
|
|
402
|
+
// candidate set's BM25 scores to a [0, 50] bonus so summary-only matches
|
|
403
|
+
// (e.g. a function whose name is unrelated to the query but whose docstring
|
|
404
|
+
// describes it perfectly) rise above generic name-noise matches.
|
|
405
|
+
//
|
|
406
|
+
// Normalization formula:
|
|
407
|
+
// bonus_i = (score_i - worst) / (best - worst) * 50
|
|
408
|
+
// where best = min(bm25 values) and worst = max(bm25 values).
|
|
409
|
+
// When all scores are equal (range = 0), every symbol with an ftsBm25 value gets 25; symbols without one get 0.
|
|
410
|
+
const bm25Values = symbols.map((s) => s.ftsBm25 ?? 0).filter((v) => v !== 0);
|
|
411
|
+
let bm25Bonuses = symbols.map(() => 0);
|
|
412
|
+
if (bm25Values.length > 0) {
|
|
413
|
+
const best = Math.min(...bm25Values); // most negative = best match
|
|
414
|
+
const worst = Math.max(...bm25Values); // least negative = worst match
|
|
415
|
+
const range = best - worst; // always <= 0
|
|
416
|
+
bm25Bonuses = symbols.map((s) => {
|
|
417
|
+
if (s.ftsBm25 === undefined)
|
|
418
|
+
return 0;
|
|
419
|
+
return range !== 0 ? Math.max(0, ((s.ftsBm25 - worst) / range) * 50) : 25;
|
|
420
|
+
});
|
|
421
|
+
}
|
|
422
|
+
// First pass: compute base scores (without BM25) for all symbols.
|
|
423
|
+
const baseResults = symbols.map((symbol) => score(symbol, queryLower, queryWords, rawQueryFreq, domain, opts));
|
|
424
|
+
// Cap BM25 bonus to 30% of its computed value when any symbol already has a
|
|
425
|
+
// dominant name-match score (≥80). This prevents BM25 from overriding a clear
|
|
426
|
+
// winner (e.g. identityExact+namePrefix+kindBoost on a NestJS *Service method)
|
|
427
|
+
// while still letting it act as a tiebreaker between similarly-scored symbols.
|
|
428
|
+
// The 30% scale reduces the maximum BM25 contribution from 50 to 15 points,
|
|
429
|
+
// ensuring that a ≤2-point base score gap cannot be flipped by content ranking
|
|
430
|
+
// when the result set already contains a strong name match.
|
|
431
|
+
const topBaseScore = Math.max(...baseResults.map((r) => r.score));
|
|
432
|
+
const bm25Scale = topBaseScore >= 80 ? 0.3 : 1.0;
|
|
433
|
+
// ── Path proximity boost (Task 417) ─────────────────────────────────────────
|
|
434
|
+
// When ≥ 3 candidates share the exact same lowercase name, boost the one
|
|
435
|
+
// whose file-path tokens overlap with query words (+5 per overlapping token).
|
|
436
|
+
// Suppressed for unique names to avoid noise.
|
|
437
|
+
const nameFreq = new Map();
|
|
438
|
+
for (const s of symbols) {
|
|
439
|
+
const n = s.name.toLowerCase();
|
|
440
|
+
nameFreq.set(n, (nameFreq.get(n) ?? 0) + 1);
|
|
441
|
+
}
|
|
442
|
+
const queryWordsSet = new Set(queryWords);
|
|
443
|
+
const scored = symbols.map((symbol, originalIndex) => {
|
|
444
|
+
const baseScore = baseResults[originalIndex];
|
|
445
|
+
const ftsBm25Bonus = Math.round(bm25Bonuses[originalIndex] * bm25Scale);
|
|
446
|
+
let pathProximity = 0;
|
|
447
|
+
if ((nameFreq.get(symbol.name.toLowerCase()) ?? 0) >= 3) {
|
|
448
|
+
pathProximity = computePathProximityBoost(symbol.filePath, queryWordsSet);
|
|
449
|
+
}
|
|
450
|
+
baseScore.debugScore.pathProximityBoost = pathProximity;
|
|
451
|
+
baseScore.debugScore.ftsBm25Bonus = ftsBm25Bonus;
|
|
452
|
+
baseScore.debugScore.total += ftsBm25Bonus + pathProximity;
|
|
453
|
+
return {
|
|
454
|
+
...baseScore,
|
|
455
|
+
score: baseScore.score + ftsBm25Bonus + pathProximity,
|
|
456
|
+
symbol,
|
|
457
|
+
originalIndex,
|
|
458
|
+
};
|
|
459
|
+
});
|
|
40
460
|
scored.sort((a, b) => {
|
|
41
461
|
if (b.score !== a.score)
|
|
42
462
|
return b.score - a.score;
|
|
@@ -50,7 +470,7 @@ export function rankSymbols(symbols, query, debug = false) {
|
|
|
50
470
|
}));
|
|
51
471
|
}
|
|
52
472
|
// ─── Scoring ─────────────────────────────────────────────────────────────────
|
|
53
|
-
function score(symbol, queryLower, queryWords) {
|
|
473
|
+
function score(symbol, queryLower, queryWords, rawQueryFreq, domain, opts) {
|
|
54
474
|
const nameLower = symbol.name.toLowerCase();
|
|
55
475
|
const sigLower = symbol.signature.toLowerCase();
|
|
56
476
|
const sumLower = symbol.summary.toLowerCase();
|
|
@@ -66,32 +486,180 @@ function score(symbol, queryLower, queryWords) {
|
|
|
66
486
|
matchReason = 'exact_name';
|
|
67
487
|
}
|
|
68
488
|
else if (nameLower.startsWith(queryLower)) {
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
489
|
+
// Only award the prefix bonus when the character immediately after the
|
|
490
|
+
// query string in the *original* (cased) symbol name is a word-boundary:
|
|
491
|
+
// uppercase letter (camelCase), a non-alpha separator (_, \, :, .), or
|
|
492
|
+
// end of string. A continuing lowercase letter means the query is merely
|
|
493
|
+
// a prefix of a longer word inside the name — not a name-level prefix.
|
|
494
|
+
// e.g. query "model" vs name "models\Article_base" → nextChar="s" (lowercase)
|
|
495
|
+
// → treated as substring match, not prefix match.
|
|
496
|
+
const nextChar = symbol.name[queryLower.length];
|
|
497
|
+
const isWordBoundary = !nextChar || /[^a-z]/.test(nextChar);
|
|
498
|
+
if (isWordBoundary) {
|
|
499
|
+
namePrefix = 60;
|
|
500
|
+
total += 60;
|
|
501
|
+
matchReason = 'prefix_name';
|
|
502
|
+
}
|
|
503
|
+
else {
|
|
504
|
+
// The query string is embedded inside a longer word — treat as substring.
|
|
505
|
+
nameFuzzy = 40;
|
|
506
|
+
total += 40;
|
|
507
|
+
matchReason = 'name_contains';
|
|
508
|
+
}
|
|
72
509
|
}
|
|
73
510
|
else if (nameLower.includes(queryLower)) {
|
|
74
511
|
nameFuzzy = 40;
|
|
75
512
|
total += 40;
|
|
76
513
|
matchReason = 'name_contains';
|
|
77
514
|
}
|
|
78
|
-
// ──
|
|
515
|
+
// ── Identity-exact boost ─────────────────────────────────────────────────────
|
|
516
|
+
//
|
|
517
|
+
// Fires when any single query word exactly matches the symbol's bare name
|
|
518
|
+
// (case-insensitive). Equivalent to JC's Identity channel (weight=2.0).
|
|
519
|
+
//
|
|
520
|
+
// Primary use case: with Go/Rust bare method names (Phase 46), struct/class
|
|
521
|
+
// symbols like `Builder` or `Mutex` share name parts with their own methods.
|
|
522
|
+
// For a query like "Builder struct" or "Mutex lock", the struct symbol should
|
|
523
|
+
// rank above a method named `build` or `lock` on the same type.
|
|
524
|
+
//
|
|
525
|
+
// This differs from nameExact (100pt, entire query = name): identityExact fires
|
|
526
|
+
// for multi-word queries where ONE word is the symbol's full name.
|
|
527
|
+
//
|
|
528
|
+
// Boost: +40 — large enough to overcome a method's kindBoost (+15–+30) advantage.
|
|
529
|
+
// For namespace-qualified C++ names like `folly::Future`, also check the
|
|
530
|
+
// bare local name (last segment after ::) for identity matching, since ground
|
|
531
|
+
// truth uses bare names while the index stores qualified names. Restricted to
|
|
532
|
+
// non-method symbols: methods with :: are class::method pairs (PHP, C++) where
|
|
533
|
+
// the bare method name is too generic to warrant a full identity boost.
|
|
534
|
+
const isNsQualified = nameLower.includes('::');
|
|
535
|
+
const bareLocalName = isNsQualified && symbol.kind !== 'method'
|
|
536
|
+
? nameLower.split('::').pop()
|
|
537
|
+
: nameLower;
|
|
538
|
+
// For XML-disambiguated names (e.g. project@maven-cli), also check the bare
|
|
539
|
+
// tag name (part before @) for identity matching.
|
|
540
|
+
const isAtDisambiguated = nameLower.includes('@');
|
|
541
|
+
const bareTagName = isAtDisambiguated ? nameLower.split('@')[0] : nameLower;
|
|
542
|
+
// Find the query word that triggered identityExact (needed for the frequency lookup).
|
|
543
|
+
const identityMatchWord = queryWords.find((w) => w === nameLower)
|
|
544
|
+
?? (bareLocalName !== nameLower ? queryWords.find((w) => w === bareLocalName) : undefined)
|
|
545
|
+
?? (bareTagName !== nameLower ? queryWords.find((w) => w === bareTagName) : undefined);
|
|
546
|
+
let identityExact = 0;
|
|
547
|
+
if (queryWords.length > 0 && identityMatchWord !== undefined) {
|
|
548
|
+
// Data-definition symbols (const, type, interface, enum, property) named after
|
|
549
|
+
// a single concept (e.g. STRIPE, Subscribers) fire identityExact when that word
|
|
550
|
+
// appears anywhere in a multi-word query, even as incidental context rather than
|
|
551
|
+
// the actual search target. Scale the bonus proportionally so the signal weakens
|
|
552
|
+
// as the query grows: 40/N, minimum 10. Code-definition symbols (function, method,
|
|
553
|
+
// class, …) use a higher base of 60 with a frequency multiplier (capped at 2×):
|
|
554
|
+
// when the matched word appears twice in the raw query (e.g. "base formula class
|
|
555
|
+
// … formula files") that repetition signals it is the primary target, giving the
|
|
556
|
+
// correctly-named symbol enough margin to overcome BM25 noise from generically-
|
|
557
|
+
// named symbols that also match the repeated word.
|
|
558
|
+
const DATA_KINDS = new Set(['const', 'type', 'interface', 'enum', 'property']);
|
|
559
|
+
if (DATA_KINDS.has(symbol.kind) && queryWords.length > 1) {
|
|
560
|
+
identityExact = Math.max(10, Math.round(40 / queryWords.length));
|
|
561
|
+
}
|
|
562
|
+
else {
|
|
563
|
+
const rawFreq = rawQueryFreq.get(identityMatchWord) ?? 1;
|
|
564
|
+
identityExact = 60 * Math.min(rawFreq, 2);
|
|
565
|
+
}
|
|
566
|
+
total += identityExact;
|
|
567
|
+
}
|
|
568
|
+
// ── Compound underscore identity boost (Task 433) ─────────────────────────
|
|
569
|
+
// Fires when the symbol has a compound underscore name (e.g. payment_intent)
|
|
570
|
+
// and ALL underscore-separated parts appear as query words. Differentiates
|
|
571
|
+
// "payment_intent" from "payment_method" for query "create payment intent"
|
|
572
|
+
// where both share the "payment" part but only payment_intent matches all parts.
|
|
573
|
+
const queryWordsSet = new Set(queryWords);
|
|
574
|
+
let compoundUnderscoreBoost = 0;
|
|
575
|
+
if (identityExact === 0 && nameLower.includes('_')) {
|
|
576
|
+
const uParts = nameLower.split('_').filter((p) => p.length >= 2);
|
|
577
|
+
if (uParts.length >= 2 && uParts.every((p) => queryWordsSet.has(p))) {
|
|
578
|
+
compoundUnderscoreBoost = 30;
|
|
579
|
+
total += compoundUnderscoreBoost;
|
|
580
|
+
}
|
|
581
|
+
}
|
|
582
|
+
// ── Single-token exact-name boost (Task 434) ──────────────────────────────
|
|
583
|
+
// Fires only when the query is a single bare token. Lifts "Pod" above "PodSpec"
|
|
584
|
+
// for the query "Pod", and handles dot-qualified names via last-segment check.
|
|
585
|
+
const singleTokenExactBoost = computeSingleTokenExactBoost(symbol, queryLower);
|
|
586
|
+
total += singleTokenExactBoost;
|
|
587
|
+
// ── Word-overlap rules (word-boundary matching against split name parts) ────
|
|
588
|
+
//
|
|
589
|
+
// We split the symbol name into its constituent word-boundary parts
|
|
590
|
+
// (camelCase + snake_case + namespace separators) and check query words
|
|
591
|
+
// against those parts. Inflectional stem variants of each name part are
|
|
592
|
+
// also included so that query word "product" matches name part "products",
|
|
593
|
+
// "order" matches "orders", "review" matches "reviews", etc.
|
|
594
|
+
//
|
|
595
|
+
// This is more precise than substring matching: query word "model" matches
|
|
596
|
+
// the "model" part of "CIR_Model" but scores slightly less against the
|
|
597
|
+
// "models" namespace prefix in "models\\Article_base" (stem-only match).
|
|
79
598
|
let wordOverlap = 0;
|
|
80
599
|
if (queryWords.length > 0) {
|
|
81
|
-
|
|
82
|
-
|
|
600
|
+
const nameParts = splitNameParts(symbol.name);
|
|
601
|
+
// Add inflectional stems of each name part so that pluralized name parts
|
|
602
|
+
// match their singular query-word counterparts and vice-versa.
|
|
603
|
+
// e.g. "products" → "product", "orders" → "order", "reviews" → "review"
|
|
604
|
+
const namePartsSet = new Set(nameParts);
|
|
605
|
+
for (const p of nameParts) {
|
|
606
|
+
addStemsOf(p, namePartsSet);
|
|
607
|
+
}
|
|
608
|
+
// Two levels of matching used to differentiate exact vs stem-based hits:
|
|
609
|
+
// partStrict — query word appears verbatim in the split name parts
|
|
610
|
+
// partLoose — query word matches a stem variant of a name part
|
|
611
|
+
// This prevents a pluralised namespace prefix ("models") from scoring
|
|
612
|
+
// identically to an exact name-part match ("model" in "CIR_Model").
|
|
613
|
+
const partStrict = (w) => nameParts.some((p) => p === w);
|
|
614
|
+
const partLoose = (w) => namePartsSet.has(w);
|
|
615
|
+
if (queryWords.every(partLoose)) {
|
|
616
|
+
wordOverlap += 30; // all query words match name parts (exact or stem)
|
|
83
617
|
}
|
|
84
|
-
if (queryWords.some(
|
|
85
|
-
wordOverlap += 20;
|
|
618
|
+
if (queryWords.some(partLoose)) {
|
|
619
|
+
wordOverlap += 20; // at least one query word matches
|
|
620
|
+
}
|
|
621
|
+
// Per-word bonus: exact part match earns +10; stem-only match earns +8.
|
|
622
|
+
for (const w of queryWords) {
|
|
623
|
+
if (partStrict(w))
|
|
624
|
+
wordOverlap += 10;
|
|
625
|
+
else if (partLoose(w))
|
|
626
|
+
wordOverlap += 8;
|
|
86
627
|
}
|
|
87
|
-
const wordsInName = queryWords.filter((w) => nameLower.includes(w)).length;
|
|
88
|
-
wordOverlap += wordsInName * 10;
|
|
89
628
|
if (wordOverlap > 0) {
|
|
90
629
|
total += wordOverlap;
|
|
91
630
|
if (matchReason === 'content_match')
|
|
92
631
|
matchReason = 'word_overlap';
|
|
93
632
|
}
|
|
94
633
|
}
|
|
634
|
+
// ── Method verb bonus ────────────────────────────────────────────────────────
|
|
635
|
+
//
|
|
636
|
+
// For method symbols, give a +15 bonus when a query word exactly matches the
|
|
637
|
+
// FIRST split part of the method name — the "action verb" (e.g. "create" in
|
|
638
|
+
// "ProductsService.create", "get" in "OrdersService.getMyOrders").
|
|
639
|
+
//
|
|
640
|
+
// This differentiates application methods from helper/utility methods that
|
|
641
|
+
// happen to share other name parts with the query. Example:
|
|
642
|
+
// query "create product listing"
|
|
643
|
+
// ProductsService.create → verb "create" matches → +15 (total 89)
|
|
644
|
+
// buildProductListCacheKey → verb "build" ≠ "create" → no bonus (total 76)
|
|
645
|
+
//
|
|
646
|
+
// The bonus is intentionally limited to exact query-word matches on the first
|
|
647
|
+
// method part only — we do not use stems here because a stem match on the verb
|
|
648
|
+
// is too loose (e.g. "builds" → "build" would also match "get").
|
|
649
|
+
let methodVerbBonus = 0;
|
|
650
|
+
if (symbol.kind === 'method' && queryWords.length > 0) {
|
|
651
|
+
const dotIdx = symbol.name.indexOf('.');
|
|
652
|
+
const colonIdx = symbol.name.indexOf('::');
|
|
653
|
+
const sepIdx = dotIdx >= 0 ? dotIdx : colonIdx;
|
|
654
|
+
if (sepIdx > 0) {
|
|
655
|
+
const methodPart = symbol.name.slice(sepIdx + (colonIdx >= 0 && colonIdx === sepIdx ? 2 : 1));
|
|
656
|
+
const methodVerbParts = splitNameParts(methodPart);
|
|
657
|
+
if (methodVerbParts.length > 0 && queryWords.some((w) => w === methodVerbParts[0])) {
|
|
658
|
+
methodVerbBonus = 15;
|
|
659
|
+
}
|
|
660
|
+
}
|
|
661
|
+
}
|
|
662
|
+
total += methodVerbBonus;
|
|
95
663
|
// ── Content rules (signature + summary) ────────────────────────────────────
|
|
96
664
|
let signatureMatch = 0;
|
|
97
665
|
let summaryMatch = 0;
|
|
@@ -102,42 +670,368 @@ function score(symbol, queryLower, queryWords) {
|
|
|
102
670
|
summaryMatch += 5;
|
|
103
671
|
summaryMatch += queryWords.filter((w) => sumLower.includes(w)).length * 1;
|
|
104
672
|
total += signatureMatch + summaryMatch;
|
|
673
|
+
// ── Kind boost for application-layer methods and Rust traits ────────────────
|
|
674
|
+
//
|
|
675
|
+
// For natural-language "how to do X" queries, a method on a *Service class is
|
|
676
|
+
// almost always the correct answer — not the DTO, event type, schema const, or
|
|
677
|
+
// controller delegate that happens to share words with the query.
|
|
678
|
+
//
|
|
679
|
+
// Boost values (additive):
|
|
680
|
+
// +30 — method on a *Service class (e.g. AuthService.login)
|
|
681
|
+
// +20 — Rust trait (interface kind) when a query word matches the trait name
|
|
682
|
+
// +15 — method on a *Repository / *Manager / *Store class (data-access layer)
|
|
683
|
+
// +15 — method on a *_model class (PHP CodeIgniter model convention)
|
|
684
|
+
// +15 — method on a *Handler / *DB / *Client class (Go HTTP handlers, DB layers, API clients)
|
|
685
|
+
// +15 — method on a *Activity / *Fragment / *Adapter / *ViewModel class (Android framework)
|
|
686
|
+
// +15 — method on a *Processor / *Indexer / *Parser class (Python application-layer patterns)
|
|
687
|
+
// +15 — method on a *EventSubscriber / *Listener / *FormType class (Symfony patterns)
|
|
688
|
+
//
|
|
689
|
+
// The method boost is applied to 'method' kind symbols. Class context is
|
|
690
|
+
// extracted from the symbol name (qualified: "ClassName.method" or
|
|
691
|
+
// "TypeName::method") or, for bare-name handlers (Java, Rust), from the
|
|
692
|
+
// signature prefix ("ClassName.method: <sig>" / "TypeName::method: <sig>").
|
|
693
|
+
// The interface boost applies to Rust traits (kind: 'interface') when query
|
|
694
|
+
// words overlap with the trait name's word-boundary parts.
|
|
695
|
+
let kindBoost = 0;
|
|
696
|
+
// Rust trait kindBoost: boost when ALL word-boundary parts of the trait name
|
|
697
|
+
// are matched by query words. This lifts Serialize/Deserialize above private
|
|
698
|
+
// helpers for queries like "serializable type" or "implement serialize",
|
|
699
|
+
// without boosting TypeScript option-bag interfaces (e.g. NuxtLinkOptions
|
|
700
|
+
// which has parts [nuxt, link, options] — "link" is rarely in the query).
|
|
701
|
+
if (symbol.kind === 'interface' && queryWords.length > 0) {
|
|
702
|
+
const traitParts = splitNameParts(symbol.name);
|
|
703
|
+
const hasQueryMatch = traitParts.length > 0 && traitParts.every((p) => queryWords.includes(p));
|
|
704
|
+
if (hasQueryMatch)
|
|
705
|
+
kindBoost = 20;
|
|
706
|
+
}
|
|
707
|
+
if (symbol.kind === 'method') {
|
|
708
|
+
const dotIdx = symbol.name.indexOf('.');
|
|
709
|
+
const colonIdx = symbol.name.indexOf('::');
|
|
710
|
+
const sepIdx = dotIdx >= 0 ? dotIdx : colonIdx;
|
|
711
|
+
// For bare names (Java/Rust handlers store bare methodName), fall back to
|
|
712
|
+
// extracting the class/type name from the signature prefix.
|
|
713
|
+
const classPart = sepIdx > 0
|
|
714
|
+
? symbol.name.slice(0, sepIdx).toLowerCase()
|
|
715
|
+
: classFromSignature(symbol.name, symbol.signature);
|
|
716
|
+
if (classPart) {
|
|
717
|
+
if (classPart.endsWith('service')) {
|
|
718
|
+
kindBoost = 30;
|
|
719
|
+
}
|
|
720
|
+
else if (classPart.endsWith('repository') ||
|
|
721
|
+
classPart.endsWith('manager') ||
|
|
722
|
+
classPart.endsWith('store') ||
|
|
723
|
+
classPart.endsWith('_model') ||
|
|
724
|
+
(classPart.endsWith('model') && classPart.length > 5) ||
|
|
725
|
+
// PHP / NestJS controller action methods
|
|
726
|
+
classPart.endsWith('_controller') ||
|
|
727
|
+
(classPart.endsWith('controller') && classPart.length > 10) ||
|
|
728
|
+
// Go HTTP handlers, database layers, and API clients
|
|
729
|
+
classPart.endsWith('handler') ||
|
|
730
|
+
classPart.endsWith('db') ||
|
|
731
|
+
classPart.endsWith('client') ||
|
|
732
|
+
// Android framework classes (lifecycle methods, adapters)
|
|
733
|
+
classPart.endsWith('activity') ||
|
|
734
|
+
classPart.endsWith('fragment') ||
|
|
735
|
+
classPart.endsWith('adapter') ||
|
|
736
|
+
classPart.endsWith('viewmodel') ||
|
|
737
|
+
// Python application-layer class patterns
|
|
738
|
+
classPart.endsWith('processor') ||
|
|
739
|
+
classPart.endsWith('indexer') ||
|
|
740
|
+
classPart.endsWith('parser') ||
|
|
741
|
+
// Symfony event-driven and form patterns
|
|
742
|
+
classPart.endsWith('eventsubscriber') ||
|
|
743
|
+
classPart.endsWith('listener') ||
|
|
744
|
+
classPart.endsWith('formtype') ||
|
|
745
|
+
(classPart.endsWith('type') && classPart.length > 4)) {
|
|
746
|
+
kindBoost = 15;
|
|
747
|
+
}
|
|
748
|
+
}
|
|
749
|
+
}
|
|
750
|
+
total += kindBoost;
|
|
751
|
+
// ── Kind-hint boost ──────────────────────────────────────────────────────────
|
|
752
|
+
//
|
|
753
|
+
// When the query explicitly names a symbol kind ("class that ...", "callback
|
|
754
|
+
// interface", "enum of ..."), strongly prefer symbols of that exact kind.
|
|
755
|
+
// This prevents method/function symbols from outranking the class or interface
|
|
756
|
+
// the query is asking about.
|
|
757
|
+
//
|
|
758
|
+
// Boost value: +35 — enough to overcome the method kindBoost (+15–+30) and a
|
|
759
|
+
// multi-word overlap advantage that a method on a similarly-named class may have.
|
|
760
|
+
//
|
|
761
|
+
// Match rules (all case-insensitive, word-boundary in query words):
|
|
762
|
+
// "class" → prefer kind 'class' or 'struct'
|
|
763
|
+
// "interface" → prefer kind 'class', 'struct', or 'interface'
|
|
764
|
+
// "struct" → prefer kind 'struct'
|
|
765
|
+
// "enum" → prefer kind 'enum'
|
|
766
|
+
// "function" → prefer kind 'function' (NOTE: documented intent only — no branch below applies this hint)
|
|
767
|
+
let kindHintBoost = 0;
|
|
768
|
+
if (queryWords.includes('class') || queryWords.includes('cls')) {
|
|
769
|
+
if (symbol.kind === 'class' || symbol.kind === 'struct')
|
|
770
|
+
kindHintBoost = 35;
|
|
771
|
+
}
|
|
772
|
+
else if (queryWords.includes('interface')) {
|
|
773
|
+
if (symbol.kind === 'class' || symbol.kind === 'struct' || symbol.kind === 'interface')
|
|
774
|
+
kindHintBoost = 35;
|
|
775
|
+
}
|
|
776
|
+
else if (queryWords.includes('struct')) {
|
|
777
|
+
if (symbol.kind === 'struct')
|
|
778
|
+
kindHintBoost = 35;
|
|
779
|
+
}
|
|
780
|
+
else if (queryWords.includes('enum')) {
|
|
781
|
+
if (symbol.kind === 'enum')
|
|
782
|
+
kindHintBoost = 35;
|
|
783
|
+
}
|
|
784
|
+
total += kindHintBoost;
|
|
785
|
+
// ── Library path penalty ─────────────────────────────────────────────────────
|
|
786
|
+
//
|
|
787
|
+
// Symbols from well-known library/framework directories (CodeIgniter system/,
|
|
788
|
+
// Composer vendor/, npm node_modules/, etc.) are almost never the intended
|
|
789
|
+
// answer for a natural-language query about application behaviour. Applying a
|
|
790
|
+
// fixed penalty pushes them below application symbols that scored similarly on
|
|
791
|
+
// name and word-overlap rules, without excluding them entirely (they still
|
|
792
|
+
// appear when no application code matches).
|
|
793
|
+
//
|
|
794
|
+
// Penalty: -35 points — enough to overcome a 1-word lexical advantage that a
|
|
795
|
+
// library class may have over an application wrapper (e.g. Twig_Template::render
|
|
796
|
+
// matches "template" while the application Twig::render does not, but for
|
|
797
|
+
// realistic multi-word queries the application symbol still wins).
|
|
798
|
+
// Set between the 3-word (10 × 3 = 30) and 4-word (10 × 4 = 40) per-word bonus totals so that a library
|
|
799
|
+
// symbol with 4+ extra matching words can still surface for explicit library
|
|
800
|
+
// lookups (e.g. query "CI_DB_driver execute").
|
|
801
|
+
let libraryPenalty = 0;
|
|
802
|
+
if (isLibraryPath(symbol.filePath)) {
|
|
803
|
+
libraryPenalty = -35;
|
|
804
|
+
total += libraryPenalty;
|
|
805
|
+
}
|
|
806
|
+
// Perl test-fixture penalty: symbols in t/lib/ (e.g. TestApp::Controller)
|
|
807
|
+
// are test stubs that should not outrank the actual library API symbols.
|
|
808
|
+
if (libraryPenalty === 0) {
|
|
809
|
+
const ext = symbol.filePath.split('.').pop()?.toLowerCase() ?? '';
|
|
810
|
+
if (ext === 'pm' || ext === 'pl') {
|
|
811
|
+
const normalizedFp = symbol.filePath.replace(/\\/g, '/').toLowerCase();
|
|
812
|
+
if (normalizedFp.startsWith('t/lib/') || normalizedFp.includes('/t/lib/')) {
|
|
813
|
+
libraryPenalty = -25;
|
|
814
|
+
total += libraryPenalty;
|
|
815
|
+
}
|
|
816
|
+
}
|
|
817
|
+
}
|
|
818
|
+
// ── Core path boost (Task 414) ────────────────────────────────────────────
|
|
819
|
+
// For Java/Groovy repos: boost canonical core source paths (+15) and penalise
|
|
820
|
+
// plugin implementations (-35) so core methods surface above plugin overrides.
|
|
821
|
+
let corePathBoost = 0;
|
|
822
|
+
if (domain === 'java' && libraryPenalty === 0) {
|
|
823
|
+
// Check plugin penalty BEFORE core-path boost: plugin dirs may also contain
|
|
824
|
+
// /main/java/ sub-paths, so ordering matters.
|
|
825
|
+
if (isJavaPluginPath(symbol.filePath)) {
|
|
826
|
+
corePathBoost = -35;
|
|
827
|
+
total += corePathBoost;
|
|
828
|
+
}
|
|
829
|
+
else if (isCoreJavaPath(symbol.filePath)) {
|
|
830
|
+
corePathBoost = 15;
|
|
831
|
+
total += corePathBoost;
|
|
832
|
+
}
|
|
833
|
+
}
|
|
834
|
+
// ── Frontend path boost (Task 415) ───────────────────────────────────────
|
|
835
|
+
// In mixed monorepos (frontend + backend apps/ subdirs), boost symbols from
|
|
836
|
+
// frontend paths when the query uses hook/component/use* vocabulary.
|
|
837
|
+
let frontendPathBoost = 0;
|
|
838
|
+
if (opts?.isMixedMonorepo && isFrontendAppPath(symbol.filePath) && hasFrontendVocab(queryWords)) {
|
|
839
|
+
frontendPathBoost = 20;
|
|
840
|
+
total += frontendPathBoost;
|
|
841
|
+
}
|
|
842
|
+
// ── Use/hook bonus (Task 416) ────────────────────────────────────────────
|
|
843
|
+
// When the query is asking for a React hook (use*/hook vocabulary) and the
|
|
844
|
+
// OR-fallback fired (indicated by opts.hasReactHookQuery), reward symbols
|
|
845
|
+
// whose names follow the React hook naming convention (use[A-Z]...).
|
|
846
|
+
let useHookBonus = 0;
|
|
847
|
+
if (opts?.hasReactHookQuery && /^use[A-Z]/.test(symbol.name)) {
|
|
848
|
+
useHookBonus = 20;
|
|
849
|
+
total += useHookBonus;
|
|
850
|
+
}
|
|
851
|
+
// ── Groovy source boost (Task 423) ───────────────────────────────────────
|
|
852
|
+
// In mixed Java+Groovy repos (e.g. gradle, groovy) Java methods dominate by
|
|
853
|
+
// sheer count. Give a +10 bonus to symbols in .groovy files so that Groovy
|
|
854
|
+
// `def` methods surface above equally-scored Java counterparts.
|
|
855
|
+
let groovySourceBoost = 0;
|
|
856
|
+
if (opts?.isJavaGroovyMixed && symbol.filePath.endsWith('.groovy')) {
|
|
857
|
+
groovySourceBoost = 10;
|
|
858
|
+
total += groovySourceBoost;
|
|
859
|
+
}
|
|
860
|
+
// ── Phase 73 boosts ───────────────────────────────────────────────────────
|
|
861
|
+
const p73AngularLifecycle = computeAngularLifecycleBoost(symbol, queryWords, opts?.isAngularRepo ?? false);
|
|
862
|
+
total += p73AngularLifecycle;
|
|
863
|
+
const p73RenderingCompound = computeRenderingCompoundBoost(symbol, queryWords, domain);
|
|
864
|
+
total += p73RenderingCompound;
|
|
865
|
+
const p73ReactQueryHook = computeReactQueryHookBoost(symbol, queryWords);
|
|
866
|
+
total += p73ReactQueryHook;
|
|
867
|
+
const p73Interceptor = computeInterceptorBoost(symbol, queryWords);
|
|
868
|
+
total += p73Interceptor;
|
|
869
|
+
const p73PackageContext = computePackageContextBoost(symbol, queryWords);
|
|
870
|
+
total += p73PackageContext;
|
|
871
|
+
const p73TrpcPrefix = computeTrpcPrefixBoost(symbol, queryWords);
|
|
872
|
+
total += p73TrpcPrefix;
|
|
105
873
|
const debugScore = {
|
|
106
874
|
total,
|
|
107
875
|
nameExact,
|
|
108
876
|
namePrefix,
|
|
109
877
|
nameFuzzy,
|
|
878
|
+
identityExact,
|
|
879
|
+
compoundUnderscoreBoost,
|
|
880
|
+
singleTokenExactBoost,
|
|
110
881
|
wordOverlap,
|
|
882
|
+
methodVerbBonus,
|
|
111
883
|
signatureMatch,
|
|
112
884
|
summaryMatch,
|
|
113
|
-
kindBoost
|
|
885
|
+
kindBoost,
|
|
886
|
+
kindHintBoost,
|
|
887
|
+
libraryPenalty,
|
|
888
|
+
corePathBoost,
|
|
889
|
+
frontendPathBoost,
|
|
890
|
+
useHookBonus,
|
|
891
|
+
groovySourceBoost,
|
|
892
|
+
angularLifecycleBoost: p73AngularLifecycle,
|
|
893
|
+
renderingCompoundBoost: p73RenderingCompound,
|
|
894
|
+
reactQueryHookBoost: p73ReactQueryHook,
|
|
895
|
+
interceptorBoost: p73Interceptor,
|
|
896
|
+
packageContextBoost: p73PackageContext,
|
|
897
|
+
trpcPrefixBoost: p73TrpcPrefix,
|
|
898
|
+
pathProximityBoost: 0, // filled in by rankSymbols after the name-frequency pass
|
|
114
899
|
recencyBoost: 0,
|
|
900
|
+
ftsBm25Bonus: 0,
|
|
115
901
|
};
|
|
116
902
|
return { score: total, matchReason, debugScore };
|
|
117
903
|
}
|
|
118
904
|
// ─── Helpers ─────────────────────────────────────────────────────────────────
|
|
905
|
+
/**
 * Split a symbol name into lowercase word-boundary parts for precise matching.
 *
 * The name is broken down in three passes:
 *   1. separator characters: `\`, `:`, `.`, `@` and `-`
 *   2. snake_case underscores
 *   3. camelCase / PascalCase boundaries (an acronym run stays together,
 *      e.g. 'HTTPServer' → 'http', 'server')
 * Parts shorter than 2 characters are dropped.
 *
 * Hyphens are treated as separators because extractQueryWords splits the
 * query on hyphens: without this, a kebab-case name part such as "maven-cli"
 * could never equal any query word.
 *
 * Examples:
 *   'CIR_Model'                    → ['cir', 'model']
 *   'Homepage_model::getSettings'  → ['homepage', 'model', 'get', 'settings']
 *   'models\\Article_base'         → ['models', 'article', 'base']
 *   'CIR_FrontController'          → ['cir', 'front', 'controller']
 *   'CI_DB_query_builder::_insert' → ['ci', 'db', 'query', 'builder', 'insert']
 *   'project@maven-cli'            → ['project', 'maven', 'cli']
 *
 * @param {string} name - Symbol name as stored in the index (original casing).
 * @returns {string[]} Lowercase parts in order of appearance; duplicates are kept.
 */
function splitNameParts(name) {
    const parts = [];
    // Pass 1: namespace / member / disambiguation separators, plus hyphens.
    for (const segment of name.split(/[\\:.@-]+/)) {
        // Pass 2: snake_case. Empty pieces come from leading, trailing or doubled underscores.
        for (const subSeg of segment.split('_')) {
            if (subSeg === '') {
                continue;
            }
            // Pass 3: mark case boundaries with a space, then split on it.
            const camelParts = subSeg
                .replace(/([a-z\d])([A-Z])/g, '$1 $2') // fooBar → foo Bar, v2Api → v2 Api
                .replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2') // HTTPServer → HTTP Server
                .split(' ');
            for (const raw of camelParts) {
                const lowered = raw.toLowerCase();
                if (lowered.length >= 2) {
                    parts.push(lowered);
                }
            }
        }
    }
    return parts;
}
|
|
937
|
+
/**
|
|
938
|
+
* Add inflectional suffix stems of `word` to `set`.
|
|
939
|
+
*
|
|
940
|
+
* Adding stems (rather than replacing the original word) means both the
|
|
941
|
+
* inflected form and the stem are available for matching — the inflected form
|
|
942
|
+
* matches index tokens that include the suffix; the stem matches code symbol
|
|
943
|
+
* parts that use the base form.
|
|
944
|
+
*
|
|
945
|
+
* Suffixes handled:
|
|
946
|
+
* -s (plural / 3rd-person) "models" → "model"
|
|
947
|
+
* -ing (gerund) "building" → "build"
|
|
948
|
+
* -ed (past tense) "updated" → "update" + "updat"
|
|
949
|
+
* "matched" → "matche" + "match"
|
|
950
|
+
* -tion (nominalisation) "pagination" → "paginat"
|
|
951
|
+
*
|
|
952
|
+
* For -ed we add both the e-drop form (strip only -d) and the regular form
|
|
953
|
+
* (strip -ed) so that both "update" (code symbol) and "match" are covered
|
|
954
|
+
* regardless of the inflection pattern.
|
|
955
|
+
*/
|
|
956
|
+
function addStemsOf(word, set) {
|
|
957
|
+
// -tion: "pagination" → "paginat" (minimum total length 7 to avoid "on" → "")
|
|
958
|
+
if (word.length > 6 && word.endsWith('tion')) {
|
|
959
|
+
const s = word.slice(0, -4);
|
|
960
|
+
if (s.length >= 2)
|
|
961
|
+
set.add(s);
|
|
962
|
+
return;
|
|
963
|
+
}
|
|
964
|
+
// -ing: "building" → "build" (minimum total length 6 to avoid "ring" → "r")
|
|
965
|
+
if (word.length > 5 && word.endsWith('ing')) {
|
|
966
|
+
const s = word.slice(0, -3);
|
|
967
|
+
if (s.length >= 2)
|
|
968
|
+
set.add(s);
|
|
969
|
+
return;
|
|
970
|
+
}
|
|
971
|
+
// -ed: add both e-drop ("updated"→"update") and regular strip ("matched"→"match")
|
|
972
|
+
// The e-drop form may produce noise ("matche") but it won't match real symbol parts.
|
|
973
|
+
if (word.length > 4 && word.endsWith('ed')) {
|
|
974
|
+
const dStem = word.slice(0, -1); // strip -d : "updated"→"update", "matched"→"matche"
|
|
975
|
+
const edStem = word.slice(0, -2); // strip -ed : "updated"→"updat", "matched"→"match"
|
|
976
|
+
if (dStem.length >= 2)
|
|
977
|
+
set.add(dStem);
|
|
978
|
+
if (edStem.length >= 2 && edStem !== dStem)
|
|
979
|
+
set.add(edStem);
|
|
980
|
+
return;
|
|
981
|
+
}
|
|
982
|
+
// -s: "models"→"model", "records"→"record" (skip -ss endings like "class")
|
|
983
|
+
if (word.length > 3 && word.endsWith('s') && !word.endsWith('ss')) {
|
|
984
|
+
const s = word.slice(0, -1);
|
|
985
|
+
if (s.length >= 2)
|
|
986
|
+
set.add(s);
|
|
987
|
+
}
|
|
988
|
+
}
|
|
119
989
|
/**
|
|
120
990
|
* Extract a deduplicated set of lowercase search terms from the raw query.
|
|
121
991
|
*
|
|
122
|
-
*
|
|
123
|
-
*
|
|
992
|
+
* Processing pipeline:
|
|
993
|
+
* 1. Split on whitespace AND hyphens ("front-end" → "front", "end").
|
|
994
|
+
* 2. Discard English stop words.
|
|
995
|
+
* 3. Add inflectional stem variants via addStemsOf.
|
|
996
|
+
* 4. For snake_case tokens split on underscores and repeat steps 2-3.
|
|
997
|
+
* 5. For camelCase tokens split on case boundaries and repeat steps 2-3.
|
|
124
998
|
*
|
|
125
999
|
* Examples:
|
|
126
|
-
* 'indexFolder'
|
|
127
|
-
* 'blast radius'
|
|
128
|
-
* 'get_symbol_source'→ ['get_symbol_source', 'get', 'symbol', 'source']
|
|
1000
|
+
* 'indexFolder' → ['indexfolder', 'index', 'folder']
|
|
1001
|
+
* 'blast radius' → ['blast', 'radius']
|
|
1002
|
+
* 'get_symbol_source' → ['get_symbol_source', 'get', 'symbol', 'source']
|
|
1003
|
+
* 'models' → ['models', 'model']
|
|
1004
|
+
* 'updated homepage' → ['updated', 'update', 'updat', 'homepage']
|
|
1005
|
+
* 'front-end controller' → ['front', 'end', 'controller']
|
|
129
1006
|
*/
|
|
130
|
-
function extractQueryWords(raw) {
|
|
1007
|
+
function extractQueryWords(raw, domain) {
|
|
131
1008
|
const words = new Set();
|
|
132
|
-
|
|
1009
|
+
// Split on both whitespace and hyphens so "front-end" → ["front", "end"]
|
|
1010
|
+
for (const part of raw.split(/[\s-]+/).filter(Boolean)) {
|
|
133
1011
|
const lower = part.toLowerCase();
|
|
1012
|
+
// Skip English function words — they never appear in code symbol names and
|
|
1013
|
+
// inflate the "all words in name" (30-pt) and per-word (10-pt) scoring
|
|
1014
|
+
// rules, making the 30-pt bonus unachievable for any real symbol.
|
|
1015
|
+
if (isStopWord(lower))
|
|
1016
|
+
continue;
|
|
1017
|
+
if (lower.length < 2)
|
|
1018
|
+
continue;
|
|
134
1019
|
words.add(lower);
|
|
1020
|
+
addStemsOf(lower, words);
|
|
1021
|
+
// Synonym expansion: add code-domain synonyms so the ranker rewards symbols
|
|
1022
|
+
// whose names use a synonym of the query word (e.g. "authenticate" → "login").
|
|
1023
|
+
for (const syn of expandVerbSynonyms(lower, domain)) {
|
|
1024
|
+
words.add(syn);
|
|
1025
|
+
addStemsOf(syn, words);
|
|
1026
|
+
}
|
|
135
1027
|
if (part.includes('_')) {
|
|
136
1028
|
// snake_case — split on underscores
|
|
137
1029
|
for (const seg of part.split('_')) {
|
|
138
1030
|
const s = seg.toLowerCase();
|
|
139
|
-
if (s.length >= 2)
|
|
1031
|
+
if (s.length >= 2 && !isStopWord(s)) {
|
|
140
1032
|
words.add(s);
|
|
1033
|
+
addStemsOf(s, words);
|
|
1034
|
+
}
|
|
141
1035
|
}
|
|
142
1036
|
}
|
|
143
1037
|
else {
|
|
@@ -149,8 +1043,12 @@ function extractQueryWords(raw) {
|
|
|
149
1043
|
.map((s) => s.toLowerCase())
|
|
150
1044
|
.filter((s) => s.length >= 2);
|
|
151
1045
|
if (parts.length > 1) {
|
|
152
|
-
for (const s of parts)
|
|
153
|
-
|
|
1046
|
+
for (const s of parts) {
|
|
1047
|
+
if (!isStopWord(s)) {
|
|
1048
|
+
words.add(s);
|
|
1049
|
+
addStemsOf(s, words);
|
|
1050
|
+
}
|
|
1051
|
+
}
|
|
154
1052
|
}
|
|
155
1053
|
}
|
|
156
1054
|
}
|