codebase-context 1.4.1 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +193 -45
- package/dist/analyzers/generic/index.d.ts +0 -1
- package/dist/analyzers/generic/index.d.ts.map +1 -1
- package/dist/analyzers/generic/index.js +0 -13
- package/dist/analyzers/generic/index.js.map +1 -1
- package/dist/constants/codebase-context.d.ts +2 -0
- package/dist/constants/codebase-context.d.ts.map +1 -1
- package/dist/constants/codebase-context.js +2 -0
- package/dist/constants/codebase-context.js.map +1 -1
- package/dist/constants/git-patterns.d.ts +12 -0
- package/dist/constants/git-patterns.d.ts.map +1 -0
- package/dist/constants/git-patterns.js +11 -0
- package/dist/constants/git-patterns.js.map +1 -0
- package/dist/core/analyzer-registry.d.ts.map +1 -1
- package/dist/core/analyzer-registry.js +3 -1
- package/dist/core/analyzer-registry.js.map +1 -1
- package/dist/core/indexer.d.ts +2 -0
- package/dist/core/indexer.d.ts.map +1 -1
- package/dist/core/indexer.js +179 -34
- package/dist/core/indexer.js.map +1 -1
- package/dist/core/manifest.d.ts +39 -0
- package/dist/core/manifest.d.ts.map +1 -0
- package/dist/core/manifest.js +86 -0
- package/dist/core/manifest.js.map +1 -0
- package/dist/core/reranker.d.ts +23 -0
- package/dist/core/reranker.d.ts.map +1 -0
- package/dist/core/reranker.js +120 -0
- package/dist/core/reranker.js.map +1 -0
- package/dist/core/search-quality.d.ts +10 -0
- package/dist/core/search-quality.d.ts.map +1 -0
- package/dist/core/search-quality.js +64 -0
- package/dist/core/search-quality.js.map +1 -0
- package/dist/core/search.d.ts +26 -2
- package/dist/core/search.d.ts.map +1 -1
- package/dist/core/search.js +508 -69
- package/dist/core/search.js.map +1 -1
- package/dist/embeddings/transformers.d.ts.map +1 -1
- package/dist/embeddings/transformers.js +17 -7
- package/dist/embeddings/transformers.js.map +1 -1
- package/dist/embeddings/types.d.ts.map +1 -1
- package/dist/embeddings/types.js +3 -0
- package/dist/embeddings/types.js.map +1 -1
- package/dist/index.d.ts +1 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +460 -55
- package/dist/index.js.map +1 -1
- package/dist/memory/git-memory.d.ts +9 -0
- package/dist/memory/git-memory.d.ts.map +1 -0
- package/dist/memory/git-memory.js +51 -0
- package/dist/memory/git-memory.js.map +1 -0
- package/dist/memory/store.d.ts +16 -0
- package/dist/memory/store.d.ts.map +1 -1
- package/dist/memory/store.js +40 -1
- package/dist/memory/store.js.map +1 -1
- package/dist/patterns/semantics.d.ts +4 -0
- package/dist/patterns/semantics.d.ts.map +1 -0
- package/dist/patterns/semantics.js +24 -0
- package/dist/patterns/semantics.js.map +1 -0
- package/dist/preflight/evidence-lock.d.ts +50 -0
- package/dist/preflight/evidence-lock.d.ts.map +1 -0
- package/dist/preflight/evidence-lock.js +130 -0
- package/dist/preflight/evidence-lock.js.map +1 -0
- package/dist/preflight/query-scope.d.ts +3 -0
- package/dist/preflight/query-scope.d.ts.map +1 -0
- package/dist/preflight/query-scope.js +40 -0
- package/dist/preflight/query-scope.js.map +1 -0
- package/dist/resources/uri.d.ts +5 -0
- package/dist/resources/uri.d.ts.map +1 -0
- package/dist/resources/uri.js +15 -0
- package/dist/resources/uri.js.map +1 -0
- package/dist/storage/lancedb.d.ts +1 -0
- package/dist/storage/lancedb.d.ts.map +1 -1
- package/dist/storage/lancedb.js +24 -3
- package/dist/storage/lancedb.js.map +1 -1
- package/dist/storage/types.d.ts +5 -0
- package/dist/storage/types.d.ts.map +1 -1
- package/dist/storage/types.js.map +1 -1
- package/dist/types/index.d.ts +20 -3
- package/dist/types/index.d.ts.map +1 -1
- package/dist/utils/chunking.js +2 -2
- package/dist/utils/chunking.js.map +1 -1
- package/dist/utils/git-dates.d.ts +1 -0
- package/dist/utils/git-dates.d.ts.map +1 -1
- package/dist/utils/git-dates.js +20 -0
- package/dist/utils/git-dates.js.map +1 -1
- package/dist/utils/usage-tracker.d.ts.map +1 -1
- package/dist/utils/usage-tracker.js +3 -8
- package/dist/utils/usage-tracker.js.map +1 -1
- package/package.json +17 -9
package/dist/core/search.js
CHANGED
|
@@ -9,13 +9,67 @@ import { getEmbeddingProvider } from '../embeddings/index.js';
|
|
|
9
9
|
import { getStorageProvider } from '../storage/index.js';
|
|
10
10
|
import { analyzerRegistry } from './analyzer-registry.js';
|
|
11
11
|
import { IndexCorruptedError } from '../errors/index.js';
|
|
12
|
+
import { isTestingRelatedQuery } from '../preflight/query-scope.js';
|
|
13
|
+
import { assessSearchQuality } from './search-quality.js';
|
|
14
|
+
import { rerank } from './reranker.js';
|
|
12
15
|
import { CODEBASE_CONTEXT_DIRNAME, INTELLIGENCE_FILENAME, KEYWORD_INDEX_FILENAME, VECTOR_DB_DIRNAME } from '../constants/codebase-context.js';
|
|
13
16
|
// Baseline search options. Caller-supplied options are spread over these in
// `search()`, so the object is frozen to keep one call from mutating the
// defaults seen by later calls.
const DEFAULT_SEARCH_OPTIONS = Object.freeze({
    useSemanticSearch: true,
    useKeywordSearch: true,
    // semanticWeight/keywordWeight intentionally omitted —
    // intent classification provides per-query weights.
    // Callers can still override by passing explicit values.
    profile: 'explore',
    enableQueryExpansion: true,
    enableLowConfidenceRescue: true,
    candidateFloor: 30,
    enableReranker: true
});
|
|
28
|
+
// Query-expansion table. Each record pairs a case-insensitive whole-word
// `pattern` with the `terms` offered as expansion candidates when that
// pattern matches the query. Written as [pattern, terms] pairs and mapped
// into { pattern, terms } records.
const QUERY_EXPANSION_HINTS = [
    [
        /\b(auth|authentication|login|signin|sign-in|session|token|oauth)\b/i,
        ['auth', 'login', 'token', 'session', 'guard', 'oauth']
    ],
    [
        /\b(route|routes|routing|router|navigate|navigation|redirect|path)\b/i,
        ['router', 'route', 'navigation', 'redirect', 'path']
    ],
    [
        /\b(config|configuration|configure|setup|register|provider|providers|bootstrap)\b/i,
        ['config', 'setup', 'register', 'provider', 'bootstrap']
    ],
    [
        /\b(role|roles|permission|permissions|authorization|authorisation|access)\b/i,
        ['roles', 'permissions', 'access', 'policy', 'guard']
    ],
    [
        /\b(interceptor|middleware|request|response|http)\b/i,
        ['interceptor', 'middleware', 'http', 'request', 'response']
    ],
    [
        /\b(theme|styles?|styling|palette|color|branding|upload)\b/i,
        ['theme', 'styles', 'palette', 'color', 'branding', 'upload']
    ]
].map(([pattern, terms]) => ({ pattern, terms }));
|
|
54
|
+
// Low-signal words that are dropped when a query or path is tokenized.
const QUERY_STOP_WORDS = new Set([
    // articles
    'the', 'a', 'an',
    // prepositions and conjunctions
    'to', 'of', 'for', 'and', 'or', 'with', 'in', 'on', 'by',
    // question / linking words
    'how', 'are', 'is',
    // temporal words
    'after', 'before'
]);
|
|
19
73
|
export class CodebaseSearcher {
|
|
20
74
|
rootPath;
|
|
21
75
|
storagePath;
|
|
@@ -24,8 +78,9 @@ export class CodebaseSearcher {
|
|
|
24
78
|
embeddingProvider = null;
|
|
25
79
|
storageProvider = null;
|
|
26
80
|
initialized = false;
|
|
27
|
-
//
|
|
81
|
+
// Pattern intelligence for trend detection
|
|
28
82
|
patternIntelligence = null;
|
|
83
|
+
importCentrality = null;
|
|
29
84
|
constructor(rootPath) {
|
|
30
85
|
this.rootPath = rootPath;
|
|
31
86
|
this.storagePath = path.join(rootPath, CODEBASE_CONTEXT_DIRNAME, VECTOR_DB_DIRNAME);
|
|
@@ -78,7 +133,7 @@ export class CodebaseSearcher {
|
|
|
78
133
|
}
|
|
79
134
|
}
|
|
80
135
|
/**
|
|
81
|
-
*
|
|
136
|
+
* Load pattern intelligence for trend detection and warnings
|
|
82
137
|
*/
|
|
83
138
|
async loadPatternIntelligence() {
|
|
84
139
|
try {
|
|
@@ -112,14 +167,32 @@ export class CodebaseSearcher {
|
|
|
112
167
|
}
|
|
113
168
|
this.patternIntelligence = { decliningPatterns, risingPatterns, patternWarnings };
|
|
114
169
|
console.error(`[search] Loaded pattern intelligence: ${decliningPatterns.size} declining, ${risingPatterns.size} rising patterns`);
|
|
170
|
+
this.importCentrality = new Map();
|
|
171
|
+
if (intelligence.internalFileGraph && intelligence.internalFileGraph.imports) {
|
|
172
|
+
// Count how many files import each file (in-degree centrality)
|
|
173
|
+
const importCounts = new Map();
|
|
174
|
+
for (const [_importingFile, importedFiles] of Object.entries(intelligence.internalFileGraph.imports)) {
|
|
175
|
+
const imports = importedFiles;
|
|
176
|
+
for (const imported of imports) {
|
|
177
|
+
importCounts.set(imported, (importCounts.get(imported) || 0) + 1);
|
|
178
|
+
}
|
|
179
|
+
}
|
|
180
|
+
// Normalize centrality to 0-1 range
|
|
181
|
+
const maxImports = Math.max(...Array.from(importCounts.values()), 1);
|
|
182
|
+
for (const [file, count] of importCounts) {
|
|
183
|
+
this.importCentrality.set(file, count / maxImports);
|
|
184
|
+
}
|
|
185
|
+
console.error(`[search] Computed import centrality for ${importCounts.size} files`);
|
|
186
|
+
}
|
|
115
187
|
}
|
|
116
188
|
catch (error) {
|
|
117
189
|
console.warn('Pattern intelligence load failed (will proceed without trend detection):', error);
|
|
118
190
|
this.patternIntelligence = null;
|
|
191
|
+
this.importCentrality = null;
|
|
119
192
|
}
|
|
120
193
|
}
|
|
121
194
|
/**
|
|
122
|
-
*
|
|
195
|
+
* Detect pattern trend from chunk content
|
|
123
196
|
*/
|
|
124
197
|
detectChunkTrend(chunk) {
|
|
125
198
|
if (!this.patternIntelligence) {
|
|
@@ -144,82 +217,281 @@ export class CodebaseSearcher {
|
|
|
144
217
|
}
|
|
145
218
|
return { trend: 'Stable' };
|
|
146
219
|
}
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
220
|
+
isTestFile(filePath) {
|
|
221
|
+
const normalized = filePath.toLowerCase().replace(/\\/g, '/');
|
|
222
|
+
return (normalized.includes('.spec.') ||
|
|
223
|
+
normalized.includes('.test.') ||
|
|
224
|
+
normalized.includes('/e2e/') ||
|
|
225
|
+
normalized.includes('/__tests__/'));
|
|
226
|
+
}
|
|
227
|
+
normalizeQueryTerms(query) {
|
|
228
|
+
return query
|
|
229
|
+
.toLowerCase()
|
|
230
|
+
.split(/[^a-z0-9_]+/)
|
|
231
|
+
.filter((term) => term.length > 2 && !QUERY_STOP_WORDS.has(term));
|
|
232
|
+
}
|
|
233
|
+
/**
|
|
234
|
+
* Classify query intent based on heuristic patterns
|
|
235
|
+
*/
|
|
236
|
+
classifyQueryIntent(query) {
|
|
237
|
+
const lowerQuery = query.toLowerCase();
|
|
238
|
+
// EXACT_NAME: Contains PascalCase or camelCase tokens (literal class/component names)
|
|
239
|
+
if (/[A-Z][a-z]+[A-Z]/.test(query) || /[a-z][A-Z]/.test(query)) {
|
|
240
|
+
return {
|
|
241
|
+
intent: 'EXACT_NAME',
|
|
242
|
+
weights: { semantic: 0.4, keyword: 0.6 } // Keyword search dominates for exact names
|
|
243
|
+
};
|
|
150
244
|
}
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
245
|
+
// CONFIG: Configuration/setup queries
|
|
246
|
+
const configKeywords = [
|
|
247
|
+
'config',
|
|
248
|
+
'setup',
|
|
249
|
+
'routing',
|
|
250
|
+
'providers',
|
|
251
|
+
'configuration',
|
|
252
|
+
'bootstrap'
|
|
253
|
+
];
|
|
254
|
+
if (configKeywords.some((kw) => lowerQuery.includes(kw))) {
|
|
255
|
+
return {
|
|
256
|
+
intent: 'CONFIG',
|
|
257
|
+
weights: { semantic: 0.5, keyword: 0.5 } // Balanced
|
|
258
|
+
};
|
|
259
|
+
}
|
|
260
|
+
// WIRING: DI/registration queries
|
|
261
|
+
const wiringKeywords = [
|
|
262
|
+
'provide',
|
|
263
|
+
'inject',
|
|
264
|
+
'dependency',
|
|
265
|
+
'register',
|
|
266
|
+
'wire',
|
|
267
|
+
'bootstrap',
|
|
268
|
+
'module'
|
|
269
|
+
];
|
|
270
|
+
if (wiringKeywords.some((kw) => lowerQuery.includes(kw))) {
|
|
271
|
+
return {
|
|
272
|
+
intent: 'WIRING',
|
|
273
|
+
weights: { semantic: 0.5, keyword: 0.5 } // Balanced
|
|
274
|
+
};
|
|
275
|
+
}
|
|
276
|
+
// FLOW: Action/navigation queries
|
|
277
|
+
const flowVerbs = [
|
|
278
|
+
'navigate',
|
|
279
|
+
'redirect',
|
|
280
|
+
'route',
|
|
281
|
+
'handle',
|
|
282
|
+
'process',
|
|
283
|
+
'execute',
|
|
284
|
+
'trigger',
|
|
285
|
+
'dispatch'
|
|
286
|
+
];
|
|
287
|
+
if (flowVerbs.some((verb) => lowerQuery.includes(verb))) {
|
|
288
|
+
return {
|
|
289
|
+
intent: 'FLOW',
|
|
290
|
+
weights: { semantic: 0.6, keyword: 0.4 } // Semantic helps with flow understanding
|
|
291
|
+
};
|
|
292
|
+
}
|
|
293
|
+
// CONCEPTUAL: Natural language without code tokens (default)
|
|
294
|
+
return {
|
|
295
|
+
intent: 'CONCEPTUAL',
|
|
296
|
+
weights: { semantic: 0.7, keyword: 0.3 } // Semantic dominates for concepts
|
|
154
297
|
};
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
});
|
|
170
|
-
}
|
|
171
|
-
});
|
|
298
|
+
}
|
|
299
|
+
buildQueryVariants(query, maxExpansions) {
|
|
300
|
+
const variants = [{ query, weight: 1 }];
|
|
301
|
+
if (maxExpansions <= 0)
|
|
302
|
+
return variants;
|
|
303
|
+
const normalized = query.toLowerCase();
|
|
304
|
+
const terms = new Set(this.normalizeQueryTerms(query));
|
|
305
|
+
for (const hint of QUERY_EXPANSION_HINTS) {
|
|
306
|
+
if (!hint.pattern.test(query))
|
|
307
|
+
continue;
|
|
308
|
+
for (const term of hint.terms) {
|
|
309
|
+
if (!normalized.includes(term)) {
|
|
310
|
+
terms.add(term);
|
|
311
|
+
}
|
|
172
312
|
}
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
313
|
+
}
|
|
314
|
+
const addedTerms = Array.from(terms).filter((term) => !normalized.includes(term));
|
|
315
|
+
if (addedTerms.length === 0)
|
|
316
|
+
return variants;
|
|
317
|
+
const firstExpansion = `${query} ${addedTerms.slice(0, 6).join(' ')}`.trim();
|
|
318
|
+
if (firstExpansion !== query) {
|
|
319
|
+
variants.push({ query: firstExpansion, weight: 0.35 });
|
|
320
|
+
}
|
|
321
|
+
if (maxExpansions > 1 && addedTerms.length > 6) {
|
|
322
|
+
const secondExpansion = `${query} ${addedTerms.slice(6, 12).join(' ')}`.trim();
|
|
323
|
+
if (secondExpansion !== query) {
|
|
324
|
+
variants.push({ query: secondExpansion, weight: 0.25 });
|
|
325
|
+
}
|
|
326
|
+
}
|
|
327
|
+
return variants.slice(0, 1 + maxExpansions);
|
|
328
|
+
}
|
|
329
|
+
isTemplateOrStyleFile(filePath) {
|
|
330
|
+
const ext = path.extname(filePath).toLowerCase();
|
|
331
|
+
return ['.html', '.scss', '.css', '.less', '.sass', '.styl'].includes(ext);
|
|
332
|
+
}
|
|
333
|
+
isCompositionRootFile(filePath) {
|
|
334
|
+
const normalized = filePath.toLowerCase().replace(/\\/g, '/');
|
|
335
|
+
const base = path.basename(normalized);
|
|
336
|
+
if (/^(main|index|bootstrap|startup)\./.test(base))
|
|
337
|
+
return true;
|
|
338
|
+
return (normalized.includes('/routes') ||
|
|
339
|
+
normalized.includes('/routing') ||
|
|
340
|
+
normalized.includes('/router') ||
|
|
341
|
+
normalized.includes('/config') ||
|
|
342
|
+
normalized.includes('/providers'));
|
|
343
|
+
}
|
|
344
|
+
queryPathTokenOverlap(filePath, query) {
|
|
345
|
+
const queryTerms = new Set(this.normalizeQueryTerms(query));
|
|
346
|
+
if (queryTerms.size === 0)
|
|
347
|
+
return 0;
|
|
348
|
+
const pathTerms = this.normalizeQueryTerms(filePath.replace(/\\/g, '/'));
|
|
349
|
+
return pathTerms.reduce((count, term) => (queryTerms.has(term) ? count + 1 : count), 0);
|
|
350
|
+
}
|
|
351
|
+
isLikelyWiringOrFlowQuery(query) {
|
|
352
|
+
return /\b(route|router|routing|navigate|navigation|redirect|auth|authentication|login|provider|register|config|configuration|interceptor|middleware)\b/i.test(query);
|
|
353
|
+
}
|
|
354
|
+
isActionOrHowQuery(query) {
|
|
355
|
+
return /\b(how|where|configure|configured|setup|register|wire|wiring|navigate|redirect|login|authenticate|copy|upload|handle|create|update|delete)\b/i.test(query);
|
|
356
|
+
}
|
|
357
|
+
isDefinitionHeavyResult(chunk) {
|
|
358
|
+
const normalizedPath = chunk.filePath.toLowerCase().replace(/\\/g, '/');
|
|
359
|
+
const componentType = (chunk.componentType || '').toLowerCase();
|
|
360
|
+
if (['type', 'interface', 'enum', 'constant'].includes(componentType))
|
|
361
|
+
return true;
|
|
362
|
+
return (normalizedPath.includes('/models/') ||
|
|
363
|
+
normalizedPath.includes('/interfaces/') ||
|
|
364
|
+
normalizedPath.includes('/types/') ||
|
|
365
|
+
normalizedPath.includes('/constants'));
|
|
366
|
+
}
|
|
367
|
+
scoreAndSortResults(query, limit, results, profile, intent, totalVariantWeight) {
|
|
368
|
+
const likelyWiringQuery = this.isLikelyWiringOrFlowQuery(query);
|
|
369
|
+
const actionQuery = this.isActionOrHowQuery(query);
|
|
370
|
+
// RRF: k=60 is the standard parameter (proven robust in Elasticsearch + TOSS paper arXiv:2208.11274)
|
|
371
|
+
const RRF_K = 60;
|
|
372
|
+
// Collect all unique chunks from both retrieval channels
|
|
373
|
+
const allChunks = new Map();
|
|
374
|
+
const rrfScores = new Map();
|
|
375
|
+
// Gather all chunks
|
|
376
|
+
for (const [id, entry] of results.semantic) {
|
|
377
|
+
allChunks.set(id, entry.chunk);
|
|
378
|
+
}
|
|
379
|
+
for (const [id, entry] of results.keyword) {
|
|
380
|
+
if (!allChunks.has(id)) {
|
|
381
|
+
allChunks.set(id, entry.chunk);
|
|
382
|
+
}
|
|
383
|
+
}
|
|
384
|
+
// Calculate RRF scores: RRF(d) = SUM(weight_i / (k + rank_i))
|
|
385
|
+
for (const [id] of allChunks) {
|
|
386
|
+
let rrfScore = 0;
|
|
387
|
+
// Add contributions from semantic ranks
|
|
388
|
+
const semanticEntry = results.semantic.get(id);
|
|
389
|
+
if (semanticEntry) {
|
|
390
|
+
for (const { rank, weight } of semanticEntry.ranks) {
|
|
391
|
+
rrfScore += weight / (RRF_K + rank);
|
|
176
392
|
}
|
|
177
|
-
console.warn('Semantic search failed:', error);
|
|
178
393
|
}
|
|
394
|
+
// Add contributions from keyword ranks
|
|
395
|
+
const keywordEntry = results.keyword.get(id);
|
|
396
|
+
if (keywordEntry) {
|
|
397
|
+
for (const { rank, weight } of keywordEntry.ranks) {
|
|
398
|
+
rrfScore += weight / (RRF_K + rank);
|
|
399
|
+
}
|
|
400
|
+
}
|
|
401
|
+
rrfScores.set(id, rrfScore);
|
|
179
402
|
}
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
chunk: result.chunk,
|
|
192
|
-
scores: [result.score * (keywordWeight || 0.3)]
|
|
193
|
-
});
|
|
194
|
-
}
|
|
195
|
-
});
|
|
403
|
+
// Normalize by theoretical maximum (rank-0 in every list), NOT by actual max.
|
|
404
|
+
// Using actual max makes top result always 1.0, breaking quality confidence gating.
|
|
405
|
+
const theoreticalMaxRrf = totalVariantWeight / (RRF_K + 0);
|
|
406
|
+
const maxRrfScore = Math.max(theoreticalMaxRrf, 0.01);
|
|
407
|
+
// Separate test files from implementation files before scoring
|
|
408
|
+
const isNonTestQuery = !isTestingRelatedQuery(query);
|
|
409
|
+
const implementationChunks = [];
|
|
410
|
+
const testChunks = [];
|
|
411
|
+
for (const [id, chunk] of allChunks.entries()) {
|
|
412
|
+
if (this.isTestFile(chunk.filePath)) {
|
|
413
|
+
testChunks.push([id, chunk]);
|
|
196
414
|
}
|
|
197
|
-
|
|
198
|
-
|
|
415
|
+
else {
|
|
416
|
+
implementationChunks.push([id, chunk]);
|
|
199
417
|
}
|
|
200
418
|
}
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
//
|
|
206
|
-
//
|
|
207
|
-
combinedScore =
|
|
208
|
-
//
|
|
209
|
-
if (chunk.componentType && chunk.
|
|
210
|
-
combinedScore
|
|
419
|
+
// For non-test queries: filter test files from candidate pool, keep max 1 test file only if < 3 implementation matches
|
|
420
|
+
const chunksToScore = isNonTestQuery ? implementationChunks : Array.from(allChunks.entries());
|
|
421
|
+
const scoredResults = chunksToScore
|
|
422
|
+
.map(([id, chunk]) => {
|
|
423
|
+
// RRF score normalized to [0,1] range. Boosts below are unclamped
|
|
424
|
+
// to preserve score differentiation — only relative ordering matters.
|
|
425
|
+
let combinedScore = rrfScores.get(id) / maxRrfScore;
|
|
426
|
+
// Slight boost when analyzer identified a concrete component type
|
|
427
|
+
if (chunk.componentType && chunk.componentType !== 'unknown') {
|
|
428
|
+
combinedScore *= 1.1;
|
|
211
429
|
}
|
|
212
430
|
// Boost if layer is detected
|
|
213
431
|
if (chunk.layer && chunk.layer !== 'unknown') {
|
|
214
|
-
combinedScore
|
|
432
|
+
combinedScore *= 1.1;
|
|
433
|
+
}
|
|
434
|
+
if (actionQuery && this.isDefinitionHeavyResult(chunk)) {
|
|
435
|
+
combinedScore *= 0.82;
|
|
436
|
+
}
|
|
437
|
+
if (actionQuery &&
|
|
438
|
+
['service', 'component', 'interceptor', 'guard', 'module', 'resolver'].includes((chunk.componentType || '').toLowerCase())) {
|
|
439
|
+
combinedScore *= 1.06;
|
|
215
440
|
}
|
|
216
|
-
//
|
|
441
|
+
// Demote template/style files for behavioral queries — they describe
|
|
442
|
+
// structure/presentation, not implementation logic.
|
|
443
|
+
if ((intent === 'FLOW' || intent === 'WIRING' || actionQuery) &&
|
|
444
|
+
this.isTemplateOrStyleFile(chunk.filePath)) {
|
|
445
|
+
combinedScore *= 0.75;
|
|
446
|
+
}
|
|
447
|
+
// Light intent-aware boost for likely wiring/configuration queries.
|
|
448
|
+
if (likelyWiringQuery && profile !== 'explore') {
|
|
449
|
+
if (this.isCompositionRootFile(chunk.filePath)) {
|
|
450
|
+
combinedScore *= 1.12;
|
|
451
|
+
}
|
|
452
|
+
}
|
|
453
|
+
if (intent === 'FLOW') {
|
|
454
|
+
// Boost service/guard/interceptor files for action/navigation queries
|
|
455
|
+
if (['service', 'guard', 'interceptor', 'middleware'].includes((chunk.componentType || '').toLowerCase())) {
|
|
456
|
+
combinedScore *= 1.15;
|
|
457
|
+
}
|
|
458
|
+
}
|
|
459
|
+
else if (intent === 'CONFIG') {
|
|
460
|
+
// Boost composition-root files for configuration queries
|
|
461
|
+
if (this.isCompositionRootFile(chunk.filePath)) {
|
|
462
|
+
combinedScore *= 1.2;
|
|
463
|
+
}
|
|
464
|
+
}
|
|
465
|
+
else if (intent === 'WIRING') {
|
|
466
|
+
// Boost DI/module files for wiring queries
|
|
467
|
+
if (['module', 'provider', 'config'].some((type) => (chunk.componentType || '').toLowerCase().includes(type))) {
|
|
468
|
+
combinedScore *= 1.18;
|
|
469
|
+
}
|
|
470
|
+
if (this.isCompositionRootFile(chunk.filePath)) {
|
|
471
|
+
combinedScore *= 1.22;
|
|
472
|
+
}
|
|
473
|
+
}
|
|
474
|
+
const pathOverlap = this.queryPathTokenOverlap(chunk.filePath, query);
|
|
475
|
+
if (pathOverlap >= 2) {
|
|
476
|
+
combinedScore *= 1.08;
|
|
477
|
+
}
|
|
478
|
+
if (this.importCentrality) {
|
|
479
|
+
const normalizedRoot = this.rootPath.replace(/\\/g, '/').replace(/\/?$/, '/');
|
|
480
|
+
const normalizedPath = chunk.filePath.replace(/\\/g, '/').replace(normalizedRoot, '');
|
|
481
|
+
const centrality = this.importCentrality.get(normalizedPath);
|
|
482
|
+
if (centrality !== undefined && centrality > 0.1) {
|
|
483
|
+
// Boost files with high centrality (many imports)
|
|
484
|
+
const centralityBoost = 1.0 + centrality * 0.15; // Up to +15% for max centrality
|
|
485
|
+
combinedScore *= centralityBoost;
|
|
486
|
+
}
|
|
487
|
+
}
|
|
488
|
+
// Detect pattern trend and apply momentum boost
|
|
217
489
|
const { trend, warning } = this.detectChunkTrend(chunk);
|
|
218
490
|
if (trend === 'Rising') {
|
|
219
|
-
combinedScore
|
|
491
|
+
combinedScore *= 1.15; // +15% for modern patterns
|
|
220
492
|
}
|
|
221
493
|
else if (trend === 'Declining') {
|
|
222
|
-
combinedScore
|
|
494
|
+
combinedScore *= 0.9; // -10% for legacy patterns
|
|
223
495
|
}
|
|
224
496
|
const summary = this.generateSummary(chunk);
|
|
225
497
|
const snippet = this.generateSnippet(chunk.content);
|
|
@@ -236,14 +508,181 @@ export class CodebaseSearcher {
|
|
|
236
508
|
componentType: chunk.componentType,
|
|
237
509
|
layer: chunk.layer,
|
|
238
510
|
metadata: chunk.metadata,
|
|
239
|
-
// v1.2: Pattern momentum awareness
|
|
240
511
|
trend,
|
|
241
512
|
patternWarning: warning
|
|
242
513
|
};
|
|
243
514
|
})
|
|
244
|
-
.sort((a, b) => b.score - a.score)
|
|
245
|
-
|
|
246
|
-
|
|
515
|
+
.sort((a, b) => b.score - a.score);
|
|
516
|
+
const seenFiles = new Set();
|
|
517
|
+
const deduped = [];
|
|
518
|
+
for (const result of scoredResults) {
|
|
519
|
+
const normalizedPath = result.filePath.toLowerCase().replace(/\\/g, '/');
|
|
520
|
+
if (seenFiles.has(normalizedPath))
|
|
521
|
+
continue;
|
|
522
|
+
seenFiles.add(normalizedPath);
|
|
523
|
+
deduped.push(result);
|
|
524
|
+
if (deduped.length >= limit)
|
|
525
|
+
break;
|
|
526
|
+
}
|
|
527
|
+
const finalResults = deduped;
|
|
528
|
+
if (isNonTestQuery &&
|
|
529
|
+
finalResults.length < 3 &&
|
|
530
|
+
finalResults.length < limit &&
|
|
531
|
+
testChunks.length > 0) {
|
|
532
|
+
// Find the highest-scoring test file
|
|
533
|
+
const bestTestChunk = testChunks
|
|
534
|
+
.map(([id, chunk]) => ({
|
|
535
|
+
id,
|
|
536
|
+
chunk,
|
|
537
|
+
score: rrfScores.get(id) / maxRrfScore
|
|
538
|
+
}))
|
|
539
|
+
.sort((a, b) => b.score - a.score)[0];
|
|
540
|
+
if (bestTestChunk) {
|
|
541
|
+
const { trend, warning } = this.detectChunkTrend(bestTestChunk.chunk);
|
|
542
|
+
const summary = this.generateSummary(bestTestChunk.chunk);
|
|
543
|
+
const snippet = this.generateSnippet(bestTestChunk.chunk.content);
|
|
544
|
+
finalResults.push({
|
|
545
|
+
summary,
|
|
546
|
+
snippet,
|
|
547
|
+
filePath: bestTestChunk.chunk.filePath,
|
|
548
|
+
startLine: bestTestChunk.chunk.startLine,
|
|
549
|
+
endLine: bestTestChunk.chunk.endLine,
|
|
550
|
+
score: bestTestChunk.score * 0.5, // Demote below implementation files
|
|
551
|
+
relevanceReason: this.generateRelevanceReason(bestTestChunk.chunk, query) + ' (test file)',
|
|
552
|
+
language: bestTestChunk.chunk.language,
|
|
553
|
+
framework: bestTestChunk.chunk.framework,
|
|
554
|
+
componentType: bestTestChunk.chunk.componentType,
|
|
555
|
+
layer: bestTestChunk.chunk.layer,
|
|
556
|
+
metadata: bestTestChunk.chunk.metadata,
|
|
557
|
+
trend,
|
|
558
|
+
patternWarning: warning
|
|
559
|
+
});
|
|
560
|
+
}
|
|
561
|
+
}
|
|
562
|
+
return finalResults;
|
|
563
|
+
}
|
|
564
|
+
pickBetterResultSet(query, primary, rescue) {
|
|
565
|
+
const primaryQuality = assessSearchQuality(query, primary);
|
|
566
|
+
const rescueQuality = assessSearchQuality(query, rescue);
|
|
567
|
+
if (rescueQuality.status === 'ok' &&
|
|
568
|
+
primaryQuality.status === 'low_confidence' &&
|
|
569
|
+
rescueQuality.confidence >= primaryQuality.confidence) {
|
|
570
|
+
return rescue;
|
|
571
|
+
}
|
|
572
|
+
if (rescueQuality.confidence >= primaryQuality.confidence + 0.05) {
|
|
573
|
+
return rescue;
|
|
574
|
+
}
|
|
575
|
+
return primary;
|
|
576
|
+
}
|
|
577
|
+
async collectHybridMatches(queryVariants, candidateLimit, filters, useSemanticSearch, useKeywordSearch, semanticWeight, keywordWeight) {
|
|
578
|
+
const semanticRanks = new Map();
|
|
579
|
+
const keywordRanks = new Map();
|
|
580
|
+
// RRF uses ranks instead of scores for fusion robustness
|
|
581
|
+
if (useSemanticSearch && this.embeddingProvider && this.storageProvider) {
|
|
582
|
+
try {
|
|
583
|
+
for (const variant of queryVariants) {
|
|
584
|
+
const vectorResults = await this.semanticSearch(variant.query, candidateLimit, filters);
|
|
585
|
+
// Assign ranks based on retrieval order (0-indexed)
|
|
586
|
+
vectorResults.forEach((result, index) => {
|
|
587
|
+
const id = result.chunk.id;
|
|
588
|
+
const rank = index; // 0-indexed rank
|
|
589
|
+
const weight = semanticWeight * variant.weight;
|
|
590
|
+
const existing = semanticRanks.get(id);
|
|
591
|
+
if (existing) {
|
|
592
|
+
existing.ranks.push({ rank, weight });
|
|
593
|
+
}
|
|
594
|
+
else {
|
|
595
|
+
semanticRanks.set(id, {
|
|
596
|
+
chunk: result.chunk,
|
|
597
|
+
ranks: [{ rank, weight }]
|
|
598
|
+
});
|
|
599
|
+
}
|
|
600
|
+
});
|
|
601
|
+
}
|
|
602
|
+
}
|
|
603
|
+
catch (error) {
|
|
604
|
+
if (error instanceof IndexCorruptedError) {
|
|
605
|
+
throw error; // Propagate to handler for auto-heal
|
|
606
|
+
}
|
|
607
|
+
console.warn('Semantic search failed:', error);
|
|
608
|
+
}
|
|
609
|
+
}
|
|
610
|
+
if (useKeywordSearch && this.fuseIndex) {
|
|
611
|
+
try {
|
|
612
|
+
for (const variant of queryVariants) {
|
|
613
|
+
const keywordResults = await this.keywordSearch(variant.query, candidateLimit, filters);
|
|
614
|
+
// Assign ranks based on retrieval order (0-indexed)
|
|
615
|
+
keywordResults.forEach((result, index) => {
|
|
616
|
+
const id = result.chunk.id;
|
|
617
|
+
const rank = index; // 0-indexed rank
|
|
618
|
+
const weight = keywordWeight * variant.weight;
|
|
619
|
+
const existing = keywordRanks.get(id);
|
|
620
|
+
if (existing) {
|
|
621
|
+
existing.ranks.push({ rank, weight });
|
|
622
|
+
}
|
|
623
|
+
else {
|
|
624
|
+
keywordRanks.set(id, {
|
|
625
|
+
chunk: result.chunk,
|
|
626
|
+
ranks: [{ rank, weight }]
|
|
627
|
+
});
|
|
628
|
+
}
|
|
629
|
+
});
|
|
630
|
+
}
|
|
631
|
+
}
|
|
632
|
+
catch (error) {
|
|
633
|
+
console.warn('Keyword search failed:', error);
|
|
634
|
+
}
|
|
635
|
+
}
|
|
636
|
+
return { semantic: semanticRanks, keyword: keywordRanks };
|
|
637
|
+
}
|
|
638
|
+
async search(query, limit = 5, filters, options = DEFAULT_SEARCH_OPTIONS) {
|
|
639
|
+
if (!this.initialized) {
|
|
640
|
+
await this.initialize();
|
|
641
|
+
}
|
|
642
|
+
const merged = {
|
|
643
|
+
...DEFAULT_SEARCH_OPTIONS,
|
|
644
|
+
...options
|
|
645
|
+
};
|
|
646
|
+
const { useSemanticSearch, useKeywordSearch, profile, enableQueryExpansion, enableLowConfidenceRescue, candidateFloor, enableReranker } = merged;
|
|
647
|
+
const { intent, weights: intentWeights } = this.classifyQueryIntent(query);
|
|
648
|
+
// Intent weights are the default; caller-supplied weights override them
|
|
649
|
+
const finalSemanticWeight = merged.semanticWeight ?? intentWeights.semantic;
|
|
650
|
+
const finalKeywordWeight = merged.keywordWeight ?? intentWeights.keyword;
|
|
651
|
+
const candidateLimit = Math.max(limit * 2, candidateFloor || 30);
|
|
652
|
+
const primaryVariants = this.buildQueryVariants(query, enableQueryExpansion ? 1 : 0);
|
|
653
|
+
const primaryMatches = await this.collectHybridMatches(primaryVariants, candidateLimit, filters, Boolean(useSemanticSearch), Boolean(useKeywordSearch), finalSemanticWeight, finalKeywordWeight);
|
|
654
|
+
const primaryTotalWeight = primaryVariants.reduce((sum, v) => sum + v.weight, 0) *
|
|
655
|
+
(finalSemanticWeight + finalKeywordWeight);
|
|
656
|
+
const primaryResults = this.scoreAndSortResults(query, limit, primaryMatches, (profile || 'explore'), intent, primaryTotalWeight);
|
|
657
|
+
let bestResults = primaryResults;
|
|
658
|
+
if (enableLowConfidenceRescue) {
|
|
659
|
+
const primaryQuality = assessSearchQuality(query, primaryResults);
|
|
660
|
+
if (primaryQuality.status === 'low_confidence') {
|
|
661
|
+
const rescueVariants = this.buildQueryVariants(query, 2).slice(1);
|
|
662
|
+
if (rescueVariants.length > 0) {
|
|
663
|
+
const rescueMatches = await this.collectHybridMatches(rescueVariants.map((variant, index) => ({
|
|
664
|
+
query: variant.query,
|
|
665
|
+
weight: index === 0 ? 1 : 0.8
|
|
666
|
+
})), candidateLimit, filters, Boolean(useSemanticSearch), Boolean(useKeywordSearch), finalSemanticWeight, finalKeywordWeight);
|
|
667
|
+
const rescueVariantWeights = rescueVariants.map((_, i) => (i === 0 ? 1 : 0.8));
|
|
668
|
+
const rescueTotalWeight = rescueVariantWeights.reduce((sum, w) => sum + w, 0) *
|
|
669
|
+
(finalSemanticWeight + finalKeywordWeight);
|
|
670
|
+
const rescueResults = this.scoreAndSortResults(query, limit, rescueMatches, (profile || 'explore'), intent, rescueTotalWeight);
|
|
671
|
+
bestResults = this.pickBetterResultSet(query, primaryResults, rescueResults);
|
|
672
|
+
}
|
|
673
|
+
}
|
|
674
|
+
}
|
|
675
|
+
// Stage-2: cross-encoder reranking when top scores are ambiguous
|
|
676
|
+
if (enableReranker) {
|
|
677
|
+
try {
|
|
678
|
+
bestResults = await rerank(query, bestResults);
|
|
679
|
+
}
|
|
680
|
+
catch (error) {
|
|
681
|
+
// Reranker is non-critical — log and return unranked results
|
|
682
|
+
console.warn('[reranker] Failed, returning original order:', error);
|
|
683
|
+
}
|
|
684
|
+
}
|
|
685
|
+
return bestResults;
|
|
247
686
|
}
|
|
248
687
|
generateSummary(chunk) {
|
|
249
688
|
const analyzer = chunk.framework ? analyzerRegistry.get(chunk.framework) : null;
|
|
@@ -287,7 +726,7 @@ export class CodebaseSearcher {
|
|
|
287
726
|
};
|
|
288
727
|
return `${langMap[ext] || ext.toUpperCase()} in ${fileName}.`;
|
|
289
728
|
}
|
|
290
|
-
generateSnippet(content, maxLines =
|
|
729
|
+
generateSnippet(content, maxLines = 20) {
|
|
291
730
|
const lines = content.split('\n');
|
|
292
731
|
if (lines.length <= maxLines) {
|
|
293
732
|
return content;
|