codebase-context 1.5.1 → 1.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +128 -158
- package/dist/core/indexer.d.ts.map +1 -1
- package/dist/core/indexer.js +26 -16
- package/dist/core/indexer.js.map +1 -1
- package/dist/core/reranker.d.ts +23 -0
- package/dist/core/reranker.d.ts.map +1 -0
- package/dist/core/reranker.js +120 -0
- package/dist/core/reranker.js.map +1 -0
- package/dist/core/search.d.ts +10 -2
- package/dist/core/search.d.ts.map +1 -1
- package/dist/core/search.js +312 -68
- package/dist/core/search.js.map +1 -1
- package/dist/embeddings/transformers.d.ts.map +1 -1
- package/dist/embeddings/transformers.js +17 -7
- package/dist/embeddings/transformers.js.map +1 -1
- package/dist/embeddings/types.d.ts.map +1 -1
- package/dist/embeddings/types.js +3 -0
- package/dist/embeddings/types.js.map +1 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +109 -49
- package/dist/index.js.map +1 -1
- package/dist/preflight/evidence-lock.js +1 -1
- package/dist/types/index.d.ts +0 -3
- package/dist/types/index.d.ts.map +1 -1
- package/dist/utils/chunking.js +2 -2
- package/dist/utils/chunking.js.map +1 -1
- package/dist/utils/usage-tracker.d.ts.map +1 -1
- package/dist/utils/usage-tracker.js +2 -4
- package/dist/utils/usage-tracker.js.map +1 -1
- package/docs/capabilities.md +75 -0
- package/package.json +31 -9
package/dist/core/search.js
CHANGED
|
@@ -11,16 +11,19 @@ import { analyzerRegistry } from './analyzer-registry.js';
|
|
|
11
11
|
import { IndexCorruptedError } from '../errors/index.js';
|
|
12
12
|
import { isTestingRelatedQuery } from '../preflight/query-scope.js';
|
|
13
13
|
import { assessSearchQuality } from './search-quality.js';
|
|
14
|
+
import { rerank } from './reranker.js';
|
|
14
15
|
import { CODEBASE_CONTEXT_DIRNAME, INTELLIGENCE_FILENAME, KEYWORD_INDEX_FILENAME, VECTOR_DB_DIRNAME } from '../constants/codebase-context.js';
|
|
15
16
|
const DEFAULT_SEARCH_OPTIONS = {
|
|
16
17
|
useSemanticSearch: true,
|
|
17
18
|
useKeywordSearch: true,
|
|
18
|
-
semanticWeight
|
|
19
|
-
|
|
19
|
+
// semanticWeight/keywordWeight intentionally omitted —
|
|
20
|
+
// intent classification provides per-query weights.
|
|
21
|
+
// Callers can still override by passing explicit values.
|
|
20
22
|
profile: 'explore',
|
|
21
23
|
enableQueryExpansion: true,
|
|
22
24
|
enableLowConfidenceRescue: true,
|
|
23
|
-
candidateFloor: 30
|
|
25
|
+
candidateFloor: 30,
|
|
26
|
+
enableReranker: true
|
|
24
27
|
};
|
|
25
28
|
const QUERY_EXPANSION_HINTS = [
|
|
26
29
|
{
|
|
@@ -75,8 +78,9 @@ export class CodebaseSearcher {
|
|
|
75
78
|
embeddingProvider = null;
|
|
76
79
|
storageProvider = null;
|
|
77
80
|
initialized = false;
|
|
78
|
-
//
|
|
81
|
+
// Pattern intelligence for trend detection
|
|
79
82
|
patternIntelligence = null;
|
|
83
|
+
importCentrality = null;
|
|
80
84
|
constructor(rootPath) {
|
|
81
85
|
this.rootPath = rootPath;
|
|
82
86
|
this.storagePath = path.join(rootPath, CODEBASE_CONTEXT_DIRNAME, VECTOR_DB_DIRNAME);
|
|
@@ -129,7 +133,7 @@ export class CodebaseSearcher {
|
|
|
129
133
|
}
|
|
130
134
|
}
|
|
131
135
|
/**
|
|
132
|
-
*
|
|
136
|
+
* Load pattern intelligence for trend detection and warnings
|
|
133
137
|
*/
|
|
134
138
|
async loadPatternIntelligence() {
|
|
135
139
|
try {
|
|
@@ -152,7 +156,7 @@ export class CodebaseSearcher {
|
|
|
152
156
|
for (const alt of patternData.alsoDetected) {
|
|
153
157
|
if (alt.trend === 'Declining') {
|
|
154
158
|
decliningPatterns.add(alt.name.toLowerCase());
|
|
155
|
-
patternWarnings.set(alt.name.toLowerCase(),
|
|
159
|
+
patternWarnings.set(alt.name.toLowerCase(), `WARNING: Uses declining pattern: ${alt.name} (${alt.guidance || 'consider modern alternatives'})`);
|
|
156
160
|
}
|
|
157
161
|
else if (alt.trend === 'Rising') {
|
|
158
162
|
risingPatterns.add(alt.name.toLowerCase());
|
|
@@ -163,17 +167,35 @@ export class CodebaseSearcher {
|
|
|
163
167
|
}
|
|
164
168
|
this.patternIntelligence = { decliningPatterns, risingPatterns, patternWarnings };
|
|
165
169
|
console.error(`[search] Loaded pattern intelligence: ${decliningPatterns.size} declining, ${risingPatterns.size} rising patterns`);
|
|
170
|
+
this.importCentrality = new Map();
|
|
171
|
+
if (intelligence.internalFileGraph && intelligence.internalFileGraph.imports) {
|
|
172
|
+
// Count how many files import each file (in-degree centrality)
|
|
173
|
+
const importCounts = new Map();
|
|
174
|
+
for (const [_importingFile, importedFiles] of Object.entries(intelligence.internalFileGraph.imports)) {
|
|
175
|
+
const imports = importedFiles;
|
|
176
|
+
for (const imported of imports) {
|
|
177
|
+
importCounts.set(imported, (importCounts.get(imported) || 0) + 1);
|
|
178
|
+
}
|
|
179
|
+
}
|
|
180
|
+
// Normalize centrality to 0-1 range
|
|
181
|
+
const maxImports = Math.max(...Array.from(importCounts.values()), 1);
|
|
182
|
+
for (const [file, count] of importCounts) {
|
|
183
|
+
this.importCentrality.set(file, count / maxImports);
|
|
184
|
+
}
|
|
185
|
+
console.error(`[search] Computed import centrality for ${importCounts.size} files`);
|
|
186
|
+
}
|
|
166
187
|
}
|
|
167
188
|
catch (error) {
|
|
168
189
|
console.warn('Pattern intelligence load failed (will proceed without trend detection):', error);
|
|
169
190
|
this.patternIntelligence = null;
|
|
191
|
+
this.importCentrality = null;
|
|
170
192
|
}
|
|
171
193
|
}
|
|
172
194
|
/**
|
|
173
|
-
*
|
|
195
|
+
* Detect pattern trend from chunk content
|
|
174
196
|
*/
|
|
175
197
|
detectChunkTrend(chunk) {
|
|
176
|
-
if (!this.patternIntelligence) {
|
|
198
|
+
if (!this.patternIntelligence || chunk.content == null) {
|
|
177
199
|
return { trend: undefined };
|
|
178
200
|
}
|
|
179
201
|
const content = chunk.content.toLowerCase();
|
|
@@ -208,6 +230,72 @@ export class CodebaseSearcher {
|
|
|
208
230
|
.split(/[^a-z0-9_]+/)
|
|
209
231
|
.filter((term) => term.length > 2 && !QUERY_STOP_WORDS.has(term));
|
|
210
232
|
}
|
|
233
|
+
/**
|
|
234
|
+
* Classify query intent based on heuristic patterns
|
|
235
|
+
*/
|
|
236
|
+
classifyQueryIntent(query) {
|
|
237
|
+
const lowerQuery = query.toLowerCase();
|
|
238
|
+
// EXACT_NAME: Contains PascalCase or camelCase tokens (literal class/component names)
|
|
239
|
+
if (/[A-Z][a-z]+[A-Z]/.test(query) || /[a-z][A-Z]/.test(query)) {
|
|
240
|
+
return {
|
|
241
|
+
intent: 'EXACT_NAME',
|
|
242
|
+
weights: { semantic: 0.4, keyword: 0.6 } // Keyword search dominates for exact names
|
|
243
|
+
};
|
|
244
|
+
}
|
|
245
|
+
// CONFIG: Configuration/setup queries
|
|
246
|
+
const configKeywords = [
|
|
247
|
+
'config',
|
|
248
|
+
'setup',
|
|
249
|
+
'routing',
|
|
250
|
+
'providers',
|
|
251
|
+
'configuration',
|
|
252
|
+
'bootstrap'
|
|
253
|
+
];
|
|
254
|
+
if (configKeywords.some((kw) => lowerQuery.includes(kw))) {
|
|
255
|
+
return {
|
|
256
|
+
intent: 'CONFIG',
|
|
257
|
+
weights: { semantic: 0.5, keyword: 0.5 } // Balanced
|
|
258
|
+
};
|
|
259
|
+
}
|
|
260
|
+
// WIRING: DI/registration queries
|
|
261
|
+
const wiringKeywords = [
|
|
262
|
+
'provide',
|
|
263
|
+
'inject',
|
|
264
|
+
'dependency',
|
|
265
|
+
'register',
|
|
266
|
+
'wire',
|
|
267
|
+
'bootstrap',
|
|
268
|
+
'module'
|
|
269
|
+
];
|
|
270
|
+
if (wiringKeywords.some((kw) => lowerQuery.includes(kw))) {
|
|
271
|
+
return {
|
|
272
|
+
intent: 'WIRING',
|
|
273
|
+
weights: { semantic: 0.5, keyword: 0.5 } // Balanced
|
|
274
|
+
};
|
|
275
|
+
}
|
|
276
|
+
// FLOW: Action/navigation queries
|
|
277
|
+
const flowVerbs = [
|
|
278
|
+
'navigate',
|
|
279
|
+
'redirect',
|
|
280
|
+
'route',
|
|
281
|
+
'handle',
|
|
282
|
+
'process',
|
|
283
|
+
'execute',
|
|
284
|
+
'trigger',
|
|
285
|
+
'dispatch'
|
|
286
|
+
];
|
|
287
|
+
if (flowVerbs.some((verb) => lowerQuery.includes(verb))) {
|
|
288
|
+
return {
|
|
289
|
+
intent: 'FLOW',
|
|
290
|
+
weights: { semantic: 0.6, keyword: 0.4 } // Semantic helps with flow understanding
|
|
291
|
+
};
|
|
292
|
+
}
|
|
293
|
+
// CONCEPTUAL: Natural language without code tokens (default)
|
|
294
|
+
return {
|
|
295
|
+
intent: 'CONCEPTUAL',
|
|
296
|
+
weights: { semantic: 0.7, keyword: 0.3 } // Semantic dominates for concepts
|
|
297
|
+
};
|
|
298
|
+
}
|
|
211
299
|
buildQueryVariants(query, maxExpansions) {
|
|
212
300
|
const variants = [{ query, weight: 1 }];
|
|
213
301
|
if (maxExpansions <= 0)
|
|
@@ -238,6 +326,10 @@ export class CodebaseSearcher {
|
|
|
238
326
|
}
|
|
239
327
|
return variants.slice(0, 1 + maxExpansions);
|
|
240
328
|
}
|
|
329
|
+
isTemplateOrStyleFile(filePath) {
|
|
330
|
+
const ext = path.extname(filePath).toLowerCase();
|
|
331
|
+
return ['.html', '.scss', '.css', '.less', '.sass', '.styl'].includes(ext);
|
|
332
|
+
}
|
|
241
333
|
isCompositionRootFile(filePath) {
|
|
242
334
|
const normalized = filePath.toLowerCase().replace(/\\/g, '/');
|
|
243
335
|
const base = path.basename(normalized);
|
|
@@ -272,55 +364,137 @@ export class CodebaseSearcher {
|
|
|
272
364
|
normalizedPath.includes('/types/') ||
|
|
273
365
|
normalizedPath.includes('/constants'));
|
|
274
366
|
}
|
|
275
|
-
scoreAndSortResults(query, limit, results, profile) {
|
|
367
|
+
scoreAndSortResults(query, limit, results, profile, intent, totalVariantWeight) {
|
|
276
368
|
const likelyWiringQuery = this.isLikelyWiringOrFlowQuery(query);
|
|
277
369
|
const actionQuery = this.isActionOrHowQuery(query);
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
370
|
+
// RRF: k=60 is the standard parameter (proven robust in Elasticsearch + TOSS paper arXiv:2208.11274)
|
|
371
|
+
const RRF_K = 60;
|
|
372
|
+
// Collect all unique chunks from both retrieval channels
|
|
373
|
+
const allChunks = new Map();
|
|
374
|
+
const rrfScores = new Map();
|
|
375
|
+
// Gather all chunks
|
|
376
|
+
for (const [id, entry] of results.semantic) {
|
|
377
|
+
allChunks.set(id, entry.chunk);
|
|
378
|
+
}
|
|
379
|
+
for (const [id, entry] of results.keyword) {
|
|
380
|
+
if (!allChunks.has(id)) {
|
|
381
|
+
allChunks.set(id, entry.chunk);
|
|
382
|
+
}
|
|
383
|
+
}
|
|
384
|
+
// Calculate RRF scores: RRF(d) = SUM(weight_i / (k + rank_i))
|
|
385
|
+
for (const [id] of allChunks) {
|
|
386
|
+
let rrfScore = 0;
|
|
387
|
+
// Add contributions from semantic ranks
|
|
388
|
+
const semanticEntry = results.semantic.get(id);
|
|
389
|
+
if (semanticEntry) {
|
|
390
|
+
for (const { rank, weight } of semanticEntry.ranks) {
|
|
391
|
+
rrfScore += weight / (RRF_K + rank);
|
|
392
|
+
}
|
|
393
|
+
}
|
|
394
|
+
// Add contributions from keyword ranks
|
|
395
|
+
const keywordEntry = results.keyword.get(id);
|
|
396
|
+
if (keywordEntry) {
|
|
397
|
+
for (const { rank, weight } of keywordEntry.ranks) {
|
|
398
|
+
rrfScore += weight / (RRF_K + rank);
|
|
399
|
+
}
|
|
400
|
+
}
|
|
401
|
+
rrfScores.set(id, rrfScore);
|
|
402
|
+
}
|
|
403
|
+
// Normalize by theoretical maximum (rank-0 in every list), NOT by actual max.
|
|
404
|
+
// Using actual max makes top result always 1.0, breaking quality confidence gating.
|
|
405
|
+
const theoreticalMaxRrf = totalVariantWeight / (RRF_K + 0);
|
|
406
|
+
const maxRrfScore = Math.max(theoreticalMaxRrf, 0.01);
|
|
407
|
+
// Separate test files from implementation files before scoring
|
|
408
|
+
const isNonTestQuery = !isTestingRelatedQuery(query);
|
|
409
|
+
const implementationChunks = [];
|
|
410
|
+
const testChunks = [];
|
|
411
|
+
for (const [id, chunk] of allChunks.entries()) {
|
|
412
|
+
if (this.isTestFile(chunk.filePath)) {
|
|
413
|
+
testChunks.push([id, chunk]);
|
|
414
|
+
}
|
|
415
|
+
else {
|
|
416
|
+
implementationChunks.push([id, chunk]);
|
|
417
|
+
}
|
|
418
|
+
}
|
|
419
|
+
// For non-test queries: filter test files from candidate pool, keep max 1 test file only if < 3 implementation matches
|
|
420
|
+
const chunksToScore = isNonTestQuery ? implementationChunks : Array.from(allChunks.entries());
|
|
421
|
+
const scoredResults = chunksToScore
|
|
422
|
+
.map(([id, chunk]) => {
|
|
423
|
+
// RRF score normalized to [0,1] range. Boosts below are unclamped
|
|
424
|
+
// to preserve score differentiation — only relative ordering matters.
|
|
425
|
+
let combinedScore = rrfScores.get(id) / maxRrfScore;
|
|
285
426
|
// Slight boost when analyzer identified a concrete component type
|
|
286
427
|
if (chunk.componentType && chunk.componentType !== 'unknown') {
|
|
287
|
-
combinedScore
|
|
428
|
+
combinedScore *= 1.1;
|
|
288
429
|
}
|
|
289
430
|
// Boost if layer is detected
|
|
290
431
|
if (chunk.layer && chunk.layer !== 'unknown') {
|
|
291
|
-
combinedScore
|
|
292
|
-
}
|
|
293
|
-
// Query-aware reranking to reduce noisy matches in practical workflows.
|
|
294
|
-
if (!isTestingRelatedQuery(query) && this.isTestFile(chunk.filePath)) {
|
|
295
|
-
combinedScore = combinedScore * 0.75;
|
|
432
|
+
combinedScore *= 1.1;
|
|
296
433
|
}
|
|
297
434
|
if (actionQuery && this.isDefinitionHeavyResult(chunk)) {
|
|
298
|
-
combinedScore
|
|
435
|
+
combinedScore *= 0.82;
|
|
299
436
|
}
|
|
300
437
|
if (actionQuery &&
|
|
301
438
|
['service', 'component', 'interceptor', 'guard', 'module', 'resolver'].includes((chunk.componentType || '').toLowerCase())) {
|
|
302
|
-
combinedScore
|
|
439
|
+
combinedScore *= 1.06;
|
|
440
|
+
}
|
|
441
|
+
// Demote template/style files for behavioral queries — they describe
|
|
442
|
+
// structure/presentation, not implementation logic.
|
|
443
|
+
if ((intent === 'FLOW' || intent === 'WIRING' || actionQuery) &&
|
|
444
|
+
this.isTemplateOrStyleFile(chunk.filePath)) {
|
|
445
|
+
combinedScore *= 0.75;
|
|
303
446
|
}
|
|
304
447
|
// Light intent-aware boost for likely wiring/configuration queries.
|
|
305
448
|
if (likelyWiringQuery && profile !== 'explore') {
|
|
306
449
|
if (this.isCompositionRootFile(chunk.filePath)) {
|
|
307
|
-
combinedScore
|
|
450
|
+
combinedScore *= 1.12;
|
|
451
|
+
}
|
|
452
|
+
}
|
|
453
|
+
if (intent === 'FLOW') {
|
|
454
|
+
// Boost service/guard/interceptor files for action/navigation queries
|
|
455
|
+
if (['service', 'guard', 'interceptor', 'middleware'].includes((chunk.componentType || '').toLowerCase())) {
|
|
456
|
+
combinedScore *= 1.15;
|
|
457
|
+
}
|
|
458
|
+
}
|
|
459
|
+
else if (intent === 'CONFIG') {
|
|
460
|
+
// Boost composition-root files for configuration queries
|
|
461
|
+
if (this.isCompositionRootFile(chunk.filePath)) {
|
|
462
|
+
combinedScore *= 1.2;
|
|
463
|
+
}
|
|
464
|
+
}
|
|
465
|
+
else if (intent === 'WIRING') {
|
|
466
|
+
// Boost DI/module files for wiring queries
|
|
467
|
+
if (['module', 'provider', 'config'].some((type) => (chunk.componentType || '').toLowerCase().includes(type))) {
|
|
468
|
+
combinedScore *= 1.18;
|
|
469
|
+
}
|
|
470
|
+
if (this.isCompositionRootFile(chunk.filePath)) {
|
|
471
|
+
combinedScore *= 1.22;
|
|
308
472
|
}
|
|
309
473
|
}
|
|
310
474
|
const pathOverlap = this.queryPathTokenOverlap(chunk.filePath, query);
|
|
311
475
|
if (pathOverlap >= 2) {
|
|
312
|
-
combinedScore
|
|
476
|
+
combinedScore *= 1.08;
|
|
313
477
|
}
|
|
314
|
-
|
|
478
|
+
if (this.importCentrality) {
|
|
479
|
+
const normalizedRoot = this.rootPath.replace(/\\/g, '/').replace(/\/?$/, '/');
|
|
480
|
+
const normalizedPath = chunk.filePath.replace(/\\/g, '/').replace(normalizedRoot, '');
|
|
481
|
+
const centrality = this.importCentrality.get(normalizedPath);
|
|
482
|
+
if (centrality !== undefined && centrality > 0.1) {
|
|
483
|
+
// Boost files with high centrality (many imports)
|
|
484
|
+
const centralityBoost = 1.0 + centrality * 0.15; // Up to +15% for max centrality
|
|
485
|
+
combinedScore *= centralityBoost;
|
|
486
|
+
}
|
|
487
|
+
}
|
|
488
|
+
// Detect pattern trend and apply momentum boost
|
|
315
489
|
const { trend, warning } = this.detectChunkTrend(chunk);
|
|
316
490
|
if (trend === 'Rising') {
|
|
317
|
-
combinedScore
|
|
491
|
+
combinedScore *= 1.15; // +15% for modern patterns
|
|
318
492
|
}
|
|
319
493
|
else if (trend === 'Declining') {
|
|
320
|
-
combinedScore
|
|
494
|
+
combinedScore *= 0.9; // -10% for legacy patterns
|
|
321
495
|
}
|
|
322
496
|
const summary = this.generateSummary(chunk);
|
|
323
|
-
const snippet = this.generateSnippet(chunk.content);
|
|
497
|
+
const snippet = this.generateSnippet(chunk.content ?? '');
|
|
324
498
|
return {
|
|
325
499
|
summary,
|
|
326
500
|
snippet,
|
|
@@ -334,13 +508,58 @@ export class CodebaseSearcher {
|
|
|
334
508
|
componentType: chunk.componentType,
|
|
335
509
|
layer: chunk.layer,
|
|
336
510
|
metadata: chunk.metadata,
|
|
337
|
-
// v1.2: Pattern momentum awareness
|
|
338
511
|
trend,
|
|
339
512
|
patternWarning: warning
|
|
340
513
|
};
|
|
341
514
|
})
|
|
342
|
-
.sort((a, b) => b.score - a.score)
|
|
343
|
-
|
|
515
|
+
.sort((a, b) => b.score - a.score);
|
|
516
|
+
const seenFiles = new Set();
|
|
517
|
+
const deduped = [];
|
|
518
|
+
for (const result of scoredResults) {
|
|
519
|
+
const normalizedPath = result.filePath.toLowerCase().replace(/\\/g, '/');
|
|
520
|
+
if (seenFiles.has(normalizedPath))
|
|
521
|
+
continue;
|
|
522
|
+
seenFiles.add(normalizedPath);
|
|
523
|
+
deduped.push(result);
|
|
524
|
+
if (deduped.length >= limit)
|
|
525
|
+
break;
|
|
526
|
+
}
|
|
527
|
+
const finalResults = deduped;
|
|
528
|
+
if (isNonTestQuery &&
|
|
529
|
+
finalResults.length < 3 &&
|
|
530
|
+
finalResults.length < limit &&
|
|
531
|
+
testChunks.length > 0) {
|
|
532
|
+
// Find the highest-scoring test file
|
|
533
|
+
const bestTestChunk = testChunks
|
|
534
|
+
.map(([id, chunk]) => ({
|
|
535
|
+
id,
|
|
536
|
+
chunk,
|
|
537
|
+
score: rrfScores.get(id) / maxRrfScore
|
|
538
|
+
}))
|
|
539
|
+
.sort((a, b) => b.score - a.score)[0];
|
|
540
|
+
if (bestTestChunk) {
|
|
541
|
+
const { trend, warning } = this.detectChunkTrend(bestTestChunk.chunk);
|
|
542
|
+
const summary = this.generateSummary(bestTestChunk.chunk);
|
|
543
|
+
const snippet = this.generateSnippet(bestTestChunk.chunk.content ?? '');
|
|
544
|
+
finalResults.push({
|
|
545
|
+
summary,
|
|
546
|
+
snippet,
|
|
547
|
+
filePath: bestTestChunk.chunk.filePath,
|
|
548
|
+
startLine: bestTestChunk.chunk.startLine,
|
|
549
|
+
endLine: bestTestChunk.chunk.endLine,
|
|
550
|
+
score: bestTestChunk.score * 0.5, // Demote below implementation files
|
|
551
|
+
relevanceReason: this.generateRelevanceReason(bestTestChunk.chunk, query) + ' (test file)',
|
|
552
|
+
language: bestTestChunk.chunk.language,
|
|
553
|
+
framework: bestTestChunk.chunk.framework,
|
|
554
|
+
componentType: bestTestChunk.chunk.componentType,
|
|
555
|
+
layer: bestTestChunk.chunk.layer,
|
|
556
|
+
metadata: bestTestChunk.chunk.metadata,
|
|
557
|
+
trend,
|
|
558
|
+
patternWarning: warning
|
|
559
|
+
});
|
|
560
|
+
}
|
|
561
|
+
}
|
|
562
|
+
return finalResults;
|
|
344
563
|
}
|
|
345
564
|
pickBetterResultSet(query, primary, rescue) {
|
|
346
565
|
const primaryQuality = assessSearchQuality(query, primary);
|
|
@@ -356,22 +575,26 @@ export class CodebaseSearcher {
|
|
|
356
575
|
return primary;
|
|
357
576
|
}
|
|
358
577
|
async collectHybridMatches(queryVariants, candidateLimit, filters, useSemanticSearch, useKeywordSearch, semanticWeight, keywordWeight) {
|
|
359
|
-
const
|
|
578
|
+
const semanticRanks = new Map();
|
|
579
|
+
const keywordRanks = new Map();
|
|
580
|
+
// RRF uses ranks instead of scores for fusion robustness
|
|
360
581
|
if (useSemanticSearch && this.embeddingProvider && this.storageProvider) {
|
|
361
582
|
try {
|
|
362
583
|
for (const variant of queryVariants) {
|
|
363
584
|
const vectorResults = await this.semanticSearch(variant.query, candidateLimit, filters);
|
|
364
|
-
|
|
585
|
+
// Assign ranks based on retrieval order (0-indexed)
|
|
586
|
+
vectorResults.forEach((result, index) => {
|
|
365
587
|
const id = result.chunk.id;
|
|
366
|
-
const
|
|
367
|
-
const
|
|
588
|
+
const rank = index; // 0-indexed rank
|
|
589
|
+
const weight = semanticWeight * variant.weight;
|
|
590
|
+
const existing = semanticRanks.get(id);
|
|
368
591
|
if (existing) {
|
|
369
|
-
existing.
|
|
592
|
+
existing.ranks.push({ rank, weight });
|
|
370
593
|
}
|
|
371
594
|
else {
|
|
372
|
-
|
|
595
|
+
semanticRanks.set(id, {
|
|
373
596
|
chunk: result.chunk,
|
|
374
|
-
|
|
597
|
+
ranks: [{ rank, weight }]
|
|
375
598
|
});
|
|
376
599
|
}
|
|
377
600
|
});
|
|
@@ -388,17 +611,19 @@ export class CodebaseSearcher {
|
|
|
388
611
|
try {
|
|
389
612
|
for (const variant of queryVariants) {
|
|
390
613
|
const keywordResults = await this.keywordSearch(variant.query, candidateLimit, filters);
|
|
391
|
-
|
|
614
|
+
// Assign ranks based on retrieval order (0-indexed)
|
|
615
|
+
keywordResults.forEach((result, index) => {
|
|
392
616
|
const id = result.chunk.id;
|
|
393
|
-
const
|
|
394
|
-
const
|
|
617
|
+
const rank = index; // 0-indexed rank
|
|
618
|
+
const weight = keywordWeight * variant.weight;
|
|
619
|
+
const existing = keywordRanks.get(id);
|
|
395
620
|
if (existing) {
|
|
396
|
-
existing.
|
|
621
|
+
existing.ranks.push({ rank, weight });
|
|
397
622
|
}
|
|
398
623
|
else {
|
|
399
|
-
|
|
624
|
+
keywordRanks.set(id, {
|
|
400
625
|
chunk: result.chunk,
|
|
401
|
-
|
|
626
|
+
ranks: [{ rank, weight }]
|
|
402
627
|
});
|
|
403
628
|
}
|
|
404
629
|
});
|
|
@@ -408,37 +633,56 @@ export class CodebaseSearcher {
|
|
|
408
633
|
console.warn('Keyword search failed:', error);
|
|
409
634
|
}
|
|
410
635
|
}
|
|
411
|
-
return
|
|
636
|
+
return { semantic: semanticRanks, keyword: keywordRanks };
|
|
412
637
|
}
|
|
413
638
|
async search(query, limit = 5, filters, options = DEFAULT_SEARCH_OPTIONS) {
|
|
414
639
|
if (!this.initialized) {
|
|
415
640
|
await this.initialize();
|
|
416
641
|
}
|
|
417
|
-
const
|
|
642
|
+
const merged = {
|
|
418
643
|
...DEFAULT_SEARCH_OPTIONS,
|
|
419
644
|
...options
|
|
420
645
|
};
|
|
646
|
+
const { useSemanticSearch, useKeywordSearch, profile, enableQueryExpansion, enableLowConfidenceRescue, candidateFloor, enableReranker } = merged;
|
|
647
|
+
const { intent, weights: intentWeights } = this.classifyQueryIntent(query);
|
|
648
|
+
// Intent weights are the default; caller-supplied weights override them
|
|
649
|
+
const finalSemanticWeight = merged.semanticWeight ?? intentWeights.semantic;
|
|
650
|
+
const finalKeywordWeight = merged.keywordWeight ?? intentWeights.keyword;
|
|
421
651
|
const candidateLimit = Math.max(limit * 2, candidateFloor || 30);
|
|
422
652
|
const primaryVariants = this.buildQueryVariants(query, enableQueryExpansion ? 1 : 0);
|
|
423
|
-
const primaryMatches = await this.collectHybridMatches(primaryVariants, candidateLimit, filters, Boolean(useSemanticSearch), Boolean(useKeywordSearch),
|
|
424
|
-
const
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
653
|
+
const primaryMatches = await this.collectHybridMatches(primaryVariants, candidateLimit, filters, Boolean(useSemanticSearch), Boolean(useKeywordSearch), finalSemanticWeight, finalKeywordWeight);
|
|
654
|
+
const primaryTotalWeight = primaryVariants.reduce((sum, v) => sum + v.weight, 0) *
|
|
655
|
+
(finalSemanticWeight + finalKeywordWeight);
|
|
656
|
+
const primaryResults = this.scoreAndSortResults(query, limit, primaryMatches, (profile || 'explore'), intent, primaryTotalWeight);
|
|
657
|
+
let bestResults = primaryResults;
|
|
658
|
+
if (enableLowConfidenceRescue) {
|
|
659
|
+
const primaryQuality = assessSearchQuality(query, primaryResults);
|
|
660
|
+
if (primaryQuality.status === 'low_confidence') {
|
|
661
|
+
const rescueVariants = this.buildQueryVariants(query, 2).slice(1);
|
|
662
|
+
if (rescueVariants.length > 0) {
|
|
663
|
+
const rescueMatches = await this.collectHybridMatches(rescueVariants.map((variant, index) => ({
|
|
664
|
+
query: variant.query,
|
|
665
|
+
weight: index === 0 ? 1 : 0.8
|
|
666
|
+
})), candidateLimit, filters, Boolean(useSemanticSearch), Boolean(useKeywordSearch), finalSemanticWeight, finalKeywordWeight);
|
|
667
|
+
const rescueVariantWeights = rescueVariants.map((_, i) => (i === 0 ? 1 : 0.8));
|
|
668
|
+
const rescueTotalWeight = rescueVariantWeights.reduce((sum, w) => sum + w, 0) *
|
|
669
|
+
(finalSemanticWeight + finalKeywordWeight);
|
|
670
|
+
const rescueResults = this.scoreAndSortResults(query, limit, rescueMatches, (profile || 'explore'), intent, rescueTotalWeight);
|
|
671
|
+
bestResults = this.pickBetterResultSet(query, primaryResults, rescueResults);
|
|
672
|
+
}
|
|
673
|
+
}
|
|
674
|
+
}
|
|
675
|
+
// Stage-2: cross-encoder reranking when top scores are ambiguous
|
|
676
|
+
if (enableReranker) {
|
|
677
|
+
try {
|
|
678
|
+
bestResults = await rerank(query, bestResults);
|
|
679
|
+
}
|
|
680
|
+
catch (error) {
|
|
681
|
+
// Reranker is non-critical — log and return unranked results
|
|
682
|
+
console.warn('[reranker] Failed, returning original order:', error);
|
|
683
|
+
}
|
|
684
|
+
}
|
|
685
|
+
return bestResults;
|
|
442
686
|
}
|
|
443
687
|
generateSummary(chunk) {
|
|
444
688
|
const analyzer = chunk.framework ? analyzerRegistry.get(chunk.framework) : null;
|
|
@@ -459,7 +703,7 @@ export class CodebaseSearcher {
|
|
|
459
703
|
const componentName = chunk.metadata?.componentName;
|
|
460
704
|
const componentType = chunk.componentType;
|
|
461
705
|
// Try to extract a meaningful name from content
|
|
462
|
-
const classMatch = chunk.content.match(/(?:export\s+)?(?:class|interface|type|enum|function)\s+(\w+)/);
|
|
706
|
+
const classMatch = (chunk.content ?? '').match(/(?:export\s+)?(?:class|interface|type|enum|function)\s+(\w+)/);
|
|
463
707
|
const name = componentName || (classMatch ? classMatch[1] : null);
|
|
464
708
|
if (name && componentType) {
|
|
465
709
|
return `${componentType.charAt(0).toUpperCase() + componentType.slice(1)} '${name}' in ${fileName}.`;
|