codebase-context 1.4.1 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89)
  1. package/README.md +193 -45
  2. package/dist/analyzers/generic/index.d.ts +0 -1
  3. package/dist/analyzers/generic/index.d.ts.map +1 -1
  4. package/dist/analyzers/generic/index.js +0 -13
  5. package/dist/analyzers/generic/index.js.map +1 -1
  6. package/dist/constants/codebase-context.d.ts +2 -0
  7. package/dist/constants/codebase-context.d.ts.map +1 -1
  8. package/dist/constants/codebase-context.js +2 -0
  9. package/dist/constants/codebase-context.js.map +1 -1
  10. package/dist/constants/git-patterns.d.ts +12 -0
  11. package/dist/constants/git-patterns.d.ts.map +1 -0
  12. package/dist/constants/git-patterns.js +11 -0
  13. package/dist/constants/git-patterns.js.map +1 -0
  14. package/dist/core/analyzer-registry.d.ts.map +1 -1
  15. package/dist/core/analyzer-registry.js +3 -1
  16. package/dist/core/analyzer-registry.js.map +1 -1
  17. package/dist/core/indexer.d.ts +2 -0
  18. package/dist/core/indexer.d.ts.map +1 -1
  19. package/dist/core/indexer.js +179 -34
  20. package/dist/core/indexer.js.map +1 -1
  21. package/dist/core/manifest.d.ts +39 -0
  22. package/dist/core/manifest.d.ts.map +1 -0
  23. package/dist/core/manifest.js +86 -0
  24. package/dist/core/manifest.js.map +1 -0
  25. package/dist/core/reranker.d.ts +23 -0
  26. package/dist/core/reranker.d.ts.map +1 -0
  27. package/dist/core/reranker.js +120 -0
  28. package/dist/core/reranker.js.map +1 -0
  29. package/dist/core/search-quality.d.ts +10 -0
  30. package/dist/core/search-quality.d.ts.map +1 -0
  31. package/dist/core/search-quality.js +64 -0
  32. package/dist/core/search-quality.js.map +1 -0
  33. package/dist/core/search.d.ts +26 -2
  34. package/dist/core/search.d.ts.map +1 -1
  35. package/dist/core/search.js +508 -69
  36. package/dist/core/search.js.map +1 -1
  37. package/dist/embeddings/transformers.d.ts.map +1 -1
  38. package/dist/embeddings/transformers.js +17 -7
  39. package/dist/embeddings/transformers.js.map +1 -1
  40. package/dist/embeddings/types.d.ts.map +1 -1
  41. package/dist/embeddings/types.js +3 -0
  42. package/dist/embeddings/types.js.map +1 -1
  43. package/dist/index.d.ts +1 -1
  44. package/dist/index.d.ts.map +1 -1
  45. package/dist/index.js +460 -55
  46. package/dist/index.js.map +1 -1
  47. package/dist/memory/git-memory.d.ts +9 -0
  48. package/dist/memory/git-memory.d.ts.map +1 -0
  49. package/dist/memory/git-memory.js +51 -0
  50. package/dist/memory/git-memory.js.map +1 -0
  51. package/dist/memory/store.d.ts +16 -0
  52. package/dist/memory/store.d.ts.map +1 -1
  53. package/dist/memory/store.js +40 -1
  54. package/dist/memory/store.js.map +1 -1
  55. package/dist/patterns/semantics.d.ts +4 -0
  56. package/dist/patterns/semantics.d.ts.map +1 -0
  57. package/dist/patterns/semantics.js +24 -0
  58. package/dist/patterns/semantics.js.map +1 -0
  59. package/dist/preflight/evidence-lock.d.ts +50 -0
  60. package/dist/preflight/evidence-lock.d.ts.map +1 -0
  61. package/dist/preflight/evidence-lock.js +130 -0
  62. package/dist/preflight/evidence-lock.js.map +1 -0
  63. package/dist/preflight/query-scope.d.ts +3 -0
  64. package/dist/preflight/query-scope.d.ts.map +1 -0
  65. package/dist/preflight/query-scope.js +40 -0
  66. package/dist/preflight/query-scope.js.map +1 -0
  67. package/dist/resources/uri.d.ts +5 -0
  68. package/dist/resources/uri.d.ts.map +1 -0
  69. package/dist/resources/uri.js +15 -0
  70. package/dist/resources/uri.js.map +1 -0
  71. package/dist/storage/lancedb.d.ts +1 -0
  72. package/dist/storage/lancedb.d.ts.map +1 -1
  73. package/dist/storage/lancedb.js +24 -3
  74. package/dist/storage/lancedb.js.map +1 -1
  75. package/dist/storage/types.d.ts +5 -0
  76. package/dist/storage/types.d.ts.map +1 -1
  77. package/dist/storage/types.js.map +1 -1
  78. package/dist/types/index.d.ts +20 -3
  79. package/dist/types/index.d.ts.map +1 -1
  80. package/dist/utils/chunking.js +2 -2
  81. package/dist/utils/chunking.js.map +1 -1
  82. package/dist/utils/git-dates.d.ts +1 -0
  83. package/dist/utils/git-dates.d.ts.map +1 -1
  84. package/dist/utils/git-dates.js +20 -0
  85. package/dist/utils/git-dates.js.map +1 -1
  86. package/dist/utils/usage-tracker.d.ts.map +1 -1
  87. package/dist/utils/usage-tracker.js +3 -8
  88. package/dist/utils/usage-tracker.js.map +1 -1
  89. package/package.json +17 -9
@@ -9,13 +9,67 @@ import { getEmbeddingProvider } from '../embeddings/index.js';
9
9
  import { getStorageProvider } from '../storage/index.js';
10
10
  import { analyzerRegistry } from './analyzer-registry.js';
11
11
  import { IndexCorruptedError } from '../errors/index.js';
12
+ import { isTestingRelatedQuery } from '../preflight/query-scope.js';
13
+ import { assessSearchQuality } from './search-quality.js';
14
+ import { rerank } from './reranker.js';
12
15
  import { CODEBASE_CONTEXT_DIRNAME, INTELLIGENCE_FILENAME, KEYWORD_INDEX_FILENAME, VECTOR_DB_DIRNAME } from '../constants/codebase-context.js';
13
16
  const DEFAULT_SEARCH_OPTIONS = {
14
17
  useSemanticSearch: true,
15
18
  useKeywordSearch: true,
16
- semanticWeight: 0.7,
17
- keywordWeight: 0.3
19
+ // semanticWeight/keywordWeight intentionally omitted —
20
+ // intent classification provides per-query weights.
21
+ // Callers can still override by passing explicit values.
22
+ profile: 'explore',
23
+ enableQueryExpansion: true,
24
+ enableLowConfidenceRescue: true,
25
+ candidateFloor: 30,
26
+ enableReranker: true
18
27
  };
28
+ const QUERY_EXPANSION_HINTS = [
29
+ {
30
+ pattern: /\b(auth|authentication|login|signin|sign-in|session|token|oauth)\b/i,
31
+ terms: ['auth', 'login', 'token', 'session', 'guard', 'oauth']
32
+ },
33
+ {
34
+ pattern: /\b(route|routes|routing|router|navigate|navigation|redirect|path)\b/i,
35
+ terms: ['router', 'route', 'navigation', 'redirect', 'path']
36
+ },
37
+ {
38
+ pattern: /\b(config|configuration|configure|setup|register|provider|providers|bootstrap)\b/i,
39
+ terms: ['config', 'setup', 'register', 'provider', 'bootstrap']
40
+ },
41
+ {
42
+ pattern: /\b(role|roles|permission|permissions|authorization|authorisation|access)\b/i,
43
+ terms: ['roles', 'permissions', 'access', 'policy', 'guard']
44
+ },
45
+ {
46
+ pattern: /\b(interceptor|middleware|request|response|http)\b/i,
47
+ terms: ['interceptor', 'middleware', 'http', 'request', 'response']
48
+ },
49
+ {
50
+ pattern: /\b(theme|styles?|styling|palette|color|branding|upload)\b/i,
51
+ terms: ['theme', 'styles', 'palette', 'color', 'branding', 'upload']
52
+ }
53
+ ];
54
+ const QUERY_STOP_WORDS = new Set([
55
+ 'the',
56
+ 'a',
57
+ 'an',
58
+ 'to',
59
+ 'of',
60
+ 'for',
61
+ 'and',
62
+ 'or',
63
+ 'with',
64
+ 'in',
65
+ 'on',
66
+ 'by',
67
+ 'how',
68
+ 'are',
69
+ 'is',
70
+ 'after',
71
+ 'before'
72
+ ]);
19
73
  export class CodebaseSearcher {
20
74
  rootPath;
21
75
  storagePath;
@@ -24,8 +78,9 @@ export class CodebaseSearcher {
24
78
  embeddingProvider = null;
25
79
  storageProvider = null;
26
80
  initialized = false;
27
- // v1.2: Pattern intelligence for trend detection
81
+ // Pattern intelligence for trend detection
28
82
  patternIntelligence = null;
83
+ importCentrality = null;
29
84
  constructor(rootPath) {
30
85
  this.rootPath = rootPath;
31
86
  this.storagePath = path.join(rootPath, CODEBASE_CONTEXT_DIRNAME, VECTOR_DB_DIRNAME);
@@ -78,7 +133,7 @@ export class CodebaseSearcher {
78
133
  }
79
134
  }
80
135
  /**
81
- * v1.2: Load pattern intelligence for trend detection and warnings
136
+ * Load pattern intelligence for trend detection and warnings
82
137
  */
83
138
  async loadPatternIntelligence() {
84
139
  try {
@@ -112,14 +167,32 @@ export class CodebaseSearcher {
112
167
  }
113
168
  this.patternIntelligence = { decliningPatterns, risingPatterns, patternWarnings };
114
169
  console.error(`[search] Loaded pattern intelligence: ${decliningPatterns.size} declining, ${risingPatterns.size} rising patterns`);
170
+ this.importCentrality = new Map();
171
+ if (intelligence.internalFileGraph && intelligence.internalFileGraph.imports) {
172
+ // Count how many files import each file (in-degree centrality)
173
+ const importCounts = new Map();
174
+ for (const [_importingFile, importedFiles] of Object.entries(intelligence.internalFileGraph.imports)) {
175
+ const imports = importedFiles;
176
+ for (const imported of imports) {
177
+ importCounts.set(imported, (importCounts.get(imported) || 0) + 1);
178
+ }
179
+ }
180
+ // Normalize centrality to 0-1 range
181
+ const maxImports = Math.max(...Array.from(importCounts.values()), 1);
182
+ for (const [file, count] of importCounts) {
183
+ this.importCentrality.set(file, count / maxImports);
184
+ }
185
+ console.error(`[search] Computed import centrality for ${importCounts.size} files`);
186
+ }
115
187
  }
116
188
  catch (error) {
117
189
  console.warn('Pattern intelligence load failed (will proceed without trend detection):', error);
118
190
  this.patternIntelligence = null;
191
+ this.importCentrality = null;
119
192
  }
120
193
  }
121
194
  /**
122
- * v1.2: Detect pattern trend from chunk content
195
+ * Detect pattern trend from chunk content
123
196
  */
124
197
  detectChunkTrend(chunk) {
125
198
  if (!this.patternIntelligence) {
@@ -144,82 +217,281 @@ export class CodebaseSearcher {
144
217
  }
145
218
  return { trend: 'Stable' };
146
219
  }
147
- async search(query, limit = 5, filters, options = DEFAULT_SEARCH_OPTIONS) {
148
- if (!this.initialized) {
149
- await this.initialize();
220
+ isTestFile(filePath) {
221
+ const normalized = filePath.toLowerCase().replace(/\\/g, '/');
222
+ return (normalized.includes('.spec.') ||
223
+ normalized.includes('.test.') ||
224
+ normalized.includes('/e2e/') ||
225
+ normalized.includes('/__tests__/'));
226
+ }
227
+ normalizeQueryTerms(query) {
228
+ return query
229
+ .toLowerCase()
230
+ .split(/[^a-z0-9_]+/)
231
+ .filter((term) => term.length > 2 && !QUERY_STOP_WORDS.has(term));
232
+ }
233
+ /**
234
+ * Classify query intent based on heuristic patterns
235
+ */
236
+ classifyQueryIntent(query) {
237
+ const lowerQuery = query.toLowerCase();
238
+ // EXACT_NAME: Contains PascalCase or camelCase tokens (literal class/component names)
239
+ if (/[A-Z][a-z]+[A-Z]/.test(query) || /[a-z][A-Z]/.test(query)) {
240
+ return {
241
+ intent: 'EXACT_NAME',
242
+ weights: { semantic: 0.4, keyword: 0.6 } // Keyword search dominates for exact names
243
+ };
150
244
  }
151
- const { useSemanticSearch, useKeywordSearch, semanticWeight, keywordWeight } = {
152
- ...DEFAULT_SEARCH_OPTIONS,
153
- ...options
245
+ // CONFIG: Configuration/setup queries
246
+ const configKeywords = [
247
+ 'config',
248
+ 'setup',
249
+ 'routing',
250
+ 'providers',
251
+ 'configuration',
252
+ 'bootstrap'
253
+ ];
254
+ if (configKeywords.some((kw) => lowerQuery.includes(kw))) {
255
+ return {
256
+ intent: 'CONFIG',
257
+ weights: { semantic: 0.5, keyword: 0.5 } // Balanced
258
+ };
259
+ }
260
+ // WIRING: DI/registration queries
261
+ const wiringKeywords = [
262
+ 'provide',
263
+ 'inject',
264
+ 'dependency',
265
+ 'register',
266
+ 'wire',
267
+ 'bootstrap',
268
+ 'module'
269
+ ];
270
+ if (wiringKeywords.some((kw) => lowerQuery.includes(kw))) {
271
+ return {
272
+ intent: 'WIRING',
273
+ weights: { semantic: 0.5, keyword: 0.5 } // Balanced
274
+ };
275
+ }
276
+ // FLOW: Action/navigation queries
277
+ const flowVerbs = [
278
+ 'navigate',
279
+ 'redirect',
280
+ 'route',
281
+ 'handle',
282
+ 'process',
283
+ 'execute',
284
+ 'trigger',
285
+ 'dispatch'
286
+ ];
287
+ if (flowVerbs.some((verb) => lowerQuery.includes(verb))) {
288
+ return {
289
+ intent: 'FLOW',
290
+ weights: { semantic: 0.6, keyword: 0.4 } // Semantic helps with flow understanding
291
+ };
292
+ }
293
+ // CONCEPTUAL: Natural language without code tokens (default)
294
+ return {
295
+ intent: 'CONCEPTUAL',
296
+ weights: { semantic: 0.7, keyword: 0.3 } // Semantic dominates for concepts
154
297
  };
155
- const results = new Map();
156
- if (useSemanticSearch && this.embeddingProvider && this.storageProvider) {
157
- try {
158
- const vectorResults = await this.semanticSearch(query, limit * 2, filters);
159
- vectorResults.forEach((result) => {
160
- const id = result.chunk.id;
161
- const existing = results.get(id);
162
- if (existing) {
163
- existing.scores.push(result.score * (semanticWeight || 0.7));
164
- }
165
- else {
166
- results.set(id, {
167
- chunk: result.chunk,
168
- scores: [result.score * (semanticWeight || 0.7)]
169
- });
170
- }
171
- });
298
+ }
299
+ buildQueryVariants(query, maxExpansions) {
300
+ const variants = [{ query, weight: 1 }];
301
+ if (maxExpansions <= 0)
302
+ return variants;
303
+ const normalized = query.toLowerCase();
304
+ const terms = new Set(this.normalizeQueryTerms(query));
305
+ for (const hint of QUERY_EXPANSION_HINTS) {
306
+ if (!hint.pattern.test(query))
307
+ continue;
308
+ for (const term of hint.terms) {
309
+ if (!normalized.includes(term)) {
310
+ terms.add(term);
311
+ }
172
312
  }
173
- catch (error) {
174
- if (error instanceof IndexCorruptedError) {
175
- throw error; // Propagate to handler for auto-heal
313
+ }
314
+ const addedTerms = Array.from(terms).filter((term) => !normalized.includes(term));
315
+ if (addedTerms.length === 0)
316
+ return variants;
317
+ const firstExpansion = `${query} ${addedTerms.slice(0, 6).join(' ')}`.trim();
318
+ if (firstExpansion !== query) {
319
+ variants.push({ query: firstExpansion, weight: 0.35 });
320
+ }
321
+ if (maxExpansions > 1 && addedTerms.length > 6) {
322
+ const secondExpansion = `${query} ${addedTerms.slice(6, 12).join(' ')}`.trim();
323
+ if (secondExpansion !== query) {
324
+ variants.push({ query: secondExpansion, weight: 0.25 });
325
+ }
326
+ }
327
+ return variants.slice(0, 1 + maxExpansions);
328
+ }
329
+ isTemplateOrStyleFile(filePath) {
330
+ const ext = path.extname(filePath).toLowerCase();
331
+ return ['.html', '.scss', '.css', '.less', '.sass', '.styl'].includes(ext);
332
+ }
333
+ isCompositionRootFile(filePath) {
334
+ const normalized = filePath.toLowerCase().replace(/\\/g, '/');
335
+ const base = path.basename(normalized);
336
+ if (/^(main|index|bootstrap|startup)\./.test(base))
337
+ return true;
338
+ return (normalized.includes('/routes') ||
339
+ normalized.includes('/routing') ||
340
+ normalized.includes('/router') ||
341
+ normalized.includes('/config') ||
342
+ normalized.includes('/providers'));
343
+ }
344
+ queryPathTokenOverlap(filePath, query) {
345
+ const queryTerms = new Set(this.normalizeQueryTerms(query));
346
+ if (queryTerms.size === 0)
347
+ return 0;
348
+ const pathTerms = this.normalizeQueryTerms(filePath.replace(/\\/g, '/'));
349
+ return pathTerms.reduce((count, term) => (queryTerms.has(term) ? count + 1 : count), 0);
350
+ }
351
+ isLikelyWiringOrFlowQuery(query) {
352
+ return /\b(route|router|routing|navigate|navigation|redirect|auth|authentication|login|provider|register|config|configuration|interceptor|middleware)\b/i.test(query);
353
+ }
354
+ isActionOrHowQuery(query) {
355
+ return /\b(how|where|configure|configured|setup|register|wire|wiring|navigate|redirect|login|authenticate|copy|upload|handle|create|update|delete)\b/i.test(query);
356
+ }
357
+ isDefinitionHeavyResult(chunk) {
358
+ const normalizedPath = chunk.filePath.toLowerCase().replace(/\\/g, '/');
359
+ const componentType = (chunk.componentType || '').toLowerCase();
360
+ if (['type', 'interface', 'enum', 'constant'].includes(componentType))
361
+ return true;
362
+ return (normalizedPath.includes('/models/') ||
363
+ normalizedPath.includes('/interfaces/') ||
364
+ normalizedPath.includes('/types/') ||
365
+ normalizedPath.includes('/constants'));
366
+ }
367
+ scoreAndSortResults(query, limit, results, profile, intent, totalVariantWeight) {
368
+ const likelyWiringQuery = this.isLikelyWiringOrFlowQuery(query);
369
+ const actionQuery = this.isActionOrHowQuery(query);
370
+ // RRF: k=60 is the standard parameter (proven robust in Elasticsearch + TOSS paper arXiv:2208.11274)
371
+ const RRF_K = 60;
372
+ // Collect all unique chunks from both retrieval channels
373
+ const allChunks = new Map();
374
+ const rrfScores = new Map();
375
+ // Gather all chunks
376
+ for (const [id, entry] of results.semantic) {
377
+ allChunks.set(id, entry.chunk);
378
+ }
379
+ for (const [id, entry] of results.keyword) {
380
+ if (!allChunks.has(id)) {
381
+ allChunks.set(id, entry.chunk);
382
+ }
383
+ }
384
+ // Calculate RRF scores: RRF(d) = SUM(weight_i / (k + rank_i))
385
+ for (const [id] of allChunks) {
386
+ let rrfScore = 0;
387
+ // Add contributions from semantic ranks
388
+ const semanticEntry = results.semantic.get(id);
389
+ if (semanticEntry) {
390
+ for (const { rank, weight } of semanticEntry.ranks) {
391
+ rrfScore += weight / (RRF_K + rank);
176
392
  }
177
- console.warn('Semantic search failed:', error);
178
393
  }
394
+ // Add contributions from keyword ranks
395
+ const keywordEntry = results.keyword.get(id);
396
+ if (keywordEntry) {
397
+ for (const { rank, weight } of keywordEntry.ranks) {
398
+ rrfScore += weight / (RRF_K + rank);
399
+ }
400
+ }
401
+ rrfScores.set(id, rrfScore);
179
402
  }
180
- if (useKeywordSearch && this.fuseIndex) {
181
- try {
182
- const keywordResults = await this.keywordSearch(query, limit * 2, filters);
183
- keywordResults.forEach((result) => {
184
- const id = result.chunk.id;
185
- const existing = results.get(id);
186
- if (existing) {
187
- existing.scores.push(result.score * (keywordWeight || 0.3));
188
- }
189
- else {
190
- results.set(id, {
191
- chunk: result.chunk,
192
- scores: [result.score * (keywordWeight || 0.3)]
193
- });
194
- }
195
- });
403
+ // Normalize by theoretical maximum (rank-0 in every list), NOT by actual max.
404
+ // Using actual max makes top result always 1.0, breaking quality confidence gating.
405
+ const theoreticalMaxRrf = totalVariantWeight / (RRF_K + 0);
406
+ const maxRrfScore = Math.max(theoreticalMaxRrf, 0.01);
407
+ // Separate test files from implementation files before scoring
408
+ const isNonTestQuery = !isTestingRelatedQuery(query);
409
+ const implementationChunks = [];
410
+ const testChunks = [];
411
+ for (const [id, chunk] of allChunks.entries()) {
412
+ if (this.isTestFile(chunk.filePath)) {
413
+ testChunks.push([id, chunk]);
196
414
  }
197
- catch (error) {
198
- console.warn('Keyword search failed:', error);
415
+ else {
416
+ implementationChunks.push([id, chunk]);
199
417
  }
200
418
  }
201
- const combinedResults = Array.from(results.entries())
202
- .map(([_id, { chunk, scores }]) => {
203
- // Calculate base combined score
204
- let combinedScore = scores.reduce((sum, score) => sum + score, 0);
205
- // Normalize to 0-1 range (scores are already weighted)
206
- // If both semantic and keyword matched, max possible is ~1.0
207
- combinedScore = Math.min(1.0, combinedScore);
208
- // Boost scores for Angular components with proper detection
209
- if (chunk.componentType && chunk.framework === 'angular') {
210
- combinedScore = Math.min(1.0, combinedScore * 1.3);
419
+ // For non-test queries: filter test files from candidate pool, keep max 1 test file only if < 3 implementation matches
420
+ const chunksToScore = isNonTestQuery ? implementationChunks : Array.from(allChunks.entries());
421
+ const scoredResults = chunksToScore
422
+ .map(([id, chunk]) => {
423
+ // RRF score normalized to [0,1] range. Boosts below are unclamped
424
+ // to preserve score differentiation; only relative ordering matters.
425
+ let combinedScore = rrfScores.get(id) / maxRrfScore;
426
+ // Slight boost when analyzer identified a concrete component type
427
+ if (chunk.componentType && chunk.componentType !== 'unknown') {
428
+ combinedScore *= 1.1;
211
429
  }
212
430
  // Boost if layer is detected
213
431
  if (chunk.layer && chunk.layer !== 'unknown') {
214
- combinedScore = Math.min(1.0, combinedScore * 1.1);
432
+ combinedScore *= 1.1;
433
+ }
434
+ if (actionQuery && this.isDefinitionHeavyResult(chunk)) {
435
+ combinedScore *= 0.82;
436
+ }
437
+ if (actionQuery &&
438
+ ['service', 'component', 'interceptor', 'guard', 'module', 'resolver'].includes((chunk.componentType || '').toLowerCase())) {
439
+ combinedScore *= 1.06;
215
440
  }
216
- // v1.2: Detect pattern trend and apply momentum boost
441
+ // Demote template/style files for behavioral queries: they describe
442
+ // structure/presentation, not implementation logic.
443
+ if ((intent === 'FLOW' || intent === 'WIRING' || actionQuery) &&
444
+ this.isTemplateOrStyleFile(chunk.filePath)) {
445
+ combinedScore *= 0.75;
446
+ }
447
+ // Light intent-aware boost for likely wiring/configuration queries.
448
+ if (likelyWiringQuery && profile !== 'explore') {
449
+ if (this.isCompositionRootFile(chunk.filePath)) {
450
+ combinedScore *= 1.12;
451
+ }
452
+ }
453
+ if (intent === 'FLOW') {
454
+ // Boost service/guard/interceptor files for action/navigation queries
455
+ if (['service', 'guard', 'interceptor', 'middleware'].includes((chunk.componentType || '').toLowerCase())) {
456
+ combinedScore *= 1.15;
457
+ }
458
+ }
459
+ else if (intent === 'CONFIG') {
460
+ // Boost composition-root files for configuration queries
461
+ if (this.isCompositionRootFile(chunk.filePath)) {
462
+ combinedScore *= 1.2;
463
+ }
464
+ }
465
+ else if (intent === 'WIRING') {
466
+ // Boost DI/module files for wiring queries
467
+ if (['module', 'provider', 'config'].some((type) => (chunk.componentType || '').toLowerCase().includes(type))) {
468
+ combinedScore *= 1.18;
469
+ }
470
+ if (this.isCompositionRootFile(chunk.filePath)) {
471
+ combinedScore *= 1.22;
472
+ }
473
+ }
474
+ const pathOverlap = this.queryPathTokenOverlap(chunk.filePath, query);
475
+ if (pathOverlap >= 2) {
476
+ combinedScore *= 1.08;
477
+ }
478
+ if (this.importCentrality) {
479
+ const normalizedRoot = this.rootPath.replace(/\\/g, '/').replace(/\/?$/, '/');
480
+ const normalizedPath = chunk.filePath.replace(/\\/g, '/').replace(normalizedRoot, '');
481
+ const centrality = this.importCentrality.get(normalizedPath);
482
+ if (centrality !== undefined && centrality > 0.1) {
483
+ // Boost files with high centrality (many imports)
484
+ const centralityBoost = 1.0 + centrality * 0.15; // Up to +15% for max centrality
485
+ combinedScore *= centralityBoost;
486
+ }
487
+ }
488
+ // Detect pattern trend and apply momentum boost
217
489
  const { trend, warning } = this.detectChunkTrend(chunk);
218
490
  if (trend === 'Rising') {
219
- combinedScore = Math.min(1.0, combinedScore * 1.15); // +15% for modern patterns
491
+ combinedScore *= 1.15; // +15% for modern patterns
220
492
  }
221
493
  else if (trend === 'Declining') {
222
- combinedScore = combinedScore * 0.9; // -10% for legacy patterns
494
+ combinedScore *= 0.9; // -10% for legacy patterns
223
495
  }
224
496
  const summary = this.generateSummary(chunk);
225
497
  const snippet = this.generateSnippet(chunk.content);
@@ -236,14 +508,181 @@ export class CodebaseSearcher {
236
508
  componentType: chunk.componentType,
237
509
  layer: chunk.layer,
238
510
  metadata: chunk.metadata,
239
- // v1.2: Pattern momentum awareness
240
511
  trend,
241
512
  patternWarning: warning
242
513
  };
243
514
  })
244
- .sort((a, b) => b.score - a.score)
245
- .slice(0, limit);
246
- return combinedResults;
515
+ .sort((a, b) => b.score - a.score);
516
+ const seenFiles = new Set();
517
+ const deduped = [];
518
+ for (const result of scoredResults) {
519
+ const normalizedPath = result.filePath.toLowerCase().replace(/\\/g, '/');
520
+ if (seenFiles.has(normalizedPath))
521
+ continue;
522
+ seenFiles.add(normalizedPath);
523
+ deduped.push(result);
524
+ if (deduped.length >= limit)
525
+ break;
526
+ }
527
+ const finalResults = deduped;
528
+ if (isNonTestQuery &&
529
+ finalResults.length < 3 &&
530
+ finalResults.length < limit &&
531
+ testChunks.length > 0) {
532
+ // Find the highest-scoring test file
533
+ const bestTestChunk = testChunks
534
+ .map(([id, chunk]) => ({
535
+ id,
536
+ chunk,
537
+ score: rrfScores.get(id) / maxRrfScore
538
+ }))
539
+ .sort((a, b) => b.score - a.score)[0];
540
+ if (bestTestChunk) {
541
+ const { trend, warning } = this.detectChunkTrend(bestTestChunk.chunk);
542
+ const summary = this.generateSummary(bestTestChunk.chunk);
543
+ const snippet = this.generateSnippet(bestTestChunk.chunk.content);
544
+ finalResults.push({
545
+ summary,
546
+ snippet,
547
+ filePath: bestTestChunk.chunk.filePath,
548
+ startLine: bestTestChunk.chunk.startLine,
549
+ endLine: bestTestChunk.chunk.endLine,
550
+ score: bestTestChunk.score * 0.5, // Demote below implementation files
551
+ relevanceReason: this.generateRelevanceReason(bestTestChunk.chunk, query) + ' (test file)',
552
+ language: bestTestChunk.chunk.language,
553
+ framework: bestTestChunk.chunk.framework,
554
+ componentType: bestTestChunk.chunk.componentType,
555
+ layer: bestTestChunk.chunk.layer,
556
+ metadata: bestTestChunk.chunk.metadata,
557
+ trend,
558
+ patternWarning: warning
559
+ });
560
+ }
561
+ }
562
+ return finalResults;
563
+ }
564
+ pickBetterResultSet(query, primary, rescue) {
565
+ const primaryQuality = assessSearchQuality(query, primary);
566
+ const rescueQuality = assessSearchQuality(query, rescue);
567
+ if (rescueQuality.status === 'ok' &&
568
+ primaryQuality.status === 'low_confidence' &&
569
+ rescueQuality.confidence >= primaryQuality.confidence) {
570
+ return rescue;
571
+ }
572
+ if (rescueQuality.confidence >= primaryQuality.confidence + 0.05) {
573
+ return rescue;
574
+ }
575
+ return primary;
576
+ }
577
+ async collectHybridMatches(queryVariants, candidateLimit, filters, useSemanticSearch, useKeywordSearch, semanticWeight, keywordWeight) {
578
+ const semanticRanks = new Map();
579
+ const keywordRanks = new Map();
580
+ // RRF uses ranks instead of scores for fusion robustness
581
+ if (useSemanticSearch && this.embeddingProvider && this.storageProvider) {
582
+ try {
583
+ for (const variant of queryVariants) {
584
+ const vectorResults = await this.semanticSearch(variant.query, candidateLimit, filters);
585
+ // Assign ranks based on retrieval order (0-indexed)
586
+ vectorResults.forEach((result, index) => {
587
+ const id = result.chunk.id;
588
+ const rank = index; // 0-indexed rank
589
+ const weight = semanticWeight * variant.weight;
590
+ const existing = semanticRanks.get(id);
591
+ if (existing) {
592
+ existing.ranks.push({ rank, weight });
593
+ }
594
+ else {
595
+ semanticRanks.set(id, {
596
+ chunk: result.chunk,
597
+ ranks: [{ rank, weight }]
598
+ });
599
+ }
600
+ });
601
+ }
602
+ }
603
+ catch (error) {
604
+ if (error instanceof IndexCorruptedError) {
605
+ throw error; // Propagate to handler for auto-heal
606
+ }
607
+ console.warn('Semantic search failed:', error);
608
+ }
609
+ }
610
+ if (useKeywordSearch && this.fuseIndex) {
611
+ try {
612
+ for (const variant of queryVariants) {
613
+ const keywordResults = await this.keywordSearch(variant.query, candidateLimit, filters);
614
+ // Assign ranks based on retrieval order (0-indexed)
615
+ keywordResults.forEach((result, index) => {
616
+ const id = result.chunk.id;
617
+ const rank = index; // 0-indexed rank
618
+ const weight = keywordWeight * variant.weight;
619
+ const existing = keywordRanks.get(id);
620
+ if (existing) {
621
+ existing.ranks.push({ rank, weight });
622
+ }
623
+ else {
624
+ keywordRanks.set(id, {
625
+ chunk: result.chunk,
626
+ ranks: [{ rank, weight }]
627
+ });
628
+ }
629
+ });
630
+ }
631
+ }
632
+ catch (error) {
633
+ console.warn('Keyword search failed:', error);
634
+ }
635
+ }
636
+ return { semantic: semanticRanks, keyword: keywordRanks };
637
+ }
638
+ async search(query, limit = 5, filters, options = DEFAULT_SEARCH_OPTIONS) {
639
+ if (!this.initialized) {
640
+ await this.initialize();
641
+ }
642
+ const merged = {
643
+ ...DEFAULT_SEARCH_OPTIONS,
644
+ ...options
645
+ };
646
+ const { useSemanticSearch, useKeywordSearch, profile, enableQueryExpansion, enableLowConfidenceRescue, candidateFloor, enableReranker } = merged;
647
+ const { intent, weights: intentWeights } = this.classifyQueryIntent(query);
648
+ // Intent weights are the default; caller-supplied weights override them
649
+ const finalSemanticWeight = merged.semanticWeight ?? intentWeights.semantic;
650
+ const finalKeywordWeight = merged.keywordWeight ?? intentWeights.keyword;
651
+ const candidateLimit = Math.max(limit * 2, candidateFloor || 30);
652
+ const primaryVariants = this.buildQueryVariants(query, enableQueryExpansion ? 1 : 0);
653
+ const primaryMatches = await this.collectHybridMatches(primaryVariants, candidateLimit, filters, Boolean(useSemanticSearch), Boolean(useKeywordSearch), finalSemanticWeight, finalKeywordWeight);
654
+ const primaryTotalWeight = primaryVariants.reduce((sum, v) => sum + v.weight, 0) *
655
+ (finalSemanticWeight + finalKeywordWeight);
656
+ const primaryResults = this.scoreAndSortResults(query, limit, primaryMatches, (profile || 'explore'), intent, primaryTotalWeight);
657
+ let bestResults = primaryResults;
658
+ if (enableLowConfidenceRescue) {
659
+ const primaryQuality = assessSearchQuality(query, primaryResults);
660
+ if (primaryQuality.status === 'low_confidence') {
661
+ const rescueVariants = this.buildQueryVariants(query, 2).slice(1);
662
+ if (rescueVariants.length > 0) {
663
+ const rescueMatches = await this.collectHybridMatches(rescueVariants.map((variant, index) => ({
664
+ query: variant.query,
665
+ weight: index === 0 ? 1 : 0.8
666
+ })), candidateLimit, filters, Boolean(useSemanticSearch), Boolean(useKeywordSearch), finalSemanticWeight, finalKeywordWeight);
667
+ const rescueVariantWeights = rescueVariants.map((_, i) => (i === 0 ? 1 : 0.8));
668
+ const rescueTotalWeight = rescueVariantWeights.reduce((sum, w) => sum + w, 0) *
669
+ (finalSemanticWeight + finalKeywordWeight);
670
+ const rescueResults = this.scoreAndSortResults(query, limit, rescueMatches, (profile || 'explore'), intent, rescueTotalWeight);
671
+ bestResults = this.pickBetterResultSet(query, primaryResults, rescueResults);
672
+ }
673
+ }
674
+ }
675
+ // Stage-2: cross-encoder reranking when top scores are ambiguous
676
+ if (enableReranker) {
677
+ try {
678
+ bestResults = await rerank(query, bestResults);
679
+ }
680
+ catch (error) {
681
+ // Reranker is non-critical — log and return results in their original order
682
+ console.warn('[reranker] Failed, returning original order:', error);
683
+ }
684
+ }
685
+ return bestResults;
247
686
  }
248
687
  generateSummary(chunk) {
249
688
  const analyzer = chunk.framework ? analyzerRegistry.get(chunk.framework) : null;
@@ -287,7 +726,7 @@ export class CodebaseSearcher {
287
726
  };
288
727
  return `${langMap[ext] || ext.toUpperCase()} in ${fileName}.`;
289
728
  }
290
- generateSnippet(content, maxLines = 100) {
729
+ generateSnippet(content, maxLines = 20) {
291
730
  const lines = content.split('\n');
292
731
  if (lines.length <= maxLines) {
293
732
  return content;