@renseiai/agentfactory-code-intelligence 0.8.8 → 0.8.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. package/dist/src/embedding/__tests__/embedding.test.d.ts +2 -0
  2. package/dist/src/embedding/__tests__/embedding.test.d.ts.map +1 -0
  3. package/dist/src/embedding/__tests__/embedding.test.js +339 -0
  4. package/dist/src/embedding/chunker.d.ts +40 -0
  5. package/dist/src/embedding/chunker.d.ts.map +1 -0
  6. package/dist/src/embedding/chunker.js +135 -0
  7. package/dist/src/embedding/embedding-provider.d.ts +15 -0
  8. package/dist/src/embedding/embedding-provider.d.ts.map +1 -0
  9. package/dist/src/embedding/embedding-provider.js +1 -0
  10. package/dist/src/embedding/voyage-provider.d.ts +39 -0
  11. package/dist/src/embedding/voyage-provider.d.ts.map +1 -0
  12. package/dist/src/embedding/voyage-provider.js +146 -0
  13. package/dist/src/index.d.ts +14 -2
  14. package/dist/src/index.d.ts.map +1 -1
  15. package/dist/src/index.js +10 -1
  16. package/dist/src/indexing/__tests__/vector-indexing.test.d.ts +2 -0
  17. package/dist/src/indexing/__tests__/vector-indexing.test.d.ts.map +1 -0
  18. package/dist/src/indexing/__tests__/vector-indexing.test.js +291 -0
  19. package/dist/src/indexing/incremental-indexer.d.ts +4 -0
  20. package/dist/src/indexing/incremental-indexer.d.ts.map +1 -1
  21. package/dist/src/indexing/incremental-indexer.js +45 -0
  22. package/dist/src/indexing/vector-indexer.d.ts +63 -0
  23. package/dist/src/indexing/vector-indexer.d.ts.map +1 -0
  24. package/dist/src/indexing/vector-indexer.js +197 -0
  25. package/dist/src/plugin/code-intelligence-plugin.d.ts.map +1 -1
  26. package/dist/src/plugin/code-intelligence-plugin.js +4 -2
  27. package/dist/src/reranking/__tests__/reranker.test.d.ts +2 -0
  28. package/dist/src/reranking/__tests__/reranker.test.d.ts.map +1 -0
  29. package/dist/src/reranking/__tests__/reranker.test.js +503 -0
  30. package/dist/src/reranking/cohere-reranker.d.ts +26 -0
  31. package/dist/src/reranking/cohere-reranker.d.ts.map +1 -0
  32. package/dist/src/reranking/cohere-reranker.js +110 -0
  33. package/dist/src/reranking/reranker-provider.d.ts +40 -0
  34. package/dist/src/reranking/reranker-provider.d.ts.map +1 -0
  35. package/dist/src/reranking/reranker-provider.js +6 -0
  36. package/dist/src/reranking/voyage-reranker.d.ts +27 -0
  37. package/dist/src/reranking/voyage-reranker.d.ts.map +1 -0
  38. package/dist/src/reranking/voyage-reranker.js +111 -0
  39. package/dist/src/search/__tests__/hybrid-search.test.d.ts +2 -0
  40. package/dist/src/search/__tests__/hybrid-search.test.d.ts.map +1 -0
  41. package/dist/src/search/__tests__/hybrid-search.test.js +437 -0
  42. package/dist/src/search/__tests__/query-classifier.test.d.ts +2 -0
  43. package/dist/src/search/__tests__/query-classifier.test.d.ts.map +1 -0
  44. package/dist/src/search/__tests__/query-classifier.test.js +136 -0
  45. package/dist/src/search/hybrid-search.d.ts +56 -0
  46. package/dist/src/search/hybrid-search.d.ts.map +1 -0
  47. package/dist/src/search/hybrid-search.js +299 -0
  48. package/dist/src/search/query-classifier.d.ts +20 -0
  49. package/dist/src/search/query-classifier.d.ts.map +1 -0
  50. package/dist/src/search/query-classifier.js +58 -0
  51. package/dist/src/search/score-normalizer.d.ts +16 -0
  52. package/dist/src/search/score-normalizer.d.ts.map +1 -0
  53. package/dist/src/search/score-normalizer.js +26 -0
  54. package/dist/src/types.d.ts +83 -0
  55. package/dist/src/types.d.ts.map +1 -1
  56. package/dist/src/types.js +36 -2
  57. package/dist/src/vector/__tests__/vector-store.test.d.ts +2 -0
  58. package/dist/src/vector/__tests__/vector-store.test.d.ts.map +1 -0
  59. package/dist/src/vector/__tests__/vector-store.test.js +278 -0
  60. package/dist/src/vector/hnsw-store.d.ts +48 -0
  61. package/dist/src/vector/hnsw-store.d.ts.map +1 -0
  62. package/dist/src/vector/hnsw-store.js +437 -0
  63. package/dist/src/vector/vector-store.d.ts +15 -0
  64. package/dist/src/vector/vector-store.d.ts.map +1 -0
  65. package/dist/src/vector/vector-store.js +1 -0
  66. package/package.json +1 -1
@@ -0,0 +1,136 @@
1
+ import { describe, it, expect } from 'vitest';
2
+ import { classifyQuery } from '../query-classifier.js';
3
/**
 * Table-driven spec for classifyQuery.
 *
 * Each case feeds one query string to the classifier and checks both the
 * reported query type and the alpha value returned alongside it.
 */
describe('classifyQuery', () => {
    const suites = [
        // ── Identifier detection ──────────────────────────────────────────
        {
            name: 'camelCase detection',
            cases: [
                { title: 'classifies camelCase as identifier', query: 'handleRequest', type: 'identifier', alpha: 0.25 },
                { title: 'classifies multi-word camelCase as identifier', query: 'getUserById', type: 'identifier', alpha: 0.25 },
            ],
        },
        {
            name: 'snake_case detection',
            cases: [
                { title: 'classifies snake_case as identifier', query: 'get_user_by_id', type: 'identifier', alpha: 0.25 },
                { title: 'classifies double snake_case as identifier', query: 'handle_http_request', type: 'identifier', alpha: 0.25 },
            ],
        },
        {
            name: 'PascalCase detection',
            cases: [
                { title: 'classifies PascalCase as identifier', query: 'UserService', type: 'identifier', alpha: 0.25 },
                { title: 'classifies multi-word PascalCase as identifier', query: 'HttpRequestHandler', type: 'identifier', alpha: 0.25 },
            ],
        },
        {
            name: 'CONSTANT_CASE detection',
            cases: [
                { title: 'classifies CONSTANT_CASE as identifier', query: 'MAX_RETRIES', type: 'identifier', alpha: 0.25 },
                { title: 'classifies single CONSTANT as identifier', query: 'HTTP_TIMEOUT', type: 'identifier', alpha: 0.25 },
            ],
        },
        {
            name: 'dot.notation detection',
            cases: [
                { title: 'classifies dot notation as identifier', query: 'req.body', type: 'identifier', alpha: 0.25 },
                { title: 'classifies chained dot notation as identifier', query: 'this.service.getUser', type: 'identifier', alpha: 0.25 },
            ],
        },
        {
            name: 'operator tokens',
            cases: [
                { title: 'classifies :: operator token as identifier', query: 'std::vector', type: 'identifier', alpha: 0.25 },
                { title: 'classifies -> operator token as identifier', query: 'node->next', type: 'identifier', alpha: 0.25 },
            ],
        },
        // ── Natural language detection ────────────────────────────────────
        {
            name: 'natural language queries',
            cases: [
                { title: 'classifies plain English as natural', query: 'how to handle errors', type: 'natural', alpha: 0.75 },
                { title: 'classifies question-style query as natural', query: 'authentication middleware for express', type: 'natural', alpha: 0.75 },
                { title: 'classifies descriptive query as natural', query: 'database connection pooling strategy', type: 'natural', alpha: 0.75 },
                { title: 'classifies short natural query as natural', query: 'error handling', type: 'natural', alpha: 0.75 },
            ],
        },
        // ── Mixed queries ─────────────────────────────────────────────────
        {
            name: 'mixed queries',
            cases: [
                { title: 'classifies query with one identifier in natural context as mixed', query: 'fix handleRequest error', type: 'mixed', alpha: 0.55 },
                { title: 'classifies query with identifier and natural words as mixed', query: 'where is UserService defined', type: 'mixed', alpha: 0.55 },
                { title: 'classifies query mixing snake_case with natural as mixed', query: 'update get_user_by_id function', type: 'mixed', alpha: 0.55 },
            ],
        },
        // ── Edge cases ────────────────────────────────────────────────────
        {
            name: 'edge cases',
            cases: [
                { title: 'handles empty query', query: '', type: 'natural', alpha: 0.75 },
                { title: 'handles single word that is not an identifier', query: 'search', type: 'natural', alpha: 0.75 },
                { title: 'handles multiple identifiers', query: 'handleRequest processData getUserById', type: 'identifier', alpha: 0.25 },
            ],
        },
    ];
    // Register one nested describe per suite and one test per case, in table order.
    for (const suite of suites) {
        describe(suite.name, () => {
            for (const testCase of suite.cases) {
                it(testCase.title, () => {
                    const classification = classifyQuery(testCase.query);
                    expect(classification.type).toBe(testCase.type);
                    expect(classification.alpha).toBe(testCase.alpha);
                });
            }
        });
    }
});
@@ -0,0 +1,56 @@
1
+ /**
2
+ * Hybrid search engine combining BM25 lexical search with dense vector semantic search.
3
+ * Uses Convex Combination Score (CCS) fusion with query-adaptive alpha weighting
4
+ * and Reciprocal Rank Fusion (RRF) as a fallback.
5
+ */
6
+ import type { SearchQuery, SearchResult } from '../types.js';
7
+ import type { VectorStore } from '../vector/vector-store.js';
8
+ import type { EmbeddingProvider } from '../embedding/embedding-provider.js';
9
+ import type { SearchEngine } from './search-engine.js';
10
+ import type { RerankerConfig } from '../reranking/reranker-provider.js';
11
/**
 * Tunable settings for the hybrid (BM25 + vector) search engine.
 */
export interface HybridSearchConfig {
    /**
     * Share of the fused CCS score contributed by the vector side:
     * 0 means BM25 only, 1 means vector only.
     * Default: 0.45
     */
    alpha: number;
    /**
     * When true, alpha is chosen per query instead of using the fixed value above.
     * Default: true
     */
    adaptiveAlpha: boolean;
    /**
     * How many candidates to request from BM25.
     * Default: 100
     */
    bm25TopK: number;
    /**
     * How many candidates to request from the vector store.
     * Default: 100
     */
    vectorTopK: number;
    /**
     * Which fusion strategy combines the two candidate lists.
     * Default: 'ccs'
     */
    fusionMethod: 'ccs' | 'rrf';
    /**
     * The constant k used by Reciprocal Rank Fusion.
     * Default: 60
     */
    rrfK: number;
}
25
/**
 * Search engine that merges BM25 lexical results with vector-store results
 * and can hand the merged list to a reranker afterwards.
 */
export declare class HybridSearchEngine {
    private bm25Engine;
    private vectorStore;
    private embeddingProvider;
    private config;
    private rerankerConfig;
    /**
     * @param bm25Engine Lexical engine; always queried.
     * @param vectorStore Vector store, or null to disable the vector side.
     * @param embeddingProvider Query embedder, or null to disable the vector side.
     * @param config Overrides merged on top of the built-in defaults.
     * @param rerankerConfig Optional reranker settings; null/omitted means no reranking.
     */
    constructor(
        bm25Engine: SearchEngine,
        vectorStore: VectorStore | null,
        embeddingProvider: EmbeddingProvider | null,
        config?: Partial<HybridSearchConfig>,
        rerankerConfig?: RerankerConfig | null,
    );
    /** Execute a search that combines BM25 retrieval with vector retrieval. */
    search(query: SearchQuery): Promise<SearchResult[]>;
    private hybridFusion;
    /**
     * Fusion by Convex Combination Score:
     * score(d) = alpha * normalized_vector(d) + (1 - alpha) * normalized_bm25(d)
     */
    private ccsFusion;
    /**
     * Fusion by Reciprocal Rank:
     * rrf_score(d) = sum(1 / (k + rank_i(d))) for each ranking
     */
    private rrfFusion;
    /**
     * Run the cross-encoder reranker over the results.
     * The input is handed back untouched when no reranker is configured,
     * when it is disabled, or when it throws.
     */
    private applyReranking;
    /** Assemble the text sent to the reranker from a result's symbol metadata. */
    private buildRerankText;
    /** Produce the key used to pair a BM25 result with a vector result. */
    private makeDocKey;
    private matchPattern;
}
56
+ //# sourceMappingURL=hybrid-search.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"hybrid-search.d.ts","sourceRoot":"","sources":["../../../src/search/hybrid-search.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,KAAK,EAAE,WAAW,EAAE,YAAY,EAAE,MAAM,aAAa,CAAA;AAC5D,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,2BAA2B,CAAA;AAC5D,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,oCAAoC,CAAA;AAC3E,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAA;AACtD,OAAO,KAAK,EAAE,cAAc,EAAkB,MAAM,mCAAmC,CAAA;AAMvF,MAAM,WAAW,kBAAkB;IACjC,sFAAsF;IACtF,KAAK,EAAE,MAAM,CAAA;IACb,2DAA2D;IAC3D,aAAa,EAAE,OAAO,CAAA;IACtB,0DAA0D;IAC1D,QAAQ,EAAE,MAAM,CAAA;IAChB,4DAA4D;IAC5D,UAAU,EAAE,MAAM,CAAA;IAClB,oCAAoC;IACpC,YAAY,EAAE,KAAK,GAAG,KAAK,CAAA;IAC3B,kCAAkC;IAClC,IAAI,EAAE,MAAM,CAAA;CACb;AA0BD,qBAAa,kBAAkB;IAC7B,OAAO,CAAC,UAAU,CAAc;IAChC,OAAO,CAAC,WAAW,CAAoB;IACvC,OAAO,CAAC,iBAAiB,CAA0B;IACnD,OAAO,CAAC,MAAM,CAAoB;IAClC,OAAO,CAAC,cAAc,CAAuB;gBAG3C,UAAU,EAAE,YAAY,EACxB,WAAW,EAAE,WAAW,GAAG,IAAI,EAC/B,iBAAiB,EAAE,iBAAiB,GAAG,IAAI,EAC3C,MAAM,CAAC,EAAE,OAAO,CAAC,kBAAkB,CAAC,EACpC,cAAc,CAAC,EAAE,cAAc,GAAG,IAAI;IASxC,6DAA6D;IACvD,MAAM,CAAC,KAAK,EAAE,WAAW,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC;YA2B3C,YAAY;IA6F1B;;;OAGG;IACH,OAAO,CAAC,SAAS;IAuEjB;;;OAGG;IACH,OAAO,CAAC,SAAS;IA2BjB;;;OAGG;YACW,cAAc;IAkD5B,uEAAuE;IACvE,OAAO,CAAC,eAAe;IAmBvB,yEAAyE;IACzE,OAAO,CAAC,UAAU;IAIlB,OAAO,CAAC,YAAY;CAarB"}
@@ -0,0 +1,299 @@
1
+ /**
2
+ * Hybrid search engine combining BM25 lexical search with dense vector semantic search.
3
+ * Uses Convex Combination Score (CCS) fusion with query-adaptive alpha weighting
4
+ * and Reciprocal Rank Fusion (RRF) as a fallback.
5
+ */
6
+ import { minMaxNormalize } from './score-normalizer.js';
7
+ import { classifyQuery } from './query-classifier.js';
8
+ const DEFAULT_CONFIG = {
9
+ alpha: 0.45,
10
+ adaptiveAlpha: true,
11
+ bm25TopK: 100,
12
+ vectorTopK: 100,
13
+ fusionMethod: 'ccs',
14
+ rrfK: 60,
15
+ };
16
+ // ── Hybrid Search Engine ─────────────────────────────────────────────
17
+ export class HybridSearchEngine {
18
+ bm25Engine;
19
+ vectorStore;
20
+ embeddingProvider;
21
+ config;
22
+ rerankerConfig;
23
+ constructor(bm25Engine, vectorStore, embeddingProvider, config, rerankerConfig) {
24
+ this.bm25Engine = bm25Engine;
25
+ this.vectorStore = vectorStore;
26
+ this.embeddingProvider = embeddingProvider;
27
+ this.config = { ...DEFAULT_CONFIG, ...config };
28
+ this.rerankerConfig = rerankerConfig ?? null;
29
+ }
30
+ /** Run hybrid search combining BM25 and vector retrieval. */
31
+ async search(query) {
32
+ // Step 1: Get BM25 results (always available)
33
+ const bm25Query = {
34
+ ...query,
35
+ maxResults: this.config.bm25TopK,
36
+ };
37
+ const bm25Results = this.bm25Engine.search(bm25Query);
38
+ let results;
39
+ // Step 2: If vector store and embedding provider are available, do hybrid fusion
40
+ if (this.vectorStore && this.embeddingProvider && this.vectorStore.size() > 0) {
41
+ results = await this.hybridFusion(query, bm25Results);
42
+ }
43
+ else {
44
+ // Step 3: Fallback to BM25-only
45
+ results = bm25Results;
46
+ if (query.maxResults) {
47
+ results = results.slice(0, query.maxResults);
48
+ }
49
+ }
50
+ // Step 4: Apply reranking if configured and enabled
51
+ results = await this.applyReranking(query.query, results);
52
+ return results;
53
+ }
54
+ async hybridFusion(query, bm25Results) {
55
+ // Embed the query
56
+ const queryVector = await this.embeddingProvider.embedQuery(query.query);
57
+ // Search vector store
58
+ const vectorResults = await this.vectorStore.search(queryVector, this.config.vectorTopK);
59
+ // Build candidate map keyed by docKey
60
+ const candidates = new Map();
61
+ // Add BM25 results
62
+ for (let i = 0; i < bm25Results.length; i++) {
63
+ const r = bm25Results[i];
64
+ const key = this.makeDocKey(r.symbol.filePath, r.symbol.name, r.symbol.line);
65
+ candidates.set(key, {
66
+ bm25Score: r.score,
67
+ vectorScore: undefined,
68
+ bm25Rank: i + 1,
69
+ vectorRank: undefined,
70
+ result: r,
71
+ });
72
+ }
73
+ // Add/merge vector results
74
+ for (let i = 0; i < vectorResults.length; i++) {
75
+ const vr = vectorResults[i];
76
+ const meta = vr.chunk.metadata;
77
+ const key = this.makeDocKey(meta.filePath, meta.symbolName ?? '', meta.startLine);
78
+ const existing = candidates.get(key);
79
+ if (existing) {
80
+ existing.vectorScore = vr.score;
81
+ existing.vectorRank = i + 1;
82
+ }
83
+ else {
84
+ // Vector-only result — create a SearchResult from the chunk metadata
85
+ candidates.set(key, {
86
+ bm25Score: undefined,
87
+ vectorScore: vr.score,
88
+ bm25Rank: undefined,
89
+ vectorRank: i + 1,
90
+ result: {
91
+ symbol: {
92
+ name: meta.symbolName ?? '',
93
+ kind: meta.symbolKind ?? 'function',
94
+ filePath: meta.filePath,
95
+ line: meta.startLine,
96
+ endLine: meta.endLine,
97
+ language: meta.language,
98
+ exported: false,
99
+ },
100
+ score: 0, // Will be set by fusion
101
+ matchType: 'semantic',
102
+ },
103
+ });
104
+ }
105
+ }
106
+ // Determine alpha
107
+ const alpha = this.config.adaptiveAlpha
108
+ ? classifyQuery(query.query).alpha
109
+ : this.config.alpha;
110
+ // Fuse scores
111
+ const fused = this.config.fusionMethod === 'ccs'
112
+ ? this.ccsFusion(candidates, alpha)
113
+ : this.rrfFusion(candidates);
114
+ // Apply filters
115
+ let results = fused.filter(r => {
116
+ if (query.symbolKinds && !query.symbolKinds.includes(r.symbol.kind))
117
+ return false;
118
+ if (query.language && r.symbol.language !== query.language)
119
+ return false;
120
+ if (query.filePattern && !this.matchPattern(r.symbol.filePath, query.filePattern))
121
+ return false;
122
+ return true;
123
+ });
124
+ // Sort by fused score descending
125
+ results.sort((a, b) => b.score - a.score);
126
+ // Limit results
127
+ if (query.maxResults) {
128
+ results = results.slice(0, query.maxResults);
129
+ }
130
+ return results;
131
+ }
132
+ /**
133
+ * Convex Combination Score fusion.
134
+ * score(d) = alpha * normalized_vector(d) + (1 - alpha) * normalized_bm25(d)
135
+ */
136
+ ccsFusion(candidates, alpha) {
137
+ // Collect raw scores for normalization
138
+ const bm25Scores = [];
139
+ const vectorScores = [];
140
+ for (const c of candidates.values()) {
141
+ if (c.bm25Score !== undefined)
142
+ bm25Scores.push(c.bm25Score);
143
+ if (c.vectorScore !== undefined)
144
+ vectorScores.push(c.vectorScore);
145
+ }
146
+ // Normalize
147
+ const bm25Normalized = minMaxNormalize(bm25Scores);
148
+ const vectorNormalized = minMaxNormalize(vectorScores);
149
+ // Build a map from raw score to normalized score
150
+ let bm25Idx = 0;
151
+ let vectorIdx = 0;
152
+ const bm25NormMap = new Map();
153
+ const vectorNormMap = new Map();
154
+ // Since multiple candidates can have the same raw score, use arrays
155
+ for (const c of candidates.values()) {
156
+ if (c.bm25Score !== undefined) {
157
+ if (!bm25NormMap.has(c.bm25Score))
158
+ bm25NormMap.set(c.bm25Score, []);
159
+ bm25NormMap.get(c.bm25Score).push(bm25Normalized[bm25Idx++]);
160
+ }
161
+ if (c.vectorScore !== undefined) {
162
+ if (!vectorNormMap.has(c.vectorScore))
163
+ vectorNormMap.set(c.vectorScore, []);
164
+ vectorNormMap.get(c.vectorScore).push(vectorNormalized[vectorIdx++]);
165
+ }
166
+ }
167
+ // Reset counters for consumption
168
+ const bm25NormCounters = new Map();
169
+ const vectorNormCounters = new Map();
170
+ const results = [];
171
+ // Re-iterate to assign normalized scores in order
172
+ bm25Idx = 0;
173
+ vectorIdx = 0;
174
+ for (const c of candidates.values()) {
175
+ let normBm25 = 0;
176
+ let normVector = 0;
177
+ if (c.bm25Score !== undefined) {
178
+ normBm25 = bm25Normalized[bm25Idx++];
179
+ }
180
+ if (c.vectorScore !== undefined) {
181
+ normVector = vectorNormalized[vectorIdx++];
182
+ }
183
+ const fusedScore = alpha * normVector + (1 - alpha) * normBm25;
184
+ const hasBoth = c.bm25Score !== undefined && c.vectorScore !== undefined;
185
+ results.push({
186
+ ...c.result,
187
+ score: fusedScore,
188
+ matchType: hasBoth ? 'hybrid' : (c.vectorScore !== undefined ? 'semantic' : c.result.matchType),
189
+ bm25Score: c.bm25Score,
190
+ vectorScore: c.vectorScore,
191
+ });
192
+ }
193
+ return results;
194
+ }
195
+ /**
196
+ * Reciprocal Rank Fusion.
197
+ * rrf_score(d) = sum(1 / (k + rank_i(d))) for each ranking
198
+ */
199
+ rrfFusion(candidates) {
200
+ const k = this.config.rrfK;
201
+ const results = [];
202
+ for (const c of candidates.values()) {
203
+ let rrfScore = 0;
204
+ if (c.bm25Rank !== undefined) {
205
+ rrfScore += 1 / (k + c.bm25Rank);
206
+ }
207
+ if (c.vectorRank !== undefined) {
208
+ rrfScore += 1 / (k + c.vectorRank);
209
+ }
210
+ const hasBoth = c.bm25Rank !== undefined && c.vectorRank !== undefined;
211
+ results.push({
212
+ ...c.result,
213
+ score: rrfScore,
214
+ matchType: hasBoth ? 'hybrid' : (c.vectorRank !== undefined ? 'semantic' : c.result.matchType),
215
+ bm25Score: c.bm25Score,
216
+ vectorScore: c.vectorScore,
217
+ });
218
+ }
219
+ return results;
220
+ }
221
+ /**
222
+ * Apply cross-encoder reranking to search results.
223
+ * Returns results unchanged if reranker is not configured, disabled, or errors.
224
+ */
225
+ async applyReranking(query, results) {
226
+ if (!this.rerankerConfig || !this.rerankerConfig.enabled) {
227
+ return results;
228
+ }
229
+ const { provider, topN = 10, candidatePool = 50 } = this.rerankerConfig;
230
+ // Take the top candidatePool results for reranking
231
+ const candidates = results.slice(0, candidatePool);
232
+ if (candidates.length === 0)
233
+ return results;
234
+ // Build rerank documents from search results
235
+ const documents = candidates.map((r, i) => ({
236
+ id: `${i}`,
237
+ text: this.buildRerankText(r),
238
+ }));
239
+ try {
240
+ const rerankResults = await provider.rerank(query, documents);
241
+ // Build a map from index to rerank score
242
+ const scoreMap = new Map();
243
+ for (const rr of rerankResults) {
244
+ scoreMap.set(rr.index, rr.score);
245
+ }
246
+ // Update candidate results with rerank scores
247
+ const reranked = [];
248
+ for (let i = 0; i < candidates.length; i++) {
249
+ const rerankScore = scoreMap.get(i);
250
+ if (rerankScore !== undefined) {
251
+ reranked.push({
252
+ ...candidates[i],
253
+ score: rerankScore,
254
+ rerankScore,
255
+ });
256
+ }
257
+ }
258
+ // Sort by reranker score descending
259
+ reranked.sort((a, b) => b.score - a.score);
260
+ // Return top N
261
+ return reranked.slice(0, topN);
262
+ }
263
+ catch {
264
+ // Graceful fallback: return original results if reranker errors
265
+ return results;
266
+ }
267
+ }
268
+ /** Build text for reranking from a search result's symbol metadata. */
269
+ buildRerankText(result) {
270
+ const parts = [];
271
+ const { symbol } = result;
272
+ if (symbol.signature) {
273
+ parts.push(symbol.signature);
274
+ }
275
+ if (symbol.documentation) {
276
+ parts.push(symbol.documentation);
277
+ }
278
+ // Always include name and kind for context
279
+ parts.push(`${symbol.kind} ${symbol.name}`);
280
+ return parts.join('\n');
281
+ }
282
+ /** Create a document key for matching BM25 results to vector results. */
283
+ makeDocKey(filePath, symbolName, startLine) {
284
+ return `${filePath}:${symbolName}:${startLine}`;
285
+ }
286
+ matchPattern(filePath, pattern) {
287
+ if (pattern.startsWith('*')) {
288
+ return filePath.endsWith(pattern.slice(1));
289
+ }
290
+ if (pattern.endsWith('/**')) {
291
+ return filePath.startsWith(pattern.slice(0, -3));
292
+ }
293
+ if (pattern.endsWith('/*')) {
294
+ const dir = pattern.slice(0, -2);
295
+ return filePath.startsWith(dir) && !filePath.slice(dir.length + 1).includes('/');
296
+ }
297
+ return filePath.includes(pattern);
298
+ }
299
+ }
@@ -0,0 +1,20 @@
1
+ /**
2
+ * Query classifier for adaptive alpha weighting in hybrid search.
3
+ * Detects whether a query is identifier-heavy, natural language, or mixed
4
+ * and returns a recommended alpha value.
5
+ */
6
/** The three categories a search query can fall into. */
export type QueryType =
    | 'identifier'
    | 'natural'
    | 'mixed';
/** Outcome of classifying a query: its category plus the alpha to use for it. */
export interface QueryClassification {
    /** Detected category of the query. */
    type: QueryType;
    /** Recommended vector weight for CCS fusion. */
    alpha: number;
}
/**
 * Work out what kind of query this is and which alpha CCS fusion should use.
 *
 * - mostly code identifiers (camelCase, snake_case, etc.): alpha = 0.25 (favor BM25)
 * - natural language ("authentication middleware"): alpha = 0.75 (favor vectors)
 * - a mixture of both ("fix CORS error in Express"): alpha = 0.55 (balanced)
 *
 * @param query Raw query text.
 * @returns The detected type together with its alpha.
 */
export declare function classifyQuery(query: string): QueryClassification;
20
+ //# sourceMappingURL=query-classifier.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"query-classifier.d.ts","sourceRoot":"","sources":["../../../src/search/query-classifier.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,MAAM,MAAM,SAAS,GAAG,YAAY,GAAG,SAAS,GAAG,OAAO,CAAA;AAE1D,MAAM,WAAW,mBAAmB;IAClC,IAAI,EAAE,SAAS,CAAA;IACf,KAAK,EAAE,MAAM,CAAA;CACd;AAuBD;;;;;;;GAOG;AACH,wBAAgB,aAAa,CAAC,KAAK,EAAE,MAAM,GAAG,mBAAmB,CAuBhE"}
@@ -0,0 +1,58 @@
1
+ /**
2
+ * Query classifier for adaptive alpha weighting in hybrid search.
3
+ * Detects whether a query is identifier-heavy, natural language, or mixed
4
+ * and returns a recommended alpha value.
5
+ */
6
+ // Patterns that indicate code identifiers
7
+ const CAMEL_CASE = /[a-z][a-zA-Z]*[A-Z]/;
8
+ const PASCAL_CASE = /^[A-Z][a-zA-Z]+[A-Z]/;
9
+ const SNAKE_CASE = /\w+_\w+/;
10
+ const CONSTANT_CASE = /^[A-Z][A-Z0-9_]+$/;
11
+ const DOT_NOTATION = /\w+\.\w+/;
12
+ const OPERATOR_TOKENS = /(::|->|=>|#)/;
13
+ /**
14
+ * Check if a token looks like a code identifier.
15
+ */
16
+ function isIdentifierToken(token) {
17
+ if (CAMEL_CASE.test(token))
18
+ return true;
19
+ if (PASCAL_CASE.test(token))
20
+ return true;
21
+ if (SNAKE_CASE.test(token))
22
+ return true;
23
+ if (CONSTANT_CASE.test(token))
24
+ return true;
25
+ if (DOT_NOTATION.test(token))
26
+ return true;
27
+ if (OPERATOR_TOKENS.test(token))
28
+ return true;
29
+ return false;
30
+ }
31
+ /**
32
+ * Classify a search query and return the recommended alpha value
33
+ * for CCS fusion.
34
+ *
35
+ * - identifier-heavy queries (camelCase, snake_case, etc.): alpha = 0.25 (favor BM25)
36
+ * - natural language queries ("authentication middleware"): alpha = 0.75 (favor vectors)
37
+ * - mixed queries ("fix CORS error in Express"): alpha = 0.55 (balanced)
38
+ */
39
+ export function classifyQuery(query) {
40
+ const tokens = query.split(/\s+/).filter(t => t.length > 0);
41
+ if (tokens.length === 0) {
42
+ return { type: 'natural', alpha: 0.75 };
43
+ }
44
+ let identifierCount = 0;
45
+ for (const token of tokens) {
46
+ if (isIdentifierToken(token)) {
47
+ identifierCount++;
48
+ }
49
+ }
50
+ const ratio = identifierCount / tokens.length;
51
+ if (ratio > 0.5) {
52
+ return { type: 'identifier', alpha: 0.25 };
53
+ }
54
+ if (ratio < 0.2) {
55
+ return { type: 'natural', alpha: 0.75 };
56
+ }
57
+ return { type: 'mixed', alpha: 0.55 };
58
+ }
@@ -0,0 +1,16 @@
1
+ /**
2
+ * Min-max score normalization for hybrid search fusion.
3
+ * Normalizes scores to [0, 1] range for CCS combination.
4
+ */
5
/**
 * Rescale a list of scores with min-max normalization.
 *
 *   normalized = (score - min) / (max - min)
 *
 * Special inputs:
 * - no scores            → []
 * - exactly one score    → [1.0]
 * - every score equal    → 1.0 for each
 *
 * @param scores Raw scores.
 * @returns A new array of the same length holding the normalized scores.
 */
export declare function minMaxNormalize(scores: number[]): number[];
16
+ //# sourceMappingURL=score-normalizer.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"score-normalizer.d.ts","sourceRoot":"","sources":["../../../src/search/score-normalizer.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH;;;;;;;;;GASG;AACH,wBAAgB,eAAe,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,EAAE,CAW1D"}
@@ -0,0 +1,26 @@
1
+ /**
2
+ * Min-max score normalization for hybrid search fusion.
3
+ * Normalizes scores to [0, 1] range for CCS combination.
4
+ */
5
+ /**
6
+ * Apply min-max normalization to an array of scores.
7
+ *
8
+ * normalized = (score - min) / (max - min)
9
+ *
10
+ * Edge cases:
11
+ * - Empty array → []
12
+ * - Single result → [1.0]
13
+ * - All same scores → all 1.0
14
+ */
15
+ export function minMaxNormalize(scores) {
16
+ if (scores.length === 0)
17
+ return [];
18
+ if (scores.length === 1)
19
+ return [1.0];
20
+ const min = Math.min(...scores);
21
+ const max = Math.max(...scores);
22
+ const range = max - min;
23
+ if (range === 0)
24
+ return scores.map(() => 1.0);
25
+ return scores.map(s => (s - min) / range);
26
+ }