viberag 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (151) hide show
  1. package/LICENSE +661 -0
  2. package/README.md +219 -0
  3. package/dist/cli/__tests__/mcp-setup.test.d.ts +6 -0
  4. package/dist/cli/__tests__/mcp-setup.test.js +597 -0
  5. package/dist/cli/app.d.ts +2 -0
  6. package/dist/cli/app.js +238 -0
  7. package/dist/cli/commands/handlers.d.ts +57 -0
  8. package/dist/cli/commands/handlers.js +231 -0
  9. package/dist/cli/commands/index.d.ts +2 -0
  10. package/dist/cli/commands/index.js +2 -0
  11. package/dist/cli/commands/mcp-setup.d.ts +107 -0
  12. package/dist/cli/commands/mcp-setup.js +509 -0
  13. package/dist/cli/commands/useRagCommands.d.ts +23 -0
  14. package/dist/cli/commands/useRagCommands.js +180 -0
  15. package/dist/cli/components/CleanWizard.d.ts +17 -0
  16. package/dist/cli/components/CleanWizard.js +169 -0
  17. package/dist/cli/components/InitWizard.d.ts +20 -0
  18. package/dist/cli/components/InitWizard.js +370 -0
  19. package/dist/cli/components/McpSetupWizard.d.ts +37 -0
  20. package/dist/cli/components/McpSetupWizard.js +387 -0
  21. package/dist/cli/components/SearchResultsDisplay.d.ts +13 -0
  22. package/dist/cli/components/SearchResultsDisplay.js +130 -0
  23. package/dist/cli/components/WelcomeBanner.d.ts +10 -0
  24. package/dist/cli/components/WelcomeBanner.js +26 -0
  25. package/dist/cli/components/index.d.ts +1 -0
  26. package/dist/cli/components/index.js +1 -0
  27. package/dist/cli/data/mcp-editors.d.ts +80 -0
  28. package/dist/cli/data/mcp-editors.js +270 -0
  29. package/dist/cli/index.d.ts +2 -0
  30. package/dist/cli/index.js +26 -0
  31. package/dist/cli-bundle.cjs +5269 -0
  32. package/dist/common/commands/terminalSetup.d.ts +2 -0
  33. package/dist/common/commands/terminalSetup.js +144 -0
  34. package/dist/common/components/CommandSuggestions.d.ts +9 -0
  35. package/dist/common/components/CommandSuggestions.js +20 -0
  36. package/dist/common/components/StaticWithResize.d.ts +23 -0
  37. package/dist/common/components/StaticWithResize.js +62 -0
  38. package/dist/common/components/StatusBar.d.ts +8 -0
  39. package/dist/common/components/StatusBar.js +64 -0
  40. package/dist/common/components/TextInput.d.ts +12 -0
  41. package/dist/common/components/TextInput.js +239 -0
  42. package/dist/common/components/index.d.ts +3 -0
  43. package/dist/common/components/index.js +3 -0
  44. package/dist/common/hooks/index.d.ts +4 -0
  45. package/dist/common/hooks/index.js +4 -0
  46. package/dist/common/hooks/useCommandHistory.d.ts +7 -0
  47. package/dist/common/hooks/useCommandHistory.js +51 -0
  48. package/dist/common/hooks/useCtrlC.d.ts +9 -0
  49. package/dist/common/hooks/useCtrlC.js +40 -0
  50. package/dist/common/hooks/useKittyKeyboard.d.ts +10 -0
  51. package/dist/common/hooks/useKittyKeyboard.js +26 -0
  52. package/dist/common/hooks/useStaticOutputBuffer.d.ts +31 -0
  53. package/dist/common/hooks/useStaticOutputBuffer.js +58 -0
  54. package/dist/common/hooks/useTerminalResize.d.ts +28 -0
  55. package/dist/common/hooks/useTerminalResize.js +51 -0
  56. package/dist/common/hooks/useTextBuffer.d.ts +13 -0
  57. package/dist/common/hooks/useTextBuffer.js +165 -0
  58. package/dist/common/index.d.ts +13 -0
  59. package/dist/common/index.js +17 -0
  60. package/dist/common/types.d.ts +162 -0
  61. package/dist/common/types.js +1 -0
  62. package/dist/mcp/index.d.ts +12 -0
  63. package/dist/mcp/index.js +66 -0
  64. package/dist/mcp/server.d.ts +25 -0
  65. package/dist/mcp/server.js +837 -0
  66. package/dist/mcp/watcher.d.ts +86 -0
  67. package/dist/mcp/watcher.js +334 -0
  68. package/dist/rag/__tests__/grammar-smoke.test.d.ts +9 -0
  69. package/dist/rag/__tests__/grammar-smoke.test.js +161 -0
  70. package/dist/rag/__tests__/helpers.d.ts +30 -0
  71. package/dist/rag/__tests__/helpers.js +67 -0
  72. package/dist/rag/__tests__/merkle.test.d.ts +5 -0
  73. package/dist/rag/__tests__/merkle.test.js +161 -0
  74. package/dist/rag/__tests__/metadata-extraction.test.d.ts +10 -0
  75. package/dist/rag/__tests__/metadata-extraction.test.js +202 -0
  76. package/dist/rag/__tests__/multi-language.test.d.ts +13 -0
  77. package/dist/rag/__tests__/multi-language.test.js +535 -0
  78. package/dist/rag/__tests__/rag.test.d.ts +10 -0
  79. package/dist/rag/__tests__/rag.test.js +311 -0
  80. package/dist/rag/__tests__/search-exhaustive.test.d.ts +9 -0
  81. package/dist/rag/__tests__/search-exhaustive.test.js +87 -0
  82. package/dist/rag/__tests__/search-filters.test.d.ts +10 -0
  83. package/dist/rag/__tests__/search-filters.test.js +250 -0
  84. package/dist/rag/__tests__/search-modes.test.d.ts +8 -0
  85. package/dist/rag/__tests__/search-modes.test.js +133 -0
  86. package/dist/rag/config/index.d.ts +61 -0
  87. package/dist/rag/config/index.js +111 -0
  88. package/dist/rag/constants.d.ts +41 -0
  89. package/dist/rag/constants.js +57 -0
  90. package/dist/rag/embeddings/fastembed.d.ts +62 -0
  91. package/dist/rag/embeddings/fastembed.js +124 -0
  92. package/dist/rag/embeddings/gemini.d.ts +26 -0
  93. package/dist/rag/embeddings/gemini.js +116 -0
  94. package/dist/rag/embeddings/index.d.ts +10 -0
  95. package/dist/rag/embeddings/index.js +9 -0
  96. package/dist/rag/embeddings/local-4b.d.ts +28 -0
  97. package/dist/rag/embeddings/local-4b.js +51 -0
  98. package/dist/rag/embeddings/local.d.ts +29 -0
  99. package/dist/rag/embeddings/local.js +119 -0
  100. package/dist/rag/embeddings/mistral.d.ts +22 -0
  101. package/dist/rag/embeddings/mistral.js +85 -0
  102. package/dist/rag/embeddings/openai.d.ts +22 -0
  103. package/dist/rag/embeddings/openai.js +85 -0
  104. package/dist/rag/embeddings/types.d.ts +37 -0
  105. package/dist/rag/embeddings/types.js +1 -0
  106. package/dist/rag/gitignore/index.d.ts +57 -0
  107. package/dist/rag/gitignore/index.js +178 -0
  108. package/dist/rag/index.d.ts +15 -0
  109. package/dist/rag/index.js +25 -0
  110. package/dist/rag/indexer/chunker.d.ts +129 -0
  111. package/dist/rag/indexer/chunker.js +1352 -0
  112. package/dist/rag/indexer/index.d.ts +6 -0
  113. package/dist/rag/indexer/index.js +6 -0
  114. package/dist/rag/indexer/indexer.d.ts +73 -0
  115. package/dist/rag/indexer/indexer.js +356 -0
  116. package/dist/rag/indexer/types.d.ts +68 -0
  117. package/dist/rag/indexer/types.js +47 -0
  118. package/dist/rag/logger/index.d.ts +20 -0
  119. package/dist/rag/logger/index.js +75 -0
  120. package/dist/rag/manifest/index.d.ts +50 -0
  121. package/dist/rag/manifest/index.js +97 -0
  122. package/dist/rag/merkle/diff.d.ts +26 -0
  123. package/dist/rag/merkle/diff.js +95 -0
  124. package/dist/rag/merkle/hash.d.ts +34 -0
  125. package/dist/rag/merkle/hash.js +165 -0
  126. package/dist/rag/merkle/index.d.ts +68 -0
  127. package/dist/rag/merkle/index.js +298 -0
  128. package/dist/rag/merkle/node.d.ts +51 -0
  129. package/dist/rag/merkle/node.js +69 -0
  130. package/dist/rag/search/filters.d.ts +21 -0
  131. package/dist/rag/search/filters.js +100 -0
  132. package/dist/rag/search/fts.d.ts +32 -0
  133. package/dist/rag/search/fts.js +61 -0
  134. package/dist/rag/search/hybrid.d.ts +17 -0
  135. package/dist/rag/search/hybrid.js +58 -0
  136. package/dist/rag/search/index.d.ts +89 -0
  137. package/dist/rag/search/index.js +367 -0
  138. package/dist/rag/search/types.d.ts +130 -0
  139. package/dist/rag/search/types.js +4 -0
  140. package/dist/rag/search/vector.d.ts +25 -0
  141. package/dist/rag/search/vector.js +44 -0
  142. package/dist/rag/storage/index.d.ts +92 -0
  143. package/dist/rag/storage/index.js +287 -0
  144. package/dist/rag/storage/lancedb-native.d.ts +7 -0
  145. package/dist/rag/storage/lancedb-native.js +10 -0
  146. package/dist/rag/storage/schema.d.ts +23 -0
  147. package/dist/rag/storage/schema.js +50 -0
  148. package/dist/rag/storage/types.d.ts +100 -0
  149. package/dist/rag/storage/types.js +68 -0
  150. package/package.json +67 -0
  151. package/scripts/check-node-version.js +37 -0
@@ -0,0 +1,58 @@
1
+ /**
2
+ * Hybrid search combining vector and FTS with RRF reranking.
3
+ */
/**
 * Reciprocal Rank Fusion (RRF) constant.
 * Higher values flatten the 1 / (k + rank) curve, which gives relatively
 * more weight to lower-ranked results.
 */
const RRF_K = 60;
/** Vector weight used when the caller passes none (or passes NaN). */
const DEFAULT_VECTOR_WEIGHT = 0.7;
/**
 * Combine vector and FTS results using weighted Reciprocal Rank Fusion.
 *
 * RRF formula: score = sum(weight / (k + rank))
 * where k is RRF_K, rank is 1-indexed (array position + 1), and the weight is
 * `vectorWeight` for the vector list and `1 - vectorWeight` for the FTS list.
 * Both input arrays are expected to be ordered best-first.
 *
 * Each returned item is a copy of the source result (the vector result is
 * preferred when an id appears in both lists) with:
 * - `score` replaced by the fused RRF score,
 * - `vectorScore` set from the vector list (undefined if the id was FTS-only),
 * - `ftsScore` set from the FTS list (undefined if the id was vector-only).
 *
 * @param vectorResults - Results from vector search
 * @param ftsResults - Results from FTS search
 * @param limit - Maximum number of results to return; negative values are
 *   treated as 0, `undefined` returns every fused result
 * @param vectorWeight - Weight for vector results (0.0-1.0, default 0.7);
 *   values outside that range are clamped, NaN falls back to the default
 * @returns Combined and reranked results, highest fused score first
 */
export function hybridRerank(vectorResults, ftsResults, limit, vectorWeight = DEFAULT_VECTOR_WEIGHT) {
    // An out-of-range weight would make the other list's weight negative and
    // push documents found by BOTH rankers below single-ranker hits.
    const safeVectorWeight = Number.isNaN(vectorWeight)
        ? DEFAULT_VECTOR_WEIGHT
        : Math.min(1, Math.max(0, vectorWeight));
    const ftsWeight = 1 - safeVectorWeight;
    const scores = new Map();
    const resultMap = new Map();
    const vectorScores = new Map();
    const ftsScoresMap = new Map();
    // Adds one list's weighted RRF contribution for `id` at 0-based `rank`.
    const addContribution = (id, weight, rank) => {
        const rrfScore = weight * (1 / (RRF_K + rank + 1));
        scores.set(id, (scores.get(id) ?? 0) + rrfScore);
    };
    // Score from vector results
    vectorResults.forEach((result, rank) => {
        addContribution(result.id, safeVectorWeight, rank);
        resultMap.set(result.id, result);
        vectorScores.set(result.id, result.vectorScore ?? result.score);
    });
    // Score from FTS results
    ftsResults.forEach((result, rank) => {
        addContribution(result.id, ftsWeight, rank);
        ftsScoresMap.set(result.id, result.ftsScore ?? result.score);
        // Keep the vector result object when the id was already seen there
        if (!resultMap.has(result.id)) {
            resultMap.set(result.id, result);
        }
    });
    // A negative end index would make slice() drop items from the tail
    // instead of returning none, so floor the limit at 0.
    const maxResults = limit === undefined ? undefined : Math.max(0, limit);
    // Sort by combined RRF score (stable for ties) and take top results
    const sortedIds = [...scores.entries()]
        .sort((a, b) => b[1] - a[1])
        .slice(0, maxResults)
        .map(([id]) => id);
    return sortedIds.map(id => ({
        ...resultMap.get(id),
        score: scores.get(id),
        vectorScore: vectorScores.get(id),
        ftsScore: ftsScoresMap.get(id),
    }));
}
@@ -0,0 +1,89 @@
1
+ /**
2
+ * Search module for code search.
3
+ *
4
+ * Supports multiple search modes:
5
+ * - semantic: Dense vector search for conceptual queries
6
+ * - exact: BM25/FTS for symbol names and exact matches
7
+ * - hybrid: Combined vector + BM25 with RRF (default)
8
+ * - definition: Direct metadata lookup for symbol definitions
9
+ * - similar: Vector search with code snippet as query
10
+ */
11
+ import type { Logger } from '../logger/index.js';
12
+ import type { SearchOptions, SearchResults } from './types.js';
13
+ export type { SearchDebugInfo, SearchFilters, SearchMode, SearchOptions, SearchResult, SearchResults, } from './types.js';
14
+ export { vectorSearch } from './vector.js';
15
+ export { ftsSearch, ensureFtsIndex } from './fts.js';
16
+ export { hybridRerank } from './hybrid.js';
17
+ /**
18
+ * Search engine for code search.
19
+ * Supports vector, FTS, hybrid, definition, and similar search modes.
+ *
+ * Construct with the project root (used for config loading and storage) and an
+ * optional logger; without a logger, log messages are dropped. Storage and the
+ * embedding provider are created lazily on the first search() call.
20
+ */
21
+ export declare class SearchEngine {
22
+ private readonly projectRoot;
23
+ private storage;
24
+ private embeddings;
25
+ private logger;
26
+ private initialized;
27
+ constructor(projectRoot: string, logger?: Logger);
28
+ /**
29
+ * Primary search method. Dispatches on options.mode (default 'hybrid'; an unrecognised mode also runs hybrid).
+ * Uses options.limit (default 10), or a fixed limit of 500 when options.exhaustive is set,
+ * and fills in elapsedMs (plus totalMatches in exhaustive mode) on the returned object.
30
+ */
31
+ search(query: string, options?: SearchOptions): Promise<SearchResults>;
32
+ /**
33
+ * Semantic search: Dense vector search only (embeds the query, then runs vectorSearch).
34
+ * Best for conceptual queries like "how does auth work?"
35
+ */
36
+ private searchSemantic;
37
+ /**
38
+ * Exact search: BM25/FTS only (runs ftsSearch on the raw query string).
39
+ * Best for symbol names and exact string matches.
40
+ */
41
+ private searchExact;
42
+ /**
43
+ * Hybrid search: Vector + BM25 with RRF reranking (via hybridRerank).
44
+ * Good general-purpose search. Both searches are oversampled (2x the limit, up to 4x) before fusion.
45
+ *
46
+ * @param autoBoost - When true, increase BM25 weight and oversample if vector scores are low
47
+ * @param autoBoostThreshold - Vector score threshold below which auto-boost activates
48
+ * @param returnDebug - Include debug info in results for AI evaluation
49
+ */
50
+ private searchHybrid;
51
+ /**
52
+ * Definition search: Direct metadata lookup (a filtered table query, no embedding or FTS).
53
+ * Best for "where is X defined?" queries. Scores are rank-based: 1 / (position + 1).
54
+ */
55
+ private searchDefinition;
56
+ /**
57
+ * Similar search: Vector search with code snippet as query.
58
+ * Best for "find code like this" queries. The echoed query is truncated to 100 characters.
59
+ */
60
+ private searchSimilar;
61
+ /**
62
+ * Perform vector-only search. (Legacy method; same as search(query, { mode: 'semantic', limit }), limit defaults to 10)
63
+ */
64
+ searchVector(query: string, limit?: number): Promise<SearchResults>;
65
+ /**
66
+ * Perform FTS-only search. (Legacy method; same as search(query, { mode: 'exact', limit }), limit defaults to 10)
67
+ */
68
+ searchFts(query: string, limit?: number): Promise<SearchResults>;
69
+ /**
70
+ * Initialize the search engine: load config, connect storage, initialize the embedding provider. No-op once initialized.
71
+ */
72
+ private ensureInitialized;
73
+ /**
74
+ * Create the appropriate embedding provider based on config ('local', 'local-4b', 'gemini', 'mistral', 'openai'); throws on any other value.
75
+ */
76
+ private createEmbeddingProvider;
77
+ /**
78
+ * Get the code chunks table from storage.
79
+ */
80
+ private getTable;
81
+ /**
82
+ * Log a message under the 'Search' component; does nothing when no logger was supplied.
83
+ */
84
+ private log;
85
+ /**
86
+ * Close the search engine and free resources (storage and embedding provider, if created). A later search() re-initializes.
87
+ */
88
+ close(): void;
89
+ }
@@ -0,0 +1,367 @@
1
+ /**
2
+ * Search module for code search.
3
+ *
4
+ * Supports multiple search modes:
5
+ * - semantic: Dense vector search for conceptual queries
6
+ * - exact: BM25/FTS for symbol names and exact matches
7
+ * - hybrid: Combined vector + BM25 with RRF (default)
8
+ * - definition: Direct metadata lookup for symbol definitions
9
+ * - similar: Vector search with code snippet as query
10
+ */
11
+ import { loadConfig } from '../config/index.js';
12
+ import { GeminiEmbeddingProvider, Local4BEmbeddingProvider, LocalEmbeddingProvider, MistralEmbeddingProvider, OpenAIEmbeddingProvider, } from '../embeddings/index.js';
13
+ import { Storage } from '../storage/index.js';
14
+ import { buildDefinitionFilter, buildFilterClause } from './filters.js';
15
+ import { ftsSearch } from './fts.js';
16
+ import { hybridRerank } from './hybrid.js';
17
+ import { vectorSearch } from './vector.js';
18
+ export { vectorSearch } from './vector.js';
19
+ export { ftsSearch, ensureFtsIndex } from './fts.js';
20
+ export { hybridRerank } from './hybrid.js';
/** Default search limit */
const DEFAULT_LIMIT = 10;
/** Exhaustive mode limit (high but bounded) */
const EXHAUSTIVE_LIMIT = 500;
/** Default BM25 weight for hybrid search */
const DEFAULT_BM25_WEIGHT = 0.3;
/** Default oversample multiplier for hybrid search */
const DEFAULT_OVERSAMPLE_MULTIPLIER = 2;
/** Maximum oversample multiplier (for low vector confidence) */
const MAX_OVERSAMPLE_MULTIPLIER = 4;
/** Default vector-score threshold below which auto-boost activates */
const DEFAULT_AUTO_BOOST_THRESHOLD = 0.3;
/**
 * Search engine for code search.
 * Supports vector, FTS, hybrid, definition, and similar search modes.
 *
 * Storage and the embedding provider are created lazily by the first
 * search() call and released by close().
 */
export class SearchEngine {
    /**
     * @param projectRoot - Project root used for config loading and storage
     * @param logger - Optional logger; when omitted, log messages are dropped
     */
    constructor(projectRoot, logger) {
        this.projectRoot = projectRoot;
        this.storage = null;
        this.embeddings = null;
        this.logger = logger ?? null;
        this.initialized = false;
        // In-flight initialization shared by concurrent callers (null when idle).
        this.initializing = null;
    }
    /**
     * Primary search method. Dispatches to appropriate search mode.
     *
     * @param query - Search text; also used as the symbol name / code snippet
     *   when `options.symbolName` / `options.codeSnippet` are not given
     * @param options - Search options; mode defaults to 'hybrid', limit to 10
     *   (or a fixed 500 when `exhaustive` is set)
     * @returns Results with `elapsedMs` measured here; `totalMatches` is the
     *   number of returned results and is only set in exhaustive mode
     */
    async search(query, options = {}) {
        const start = Date.now();
        const mode = options.mode ?? 'hybrid';
        const limit = options.exhaustive
            ? EXHAUSTIVE_LIMIT
            : (options.limit ?? DEFAULT_LIMIT);
        const filterClause = buildFilterClause(options.filters);
        await this.ensureInitialized();
        const table = await this.getTable();
        let results;
        switch (mode) {
            case 'semantic':
                results = await this.searchSemantic(table, query, limit, filterClause, options.minScore);
                break;
            case 'exact':
                results = await this.searchExact(table, query, limit, filterClause, options.minScore);
                break;
            case 'definition':
                results = await this.searchDefinition(table, options.symbolName ?? query, limit, options.filters?.type, filterClause);
                break;
            case 'similar':
                results = await this.searchSimilar(table, options.codeSnippet ?? query, limit, filterClause, options.minScore);
                break;
            case 'hybrid':
            default:
                // Unrecognised modes deliberately fall back to hybrid.
                results = await this.searchHybrid(table, query, limit, options.bm25Weight ?? DEFAULT_BM25_WEIGHT, filterClause, options.minScore, options.autoBoost ?? true, options.autoBoostThreshold ?? DEFAULT_AUTO_BOOST_THRESHOLD, options.returnDebug ?? false);
                break;
        }
        // Add total matches for exhaustive mode
        if (options.exhaustive) {
            results.totalMatches = results.results.length;
        }
        results.elapsedMs = Date.now() - start;
        return results;
    }
    /**
     * Semantic search: Dense vector search only.
     * Best for conceptual queries like "how does auth work?"
     */
    async searchSemantic(table, query, limit, filterClause, minScore) {
        const queryVector = await this.embeddings.embedSingle(query);
        const results = await vectorSearch(table, queryVector, {
            limit,
            filterClause,
            minScore,
        });
        return {
            results,
            query,
            searchType: 'semantic',
            elapsedMs: 0, // filled in by search()
        };
    }
    /**
     * Exact search: BM25/FTS only.
     * Best for symbol names and exact string matches.
     */
    async searchExact(table, query, limit, filterClause, minScore) {
        const results = await ftsSearch(table, query, {
            limit,
            filterClause,
            minScore,
        });
        return {
            results,
            query,
            searchType: 'exact',
            elapsedMs: 0, // filled in by search()
        };
    }
    /**
     * Hybrid search: Vector + BM25 with RRF reranking.
     * Good general-purpose search.
     *
     * Both searches are first run at 2x the limit. When auto-boost is enabled
     * and the best vector score is below the threshold, the oversample factor
     * scales linearly up to 4x (triggering one re-fetch) and the BM25 weight is
     * raised by up to 0.5.
     *
     * NOTE(review): `minScore` is compared against the fused RRF score, which
     * hybridRerank builds from terms of the form weight / (60 + rank), so it is
     * far below 1. A threshold on the documented 0-1 scale will drop every
     * result in this mode — confirm the intended scale.
     *
     * @param autoBoost - When true, increase BM25 weight and oversample if vector scores are low
     * @param autoBoostThreshold - Vector score threshold below which auto-boost activates
     * @param returnDebug - Include debug info in results for AI evaluation
     */
    async searchHybrid(table, query, limit, bm25Weight, filterClause, minScore, autoBoost = true, autoBoostThreshold = DEFAULT_AUTO_BOOST_THRESHOLD, returnDebug = false) {
        const queryVector = await this.embeddings.embedSingle(query);
        // Runs both searches in parallel with the same candidate limit.
        const fetchCandidates = (candidateLimit) => Promise.all([
            vectorSearch(table, queryVector, {
                limit: candidateLimit,
                filterClause,
            }),
            ftsSearch(table, query, {
                limit: candidateLimit,
                filterClause,
            }),
        ]);
        // Initial search with default oversample to assess vector confidence
        const initialOversample = limit * DEFAULT_OVERSAMPLE_MULTIPLIER;
        let [vectorResults, ftsResults] = await fetchCandidates(initialOversample);
        // Confidence metrics come from the initial candidates (0 when empty)
        const maxVectorScore = Math.max(...vectorResults.map(r => r.score), 0);
        const maxFtsScore = Math.max(...ftsResults.map(r => r.ftsScore ?? r.score), 0);
        let oversampleMultiplier = DEFAULT_OVERSAMPLE_MULTIPLIER;
        let dynamicOversampleApplied = false;
        let effectiveBm25Weight = bm25Weight;
        let autoBoostApplied = false;
        if (autoBoost && maxVectorScore < autoBoostThreshold) {
            // Dynamic oversample: linear scale from 2x (at the threshold) to 4x (at 0)
            const oversampleBoost = 1 - maxVectorScore / autoBoostThreshold;
            oversampleMultiplier =
                DEFAULT_OVERSAMPLE_MULTIPLIER +
                    oversampleBoost * (MAX_OVERSAMPLE_MULTIPLIER - DEFAULT_OVERSAMPLE_MULTIPLIER);
            dynamicOversampleApplied =
                oversampleMultiplier > DEFAULT_OVERSAMPLE_MULTIPLIER;
            // Auto-boost: raise BM25 weight by up to 0.5, with the boosted value
            // capped at 0.9. The outer max() keeps a caller-requested weight above
            // 0.9 from being lowered by the cap.
            const weightBoost = (autoBoostThreshold - maxVectorScore) / autoBoostThreshold;
            effectiveBm25Weight = Math.max(bm25Weight, Math.min(0.9, bm25Weight + weightBoost * 0.5));
            autoBoostApplied = effectiveBm25Weight !== bm25Weight;
        }
        // Re-fetch with the higher limit only when the oversample actually grew
        const effectiveOversample = Math.round(limit * oversampleMultiplier);
        if (effectiveOversample > initialOversample) {
            [vectorResults, ftsResults] = await fetchCandidates(effectiveOversample);
        }
        // Combine with RRF using effective weight
        const vectorWeight = 1 - effectiveBm25Weight;
        let results = hybridRerank(vectorResults, ftsResults, limit, vectorWeight);
        // Apply minScore filter (a falsy minScore disables it)
        if (minScore) {
            results = results.filter(r => r.score >= minScore);
        }
        // Build debug info if requested
        const debug = returnDebug
            ? {
                maxVectorScore,
                maxFtsScore,
                requestedBm25Weight: bm25Weight,
                effectiveBm25Weight,
                autoBoostApplied,
                autoBoostThreshold,
                vectorResultCount: vectorResults.length,
                ftsResultCount: ftsResults.length,
                oversampleMultiplier,
                dynamicOversampleApplied,
            }
            : undefined;
        return {
            results,
            query,
            searchType: 'hybrid',
            elapsedMs: 0, // filled in by search()
            debug,
        };
    }
    /**
     * Definition search: Direct metadata lookup.
     * Best for "where is X defined?" queries.
     *
     * @param symbolName - Symbol to look up
     * @param typeFilter - Optional chunk types passed to buildDefinitionFilter
     * @param additionalFilter - Optional extra WHERE clause AND-ed with the definition filter
     */
    async searchDefinition(table, symbolName, limit, typeFilter, additionalFilter) {
        const definitionFilter = buildDefinitionFilter(symbolName, typeFilter);
        // Combine with additional filters
        const fullFilter = additionalFilter
            ? `(${definitionFilter}) AND (${additionalFilter})`
            : definitionFilter;
        // Use table query directly for metadata lookup
        const rows = await table
            .query()
            .where(fullFilter)
            .limit(limit)
            .toArray();
        const results = rows.map((chunk, index) => ({
            id: chunk.id,
            text: chunk.text,
            filepath: chunk.filepath,
            filename: chunk.filename,
            name: chunk.name,
            type: chunk.type,
            startLine: chunk.start_line,
            endLine: chunk.end_line,
            score: 1 / (index + 1), // Rank-based score
            signature: chunk.signature,
            isExported: chunk.is_exported,
        }));
        return {
            results,
            query: symbolName,
            searchType: 'definition',
            elapsedMs: 0, // filled in by search()
        };
    }
    /**
     * Similar search: Vector search with code snippet as query.
     * Best for "find code like this" queries.
     */
    async searchSimilar(table, codeSnippet, limit, filterClause, minScore) {
        // Embed the code snippet directly
        const queryVector = await this.embeddings.embedSingle(codeSnippet);
        const results = await vectorSearch(table, queryVector, {
            limit,
            filterClause,
            minScore,
        });
        return {
            results,
            // Echo at most 100 characters of the snippet back to the caller
            query: codeSnippet.substring(0, 100) + (codeSnippet.length > 100 ? '...' : ''),
            searchType: 'similar',
            elapsedMs: 0, // filled in by search()
        };
    }
    /**
     * Perform vector-only search. (Legacy method)
     */
    async searchVector(query, limit = DEFAULT_LIMIT) {
        return this.search(query, { mode: 'semantic', limit });
    }
    /**
     * Perform FTS-only search. (Legacy method)
     */
    async searchFts(query, limit = DEFAULT_LIMIT) {
        return this.search(query, { mode: 'exact', limit });
    }
    /**
     * Initialize the search engine.
     *
     * No-op once initialized. Concurrent callers share one in-flight
     * initialization so storage and the embedding provider are only created
     * once; a failed attempt is forgotten so the next call retries.
     */
    async ensureInitialized() {
        if (this.initialized)
            return;
        if (!this.initializing) {
            this.initializing = this.initialize().finally(() => {
                this.initializing = null;
            });
        }
        await this.initializing;
    }
    /**
     * Load config, connect storage and initialize the embedding provider.
     * Resources are only published on `this` after every step succeeded; on
     * failure anything already created is closed (best effort) and the
     * original error is rethrown.
     */
    async initialize() {
        const config = await loadConfig(this.projectRoot);
        // Initialize storage
        const storage = new Storage(this.projectRoot, config.embeddingDimensions);
        let embeddings = null;
        try {
            await storage.connect();
            // Initialize embeddings with config (includes apiKey for cloud providers)
            embeddings = this.createEmbeddingProvider(config);
            await embeddings.initialize();
        }
        catch (error) {
            // Cleanup errors must not mask the initialization failure.
            try {
                embeddings?.close();
            }
            catch { }
            try {
                storage.close();
            }
            catch { }
            throw error;
        }
        this.storage = storage;
        this.embeddings = embeddings;
        this.initialized = true;
        this.log('info', 'SearchEngine initialized');
    }
    /**
     * Create the appropriate embedding provider based on config.
     *
     * @throws Error when `config.embeddingProvider` is not a known provider
     */
    createEmbeddingProvider(config) {
        const apiKey = config.apiKey;
        switch (config.embeddingProvider) {
            case 'local':
                return new LocalEmbeddingProvider();
            case 'local-4b':
                return new Local4BEmbeddingProvider();
            case 'gemini':
                return new GeminiEmbeddingProvider(apiKey);
            case 'mistral':
                return new MistralEmbeddingProvider(apiKey);
            case 'openai':
                return new OpenAIEmbeddingProvider(apiKey);
            default:
                throw new Error(`Unknown embedding provider: ${config.embeddingProvider}`);
        }
    }
    /**
     * Get the code chunks table.
     */
    async getTable() {
        return this.storage.getChunksTable();
    }
    /**
     * Log a message under the 'Search' component; no-op without a logger.
     */
    log(level, message) {
        if (!this.logger)
            return;
        this.logger[level]('Search', message);
    }
    /**
     * Close the search engine and free resources.
     * Handles are dropped afterwards so a repeated close() does not close them
     * twice; a later search() re-initializes from scratch.
     */
    close() {
        this.storage?.close();
        this.embeddings?.close();
        this.storage = null;
        this.embeddings = null;
        this.initialized = false;
        this.log('info', 'SearchEngine closed');
    }
}
@@ -0,0 +1,130 @@
1
+ /**
2
+ * Search result types.
3
+ */
4
+ /**
5
+ * Search mode determines the search strategy:
+ * - semantic: dense vector search
+ * - exact: BM25/FTS
+ * - hybrid: vector + BM25 fused with RRF (the default in SearchEngine.search)
+ * - definition: direct metadata lookup by symbol name
+ * - similar: vector search using a code snippet as the query
6
+ */
7
+ export type SearchMode = 'semantic' | 'exact' | 'hybrid' | 'definition' | 'similar';
8
+ /**
9
+ * A single search result (one indexed code chunk plus its ranking scores).
10
+ */
11
+ export interface SearchResult {
12
+ /** Unique ID: "{filepath}:{startLine}" */
13
+ id: string;
14
+ /** Source code content */
15
+ text: string;
16
+ /** Relative file path */
17
+ filepath: string;
18
+ /** Just the filename */
19
+ filename: string;
20
+ /** Symbol name */
21
+ name: string;
22
+ /** Chunk type: function, class, method, or module */
23
+ type: string;
24
+ /** Start line number (1-indexed) */
25
+ startLine: number;
26
+ /** End line number (1-indexed) */
27
+ endLine: number;
28
+ /** Ranking score; scale depends on the mode: weighted RRF sum for hybrid results, 1 / (position + 1) for definition results */
29
+ score: number;
30
+ /** Vector similarity score (optional; in hybrid results, undefined when the item came only from FTS) */
31
+ vectorScore?: number;
32
+ /** FTS/BM25 score (optional; in hybrid results, undefined when the item came only from vector search) */
33
+ ftsScore?: number;
34
+ /** Function/method signature (if available) */
35
+ signature?: string | null;
36
+ /** Whether symbol is exported */
37
+ isExported?: boolean;
38
+ }
39
+ /**
40
+ * Debug information for search quality evaluation.
41
+ * Helps AI agents understand search effectiveness and tune parameters.
+ * Only produced by hybrid search, and only when returnDebug is set.
42
+ */
43
+ export interface SearchDebugInfo {
44
+ /** Maximum vector similarity score among the initial vector candidates (0 when there are none) */
45
+ maxVectorScore: number;
46
+ /** Maximum FTS/BM25 score among the initial FTS candidates (0 when there are none) */
47
+ maxFtsScore: number;
48
+ /** BM25 weight requested by caller (or the 0.3 default) */
49
+ requestedBm25Weight: number;
50
+ /** BM25 weight actually used for reranking; differs from the requested weight only when auto-boost applied */
51
+ effectiveBm25Weight: number;
52
+ /** Whether auto-boost changed the BM25 weight */
53
+ autoBoostApplied: boolean;
54
+ /** Auto-boost threshold used */
55
+ autoBoostThreshold: number;
56
+ /** Number of vector candidates fed into reranking (after any oversample re-fetch) */
57
+ vectorResultCount: number;
58
+ /** Number of FTS candidates fed into reranking (after any oversample re-fetch) */
59
+ ftsResultCount: number;
60
+ /** Oversample multiplier used (2-4x the requested limit) */
61
+ oversampleMultiplier?: number;
62
+ /** Whether the oversample multiplier was raised above the default 2x */
63
+ dynamicOversampleApplied?: boolean;
64
+ }
65
+ /**
66
+ * Collection of search results with metadata, as returned by SearchEngine.search().
67
+ */
68
+ export interface SearchResults {
69
+ /** Array of search results */
70
+ results: SearchResult[];
71
+ /** Search query as echoed back: the symbol name in definition mode, the snippet truncated to 100 characters in similar mode */
72
+ query: string;
73
+ /** Type of search performed */
74
+ searchType: SearchMode;
75
+ /** Time taken in milliseconds, measured by search() around the whole call */
76
+ elapsedMs: number;
77
+ /** Number of results returned (set only when exhaustive=true; bounded by the exhaustive limit of 500) */
78
+ totalMatches?: number;
79
+ /** Debug info for hybrid search (when return_debug=true) */
80
+ debug?: SearchDebugInfo;
81
+ }
82
+ /**
83
+ * Transparent, AI-controlled filters.
84
+ * AI sees exactly what's being filtered.
+ * SearchEngine.search() turns these into a filter clause via buildFilterClause;
+ * in definition mode `type` is additionally passed to buildDefinitionFilter.
85
+ */
86
+ export interface SearchFilters {
87
+ /** Scope to files starting with this path prefix (e.g., "src/api/") */
88
+ pathPrefix?: string;
89
+ /** Must contain ALL of these strings in path */
90
+ pathContains?: string[];
91
+ /** Must not contain ANY of these strings in path */
92
+ pathNotContains?: string[];
93
+ /** Filter by chunk type: function, class, method, module */
94
+ type?: ('function' | 'class' | 'method' | 'module')[];
95
+ /** Filter by file extension (e.g., [".ts", ".tsx"]) */
96
+ extension?: string[];
97
+ /** Only exported/public symbols */
98
+ isExported?: boolean;
99
+ /** Decorator name contains this string (e.g., "Get", "route") */
100
+ decoratorContains?: string;
101
+ /** Has documentation/docstring */
102
+ hasDocstring?: boolean;
103
+ }
104
+ /**
105
+ * Options for search operations (all optional; see SearchEngine.search).
106
+ */
107
+ export interface SearchOptions {
108
+ /** Search mode (default: 'hybrid') */
109
+ mode?: SearchMode;
110
+ /** Maximum number of results (default: 10); ignored when exhaustive is true */
111
+ limit?: number;
112
+ /** Weight for BM25 in hybrid search (0.0-1.0, default: 0.3); the vector weight is 1 minus this */
113
+ bm25Weight?: number;
114
+ /** When true, requests up to 500 results instead of `limit` and sets totalMatches on the result (default: false) */
115
+ exhaustive?: boolean;
116
+ /** Minimum score threshold 0-1 (default: 0). NOTE(review): hybrid mode compares this against the fused RRF score, which is far below 1 — confirm the intended scale */
117
+ minScore?: number;
118
+ /** Transparent filters */
119
+ filters?: SearchFilters;
120
+ /** Code snippet for 'similar' mode (falls back to the query string when omitted) */
121
+ codeSnippet?: string;
122
+ /** Symbol name for 'definition' mode (falls back to the query string when omitted) */
123
+ symbolName?: string;
124
+ /** Enable auto-boost of BM25 weight when vector scores are low (default: true; hybrid mode only) */
125
+ autoBoost?: boolean;
126
+ /** Vector score threshold below which auto-boost activates (default: 0.3) */
127
+ autoBoostThreshold?: number;
128
+ /** Include debug info in results (default: false; only hybrid mode produces it) */
129
+ returnDebug?: boolean;
130
+ }
@@ -0,0 +1,4 @@
1
+ /**
2
+ * Search result types.
3
+ */
4
+ export {};
@@ -0,0 +1,25 @@
1
+ /**
2
+ * Vector similarity search using LanceDB.
3
+ */
4
+ import type { Table } from '@lancedb/lancedb';
5
+ import type { SearchResult } from './types.js';
6
+ /**
7
+ * Options for vector search.
8
+ */
9
+ export interface VectorSearchOptions {
10
+ /** Maximum number of results */
11
+ limit: number;
12
+ /** LanceDB WHERE clause filter (SearchEngine passes the clause built from SearchFilters) */
13
+ filterClause?: string;
14
+ /** Minimum score threshold (0-1); SearchEngine's hybrid search omits it and filters after fusion instead */
15
+ minScore?: number;
16
+ }
17
+ /**
18
+ * Perform vector similarity search.
19
+ *
20
+ * @param table - LanceDB table to search
21
+ * @param queryVector - Query embedding vector
22
+ * @param options - Search options; a bare number is also accepted (presumably as the result limit; the implementation is not shown here, confirm)
23
+ * @returns Array of search results with vector scores
24
+ */
25
+ export declare function vectorSearch(table: Table, queryVector: number[], options: VectorSearchOptions | number): Promise<SearchResult[]>;