viberag 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. package/README.md +1 -1
  2. package/dist/cli/commands/mcp-setup.d.ts +1 -1
  3. package/dist/cli/commands/mcp-setup.js +23 -3
  4. package/dist/cli/components/CleanWizard.js +16 -1
  5. package/dist/cli/components/InitWizard.js +37 -15
  6. package/dist/common/types.d.ts +2 -2
  7. package/dist/mcp/index.js +5 -1
  8. package/dist/mcp/warmup.d.ts +5 -0
  9. package/dist/mcp/warmup.js +7 -0
  10. package/dist/rag/config/index.d.ts +4 -0
  11. package/dist/rag/config/index.js +37 -13
  12. package/dist/rag/embeddings/gemini.js +34 -7
  13. package/dist/rag/embeddings/index.d.ts +1 -0
  14. package/dist/rag/embeddings/index.js +1 -0
  15. package/dist/rag/embeddings/mistral.d.ts +2 -2
  16. package/dist/rag/embeddings/mistral.js +18 -5
  17. package/dist/rag/embeddings/openai.js +22 -3
  18. package/dist/rag/embeddings/validate.d.ts +22 -0
  19. package/dist/rag/embeddings/validate.js +148 -0
  20. package/dist/rag/index.d.ts +1 -1
  21. package/dist/rag/index.js +1 -1
  22. package/dist/rag/indexer/chunker.js +31 -19
  23. package/dist/rag/indexer/indexer.d.ts +10 -0
  24. package/dist/rag/indexer/indexer.js +88 -53
  25. package/dist/rag/search/index.d.ts +6 -0
  26. package/dist/rag/search/index.js +35 -9
  27. package/dist/rag/storage/index.d.ts +15 -1
  28. package/dist/rag/storage/index.js +108 -21
  29. package/package.json +33 -4
  30. package/dist/cli/__tests__/mcp-setup-comprehensive.test.d.ts +0 -10
  31. package/dist/cli/__tests__/mcp-setup-comprehensive.test.js +0 -515
  32. package/dist/cli/__tests__/mcp-setup-global.test.d.ts +0 -7
  33. package/dist/cli/__tests__/mcp-setup-global.test.js +0 -577
  34. package/dist/cli/__tests__/mcp-setup.test.d.ts +0 -6
  35. package/dist/cli/__tests__/mcp-setup.test.js +0 -704
  36. package/dist/rag/__tests__/grammar-smoke.test.d.ts +0 -9
  37. package/dist/rag/__tests__/grammar-smoke.test.js +0 -161
  38. package/dist/rag/__tests__/helpers.d.ts +0 -30
  39. package/dist/rag/__tests__/helpers.js +0 -67
  40. package/dist/rag/__tests__/merkle.test.d.ts +0 -5
  41. package/dist/rag/__tests__/merkle.test.js +0 -161
  42. package/dist/rag/__tests__/metadata-extraction.test.d.ts +0 -10
  43. package/dist/rag/__tests__/metadata-extraction.test.js +0 -202
  44. package/dist/rag/__tests__/multi-language.test.d.ts +0 -13
  45. package/dist/rag/__tests__/multi-language.test.js +0 -535
  46. package/dist/rag/__tests__/rag.test.d.ts +0 -10
  47. package/dist/rag/__tests__/rag.test.js +0 -311
  48. package/dist/rag/__tests__/search-exhaustive.test.d.ts +0 -9
  49. package/dist/rag/__tests__/search-exhaustive.test.js +0 -87
  50. package/dist/rag/__tests__/search-filters.test.d.ts +0 -10
  51. package/dist/rag/__tests__/search-filters.test.js +0 -250
  52. package/dist/rag/__tests__/search-modes.test.d.ts +0 -8
  53. package/dist/rag/__tests__/search-modes.test.js +0 -133
@@ -0,0 +1,148 @@
1
/**
 * API key validation for cloud embedding providers.
 *
 * Makes a minimal test embedding call to verify the API key is valid
 * before proceeding with indexing.
 */
/**
 * API endpoints for each cloud provider.
 * Each URL is the provider's embedding endpoint; validation POSTs a
 * tiny request to it and inspects the HTTP status of the reply.
 */
const ENDPOINTS = {
    gemini: 'https://generativelanguage.googleapis.com/v1beta/models/gemini-embedding-001:embedContent',
    mistral: 'https://api.mistral.ai/v1/embeddings',
    openai: 'https://api.openai.com/v1/embeddings',
};
15
/**
 * Safely parse a JSON response body.
 * Parse failures are logged (tagged with the provider name) rather than
 * swallowed, and an empty object is returned so callers can use optional
 * chaining on the result.
 */
async function safeParseJson(response, provider) {
    let body;
    try {
        body = await response.json();
    }
    catch (parseError) {
        const reason = parseError instanceof Error ? parseError.message : String(parseError);
        console.warn(`[${provider}] Failed to parse error response:`, reason);
        body = {};
    }
    return body;
}
27
/**
 * Validate an API key by making a minimal test embedding call.
 *
 * Local providers are always considered valid (no key involved); a blank
 * key fails fast without any network traffic. Any exception thrown by a
 * provider-specific validator (e.g. a network failure) is converted into
 * an invalid result carrying the error message.
 *
 * @param provider - The embedding provider type
 * @param apiKey - The API key to validate
 * @returns Validation result with error message if invalid
 */
export async function validateApiKey(provider, apiKey) {
    // Local providers run on-device and don't need API key validation.
    const isLocalProvider = provider === 'local' || provider === 'local-4b';
    if (isLocalProvider) {
        return { valid: true };
    }
    const trimmedKey = apiKey ? apiKey.trim() : '';
    if (trimmedKey === '') {
        return { valid: false, error: 'API key is required' };
    }
    try {
        // Note: validators receive the original (untrimmed) key.
        switch (provider) {
            case 'gemini':
                return await validateGeminiKey(apiKey);
            case 'mistral':
                return await validateMistralKey(apiKey);
            case 'openai':
                return await validateOpenAIKey(apiKey);
            default:
                return { valid: false, error: `Unknown provider: ${provider}` };
        }
    }
    catch (failure) {
        const message = failure instanceof Error ? failure.message : String(failure);
        return { valid: false, error: message };
    }
}
61
+ /**
62
+ * Validate Gemini API key.
63
+ */
64
+ async function validateGeminiKey(apiKey) {
65
+ const response = await fetch(ENDPOINTS.gemini, {
66
+ method: 'POST',
67
+ headers: {
68
+ 'Content-Type': 'application/json',
69
+ 'x-goog-api-key': apiKey,
70
+ },
71
+ body: JSON.stringify({
72
+ content: { parts: [{ text: 'test' }] },
73
+ }),
74
+ });
75
+ if (response.ok) {
76
+ return { valid: true };
77
+ }
78
+ const data = await safeParseJson(response, 'gemini');
79
+ const message = data?.error?.message ||
80
+ `HTTP ${response.status}`;
81
+ if (response.status === 400 && message.includes('API key')) {
82
+ return { valid: false, error: 'Invalid API key' };
83
+ }
84
+ if (response.status === 403) {
85
+ return { valid: false, error: 'API key not authorized for this API' };
86
+ }
87
+ return { valid: false, error: message };
88
+ }
89
+ /**
90
+ * Validate Mistral API key.
91
+ */
92
+ async function validateMistralKey(apiKey) {
93
+ const response = await fetch(ENDPOINTS.mistral, {
94
+ method: 'POST',
95
+ headers: {
96
+ 'Content-Type': 'application/json',
97
+ Authorization: `Bearer ${apiKey}`,
98
+ },
99
+ body: JSON.stringify({
100
+ model: 'codestral-embed',
101
+ input: ['test'],
102
+ }),
103
+ });
104
+ if (response.ok) {
105
+ return { valid: true };
106
+ }
107
+ const data = await safeParseJson(response, 'mistral');
108
+ const message = data?.message ||
109
+ data?.detail ||
110
+ `HTTP ${response.status}`;
111
+ if (response.status === 401) {
112
+ return { valid: false, error: 'Invalid API key' };
113
+ }
114
+ return { valid: false, error: message };
115
+ }
116
+ /**
117
+ * Validate OpenAI API key.
118
+ */
119
+ async function validateOpenAIKey(apiKey) {
120
+ const response = await fetch(ENDPOINTS.openai, {
121
+ method: 'POST',
122
+ headers: {
123
+ 'Content-Type': 'application/json',
124
+ Authorization: `Bearer ${apiKey}`,
125
+ },
126
+ body: JSON.stringify({
127
+ model: 'text-embedding-3-small',
128
+ input: ['test'],
129
+ }),
130
+ });
131
+ if (response.ok) {
132
+ return { valid: true };
133
+ }
134
+ const data = await safeParseJson(response, 'openai');
135
+ const error = data?.error;
136
+ if (response.status === 401) {
137
+ return { valid: false, error: 'Invalid API key' };
138
+ }
139
+ if (error?.message) {
140
+ // Truncate long error messages
141
+ const msg = error.message;
142
+ return {
143
+ valid: false,
144
+ error: msg.length > 100 ? msg.slice(0, 100) + '...' : msg,
145
+ };
146
+ }
147
+ return { valid: false, error: `HTTP ${response.status}` };
148
+ }
@@ -10,6 +10,6 @@ export { loadManifest, saveManifest, manifestExists, createEmptyManifest, update
10
10
  export { Storage, SCHEMA_VERSION, createCodeChunksSchema, createEmbeddingCacheSchema, chunkToRow, rowToChunk, embeddingToRow, rowToEmbedding, type CodeChunk, type CodeChunkRow, type CachedEmbedding, type CachedEmbeddingRow, type ChunkType, } from './storage/index.js';
11
11
  export { MerkleTree, compareTrees, createEmptyDiff, computeFileHash, computeStringHash, computeDirectoryHash, isBinaryFile, shouldExclude, hasValidExtension, serializeNode, deserializeNode, createFileNode, createDirectoryNode, type MerkleNode, type NodeType, type SerializedNode, type TreeDiff, type BuildStats, } from './merkle/index.js';
12
12
  export { Chunker, Indexer, createEmptyIndexStats, type Chunk, type IndexOptions, type IndexStats, type ProgressCallback, type SupportedLanguage, } from './indexer/index.js';
13
- export { GeminiEmbeddingProvider, MistralEmbeddingProvider, OpenAIEmbeddingProvider, type EmbeddingProvider, } from './embeddings/index.js';
13
+ export { GeminiEmbeddingProvider, MistralEmbeddingProvider, OpenAIEmbeddingProvider, validateApiKey, type EmbeddingProvider, type ValidationResult, } from './embeddings/index.js';
14
14
  export { SearchEngine, vectorSearch, ftsSearch, ensureFtsIndex, hybridRerank, type SearchFilters, type SearchMode, type SearchOptions, type SearchResult, type SearchResults, } from './search/index.js';
15
15
  export { loadGitignore, shouldIgnore, createGitignoreFilter, clearGitignoreCache, clearAllGitignoreCache, } from './gitignore/index.js';
package/dist/rag/index.js CHANGED
@@ -18,7 +18,7 @@ export { MerkleTree, compareTrees, createEmptyDiff, computeFileHash, computeStri
18
18
  // Indexer (Chunking & Orchestration)
19
19
  export { Chunker, Indexer, createEmptyIndexStats, } from './indexer/index.js';
20
20
  // Embeddings
21
- export { GeminiEmbeddingProvider, MistralEmbeddingProvider, OpenAIEmbeddingProvider, } from './embeddings/index.js';
21
+ export { GeminiEmbeddingProvider, MistralEmbeddingProvider, OpenAIEmbeddingProvider, validateApiKey, } from './embeddings/index.js';
22
22
  // Search
23
23
  export { SearchEngine, vectorSearch, ftsSearch, ensureFtsIndex, hybridRerank, } from './search/index.js';
24
24
  // Gitignore
@@ -188,28 +188,40 @@ export class Chunker {
188
188
  await Parser.init();
189
189
  // Create parser instance after init
190
190
  this.parser = new Parser();
191
- // Resolve the path to tree-sitter-wasms/out/
192
- const wasmPackagePath = require.resolve('tree-sitter-wasms/package.json');
193
- this.wasmBasePath = path.join(path.dirname(wasmPackagePath), 'out');
194
- // Load all language grammars sequentially (skip null entries like Dart)
195
- // IMPORTANT: Must be sequential - web-tree-sitter has global state that
196
- // gets corrupted when loading multiple WASM modules in parallel.
197
- for (const [lang, wasmFile] of Object.entries(LANGUAGE_WASM_FILES)) {
198
- if (!wasmFile) {
199
- // Language temporarily disabled (e.g., Dart due to version mismatch)
200
- continue;
201
- }
202
- try {
203
- const wasmPath = path.join(this.wasmBasePath, wasmFile);
204
- const language = await Parser.Language.load(wasmPath);
205
- this.languages.set(lang, language);
191
+ try {
192
+ // Resolve the path to tree-sitter-wasms/out/
193
+ const wasmPackagePath = require.resolve('tree-sitter-wasms/package.json');
194
+ this.wasmBasePath = path.join(path.dirname(wasmPackagePath), 'out');
195
+ // Load all language grammars sequentially (skip null entries like Dart)
196
+ // IMPORTANT: Must be sequential - web-tree-sitter has global state that
197
+ // gets corrupted when loading multiple WASM modules in parallel.
198
+ for (const [lang, wasmFile] of Object.entries(LANGUAGE_WASM_FILES)) {
199
+ if (!wasmFile) {
200
+ // Language temporarily disabled (e.g., Dart due to version mismatch)
201
+ continue;
202
+ }
203
+ try {
204
+ const wasmPath = path.join(this.wasmBasePath, wasmFile);
205
+ const language = await Parser.Language.load(wasmPath);
206
+ this.languages.set(lang, language);
207
+ }
208
+ catch (error) {
209
+ // Log but don't fail - we can still work with other languages
210
+ console.error(`Failed to load ${lang} grammar:`, error);
211
+ }
206
212
  }
207
- catch (error) {
208
- // Log but don't fail - we can still work with other languages
209
- console.error(`Failed to load ${lang} grammar:`, error);
213
+ this.initialized = true;
214
+ }
215
+ catch (error) {
216
+ // Cleanup parser on failure to prevent resource leak
217
+ if (this.parser) {
218
+ this.parser.delete();
219
+ this.parser = null;
210
220
  }
221
+ this.wasmBasePath = null;
222
+ this.languages.clear();
223
+ throw error;
211
224
  }
212
- this.initialized = true;
213
225
  }
214
226
  /**
215
227
  * Get the language for a file extension.
@@ -33,11 +33,17 @@ export declare class Indexer {
33
33
  private chunker;
34
34
  private embeddings;
35
35
  private logger;
36
+ private indexPromise;
36
37
  constructor(projectRoot: string, logger?: Logger);
37
38
  /**
38
39
  * Run the indexing pipeline.
40
+ * Uses mutex to prevent concurrent index operations.
39
41
  */
40
42
  index(options?: IndexOptions): Promise<IndexStats>;
43
+ /**
44
+ * Perform the actual indexing operation.
45
+ */
46
+ private doIndex;
41
47
  /**
42
48
  * Create a diff that treats all files as new (for force reindex).
43
49
  */
@@ -48,6 +54,10 @@ export declare class Indexer {
48
54
  private collectAllFilesFromSerialized;
49
55
  /**
50
56
  * Process a batch of files: read, chunk, embed, and prepare CodeChunks.
57
+ *
58
+ * Error handling strategy:
59
+ * - File read/parse errors: Log and continue (file-specific, recoverable)
60
+ * - Embedding/storage errors: Let propagate (fatal, affects all files)
51
61
  */
52
62
  private processFileBatch;
53
63
  /**
@@ -62,13 +62,37 @@ export class Indexer {
62
62
  writable: true,
63
63
  value: null
64
64
  });
65
+ Object.defineProperty(this, "indexPromise", {
66
+ enumerable: true,
67
+ configurable: true,
68
+ writable: true,
69
+ value: null
70
+ });
65
71
  this.projectRoot = projectRoot;
66
72
  this.logger = logger ?? null;
67
73
  }
68
74
  /**
69
75
  * Run the indexing pipeline.
76
+ * Uses mutex to prevent concurrent index operations.
70
77
  */
71
78
  async index(options = {}) {
79
+ // If indexing is already in progress, wait for it
80
+ if (this.indexPromise) {
81
+ this.log('warn', 'Index already in progress, waiting for completion');
82
+ return this.indexPromise;
83
+ }
84
+ this.indexPromise = this.doIndex(options);
85
+ try {
86
+ return await this.indexPromise;
87
+ }
88
+ finally {
89
+ this.indexPromise = null;
90
+ }
91
+ }
92
+ /**
93
+ * Perform the actual indexing operation.
94
+ */
95
+ async doIndex(options = {}) {
72
96
  const stats = createEmptyIndexStats();
73
97
  const { force = false, progressCallback } = options;
74
98
  try {
@@ -193,68 +217,79 @@ export class Indexer {
193
217
  }
194
218
  /**
195
219
  * Process a batch of files: read, chunk, embed, and prepare CodeChunks.
220
+ *
221
+ * Error handling strategy:
222
+ * - File read/parse errors: Log and continue (file-specific, recoverable)
223
+ * - Embedding/storage errors: Let propagate (fatal, affects all files)
196
224
  */
197
225
  async processFileBatch(filepaths, chunker, embeddings, storage, stats) {
198
226
  const allChunks = [];
199
227
  for (const filepath of filepaths) {
228
+ // Phase 1: File reading and chunking (recoverable errors)
229
+ let content;
230
+ let fileHash;
231
+ let chunks;
200
232
  try {
201
233
  const absolutePath = path.join(this.projectRoot, filepath);
202
- const content = await fs.readFile(absolutePath, 'utf-8');
203
- const fileHash = (await import('../merkle/hash.js')).computeStringHash(content);
234
+ content = await fs.readFile(absolutePath, 'utf-8');
235
+ fileHash = (await import('../merkle/hash.js')).computeStringHash(content);
204
236
  // Chunk the file (with size limits from config)
205
- const chunks = await chunker.chunkFile(filepath, content, this.config.chunkMaxSize);
206
- // Check embedding cache for each chunk
207
- const contentHashes = chunks.map(c => c.contentHash);
208
- const cachedEmbeddings = await storage.getCachedEmbeddings(contentHashes);
209
- // Compute embeddings for cache misses
210
- const missingChunks = chunks.filter(c => !cachedEmbeddings.has(c.contentHash));
211
- if (missingChunks.length > 0) {
212
- // Embed contextHeader + text for semantic relevance
213
- const texts = missingChunks.map(c => c.contextHeader ? `${c.contextHeader}\n${c.text}` : c.text);
214
- const newEmbeddings = await embeddings.embed(texts);
215
- stats.embeddingsComputed += missingChunks.length;
216
- // Cache the new embeddings
217
- const cacheEntries = missingChunks.map((chunk, i) => ({
218
- contentHash: chunk.contentHash,
219
- vector: newEmbeddings[i],
220
- createdAt: new Date().toISOString(),
221
- }));
222
- await storage.cacheEmbeddings(cacheEntries);
223
- // Add to cachedEmbeddings map
224
- missingChunks.forEach((chunk, i) => {
225
- cachedEmbeddings.set(chunk.contentHash, newEmbeddings[i]);
226
- });
227
- }
228
- stats.embeddingsCached += chunks.length - missingChunks.length;
229
- // Build CodeChunk objects
230
- const filename = path.basename(filepath);
231
- const extension = path.extname(filepath);
232
- for (const chunk of chunks) {
233
- const vector = cachedEmbeddings.get(chunk.contentHash);
234
- allChunks.push({
235
- id: `${filepath}:${chunk.startLine}`,
236
- vector,
237
- text: chunk.text,
238
- contentHash: chunk.contentHash,
239
- filepath,
240
- filename,
241
- extension,
242
- type: chunk.type,
243
- name: chunk.name,
244
- startLine: chunk.startLine,
245
- endLine: chunk.endLine,
246
- fileHash,
247
- // New metadata fields from schema v2
248
- signature: chunk.signature,
249
- docstring: chunk.docstring,
250
- isExported: chunk.isExported,
251
- decoratorNames: chunk.decoratorNames,
252
- });
253
- }
237
+ chunks = await chunker.chunkFile(filepath, content, this.config.chunkMaxSize);
254
238
  }
255
239
  catch (error) {
256
- this.log('warn', `Failed to process file: ${filepath}`, error);
257
- // Continue with other files
240
+ // File-specific error (read/parse) - log and continue with other files
241
+ this.log('warn', `Failed to read/parse file: ${filepath}`, error);
242
+ continue;
243
+ }
244
+ // Phase 2: Embedding and storage (fatal errors - let propagate)
245
+ // NO try-catch here - API/storage errors should stop indexing
246
+ // Check embedding cache for each chunk
247
+ const contentHashes = chunks.map(c => c.contentHash);
248
+ const cachedEmbeddings = await storage.getCachedEmbeddings(contentHashes);
249
+ // Compute embeddings for cache misses
250
+ const missingChunks = chunks.filter(c => !cachedEmbeddings.has(c.contentHash));
251
+ if (missingChunks.length > 0) {
252
+ // Embed contextHeader + text for semantic relevance
253
+ const texts = missingChunks.map(c => c.contextHeader ? `${c.contextHeader}\n${c.text}` : c.text);
254
+ const newEmbeddings = await embeddings.embed(texts);
255
+ stats.embeddingsComputed += missingChunks.length;
256
+ // Cache the new embeddings
257
+ const cacheEntries = missingChunks.map((chunk, i) => ({
258
+ contentHash: chunk.contentHash,
259
+ vector: newEmbeddings[i],
260
+ createdAt: new Date().toISOString(),
261
+ }));
262
+ await storage.cacheEmbeddings(cacheEntries);
263
+ // Add to cachedEmbeddings map
264
+ missingChunks.forEach((chunk, i) => {
265
+ cachedEmbeddings.set(chunk.contentHash, newEmbeddings[i]);
266
+ });
267
+ }
268
+ stats.embeddingsCached += chunks.length - missingChunks.length;
269
+ // Build CodeChunk objects
270
+ const filename = path.basename(filepath);
271
+ const extension = path.extname(filepath);
272
+ for (const chunk of chunks) {
273
+ const vector = cachedEmbeddings.get(chunk.contentHash);
274
+ allChunks.push({
275
+ id: `${filepath}:${chunk.startLine}`,
276
+ vector,
277
+ text: chunk.text,
278
+ contentHash: chunk.contentHash,
279
+ filepath,
280
+ filename,
281
+ extension,
282
+ type: chunk.type,
283
+ name: chunk.name,
284
+ startLine: chunk.startLine,
285
+ endLine: chunk.endLine,
286
+ fileHash,
287
+ // New metadata fields from schema v2
288
+ signature: chunk.signature,
289
+ docstring: chunk.docstring,
290
+ isExported: chunk.isExported,
291
+ decoratorNames: chunk.decoratorNames,
292
+ });
258
293
  }
259
294
  }
260
295
  return allChunks;
@@ -24,6 +24,7 @@ export declare class SearchEngine {
24
24
  private embeddings;
25
25
  private logger;
26
26
  private initialized;
27
+ private initPromise;
27
28
  constructor(projectRoot: string, logger?: Logger);
28
29
  /**
29
30
  * Primary search method. Dispatches to appropriate search mode.
@@ -74,8 +75,13 @@ export declare class SearchEngine {
74
75
  warmup(): Promise<void>;
75
76
  /**
76
77
  * Initialize the search engine.
78
+ * Uses idempotent promise pattern to prevent race conditions.
77
79
  */
78
80
  private ensureInitialized;
81
+ /**
82
+ * Perform actual initialization.
83
+ */
84
+ private doInitialize;
79
85
  /**
80
86
  * Create the appropriate embedding provider based on config.
81
87
  */
@@ -64,6 +64,12 @@ export class SearchEngine {
64
64
  writable: true,
65
65
  value: false
66
66
  });
67
+ Object.defineProperty(this, "initPromise", {
68
+ enumerable: true,
69
+ configurable: true,
70
+ writable: true,
71
+ value: null
72
+ });
67
73
  this.projectRoot = projectRoot;
68
74
  this.logger = logger ?? null;
69
75
  }
@@ -315,19 +321,39 @@ export class SearchEngine {
315
321
  }
316
322
  /**
317
323
  * Initialize the search engine.
324
+ * Uses idempotent promise pattern to prevent race conditions.
318
325
  */
319
326
  async ensureInitialized() {
327
+ // Fast path: already initialized
320
328
  if (this.initialized)
321
329
  return;
322
- const config = await loadConfig(this.projectRoot);
323
- // Initialize storage
324
- this.storage = new Storage(this.projectRoot, config.embeddingDimensions);
325
- await this.storage.connect();
326
- // Initialize embeddings with config (includes apiKey for cloud providers)
327
- this.embeddings = this.createEmbeddingProvider(config);
328
- await this.embeddings.initialize();
329
- this.initialized = true;
330
- this.log('info', 'SearchEngine initialized');
330
+ // Idempotent: return existing promise if initialization in progress
331
+ if (this.initPromise)
332
+ return this.initPromise;
333
+ // Start initialization and store promise
334
+ this.initPromise = this.doInitialize();
335
+ return this.initPromise;
336
+ }
337
+ /**
338
+ * Perform actual initialization.
339
+ */
340
+ async doInitialize() {
341
+ try {
342
+ const config = await loadConfig(this.projectRoot);
343
+ // Initialize storage
344
+ this.storage = new Storage(this.projectRoot, config.embeddingDimensions);
345
+ await this.storage.connect();
346
+ // Initialize embeddings with config (includes apiKey for cloud providers)
347
+ this.embeddings = this.createEmbeddingProvider(config);
348
+ await this.embeddings.initialize();
349
+ this.initialized = true;
350
+ this.log('info', 'SearchEngine initialized');
351
+ }
352
+ catch (error) {
353
+ // Reset promise on failure to allow retry
354
+ this.initPromise = null;
355
+ throw error;
356
+ }
331
357
  }
332
358
  /**
333
359
  * Create the appropriate embedding provider based on config.
@@ -15,8 +15,14 @@ export declare class Storage {
15
15
  /**
16
16
  * Connect to the LanceDB database.
17
17
  * Creates tables if they don't exist.
18
+ * Validates that existing tables have matching dimensions.
18
19
  */
19
20
  connect(): Promise<void>;
21
+ /**
22
+ * Get the vector column dimensions from a table schema.
23
+ * Returns null if vector column not found.
24
+ */
25
+ private getTableVectorDimensions;
20
26
  /**
21
27
  * Close the database connection.
22
28
  */
@@ -26,6 +32,14 @@ export declare class Storage {
26
32
  * Note: chunksTable may be null after resetChunksTable().
27
33
  */
28
34
  private ensureConnected;
35
+ /**
36
+ * Get the database connection with a clear error if not connected.
37
+ */
38
+ private getDb;
39
+ /**
40
+ * Get the cache table with a clear error if not connected.
41
+ */
42
+ private getCacheTable;
29
43
  /**
30
44
  * Upsert chunks into the database.
31
45
  * Uses merge insert to update existing chunks or add new ones.
@@ -34,7 +48,6 @@ export declare class Storage {
34
48
  /**
35
49
  * Add chunks to the database (no merge, just insert).
36
50
  * Use this after resetChunksTable() to avoid schema mismatch issues.
37
- * Creates the table from data if it doesn't exist.
38
51
  */
39
52
  addChunks(chunks: CodeChunk[]): Promise<void>;
40
53
  /**
@@ -87,6 +100,7 @@ export declare class Storage {
87
100
  clearCache(): Promise<void>;
88
101
  /**
89
102
  * Get the chunks table for direct querying (e.g., search).
103
+ * @throws Error if table doesn't exist (not indexed yet)
90
104
  */
91
105
  getChunksTable(): Table;
92
106
  }