viberag 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (151) hide show
  1. package/LICENSE +661 -0
  2. package/README.md +219 -0
  3. package/dist/cli/__tests__/mcp-setup.test.d.ts +6 -0
  4. package/dist/cli/__tests__/mcp-setup.test.js +597 -0
  5. package/dist/cli/app.d.ts +2 -0
  6. package/dist/cli/app.js +238 -0
  7. package/dist/cli/commands/handlers.d.ts +57 -0
  8. package/dist/cli/commands/handlers.js +231 -0
  9. package/dist/cli/commands/index.d.ts +2 -0
  10. package/dist/cli/commands/index.js +2 -0
  11. package/dist/cli/commands/mcp-setup.d.ts +107 -0
  12. package/dist/cli/commands/mcp-setup.js +509 -0
  13. package/dist/cli/commands/useRagCommands.d.ts +23 -0
  14. package/dist/cli/commands/useRagCommands.js +180 -0
  15. package/dist/cli/components/CleanWizard.d.ts +17 -0
  16. package/dist/cli/components/CleanWizard.js +169 -0
  17. package/dist/cli/components/InitWizard.d.ts +20 -0
  18. package/dist/cli/components/InitWizard.js +370 -0
  19. package/dist/cli/components/McpSetupWizard.d.ts +37 -0
  20. package/dist/cli/components/McpSetupWizard.js +387 -0
  21. package/dist/cli/components/SearchResultsDisplay.d.ts +13 -0
  22. package/dist/cli/components/SearchResultsDisplay.js +130 -0
  23. package/dist/cli/components/WelcomeBanner.d.ts +10 -0
  24. package/dist/cli/components/WelcomeBanner.js +26 -0
  25. package/dist/cli/components/index.d.ts +1 -0
  26. package/dist/cli/components/index.js +1 -0
  27. package/dist/cli/data/mcp-editors.d.ts +80 -0
  28. package/dist/cli/data/mcp-editors.js +270 -0
  29. package/dist/cli/index.d.ts +2 -0
  30. package/dist/cli/index.js +26 -0
  31. package/dist/cli-bundle.cjs +5269 -0
  32. package/dist/common/commands/terminalSetup.d.ts +2 -0
  33. package/dist/common/commands/terminalSetup.js +144 -0
  34. package/dist/common/components/CommandSuggestions.d.ts +9 -0
  35. package/dist/common/components/CommandSuggestions.js +20 -0
  36. package/dist/common/components/StaticWithResize.d.ts +23 -0
  37. package/dist/common/components/StaticWithResize.js +62 -0
  38. package/dist/common/components/StatusBar.d.ts +8 -0
  39. package/dist/common/components/StatusBar.js +64 -0
  40. package/dist/common/components/TextInput.d.ts +12 -0
  41. package/dist/common/components/TextInput.js +239 -0
  42. package/dist/common/components/index.d.ts +3 -0
  43. package/dist/common/components/index.js +3 -0
  44. package/dist/common/hooks/index.d.ts +4 -0
  45. package/dist/common/hooks/index.js +4 -0
  46. package/dist/common/hooks/useCommandHistory.d.ts +7 -0
  47. package/dist/common/hooks/useCommandHistory.js +51 -0
  48. package/dist/common/hooks/useCtrlC.d.ts +9 -0
  49. package/dist/common/hooks/useCtrlC.js +40 -0
  50. package/dist/common/hooks/useKittyKeyboard.d.ts +10 -0
  51. package/dist/common/hooks/useKittyKeyboard.js +26 -0
  52. package/dist/common/hooks/useStaticOutputBuffer.d.ts +31 -0
  53. package/dist/common/hooks/useStaticOutputBuffer.js +58 -0
  54. package/dist/common/hooks/useTerminalResize.d.ts +28 -0
  55. package/dist/common/hooks/useTerminalResize.js +51 -0
  56. package/dist/common/hooks/useTextBuffer.d.ts +13 -0
  57. package/dist/common/hooks/useTextBuffer.js +165 -0
  58. package/dist/common/index.d.ts +13 -0
  59. package/dist/common/index.js +17 -0
  60. package/dist/common/types.d.ts +162 -0
  61. package/dist/common/types.js +1 -0
  62. package/dist/mcp/index.d.ts +12 -0
  63. package/dist/mcp/index.js +66 -0
  64. package/dist/mcp/server.d.ts +25 -0
  65. package/dist/mcp/server.js +837 -0
  66. package/dist/mcp/watcher.d.ts +86 -0
  67. package/dist/mcp/watcher.js +334 -0
  68. package/dist/rag/__tests__/grammar-smoke.test.d.ts +9 -0
  69. package/dist/rag/__tests__/grammar-smoke.test.js +161 -0
  70. package/dist/rag/__tests__/helpers.d.ts +30 -0
  71. package/dist/rag/__tests__/helpers.js +67 -0
  72. package/dist/rag/__tests__/merkle.test.d.ts +5 -0
  73. package/dist/rag/__tests__/merkle.test.js +161 -0
  74. package/dist/rag/__tests__/metadata-extraction.test.d.ts +10 -0
  75. package/dist/rag/__tests__/metadata-extraction.test.js +202 -0
  76. package/dist/rag/__tests__/multi-language.test.d.ts +13 -0
  77. package/dist/rag/__tests__/multi-language.test.js +535 -0
  78. package/dist/rag/__tests__/rag.test.d.ts +10 -0
  79. package/dist/rag/__tests__/rag.test.js +311 -0
  80. package/dist/rag/__tests__/search-exhaustive.test.d.ts +9 -0
  81. package/dist/rag/__tests__/search-exhaustive.test.js +87 -0
  82. package/dist/rag/__tests__/search-filters.test.d.ts +10 -0
  83. package/dist/rag/__tests__/search-filters.test.js +250 -0
  84. package/dist/rag/__tests__/search-modes.test.d.ts +8 -0
  85. package/dist/rag/__tests__/search-modes.test.js +133 -0
  86. package/dist/rag/config/index.d.ts +61 -0
  87. package/dist/rag/config/index.js +111 -0
  88. package/dist/rag/constants.d.ts +41 -0
  89. package/dist/rag/constants.js +57 -0
  90. package/dist/rag/embeddings/fastembed.d.ts +62 -0
  91. package/dist/rag/embeddings/fastembed.js +124 -0
  92. package/dist/rag/embeddings/gemini.d.ts +26 -0
  93. package/dist/rag/embeddings/gemini.js +116 -0
  94. package/dist/rag/embeddings/index.d.ts +10 -0
  95. package/dist/rag/embeddings/index.js +9 -0
  96. package/dist/rag/embeddings/local-4b.d.ts +28 -0
  97. package/dist/rag/embeddings/local-4b.js +51 -0
  98. package/dist/rag/embeddings/local.d.ts +29 -0
  99. package/dist/rag/embeddings/local.js +119 -0
  100. package/dist/rag/embeddings/mistral.d.ts +22 -0
  101. package/dist/rag/embeddings/mistral.js +85 -0
  102. package/dist/rag/embeddings/openai.d.ts +22 -0
  103. package/dist/rag/embeddings/openai.js +85 -0
  104. package/dist/rag/embeddings/types.d.ts +37 -0
  105. package/dist/rag/embeddings/types.js +1 -0
  106. package/dist/rag/gitignore/index.d.ts +57 -0
  107. package/dist/rag/gitignore/index.js +178 -0
  108. package/dist/rag/index.d.ts +15 -0
  109. package/dist/rag/index.js +25 -0
  110. package/dist/rag/indexer/chunker.d.ts +129 -0
  111. package/dist/rag/indexer/chunker.js +1352 -0
  112. package/dist/rag/indexer/index.d.ts +6 -0
  113. package/dist/rag/indexer/index.js +6 -0
  114. package/dist/rag/indexer/indexer.d.ts +73 -0
  115. package/dist/rag/indexer/indexer.js +356 -0
  116. package/dist/rag/indexer/types.d.ts +68 -0
  117. package/dist/rag/indexer/types.js +47 -0
  118. package/dist/rag/logger/index.d.ts +20 -0
  119. package/dist/rag/logger/index.js +75 -0
  120. package/dist/rag/manifest/index.d.ts +50 -0
  121. package/dist/rag/manifest/index.js +97 -0
  122. package/dist/rag/merkle/diff.d.ts +26 -0
  123. package/dist/rag/merkle/diff.js +95 -0
  124. package/dist/rag/merkle/hash.d.ts +34 -0
  125. package/dist/rag/merkle/hash.js +165 -0
  126. package/dist/rag/merkle/index.d.ts +68 -0
  127. package/dist/rag/merkle/index.js +298 -0
  128. package/dist/rag/merkle/node.d.ts +51 -0
  129. package/dist/rag/merkle/node.js +69 -0
  130. package/dist/rag/search/filters.d.ts +21 -0
  131. package/dist/rag/search/filters.js +100 -0
  132. package/dist/rag/search/fts.d.ts +32 -0
  133. package/dist/rag/search/fts.js +61 -0
  134. package/dist/rag/search/hybrid.d.ts +17 -0
  135. package/dist/rag/search/hybrid.js +58 -0
  136. package/dist/rag/search/index.d.ts +89 -0
  137. package/dist/rag/search/index.js +367 -0
  138. package/dist/rag/search/types.d.ts +130 -0
  139. package/dist/rag/search/types.js +4 -0
  140. package/dist/rag/search/vector.d.ts +25 -0
  141. package/dist/rag/search/vector.js +44 -0
  142. package/dist/rag/storage/index.d.ts +92 -0
  143. package/dist/rag/storage/index.js +287 -0
  144. package/dist/rag/storage/lancedb-native.d.ts +7 -0
  145. package/dist/rag/storage/lancedb-native.js +10 -0
  146. package/dist/rag/storage/schema.d.ts +23 -0
  147. package/dist/rag/storage/schema.js +50 -0
  148. package/dist/rag/storage/types.d.ts +100 -0
  149. package/dist/rag/storage/types.js +68 -0
  150. package/package.json +67 -0
  151. package/scripts/check-node-version.js +37 -0
@@ -0,0 +1,44 @@
1
+ /**
2
+ * Vector similarity search using LanceDB.
3
+ */
4
/**
 * Run a nearest-neighbour query against a LanceDB table and map the raw rows
 * to search results.
 *
 * Each row's `_distance` (lower is closer; treated as 0 when absent) is turned
 * into a similarity score via `1 / (1 + distance)`, so higher scores are better.
 *
 * @param table - LanceDB table to search
 * @param queryVector - Query embedding vector
 * @param options - Search options (`limit`, optional `filterClause`, optional
 *   `minScore`), or a bare number interpreted as the limit (legacy signature)
 * @returns Array of search results with vector scores
 */
export async function vectorSearch(table, queryVector, options) {
    // Legacy call shape: vectorSearch(table, vector, limit)
    const settings = typeof options === 'number' ? { limit: options } : options;
    const limited = table.search(queryVector).limit(settings.limit);
    // Only narrow the query when a filter clause was supplied
    const prepared = settings.filterClause
        ? limited.where(settings.filterClause)
        : limited;
    const rows = await prepared.toArray();
    const hits = [];
    for (const row of rows) {
        // Distance -> similarity: smaller distance yields a larger score
        const similarity = 1 / (1 + (row._distance ?? 0));
        const hit = {
            id: row.id,
            text: row.text,
            filepath: row.filepath,
            filename: row.filename,
            name: row.name,
            type: row.type,
            startLine: row.start_line,
            endLine: row.end_line,
            score: similarity,
            vectorScore: similarity,
            signature: row.signature,
            isExported: row.is_exported,
        };
        // A falsy minScore disables the threshold; otherwise keep hits at or above it
        if (settings.minScore && !(hit.score >= settings.minScore)) {
            continue;
        }
        hits.push(hit);
    }
    return hits;
}
@@ -0,0 +1,92 @@
1
+ import type { Table } from '@lancedb/lancedb';
2
+ import { type CachedEmbedding, type CodeChunk } from './types.js';
3
+ export * from './types.js';
4
+ export * from './schema.js';
5
/**
 * Storage layer wrapping LanceDB for code chunks and embedding cache.
 *
 * Manages two tables: one for indexed code chunks and one for a
 * content-addressed embedding cache. Call connect() before any other method;
 * the implementation throws "Storage not connected" otherwise.
 */
export declare class Storage {
    /** Project root used to derive the LanceDB database path. */
    private readonly projectRoot;
    /** Embedding vector length used when creating table schemas. */
    private readonly dimensions;
    /** LanceDB connection; null until connect() and again after close(). */
    private db;
    /** Handle to the code chunks table; null after resetChunksTable(). */
    private chunksTable;
    /** Handle to the embedding cache table. */
    private cacheTable;
    /**
     * @param projectRoot - Project root directory
     * @param dimensions - Embedding vector length (the implementation defaults to 768)
     */
    constructor(projectRoot: string, dimensions?: number);
    /**
     * Connect to the LanceDB database.
     * Creates tables if they don't exist.
     */
    connect(): Promise<void>;
    /**
     * Close the database connection.
     * Drops the in-memory connection and table handles.
     */
    close(): void;
    /**
     * Ensure we're connected.
     * Note: chunksTable may be null after resetChunksTable().
     */
    private ensureConnected;
    /**
     * Upsert chunks into the database.
     * Uses merge insert (keyed on `id`) to update existing chunks or add new ones.
     * An empty array is a no-op.
     */
    upsertChunks(chunks: CodeChunk[]): Promise<void>;
    /**
     * Add chunks to the database (no merge, just insert).
     * Use this after resetChunksTable() to avoid schema mismatch issues.
     * Creates the table from data if it doesn't exist.
     */
    addChunks(chunks: CodeChunk[]): Promise<void>;
    /**
     * Delete all chunks for a specific file.
     * @returns Number of chunks deleted (row count before minus row count after)
     */
    deleteChunksByFilepath(filepath: string): Promise<number>;
    /**
     * Delete all chunks for multiple files.
     * @returns Number of chunks deleted (0 for an empty list)
     */
    deleteChunksByFilepaths(filepaths: string[]): Promise<number>;
    /**
     * Get all chunks for a specific file.
     */
    getChunksByFilepath(filepath: string): Promise<CodeChunk[]>;
    /**
     * Get all unique filepaths in the database.
     */
    getAllFilepaths(): Promise<Set<string>>;
    /**
     * Count total number of chunks.
     */
    countChunks(): Promise<number>;
    /**
     * Get cached embeddings for a list of content hashes.
     * Hashes without a cache entry are simply absent from the result.
     * @returns Map from content hash to vector
     */
    getCachedEmbeddings(hashes: string[]): Promise<Map<string, number[]>>;
    /**
     * Cache embeddings for future use.
     * Uses merge insert keyed on `content_hash`, so re-caching a hash overwrites it.
     */
    cacheEmbeddings(entries: CachedEmbedding[]): Promise<void>;
    /**
     * Count total number of cached embeddings.
     */
    countCachedEmbeddings(): Promise<number>;
    /**
     * Clear all chunks but keep the embedding cache.
     */
    clearAll(): Promise<void>;
    /**
     * Drop and recreate the chunks table.
     * Use this for force reindex to avoid schema mismatch issues.
     * The table itself is recreated lazily by the next addChunks() call.
     */
    resetChunksTable(): Promise<void>;
    /**
     * Clear the embedding cache.
     */
    clearCache(): Promise<void>;
    /**
     * Get the chunks table for direct querying (e.g., search).
     * NOTE(review): the implementation returns the internal handle as-is, which
     * is null between resetChunksTable() and the next addChunks().
     */
    getChunksTable(): Table;
}
@@ -0,0 +1,287 @@
1
+ import * as lancedb from '@lancedb/lancedb';
2
+ import { makeArrowTable } from '@lancedb/lancedb';
3
+ import { getLanceDbPath, TABLE_NAMES } from '../constants.js';
4
+ import { createCodeChunksSchema, createEmbeddingCacheSchema } from './schema.js';
5
+ import { chunkToRow, embeddingToRow, rowToChunk, } from './types.js';
6
+ export * from './types.js';
7
+ export * from './schema.js';
8
/**
 * Storage layer wrapping LanceDB for code chunks and embedding cache.
 *
 * Two tables are managed: the code chunks table and the embedding cache table.
 * After resetChunksTable() the chunks table does not exist until addChunks()
 * recreates it; during that window chunk operations treat the table as empty
 * instead of failing on a null handle.
 */
export class Storage {
    /**
     * @param projectRoot - Project root used to derive the LanceDB path
     * @param dimensions - Embedding vector length used for table schemas
     */
    constructor(projectRoot, dimensions = 768) {
        this.projectRoot = projectRoot;
        this.dimensions = dimensions;
        // Connection and table handles are populated by connect()
        this.db = null;
        this.chunksTable = null;
        this.cacheTable = null;
    }
    /**
     * Connect to the LanceDB database.
     * Creates tables if they don't exist.
     */
    async connect() {
        const dbPath = getLanceDbPath(this.projectRoot);
        this.db = await lancedb.connect(dbPath);
        // Get existing table names
        const tableNames = await this.db.tableNames();
        // Open or create code_chunks table
        if (tableNames.includes(TABLE_NAMES.CODE_CHUNKS)) {
            this.chunksTable = await this.db.openTable(TABLE_NAMES.CODE_CHUNKS);
        }
        else {
            const schema = createCodeChunksSchema(this.dimensions);
            this.chunksTable = await this.db.createEmptyTable(TABLE_NAMES.CODE_CHUNKS, schema);
        }
        // Open or create embedding_cache table
        if (tableNames.includes(TABLE_NAMES.EMBEDDING_CACHE)) {
            this.cacheTable = await this.db.openTable(TABLE_NAMES.EMBEDDING_CACHE);
        }
        else {
            const schema = createEmbeddingCacheSchema(this.dimensions);
            this.cacheTable = await this.db.createEmptyTable(TABLE_NAMES.EMBEDDING_CACHE, schema);
        }
    }
    /**
     * Close the database connection.
     */
    close() {
        // LanceDB connections don't need explicit closing in the JS SDK
        this.db = null;
        this.chunksTable = null;
        this.cacheTable = null;
    }
    /**
     * Ensure we're connected.
     * Note: chunksTable may be null after resetChunksTable(), so it is
     * deliberately not checked here; chunk operations handle that case.
     *
     * @throws Error if connect() has not been called (or close() was called)
     */
    ensureConnected() {
        if (!this.db || !this.cacheTable) {
            throw new Error('Storage not connected. Call connect() first.');
        }
    }
    // ============================================================
    // Chunk Operations
    // ============================================================
    /**
     * Upsert chunks into the database.
     * Uses merge insert to update existing chunks or add new ones.
     * If the chunks table was reset, there is nothing to merge with, so the
     * chunks are inserted via addChunks(), which recreates the table.
     */
    async upsertChunks(chunks) {
        this.ensureConnected();
        if (chunks.length === 0)
            return;
        if (!this.chunksTable) {
            await this.addChunks(chunks);
            return;
        }
        const rows = chunks.map(chunkToRow);
        // Use merge insert for upsert behavior
        await this.chunksTable.mergeInsert('id')
            .whenMatchedUpdateAll()
            .whenNotMatchedInsertAll()
            .execute(rows);
    }
    /**
     * Add chunks to the database (no merge, just insert).
     * Use this after resetChunksTable() to avoid schema mismatch issues.
     * Creates the table from data if it doesn't exist.
     */
    async addChunks(chunks) {
        this.ensureConnected();
        if (chunks.length === 0)
            return;
        const rows = chunks.map(chunkToRow);
        const schema = createCodeChunksSchema(this.dimensions);
        // Use makeArrowTable to properly convert data with schema
        const arrowTable = makeArrowTable(rows, { schema });
        // If table was reset (null), create from Arrow table
        if (!this.chunksTable) {
            this.chunksTable = await this.db.createTable(TABLE_NAMES.CODE_CHUNKS, arrowTable);
        }
        else {
            await this.chunksTable.add(arrowTable);
        }
    }
    /**
     * Delete all chunks for a specific file.
     * @returns Number of chunks deleted (0 when the chunks table was reset)
     */
    async deleteChunksByFilepath(filepath) {
        this.ensureConnected();
        if (!this.chunksTable)
            return 0;
        // delete() reports no count, so derive it from the row counts
        const countBefore = await this.chunksTable.countRows();
        await this.chunksTable.delete(`filepath = '${escapeString(filepath)}'`);
        const countAfter = await this.chunksTable.countRows();
        return countBefore - countAfter;
    }
    /**
     * Delete all chunks for multiple files.
     * @returns Number of chunks deleted (0 for an empty list or a reset table)
     */
    async deleteChunksByFilepaths(filepaths) {
        this.ensureConnected();
        if (filepaths.length === 0)
            return 0;
        if (!this.chunksTable)
            return 0;
        const countBefore = await this.chunksTable.countRows();
        // Build IN clause with escaped strings
        const escaped = filepaths.map(fp => `'${escapeString(fp)}'`).join(', ');
        await this.chunksTable.delete(`filepath IN (${escaped})`);
        const countAfter = await this.chunksTable.countRows();
        return countBefore - countAfter;
    }
    /**
     * Get all chunks for a specific file.
     * Returns an empty array when the chunks table was reset.
     */
    async getChunksByFilepath(filepath) {
        this.ensureConnected();
        if (!this.chunksTable)
            return [];
        const results = await this.chunksTable.query()
            .where(`filepath = '${escapeString(filepath)}'`)
            .toArray();
        return results.map(row => rowToChunk(row));
    }
    /**
     * Get all unique filepaths in the database.
     * Returns an empty set when the chunks table was reset.
     */
    async getAllFilepaths() {
        this.ensureConnected();
        const filepaths = new Set();
        if (!this.chunksTable)
            return filepaths;
        // Query all rows but only need filepath column
        const results = await this.chunksTable.query()
            .select(['filepath'])
            .toArray();
        for (const row of results) {
            filepaths.add(row.filepath);
        }
        return filepaths;
    }
    /**
     * Count total number of chunks (0 when the chunks table was reset).
     */
    async countChunks() {
        this.ensureConnected();
        if (!this.chunksTable)
            return 0;
        return this.chunksTable.countRows();
    }
    // ============================================================
    // Cache Operations
    // ============================================================
    /**
     * Get cached embeddings for a list of content hashes.
     * @returns Map from content hash to vector
     */
    async getCachedEmbeddings(hashes) {
        this.ensureConnected();
        if (hashes.length === 0)
            return new Map();
        // Build IN clause
        const escaped = hashes.map(h => `'${escapeString(h)}'`).join(', ');
        const results = await this.cacheTable.query()
            .where(`content_hash IN (${escaped})`)
            .toArray();
        const cache = new Map();
        for (const row of results) {
            const typed = row;
            // Ensure vector is a plain array (LanceDB may return typed arrays)
            const vector = Array.isArray(typed.vector)
                ? typed.vector
                : Array.from(typed.vector);
            cache.set(typed.content_hash, vector);
        }
        return cache;
    }
    /**
     * Cache embeddings for future use.
     */
    async cacheEmbeddings(entries) {
        this.ensureConnected();
        if (entries.length === 0)
            return;
        const rows = entries.map(embeddingToRow);
        // Use merge insert for upsert behavior
        await this.cacheTable.mergeInsert('content_hash')
            .whenMatchedUpdateAll()
            .whenNotMatchedInsertAll()
            .execute(rows);
    }
    /**
     * Count total number of cached embeddings.
     */
    async countCachedEmbeddings() {
        this.ensureConnected();
        return this.cacheTable.countRows();
    }
    // ============================================================
    // Maintenance Operations
    // ============================================================
    /**
     * Clear all chunks but keep the embedding cache.
     * No-op when the chunks table was reset (there is nothing to clear).
     */
    async clearAll() {
        this.ensureConnected();
        if (!this.chunksTable)
            return;
        // Delete all rows from chunks table
        // LanceDB doesn't have a truncate, so we delete all
        const count = await this.chunksTable.countRows();
        if (count > 0) {
            // Delete with a condition that matches all rows
            await this.chunksTable.delete('id IS NOT NULL');
        }
    }
    /**
     * Drop and recreate the chunks table.
     * Use this for force reindex to avoid schema mismatch issues.
     * Safe to call repeatedly: the drop is skipped when the table is already gone.
     */
    async resetChunksTable() {
        this.ensureConnected();
        // Drop existing table, but only if it is actually there
        const tableNames = await this.db.tableNames();
        if (tableNames.includes(TABLE_NAMES.CODE_CHUNKS)) {
            await this.db.dropTable(TABLE_NAMES.CODE_CHUNKS);
        }
        // Don't pre-create - let it be created from first data insert
        // This avoids Arrow schema mismatch issues
        this.chunksTable = null;
    }
    /**
     * Clear the embedding cache.
     */
    async clearCache() {
        this.ensureConnected();
        const count = await this.cacheTable.countRows();
        if (count > 0) {
            await this.cacheTable.delete('content_hash IS NOT NULL');
        }
    }
    /**
     * Get the chunks table for direct querying (e.g., search).
     * Returns the internal handle as-is, which is null between
     * resetChunksTable() and the next addChunks().
     */
    getChunksTable() {
        this.ensureConnected();
        return this.chunksTable;
    }
}
281
/**
 * Make a string safe to embed inside a single-quoted literal of a SQL-like
 * LanceDB filter expression by turning every `'` into `''`.
 */
function escapeString(s) {
    const pieces = s.split("'");
    return pieces.join("''");
}
@@ -0,0 +1,7 @@
1
+ /**
2
+ * Force bundler to include LanceDB native module.
3
+ * This file exists to make Bun's bundler statically analyze and include
4
+ * the LanceDB native binding, which is normally loaded dynamically.
5
+ */
6
+ import '@lancedb/lancedb-darwin-arm64';
7
+ export * from '@lancedb/lancedb';
@@ -0,0 +1,10 @@
1
+ /**
2
+ * Force bundler to include LanceDB native module.
3
+ * This file exists to make Bun's bundler statically analyze and include
4
+ * the LanceDB native binding, which is normally loaded dynamically.
5
+ */
6
+ // Force bundler to include the darwin-arm64 native module
7
+ // @ts-ignore - native module
8
+ import '@lancedb/lancedb-darwin-arm64';
9
+ // Re-export everything from lancedb
10
+ export * from '@lancedb/lancedb';
@@ -0,0 +1,23 @@
1
+ import { Schema } from 'apache-arrow';
2
/**
 * Current schema version. Increment when schema changes require reindex.
 */
export declare const SCHEMA_VERSION = 2;
6
/**
 * Arrow schema for the code_chunks table.
 *
 * Stores indexed code chunks with their embeddings.
 *
 * Schema v2 adds:
 * - signature: Function/method signature line
 * - docstring: Extracted documentation
 * - is_exported: Whether symbol is exported
 * - decorator_names: Comma-separated decorator names
 *
 * @param dimensions - Length of the fixed-size `vector` column; the
 *   implementation falls back to DEFAULT_EMBEDDING_DIMENSIONS when omitted
 * @returns A new Arrow Schema instance
 */
export declare function createCodeChunksSchema(dimensions?: number): Schema;
18
/**
 * Arrow schema for the embedding_cache table.
 *
 * Content-addressed cache for embeddings to avoid recomputation.
 *
 * @param dimensions - Length of the fixed-size `vector` column; the
 *   implementation falls back to DEFAULT_EMBEDDING_DIMENSIONS when omitted
 * @returns A new Arrow Schema instance
 */
export declare function createEmbeddingCacheSchema(dimensions?: number): Schema;
@@ -0,0 +1,50 @@
1
+ import { Field, FixedSizeList, Float32, Int32, Schema, Utf8, Bool, } from 'apache-arrow';
2
+ import { DEFAULT_EMBEDDING_DIMENSIONS } from '../constants.js';
3
/**
 * Current schema version. Increment when schema changes require reindex.
 * Version 2 corresponds to the code_chunks columns signature, docstring,
 * is_exported and decorator_names.
 */
export const SCHEMA_VERSION = 2;
7
/**
 * Build the Arrow schema of the code_chunks table, which holds indexed code
 * chunks together with their embedding vectors.
 *
 * Columns introduced with schema v2:
 * - signature: Function/method signature line
 * - docstring: Extracted documentation
 * - is_exported: Whether symbol is exported
 * - decorator_names: Comma-separated decorator names
 *
 * @param dimensions - Length of the fixed-size embedding vector column
 * @returns A new Arrow Schema instance
 */
export function createCodeChunksSchema(dimensions = DEFAULT_EMBEDDING_DIMENSIONS) {
    // Small column factories keep the field list below readable
    const textColumn = (name, nullable = false) => new Field(name, new Utf8(), nullable);
    const intColumn = (name) => new Field(name, new Int32(), false);
    const embeddingType = new FixedSizeList(dimensions, new Field('item', new Float32(), false));
    const columns = [
        textColumn('id'), // "{filepath}:{startLine}"
        new Field('vector', embeddingType, false),
        textColumn('text'),
        textColumn('content_hash'),
        textColumn('filepath'),
        textColumn('filename'),
        textColumn('extension'),
        textColumn('type'), // function/class/method/module
        textColumn('name'),
        intColumn('start_line'),
        intColumn('end_line'),
        textColumn('file_hash'),
        // Schema v2: deterministic AST-derived metadata
        textColumn('signature', true), // nullable
        textColumn('docstring', true), // nullable
        new Field('is_exported', new Bool(), false),
        textColumn('decorator_names', true), // nullable
    ];
    return new Schema(columns);
}
39
/**
 * Build the Arrow schema of the embedding_cache table: a content-addressed
 * store that lets previously computed embeddings be reused.
 *
 * @param dimensions - Length of the fixed-size embedding vector column
 * @returns A new Arrow Schema instance
 */
export function createEmbeddingCacheSchema(dimensions = DEFAULT_EMBEDDING_DIMENSIONS) {
    const hashColumn = new Field('content_hash', new Utf8(), false); // Primary key
    const vectorColumn = new Field('vector', new FixedSizeList(dimensions, new Field('item', new Float32(), false)), false);
    const createdAtColumn = new Field('created_at', new Utf8(), false); // ISO timestamp
    return new Schema([hashColumn, vectorColumn, createdAtColumn]);
}
@@ -0,0 +1,100 @@
1
/**
 * Types of code chunks extracted by tree-sitter.
 * Persisted as a plain string in the `type` column of the code_chunks table.
 */
export type ChunkType = 'function' | 'class' | 'method' | 'module';
5
/**
 * A code chunk stored in LanceDB.
 * Represents a semantic unit of code (function, class, method, or module).
 *
 * This is the camelCase application-side shape; CodeChunkRow is the
 * snake_case shape actually written to the table.
 */
export interface CodeChunk {
    /** Unique ID: "{filepath}:{startLine}". Used as the merge key on upsert. */
    id: string;
    /**
     * Embedding vector (768 dimensions for Jina code embeddings).
     * NOTE(review): the table schema takes the vector length as a parameter,
     * so the length presumably has to match the configured dimensions — confirm.
     */
    vector: number[];
    /** Source code content */
    text: string;
    /** SHA256 hash of the text content */
    contentHash: string;
    /** Relative file path from project root */
    filepath: string;
    /** Just the filename (e.g., "utils.py") */
    filename: string;
    /** File extension (e.g., ".py") */
    extension: string;
    /** Chunk type: function, class, method, or module */
    type: ChunkType;
    /** Symbol name (empty for module chunks) */
    name: string;
    /** Start line number (1-indexed) */
    startLine: number;
    /** End line number (1-indexed) */
    endLine: number;
    /** SHA256 hash of the entire source file */
    fileHash: string;
    /** Function/method signature line (null for module/class without params) */
    signature: string | null;
    /** Extracted documentation (JSDoc, docstring, etc.) */
    docstring: string | null;
    /** Whether symbol has export modifier (JS/TS) or is in __all__ (Python) */
    isExported: boolean;
    /** Comma-separated decorator/annotation names (null if none) */
    decoratorNames: string | null;
}
43
/**
 * Row format for LanceDB code_chunks table.
 * Uses snake_case to match Arrow/LanceDB conventions.
 * Field names mirror the columns of the code_chunks Arrow schema; see
 * CodeChunk for the meaning of each value.
 */
export interface CodeChunkRow {
    id: string;
    vector: number[];
    text: string;
    content_hash: string;
    filepath: string;
    filename: string;
    extension: string;
    /** Chunk type stored as a plain string (see ChunkType). */
    type: string;
    name: string;
    start_line: number;
    end_line: number;
    file_hash: string;
    /** Nullable column (schema v2). */
    signature: string | null;
    /** Nullable column (schema v2). */
    docstring: string | null;
    is_exported: boolean;
    /** Nullable column (schema v2). */
    decorator_names: string | null;
}
65
/**
 * A cached embedding stored in LanceDB.
 * Content-addressed by the hash of the text.
 * CachedEmbeddingRow is the snake_case shape written to the table.
 */
export interface CachedEmbedding {
    /** SHA256 hash of the text content (primary key) */
    contentHash: string;
    /** Embedding vector */
    vector: number[];
    /** ISO timestamp when cached */
    createdAt: string;
}
77
/**
 * Row format for LanceDB embedding_cache table.
 * Field names mirror the columns of the embedding_cache Arrow schema.
 */
export interface CachedEmbeddingRow {
    /** Content hash; the merge key when caching embeddings. */
    content_hash: string;
    /** Embedding vector. */
    vector: number[];
    /** ISO timestamp string. */
    created_at: string;
}
85
/**
 * Convert a CodeChunk to a LanceDB row format.
 * Renames camelCase properties to their snake_case column names; values are
 * copied as-is.
 */
export declare function chunkToRow(chunk: CodeChunk): CodeChunkRow;
89
/**
 * Convert a LanceDB row to a CodeChunk.
 * Renames snake_case columns back to the camelCase CodeChunk properties.
 */
export declare function rowToChunk(row: CodeChunkRow): CodeChunk;
93
/**
 * Convert a CachedEmbedding to a LanceDB row format.
 * Renames camelCase properties to their snake_case column names; values are
 * copied as-is.
 */
export declare function embeddingToRow(embedding: CachedEmbedding): CachedEmbeddingRow;
97
/**
 * Convert a LanceDB row to a CachedEmbedding.
 * Renames snake_case columns back to the camelCase CachedEmbedding properties.
 */
export declare function rowToEmbedding(row: CachedEmbeddingRow): CachedEmbedding;
@@ -0,0 +1,68 @@
1
/**
 * Map an application-side CodeChunk onto the snake_case row shape used by the
 * code_chunks table. Values are copied unchanged; only the keys are renamed.
 */
export function chunkToRow(chunk) {
    const {
        id, vector, text, contentHash, filepath, filename, extension, type, name,
        startLine, endLine, fileHash,
        // Schema v2 metadata
        signature, docstring, isExported, decoratorNames,
    } = chunk;
    return {
        id,
        vector,
        text,
        content_hash: contentHash,
        filepath,
        filename,
        extension,
        type,
        name,
        start_line: startLine,
        end_line: endLine,
        file_hash: fileHash,
        signature,
        docstring,
        is_exported: isExported,
        decorator_names: decoratorNames,
    };
}
25
/**
 * Convert a LanceDB row to a CodeChunk.
 *
 * Keys are renamed from snake_case columns to camelCase properties. The
 * vector is normalised to a plain array because LanceDB may return typed
 * arrays (or other iterables) for vector columns, while CodeChunk.vector is
 * declared as number[]. A missing vector (null/undefined, e.g. when the column
 * was not selected) is passed through unchanged.
 *
 * @param row - Row read from the code_chunks table
 * @returns The equivalent CodeChunk
 */
export function rowToChunk(row) {
    const rawVector = row.vector;
    const vector = rawVector == null || Array.isArray(rawVector)
        ? rawVector
        : Array.from(rawVector);
    return {
        id: row.id,
        vector,
        text: row.text,
        contentHash: row.content_hash,
        filepath: row.filepath,
        filename: row.filename,
        extension: row.extension,
        type: row.type,
        name: row.name,
        startLine: row.start_line,
        endLine: row.end_line,
        fileHash: row.file_hash,
        // New in schema v2
        signature: row.signature,
        docstring: row.docstring,
        isExported: row.is_exported,
        decoratorNames: row.decorator_names,
    };
}
49
/**
 * Map a CachedEmbedding onto the snake_case row shape used by the
 * embedding_cache table. Values are copied unchanged; only keys are renamed.
 */
export function embeddingToRow(embedding) {
    const { contentHash, vector, createdAt } = embedding;
    return {
        content_hash: contentHash,
        vector,
        created_at: createdAt,
    };
}
59
/**
 * Convert a LanceDB row to a CachedEmbedding.
 *
 * Keys are renamed from snake_case columns to camelCase properties. The
 * vector is normalised to a plain array because LanceDB may return typed
 * arrays (or other iterables) for vector columns, while
 * CachedEmbedding.vector is declared as number[]. A missing vector
 * (null/undefined) is passed through unchanged.
 *
 * @param row - Row read from the embedding_cache table
 * @returns The equivalent CachedEmbedding
 */
export function rowToEmbedding(row) {
    const rawVector = row.vector;
    const vector = rawVector == null || Array.isArray(rawVector)
        ? rawVector
        : Array.from(rawVector);
    return {
        contentHash: row.content_hash,
        vector,
        createdAt: row.created_at,
    };
}