@lojban/semantic-search-mcp 1.0.0 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -12,7 +12,7 @@ Use it in **Cursor**, **Claude Code**, or any IDE that supports MCP to search th
12
12
 
13
13
  ## How it works
14
14
 
15
- - **Indexing**: Scans directories for `.txt`, `.md`, `.tsv`, `.csv`, `.json`, `.html`, `.xml`. Each non-empty line gets a vector embedding (via [Hugging Face Transformers.js](https://huggingface.co/docs/transformers.js), model `Xenova/all-MiniLM-L6-v2`) and is stored in a local SQLite database with [sqlite-vec](https://github.com/asg017/sqlite-vec).
15
+ - **Indexing**: Scans directories for `.txt`, `.md`, `.tsv`, `.csv`, `.json`, `.html`, `.xml`. Each non-empty line gets a vector embedding (via [Hugging Face Transformers.js](https://huggingface.co/docs/transformers.js), model `Xenova/all-MiniLM-L6-v2`) and is stored in a local SQLite database with [@dao-xyz/sqlite3-vec](https://www.npmjs.com/package/@dao-xyz/sqlite3-vec) (SQLite + sqlite-vec for Node and browser).
16
16
  - **Search**: You send a natural-language query; the server embeds it and returns the closest lines by cosine similarity.
17
17
  - **Storage**: Index is stored in your project's `.semantic-search/data/` (or set `SEMANTIC_SEARCH_DATA_DIR`). No cloud, no API keys.
18
18
 
package/package.json CHANGED
@@ -1,21 +1,19 @@
1
1
  {
2
2
  "name": "@lojban/semantic-search-mcp",
3
- "version": "1.0.0",
3
+ "version": "1.0.2",
4
4
  "description": "Local-first MCP server for semantic search using transformers.js and SQLite",
5
5
  "type": "module",
6
6
  "scripts": {
7
7
  "dev": "tsx src/index.ts"
8
8
  },
9
9
  "dependencies": {
10
+ "@dao-xyz/sqlite3-vec": "^0.0.19",
10
11
  "@huggingface/transformers": "^3.0.0",
11
12
  "@modelcontextprotocol/sdk": "^1.0.0",
12
- "better-sqlite3": "^11.0.0",
13
13
  "glob": "^10.3.0",
14
- "sqlite-vec": "^0.1.0",
15
14
  "tsx": "^4.0.0"
16
15
  },
17
16
  "devDependencies": {
18
- "@types/better-sqlite3": "^7.6.0",
19
17
  "@types/node": "^20.0.0",
20
18
  "typescript": "^5.0.0"
21
19
  },
package/src/index.ts CHANGED
@@ -7,210 +7,196 @@ import {
7
7
  } from '@modelcontextprotocol/sdk/types.js';
8
8
  import path from 'path';
9
9
  import { getEmbedding, getBatchEmbeddings } from './embeddings.js';
10
- import { VectorStorage, SearchResult } from './storage.js';
10
+ import { createVectorStorage, type SearchResult } from './storage.js';
11
11
  import { scanDirectories } from './scanner.js';
12
12
 
13
- import { mkdirSync } from 'fs';
14
-
15
13
  // Data dir: use env, or project cwd so each workspace has its own index when run via npx from Cursor
16
14
  const dataDir =
17
15
  process.env.SEMANTIC_SEARCH_DATA_DIR ||
18
16
  path.join(process.cwd(), '.semantic-search', 'data');
19
17
  const DB_PATH = path.join(dataDir, 'vectors.db');
20
18
 
21
- mkdirSync(path.dirname(DB_PATH), { recursive: true });
22
-
23
- // Initialize storage
24
- const storage = new VectorStorage(DB_PATH);
19
+ async function main() {
20
+ const storage = await createVectorStorage(DB_PATH);
25
21
 
26
- // Create MCP server
27
- const server = new Server(
28
- {
29
- name: 'semantic-search',
30
- version: '1.0.0',
31
- },
32
- {
33
- capabilities: {
34
- tools: {},
22
+ const server = new Server(
23
+ {
24
+ name: 'semantic-search',
25
+ version: '1.0.0',
35
26
  },
36
- }
37
- );
38
-
39
- // Define available tools
40
- server.setRequestHandler(ListToolsRequestSchema, async () => {
41
- return {
42
- tools: [
43
- {
44
- name: 'index_directories',
45
- description: 'Scan directories and index all text file lines for semantic search. Each line gets a vector embedding.',
46
- inputSchema: {
47
- type: 'object',
48
- properties: {
49
- directories: {
50
- type: 'array',
51
- items: { type: 'string' },
52
- description: 'List of directory paths to scan and index. Defaults to SEMANTIC_SEARCH_INDEX_DIRS (comma-separated) if unset.',
53
- },
54
- clear_existing: {
55
- type: 'boolean',
56
- description: 'Whether to clear the existing index before indexing (default: false)',
57
- default: false,
27
+ {
28
+ capabilities: {
29
+ tools: {},
30
+ },
31
+ }
32
+ );
33
+
34
+ server.setRequestHandler(ListToolsRequestSchema, async () => {
35
+ return {
36
+ tools: [
37
+ {
38
+ name: 'index_directories',
39
+ description: 'Scan directories and index all text file lines for semantic search. Each line gets a vector embedding.',
40
+ inputSchema: {
41
+ type: 'object',
42
+ properties: {
43
+ directories: {
44
+ type: 'array',
45
+ items: { type: 'string' },
46
+ description: 'List of directory paths to scan and index. Defaults to SEMANTIC_SEARCH_INDEX_DIRS (comma-separated) if unset.',
47
+ },
48
+ clear_existing: {
49
+ type: 'boolean',
50
+ description: 'Whether to clear the existing index before indexing (default: false)',
51
+ default: false,
52
+ },
58
53
  },
54
+ required: [],
59
55
  },
60
- required: [],
61
56
  },
62
- },
63
- {
64
- name: 'search',
65
- description: 'Search for lines semantically similar to the query. Returns the most relevant lines from indexed files.',
66
- inputSchema: {
67
- type: 'object',
68
- properties: {
69
- query: {
70
- type: 'string',
71
- description: 'The search query (natural language)',
72
- },
73
- limit: {
74
- type: 'number',
75
- description: 'Maximum number of results to return (default: 10)',
76
- default: 10,
57
+ {
58
+ name: 'search',
59
+ description: 'Search for lines semantically similar to the query. Returns the most relevant lines from indexed files.',
60
+ inputSchema: {
61
+ type: 'object',
62
+ properties: {
63
+ query: {
64
+ type: 'string',
65
+ description: 'The search query (natural language)',
66
+ },
67
+ limit: {
68
+ type: 'number',
69
+ description: 'Maximum number of results to return (default: 10)',
70
+ default: 10,
71
+ },
77
72
  },
73
+ required: ['query'],
78
74
  },
79
- required: ['query'],
80
75
  },
81
- },
82
- {
83
- name: 'get_index_stats',
84
- description: 'Get statistics about the current index (number of files and lines indexed)',
85
- inputSchema: {
86
- type: 'object',
87
- properties: {},
76
+ {
77
+ name: 'get_index_stats',
78
+ description: 'Get statistics about the current index (number of files and lines indexed)',
79
+ inputSchema: {
80
+ type: 'object',
81
+ properties: {},
82
+ },
88
83
  },
89
- },
90
- ],
91
- };
92
- });
93
-
94
- // Handle tool calls
95
- server.setRequestHandler(CallToolRequestSchema, async (request) => {
96
- const { name, arguments: args } = request.params;
97
-
98
- try {
99
- switch (name) {
100
- case 'index_directories': {
101
- let directories = (args as { directories?: string[]; clear_existing?: boolean }).directories;
102
- if (!directories?.length) {
103
- const envDirs = process.env.SEMANTIC_SEARCH_INDEX_DIRS;
104
- directories = envDirs ? envDirs.split(',').map((d) => d.trim()).filter(Boolean) : [];
105
- }
106
- if (!directories.length) {
107
- throw new Error('No directories to index. Set directories in the request or SEMANTIC_SEARCH_INDEX_DIRS (comma-separated).');
84
+ ],
85
+ };
86
+ });
87
+
88
+ server.setRequestHandler(CallToolRequestSchema, async (request) => {
89
+ const { name, arguments: args } = request.params;
90
+
91
+ try {
92
+ switch (name) {
93
+ case 'index_directories': {
94
+ let directories = (args as { directories?: string[]; clear_existing?: boolean }).directories;
95
+ if (!directories?.length) {
96
+ const envDirs = process.env.SEMANTIC_SEARCH_INDEX_DIRS;
97
+ directories = envDirs ? envDirs.split(',').map((d) => d.trim()).filter(Boolean) : [];
98
+ }
99
+ if (!directories.length) {
100
+ throw new Error('No directories to index. Set directories in the request or SEMANTIC_SEARCH_INDEX_DIRS (comma-separated).');
101
+ }
102
+ const clearExisting = (args as { directories?: string[]; clear_existing?: boolean }).clear_existing ?? false;
103
+
104
+ if (clearExisting) {
105
+ storage.clear();
106
+ }
107
+
108
+ console.error(`Scanning ${directories.length} directories...`);
109
+ const lines = await scanDirectories(directories);
110
+ console.error(`Found ${lines.length} lines to index`);
111
+
112
+ const batchSize = 50;
113
+ let indexed = 0;
114
+
115
+ for (let i = 0; i < lines.length; i += batchSize) {
116
+ const batch = lines.slice(i, i + batchSize);
117
+ const texts = batch.map(l => l.content);
118
+ const embeddings = await getBatchEmbeddings(texts);
119
+
120
+ const batchData = batch.map((line, idx) => ({
121
+ filePath: line.filePath,
122
+ lineNumber: line.lineNumber,
123
+ content: line.content,
124
+ embedding: embeddings[idx],
125
+ }));
126
+
127
+ await storage.upsertLinesBatch(batchData);
128
+ indexed += batch.length;
129
+ console.error(`Indexed ${indexed}/${lines.length} lines`);
130
+ }
131
+
132
+ const stats = await storage.getStats();
133
+ return {
134
+ content: [
135
+ {
136
+ type: 'text',
137
+ text: JSON.stringify({
138
+ success: true,
139
+ indexed_lines: stats.totalLines,
140
+ indexed_files: stats.totalFiles,
141
+ message: `Successfully indexed ${stats.totalLines} lines from ${stats.totalFiles} files`,
142
+ }),
143
+ },
144
+ ],
145
+ };
108
146
  }
109
- const clearExisting = (args as { directories?: string[]; clear_existing?: boolean }).clear_existing ?? false;
110
147
 
111
- if (clearExisting) {
112
- storage.clear();
148
+ case 'search': {
149
+ const query = (args as { query: string; limit?: number }).query;
150
+ const limit = (args as { query: string; limit?: number }).limit ?? 10;
151
+
152
+ const queryEmbedding = await getEmbedding(query);
153
+ const results = await storage.search(queryEmbedding, limit);
154
+
155
+ return {
156
+ content: [
157
+ {
158
+ type: 'text',
159
+ text: JSON.stringify({
160
+ query,
161
+ results: results.map((r: SearchResult) => ({
162
+ file: r.file_path,
163
+ line: r.line_number,
164
+ content: r.content,
165
+ score: Math.round(r.score * 1000) / 1000,
166
+ })),
167
+ }),
168
+ },
169
+ ],
170
+ };
113
171
  }
114
172
 
115
- // Scan directories
116
- console.error(`Scanning ${directories.length} directories...`);
117
- const lines = await scanDirectories(directories);
118
- console.error(`Found ${lines.length} lines to index`);
119
-
120
- // Generate embeddings and store
121
- const batchSize = 50;
122
- let indexed = 0;
123
-
124
- for (let i = 0; i < lines.length; i += batchSize) {
125
- const batch = lines.slice(i, i + batchSize);
126
- const texts = batch.map(l => l.content);
127
- const embeddings = await getBatchEmbeddings(texts);
128
-
129
- const batchData = batch.map((line, idx) => ({
130
- filePath: line.filePath,
131
- lineNumber: line.lineNumber,
132
- content: line.content,
133
- embedding: embeddings[idx],
134
- }));
135
-
136
- storage.upsertLinesBatch(batchData);
137
- indexed += batch.length;
138
- console.error(`Indexed ${indexed}/${lines.length} lines`);
173
+ case 'get_index_stats': {
174
+ const stats = await storage.getStats();
175
+ return {
176
+ content: [
177
+ {
178
+ type: 'text',
179
+ text: JSON.stringify({
180
+ total_files: stats.totalFiles,
181
+ total_lines: stats.totalLines,
182
+ }),
183
+ },
184
+ ],
185
+ };
139
186
  }
140
187
 
141
- const stats = storage.getStats();
142
- return {
143
- content: [
144
- {
145
- type: 'text',
146
- text: JSON.stringify({
147
- success: true,
148
- indexed_lines: stats.totalLines,
149
- indexed_files: stats.totalFiles,
150
- message: `Successfully indexed ${stats.totalLines} lines from ${stats.totalFiles} files`,
151
- }),
152
- },
153
- ],
154
- };
188
+ default:
189
+ throw new Error(`Unknown tool: ${name}`);
155
190
  }
156
-
157
- case 'search': {
158
- const query = (args as { query: string; limit?: number }).query;
159
- const limit = (args as { query: string; limit?: number }).limit ?? 10;
160
-
161
- // Generate query embedding
162
- const queryEmbedding = await getEmbedding(query);
163
-
164
- // Search
165
- const results = storage.search(queryEmbedding, limit);
166
-
167
- return {
168
- content: [
169
- {
170
- type: 'text',
171
- text: JSON.stringify({
172
- query,
173
- results: results.map((r: SearchResult) => ({
174
- file: r.file_path,
175
- line: r.line_number,
176
- content: r.content,
177
- score: Math.round(r.score * 1000) / 1000,
178
- })),
179
- }),
180
- },
181
- ],
182
- };
183
- }
184
-
185
- case 'get_index_stats': {
186
- const stats = storage.getStats();
187
- return {
188
- content: [
189
- {
190
- type: 'text',
191
- text: JSON.stringify({
192
- total_files: stats.totalFiles,
193
- total_lines: stats.totalLines,
194
- }),
195
- },
196
- ],
197
- };
198
- }
199
-
200
- default:
201
- throw new Error(`Unknown tool: ${name}`);
191
+ } catch (error) {
192
+ const message = error instanceof Error ? error.message : String(error);
193
+ return {
194
+ content: [{ type: 'text', text: JSON.stringify({ error: message }) }],
195
+ isError: true,
196
+ };
202
197
  }
203
- } catch (error) {
204
- const message = error instanceof Error ? error.message : String(error);
205
- return {
206
- content: [{ type: 'text', text: JSON.stringify({ error: message }) }],
207
- isError: true,
208
- };
209
- }
210
- });
198
+ });
211
199
 
212
- // Start server
213
- async function main() {
214
200
  const transport = new StdioServerTransport();
215
201
  await server.connect(transport);
216
202
  console.error('Semantic Search MCP Server running on stdio');
package/src/storage.ts CHANGED
@@ -1,6 +1,7 @@
1
- import Database from 'better-sqlite3';
1
+ import pkg from '@dao-xyz/sqlite3-vec';
2
+ const { createDatabase } = pkg;
2
3
  import path from 'path';
3
- import * as sqliteVec from 'sqlite-vec';
4
+ import { mkdirSync } from 'fs';
4
5
 
5
6
  const EMBEDDING_DIM = 384; // all-MiniLM-L6-v2 produces 384-dim vectors
6
7
 
@@ -19,20 +20,17 @@ export interface SearchResult {
19
20
  score: number;
20
21
  }
21
22
 
23
+ type DB = Awaited<ReturnType<typeof createDatabase>>;
24
+
22
25
  export class VectorStorage {
23
- private db: Database.Database;
24
-
25
- constructor(dbPath: string) {
26
- this.db = new Database(dbPath);
27
-
28
- // Load sqlite-vec extension
29
- sqliteVec.load(this.db);
30
-
26
+ private db: DB;
27
+
28
+ constructor(db: DB) {
29
+ this.db = db;
31
30
  this.init();
32
31
  }
33
32
 
34
33
  private init(): void {
35
- // Create regular table for metadata
36
34
  this.db.exec(`
37
35
  CREATE TABLE IF NOT EXISTS lines (
38
36
  id INTEGER PRIMARY KEY AUTOINCREMENT,
@@ -41,11 +39,8 @@ export class VectorStorage {
41
39
  content TEXT NOT NULL,
42
40
  UNIQUE(file_path, line_number)
43
41
  );
44
-
45
42
  CREATE INDEX IF NOT EXISTS idx_file ON lines(file_path);
46
43
  `);
47
-
48
- // Create virtual table for vectors using sqlite-vec
49
44
  this.db.exec(`
50
45
  CREATE VIRTUAL TABLE IF NOT EXISTS vec_lines USING vec0(
51
46
  line_id INTEGER PRIMARY KEY,
@@ -57,126 +52,90 @@ export class VectorStorage {
57
52
  /**
58
53
  * Insert or update a line with its embedding
59
54
  */
60
- upsertLine(filePath: string, lineNumber: number, content: string, embedding: Float32Array): void {
61
- const insertLine = this.db.prepare(`
62
- INSERT INTO lines (file_path, line_number, content)
63
- VALUES (?, ?, ?)
64
- ON CONFLICT(file_path, line_number) DO UPDATE SET
65
- content = excluded.content
66
- RETURNING id
67
- `);
68
-
69
- const result = insertLine.get(filePath, lineNumber, content) as { id: number };
70
- const lineId = result.id;
71
-
72
- // Insert/update vector
73
- // vec0 tables don't support UPSERT, so we delete first just in case
74
- const safeId = BigInt(lineId);
75
- this.db.prepare('DELETE FROM vec_lines WHERE line_id = ?').run(safeId);
76
-
77
- const insertVec = this.db.prepare(`
78
- INSERT INTO vec_lines (line_id, embedding)
79
- VALUES (?, ?)
80
- `);
81
-
82
- insertVec.run(safeId, JSON.stringify(Array.from(embedding)));
55
+ async upsertLine(filePath: string, lineNumber: number, content: string, embedding: Float32Array): Promise<void> {
56
+ const insertLine = await this.db.prepare(
57
+ `INSERT INTO lines (file_path, line_number, content)
58
+ VALUES (?, ?, ?)
59
+ ON CONFLICT(file_path, line_number) DO UPDATE SET content = excluded.content`
60
+ );
61
+ insertLine.run([filePath, lineNumber, content]);
62
+
63
+ const sel = await this.db.prepare('SELECT id FROM lines WHERE file_path = ? AND line_number = ?');
64
+ const row = sel.get([filePath, lineNumber]) as { id: number } | undefined;
65
+ if (row == null) throw new Error('Failed to get line id');
66
+ const id = Math.trunc(Number(row.id));
67
+
68
+ (await this.db.prepare('DELETE FROM vec_lines WHERE line_id = ?')).run([id]);
69
+ (await this.db.prepare('INSERT INTO vec_lines (line_id, embedding) VALUES (?, ?)')).run([id, embedding.buffer]);
83
70
  }
84
71
 
85
72
  /**
86
73
  * Batch insert lines for efficiency
87
74
  */
88
- upsertLinesBatch(lines: Array<{ filePath: string; lineNumber: number; content: string; embedding: Float32Array }>): void {
89
- const insertLine = this.db.prepare(`
90
- INSERT INTO lines (file_path, line_number, content)
91
- VALUES (?, ?, ?)
92
- ON CONFLICT(file_path, line_number) DO UPDATE SET
93
- content = excluded.content
94
- RETURNING id
95
- `);
96
-
97
- // vec0 doesn't support UPSERT, so we use DELETE + INSERT
98
- const deleteVec = this.db.prepare('DELETE FROM vec_lines WHERE line_id = ?');
99
- const insertVec = this.db.prepare('INSERT INTO vec_lines (line_id, embedding) VALUES (?, ?)');
100
-
101
- const insertMany = this.db.transaction((items: typeof lines) => {
102
- for (const item of items) {
103
- const result = insertLine.get(item.filePath, item.lineNumber, item.content) as { id: number | bigint };
104
- // Ensure id is treated as appropriate integer type for vec0
105
- const id = result.id;
106
- const safeId = BigInt(id);
107
- deleteVec.run(safeId);
108
- insertVec.run(safeId, JSON.stringify(Array.from(item.embedding)));
109
- }
110
- });
111
-
112
- return insertMany(lines);
75
+ async upsertLinesBatch(
76
+ lines: Array<{ filePath: string; lineNumber: number; content: string; embedding: Float32Array }>
77
+ ): Promise<void> {
78
+ const insertLine = await this.db.prepare(
79
+ `INSERT INTO lines (file_path, line_number, content)
80
+ VALUES (?, ?, ?)
81
+ ON CONFLICT(file_path, line_number) DO UPDATE SET content = excluded.content`
82
+ );
83
+ const selId = await this.db.prepare('SELECT id FROM lines WHERE file_path = ? AND line_number = ?');
84
+ const deleteVec = await this.db.prepare('DELETE FROM vec_lines WHERE line_id = ?');
85
+ const insertVec = await this.db.prepare('INSERT INTO vec_lines (line_id, embedding) VALUES (?, ?)');
86
+
87
+ for (const item of lines) {
88
+ insertLine.run([item.filePath, item.lineNumber, item.content]);
89
+ const row = selId.get([item.filePath, item.lineNumber]) as { id: number };
90
+ const id = Math.trunc(Number(row.id));
91
+ deleteVec.run([id]);
92
+ insertVec.run([id, item.embedding.buffer]);
93
+ }
113
94
  }
114
95
 
115
96
  /**
116
- * Search for similar lines using sqlite-vec's native cosine similarity
97
+ * Search for similar lines using sqlite-vec cosine distance
117
98
  */
118
- search(queryEmbedding: Float32Array, limit: number = 10): SearchResult[] {
119
- const stmt = this.db.prepare(`
120
- SELECT
99
+ async search(queryEmbedding: Float32Array, limit: number = 10): Promise<SearchResult[]> {
100
+ const stmt = await this.db.prepare(`
101
+ SELECT
121
102
  l.file_path,
122
103
  l.line_number,
123
104
  l.content,
124
- vec_distance_cosine(v.embedding, ?) as distance
105
+ vec_distance_cosine(v.embedding, ?1) AS distance
125
106
  FROM vec_lines v
126
107
  INNER JOIN lines l ON v.line_id = l.id
127
108
  ORDER BY distance
128
- LIMIT ?
109
+ LIMIT ?2
129
110
  `);
130
-
131
- const rows = stmt.all(JSON.stringify(Array.from(queryEmbedding)), limit) as Array<{
111
+ const rows = stmt.all([queryEmbedding.buffer, limit]) as Array<{
132
112
  file_path: string;
133
113
  line_number: number;
134
114
  content: string;
135
115
  distance: number;
136
116
  }>;
137
-
138
- return rows.map(row => ({
117
+ return rows.map((row) => ({
139
118
  file_path: row.file_path,
140
119
  line_number: row.line_number,
141
120
  content: row.content,
142
- score: 1 - row.distance, // Convert distance to similarity score
121
+ score: 1 - row.distance,
143
122
  }));
144
123
  }
145
124
 
146
- /**
147
- * Get index statistics
148
- */
149
- getStats(): { totalFiles: number; totalLines: number } {
150
- const filesStmt = this.db.prepare('SELECT COUNT(DISTINCT file_path) as count FROM lines');
151
- const linesStmt = this.db.prepare('SELECT COUNT(*) as count FROM lines');
152
-
153
- const totalFiles = (filesStmt.get() as { count: number }).count;
154
- const totalLines = (linesStmt.get() as { count: number }).count;
155
-
156
- return { totalFiles, totalLines };
125
+ async getStats(): Promise<{ totalFiles: number; totalLines: number }> {
126
+ const filesRow = (await this.db.prepare('SELECT COUNT(DISTINCT file_path) AS count FROM lines')).get() as { count: number } | undefined;
127
+ const linesRow = (await this.db.prepare('SELECT COUNT(*) AS count FROM lines')).get() as { count: number } | undefined;
128
+ return {
129
+ totalFiles: filesRow?.count ?? 0,
130
+ totalLines: linesRow?.count ?? 0,
131
+ };
157
132
  }
158
133
 
159
- /**
160
- * Remove all lines for a specific file
161
- */
162
- removeFile(filePath: string): void {
163
- const deleteVecs = this.db.prepare(`
164
- DELETE FROM vec_lines
165
- WHERE line_id IN (SELECT id FROM lines WHERE file_path = ?)
166
- `);
167
- const deleteLines = this.db.prepare('DELETE FROM lines WHERE file_path = ?');
168
-
169
- const transaction = this.db.transaction(() => {
170
- deleteVecs.run(filePath);
171
- deleteLines.run(filePath);
172
- });
173
-
174
- transaction();
134
+ async removeFile(filePath: string): Promise<void> {
135
+ (await this.db.prepare('DELETE FROM vec_lines WHERE line_id IN (SELECT id FROM lines WHERE file_path = ?)')).run([filePath]);
136
+ (await this.db.prepare('DELETE FROM lines WHERE file_path = ?')).run([filePath]);
175
137
  }
176
138
 
177
- /**
178
- * Clear the entire index
179
- */
180
139
  clear(): void {
181
140
  this.db.exec('DELETE FROM vec_lines');
182
141
  this.db.exec('DELETE FROM lines');
@@ -187,3 +146,15 @@ export class VectorStorage {
187
146
  }
188
147
  }
189
148
 
149
+
150
+ /**
151
+ * Create and open the vector storage (async). Use this instead of `new VectorStorage()`.
152
+ */
153
+ export async function createVectorStorage(dbPath: string): Promise<VectorStorage> {
154
+ mkdirSync(path.dirname(dbPath), { recursive: true });
155
+ const db = await createDatabase({
156
+ database: dbPath,
157
+ });
158
+ await db.open();
159
+ return new VectorStorage(db);
160
+ }