@lojban/semantic-search-mcp 1.0.2 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lojban/semantic-search-mcp",
3
- "version": "1.0.2",
3
+ "version": "1.0.4",
4
4
  "description": "Local-first MCP server for semantic search using transformers.js and SQLite",
5
5
  "type": "module",
6
6
  "scripts": {
package/src/embeddings.ts CHANGED
@@ -24,23 +24,30 @@ export async function getEmbedding(text: string): Promise<Float32Array> {
24
24
  return new Float32Array(output.data as ArrayLike<number>);
25
25
  }
26
26
 
27
+ const EMBEDDING_DIM = 384; // all-MiniLM-L6-v2
28
+
27
29
  /**
28
- * Generate embeddings for multiple texts (batched for efficiency)
30
+ * Generate embeddings for multiple texts (batched for efficiency).
31
+ * Passes arrays to the pipeline so the model runs one forward pass per batch.
29
32
  */
30
33
  export async function getBatchEmbeddings(texts: string[]): Promise<Float32Array[]> {
34
+ if (texts.length === 0) return [];
35
+
31
36
  const ext = await getExtractor();
32
37
  const results: Float32Array[] = [];
33
-
34
- // Process in batches of 32 for memory efficiency
35
- const batchSize = 32;
38
+
39
+ // Process in batches for memory; each batch is one model forward pass
40
+ const batchSize = 64;
36
41
  for (let i = 0; i < texts.length; i += batchSize) {
37
42
  const batch = texts.slice(i, i + batchSize);
38
- for (const text of batch) {
39
- const output = await ext(text, { pooling: 'mean', normalize: true });
40
- results.push(new Float32Array(output.data as ArrayLike<number>));
43
+ const output = await ext(batch, { pooling: 'mean', normalize: true });
44
+ const data = output.data as Float32Array;
45
+ const dim = EMBEDDING_DIM;
46
+ for (let j = 0; j < batch.length; j++) {
47
+ results.push(new Float32Array(data.slice(j * dim, (j + 1) * dim)));
41
48
  }
42
49
  }
43
-
50
+
44
51
  return results;
45
52
  }
46
53
 
package/src/index.ts CHANGED
@@ -109,13 +109,23 @@ async function main() {
109
109
  const lines = await scanDirectories(directories);
110
110
  console.error(`Found ${lines.length} lines to index`);
111
111
 
112
- const batchSize = 50;
112
+ const batchSize = 128;
113
113
  let indexed = 0;
114
114
 
115
+ // Pipeline: compute embeddings for next batch while writing current batch to DB
116
+ let embedPromise: Promise<Float32Array[]> =
117
+ lines.length > 0
118
+ ? getBatchEmbeddings(lines.slice(0, batchSize).map((l) => l.content))
119
+ : Promise.resolve([]);
120
+
115
121
  for (let i = 0; i < lines.length; i += batchSize) {
116
122
  const batch = lines.slice(i, i + batchSize);
117
- const texts = batch.map(l => l.content);
118
- const embeddings = await getBatchEmbeddings(texts);
123
+ const embeddings = await embedPromise;
124
+
125
+ if (i + batchSize < lines.length) {
126
+ const nextTexts = lines.slice(i + batchSize, i + batchSize * 2).map((l) => l.content);
127
+ embedPromise = getBatchEmbeddings(nextTexts);
128
+ }
119
129
 
120
130
  const batchData = batch.map((line, idx) => ({
121
131
  filePath: line.filePath,
package/src/scanner.ts CHANGED
@@ -67,15 +67,9 @@ export async function scanDirectory(dirPath: string): Promise<FileLine[]> {
67
67
  }
68
68
 
69
69
  /**
70
- * Scan multiple directories
70
+ * Scan multiple directories in parallel
71
71
  */
72
72
  export async function scanDirectories(dirPaths: string[]): Promise<FileLine[]> {
73
- const allLines: FileLine[] = [];
74
-
75
- for (const dirPath of dirPaths) {
76
- const lines = await scanDirectory(dirPath);
77
- allLines.push(...lines);
78
- }
79
-
80
- return allLines;
73
+ const results = await Promise.all(dirPaths.map((dirPath) => scanDirectory(dirPath)));
74
+ return results.flat();
81
75
  }
package/src/storage.ts CHANGED
@@ -63,18 +63,20 @@ export class VectorStorage {
63
63
  const sel = await this.db.prepare('SELECT id FROM lines WHERE file_path = ? AND line_number = ?');
64
64
  const row = sel.get([filePath, lineNumber]) as { id: number } | undefined;
65
65
  if (row == null) throw new Error('Failed to get line id');
66
- const id = Math.trunc(Number(row.id));
66
+ const idInt = BigInt(row.id); // vec0 requires INTEGER primary key; BigInt binds as integer in better-sqlite3
67
67
 
68
- (await this.db.prepare('DELETE FROM vec_lines WHERE line_id = ?')).run([id]);
69
- (await this.db.prepare('INSERT INTO vec_lines (line_id, embedding) VALUES (?, ?)')).run([id, embedding.buffer]);
68
+ (await this.db.prepare('DELETE FROM vec_lines WHERE line_id = ?')).run([idInt]);
69
+ (await this.db.prepare('INSERT INTO vec_lines (line_id, embedding) VALUES (?, ?)')).run([idInt, embedding.buffer]);
70
70
  }
71
71
 
72
72
  /**
73
- * Batch insert lines for efficiency
73
+ * Batch insert lines for efficiency (single transaction)
74
74
  */
75
75
  async upsertLinesBatch(
76
76
  lines: Array<{ filePath: string; lineNumber: number; content: string; embedding: Float32Array }>
77
77
  ): Promise<void> {
78
+ if (lines.length === 0) return;
79
+
78
80
  const insertLine = await this.db.prepare(
79
81
  `INSERT INTO lines (file_path, line_number, content)
80
82
  VALUES (?, ?, ?)
@@ -84,12 +86,19 @@ export class VectorStorage {
84
86
  const deleteVec = await this.db.prepare('DELETE FROM vec_lines WHERE line_id = ?');
85
87
  const insertVec = await this.db.prepare('INSERT INTO vec_lines (line_id, embedding) VALUES (?, ?)');
86
88
 
87
- for (const item of lines) {
88
- insertLine.run([item.filePath, item.lineNumber, item.content]);
89
- const row = selId.get([item.filePath, item.lineNumber]) as { id: number };
90
- const id = Math.trunc(Number(row.id));
91
- deleteVec.run([id]);
92
- insertVec.run([id, item.embedding.buffer]);
89
+ this.db.exec('BEGIN');
90
+ try {
91
+ for (const item of lines) {
92
+ insertLine.run([item.filePath, item.lineNumber, item.content]);
93
+ const row = selId.get([item.filePath, item.lineNumber]) as { id: number };
94
+ const idInt = BigInt(row.id);
95
+ deleteVec.run([idInt]);
96
+ insertVec.run([idInt, item.embedding.buffer]);
97
+ }
98
+ this.db.exec('COMMIT');
99
+ } catch (e) {
100
+ this.db.exec('ROLLBACK');
101
+ throw e;
93
102
  }
94
103
  }
95
104