@aeriondyseti/vector-memory-mcp 2.3.0-rc.2 → 2.3.0-rc.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@aeriondyseti/vector-memory-mcp",
3
- "version": "2.3.0-rc.2",
3
+ "version": "2.3.0-rc.4",
4
4
  "description": "A zero-configuration RAG memory server for MCP clients",
5
5
  "type": "module",
6
6
  "main": "server/index.ts",
@@ -47,18 +47,18 @@
47
47
  ],
48
48
  "license": "MIT",
49
49
  "dependencies": {
50
- "@huggingface/transformers": "^3.8.0",
50
+ "@huggingface/tokenizers": "^0.1.3",
51
51
  "@lancedb/lancedb": "^0.26.2",
52
52
  "@modelcontextprotocol/sdk": "^1.0.0",
53
53
  "arg": "^5.0.2",
54
- "hono": "^4.11.3"
54
+ "hono": "^4.11.3",
55
+ "onnxruntime-node": "^1.21.0"
55
56
  },
56
57
  "devDependencies": {
57
58
  "@types/bun": "latest",
58
59
  "typescript": "^5.0.0"
59
60
  },
60
61
  "trustedDependencies": [
61
- "protobufjs",
62
- "sharp"
62
+ "protobufjs"
63
63
  ]
64
64
  }
package/scripts/warmup.ts CHANGED
@@ -23,20 +23,11 @@ async function warmup(): Promise<void> {
23
23
  process.exit(1);
24
24
  }
25
25
 
26
- try {
27
- await import("sharp");
28
- console.log(" ✓ sharp loaded");
29
- } catch (e) {
30
- console.error(" ✗ sharp failed:", (e as Error).message);
31
- process.exit(1);
32
- }
33
-
34
26
  console.log();
35
27
 
36
28
  // Initialize embeddings service to download model
37
29
  console.log("📥 Downloading ML model (this may take a minute)...");
38
30
  console.log(` Model: ${config.embeddingModel}`);
39
- console.log(` Cache: ~/.cache/huggingface/`);
40
31
  console.log();
41
32
 
42
33
  const embeddings = new EmbeddingsService(
@@ -1,9 +1,17 @@
1
- import { pipeline, type FeatureExtractionPipeline } from "@huggingface/transformers";
1
+ import * as ort from "onnxruntime-node";
2
+ import { Tokenizer } from "@huggingface/tokenizers";
3
+ import { join, dirname } from "path";
4
+ import { mkdir } from "fs/promises";
5
+ import { existsSync } from "fs";
6
+
7
+ const HF_CDN = "https://huggingface.co";
8
+ const MAX_SEQ_LENGTH = 512;
2
9
 
3
10
  export class EmbeddingsService {
4
11
  private modelName: string;
5
- private extractor: FeatureExtractionPipeline | null = null;
6
- private initPromise: Promise<FeatureExtractionPipeline> | null = null;
12
+ private session: ort.InferenceSession | null = null;
13
+ private tokenizer: Tokenizer | null = null;
14
+ private initPromise: Promise<void> | null = null;
7
15
  private _dimension: number;
8
16
 
9
17
  constructor(modelName: string, dimension: number) {
@@ -15,27 +23,71 @@ export class EmbeddingsService {
15
23
  return this._dimension;
16
24
  }
17
25
 
18
- private async getExtractor(): Promise<FeatureExtractionPipeline> {
19
- if (this.extractor) {
20
- return this.extractor;
21
- }
22
-
26
+ private async initialize(): Promise<void> {
27
+ if (this.session) return;
23
28
  if (!this.initPromise) {
24
- this.initPromise = pipeline(
25
- "feature-extraction",
26
- this.modelName,
27
- { dtype: "fp32" } as any
28
- ) as Promise<FeatureExtractionPipeline>;
29
+ this.initPromise = this._init();
29
30
  }
31
+ await this.initPromise;
32
+ }
33
+
34
+ private get cacheDir(): string {
35
+ const packageRoot = join(dirname(Bun.main), "..");
36
+ return join(packageRoot, ".cache", "models", this.modelName);
37
+ }
30
38
 
31
- this.extractor = await this.initPromise;
32
- return this.extractor;
39
+ private async downloadIfMissing(fileName: string): Promise<string> {
40
+ const filePath = join(this.cacheDir, fileName);
41
+ if (existsSync(filePath)) return filePath;
42
+
43
+ const url = `${HF_CDN}/${this.modelName}/resolve/main/${fileName}`;
44
+ await mkdir(dirname(filePath), { recursive: true });
45
+ const response = await fetch(url);
46
+ if (!response.ok) throw new Error(`Failed to download ${url}: ${response.status}`);
47
+ const buffer = await response.arrayBuffer();
48
+ await Bun.write(filePath, buffer);
49
+ return filePath;
50
+ }
51
+
52
+ private async _init(): Promise<void> {
53
+ const modelPath = await this.downloadIfMissing("onnx/model.onnx");
54
+ const tokenizerJsonPath = await this.downloadIfMissing("tokenizer.json");
55
+ const tokenizerConfigPath = await this.downloadIfMissing("tokenizer_config.json");
56
+
57
+ this.session = await ort.InferenceSession.create(modelPath, {
58
+ executionProviders: ["cpu"],
59
+ });
60
+
61
+ const tokenizerJson = await Bun.file(tokenizerJsonPath).json();
62
+ const tokenizerConfig = await Bun.file(tokenizerConfigPath).json();
63
+ this.tokenizer = new Tokenizer(tokenizerJson, tokenizerConfig);
33
64
  }
34
65
 
35
66
  async embed(text: string): Promise<number[]> {
36
- const extractor = await this.getExtractor();
37
- const output = await extractor(text, { pooling: "mean", normalize: true });
38
- return Array.from(output.data as Float32Array);
67
+ await this.initialize();
68
+
69
+ const encoded = this.tokenizer!.encode(text);
70
+
71
+ // Truncate to model's max sequence length
72
+ const seqLen = Math.min(encoded.ids.length, MAX_SEQ_LENGTH);
73
+ const ids = encoded.ids.slice(0, seqLen);
74
+ const mask = encoded.attention_mask.slice(0, seqLen);
75
+
76
+ const inputIds = BigInt64Array.from(ids.map(BigInt));
77
+ const attentionMask = BigInt64Array.from(mask.map(BigInt));
78
+ const tokenTypeIds = new BigInt64Array(seqLen); // zeros for single-sequence input
79
+
80
+ const feeds: Record<string, ort.Tensor> = {
81
+ input_ids: new ort.Tensor("int64", inputIds, [1, seqLen]),
82
+ attention_mask: new ort.Tensor("int64", attentionMask, [1, seqLen]),
83
+ token_type_ids: new ort.Tensor("int64", tokenTypeIds, [1, seqLen]),
84
+ };
85
+
86
+ const output = await this.session!.run(feeds);
87
+ const lastHidden = output["last_hidden_state"];
88
+
89
+ const pooled = this.meanPool(lastHidden.data as Float32Array, mask, seqLen);
90
+ return this.normalize(pooled);
39
91
  }
40
92
 
41
93
  async embedBatch(texts: string[]): Promise<number[][]> {
@@ -45,4 +97,29 @@ export class EmbeddingsService {
45
97
  }
46
98
  return results;
47
99
  }
100
+
101
+ private meanPool(data: Float32Array, mask: number[], seqLen: number): number[] {
102
+ const dim = this._dimension;
103
+ const pooled = new Array(dim).fill(0);
104
+ let maskSum = 0;
105
+ for (let t = 0; t < seqLen; t++) {
106
+ if (mask[t]) {
107
+ maskSum += 1;
108
+ for (let d = 0; d < dim; d++) {
109
+ pooled[d] += data[t * dim + d];
110
+ }
111
+ }
112
+ }
113
+ for (let d = 0; d < dim; d++) {
114
+ pooled[d] /= maskSum;
115
+ }
116
+ return pooled;
117
+ }
118
+
119
+ private normalize(vec: number[]): number[] {
120
+ let norm = 0;
121
+ for (const v of vec) norm += v * v;
122
+ norm = Math.sqrt(norm);
123
+ return vec.map(v => v / norm);
124
+ }
48
125
  }
@@ -127,8 +127,43 @@ export async function backfillVectors(
127
127
  db: Database,
128
128
  embeddings: EmbeddingsService,
129
129
  ): Promise<void> {
130
+ // Fast sentinel check: skip the LEFT JOIN queries entirely when backfill is done
131
+ const sentinel = db
132
+ .prepare("SELECT 1 FROM memories_vec LIMIT 1")
133
+ .get();
134
+ const memoriesExist = db.prepare("SELECT 1 FROM memories LIMIT 1").get();
135
+ const convosExist = db.prepare("SELECT 1 FROM conversation_history LIMIT 1").get();
136
+
137
+ // If vec tables have data and source tables have data, backfill is likely complete.
138
+ // Only run the expensive LEFT JOIN when there's reason to suspect gaps.
139
+ const convoSentinel = db
140
+ .prepare("SELECT 1 FROM conversation_history_vec LIMIT 1")
141
+ .get();
142
+ const mayNeedMemoryBackfill = memoriesExist && !sentinel;
143
+ const mayNeedConvoBackfill = convosExist && !convoSentinel;
144
+
145
+ // If both vec tables are populated, do a quick count check to confirm
146
+ if (!mayNeedMemoryBackfill && !mayNeedConvoBackfill) {
147
+ if (memoriesExist) {
148
+ const gap = db.prepare(
149
+ `SELECT 1 FROM memories m LEFT JOIN memories_vec v ON m.id = v.id
150
+ WHERE v.id IS NULL OR length(v.vector) = 0 LIMIT 1`,
151
+ ).get();
152
+ if (!gap && convosExist) {
153
+ const convoGap = db.prepare(
154
+ `SELECT 1 FROM conversation_history c LEFT JOIN conversation_history_vec v ON c.id = v.id
155
+ WHERE v.id IS NULL OR length(v.vector) = 0 LIMIT 1`,
156
+ ).get();
157
+ if (!convoGap) return;
158
+ } else if (!gap && !convosExist) {
159
+ return;
160
+ }
161
+ } else {
162
+ return; // No data at all
163
+ }
164
+ }
165
+
130
166
  // ── Memories ──────────────────────────────────────────────────────
131
- // Catch both missing rows (v.id IS NULL) and corrupt 0-byte BLOBs
132
167
  const missingMemories = db
133
168
  .prepare(
134
169
  `SELECT m.id, m.content, json_extract(m.metadata, '$.type') AS type
@@ -151,14 +186,27 @@ export async function backfillVectors(
151
186
  new Array(embeddings.dimension).fill(0),
152
187
  );
153
188
 
154
- for (const row of missingMemories) {
155
- // Waypoints use a zero vector (not semantically searched)
156
- const blob =
157
- row.type === "waypoint"
158
- ? zeroVector
159
- : serializeVector(await embeddings.embed(row.content));
189
+ // Separate waypoints from content that needs embedding
190
+ const toEmbed = missingMemories.filter((r) => r.type !== "waypoint");
191
+ const waypoints = missingMemories.filter((r) => r.type === "waypoint");
192
+
193
+ // Batch embed all non-waypoint content
194
+ const vectors = toEmbed.length > 0
195
+ ? await embeddings.embedBatch(toEmbed.map((r) => r.content))
196
+ : [];
160
197
 
161
- insertVec.run(row.id, blob);
198
+ db.exec("BEGIN");
199
+ try {
200
+ for (const row of waypoints) {
201
+ insertVec.run(row.id, zeroVector);
202
+ }
203
+ for (let i = 0; i < toEmbed.length; i++) {
204
+ insertVec.run(toEmbed[i].id, serializeVector(vectors[i]));
205
+ }
206
+ db.exec("COMMIT");
207
+ } catch (e) {
208
+ db.exec("ROLLBACK");
209
+ throw e;
162
210
  }
163
211
 
164
212
  console.error(
@@ -185,17 +233,27 @@ export async function backfillVectors(
185
233
  "INSERT OR REPLACE INTO conversation_history_vec (id, vector) VALUES (?, ?)",
186
234
  );
187
235
 
188
- for (let i = 0; i < missingConvos.length; i++) {
189
- const row = missingConvos[i];
190
- const vec = serializeVector(await embeddings.embed(row.content));
191
- insertConvoVec.run(row.id, vec);
236
+ // Batch embed in chunks of 32
237
+ const BATCH_SIZE = 32;
238
+ db.exec("BEGIN");
239
+ try {
240
+ for (let i = 0; i < missingConvos.length; i += BATCH_SIZE) {
241
+ const batch = missingConvos.slice(i, i + BATCH_SIZE);
242
+ const vecs = await embeddings.embedBatch(batch.map((r) => r.content));
243
+ for (let j = 0; j < batch.length; j++) {
244
+ insertConvoVec.run(batch[j].id, serializeVector(vecs[j]));
245
+ }
192
246
 
193
- // Log progress every 100 chunks
194
- if ((i + 1) % 100 === 0) {
195
- console.error(
196
- `[vector-memory-mcp] ...${i + 1}/${missingConvos.length} conversation chunks`,
197
- );
247
+ if ((i + BATCH_SIZE) % 100 < BATCH_SIZE) {
248
+ console.error(
249
+ `[vector-memory-mcp] ...${Math.min(i + BATCH_SIZE, missingConvos.length)}/${missingConvos.length} conversation chunks`,
250
+ );
251
+ }
198
252
  }
253
+ db.exec("COMMIT");
254
+ } catch (e) {
255
+ db.exec("ROLLBACK");
256
+ throw e;
199
257
  }
200
258
 
201
259
  console.error(
package/server/index.ts CHANGED
@@ -25,17 +25,15 @@ async function main(): Promise<void> {
25
25
  const overrides = parseCliArgs(args);
26
26
  const config = loadConfig(overrides);
27
27
 
28
- // Initialize database
28
+ // Initialize database and backfill any missing vectors before services start
29
29
  const db = connectToDatabase(config.dbPath);
30
+ const embeddings = new EmbeddingsService(config.embeddingModel, config.embeddingDimension);
31
+ await backfillVectors(db, embeddings);
30
32
 
31
33
  // Initialize layers
32
34
  const repository = new MemoryRepository(db);
33
- const embeddings = new EmbeddingsService(config.embeddingModel, config.embeddingDimension);
34
35
  const memoryService = new MemoryService(repository, embeddings);
35
36
 
36
- // Backfill any missing vectors (e.g. after vec0-to-BLOB migration)
37
- await backfillVectors(db, embeddings);
38
-
39
37
  if (config.pluginMode) {
40
38
  console.error("[vector-memory-mcp] Running in plugin mode");
41
39
  }