@aeriondyseti/vector-memory-mcp 2.3.0-rc.3 → 2.3.0-rc.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +5 -5
- package/scripts/warmup.ts +0 -9
- package/server/core/embeddings.service.ts +95 -18
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@aeriondyseti/vector-memory-mcp",
|
|
3
|
-
"version": "2.3.0-rc.3",
|
|
3
|
+
"version": "2.3.0-rc.4",
|
|
4
4
|
"description": "A zero-configuration RAG memory server for MCP clients",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "server/index.ts",
|
|
@@ -47,18 +47,18 @@
|
|
|
47
47
|
],
|
|
48
48
|
"license": "MIT",
|
|
49
49
|
"dependencies": {
|
|
50
|
-
"@huggingface/
|
|
50
|
+
"@huggingface/tokenizers": "^0.1.3",
|
|
51
51
|
"@lancedb/lancedb": "^0.26.2",
|
|
52
52
|
"@modelcontextprotocol/sdk": "^1.0.0",
|
|
53
53
|
"arg": "^5.0.2",
|
|
54
|
-
"hono": "^4.11.3"
|
|
54
|
+
"hono": "^4.11.3",
|
|
55
|
+
"onnxruntime-node": "^1.21.0"
|
|
55
56
|
},
|
|
56
57
|
"devDependencies": {
|
|
57
58
|
"@types/bun": "latest",
|
|
58
59
|
"typescript": "^5.0.0"
|
|
59
60
|
},
|
|
60
61
|
"trustedDependencies": [
|
|
61
|
-
"protobufjs",
|
|
62
|
-
"sharp"
|
|
62
|
+
"protobufjs"
|
|
63
63
|
]
|
|
64
64
|
}
|
package/scripts/warmup.ts
CHANGED
|
@@ -23,20 +23,11 @@ async function warmup(): Promise<void> {
|
|
|
23
23
|
process.exit(1);
|
|
24
24
|
}
|
|
25
25
|
|
|
26
|
-
try {
|
|
27
|
-
await import("sharp");
|
|
28
|
-
console.log(" ✓ sharp loaded");
|
|
29
|
-
} catch (e) {
|
|
30
|
-
console.error(" ✗ sharp failed:", (e as Error).message);
|
|
31
|
-
process.exit(1);
|
|
32
|
-
}
|
|
33
|
-
|
|
34
26
|
console.log();
|
|
35
27
|
|
|
36
28
|
// Initialize embeddings service to download model
|
|
37
29
|
console.log("📥 Downloading ML model (this may take a minute)...");
|
|
38
30
|
console.log(` Model: ${config.embeddingModel}`);
|
|
39
|
-
console.log(` Cache: ~/.cache/huggingface/`);
|
|
40
31
|
console.log();
|
|
41
32
|
|
|
42
33
|
const embeddings = new EmbeddingsService(
|
package/server/core/embeddings.service.ts
CHANGED
|
@@ -1,9 +1,17 @@
|
|
|
1
|
-
import
|
|
1
|
+
import * as ort from "onnxruntime-node";
|
|
2
|
+
import { Tokenizer } from "@huggingface/tokenizers";
|
|
3
|
+
import { join, dirname } from "path";
|
|
4
|
+
import { mkdir } from "fs/promises";
|
|
5
|
+
import { existsSync } from "fs";
|
|
6
|
+
|
|
7
|
+
const HF_CDN = "https://huggingface.co";
|
|
8
|
+
const MAX_SEQ_LENGTH = 512;
|
|
2
9
|
|
|
3
10
|
export class EmbeddingsService {
|
|
4
11
|
private modelName: string;
|
|
5
|
-
private
|
|
6
|
-
private
|
|
12
|
+
private session: ort.InferenceSession | null = null;
|
|
13
|
+
private tokenizer: Tokenizer | null = null;
|
|
14
|
+
private initPromise: Promise<void> | null = null;
|
|
7
15
|
private _dimension: number;
|
|
8
16
|
|
|
9
17
|
constructor(modelName: string, dimension: number) {
|
|
@@ -15,27 +23,71 @@ export class EmbeddingsService {
|
|
|
15
23
|
return this._dimension;
|
|
16
24
|
}
|
|
17
25
|
|
|
18
|
-
private async
|
|
19
|
-
if (this.
|
|
20
|
-
return this.extractor;
|
|
21
|
-
}
|
|
22
|
-
|
|
26
|
+
private async initialize(): Promise<void> {
|
|
27
|
+
if (this.session) return;
|
|
23
28
|
if (!this.initPromise) {
|
|
24
|
-
this.initPromise =
|
|
25
|
-
"feature-extraction",
|
|
26
|
-
this.modelName,
|
|
27
|
-
{ dtype: "fp32" } as any
|
|
28
|
-
) as Promise<FeatureExtractionPipeline>;
|
|
29
|
+
this.initPromise = this._init();
|
|
29
30
|
}
|
|
31
|
+
await this.initPromise;
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
private get cacheDir(): string {
|
|
35
|
+
const packageRoot = join(dirname(Bun.main), "..");
|
|
36
|
+
return join(packageRoot, ".cache", "models", this.modelName);
|
|
37
|
+
}
|
|
30
38
|
|
|
31
|
-
|
|
32
|
-
|
|
39
|
+
private async downloadIfMissing(fileName: string): Promise<string> {
|
|
40
|
+
const filePath = join(this.cacheDir, fileName);
|
|
41
|
+
if (existsSync(filePath)) return filePath;
|
|
42
|
+
|
|
43
|
+
const url = `${HF_CDN}/${this.modelName}/resolve/main/${fileName}`;
|
|
44
|
+
await mkdir(dirname(filePath), { recursive: true });
|
|
45
|
+
const response = await fetch(url);
|
|
46
|
+
if (!response.ok) throw new Error(`Failed to download ${url}: ${response.status}`);
|
|
47
|
+
const buffer = await response.arrayBuffer();
|
|
48
|
+
await Bun.write(filePath, buffer);
|
|
49
|
+
return filePath;
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
private async _init(): Promise<void> {
|
|
53
|
+
const modelPath = await this.downloadIfMissing("onnx/model.onnx");
|
|
54
|
+
const tokenizerJsonPath = await this.downloadIfMissing("tokenizer.json");
|
|
55
|
+
const tokenizerConfigPath = await this.downloadIfMissing("tokenizer_config.json");
|
|
56
|
+
|
|
57
|
+
this.session = await ort.InferenceSession.create(modelPath, {
|
|
58
|
+
executionProviders: ["cpu"],
|
|
59
|
+
});
|
|
60
|
+
|
|
61
|
+
const tokenizerJson = await Bun.file(tokenizerJsonPath).json();
|
|
62
|
+
const tokenizerConfig = await Bun.file(tokenizerConfigPath).json();
|
|
63
|
+
this.tokenizer = new Tokenizer(tokenizerJson, tokenizerConfig);
|
|
33
64
|
}
|
|
34
65
|
|
|
35
66
|
async embed(text: string): Promise<number[]> {
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
67
|
+
await this.initialize();
|
|
68
|
+
|
|
69
|
+
const encoded = this.tokenizer!.encode(text);
|
|
70
|
+
|
|
71
|
+
// Truncate to model's max sequence length
|
|
72
|
+
const seqLen = Math.min(encoded.ids.length, MAX_SEQ_LENGTH);
|
|
73
|
+
const ids = encoded.ids.slice(0, seqLen);
|
|
74
|
+
const mask = encoded.attention_mask.slice(0, seqLen);
|
|
75
|
+
|
|
76
|
+
const inputIds = BigInt64Array.from(ids.map(BigInt));
|
|
77
|
+
const attentionMask = BigInt64Array.from(mask.map(BigInt));
|
|
78
|
+
const tokenTypeIds = new BigInt64Array(seqLen); // zeros for single-sequence input
|
|
79
|
+
|
|
80
|
+
const feeds: Record<string, ort.Tensor> = {
|
|
81
|
+
input_ids: new ort.Tensor("int64", inputIds, [1, seqLen]),
|
|
82
|
+
attention_mask: new ort.Tensor("int64", attentionMask, [1, seqLen]),
|
|
83
|
+
token_type_ids: new ort.Tensor("int64", tokenTypeIds, [1, seqLen]),
|
|
84
|
+
};
|
|
85
|
+
|
|
86
|
+
const output = await this.session!.run(feeds);
|
|
87
|
+
const lastHidden = output["last_hidden_state"];
|
|
88
|
+
|
|
89
|
+
const pooled = this.meanPool(lastHidden.data as Float32Array, mask, seqLen);
|
|
90
|
+
return this.normalize(pooled);
|
|
39
91
|
}
|
|
40
92
|
|
|
41
93
|
async embedBatch(texts: string[]): Promise<number[][]> {
|
|
@@ -45,4 +97,29 @@ export class EmbeddingsService {
|
|
|
45
97
|
}
|
|
46
98
|
return results;
|
|
47
99
|
}
|
|
100
|
+
|
|
101
|
+
private meanPool(data: Float32Array, mask: number[], seqLen: number): number[] {
|
|
102
|
+
const dim = this._dimension;
|
|
103
|
+
const pooled = new Array(dim).fill(0);
|
|
104
|
+
let maskSum = 0;
|
|
105
|
+
for (let t = 0; t < seqLen; t++) {
|
|
106
|
+
if (mask[t]) {
|
|
107
|
+
maskSum += 1;
|
|
108
|
+
for (let d = 0; d < dim; d++) {
|
|
109
|
+
pooled[d] += data[t * dim + d];
|
|
110
|
+
}
|
|
111
|
+
}
|
|
112
|
+
}
|
|
113
|
+
for (let d = 0; d < dim; d++) {
|
|
114
|
+
pooled[d] /= maskSum;
|
|
115
|
+
}
|
|
116
|
+
return pooled;
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
private normalize(vec: number[]): number[] {
|
|
120
|
+
let norm = 0;
|
|
121
|
+
for (const v of vec) norm += v * v;
|
|
122
|
+
norm = Math.sqrt(norm);
|
|
123
|
+
return vec.map(v => v / norm);
|
|
124
|
+
}
|
|
48
125
|
}
|