vecbox 0.2.1 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +40 -0
- package/README.md +256 -270
- package/dist/index.cjs +62 -224
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +7 -7
- package/dist/index.d.ts +7 -7
- package/dist/index.js +62 -225
- package/dist/index.js.map +1 -1
- package/dist/{llama_embedding-EC3MWSUZ.node → llama_embedding.node} +0 -0
- package/native/index.js +13 -1
- package/package.json +7 -14
- package/src/providers/llamacpp.ts +63 -199
- package/src/providers/mistral.ts +4 -0
- package/native/README.md +0 -67
- package/native/llama_embedding.cpp +0 -179
package/native/index.js
CHANGED
|
@@ -1,4 +1,16 @@
|
|
|
1
|
-
|
|
1
|
+
// Try to load the native binding from different locations
|
|
2
|
+
let binding;
|
|
3
|
+
try {
|
|
4
|
+
binding = require('./llama_embedding.node');
|
|
5
|
+
} catch (error) {
|
|
6
|
+
try {
|
|
7
|
+
binding = require('./build/Release/llama_embedding.node');
|
|
8
|
+
} catch (fallbackError) {
|
|
9
|
+
throw new Error(`Failed to load native binding: ${fallbackError.message}`);
|
|
10
|
+
}
|
|
11
|
+
}
|
|
12
|
+
|
|
13
|
+
console.log(`Native binding loaded from: ${binding ? 'success' : 'failed'}`);
|
|
2
14
|
|
|
3
15
|
class LlamaEmbedding {
|
|
4
16
|
constructor(modelPath) {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "vecbox",
|
|
3
|
-
"version": "0.2.1",
|
|
3
|
+
"version": "0.2.3",
|
|
4
4
|
"description": "A minimal and powerful embedding library that supports multiple providers with automatic detection and fallback capabilities",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -23,7 +23,7 @@
|
|
|
23
23
|
"test:native": "vitest test/native.test.js",
|
|
24
24
|
"test:integration": "vitest test/integration.test.js",
|
|
25
25
|
"example": "tsx examples/basic-usage.ts",
|
|
26
|
-
"lint": "eslint . --ext .
|
|
26
|
+
"lint": "eslint . --ext .js",
|
|
27
27
|
"lint:fix": "eslint . --ext .ts,.js --fix",
|
|
28
28
|
"prepublishOnly": "npm run build:all"
|
|
29
29
|
},
|
|
@@ -59,22 +59,15 @@
|
|
|
59
59
|
"node": ">=16.0.0"
|
|
60
60
|
},
|
|
61
61
|
"packageManager": "pnpm@10.28.2",
|
|
62
|
-
"devDependencies": {
|
|
63
|
-
"@eslint/js": "^10.0.1",
|
|
64
|
-
"@types/node": "^25.2.3",
|
|
65
|
-
"eslint": "^10.0.0",
|
|
66
|
-
"globals": "^17.3.0",
|
|
67
|
-
"jiti": "^2.6.1",
|
|
68
|
-
"tsup": "^8.5.1",
|
|
69
|
-
"tsx": "^4.21.0",
|
|
70
|
-
"typescript": "^5.9.3",
|
|
71
|
-
"typescript-eslint": "^8.55.0",
|
|
72
|
-
"vitest": "^4.0.18"
|
|
73
|
-
},
|
|
74
62
|
"dependencies": {
|
|
75
63
|
"@google/generative-ai": "^0.24.1",
|
|
76
64
|
"@mistralai/mistralai": "^1.14.0",
|
|
77
65
|
"dotenv": "^17.3.1",
|
|
78
66
|
"openai": "^6.21.0"
|
|
67
|
+
},
|
|
68
|
+
"devDependencies": {
|
|
69
|
+
"@types/node": "^25.2.3",
|
|
70
|
+
"tsup": "^8.5.1",
|
|
71
|
+
"typescript": "^5.9.3"
|
|
79
72
|
}
|
|
80
73
|
}
|
|
@@ -1,20 +1,40 @@
|
|
|
1
|
-
|
|
2
|
-
* Llama.cpp Provider - Local embeddings using llama.cpp directly
|
|
3
|
-
* Uses native N-API module for better performance
|
|
4
|
-
*/
|
|
5
|
-
|
|
6
|
-
import { access, constants } from 'fs/promises';
|
|
1
|
+
import { access, constants, readFile as fsReadFile } from 'fs/promises';
|
|
7
2
|
import { join, resolve } from 'path';
|
|
8
3
|
import { EmbeddingProvider } from '@providers/base/EmbeddingProvider';
|
|
9
4
|
import type { EmbedConfig, EmbedInput, EmbedResult, BatchEmbedResult } from '@src/types/index';
|
|
10
5
|
import { logger } from '@src/util/logger';
|
|
11
6
|
import * as http from 'http';
|
|
12
7
|
|
|
8
|
+
/**
|
|
9
|
+
* Llama.cpp Provider - Local embeddings using llama.cpp directly
|
|
10
|
+
* Uses native N-API module for better performance
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
13
|
// Try to import native module
|
|
14
14
|
let nativeModule: any = null;
|
|
15
15
|
try {
|
|
16
|
-
|
|
17
|
-
|
|
16
|
+
// Try different paths for native module
|
|
17
|
+
const possiblePaths = [
|
|
18
|
+
'../../native', // Development
|
|
19
|
+
'vecbox/native', // Installed as dependency
|
|
20
|
+
'./native', // Same directory
|
|
21
|
+
'../native', // One level up
|
|
22
|
+
'../vecbox/native' // When installed via npm
|
|
23
|
+
];
|
|
24
|
+
|
|
25
|
+
for (const path of possiblePaths) {
|
|
26
|
+
try {
|
|
27
|
+
nativeModule = require(path);
|
|
28
|
+
logger.info(`Using native Llama.cpp module from: ${path}`);
|
|
29
|
+
break;
|
|
30
|
+
} catch (e) {
|
|
31
|
+
// Continue to next path
|
|
32
|
+
}
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
if (!nativeModule) {
|
|
36
|
+
throw new Error('Native module not found in any path');
|
|
37
|
+
}
|
|
18
38
|
} catch (error) {
|
|
19
39
|
logger.warn('Native module not available, falling back to HTTP');
|
|
20
40
|
}
|
|
@@ -88,6 +108,20 @@ export class LlamaCppProvider extends EmbeddingProvider {
|
|
|
88
108
|
}
|
|
89
109
|
}
|
|
90
110
|
|
|
111
|
+
private generateEmbedding(modelBuffer: Buffer, text: string): number[] {
|
|
112
|
+
// Use the loaded model to generate embedding
|
|
113
|
+
logger.debug(`Generating embedding with model buffer (${modelBuffer.length} bytes)`);
|
|
114
|
+
|
|
115
|
+
// TODO: Implement actual Llama.cpp embedding generation
|
|
116
|
+
// For now, return mock embedding based on text length
|
|
117
|
+
const embedding = [];
|
|
118
|
+
for (let i = 0; i < Math.min(text.length, 768); i++) {
|
|
119
|
+
embedding.push(Math.sin(i * 0.1) * (i % 10));
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
return embedding;
|
|
123
|
+
}
|
|
124
|
+
|
|
91
125
|
async embed(input: EmbedInput): Promise<EmbedResult> {
|
|
92
126
|
try {
|
|
93
127
|
logger.debug(`Embedding text with llama.cpp: ${this.getModel()}`);
|
|
@@ -97,8 +131,8 @@ export class LlamaCppProvider extends EmbeddingProvider {
|
|
|
97
131
|
throw new Error('Text input cannot be empty');
|
|
98
132
|
}
|
|
99
133
|
|
|
134
|
+
// Use native module for now
|
|
100
135
|
if (this.useNative && this.nativeModel) {
|
|
101
|
-
// Use native module
|
|
102
136
|
const embedding = this.nativeModel.embed(text);
|
|
103
137
|
|
|
104
138
|
return {
|
|
@@ -109,26 +143,8 @@ export class LlamaCppProvider extends EmbeddingProvider {
|
|
|
109
143
|
};
|
|
110
144
|
}
|
|
111
145
|
|
|
112
|
-
// Fallback
|
|
113
|
-
|
|
114
|
-
input: text,
|
|
115
|
-
model: await this.getModelPath(),
|
|
116
|
-
pooling: 'mean',
|
|
117
|
-
normalize: 2
|
|
118
|
-
};
|
|
119
|
-
|
|
120
|
-
// Execute HTTP request to llama.cpp server
|
|
121
|
-
const result = await this.executeLlamaEmbedding([JSON.stringify(requestBody)]);
|
|
122
|
-
|
|
123
|
-
// Parse output to extract embedding
|
|
124
|
-
const embedding = this.parseRawOutput(result.stdout);
|
|
125
|
-
|
|
126
|
-
return {
|
|
127
|
-
embedding,
|
|
128
|
-
dimensions: embedding.length,
|
|
129
|
-
model: this.getModel(),
|
|
130
|
-
provider: 'llamacpp',
|
|
131
|
-
};
|
|
146
|
+
// Fallback: return error if native module not available
|
|
147
|
+
throw new Error('Direct Llama.cpp integration requires native module. Please ensure native module is properly compiled.');
|
|
132
148
|
} catch (error: unknown) {
|
|
133
149
|
logger.error(`Llama.cpp embedding failed: ${(error instanceof Error ? error.message : String(error))}`);
|
|
134
150
|
throw error;
|
|
@@ -163,42 +179,8 @@ export class LlamaCppProvider extends EmbeddingProvider {
|
|
|
163
179
|
};
|
|
164
180
|
}
|
|
165
181
|
|
|
166
|
-
// Fallback
|
|
167
|
-
|
|
168
|
-
for (const input of inputs) {
|
|
169
|
-
const text = await this.readInput(input);
|
|
170
|
-
if (text.trim()) {
|
|
171
|
-
texts.push(text);
|
|
172
|
-
}
|
|
173
|
-
}
|
|
174
|
-
|
|
175
|
-
if (texts.length === 0) {
|
|
176
|
-
throw new Error('No valid texts to embed');
|
|
177
|
-
}
|
|
178
|
-
|
|
179
|
-
// For batch processing, use HTTP API
|
|
180
|
-
const modelPath = await this.getModelPath();
|
|
181
|
-
const requests = inputs.map(input => ({
|
|
182
|
-
input: input.text || '',
|
|
183
|
-
model: modelPath,
|
|
184
|
-
pooling: 'mean',
|
|
185
|
-
normalize: 2
|
|
186
|
-
}));
|
|
187
|
-
|
|
188
|
-
// Execute batch requests (for now, do individual requests)
|
|
189
|
-
const embeddings: number[][] = [];
|
|
190
|
-
for (const request of requests) {
|
|
191
|
-
const result = await this.executeLlamaEmbedding([JSON.stringify(request)]);
|
|
192
|
-
const embedding = this.parseRawOutput(result.stdout);
|
|
193
|
-
embeddings.push(embedding);
|
|
194
|
-
}
|
|
195
|
-
|
|
196
|
-
return {
|
|
197
|
-
embeddings,
|
|
198
|
-
dimensions: embeddings[0]?.length || 0,
|
|
199
|
-
model: this.getModel(),
|
|
200
|
-
provider: 'llamacpp',
|
|
201
|
-
};
|
|
182
|
+
// Fallback: return error if native module not available
|
|
183
|
+
throw new Error('Direct Llama.cpp integration requires native module. Please ensure native module is properly compiled.');
|
|
202
184
|
} catch (error: unknown) {
|
|
203
185
|
logger.error(`Llama.cpp batch embedding failed: ${(error instanceof Error ? error.message : String(error))}`);
|
|
204
186
|
throw error;
|
|
@@ -223,148 +205,30 @@ export class LlamaCppProvider extends EmbeddingProvider {
|
|
|
223
205
|
return this.modelPath;
|
|
224
206
|
}
|
|
225
207
|
|
|
226
|
-
// Private helper methods
|
|
227
208
|
private async getModelPath(): Promise<string> {
|
|
228
|
-
//
|
|
209
|
+
// If modelPath is already absolute, return as-is
|
|
210
|
+
if (this.modelPath.startsWith('/') || this.modelPath.startsWith('./')) {
|
|
211
|
+
return this.modelPath;
|
|
212
|
+
}
|
|
213
|
+
|
|
214
|
+
// Try to resolve model path relative to current directory
|
|
229
215
|
const possiblePaths = [
|
|
230
|
-
this.modelPath,
|
|
231
|
-
join('
|
|
232
|
-
join('
|
|
233
|
-
this.modelPath //
|
|
216
|
+
resolve(this.modelPath), // Current directory
|
|
217
|
+
join('core/models', this.modelPath), // core/models subdirectory
|
|
218
|
+
join('models', this.modelPath), // models subdirectory
|
|
219
|
+
join(__dirname, '../../core/models', this.modelPath), // Relative to dist
|
|
234
220
|
];
|
|
235
|
-
|
|
221
|
+
|
|
236
222
|
for (const path of possiblePaths) {
|
|
237
223
|
try {
|
|
238
224
|
await access(path, constants.F_OK);
|
|
239
|
-
return
|
|
240
|
-
} catch {
|
|
241
|
-
|
|
242
|
-
}
|
|
243
|
-
}
|
|
244
|
-
|
|
245
|
-
throw new Error(`Model file not found: ${this.modelPath}`);
|
|
246
|
-
}
|
|
247
|
-
|
|
248
|
-
private async executeLlamaEmbedding(args: string[]): Promise<{stdout: string; stderr: string}> {
|
|
249
|
-
return new Promise((resolve, reject) => {
|
|
250
|
-
// Use HTTP API instead of CLI for cleaner output
|
|
251
|
-
const port = 8080; // Default llama.cpp server port
|
|
252
|
-
|
|
253
|
-
// Parse the request body from args[0] (JSON string)
|
|
254
|
-
let requestBody;
|
|
255
|
-
try {
|
|
256
|
-
requestBody = JSON.parse(args[0] || '{}');
|
|
257
|
-
} catch {
|
|
258
|
-
reject(new Error('Invalid request body for HTTP API'));
|
|
259
|
-
return;
|
|
260
|
-
}
|
|
261
|
-
|
|
262
|
-
const postData = JSON.stringify(requestBody);
|
|
263
|
-
|
|
264
|
-
const options = {
|
|
265
|
-
hostname: 'localhost',
|
|
266
|
-
port: port,
|
|
267
|
-
path: '/embedding',
|
|
268
|
-
method: 'POST',
|
|
269
|
-
headers: {
|
|
270
|
-
'Content-Type': 'application/json',
|
|
271
|
-
'Content-Length': Buffer.byteLength(postData)
|
|
272
|
-
}
|
|
273
|
-
};
|
|
274
|
-
|
|
275
|
-
const req = http.request(options, (res: http.IncomingMessage) => {
|
|
276
|
-
let data = '';
|
|
277
|
-
|
|
278
|
-
res.on('data', (chunk: Buffer | string) => {
|
|
279
|
-
data += chunk;
|
|
280
|
-
});
|
|
281
|
-
|
|
282
|
-
res.on('end', () => {
|
|
283
|
-
if (res.statusCode === 200) {
|
|
284
|
-
resolve({ stdout: data, stderr: '' });
|
|
285
|
-
} else {
|
|
286
|
-
reject(new Error(`HTTP ${res.statusCode}: ${data}`));
|
|
287
|
-
}
|
|
288
|
-
});
|
|
289
|
-
});
|
|
290
|
-
|
|
291
|
-
req.on('error', (error: Error) => {
|
|
292
|
-
reject(new Error(`Failed to connect to llama.cpp server: ${(error instanceof Error ? error.message : String(error))}`));
|
|
293
|
-
});
|
|
294
|
-
|
|
295
|
-
req.write(postData);
|
|
296
|
-
req.end();
|
|
297
|
-
});
|
|
298
|
-
}
|
|
299
|
-
|
|
300
|
-
private parseRawOutput(output: string): number[] {
|
|
301
|
-
try {
|
|
302
|
-
const response = JSON.parse(output);
|
|
303
|
-
|
|
304
|
-
logger.debug(`PARSE DEBUG: Response type: ${typeof response}`);
|
|
305
|
-
logger.debug(`PARSE DEBUG: Is Array: ${Array.isArray(response)}`);
|
|
306
|
-
|
|
307
|
-
// CASE 1: Array of objects with nested embedding
|
|
308
|
-
// Format: [{index: 0, embedding: [[...]]}]
|
|
309
|
-
if (Array.isArray(response) && response.length > 0) {
|
|
310
|
-
const first = response[0];
|
|
311
|
-
|
|
312
|
-
if (first && first.embedding && Array.isArray(first.embedding)) {
|
|
313
|
-
const emb = first.embedding;
|
|
314
|
-
|
|
315
|
-
// Check if nested: [[...]]
|
|
316
|
-
if (Array.isArray(emb[0])) {
|
|
317
|
-
const flat = emb[0]; // ← Take the inner array
|
|
318
|
-
logger.debug(`Parsed ${flat.length} dimensions (nested)`);
|
|
319
|
-
return flat;
|
|
320
|
-
}
|
|
321
|
-
|
|
322
|
-
// Not nested: [...]
|
|
323
|
-
logger.debug(`Parsed ${emb.length} dimensions (direct)`);
|
|
324
|
-
return emb;
|
|
325
|
-
}
|
|
326
|
-
}
|
|
327
|
-
|
|
328
|
-
// CASE 2: Direct object {embedding: [...]}
|
|
329
|
-
if (response.embedding && Array.isArray(response.embedding)) {
|
|
330
|
-
const emb = response.embedding;
|
|
331
|
-
|
|
332
|
-
// Check nested
|
|
333
|
-
if (Array.isArray(emb[0])) {
|
|
334
|
-
return emb[0];
|
|
335
|
-
}
|
|
336
|
-
|
|
337
|
-
return emb;
|
|
338
|
-
}
|
|
339
|
-
|
|
340
|
-
// CASE 3: Direct array of numbers
|
|
341
|
-
if (Array.isArray(response) && typeof response[0] === 'number') {
|
|
342
|
-
logger.debug(`Parsed ${response.length} dimensions (flat array)`);
|
|
343
|
-
return response;
|
|
225
|
+
return path;
|
|
226
|
+
} catch (e) {
|
|
227
|
+
// Continue to next path
|
|
344
228
|
}
|
|
345
|
-
|
|
346
|
-
throw new Error(`Unexpected format: ${JSON.stringify(Object.keys(response))}`);
|
|
347
|
-
|
|
348
|
-
} catch (error: unknown) {
|
|
349
|
-
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
350
|
-
throw new Error(`Parse failed: ${errorMessage}`);
|
|
351
229
|
}
|
|
352
|
-
}
|
|
353
|
-
|
|
354
|
-
private parseArrayOutput(output: string): number[][] {
|
|
355
|
-
// Parse array format: [[val1,val2,...], [val1,val2,...], ...]
|
|
356
|
-
const arrayPattern = /\[([^\]]+)\]/g;
|
|
357
|
-
const matches = [...output.matchAll(arrayPattern)];
|
|
358
230
|
|
|
359
|
-
if (
|
|
360
|
-
|
|
361
|
-
}
|
|
362
|
-
|
|
363
|
-
const embeddings = matches.map(match => {
|
|
364
|
-
const values = match[1]?.split(',').map(v => v.trim()) || [];
|
|
365
|
-
return values.map(v => parseFloat(v)).filter(v => !isNaN(v));
|
|
366
|
-
}).filter(embedding => embedding.length > 0);
|
|
367
|
-
|
|
368
|
-
return embeddings;
|
|
231
|
+
// Return original path if none found (will fail later with proper error)
|
|
232
|
+
return this.modelPath;
|
|
369
233
|
}
|
|
370
234
|
}
|
package/src/providers/mistral.ts
CHANGED
|
@@ -8,6 +8,10 @@ const logger = Logger.createModuleLogger('mistral');
|
|
|
8
8
|
export class MistralProvider extends EmbeddingProvider {
|
|
9
9
|
private client: Mistral;
|
|
10
10
|
|
|
11
|
+
protected getModel(): string {
|
|
12
|
+
return this.config.model || 'mistral-embed';
|
|
13
|
+
}
|
|
14
|
+
|
|
11
15
|
constructor(config: EmbedConfig) {
|
|
12
16
|
super(config);
|
|
13
17
|
|
package/native/README.md
DELETED
|
@@ -1,67 +0,0 @@
|
|
|
1
|
-
# Native Llama.cpp Module
|
|
2
|
-
|
|
3
|
-
Módulo Node.js nativo para embeddings locais usando Llama.cpp diretamente.
|
|
4
|
-
|
|
5
|
-
## 🔨 Build
|
|
6
|
-
|
|
7
|
-
### Pré-requisitos
|
|
8
|
-
- Node.js 16+
|
|
9
|
-
- Python 3.8+
|
|
10
|
-
- C++ compiler (GCC/Clang/MSVC)
|
|
11
|
-
- CMake 3.16+
|
|
12
|
-
|
|
13
|
-
### Build Manual
|
|
14
|
-
```bash
|
|
15
|
-
cd native
|
|
16
|
-
npm install
|
|
17
|
-
npm run build
|
|
18
|
-
```
|
|
19
|
-
|
|
20
|
-
### Build Automático
|
|
21
|
-
```bash
|
|
22
|
-
# Do projeto raiz
|
|
23
|
-
npm run build:native
|
|
24
|
-
```
|
|
25
|
-
|
|
26
|
-
## 📦 Estrutura
|
|
27
|
-
|
|
28
|
-
```
|
|
29
|
-
native/
|
|
30
|
-
├── binding.gyp <- Configuração build
|
|
31
|
-
├── llama_embedding.cpp <- Código C++ principal
|
|
32
|
-
├── index.js <- Interface JS
|
|
33
|
-
├── package.json <- Deps específicas
|
|
34
|
-
├── build/Release/ <- Binário compilado
|
|
35
|
-
└── README.md <- Este arquivo
|
|
36
|
-
```
|
|
37
|
-
|
|
38
|
-
## 🚀 Uso
|
|
39
|
-
|
|
40
|
-
```javascript
|
|
41
|
-
const llama = require('./native');
|
|
42
|
-
|
|
43
|
-
// Carrega modelo
|
|
44
|
-
const model = llama.create('path/to/model.gguf');
|
|
45
|
-
|
|
46
|
-
// Gera embedding
|
|
47
|
-
const embedding = model.embed('Hello world');
|
|
48
|
-
|
|
49
|
-
// Libera recursos
|
|
50
|
-
model.close();
|
|
51
|
-
```
|
|
52
|
-
|
|
53
|
-
## 🔧 Integração
|
|
54
|
-
|
|
55
|
-
O módulo é automaticamente importado pelo `LlamaCppProvider` com fallback para HTTP se não disponível.
|
|
56
|
-
|
|
57
|
-
## 🐛 Troubleshooting
|
|
58
|
-
|
|
59
|
-
### Build falha
|
|
60
|
-
- Verifique se as dependências do sistema estão instaladas
|
|
61
|
-
- Certifique-se de que o Node.js versão 16+ está sendo usado
|
|
62
|
-
- Verifique se o CMake está disponível
|
|
63
|
-
|
|
64
|
-
### Módulo não carrega
|
|
65
|
-
- Verifique se o binário `llama_embedding.node` foi gerado
|
|
66
|
-
- Verifique se a arquitetura do binário corresponde ao sistema
|
|
67
|
-
- Consulte os logs para detalhes do erro
|
|
@@ -1,179 +0,0 @@
|
|
|
1
|
-
#include <napi.h>
|
|
2
|
-
#include <string>
|
|
3
|
-
#include <vector>
|
|
4
|
-
#include <memory>
|
|
5
|
-
|
|
6
|
-
// Llama.cpp includes
|
|
7
|
-
#include "llama.h"
|
|
8
|
-
#include "ggml.h"
|
|
9
|
-
#include "ggml-cpu.h"
|
|
10
|
-
|
|
11
|
-
struct ModelData {
|
|
12
|
-
llama_model* model;
|
|
13
|
-
llama_context* ctx;
|
|
14
|
-
int n_embd;
|
|
15
|
-
};
|
|
16
|
-
|
|
17
|
-
// Helper function to throw N-API error
|
|
18
|
-
Napi::Error throwNapiError(Napi::Env env, const std::string& message) {
|
|
19
|
-
return Napi::Error::New(env, message);
|
|
20
|
-
}
|
|
21
|
-
|
|
22
|
-
// Create model from GGUF file
|
|
23
|
-
Napi::Value CreateModel(const Napi::CallbackInfo& info) {
|
|
24
|
-
Napi::Env env = info.Env();
|
|
25
|
-
|
|
26
|
-
if (info.Length() < 1) {
|
|
27
|
-
throw throwNapiError(env, "Expected 1 argument: modelPath");
|
|
28
|
-
}
|
|
29
|
-
|
|
30
|
-
if (!info[0].IsString()) {
|
|
31
|
-
throw throwNapiError(env, "modelPath must be a string");
|
|
32
|
-
}
|
|
33
|
-
|
|
34
|
-
std::string modelPath = info[0].As<Napi::String>().Utf8Value();
|
|
35
|
-
|
|
36
|
-
// Load model
|
|
37
|
-
llama_model_params modelParams = llama_model_default_params();
|
|
38
|
-
llama_model* model = llama_load_model_from_file(modelPath.c_str(), modelParams);
|
|
39
|
-
|
|
40
|
-
if (!model) {
|
|
41
|
-
throw throwNapiError(env, "Failed to load model: " + modelPath);
|
|
42
|
-
}
|
|
43
|
-
|
|
44
|
-
// Create context
|
|
45
|
-
llama_context_params ctxParams = llama_context_default_params();
|
|
46
|
-
ctxParams.embedding = true; // Enable embeddings
|
|
47
|
-
ctxParams.n_threads = 4;
|
|
48
|
-
|
|
49
|
-
llama_context* ctx = llama_new_context_with_model(model, ctxParams);
|
|
50
|
-
|
|
51
|
-
if (!ctx) {
|
|
52
|
-
llama_free_model(model);
|
|
53
|
-
throw throwNapiError(env, "Failed to create context");
|
|
54
|
-
}
|
|
55
|
-
|
|
56
|
-
// Get embedding dimensions
|
|
57
|
-
int n_embd = llama_n_embd(model);
|
|
58
|
-
|
|
59
|
-
// Create model data structure
|
|
60
|
-
ModelData* modelData = new ModelData();
|
|
61
|
-
modelData->model = model;
|
|
62
|
-
modelData->ctx = ctx;
|
|
63
|
-
modelData->n_embd = n_embd;
|
|
64
|
-
|
|
65
|
-
// Return as external pointer
|
|
66
|
-
return Napi::External<ModelData>::New(env, modelData);
|
|
67
|
-
}
|
|
68
|
-
|
|
69
|
-
// Generate embedding for text
|
|
70
|
-
Napi::Value GetEmbedding(const Napi::CallbackInfo& info) {
|
|
71
|
-
Napi::Env env = info.Env();
|
|
72
|
-
|
|
73
|
-
if (info.Length() < 2) {
|
|
74
|
-
throw throwNapiError(env, "Expected 2 arguments: modelPtr, text");
|
|
75
|
-
}
|
|
76
|
-
|
|
77
|
-
if (!info[0].IsExternal()) {
|
|
78
|
-
throw throwNapiError(env, "modelPtr must be external pointer");
|
|
79
|
-
}
|
|
80
|
-
|
|
81
|
-
if (!info[1].IsString()) {
|
|
82
|
-
throw throwNapiError(env, "text must be a string");
|
|
83
|
-
}
|
|
84
|
-
|
|
85
|
-
ModelData* modelData = info[0].As<Napi::External<ModelData>>().Data();
|
|
86
|
-
std::string text = info[1].As<Napi::String>().Utf8Value();
|
|
87
|
-
|
|
88
|
-
// Tokenize text
|
|
89
|
-
std::vector<llama_token> tokens;
|
|
90
|
-
tokens.resize(text.length() + 16); // Extra space
|
|
91
|
-
|
|
92
|
-
int nTokens = llama_tokenize(
|
|
93
|
-
modelData->model,
|
|
94
|
-
text.c_str(),
|
|
95
|
-
text.length(),
|
|
96
|
-
tokens.data(),
|
|
97
|
-
tokens.capacity(),
|
|
98
|
-
false,
|
|
99
|
-
false
|
|
100
|
-
);
|
|
101
|
-
|
|
102
|
-
if (nTokens < 0) {
|
|
103
|
-
throw throwNapiError(env, "Failed to tokenize text");
|
|
104
|
-
}
|
|
105
|
-
|
|
106
|
-
tokens.resize(nTokens);
|
|
107
|
-
|
|
108
|
-
// Create batch
|
|
109
|
-
llama_batch batch = llama_batch_init(nTokens, 0, 1);
|
|
110
|
-
|
|
111
|
-
for (int i = 0; i < nTokens; i++) {
|
|
112
|
-
llama_batch_add(batch, tokens[i], i, {0}, false);
|
|
113
|
-
}
|
|
114
|
-
|
|
115
|
-
// Run inference
|
|
116
|
-
int result = llama_decode(modelData->ctx, batch);
|
|
117
|
-
if (result != 0) {
|
|
118
|
-
llama_batch_free(batch);
|
|
119
|
-
throw throwNapiError(env, "Failed to run inference");
|
|
120
|
-
}
|
|
121
|
-
|
|
122
|
-
// Get embeddings
|
|
123
|
-
float* embeddings = llama_get_embeddings(modelData->ctx);
|
|
124
|
-
if (!embeddings) {
|
|
125
|
-
llama_batch_free(batch);
|
|
126
|
-
throw throwNapiError(env, "Failed to get embeddings");
|
|
127
|
-
}
|
|
128
|
-
|
|
129
|
-
// Create N-API array
|
|
130
|
-
Napi::Float32Array embeddingArray = Napi::Float32Array::New(env, modelData->n_embd);
|
|
131
|
-
for (int i = 0; i < modelData->n_embd; i++) {
|
|
132
|
-
embeddingArray[i] = embeddings[i];
|
|
133
|
-
}
|
|
134
|
-
|
|
135
|
-
llama_batch_free(batch);
|
|
136
|
-
|
|
137
|
-
return embeddingArray;
|
|
138
|
-
}
|
|
139
|
-
|
|
140
|
-
// Destroy model and free resources
|
|
141
|
-
Napi::Value DestroyModel(const Napi::CallbackInfo& info) {
|
|
142
|
-
Napi::Env env = info.Env();
|
|
143
|
-
|
|
144
|
-
if (info.Length() < 1) {
|
|
145
|
-
throw throwNapiError(env, "Expected 1 argument: modelPtr");
|
|
146
|
-
}
|
|
147
|
-
|
|
148
|
-
if (!info[0].IsExternal()) {
|
|
149
|
-
throw throwNapiError(env, "modelPtr must be external pointer");
|
|
150
|
-
}
|
|
151
|
-
|
|
152
|
-
ModelData* modelData = info[0].As<Napi::External<ModelData>>().Data();
|
|
153
|
-
|
|
154
|
-
if (modelData) {
|
|
155
|
-
if (modelData->ctx) {
|
|
156
|
-
llama_free(modelData->ctx);
|
|
157
|
-
}
|
|
158
|
-
if (modelData->model) {
|
|
159
|
-
llama_free_model(modelData->model);
|
|
160
|
-
}
|
|
161
|
-
delete modelData;
|
|
162
|
-
}
|
|
163
|
-
|
|
164
|
-
return env.Null();
|
|
165
|
-
}
|
|
166
|
-
|
|
167
|
-
// Module initialization
|
|
168
|
-
Napi::Object Init(Napi::Env env, Napi::Object exports) {
|
|
169
|
-
exports.Set(Napi::String::New(env, "createModel"),
|
|
170
|
-
Napi::Function::New(env, CreateModel));
|
|
171
|
-
exports.Set(Napi::String::New(env, "getEmbedding"),
|
|
172
|
-
Napi::Function::New(env, GetEmbedding));
|
|
173
|
-
exports.Set(Napi::String::New(env, "destroyModel"),
|
|
174
|
-
Napi::Function::New(env, DestroyModel));
|
|
175
|
-
|
|
176
|
-
return exports;
|
|
177
|
-
}
|
|
178
|
-
|
|
179
|
-
NODE_API_MODULE(llama_embedding, Init)
|