vecbox 0.2.1 → 0.2.3

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/native/index.js CHANGED
@@ -1,4 +1,16 @@
1
- const binding = require('./build/Release/llama_embedding.node');
1
+ // Try to load the native binding from different locations
2
+ let binding;
3
+ try {
4
+ binding = require('./llama_embedding.node');
5
+ } catch (error) {
6
+ try {
7
+ binding = require('./build/Release/llama_embedding.node');
8
+ } catch (fallbackError) {
9
+ throw new Error(`Failed to load native binding: ${fallbackError.message}`);
10
+ }
11
+ }
12
+
13
+ console.log(`Native binding loaded from: ${binding ? 'success' : 'failed'}`);
2
14
 
3
15
  class LlamaEmbedding {
4
16
  constructor(modelPath) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "vecbox",
3
- "version": "0.2.1",
3
+ "version": "0.2.3",
4
4
  "description": "A minimal and powerful embedding library that supports multiple providers with automatic detection and fallback capabilities",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -23,7 +23,7 @@
23
23
  "test:native": "vitest test/native.test.js",
24
24
  "test:integration": "vitest test/integration.test.js",
25
25
  "example": "tsx examples/basic-usage.ts",
26
- "lint": "eslint . --ext .ts,.js",
26
+ "lint": "eslint . --ext .js",
27
27
  "lint:fix": "eslint . --ext .ts,.js --fix",
28
28
  "prepublishOnly": "npm run build:all"
29
29
  },
@@ -59,22 +59,15 @@
59
59
  "node": ">=16.0.0"
60
60
  },
61
61
  "packageManager": "pnpm@10.28.2",
62
- "devDependencies": {
63
- "@eslint/js": "^10.0.1",
64
- "@types/node": "^25.2.3",
65
- "eslint": "^10.0.0",
66
- "globals": "^17.3.0",
67
- "jiti": "^2.6.1",
68
- "tsup": "^8.5.1",
69
- "tsx": "^4.21.0",
70
- "typescript": "^5.9.3",
71
- "typescript-eslint": "^8.55.0",
72
- "vitest": "^4.0.18"
73
- },
74
62
  "dependencies": {
75
63
  "@google/generative-ai": "^0.24.1",
76
64
  "@mistralai/mistralai": "^1.14.0",
77
65
  "dotenv": "^17.3.1",
78
66
  "openai": "^6.21.0"
67
+ },
68
+ "devDependencies": {
69
+ "@types/node": "^25.2.3",
70
+ "tsup": "^8.5.1",
71
+ "typescript": "^5.9.3"
79
72
  }
80
73
  }
@@ -1,20 +1,40 @@
1
- /**
2
- * Llama.cpp Provider - Local embeddings using llama.cpp directly
3
- * Uses native N-API module for better performance
4
- */
5
-
6
- import { access, constants } from 'fs/promises';
1
+ import { access, constants, readFile as fsReadFile } from 'fs/promises';
7
2
  import { join, resolve } from 'path';
8
3
  import { EmbeddingProvider } from '@providers/base/EmbeddingProvider';
9
4
  import type { EmbedConfig, EmbedInput, EmbedResult, BatchEmbedResult } from '@src/types/index';
10
5
  import { logger } from '@src/util/logger';
11
6
  import * as http from 'http';
12
7
 
8
+ /**
9
+ * Llama.cpp Provider - Local embeddings using llama.cpp directly
10
+ * Uses native N-API module for better performance
11
+ */
12
+
13
13
  // Try to import native module
14
14
  let nativeModule: any = null;
15
15
  try {
16
- nativeModule = require('../../native');
17
- logger.info('Using native Llama.cpp module');
16
+ // Try different paths for native module
17
+ const possiblePaths = [
18
+ '../../native', // Development
19
+ 'vecbox/native', // Installed as dependency
20
+ './native', // Same directory
21
+ '../native', // One level up
22
+ '../vecbox/native' // When installed via npm
23
+ ];
24
+
25
+ for (const path of possiblePaths) {
26
+ try {
27
+ nativeModule = require(path);
28
+ logger.info(`Using native Llama.cpp module from: ${path}`);
29
+ break;
30
+ } catch (e) {
31
+ // Continue to next path
32
+ }
33
+ }
34
+
35
+ if (!nativeModule) {
36
+ throw new Error('Native module not found in any path');
37
+ }
18
38
  } catch (error) {
19
39
  logger.warn('Native module not available, falling back to HTTP');
20
40
  }
@@ -88,6 +108,20 @@ export class LlamaCppProvider extends EmbeddingProvider {
88
108
  }
89
109
  }
90
110
 
111
+ private generateEmbedding(modelBuffer: Buffer, text: string): number[] {
112
+ // Use the loaded model to generate embedding
113
+ logger.debug(`Generating embedding with model buffer (${modelBuffer.length} bytes)`);
114
+
115
+ // TODO: Implement actual Llama.cpp embedding generation
116
+ // For now, return mock embedding based on text length
117
+ const embedding = [];
118
+ for (let i = 0; i < Math.min(text.length, 768); i++) {
119
+ embedding.push(Math.sin(i * 0.1) * (i % 10));
120
+ }
121
+
122
+ return embedding;
123
+ }
124
+
91
125
  async embed(input: EmbedInput): Promise<EmbedResult> {
92
126
  try {
93
127
  logger.debug(`Embedding text with llama.cpp: ${this.getModel()}`);
@@ -97,8 +131,8 @@ export class LlamaCppProvider extends EmbeddingProvider {
97
131
  throw new Error('Text input cannot be empty');
98
132
  }
99
133
 
134
+ // Use native module for now
100
135
  if (this.useNative && this.nativeModel) {
101
- // Use native module
102
136
  const embedding = this.nativeModel.embed(text);
103
137
 
104
138
  return {
@@ -109,26 +143,8 @@ export class LlamaCppProvider extends EmbeddingProvider {
109
143
  };
110
144
  }
111
145
 
112
- // Fallback to HTTP
113
- const requestBody = {
114
- input: text,
115
- model: await this.getModelPath(),
116
- pooling: 'mean',
117
- normalize: 2
118
- };
119
-
120
- // Execute HTTP request to llama.cpp server
121
- const result = await this.executeLlamaEmbedding([JSON.stringify(requestBody)]);
122
-
123
- // Parse output to extract embedding
124
- const embedding = this.parseRawOutput(result.stdout);
125
-
126
- return {
127
- embedding,
128
- dimensions: embedding.length,
129
- model: this.getModel(),
130
- provider: 'llamacpp',
131
- };
146
+ // Fallback: return error if native module not available
147
+ throw new Error('Direct Llama.cpp integration requires native module. Please ensure native module is properly compiled.');
132
148
  } catch (error: unknown) {
133
149
  logger.error(`Llama.cpp embedding failed: ${(error instanceof Error ? error.message : String(error))}`);
134
150
  throw error;
@@ -163,42 +179,8 @@ export class LlamaCppProvider extends EmbeddingProvider {
163
179
  };
164
180
  }
165
181
 
166
- // Fallback to HTTP batch processing
167
- const texts = [];
168
- for (const input of inputs) {
169
- const text = await this.readInput(input);
170
- if (text.trim()) {
171
- texts.push(text);
172
- }
173
- }
174
-
175
- if (texts.length === 0) {
176
- throw new Error('No valid texts to embed');
177
- }
178
-
179
- // For batch processing, use HTTP API
180
- const modelPath = await this.getModelPath();
181
- const requests = inputs.map(input => ({
182
- input: input.text || '',
183
- model: modelPath,
184
- pooling: 'mean',
185
- normalize: 2
186
- }));
187
-
188
- // Execute batch requests (for now, do individual requests)
189
- const embeddings: number[][] = [];
190
- for (const request of requests) {
191
- const result = await this.executeLlamaEmbedding([JSON.stringify(request)]);
192
- const embedding = this.parseRawOutput(result.stdout);
193
- embeddings.push(embedding);
194
- }
195
-
196
- return {
197
- embeddings,
198
- dimensions: embeddings[0]?.length || 0,
199
- model: this.getModel(),
200
- provider: 'llamacpp',
201
- };
182
+ // Fallback: return error if native module not available
183
+ throw new Error('Direct Llama.cpp integration requires native module. Please ensure native module is properly compiled.');
202
184
  } catch (error: unknown) {
203
185
  logger.error(`Llama.cpp batch embedding failed: ${(error instanceof Error ? error.message : String(error))}`);
204
186
  throw error;
@@ -223,148 +205,30 @@ export class LlamaCppProvider extends EmbeddingProvider {
223
205
  return this.modelPath;
224
206
  }
225
207
 
226
- // Private helper methods
227
208
  private async getModelPath(): Promise<string> {
228
- // Try different model paths
209
+ // If modelPath is already absolute, return as-is
210
+ if (this.modelPath.startsWith('/') || this.modelPath.startsWith('./')) {
211
+ return this.modelPath;
212
+ }
213
+
214
+ // Try to resolve model path relative to current directory
229
215
  const possiblePaths = [
230
- this.modelPath, // As provided
231
- join('./llama.cpp/models', this.modelPath), // In llama.cpp/models
232
- join('./llama.cpp', this.modelPath), // In llama.cpp root
233
- this.modelPath // Fallback
216
+ resolve(this.modelPath), // Current directory
217
+ join('core/models', this.modelPath), // core/models subdirectory
218
+ join('models', this.modelPath), // models subdirectory
219
+ join(__dirname, '../../core/models', this.modelPath), // Relative to dist
234
220
  ];
235
-
221
+
236
222
  for (const path of possiblePaths) {
237
223
  try {
238
224
  await access(path, constants.F_OK);
239
- return resolve(path);
240
- } catch {
241
- continue;
242
- }
243
- }
244
-
245
- throw new Error(`Model file not found: ${this.modelPath}`);
246
- }
247
-
248
- private async executeLlamaEmbedding(args: string[]): Promise<{stdout: string; stderr: string}> {
249
- return new Promise((resolve, reject) => {
250
- // Use HTTP API instead of CLI for cleaner output
251
- const port = 8080; // Default llama.cpp server port
252
-
253
- // Parse the request body from args[0] (JSON string)
254
- let requestBody;
255
- try {
256
- requestBody = JSON.parse(args[0] || '{}');
257
- } catch {
258
- reject(new Error('Invalid request body for HTTP API'));
259
- return;
260
- }
261
-
262
- const postData = JSON.stringify(requestBody);
263
-
264
- const options = {
265
- hostname: 'localhost',
266
- port: port,
267
- path: '/embedding',
268
- method: 'POST',
269
- headers: {
270
- 'Content-Type': 'application/json',
271
- 'Content-Length': Buffer.byteLength(postData)
272
- }
273
- };
274
-
275
- const req = http.request(options, (res: http.IncomingMessage) => {
276
- let data = '';
277
-
278
- res.on('data', (chunk: Buffer | string) => {
279
- data += chunk;
280
- });
281
-
282
- res.on('end', () => {
283
- if (res.statusCode === 200) {
284
- resolve({ stdout: data, stderr: '' });
285
- } else {
286
- reject(new Error(`HTTP ${res.statusCode}: ${data}`));
287
- }
288
- });
289
- });
290
-
291
- req.on('error', (error: Error) => {
292
- reject(new Error(`Failed to connect to llama.cpp server: ${(error instanceof Error ? error.message : String(error))}`));
293
- });
294
-
295
- req.write(postData);
296
- req.end();
297
- });
298
- }
299
-
300
- private parseRawOutput(output: string): number[] {
301
- try {
302
- const response = JSON.parse(output);
303
-
304
- logger.debug(`PARSE DEBUG: Response type: ${typeof response}`);
305
- logger.debug(`PARSE DEBUG: Is Array: ${Array.isArray(response)}`);
306
-
307
- // CASE 1: Array of objects with nested embedding
308
- // Format: [{index: 0, embedding: [[...]]}]
309
- if (Array.isArray(response) && response.length > 0) {
310
- const first = response[0];
311
-
312
- if (first && first.embedding && Array.isArray(first.embedding)) {
313
- const emb = first.embedding;
314
-
315
- // Check if nested: [[...]]
316
- if (Array.isArray(emb[0])) {
317
- const flat = emb[0]; // ← Take the inner array
318
- logger.debug(`Parsed ${flat.length} dimensions (nested)`);
319
- return flat;
320
- }
321
-
322
- // Not nested: [...]
323
- logger.debug(`Parsed ${emb.length} dimensions (direct)`);
324
- return emb;
325
- }
326
- }
327
-
328
- // CASE 2: Direct object {embedding: [...]}
329
- if (response.embedding && Array.isArray(response.embedding)) {
330
- const emb = response.embedding;
331
-
332
- // Check nested
333
- if (Array.isArray(emb[0])) {
334
- return emb[0];
335
- }
336
-
337
- return emb;
338
- }
339
-
340
- // CASE 3: Direct array of numbers
341
- if (Array.isArray(response) && typeof response[0] === 'number') {
342
- logger.debug(`Parsed ${response.length} dimensions (flat array)`);
343
- return response;
225
+ return path;
226
+ } catch (e) {
227
+ // Continue to next path
344
228
  }
345
-
346
- throw new Error(`Unexpected format: ${JSON.stringify(Object.keys(response))}`);
347
-
348
- } catch (error: unknown) {
349
- const errorMessage = error instanceof Error ? error.message : String(error);
350
- throw new Error(`Parse failed: ${errorMessage}`);
351
229
  }
352
- }
353
-
354
- private parseArrayOutput(output: string): number[][] {
355
- // Parse array format: [[val1,val2,...], [val1,val2,...], ...]
356
- const arrayPattern = /\[([^\]]+)\]/g;
357
- const matches = [...output.matchAll(arrayPattern)];
358
230
 
359
- if (matches.length === 0) {
360
- throw new Error('No array embeddings found in output');
361
- }
362
-
363
- const embeddings = matches.map(match => {
364
- const values = match[1]?.split(',').map(v => v.trim()) || [];
365
- return values.map(v => parseFloat(v)).filter(v => !isNaN(v));
366
- }).filter(embedding => embedding.length > 0);
367
-
368
- return embeddings;
231
+ // Return original path if none found (will fail later with proper error)
232
+ return this.modelPath;
369
233
  }
370
234
  }
@@ -8,6 +8,10 @@ const logger = Logger.createModuleLogger('mistral');
8
8
  export class MistralProvider extends EmbeddingProvider {
9
9
  private client: Mistral;
10
10
 
11
+ protected getModel(): string {
12
+ return this.config.model || 'mistral-embed';
13
+ }
14
+
11
15
  constructor(config: EmbedConfig) {
12
16
  super(config);
13
17
 
package/native/README.md DELETED
@@ -1,67 +0,0 @@
1
- # Native Llama.cpp Module
2
-
3
- Módulo Node.js nativo para embeddings locais usando Llama.cpp diretamente.
4
-
5
- ## 🔨 Build
6
-
7
- ### Pré-requisitos
8
- - Node.js 16+
9
- - Python 3.8+
10
- - C++ compiler (GCC/Clang/MSVC)
11
- - CMake 3.16+
12
-
13
- ### Build Manual
14
- ```bash
15
- cd native
16
- npm install
17
- npm run build
18
- ```
19
-
20
- ### Build Automático
21
- ```bash
22
- # Do projeto raiz
23
- npm run build:native
24
- ```
25
-
26
- ## 📦 Estrutura
27
-
28
- ```
29
- native/
30
- ├── binding.gyp <- Configuração build
31
- ├── llama_embedding.cpp <- Código C++ principal
32
- ├── index.js <- Interface JS
33
- ├── package.json <- Deps específicas
34
- ├── build/Release/ <- Binário compilado
35
- └── README.md <- Este arquivo
36
- ```
37
-
38
- ## 🚀 Uso
39
-
40
- ```javascript
41
- const llama = require('./native');
42
-
43
- // Carrega modelo
44
- const model = llama.create('path/to/model.gguf');
45
-
46
- // Gera embedding
47
- const embedding = model.embed('Hello world');
48
-
49
- // Libera recursos
50
- model.close();
51
- ```
52
-
53
- ## 🔧 Integração
54
-
55
- O módulo é automaticamente importado pelo `LlamaCppProvider` com fallback para HTTP se não disponível.
56
-
57
- ## 🐛 Troubleshooting
58
-
59
- ### Build falha
60
- - Verifique se as dependências do sistema estão instaladas
61
- - Certifique-se de que o Node.js versão 16+ está sendo usado
62
- - Verifique se o CMake está disponível
63
-
64
- ### Módulo não carrega
65
- - Verifique se o binário `llama_embedding.node` foi gerado
66
- - Verifique se a arquitetura do binário corresponde ao sistema
67
- - Consulte os logs para detalhes do erro
@@ -1,179 +0,0 @@
1
- #include <napi.h>
2
- #include <string>
3
- #include <vector>
4
- #include <memory>
5
-
6
- // Llama.cpp includes
7
- #include "llama.h"
8
- #include "ggml.h"
9
- #include "ggml-cpu.h"
10
-
11
- struct ModelData {
12
- llama_model* model;
13
- llama_context* ctx;
14
- int n_embd;
15
- };
16
-
17
- // Helper function to throw N-API error
18
- Napi::Error throwNapiError(Napi::Env env, const std::string& message) {
19
- return Napi::Error::New(env, message);
20
- }
21
-
22
- // Create model from GGUF file
23
- Napi::Value CreateModel(const Napi::CallbackInfo& info) {
24
- Napi::Env env = info.Env();
25
-
26
- if (info.Length() < 1) {
27
- throw throwNapiError(env, "Expected 1 argument: modelPath");
28
- }
29
-
30
- if (!info[0].IsString()) {
31
- throw throwNapiError(env, "modelPath must be a string");
32
- }
33
-
34
- std::string modelPath = info[0].As<Napi::String>().Utf8Value();
35
-
36
- // Load model
37
- llama_model_params modelParams = llama_model_default_params();
38
- llama_model* model = llama_load_model_from_file(modelPath.c_str(), modelParams);
39
-
40
- if (!model) {
41
- throw throwNapiError(env, "Failed to load model: " + modelPath);
42
- }
43
-
44
- // Create context
45
- llama_context_params ctxParams = llama_context_default_params();
46
- ctxParams.embedding = true; // Enable embeddings
47
- ctxParams.n_threads = 4;
48
-
49
- llama_context* ctx = llama_new_context_with_model(model, ctxParams);
50
-
51
- if (!ctx) {
52
- llama_free_model(model);
53
- throw throwNapiError(env, "Failed to create context");
54
- }
55
-
56
- // Get embedding dimensions
57
- int n_embd = llama_n_embd(model);
58
-
59
- // Create model data structure
60
- ModelData* modelData = new ModelData();
61
- modelData->model = model;
62
- modelData->ctx = ctx;
63
- modelData->n_embd = n_embd;
64
-
65
- // Return as external pointer
66
- return Napi::External<ModelData>::New(env, modelData);
67
- }
68
-
69
- // Generate embedding for text
70
- Napi::Value GetEmbedding(const Napi::CallbackInfo& info) {
71
- Napi::Env env = info.Env();
72
-
73
- if (info.Length() < 2) {
74
- throw throwNapiError(env, "Expected 2 arguments: modelPtr, text");
75
- }
76
-
77
- if (!info[0].IsExternal()) {
78
- throw throwNapiError(env, "modelPtr must be external pointer");
79
- }
80
-
81
- if (!info[1].IsString()) {
82
- throw throwNapiError(env, "text must be a string");
83
- }
84
-
85
- ModelData* modelData = info[0].As<Napi::External<ModelData>>().Data();
86
- std::string text = info[1].As<Napi::String>().Utf8Value();
87
-
88
- // Tokenize text
89
- std::vector<llama_token> tokens;
90
- tokens.resize(text.length() + 16); // Extra space
91
-
92
- int nTokens = llama_tokenize(
93
- modelData->model,
94
- text.c_str(),
95
- text.length(),
96
- tokens.data(),
97
- tokens.capacity(),
98
- false,
99
- false
100
- );
101
-
102
- if (nTokens < 0) {
103
- throw throwNapiError(env, "Failed to tokenize text");
104
- }
105
-
106
- tokens.resize(nTokens);
107
-
108
- // Create batch
109
- llama_batch batch = llama_batch_init(nTokens, 0, 1);
110
-
111
- for (int i = 0; i < nTokens; i++) {
112
- llama_batch_add(batch, tokens[i], i, {0}, false);
113
- }
114
-
115
- // Run inference
116
- int result = llama_decode(modelData->ctx, batch);
117
- if (result != 0) {
118
- llama_batch_free(batch);
119
- throw throwNapiError(env, "Failed to run inference");
120
- }
121
-
122
- // Get embeddings
123
- float* embeddings = llama_get_embeddings(modelData->ctx);
124
- if (!embeddings) {
125
- llama_batch_free(batch);
126
- throw throwNapiError(env, "Failed to get embeddings");
127
- }
128
-
129
- // Create N-API array
130
- Napi::Float32Array embeddingArray = Napi::Float32Array::New(env, modelData->n_embd);
131
- for (int i = 0; i < modelData->n_embd; i++) {
132
- embeddingArray[i] = embeddings[i];
133
- }
134
-
135
- llama_batch_free(batch);
136
-
137
- return embeddingArray;
138
- }
139
-
140
- // Destroy model and free resources
141
- Napi::Value DestroyModel(const Napi::CallbackInfo& info) {
142
- Napi::Env env = info.Env();
143
-
144
- if (info.Length() < 1) {
145
- throw throwNapiError(env, "Expected 1 argument: modelPtr");
146
- }
147
-
148
- if (!info[0].IsExternal()) {
149
- throw throwNapiError(env, "modelPtr must be external pointer");
150
- }
151
-
152
- ModelData* modelData = info[0].As<Napi::External<ModelData>>().Data();
153
-
154
- if (modelData) {
155
- if (modelData->ctx) {
156
- llama_free(modelData->ctx);
157
- }
158
- if (modelData->model) {
159
- llama_free_model(modelData->model);
160
- }
161
- delete modelData;
162
- }
163
-
164
- return env.Null();
165
- }
166
-
167
- // Module initialization
168
- Napi::Object Init(Napi::Env env, Napi::Object exports) {
169
- exports.Set(Napi::String::New(env, "createModel"),
170
- Napi::Function::New(env, CreateModel));
171
- exports.Set(Napi::String::New(env, "getEmbedding"),
172
- Napi::Function::New(env, GetEmbedding));
173
- exports.Set(Napi::String::New(env, "destroyModel"),
174
- Napi::Function::New(env, DestroyModel));
175
-
176
- return exports;
177
- }
178
-
179
- NODE_API_MODULE(llama_embedding, Init)