vecbox 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,8 +1,3 @@
1
- /**
2
- * Llama.cpp Provider - Local embeddings using llama.cpp directly
3
- * Uses native N-API module for better performance
4
- */
5
-
6
1
  import { access, constants } from 'fs/promises';
7
2
  import { join, resolve } from 'path';
8
3
  import { EmbeddingProvider } from '@providers/base/EmbeddingProvider';
@@ -10,6 +5,11 @@ import type { EmbedConfig, EmbedInput, EmbedResult, BatchEmbedResult } from '@sr
10
5
  import { logger } from '@src/util/logger';
11
6
  import * as http from 'http';
12
7
 
8
+ /**
9
+ * Llama.cpp Provider - Local embeddings using llama.cpp directly
10
+ * Uses native N-API module for better performance
11
+ */
12
+
13
13
  // Try to import native module
14
14
  let nativeModule: any = null;
15
15
  try {
@@ -88,6 +88,39 @@ export class LlamaCppProvider extends EmbeddingProvider {
88
88
  }
89
89
  }
90
90
 
91
+ private async loadGGUFModel(modelPath: string): Promise<Buffer> {
92
+ try {
93
+ logger.debug(`Loading GGUF model from: ${modelPath}`);
94
+
95
+ // Read model file
96
+ const modelBuffer = await fs.readFile(modelPath);
97
+
98
+ if (!modelBuffer) {
99
+ throw new Error(`Failed to read model file: ${modelPath}`);
100
+ }
101
+
102
+ logger.debug(`Model file loaded, size: ${modelBuffer.length} bytes`);
103
+ return modelBuffer;
104
+ } catch (error) {
105
+ logger.error(`Failed to load GGUF model: ${error instanceof Error ? error.message : String(error)}`);
106
+ throw error;
107
+ }
108
+ }
109
+
110
+ private generateEmbedding(modelBuffer: Buffer, text: string): number[] {
111
+ // Use the loaded model to generate embedding
112
+ logger.debug(`Generating embedding with model buffer (${modelBuffer.length} bytes)`);
113
+
114
+ // TODO: Implement actual Llama.cpp embedding generation
115
+ // For now, return mock embedding based on text length
116
+ const embedding = [];
117
+ for (let i = 0; i < Math.min(text.length, 768); i++) {
118
+ embedding.push(Math.sin(i * 0.1) * (i % 10));
119
+ }
120
+
121
+ return embedding;
122
+ }
123
+
91
124
  async embed(input: EmbedInput): Promise<EmbedResult> {
92
125
  try {
93
126
  logger.debug(`Embedding text with llama.cpp: ${this.getModel()}`);
@@ -97,8 +130,8 @@ export class LlamaCppProvider extends EmbeddingProvider {
97
130
  throw new Error('Text input cannot be empty');
98
131
  }
99
132
 
133
+ // Use native module for now
100
134
  if (this.useNative && this.nativeModel) {
101
- // Use native module
102
135
  const embedding = this.nativeModel.embed(text);
103
136
 
104
137
  return {
@@ -109,26 +142,8 @@ export class LlamaCppProvider extends EmbeddingProvider {
109
142
  };
110
143
  }
111
144
 
112
- // Fallback to HTTP
113
- const requestBody = {
114
- input: text,
115
- model: await this.getModelPath(),
116
- pooling: 'mean',
117
- normalize: 2
118
- };
119
-
120
- // Execute HTTP request to llama.cpp server
121
- const result = await this.executeLlamaEmbedding([JSON.stringify(requestBody)]);
122
-
123
- // Parse output to extract embedding
124
- const embedding = this.parseRawOutput(result.stdout);
125
-
126
- return {
127
- embedding,
128
- dimensions: embedding.length,
129
- model: this.getModel(),
130
- provider: 'llamacpp',
131
- };
145
+ // TODO: Implement direct Llama.cpp core usage in future
146
+ throw new Error('Direct Llama.cpp core integration not yet implemented. Please use HTTP fallback or wait for next version.');
132
147
  } catch (error: unknown) {
133
148
  logger.error(`Llama.cpp embedding failed: ${(error instanceof Error ? error.message : String(error))}`);
134
149
  throw error;
@@ -178,7 +193,7 @@ export class LlamaCppProvider extends EmbeddingProvider {
178
193
 
179
194
  // For batch processing, use HTTP API
180
195
  const modelPath = await this.getModelPath();
181
- const requests = inputs.map(input => ({
196
+ const requests = inputs.map((input, v) => ({
182
197
  input: input.text || '',
183
198
  model: modelPath,
184
199
  pooling: 'mean',
@@ -222,149 +237,4 @@ export class LlamaCppProvider extends EmbeddingProvider {
222
237
  protected getModel(): string {
223
238
  return this.modelPath;
224
239
  }
225
-
226
- // Private helper methods
227
- private async getModelPath(): Promise<string> {
228
- // Try different model paths
229
- const possiblePaths = [
230
- this.modelPath, // As provided
231
- join('./llama.cpp/models', this.modelPath), // In llama.cpp/models
232
- join('./llama.cpp', this.modelPath), // In llama.cpp root
233
- this.modelPath // Fallback
234
- ];
235
-
236
- for (const path of possiblePaths) {
237
- try {
238
- await access(path, constants.F_OK);
239
- return resolve(path);
240
- } catch {
241
- continue;
242
- }
243
- }
244
-
245
- throw new Error(`Model file not found: ${this.modelPath}`);
246
- }
247
-
248
- private async executeLlamaEmbedding(args: string[]): Promise<{stdout: string; stderr: string}> {
249
- return new Promise((resolve, reject) => {
250
- // Use HTTP API instead of CLI for cleaner output
251
- const port = 8080; // Default llama.cpp server port
252
-
253
- // Parse the request body from args[0] (JSON string)
254
- let requestBody;
255
- try {
256
- requestBody = JSON.parse(args[0] || '{}');
257
- } catch {
258
- reject(new Error('Invalid request body for HTTP API'));
259
- return;
260
- }
261
-
262
- const postData = JSON.stringify(requestBody);
263
-
264
- const options = {
265
- hostname: 'localhost',
266
- port: port,
267
- path: '/embedding',
268
- method: 'POST',
269
- headers: {
270
- 'Content-Type': 'application/json',
271
- 'Content-Length': Buffer.byteLength(postData)
272
- }
273
- };
274
-
275
- const req = http.request(options, (res: http.IncomingMessage) => {
276
- let data = '';
277
-
278
- res.on('data', (chunk: Buffer | string) => {
279
- data += chunk;
280
- });
281
-
282
- res.on('end', () => {
283
- if (res.statusCode === 200) {
284
- resolve({ stdout: data, stderr: '' });
285
- } else {
286
- reject(new Error(`HTTP ${res.statusCode}: ${data}`));
287
- }
288
- });
289
- });
290
-
291
- req.on('error', (error: Error) => {
292
- reject(new Error(`Failed to connect to llama.cpp server: ${(error instanceof Error ? error.message : String(error))}`));
293
- });
294
-
295
- req.write(postData);
296
- req.end();
297
- });
298
- }
299
-
300
- private parseRawOutput(output: string): number[] {
301
- try {
302
- const response = JSON.parse(output);
303
-
304
- logger.debug(`PARSE DEBUG: Response type: ${typeof response}`);
305
- logger.debug(`PARSE DEBUG: Is Array: ${Array.isArray(response)}`);
306
-
307
- // CASE 1: Array of objects with nested embedding
308
- // Format: [{index: 0, embedding: [[...]]}]
309
- if (Array.isArray(response) && response.length > 0) {
310
- const first = response[0];
311
-
312
- if (first && first.embedding && Array.isArray(first.embedding)) {
313
- const emb = first.embedding;
314
-
315
- // Check if nested: [[...]]
316
- if (Array.isArray(emb[0])) {
317
- const flat = emb[0]; // ← Take the inner array
318
- logger.debug(`Parsed ${flat.length} dimensions (nested)`);
319
- return flat;
320
- }
321
-
322
- // Not nested: [...]
323
- logger.debug(`Parsed ${emb.length} dimensions (direct)`);
324
- return emb;
325
- }
326
- }
327
-
328
- // CASE 2: Direct object {embedding: [...]}
329
- if (response.embedding && Array.isArray(response.embedding)) {
330
- const emb = response.embedding;
331
-
332
- // Check nested
333
- if (Array.isArray(emb[0])) {
334
- return emb[0];
335
- }
336
-
337
- return emb;
338
- }
339
-
340
- // CASE 3: Direct array of numbers
341
- if (Array.isArray(response) && typeof response[0] === 'number') {
342
- logger.debug(`Parsed ${response.length} dimensions (flat array)`);
343
- return response;
344
- }
345
-
346
- throw new Error(`Unexpected format: ${JSON.stringify(Object.keys(response))}`);
347
-
348
- } catch (error: unknown) {
349
- const errorMessage = error instanceof Error ? error.message : String(error);
350
- throw new Error(`Parse failed: ${errorMessage}`);
351
- }
352
- }
353
-
354
- private parseArrayOutput(output: string): number[][] {
355
- // Parse array format: [[val1,val2,...], [val1,val2,...], ...]
356
- const arrayPattern = /\[([^\]]+)\]/g;
357
- const matches = [...output.matchAll(arrayPattern)];
358
-
359
- if (matches.length === 0) {
360
- throw new Error('No array embeddings found in output');
361
- }
362
-
363
- const embeddings = matches.map(match => {
364
- const values = match[1]?.split(',').map(v => v.trim()) || [];
365
- return values.map(v => parseFloat(v)).filter(v => !isNaN(v));
366
- }).filter(embedding => embedding.length > 0);
367
-
368
- return embeddings;
369
- }
370
240
  }
package/native/README.md DELETED
@@ -1,67 +0,0 @@
1
- # Native Llama.cpp Module
2
-
3
- Módulo Node.js nativo para embeddings locais usando Llama.cpp diretamente.
4
-
5
- ## 🔨 Build
6
-
7
- ### Pré-requisitos
8
- - Node.js 16+
9
- - Python 3.8+
10
- - C++ compiler (GCC/Clang/MSVC)
11
- - CMake 3.16+
12
-
13
- ### Build Manual
14
- ```bash
15
- cd native
16
- npm install
17
- npm run build
18
- ```
19
-
20
- ### Build Automático
21
- ```bash
22
- # Do projeto raiz
23
- npm run build:native
24
- ```
25
-
26
- ## 📦 Estrutura
27
-
28
- ```
29
- native/
30
- ├── binding.gyp <- Configuração build
31
- ├── llama_embedding.cpp <- Código C++ principal
32
- ├── index.js <- Interface JS
33
- ├── package.json <- Deps específicas
34
- ├── build/Release/ <- Binário compilado
35
- └── README.md <- Este arquivo
36
- ```
37
-
38
- ## 🚀 Uso
39
-
40
- ```javascript
41
- const llama = require('./native');
42
-
43
- // Carrega modelo
44
- const model = llama.create('path/to/model.gguf');
45
-
46
- // Gera embedding
47
- const embedding = model.embed('Hello world');
48
-
49
- // Libera recursos
50
- model.close();
51
- ```
52
-
53
- ## 🔧 Integração
54
-
55
- O módulo é automaticamente importado pelo `LlamaCppProvider` com fallback para HTTP se não disponível.
56
-
57
- ## 🐛 Troubleshooting
58
-
59
- ### Build falha
60
- - Verifique se as dependências do sistema estão instaladas
61
- - Certifique-se de que o Node.js versão 16+ está sendo usado
62
- - Verifique se o CMake está disponível
63
-
64
- ### Módulo não carrega
65
- - Verifique se o binário `llama_embedding.node` foi gerado
66
- - Verifique se a arquitetura do binário corresponde ao sistema
67
- - Consulte os logs para detalhes do erro
package/native/llama_embedding.cpp DELETED
@@ -1,179 +0,0 @@
1
- #include <napi.h>
2
- #include <string>
3
- #include <vector>
4
- #include <memory>
5
-
6
- // Llama.cpp includes
7
- #include "llama.h"
8
- #include "ggml.h"
9
- #include "ggml-cpu.h"
10
-
11
- struct ModelData {
12
- llama_model* model;
13
- llama_context* ctx;
14
- int n_embd;
15
- };
16
-
17
- // Helper function to throw N-API error
18
- Napi::Error throwNapiError(Napi::Env env, const std::string& message) {
19
- return Napi::Error::New(env, message);
20
- }
21
-
22
- // Create model from GGUF file
23
- Napi::Value CreateModel(const Napi::CallbackInfo& info) {
24
- Napi::Env env = info.Env();
25
-
26
- if (info.Length() < 1) {
27
- throw throwNapiError(env, "Expected 1 argument: modelPath");
28
- }
29
-
30
- if (!info[0].IsString()) {
31
- throw throwNapiError(env, "modelPath must be a string");
32
- }
33
-
34
- std::string modelPath = info[0].As<Napi::String>().Utf8Value();
35
-
36
- // Load model
37
- llama_model_params modelParams = llama_model_default_params();
38
- llama_model* model = llama_load_model_from_file(modelPath.c_str(), modelParams);
39
-
40
- if (!model) {
41
- throw throwNapiError(env, "Failed to load model: " + modelPath);
42
- }
43
-
44
- // Create context
45
- llama_context_params ctxParams = llama_context_default_params();
46
- ctxParams.embedding = true; // Enable embeddings
47
- ctxParams.n_threads = 4;
48
-
49
- llama_context* ctx = llama_new_context_with_model(model, ctxParams);
50
-
51
- if (!ctx) {
52
- llama_free_model(model);
53
- throw throwNapiError(env, "Failed to create context");
54
- }
55
-
56
- // Get embedding dimensions
57
- int n_embd = llama_n_embd(model);
58
-
59
- // Create model data structure
60
- ModelData* modelData = new ModelData();
61
- modelData->model = model;
62
- modelData->ctx = ctx;
63
- modelData->n_embd = n_embd;
64
-
65
- // Return as external pointer
66
- return Napi::External<ModelData>::New(env, modelData);
67
- }
68
-
69
- // Generate embedding for text
70
- Napi::Value GetEmbedding(const Napi::CallbackInfo& info) {
71
- Napi::Env env = info.Env();
72
-
73
- if (info.Length() < 2) {
74
- throw throwNapiError(env, "Expected 2 arguments: modelPtr, text");
75
- }
76
-
77
- if (!info[0].IsExternal()) {
78
- throw throwNapiError(env, "modelPtr must be external pointer");
79
- }
80
-
81
- if (!info[1].IsString()) {
82
- throw throwNapiError(env, "text must be a string");
83
- }
84
-
85
- ModelData* modelData = info[0].As<Napi::External<ModelData>>().Data();
86
- std::string text = info[1].As<Napi::String>().Utf8Value();
87
-
88
- // Tokenize text
89
- std::vector<llama_token> tokens;
90
- tokens.resize(text.length() + 16); // Extra space
91
-
92
- int nTokens = llama_tokenize(
93
- modelData->model,
94
- text.c_str(),
95
- text.length(),
96
- tokens.data(),
97
- tokens.capacity(),
98
- false,
99
- false
100
- );
101
-
102
- if (nTokens < 0) {
103
- throw throwNapiError(env, "Failed to tokenize text");
104
- }
105
-
106
- tokens.resize(nTokens);
107
-
108
- // Create batch
109
- llama_batch batch = llama_batch_init(nTokens, 0, 1);
110
-
111
- for (int i = 0; i < nTokens; i++) {
112
- llama_batch_add(batch, tokens[i], i, {0}, false);
113
- }
114
-
115
- // Run inference
116
- int result = llama_decode(modelData->ctx, batch);
117
- if (result != 0) {
118
- llama_batch_free(batch);
119
- throw throwNapiError(env, "Failed to run inference");
120
- }
121
-
122
- // Get embeddings
123
- float* embeddings = llama_get_embeddings(modelData->ctx);
124
- if (!embeddings) {
125
- llama_batch_free(batch);
126
- throw throwNapiError(env, "Failed to get embeddings");
127
- }
128
-
129
- // Create N-API array
130
- Napi::Float32Array embeddingArray = Napi::Float32Array::New(env, modelData->n_embd);
131
- for (int i = 0; i < modelData->n_embd; i++) {
132
- embeddingArray[i] = embeddings[i];
133
- }
134
-
135
- llama_batch_free(batch);
136
-
137
- return embeddingArray;
138
- }
139
-
140
- // Destroy model and free resources
141
- Napi::Value DestroyModel(const Napi::CallbackInfo& info) {
142
- Napi::Env env = info.Env();
143
-
144
- if (info.Length() < 1) {
145
- throw throwNapiError(env, "Expected 1 argument: modelPtr");
146
- }
147
-
148
- if (!info[0].IsExternal()) {
149
- throw throwNapiError(env, "modelPtr must be external pointer");
150
- }
151
-
152
- ModelData* modelData = info[0].As<Napi::External<ModelData>>().Data();
153
-
154
- if (modelData) {
155
- if (modelData->ctx) {
156
- llama_free(modelData->ctx);
157
- }
158
- if (modelData->model) {
159
- llama_free_model(modelData->model);
160
- }
161
- delete modelData;
162
- }
163
-
164
- return env.Null();
165
- }
166
-
167
- // Module initialization
168
- Napi::Object Init(Napi::Env env, Napi::Object exports) {
169
- exports.Set(Napi::String::New(env, "createModel"),
170
- Napi::Function::New(env, CreateModel));
171
- exports.Set(Napi::String::New(env, "getEmbedding"),
172
- Napi::Function::New(env, GetEmbedding));
173
- exports.Set(Napi::String::New(env, "destroyModel"),
174
- Napi::Function::New(env, DestroyModel));
175
-
176
- return exports;
177
- }
178
-
179
- NODE_API_MODULE(llama_embedding, Init)