npm - vecbox - Versions diffs - 0.2.1 → 0.2.3 - Mend

vecbox 0.2.1 → 0.2.3

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (15)

package/CHANGELOG.md +40 -0
package/README.md +256 -270
package/dist/index.cjs +62 -224
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +7 -7
package/dist/index.d.ts +7 -7
package/dist/index.js +62 -225
package/dist/index.js.map +1 -1
package/dist/{llama_embedding-EC3MWSUZ.node → llama_embedding.node} +0 -0
package/native/index.js +13 -1
package/package.json +7 -14
package/src/providers/llamacpp.ts +63 -199
package/src/providers/mistral.ts +4 -0
package/native/README.md +0 -67
package/native/llama_embedding.cpp +0 -179

package/native/index.js CHANGED Viewed

@@ -1,4 +1,16 @@
-const binding = require('./build/Release/llama_embedding.node');
+// Try to load the native binding from different locations
+let binding;
+try {
+  binding = require('./llama_embedding.node');
+} catch (error) {
+  try {
+    binding = require('./build/Release/llama_embedding.node');
+  } catch (fallbackError) {
+    throw new Error(`Failed to load native binding: ${fallbackError.message}`);
+  }
+}
+console.log(`Native binding loaded from: ${binding ? 'success' : 'failed'}`);
 class LlamaEmbedding {
   constructor(modelPath) {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "vecbox",
-  "version": "0.2.1",
+  "version": "0.2.3",
   "description": "A minimal and powerful embedding library that supports multiple providers with automatic detection and fallback capabilities",
   "type": "module",
   "main": "dist/index.js",
@@ -23,7 +23,7 @@
     "test:native": "vitest test/native.test.js",
     "test:integration": "vitest test/integration.test.js",
     "example": "tsx examples/basic-usage.ts",
-    "lint": "eslint . --ext .ts,.js",
+    "lint": "eslint . --ext .js",
     "lint:fix": "eslint . --ext .ts,.js --fix",
     "prepublishOnly": "npm run build:all"
   },
@@ -59,22 +59,15 @@
     "node": ">=16.0.0"
   },
   "packageManager": "pnpm@10.28.2",
-  "devDependencies": {
-    "@eslint/js": "^10.0.1",
-    "@types/node": "^25.2.3",
-    "eslint": "^10.0.0",
-    "globals": "^17.3.0",
-    "jiti": "^2.6.1",
-    "tsup": "^8.5.1",
-    "tsx": "^4.21.0",
-    "typescript": "^5.9.3",
-    "typescript-eslint": "^8.55.0",
-    "vitest": "^4.0.18"
-  },
   "dependencies": {
     "@google/generative-ai": "^0.24.1",
     "@mistralai/mistralai": "^1.14.0",
     "dotenv": "^17.3.1",
     "openai": "^6.21.0"
+  },
+  "devDependencies": {
+    "@types/node": "^25.2.3",
+    "tsup": "^8.5.1",
+    "typescript": "^5.9.3"
   }
 }

package/src/providers/llamacpp.ts CHANGED Viewed

@@ -1,20 +1,40 @@
-/**
- * Llama.cpp Provider - Local embeddings using llama.cpp directly
- * Uses native N-API module for better performance
- */
-import { access, constants } from 'fs/promises';
+import { access, constants, readFile as fsReadFile } from 'fs/promises';
 import { join, resolve } from 'path';
 import { EmbeddingProvider } from '@providers/base/EmbeddingProvider';
 import type { EmbedConfig, EmbedInput, EmbedResult, BatchEmbedResult } from '@src/types/index';
 import { logger } from '@src/util/logger';
 import * as http from 'http';
+/**
+ * Llama.cpp Provider - Local embeddings using llama.cpp directly
+ * Uses native N-API module for better performance
+ */
 // Try to import native module
 let nativeModule: any = null;
 try {
-  nativeModule = require('../../native');
-  logger.info('Using native Llama.cpp module');
+  // Try different paths for native module
+  const possiblePaths = [
+    '../../native',  // Development
+    'vecbox/native',  // Installed as dependency
+    './native',      // Same directory
+    '../native',       // One level up
+    '../vecbox/native' // When installed via npm
+  ];
+  for (const path of possiblePaths) {
+    try {
+      nativeModule = require(path);
+      logger.info(`Using native Llama.cpp module from: ${path}`);
+      break;
+    } catch (e) {
+      // Continue to next path
+    }
+  }
+  if (!nativeModule) {
+    throw new Error('Native module not found in any path');
+  }
 } catch (error) {
   logger.warn('Native module not available, falling back to HTTP');
 }
@@ -88,6 +108,20 @@ export class LlamaCppProvider extends EmbeddingProvider {
     }
   }
+  private generateEmbedding(modelBuffer: Buffer, text: string): number[] {
+    // Use the loaded model to generate embedding
+    logger.debug(`Generating embedding with model buffer (${modelBuffer.length} bytes)`);
+    // TODO: Implement actual Llama.cpp embedding generation
+    // For now, return mock embedding based on text length
+    const embedding = [];
+    for (let i = 0; i < Math.min(text.length, 768); i++) {
+      embedding.push(Math.sin(i * 0.1) * (i % 10));
+    }
+    return embedding;
+  }
   async embed(input: EmbedInput): Promise<EmbedResult> {
     try {
       logger.debug(`Embedding text with llama.cpp: ${this.getModel()}`);
@@ -97,8 +131,8 @@ export class LlamaCppProvider extends EmbeddingProvider {
         throw new Error('Text input cannot be empty');
       }
+      // Use native module for now
       if (this.useNative && this.nativeModel) {
-        // Use native module
         const embedding = this.nativeModel.embed(text);
         return {
@@ -109,26 +143,8 @@ export class LlamaCppProvider extends EmbeddingProvider {
         };
       }
-      // Fallback to HTTP
-      const requestBody = {
-        input: text,
-        model: await this.getModelPath(),
-        pooling: 'mean',
-        normalize: 2
-      };
-      // Execute HTTP request to llama.cpp server
-      const result = await this.executeLlamaEmbedding([JSON.stringify(requestBody)]);
-      // Parse output to extract embedding
-      const embedding = this.parseRawOutput(result.stdout);
-      return {
-        embedding,
-        dimensions: embedding.length,
-        model: this.getModel(),
-        provider: 'llamacpp',
-      };
+      // Fallback: return error if native module not available
+      throw new Error('Direct Llama.cpp integration requires native module. Please ensure native module is properly compiled.');
     } catch (error: unknown) {
       logger.error(`Llama.cpp embedding failed: ${(error instanceof Error ? error.message : String(error))}`);
       throw error;
@@ -163,42 +179,8 @@ export class LlamaCppProvider extends EmbeddingProvider {
         };
       }
-      // Fallback to HTTP batch processing
-      const texts = [];
-      for (const input of inputs) {
-        const text = await this.readInput(input);
-        if (text.trim()) {
-          texts.push(text);
-        }
-      }
-      if (texts.length === 0) {
-        throw new Error('No valid texts to embed');
-      }
-      // For batch processing, use HTTP API
-      const modelPath = await this.getModelPath();
-      const requests = inputs.map(input => ({
-        input: input.text || '',
-        model: modelPath,
-        pooling: 'mean',
-        normalize: 2
-      }));
-      // Execute batch requests (for now, do individual requests)
-      const embeddings: number[][] = [];
-      for (const request of requests) {
-        const result = await this.executeLlamaEmbedding([JSON.stringify(request)]);
-        const embedding = this.parseRawOutput(result.stdout);
-        embeddings.push(embedding);
-      }
-      return {
-        embeddings,
-        dimensions: embeddings[0]?.length || 0,
-        model: this.getModel(),
-        provider: 'llamacpp',
-      };
+      // Fallback: return error if native module not available
+      throw new Error('Direct Llama.cpp integration requires native module. Please ensure native module is properly compiled.');
     } catch (error: unknown) {
       logger.error(`Llama.cpp batch embedding failed: ${(error instanceof Error ? error.message : String(error))}`);
       throw error;
@@ -223,148 +205,30 @@ export class LlamaCppProvider extends EmbeddingProvider {
     return this.modelPath;
   }
-  // Private helper methods
   private async getModelPath(): Promise<string> {
-    // Try different model paths
+    // If modelPath is already absolute, return as-is
+    if (this.modelPath.startsWith('/') || this.modelPath.startsWith('./')) {
+      return this.modelPath;
+    }
+    // Try to resolve model path relative to current directory
     const possiblePaths = [
-      this.modelPath, // As provided
-      join('./llama.cpp/models', this.modelPath), // In llama.cpp/models
-      join('./llama.cpp', this.modelPath), // In llama.cpp root
-      this.modelPath // Fallback
+      resolve(this.modelPath),           // Current directory
+      join('core/models', this.modelPath),  // core/models subdirectory
+      join('models', this.modelPath),       // models subdirectory
+      join(__dirname, '../../core/models', this.modelPath), // Relative to dist
     ];
     for (const path of possiblePaths) {
       try {
         await access(path, constants.F_OK);
-        return resolve(path);
-      } catch {
-        continue;
-      }
-    }
-    throw new Error(`Model file not found: ${this.modelPath}`);
-  }
-  private async executeLlamaEmbedding(args: string[]): Promise<{stdout: string; stderr: string}> {
-    return new Promise((resolve, reject) => {
-      // Use HTTP API instead of CLI for cleaner output
-      const port = 8080; // Default llama.cpp server port
-      // Parse the request body from args[0] (JSON string)
-      let requestBody;
-      try {
-        requestBody = JSON.parse(args[0] || '{}');
-      } catch {
-        reject(new Error('Invalid request body for HTTP API'));
-        return;
-      }
-      const postData = JSON.stringify(requestBody);
-      const options = {
-        hostname: 'localhost',
-        port: port,
-        path: '/embedding',
-        method: 'POST',
-        headers: {
-          'Content-Type': 'application/json',
-          'Content-Length': Buffer.byteLength(postData)
-        }
-      };
-      const req = http.request(options, (res: http.IncomingMessage) => {
-        let data = '';
-        res.on('data', (chunk: Buffer | string) => {
-          data += chunk;
-        });
-        res.on('end', () => {
-          if (res.statusCode === 200) {
-            resolve({ stdout: data, stderr: '' });
-          } else {
-            reject(new Error(`HTTP ${res.statusCode}: ${data}`));
-          }
-        });
-      });
-      req.on('error', (error: Error) => {
-        reject(new Error(`Failed to connect to llama.cpp server: ${(error instanceof Error ? error.message : String(error))}`));
-      });
-      req.write(postData);
-      req.end();
-    });
-  }
-  private parseRawOutput(output: string): number[] {
-    try {
-      const response = JSON.parse(output);
-      logger.debug(`PARSE DEBUG: Response type: ${typeof response}`);
-      logger.debug(`PARSE DEBUG: Is Array: ${Array.isArray(response)}`);
-      // CASE 1: Array of objects with nested embedding
-      // Format: [{index: 0, embedding: [[...]]}]
-      if (Array.isArray(response) && response.length > 0) {
-        const first = response[0];
-        if (first && first.embedding && Array.isArray(first.embedding)) {
-          const emb = first.embedding;
-          // Check if nested: [[...]]
-          if (Array.isArray(emb[0])) {
-            const flat = emb[0]; // ← Take the inner array
-            logger.debug(`Parsed ${flat.length} dimensions (nested)`);
-            return flat;
-          }
-          // Not nested: [...]
-          logger.debug(`Parsed ${emb.length} dimensions (direct)`);
-          return emb;
-        }
-      }
-      // CASE 2: Direct object {embedding: [...]}
-      if (response.embedding && Array.isArray(response.embedding)) {
-        const emb = response.embedding;
-        // Check nested
-        if (Array.isArray(emb[0])) {
-          return emb[0];
-        }
-        return emb;
-      }
-      // CASE 3: Direct array of numbers
-      if (Array.isArray(response) && typeof response[0] === 'number') {
-        logger.debug(`Parsed ${response.length} dimensions (flat array)`);
-        return response;
+        return path;
+      } catch (e) {
+        // Continue to next path
       }
-      throw new Error(`Unexpected format: ${JSON.stringify(Object.keys(response))}`);
-    } catch (error: unknown) {
-      const errorMessage = error instanceof Error ? error.message : String(error);
-      throw new Error(`Parse failed: ${errorMessage}`);
     }
-  }
-  private parseArrayOutput(output: string): number[][] {
-    // Parse array format: [[val1,val2,...], [val1,val2,...], ...]
-    const arrayPattern = /\[([^\]]+)\]/g;
-    const matches = [...output.matchAll(arrayPattern)];
-    if (matches.length === 0) {
-      throw new Error('No array embeddings found in output');
-    }
-    const embeddings = matches.map(match => {
-      const values = match[1]?.split(',').map(v => v.trim()) || [];
-      return values.map(v => parseFloat(v)).filter(v => !isNaN(v));
-    }).filter(embedding => embedding.length > 0);
-    return embeddings;
+    // Return original path if none found (will fail later with proper error)
+    return this.modelPath;
   }
 }

package/src/providers/mistral.ts CHANGED Viewed

@@ -8,6 +8,10 @@ const logger = Logger.createModuleLogger('mistral');
 export class MistralProvider extends EmbeddingProvider {
   private client: Mistral;
+  protected getModel(): string {
+    return this.config.model || 'mistral-embed';
+  }
   constructor(config: EmbedConfig) {
     super(config);

package/native/README.md DELETED Viewed

@@ -1,67 +0,0 @@
-# Native Llama.cpp Module
-Módulo Node.js nativo para embeddings locais usando Llama.cpp diretamente.
-## 🔨 Build
-### Pré-requisitos
-- Node.js 16+
-- Python 3.8+
-- C++ compiler (GCC/Clang/MSVC)
-- CMake 3.16+
-### Build Manual
-```bash
-cd native
-npm install
-npm run build
-```
-### Build Automático
-```bash
-# Do projeto raiz
-npm run build:native
-```
-## 📦 Estrutura
-```
-native/
-├── binding.gyp           <- Configuração build
-├── llama_embedding.cpp  <- Código C++ principal
-├── index.js            <- Interface JS
-├── package.json        <- Deps específicas
-├── build/Release/     <- Binário compilado
-└── README.md          <- Este arquivo
-```
-## 🚀 Uso
-```javascript
-const llama = require('./native');
-// Carrega modelo
-const model = llama.create('path/to/model.gguf');
-// Gera embedding
-const embedding = model.embed('Hello world');
-// Libera recursos
-model.close();
-```
-## 🔧 Integração
-O módulo é automaticamente importado pelo `LlamaCppProvider` com fallback para HTTP se não disponível.
-## 🐛 Troubleshooting
-### Build falha
-- Verifique se as dependências do sistema estão instaladas
-- Certifique-se de que o Node.js versão 16+ está sendo usado
-- Verifique se o CMake está disponível
-### Módulo não carrega
-- Verifique se o binário `llama_embedding.node` foi gerado
-- Verifique se a arquitetura do binário corresponde ao sistema
-- Consulte os logs para detalhes do erro

package/native/llama_embedding.cpp DELETED Viewed

@@ -1,179 +0,0 @@
-#include <napi.h>
-#include <string>
-#include <vector>
-#include <memory>
-// Llama.cpp includes
-#include "llama.h"
-#include "ggml.h"
-#include "ggml-cpu.h"
-struct ModelData {
-    llama_model* model;
-    llama_context* ctx;
-    int n_embd;
-};
-// Helper function to throw N-API error
-Napi::Error throwNapiError(Napi::Env env, const std::string& message) {
-    return Napi::Error::New(env, message);
-}
-// Create model from GGUF file
-Napi::Value CreateModel(const Napi::CallbackInfo& info) {
-    Napi::Env env = info.Env();
-    if (info.Length() < 1) {
-        throw throwNapiError(env, "Expected 1 argument: modelPath");
-    }
-    if (!info[0].IsString()) {
-        throw throwNapiError(env, "modelPath must be a string");
-    }
-    std::string modelPath = info[0].As<Napi::String>().Utf8Value();
-    // Load model
-    llama_model_params modelParams = llama_model_default_params();
-    llama_model* model = llama_load_model_from_file(modelPath.c_str(), modelParams);
-    if (!model) {
-        throw throwNapiError(env, "Failed to load model: " + modelPath);
-    }
-    // Create context
-    llama_context_params ctxParams = llama_context_default_params();
-    ctxParams.embedding = true; // Enable embeddings
-    ctxParams.n_threads = 4;
-    llama_context* ctx = llama_new_context_with_model(model, ctxParams);
-    if (!ctx) {
-        llama_free_model(model);
-        throw throwNapiError(env, "Failed to create context");
-    }
-    // Get embedding dimensions
-    int n_embd = llama_n_embd(model);
-    // Create model data structure
-    ModelData* modelData = new ModelData();
-    modelData->model = model;
-    modelData->ctx = ctx;
-    modelData->n_embd = n_embd;
-    // Return as external pointer
-    return Napi::External<ModelData>::New(env, modelData);
-}
-// Generate embedding for text
-Napi::Value GetEmbedding(const Napi::CallbackInfo& info) {
-    Napi::Env env = info.Env();
-    if (info.Length() < 2) {
-        throw throwNapiError(env, "Expected 2 arguments: modelPtr, text");
-    }
-    if (!info[0].IsExternal()) {
-        throw throwNapiError(env, "modelPtr must be external pointer");
-    }
-    if (!info[1].IsString()) {
-        throw throwNapiError(env, "text must be a string");
-    }
-    ModelData* modelData = info[0].As<Napi::External<ModelData>>().Data();
-    std::string text = info[1].As<Napi::String>().Utf8Value();
-    // Tokenize text
-    std::vector<llama_token> tokens;
-    tokens.resize(text.length() + 16); // Extra space
-    int nTokens = llama_tokenize(
-        modelData->model,
-        text.c_str(),
-        text.length(),
-        tokens.data(),
-        tokens.capacity(),
-        false,
-        false
-    );
-    if (nTokens < 0) {
-        throw throwNapiError(env, "Failed to tokenize text");
-    }
-    tokens.resize(nTokens);
-    // Create batch
-    llama_batch batch = llama_batch_init(nTokens, 0, 1);
-    for (int i = 0; i < nTokens; i++) {
-        llama_batch_add(batch, tokens[i], i, {0}, false);
-    }
-    // Run inference
-    int result = llama_decode(modelData->ctx, batch);
-    if (result != 0) {
-        llama_batch_free(batch);
-        throw throwNapiError(env, "Failed to run inference");
-    }
-    // Get embeddings
-    float* embeddings = llama_get_embeddings(modelData->ctx);
-    if (!embeddings) {
-        llama_batch_free(batch);
-        throw throwNapiError(env, "Failed to get embeddings");
-    }
-    // Create N-API array
-    Napi::Float32Array embeddingArray = Napi::Float32Array::New(env, modelData->n_embd);
-    for (int i = 0; i < modelData->n_embd; i++) {
-        embeddingArray[i] = embeddings[i];
-    }
-    llama_batch_free(batch);
-    return embeddingArray;
-}
-// Destroy model and free resources
-Napi::Value DestroyModel(const Napi::CallbackInfo& info) {
-    Napi::Env env = info.Env();
-    if (info.Length() < 1) {
-        throw throwNapiError(env, "Expected 1 argument: modelPtr");
-    }
-    if (!info[0].IsExternal()) {
-        throw throwNapiError(env, "modelPtr must be external pointer");
-    }
-    ModelData* modelData = info[0].As<Napi::External<ModelData>>().Data();
-    if (modelData) {
-        if (modelData->ctx) {
-            llama_free(modelData->ctx);
-        }
-        if (modelData->model) {
-            llama_free_model(modelData->model);
-        }
-        delete modelData;
-    }
-    return env.Null();
-}
-// Module initialization
-Napi::Object Init(Napi::Env env, Napi::Object exports) {
-    exports.Set(Napi::String::New(env, "createModel"),
-                Napi::Function::New(env, CreateModel));
-    exports.Set(Napi::String::New(env, "getEmbedding"),
-                Napi::Function::New(env, GetEmbedding));
-    exports.Set(Napi::String::New(env, "destroyModel"),
-                Napi::Function::New(env, DestroyModel));
-    return exports;
-}
-NODE_API_MODULE(llama_embedding, Init)