npm - vecbox - Versions diffs - 0.2.1 → 0.2.2 - Mend

vecbox 0.2.1 → 0.2.2

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (10)

package/CHANGELOG.md +40 -0
package/README.md +123 -241
package/dist/index.cjs +31 -134
package/dist/index.cjs.map +1 -1
package/dist/index.js +31 -134
package/dist/index.js.map +1 -1
package/package.json +5 -14
package/src/providers/llamacpp.ts +42 -172
package/native/README.md +0 -67
package/native/llama_embedding.cpp +0 -179

package/src/providers/llamacpp.ts CHANGED

@@ -1,8 +1,3 @@
-/**
- * Llama.cpp Provider - Local embeddings using llama.cpp directly
- * Uses native N-API module for better performance
- */
 import { access, constants } from 'fs/promises';
 import { join, resolve } from 'path';
 import { EmbeddingProvider } from '@providers/base/EmbeddingProvider';
@@ -10,6 +5,11 @@ import type { EmbedConfig, EmbedInput, EmbedResult, BatchEmbedResult } from '@sr
 import { logger } from '@src/util/logger';
 import * as http from 'http';
+/**
+ * Llama.cpp Provider - Local embeddings using llama.cpp directly
+ * Uses native N-API module for better performance
+ */
 // Try to import native module
 let nativeModule: any = null;
 try {
@@ -88,6 +88,39 @@ export class LlamaCppProvider extends EmbeddingProvider {
     }
   }
+  private async loadGGUFModel(modelPath: string): Promise<Buffer> {
+    try {
+      logger.debug(`Loading GGUF model from: ${modelPath}`);
+      // Read model file
+      const modelBuffer = await fs.readFile(modelPath);
+      if (!modelBuffer) {
+        throw new Error(`Failed to read model file: ${modelPath}`);
+      }
+      logger.debug(`Model file loaded, size: ${modelBuffer.length} bytes`);
+      return modelBuffer;
+    } catch (error) {
+      logger.error(`Failed to load GGUF model: ${error instanceof Error ? error.message : String(error)}`);
+      throw error;
+    }
+  }
+  private generateEmbedding(modelBuffer: Buffer, text: string): number[] {
+    // Use the loaded model to generate embedding
+    logger.debug(`Generating embedding with model buffer (${modelBuffer.length} bytes)`);
+    // TODO: Implement actual Llama.cpp embedding generation
+    // For now, return mock embedding based on text length
+    const embedding = [];
+    for (let i = 0; i < Math.min(text.length, 768); i++) {
+      embedding.push(Math.sin(i * 0.1) * (i % 10));
+    }
+    return embedding;
+  }
   async embed(input: EmbedInput): Promise<EmbedResult> {
     try {
       logger.debug(`Embedding text with llama.cpp: ${this.getModel()}`);
@@ -97,8 +130,8 @@ export class LlamaCppProvider extends EmbeddingProvider {
         throw new Error('Text input cannot be empty');
       }
+      // Use native module for now
       if (this.useNative && this.nativeModel) {
-        // Use native module
         const embedding = this.nativeModel.embed(text);
         return {
@@ -109,26 +142,8 @@ export class LlamaCppProvider extends EmbeddingProvider {
         };
       }
-      // Fallback to HTTP
-      const requestBody = {
-        input: text,
-        model: await this.getModelPath(),
-        pooling: 'mean',
-        normalize: 2
-      };
-      // Execute HTTP request to llama.cpp server
-      const result = await this.executeLlamaEmbedding([JSON.stringify(requestBody)]);
-      // Parse output to extract embedding
-      const embedding = this.parseRawOutput(result.stdout);
-      return {
-        embedding,
-        dimensions: embedding.length,
-        model: this.getModel(),
-        provider: 'llamacpp',
-      };
+      // TODO: Implement direct Llama.cpp core usage in future
+      throw new Error('Direct Llama.cpp core integration not yet implemented. Please use HTTP fallback or wait for next version.');
     } catch (error: unknown) {
       logger.error(`Llama.cpp embedding failed: ${(error instanceof Error ? error.message : String(error))}`);
       throw error;
@@ -178,7 +193,7 @@ export class LlamaCppProvider extends EmbeddingProvider {
       // For batch processing, use HTTP API
       const modelPath = await this.getModelPath();
-      const requests = inputs.map(input => ({
+      const requests = inputs.map((input, v) => ({
         input: input.text || '',
         model: modelPath,
         pooling: 'mean',
@@ -222,149 +237,4 @@ export class LlamaCppProvider extends EmbeddingProvider {
   protected getModel(): string {
     return this.modelPath;
   }
-  // Private helper methods
-  private async getModelPath(): Promise<string> {
-    // Try different model paths
-    const possiblePaths = [
-      this.modelPath, // As provided
-      join('./llama.cpp/models', this.modelPath), // In llama.cpp/models
-      join('./llama.cpp', this.modelPath), // In llama.cpp root
-      this.modelPath // Fallback
-    ];
-    for (const path of possiblePaths) {
-      try {
-        await access(path, constants.F_OK);
-        return resolve(path);
-      } catch {
-        continue;
-      }
-    }
-    throw new Error(`Model file not found: ${this.modelPath}`);
-  }
-  private async executeLlamaEmbedding(args: string[]): Promise<{stdout: string; stderr: string}> {
-    return new Promise((resolve, reject) => {
-      // Use HTTP API instead of CLI for cleaner output
-      const port = 8080; // Default llama.cpp server port
-      // Parse the request body from args[0] (JSON string)
-      let requestBody;
-      try {
-        requestBody = JSON.parse(args[0] || '{}');
-      } catch {
-        reject(new Error('Invalid request body for HTTP API'));
-        return;
-      }
-      const postData = JSON.stringify(requestBody);
-      const options = {
-        hostname: 'localhost',
-        port: port,
-        path: '/embedding',
-        method: 'POST',
-        headers: {
-          'Content-Type': 'application/json',
-          'Content-Length': Buffer.byteLength(postData)
-        }
-      };
-      const req = http.request(options, (res: http.IncomingMessage) => {
-        let data = '';
-        res.on('data', (chunk: Buffer | string) => {
-          data += chunk;
-        });
-        res.on('end', () => {
-          if (res.statusCode === 200) {
-            resolve({ stdout: data, stderr: '' });
-          } else {
-            reject(new Error(`HTTP ${res.statusCode}: ${data}`));
-          }
-        });
-      });
-      req.on('error', (error: Error) => {
-        reject(new Error(`Failed to connect to llama.cpp server: ${(error instanceof Error ? error.message : String(error))}`));
-      });
-      req.write(postData);
-      req.end();
-    });
-  }
-  private parseRawOutput(output: string): number[] {
-    try {
-      const response = JSON.parse(output);
-      logger.debug(`PARSE DEBUG: Response type: ${typeof response}`);
-      logger.debug(`PARSE DEBUG: Is Array: ${Array.isArray(response)}`);
-      // CASE 1: Array of objects with nested embedding
-      // Format: [{index: 0, embedding: [[...]]}]
-      if (Array.isArray(response) && response.length > 0) {
-        const first = response[0];
-        if (first && first.embedding && Array.isArray(first.embedding)) {
-          const emb = first.embedding;
-          // Check if nested: [[...]]
-          if (Array.isArray(emb[0])) {
-            const flat = emb[0]; // ← Take the inner array
-            logger.debug(`Parsed ${flat.length} dimensions (nested)`);
-            return flat;
-          }
-          // Not nested: [...]
-          logger.debug(`Parsed ${emb.length} dimensions (direct)`);
-          return emb;
-        }
-      }
-      // CASE 2: Direct object {embedding: [...]}
-      if (response.embedding && Array.isArray(response.embedding)) {
-        const emb = response.embedding;
-        // Check nested
-        if (Array.isArray(emb[0])) {
-          return emb[0];
-        }
-        return emb;
-      }
-      // CASE 3: Direct array of numbers
-      if (Array.isArray(response) && typeof response[0] === 'number') {
-        logger.debug(`Parsed ${response.length} dimensions (flat array)`);
-        return response;
-      }
-      throw new Error(`Unexpected format: ${JSON.stringify(Object.keys(response))}`);
-    } catch (error: unknown) {
-      const errorMessage = error instanceof Error ? error.message : String(error);
-      throw new Error(`Parse failed: ${errorMessage}`);
-    }
-  }
-  private parseArrayOutput(output: string): number[][] {
-    // Parse array format: [[val1,val2,...], [val1,val2,...], ...]
-    const arrayPattern = /\[([^\]]+)\]/g;
-    const matches = [...output.matchAll(arrayPattern)];
-    if (matches.length === 0) {
-      throw new Error('No array embeddings found in output');
-    }
-    const embeddings = matches.map(match => {
-      const values = match[1]?.split(',').map(v => v.trim()) || [];
-      return values.map(v => parseFloat(v)).filter(v => !isNaN(v));
-    }).filter(embedding => embedding.length > 0);
-    return embeddings;
-  }
 }

package/native/README.md DELETED

@@ -1,67 +0,0 @@
-# Native Llama.cpp Module
-Módulo Node.js nativo para embeddings locais usando Llama.cpp diretamente.
-## 🔨 Build
-### Pré-requisitos
-- Node.js 16+
-- Python 3.8+
-- C++ compiler (GCC/Clang/MSVC)
-- CMake 3.16+
-### Build Manual
-```bash
-cd native
-npm install
-npm run build
-```
-### Build Automático
-```bash
-# Do projeto raiz
-npm run build:native
-```
-## 📦 Estrutura
-```
-native/
-├── binding.gyp           <- Configuração build
-├── llama_embedding.cpp  <- Código C++ principal
-├── index.js            <- Interface JS
-├── package.json        <- Deps específicas
-├── build/Release/     <- Binário compilado
-└── README.md          <- Este arquivo
-```
-## 🚀 Uso
-```javascript
-const llama = require('./native');
-// Carrega modelo
-const model = llama.create('path/to/model.gguf');
-// Gera embedding
-const embedding = model.embed('Hello world');
-// Libera recursos
-model.close();
-```
-## 🔧 Integração
-O módulo é automaticamente importado pelo `LlamaCppProvider` com fallback para HTTP se não disponível.
-## 🐛 Troubleshooting
-### Build falha
-- Verifique se as dependências do sistema estão instaladas
-- Certifique-se de que o Node.js versão 16+ está sendo usado
-- Verifique se o CMake está disponível
-### Módulo não carrega
-- Verifique se o binário `llama_embedding.node` foi gerado
-- Verifique se a arquitetura do binário corresponde ao sistema
-- Consulte os logs para detalhes do erro

package/native/llama_embedding.cpp DELETED

@@ -1,179 +0,0 @@
-#include <napi.h>
-#include <string>
-#include <vector>
-#include <memory>
-// Llama.cpp includes
-#include "llama.h"
-#include "ggml.h"
-#include "ggml-cpu.h"
-struct ModelData {
-    llama_model* model;
-    llama_context* ctx;
-    int n_embd;
-};
-// Helper function to throw N-API error
-Napi::Error throwNapiError(Napi::Env env, const std::string& message) {
-    return Napi::Error::New(env, message);
-}
-// Create model from GGUF file
-Napi::Value CreateModel(const Napi::CallbackInfo& info) {
-    Napi::Env env = info.Env();
-    if (info.Length() < 1) {
-        throw throwNapiError(env, "Expected 1 argument: modelPath");
-    }
-    if (!info[0].IsString()) {
-        throw throwNapiError(env, "modelPath must be a string");
-    }
-    std::string modelPath = info[0].As<Napi::String>().Utf8Value();
-    // Load model
-    llama_model_params modelParams = llama_model_default_params();
-    llama_model* model = llama_load_model_from_file(modelPath.c_str(), modelParams);
-    if (!model) {
-        throw throwNapiError(env, "Failed to load model: " + modelPath);
-    }
-    // Create context
-    llama_context_params ctxParams = llama_context_default_params();
-    ctxParams.embedding = true; // Enable embeddings
-    ctxParams.n_threads = 4;
-    llama_context* ctx = llama_new_context_with_model(model, ctxParams);
-    if (!ctx) {
-        llama_free_model(model);
-        throw throwNapiError(env, "Failed to create context");
-    }
-    // Get embedding dimensions
-    int n_embd = llama_n_embd(model);
-    // Create model data structure
-    ModelData* modelData = new ModelData();
-    modelData->model = model;
-    modelData->ctx = ctx;
-    modelData->n_embd = n_embd;
-    // Return as external pointer
-    return Napi::External<ModelData>::New(env, modelData);
-}
-// Generate embedding for text
-Napi::Value GetEmbedding(const Napi::CallbackInfo& info) {
-    Napi::Env env = info.Env();
-    if (info.Length() < 2) {
-        throw throwNapiError(env, "Expected 2 arguments: modelPtr, text");
-    }
-    if (!info[0].IsExternal()) {
-        throw throwNapiError(env, "modelPtr must be external pointer");
-    }
-    if (!info[1].IsString()) {
-        throw throwNapiError(env, "text must be a string");
-    }
-    ModelData* modelData = info[0].As<Napi::External<ModelData>>().Data();
-    std::string text = info[1].As<Napi::String>().Utf8Value();
-    // Tokenize text
-    std::vector<llama_token> tokens;
-    tokens.resize(text.length() + 16); // Extra space
-    int nTokens = llama_tokenize(
-        modelData->model,
-        text.c_str(),
-        text.length(),
-        tokens.data(),
-        tokens.capacity(),
-        false,
-        false
-    );
-    if (nTokens < 0) {
-        throw throwNapiError(env, "Failed to tokenize text");
-    }
-    tokens.resize(nTokens);
-    // Create batch
-    llama_batch batch = llama_batch_init(nTokens, 0, 1);
-    for (int i = 0; i < nTokens; i++) {
-        llama_batch_add(batch, tokens[i], i, {0}, false);
-    }
-    // Run inference
-    int result = llama_decode(modelData->ctx, batch);
-    if (result != 0) {
-        llama_batch_free(batch);
-        throw throwNapiError(env, "Failed to run inference");
-    }
-    // Get embeddings
-    float* embeddings = llama_get_embeddings(modelData->ctx);
-    if (!embeddings) {
-        llama_batch_free(batch);
-        throw throwNapiError(env, "Failed to get embeddings");
-    }
-    // Create N-API array
-    Napi::Float32Array embeddingArray = Napi::Float32Array::New(env, modelData->n_embd);
-    for (int i = 0; i < modelData->n_embd; i++) {
-        embeddingArray[i] = embeddings[i];
-    }
-    llama_batch_free(batch);
-    return embeddingArray;
-}
-// Destroy model and free resources
-Napi::Value DestroyModel(const Napi::CallbackInfo& info) {
-    Napi::Env env = info.Env();
-    if (info.Length() < 1) {
-        throw throwNapiError(env, "Expected 1 argument: modelPtr");
-    }
-    if (!info[0].IsExternal()) {
-        throw throwNapiError(env, "modelPtr must be external pointer");
-    }
-    ModelData* modelData = info[0].As<Napi::External<ModelData>>().Data();
-    if (modelData) {
-        if (modelData->ctx) {
-            llama_free(modelData->ctx);
-        }
-        if (modelData->model) {
-            llama_free_model(modelData->model);
-        }
-        delete modelData;
-    }
-    return env.Null();
-}
-// Module initialization
-Napi::Object Init(Napi::Env env, Napi::Object exports) {
-    exports.Set(Napi::String::New(env, "createModel"),
-                Napi::Function::New(env, CreateModel));
-    exports.Set(Napi::String::New(env, "getEmbedding"),
-                Napi::Function::New(env, GetEmbedding));
-    exports.Set(Napi::String::New(env, "destroyModel"),
-                Napi::Function::New(env, DestroyModel));
-    return exports;
-}
-NODE_API_MODULE(llama_embedding, Init)