@elizaos/plugin-local-ai 1.0.0-beta.26 → 1.0.0-beta.28

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -5,7 +5,6 @@ import path5 from "node:path";
5
5
  import { Readable as Readable2 } from "node:stream";
6
6
  import { fileURLToPath } from "node:url";
7
7
  import { ModelType as ModelType2, logger as logger9 } from "@elizaos/core";
8
- import pkg from "fastembed";
9
8
  import {
10
9
  LlamaChatSession,
11
10
  getLlama as getLlama2
@@ -21,7 +20,7 @@ var configSchema = z.object({
21
20
  STUDIOLM_SERVER_URL: z.string().default("http://localhost:1234"),
22
21
  STUDIOLM_SMALL_MODEL: z.string().default("lmstudio-community/deepseek-r1-distill-qwen-1.5b"),
23
22
  STUDIOLM_MEDIUM_MODEL: z.string().default("deepseek-r1-distill-qwen-7b"),
24
- STUDIOLM_EMBEDDING_MODEL: z.union([z.boolean(), z.string()]).default(false)
23
+ STUDIOLM_EMBEDDING_MODEL: z.string().default("BAAI/bge-small-en-v1.5")
25
24
  });
26
25
  function validateModelConfig(config) {
27
26
  logger.info("Validating model configuration with values:", {
@@ -47,7 +46,7 @@ async function validateConfig(config) {
47
46
  STUDIOLM_SERVER_URL: config.STUDIOLM_SERVER_URL || "http://localhost:1234",
48
47
  STUDIOLM_SMALL_MODEL: config.STUDIOLM_SMALL_MODEL || "lmstudio-community/deepseek-r1-distill-qwen-1.5b",
49
48
  STUDIOLM_MEDIUM_MODEL: config.STUDIOLM_MEDIUM_MODEL || "deepseek-r1-distill-qwen-7b",
50
- STUDIOLM_EMBEDDING_MODEL: config.STUDIOLM_EMBEDDING_MODEL || false
49
+ STUDIOLM_EMBEDDING_MODEL: config.STUDIOLM_EMBEDDING_MODEL || "BAAI/bge-small-en-v1.5"
51
50
  };
52
51
  const validatedConfig = configSchema.parse(fullConfig);
53
52
  return validatedConfig;
@@ -90,6 +89,18 @@ var MODEL_SPECS = {
90
89
  type: "llama"
91
90
  }
92
91
  },
92
+ embedding: {
93
+ name: "bge-small-en-v1.5.Q4_K_M.gguf",
94
+ repo: "ChristianAzinn/bge-small-en-v1.5-gguf",
95
+ size: "133 MB",
96
+ quantization: "Q4_K_M",
97
+ contextSize: 512,
98
+ dimensions: 384,
99
+ tokenizer: {
100
+ name: "ChristianAzinn/bge-small-en-v1.5-gguf",
101
+ type: "llama"
102
+ }
103
+ },
93
104
  vision: {
94
105
  name: "Florence-2-base-ft",
95
106
  repo: "onnx-community/Florence-2-base-ft",
@@ -2341,7 +2352,6 @@ var VisionManager = class _VisionManager {
2341
2352
  };
2342
2353
 
2343
2354
  // src/index.ts
2344
- var { EmbeddingModel, FlagEmbedding } = pkg;
2345
2355
  var __filename = fileURLToPath(import.meta.url);
2346
2356
  var __dirname = path5.dirname(__filename);
2347
2357
  var wordsToPunish = [
@@ -2399,17 +2409,20 @@ var LocalAIManager = class _LocalAIManager {
2399
2409
  llama;
2400
2410
  smallModel;
2401
2411
  mediumModel;
2412
+ embeddingModel;
2413
+ embeddingContext;
2402
2414
  ctx;
2403
2415
  sequence;
2404
2416
  chatSession;
2405
2417
  modelPath;
2406
2418
  mediumModelPath;
2419
+ embeddingModelPath;
2407
2420
  cacheDir;
2408
- embeddingModel = null;
2409
2421
  tokenizerManager;
2410
2422
  downloadManager;
2411
2423
  visionManager;
2412
2424
  activeModelConfig;
2425
+ embeddingModelConfig;
2413
2426
  transcribeManager;
2414
2427
  ttsManager;
2415
2428
  studioLMManager;
@@ -2447,6 +2460,7 @@ var LocalAIManager = class _LocalAIManager {
2447
2460
  }
2448
2461
  this.modelPath = path5.join(this.modelsDir, "DeepHermes-3-Llama-3-3B-Preview-q4.gguf");
2449
2462
  this.mediumModelPath = path5.join(this.modelsDir, "DeepHermes-3-Llama-3-8B-q4.gguf");
2463
+ this.embeddingModelPath = path5.join(this.modelsDir, "bge-small-en-v1.5.Q4_K_M.gguf");
2450
2464
  const cacheDirEnv = process.env.CACHE_DIR?.trim();
2451
2465
  if (cacheDirEnv) {
2452
2466
  this.cacheDir = path5.resolve(cacheDirEnv);
@@ -2467,6 +2481,7 @@ var LocalAIManager = class _LocalAIManager {
2467
2481
  this.studioLMManager = StudioLMManager.getInstance();
2468
2482
  }
2469
2483
  this.activeModelConfig = MODEL_SPECS.small;
2484
+ this.embeddingModelConfig = MODEL_SPECS.embedding;
2470
2485
  }
2471
2486
  /**
2472
2487
  * Retrieves the singleton instance of LocalAIManager. If an instance does not already exist, a new one is created and returned.
@@ -2531,18 +2546,31 @@ var LocalAIManager = class _LocalAIManager {
2531
2546
  }
2532
2547
  /**
2533
2548
  * Downloads the model based on the modelPath provided.
2534
- * Determines whether to download a large or small model based on the current modelPath.
2549
+ * Determines the model spec and path based on the model type.
2535
2550
  *
2551
+ * @param {ModelTypeName} modelType - The type of model to download
2552
+ * @param {ModelSpec} [customModelSpec] - Optional custom model spec to use instead of the default
2536
2553
  * @returns A Promise that resolves to a boolean indicating whether the model download was successful.
2537
2554
  */
2538
- async downloadModel(modelType) {
2539
- const modelSpec = modelType === ModelType2.TEXT_LARGE ? MODEL_SPECS.medium : MODEL_SPECS.small;
2540
- const modelPath = modelType === ModelType2.TEXT_LARGE ? this.mediumModelPath : this.modelPath;
2555
+ async downloadModel(modelType, customModelSpec) {
2556
+ let modelSpec;
2557
+ let modelPath;
2558
+ if (customModelSpec) {
2559
+ modelSpec = customModelSpec;
2560
+ modelPath = modelType === ModelType2.TEXT_EMBEDDING ? this.embeddingModelPath : modelType === ModelType2.TEXT_LARGE ? this.mediumModelPath : this.modelPath;
2561
+ } else if (modelType === ModelType2.TEXT_EMBEDDING) {
2562
+ modelSpec = MODEL_SPECS.embedding;
2563
+ modelPath = this.embeddingModelPath;
2564
+ } else {
2565
+ modelSpec = modelType === ModelType2.TEXT_LARGE ? MODEL_SPECS.medium : MODEL_SPECS.small;
2566
+ modelPath = modelType === ModelType2.TEXT_LARGE ? this.mediumModelPath : this.modelPath;
2567
+ }
2541
2568
  try {
2542
2569
  return await this.downloadManager.downloadModel(modelSpec, modelPath);
2543
2570
  } catch (error) {
2544
2571
  logger9.error("Model download failed:", {
2545
2572
  error: error instanceof Error ? error.message : String(error),
2573
+ modelType,
2546
2574
  modelPath
2547
2575
  });
2548
2576
  throw error;
@@ -2594,31 +2622,107 @@ var LocalAIManager = class _LocalAIManager {
2594
2622
  logger9.warn("Models directory does not exist, creating it:", this.modelsDir);
2595
2623
  fs5.mkdirSync(this.modelsDir, { recursive: true });
2596
2624
  }
2625
+ await this.downloadModel(ModelType2.TEXT_EMBEDDING);
2626
+ if (!this.llama) {
2627
+ this.llama = await getLlama2();
2628
+ }
2597
2629
  if (!this.embeddingModel) {
2598
- logger9.info("Creating new FlagEmbedding instance with BGESmallENV15 model");
2599
- const barLength = 30;
2600
- const emptyBar = "\u25B1".repeat(barLength);
2601
- logger9.info(`Downloading embedding model: ${emptyBar} 0%`);
2602
- this.embeddingModel = await FlagEmbedding.init({
2603
- cacheDir: this.modelsDir,
2604
- model: EmbeddingModel.BGESmallENV15,
2605
- maxLength: 512,
2606
- showDownloadProgress: true
2630
+ logger9.info("Loading embedding model:", this.embeddingModelPath);
2631
+ this.embeddingModel = await this.llama.loadModel({
2632
+ modelPath: this.embeddingModelPath,
2633
+ gpuLayers: 0,
2634
+ // Embedding models are typically small enough to run on CPU
2635
+ vocabOnly: false
2636
+ });
2637
+ this.embeddingContext = await this.embeddingModel.createEmbeddingContext({
2638
+ contextSize: this.embeddingModelConfig.contextSize,
2639
+ batchSize: 512
2607
2640
  });
2608
- const completedBar = "\u25B0".repeat(barLength);
2609
- logger9.info(`Downloading embedding model: ${completedBar} 100%`);
2610
- logger9.success("FlagEmbedding instance created successfully");
2641
+ logger9.success("Embedding model initialized successfully");
2611
2642
  }
2612
2643
  } catch (error) {
2613
2644
  logger9.error("Embedding initialization failed with details:", {
2614
2645
  error: error instanceof Error ? error.message : String(error),
2615
2646
  stack: error instanceof Error ? error.stack : void 0,
2616
2647
  modelsDir: this.modelsDir,
2617
- model: EmbeddingModel.BGESmallENV15
2648
+ embeddingModelPath: this.embeddingModelPath
2618
2649
  });
2619
2650
  throw error;
2620
2651
  }
2621
2652
  }
2653
+ /**
2654
+ * Generate embeddings using the proper LlamaContext.getEmbedding method.
2655
+ */
2656
+ async generateEmbedding(text) {
2657
+ try {
2658
+ await this.lazyInitEmbedding();
2659
+ if (!this.embeddingModel || !this.embeddingContext) {
2660
+ throw new Error("Failed to initialize embedding model");
2661
+ }
2662
+ logger9.info("Generating embedding for text", { textLength: text.length });
2663
+ const embeddingResult = await this.embeddingContext.getEmbeddingFor(text);
2664
+ const mutableEmbedding = [...embeddingResult.vector];
2665
+ const normalizedEmbedding = this.normalizeEmbedding(mutableEmbedding);
2666
+ logger9.info("Embedding generation complete", { dimensions: normalizedEmbedding.length });
2667
+ return normalizedEmbedding;
2668
+ } catch (error) {
2669
+ logger9.error("Embedding generation failed:", {
2670
+ error: error instanceof Error ? error.message : String(error),
2671
+ stack: error instanceof Error ? error.stack : void 0,
2672
+ textLength: text?.length ?? "text is null"
2673
+ });
2674
+ const zeroDimensions = process.env.LOCAL_EMBEDDING_DIMENSIONS ? parseInt(process.env.LOCAL_EMBEDDING_DIMENSIONS, 10) : this.embeddingModelConfig.dimensions;
2675
+ return new Array(zeroDimensions).fill(0);
2676
+ }
2677
+ }
2678
+ /**
2679
+ * Normalizes an embedding vector using L2 normalization
2680
+ *
2681
+ * @param {number[]} embedding - The embedding vector to normalize
2682
+ * @returns {number[]} - The normalized embedding vector
2683
+ */
2684
+ normalizeEmbedding(embedding) {
2685
+ const squareSum = embedding.reduce((sum, val) => sum + val * val, 0);
2686
+ const norm = Math.sqrt(squareSum);
2687
+ if (norm === 0) {
2688
+ return embedding;
2689
+ }
2690
+ return embedding.map((val) => val / norm);
2691
+ }
2692
+ /**
2693
+ * Lazy initialize the embedding model
2694
+ */
2695
+ async lazyInitEmbedding() {
2696
+ if (this.embeddingInitialized) return;
2697
+ if (!this.embeddingInitializingPromise) {
2698
+ this.embeddingInitializingPromise = (async () => {
2699
+ try {
2700
+ await this.initializeEnvironment();
2701
+ await this.downloadModel(ModelType2.TEXT_EMBEDDING);
2702
+ if (!this.llama) {
2703
+ this.llama = await getLlama2();
2704
+ }
2705
+ this.embeddingModel = await this.llama.loadModel({
2706
+ modelPath: this.embeddingModelPath,
2707
+ gpuLayers: 0,
2708
+ // Embedding models are typically small enough to run on CPU
2709
+ vocabOnly: false
2710
+ });
2711
+ this.embeddingContext = await this.embeddingModel.createEmbeddingContext({
2712
+ contextSize: this.embeddingModelConfig.contextSize,
2713
+ batchSize: 512
2714
+ });
2715
+ this.embeddingInitialized = true;
2716
+ logger9.info("Embedding model initialized successfully");
2717
+ } catch (error) {
2718
+ logger9.error("Failed to initialize embedding model:", error);
2719
+ this.embeddingInitializingPromise = null;
2720
+ throw error;
2721
+ }
2722
+ })();
2723
+ }
2724
+ await this.embeddingInitializingPromise;
2725
+ }
2622
2726
  /**
2623
2727
  * Asynchronously generates text using StudioLM models based on the specified parameters.
2624
2728
  *
@@ -2740,30 +2844,6 @@ var LocalAIManager = class _LocalAIManager {
2740
2844
  throw error;
2741
2845
  }
2742
2846
  }
2743
- /**
2744
- * Generate embeddings - now with lazy initialization
2745
- */
2746
- async generateEmbedding(text) {
2747
- try {
2748
- await this.lazyInitEmbedding();
2749
- if (!this.embeddingModel) {
2750
- throw new Error("Failed to initialize embedding model");
2751
- }
2752
- logger9.info("Generating query embedding...");
2753
- const embedding = await this.embeddingModel.queryEmbed(text);
2754
- const dimensions = embedding.length;
2755
- logger9.info("Embedding generation complete", { dimensions });
2756
- return Array.from(embedding);
2757
- } catch (error) {
2758
- logger9.error("Embedding generation failed:", {
2759
- error: error instanceof Error ? error.message : String(error),
2760
- stack: error instanceof Error ? error.stack : void 0,
2761
- // Only access text.length if text exists
2762
- textLength: text?.length ?? "text is null"
2763
- });
2764
- throw error;
2765
- }
2766
- }
2767
2847
  /**
2768
2848
  * Describe image with lazy vision model initialization
2769
2849
  */
@@ -2923,26 +3003,6 @@ var LocalAIManager = class _LocalAIManager {
2923
3003
  }
2924
3004
  await this.mediumModelInitializingPromise;
2925
3005
  }
2926
- /**
2927
- * Lazy initialize the embedding model
2928
- */
2929
- async lazyInitEmbedding() {
2930
- if (this.embeddingInitialized) return;
2931
- if (!this.embeddingInitializingPromise) {
2932
- this.embeddingInitializingPromise = (async () => {
2933
- try {
2934
- await this.initializeEmbedding();
2935
- this.embeddingInitialized = true;
2936
- logger9.info("Embedding model initialized successfully");
2937
- } catch (error) {
2938
- logger9.error("Failed to initialize embedding model:", error);
2939
- this.embeddingInitializingPromise = null;
2940
- throw error;
2941
- }
2942
- })();
2943
- }
2944
- await this.embeddingInitializingPromise;
2945
- }
2946
3006
  /**
2947
3007
  * Lazy initialize the vision model
2948
3008
  */