@elizaos/plugin-local-embedding 2.0.0-alpha.6 → 2.0.0-alpha.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -4,6 +4,7 @@ import {
4
4
  logger as logger5
5
5
  } from "@elizaos/core";
6
6
  import {
7
+ LlamaLogLevel,
7
8
  getLlama
8
9
  } from "node-llama-cpp";
9
10
  import fs2 from "fs";
@@ -17,44 +18,73 @@ import { z } from "zod";
17
18
  var DEFAULT_EMBEDDING_MODEL = "bge-small-en-v1.5.Q4_K_M.gguf";
18
19
  var configSchema = z.object({
19
20
  LOCAL_EMBEDDING_MODEL: z.string().optional().default(DEFAULT_EMBEDDING_MODEL),
21
+ LOCAL_EMBEDDING_MODEL_REPO: z.string().optional(),
20
22
  MODELS_DIR: z.string().optional(),
21
23
  // Path for the models directory
22
24
  CACHE_DIR: z.string().optional(),
23
25
  // Path for the cache directory
24
- LOCAL_EMBEDDING_DIMENSIONS: z.string().optional().default("384").transform((val) => parseInt(val, 10))
25
- // Transform to number
26
+ LOCAL_EMBEDDING_DIMENSIONS: z.string().optional().transform((val) => {
27
+ if (!val || !val.trim()) return void 0;
28
+ const parsed = Number.parseInt(val, 10);
29
+ return Number.isInteger(parsed) && parsed > 0 ? parsed : void 0;
30
+ }),
31
+ LOCAL_EMBEDDING_CONTEXT_SIZE: z.string().optional().transform((val) => {
32
+ if (!val || !val.trim()) return void 0;
33
+ const parsed = Number.parseInt(val, 10);
34
+ return Number.isInteger(parsed) && parsed > 0 ? parsed : void 0;
35
+ }),
36
+ LOCAL_EMBEDDING_GPU_LAYERS: z.string().optional().default("0").transform((val) => {
37
+ if (val === "auto") return -1;
38
+ const num = parseInt(val, 10);
39
+ return Number.isNaN(num) ? 0 : num;
40
+ }),
41
+ LOCAL_EMBEDDING_USE_MMAP: z.string().optional().default("true").transform((val) => val === "true")
26
42
  });
27
43
  function validateConfig() {
28
44
  try {
29
45
  const configToParse = {
30
46
  LOCAL_EMBEDDING_MODEL: process.env.LOCAL_EMBEDDING_MODEL,
47
+ LOCAL_EMBEDDING_MODEL_REPO: process.env.LOCAL_EMBEDDING_MODEL_REPO,
31
48
  MODELS_DIR: process.env.MODELS_DIR,
32
49
  // Read models directory path from env
33
50
  CACHE_DIR: process.env.CACHE_DIR,
34
51
  // Read cache directory path from env
35
- LOCAL_EMBEDDING_DIMENSIONS: process.env.LOCAL_EMBEDDING_DIMENSIONS
52
+ LOCAL_EMBEDDING_DIMENSIONS: process.env.LOCAL_EMBEDDING_DIMENSIONS,
36
53
  // Read embedding dimensions
54
+ LOCAL_EMBEDDING_CONTEXT_SIZE: process.env.LOCAL_EMBEDDING_CONTEXT_SIZE,
55
+ LOCAL_EMBEDDING_GPU_LAYERS: process.env.LOCAL_EMBEDDING_GPU_LAYERS,
56
+ LOCAL_EMBEDDING_USE_MMAP: process.env.LOCAL_EMBEDDING_USE_MMAP
37
57
  };
38
- logger.debug("Validating configuration for local AI plugin from env:", {
39
- LOCAL_EMBEDDING_MODEL: configToParse.LOCAL_EMBEDDING_MODEL,
40
- MODELS_DIR: configToParse.MODELS_DIR,
41
- CACHE_DIR: configToParse.CACHE_DIR,
42
- LOCAL_EMBEDDING_DIMENSIONS: configToParse.LOCAL_EMBEDDING_DIMENSIONS
43
- });
58
+ logger.debug(
59
+ {
60
+ LOCAL_EMBEDDING_MODEL: configToParse.LOCAL_EMBEDDING_MODEL,
61
+ LOCAL_EMBEDDING_MODEL_REPO: configToParse.LOCAL_EMBEDDING_MODEL_REPO,
62
+ MODELS_DIR: configToParse.MODELS_DIR,
63
+ CACHE_DIR: configToParse.CACHE_DIR,
64
+ LOCAL_EMBEDDING_DIMENSIONS: configToParse.LOCAL_EMBEDDING_DIMENSIONS,
65
+ LOCAL_EMBEDDING_CONTEXT_SIZE: configToParse.LOCAL_EMBEDDING_CONTEXT_SIZE,
66
+ LOCAL_EMBEDDING_GPU_LAYERS: configToParse.LOCAL_EMBEDDING_GPU_LAYERS,
67
+ LOCAL_EMBEDDING_USE_MMAP: configToParse.LOCAL_EMBEDDING_USE_MMAP
68
+ },
69
+ "Validating configuration for local AI plugin from env:"
70
+ );
44
71
  const validatedConfig = configSchema.parse(configToParse);
45
- logger.info("Using local AI configuration:", validatedConfig);
72
+ logger.info(validatedConfig, "Using local AI configuration:");
46
73
  return validatedConfig;
47
74
  } catch (error) {
48
75
  if (error instanceof z.ZodError) {
49
- const errorMessages = error.errors.map((err) => `${err.path.join(".")}: ${err.message}`).join("\n");
50
- logger.error("Zod validation failed:", errorMessages);
76
+ const errorMessages = error.issues.map((err) => `${err.path.join(".")}: ${err.message}`).join("\n");
77
+ logger.error(errorMessages, "Zod validation failed:");
51
78
  throw new Error(`Configuration validation failed:
52
79
  ${errorMessages}`);
53
80
  }
54
- logger.error("Configuration validation failed:", {
55
- error: error instanceof Error ? error.message : String(error),
56
- stack: error instanceof Error ? error.stack : void 0
57
- });
81
+ logger.error(
82
+ {
83
+ error: error instanceof Error ? error.message : String(error),
84
+ stack: error instanceof Error ? error.stack : void 0
85
+ },
86
+ "Configuration validation failed:"
87
+ );
58
88
  throw error;
59
89
  }
60
90
  }
@@ -147,6 +177,11 @@ import fs from "fs";
147
177
  import https from "https";
148
178
  import path from "path";
149
179
  import { logger as logger2 } from "@elizaos/core";
180
/**
 * Parse an HTTP Content-Length header value.
 *
 * Only a plain run of decimal digits (optionally surrounded by whitespace) is
 * accepted. Values such as "12abc" or "1.5" are rejected rather than being
 * truncated to a leading integer, because the result is later compared
 * against the number of bytes actually received.
 *
 * @param {string | string[] | number | undefined} contentLength - Raw header value.
 * @returns {number | null} The byte count, or null when the header is
 *   missing, repeated (array) or not a valid non-negative integer.
 */
function parseContentLength(contentLength) {
  if (typeof contentLength === "number") {
    return Number.isSafeInteger(contentLength) && contentLength >= 0 ? contentLength : null;
  }
  // Arrays (repeated headers), undefined and other types are not usable
  if (typeof contentLength !== "string") return null;
  const trimmed = contentLength.trim();
  if (!/^\d+$/.test(trimmed)) return null;
  const parsed = Number.parseInt(trimmed, 10);
  // Reject digit strings too large to be represented exactly
  return Number.isSafeInteger(parsed) ? parsed : null;
}
150
185
  var DownloadManager = class _DownloadManager {
151
186
  static instance = null;
152
187
  cacheDir;
@@ -243,10 +278,10 @@ var DownloadManager = class _DownloadManager {
243
278
  reject(new Error(`Failed to download: ${response.statusCode}`));
244
279
  return;
245
280
  }
246
- const totalSize = Number.parseInt(
247
- response.headers["content-length"] || "0",
248
- 10
281
+ const expectedBytes = parseContentLength(
282
+ response.headers["content-length"]
249
283
  );
284
+ const totalSize = expectedBytes ?? 0;
250
285
  let downloadedSize = 0;
251
286
  let lastLoggedPercent = 0;
252
287
  const barLength = 30;
@@ -255,6 +290,7 @@ var DownloadManager = class _DownloadManager {
255
290
  const file = fs.createWriteStream(tempPath);
256
291
  response.on("data", (chunk) => {
257
292
  downloadedSize += chunk.length;
293
+ if (!totalSize) return;
258
294
  const percent = Math.round(downloadedSize / totalSize * 100);
259
295
  if (percent >= lastLoggedPercent + 5) {
260
296
  const filledLength = Math.floor(
@@ -283,6 +319,18 @@ var DownloadManager = class _DownloadManager {
283
319
  );
284
320
  return;
285
321
  }
322
+ if (expectedBytes !== null && downloadedSize !== expectedBytes) {
323
+ try {
324
+ fs.unlinkSync(tempPath);
325
+ } catch {
326
+ }
327
+ reject(
328
+ new Error(
329
+ `Downloaded file size mismatch for ${destPath}: expected ${expectedBytes} bytes, got ${downloadedSize}`
330
+ )
331
+ );
332
+ return;
333
+ }
286
334
  if (fs.existsSync(destPath)) {
287
335
  try {
288
336
  const backupPath = `${destPath}.bak`;
@@ -442,7 +490,7 @@ var DownloadManager = class _DownloadManager {
442
490
  * @param {string} modelPath - The path where the model will be saved.
443
491
  * @returns {Promise<boolean>} - Indicates if the model was successfully downloaded or not.
444
492
  */
445
- async downloadModel(modelSpec, modelPath) {
493
+ async downloadModel(modelSpec, modelPath, forceDownload = false) {
446
494
  try {
447
495
  logger2.info("Starting local model download...");
448
496
  const modelDir = path.dirname(modelPath);
@@ -450,7 +498,21 @@ var DownloadManager = class _DownloadManager {
450
498
  logger2.info("Creating model directory:", modelDir);
451
499
  fs.mkdirSync(modelDir, { recursive: true });
452
500
  }
453
- if (!fs.existsSync(modelPath)) {
501
+ if (!fs.existsSync(modelPath) || forceDownload) {
502
+ if (forceDownload && fs.existsSync(modelPath)) {
503
+ logger2.warn(
504
+ "Force re-download requested; removing existing model file:",
505
+ modelPath
506
+ );
507
+ this.activeDownloads.delete(modelPath);
508
+ try {
509
+ fs.unlinkSync(modelPath);
510
+ } catch (err) {
511
+ logger2.warn(
512
+ `Failed to remove existing model file before re-download: ${err instanceof Error ? err.message : String(err)}`
513
+ );
514
+ }
515
+ }
454
516
  const attempts = [
455
517
  {
456
518
  description: "LFS URL with GGUF suffix",
@@ -1063,6 +1125,118 @@ var TokenizerManager = class _TokenizerManager {
1063
1125
  };
1064
1126
 
1065
1127
  // src/index.ts
1128
// Lower-case substrings that mark a model-load error as "file is corrupted or
// truncated". Compared against the lower-cased error message.
// Frozen: these tables are shared module state and must never be mutated.
var CORRUPTED_MODEL_ERROR_SIGNATURES = Object.freeze([
  "data is not within the file bounds",
  "failed to load model",
  "model is corrupted",
  "data of tensor",
  "is out of bounds"
]);
// Lower-case substrings that mark an embedding error as "input does not fit
// in the context window".
var CONTEXT_LIMIT_ERROR_SIGNATURES = Object.freeze([
  "input is longer than the context size",
  "context size",
  "too many tokens",
  "exceeds context"
]);
// Lower-case substrings of node-llama-cpp log lines that are dropped instead
// of being forwarded to the plugin logger.
var NODE_LLAMA_NOISY_LOAD_ERROR_PATTERNS = Object.freeze([
  "llama_model_load:",
  "llama_model_load_from_file_impl: failed to load model"
]);
// Shortest text length (in UTF-16 code units) an embedding retry may shrink to.
var MIN_EMBEDDING_RETRY_TEXT_LENGTH = 1;
// Defaults for known embedding models: `pattern` is tested against the model
// file name, and the remaining fields fill in whatever was not configured.
var EMBEDDING_MODEL_HINTS = Object.freeze(
  [
    {
      pattern: /nomic-embed-text-v1\.5/i,
      repo: "nomic-ai/nomic-embed-text-v1.5-GGUF",
      dimensions: 768,
      contextSize: 8192
    },
    {
      pattern: /bge-small-en-v1\.5/i,
      repo: "ChristianAzinn/bge-small-en-v1.5-gguf",
      dimensions: 384,
      contextSize: 512
    },
    {
      pattern: /e5-mistral-7b/i,
      repo: "dranger003/e5-mistral-7b-instruct-GGUF",
      dimensions: 4096,
      contextSize: 32768
    }
  ].map((hint) => Object.freeze(hint))
);
1166
/**
 * Extract a human-readable message from any thrown value.
 *
 * @param {unknown} error - The caught value; may be an Error, a string, an
 *   error-like object, or anything else.
 * @returns {string} The message text. Never throws.
 */
function getErrorMessage(error) {
  if (error instanceof Error) return error.message;
  if (typeof error === "string") return error;
  // Error-like objects (e.g. created in another realm or plain
  // `{ message }` payloads) would otherwise collapse to "[object Object]".
  if (error !== null && typeof error === "object" && typeof error.message === "string") {
    return error.message;
  }
  try {
    return String(error);
  } catch {
    // String() throws for objects without a usable toString/valueOf,
    // such as Object.create(null).
    return Object.prototype.toString.call(error);
  }
}
1171
/**
 * Decide whether a thrown value looks like a corrupted-model failure.
 *
 * @param {unknown} error - The caught value.
 * @returns {boolean} True when the lower-cased message contains any entry of
 *   CORRUPTED_MODEL_ERROR_SIGNATURES.
 */
function isCorruptedModelLoadError(error) {
  const haystack = getErrorMessage(error).toLowerCase();
  for (const signature of CORRUPTED_MODEL_ERROR_SIGNATURES) {
    if (haystack.includes(signature)) return true;
  }
  return false;
}
1177
/**
 * Decide whether a thrown value looks like a context-window overflow.
 *
 * @param {unknown} error - The caught value.
 * @returns {boolean} True when the lower-cased message contains any entry of
 *   CONTEXT_LIMIT_ERROR_SIGNATURES.
 */
function isContextLimitError(error) {
  const haystack = getErrorMessage(error).toLowerCase();
  for (const signature of CONTEXT_LIMIT_ERROR_SIGNATURES) {
    if (haystack.includes(signature)) return true;
  }
  return false;
}
1183
/**
 * Decide whether a node-llama-cpp log line should be dropped.
 *
 * @param {string} message - The log text.
 * @returns {boolean} True when the lower-cased text contains any entry of
 *   NODE_LLAMA_NOISY_LOAD_ERROR_PATTERNS.
 */
function shouldSuppressNodeLlamaLoadError(message) {
  const haystack = message.toLowerCase();
  for (const noisyPattern of NODE_LLAMA_NOISY_LOAD_ERROR_PATTERNS) {
    if (haystack.includes(noisyPattern)) return true;
  }
  return false;
}
1189
/**
 * Halve an embedding input so it can be retried against a smaller context.
 *
 * The cut never lands between the two halves of a UTF-16 surrogate pair, so
 * the result never ends in a lone surrogate.
 *
 * @param {string} text - The text that did not fit.
 * @returns {string} The first half of `text` (at least
 *   MIN_EMBEDDING_RETRY_TEXT_LENGTH code units long), or `text` itself when it
 *   cannot be shortened any further. Callers detect the latter by identity.
 */
function shrinkEmbeddingInput(text) {
  if (text.length <= MIN_EMBEDDING_RETRY_TEXT_LENGTH) return text;
  let nextLength = Math.max(
    MIN_EMBEDDING_RETRY_TEXT_LENGTH,
    Math.floor(text.length / 2)
  );
  // nextLength is always < text.length here, so both indices are in range.
  const before = text.charCodeAt(nextLength - 1);
  const after = text.charCodeAt(nextLength);
  const splitsSurrogatePair = before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
  if (splitsSurrogatePair) {
    // Drop the dangling high surrogate instead of emitting half a character.
    nextLength -= 1;
    if (nextLength < MIN_EMBEDDING_RETRY_TEXT_LENGTH) return text;
  }
  return text.slice(0, nextLength);
}
1197
/**
 * Look up built-in defaults for a model by its file name.
 *
 * @param {string} modelName - Configured embedding model file name.
 * @returns The first EMBEDDING_MODEL_HINTS entry whose pattern matches, or
 *   null when none does.
 */
function inferEmbeddingModelHint(modelName) {
  for (const hint of EMBEDDING_MODEL_HINTS) {
    if (hint.pattern.test(modelName)) return hint;
  }
  return null;
}
1201
/**
 * Build the effective embedding model spec.
 *
 * Precedence for repo, dimensions and contextSize is: explicit configuration,
 * then the built-in hint for the model name, then the fallback spec.
 *
 * @param config - Validated plugin configuration.
 * @param fallback - Default model spec; all of its other fields are kept.
 * @returns A new spec object; `fallback` is not modified.
 */
function resolveEmbeddingModelSpec(config, fallback) {
  const modelName = config.LOCAL_EMBEDDING_MODEL ? config.LOCAL_EMBEDDING_MODEL : fallback.name;
  const hint = inferEmbeddingModelHint(modelName);
  // A blank repo setting counts as "not configured"
  const configuredRepo = config.LOCAL_EMBEDDING_MODEL_REPO?.trim();
  const resolved = { ...fallback };
  resolved.name = modelName;
  resolved.repo = configuredRepo || hint?.repo || fallback.repo;
  resolved.dimensions = config.LOCAL_EMBEDDING_DIMENSIONS ?? hint?.dimensions ?? fallback.dimensions;
  resolved.contextSize = config.LOCAL_EMBEDDING_CONTEXT_SIZE ?? hint?.contextSize ?? fallback.contextSize;
  return resolved;
}
1212
/**
 * Read up to the first four bytes of a file as an ASCII magic string.
 *
 * @param {string} filePath - File to inspect.
 * @returns {{ bytesRead: number, magic: string }} The number of bytes read
 *   and exactly those bytes decoded as ASCII. Any I/O failure (missing file,
 *   permission error, ...) yields `{ bytesRead: 0, magic: "" }`.
 */
function readMagicHeader(filePath) {
  try {
    const fd = fs2.openSync(filePath, "r");
    try {
      const header = Buffer.alloc(4);
      const bytesRead = fs2.readSync(fd, header, 0, header.length, 0);
      // Decode only what was actually read so short files do not report
      // NUL padding as part of their magic.
      return { bytesRead, magic: header.toString("ascii", 0, bytesRead) };
    } finally {
      fs2.closeSync(fd);
    }
  } catch {
    // Deliberate best effort: an unreadable file is reported as "no header".
    return { bytesRead: 0, magic: "" };
  }
}
1226
/**
 * Check whether a file starts with the four-byte "GGUF" magic.
 *
 * @param {string} filePath - File to inspect.
 * @returns {boolean} True only when four bytes were read and they spell "GGUF".
 */
function isValidGgufFile(filePath) {
  const header = readMagicHeader(filePath);
  if (header.bytesRead !== 4) return false;
  return header.magic === "GGUF";
}
1230
/**
 * Delete a file, never throwing.
 *
 * A file that is already gone is treated as success. Any other failure is
 * logged as a warning and otherwise ignored.
 *
 * @param {string | undefined} filePath - File to remove; falsy values are a no-op.
 */
function safeUnlink(filePath) {
  if (!filePath) return;
  try {
    // Unlink directly instead of checking existence first: the check could go
    // stale before the unlink, and it skipped dangling symlinks.
    fs2.unlinkSync(filePath);
  } catch (err) {
    if (err !== null && typeof err === "object" && err.code === "ENOENT") return;
    logger5.warn(
      `Failed to remove model file ${filePath}: ${err instanceof Error ? err.message : String(err)}`
    );
  }
}
1066
1240
  var LocalAIManager = class _LocalAIManager {
1067
1241
  static instance = null;
1068
1242
  llama;
@@ -1198,15 +1372,29 @@ var LocalAIManager = class _LocalAIManager {
1198
1372
  try {
1199
1373
  logger5.info("Initializing environment configuration...");
1200
1374
  this.config = await validateConfig();
1375
+ this.embeddingModelConfig = resolveEmbeddingModelSpec(
1376
+ this.config,
1377
+ MODEL_SPECS.embedding
1378
+ );
1201
1379
  this._postValidateInit();
1202
1380
  this.embeddingModelPath = path2.join(
1203
1381
  this.modelsDir,
1204
- this.config.LOCAL_EMBEDDING_MODEL
1382
+ this.embeddingModelConfig.name
1205
1383
  );
1206
1384
  logger5.info(
1207
1385
  "Using embedding model path:",
1208
1386
  basename(this.embeddingModelPath)
1209
1387
  );
1388
+ logger5.info(
1389
+ {
1390
+ model: this.embeddingModelConfig.name,
1391
+ repo: this.embeddingModelConfig.repo,
1392
+ dimensions: this.embeddingModelConfig.dimensions,
1393
+ contextSize: this.embeddingModelConfig.contextSize
1394
+ },
1395
+ "Resolved embedding model spec"
1396
+ );
1397
+ this.ensureEmbeddingModelFileIsValid();
1210
1398
  logger5.info("Environment configuration validated and model paths set");
1211
1399
  this.environmentInitialized = true;
1212
1400
  logger5.success("Environment initialization complete");
@@ -1232,7 +1420,7 @@ var LocalAIManager = class _LocalAIManager {
1232
1420
  * @param {ModelSpec} [customModelSpec] - Optional custom model spec to use instead of the default
1233
1421
  * @returns A Promise that resolves to a boolean indicating whether the model download was successful.
1234
1422
  */
1235
- async downloadModel(modelType, customModelSpec) {
1423
+ async downloadModel(modelType, customModelSpec, forceDownload = false) {
1236
1424
  let modelSpec;
1237
1425
  let modelPathToDownload;
1238
1426
  await this.initializeEnvironment();
@@ -1240,8 +1428,9 @@ var LocalAIManager = class _LocalAIManager {
1240
1428
  modelSpec = customModelSpec;
1241
1429
  modelPathToDownload = modelType === ModelType.TEXT_EMBEDDING ? this.embeddingModelPath : modelType === ModelType.TEXT_LARGE ? this.mediumModelPath : this.modelPath;
1242
1430
  } else if (modelType === ModelType.TEXT_EMBEDDING) {
1243
- modelSpec = MODEL_SPECS.embedding;
1431
+ modelSpec = this.embeddingModelConfig;
1244
1432
  modelPathToDownload = this.embeddingModelPath;
1433
+ this.ensureEmbeddingModelFileIsValid();
1245
1434
  } else {
1246
1435
  modelSpec = modelType === ModelType.TEXT_LARGE ? MODEL_SPECS.medium : MODEL_SPECS.small;
1247
1436
  modelPathToDownload = modelType === ModelType.TEXT_LARGE ? this.mediumModelPath : this.modelPath;
@@ -1249,7 +1438,8 @@ var LocalAIManager = class _LocalAIManager {
1249
1438
  try {
1250
1439
  return await this.downloadManager.downloadModel(
1251
1440
  modelSpec,
1252
- modelPathToDownload
1441
+ modelPathToDownload,
1442
+ forceDownload
1253
1443
  );
1254
1444
  } catch (error) {
1255
1445
  logger5.error(
@@ -1298,6 +1488,78 @@ var LocalAIManager = class _LocalAIManager {
1298
1488
  async initialize(modelType = ModelType.TEXT_SMALL) {
1299
1489
  await this.initializeEnvironment();
1300
1490
  }
1491
+ getEmbeddingDimensions() {
1492
+ return this.embeddingModelConfig.dimensions;
1493
+ }
1494
+ ensureEmbeddingModelFileIsValid() {
1495
+ if (!this.embeddingModelPath || !fs2.existsSync(this.embeddingModelPath))
1496
+ return;
1497
+ if (isValidGgufFile(this.embeddingModelPath)) return;
1498
+ const { bytesRead, magic } = readMagicHeader(this.embeddingModelPath);
1499
+ logger5.warn(
1500
+ {
1501
+ embeddingModelPath: this.embeddingModelPath,
1502
+ bytesRead,
1503
+ magic
1504
+ },
1505
+ "Invalid embedding model file detected; removing corrupt file before download/retry"
1506
+ );
1507
+ safeUnlink(this.embeddingModelPath);
1508
+ }
1509
+ async ensureLlama() {
1510
+ if (this.llama) return;
1511
+ this.llama = await getLlama({
1512
+ logLevel: LlamaLogLevel.error,
1513
+ logger: (level, message) => {
1514
+ if (level !== "error" && level !== "fatal") return;
1515
+ const text = message.trim();
1516
+ if (!text) return;
1517
+ if (shouldSuppressNodeLlamaLoadError(text)) return;
1518
+ logger5.error(`[node-llama-cpp] ${text}`);
1519
+ }
1520
+ });
1521
+ }
1522
+ async loadEmbeddingModel() {
1523
+ this.ensureEmbeddingModelFileIsValid();
1524
+ const gpuLayers = this.config?.LOCAL_EMBEDDING_GPU_LAYERS === -1 ? "auto" : this.config?.LOCAL_EMBEDDING_GPU_LAYERS ?? 0;
1525
+ const useMmap = this.config?.LOCAL_EMBEDDING_USE_MMAP ?? true;
1526
+ this.embeddingModel = await this.llama.loadModel({
1527
+ modelPath: this.embeddingModelPath,
1528
+ gpuLayers,
1529
+ vocabOnly: false,
1530
+ useMmap
1531
+ });
1532
+ this.embeddingContext = await this.embeddingModel.createEmbeddingContext({
1533
+ contextSize: this.embeddingModelConfig.contextSize,
1534
+ batchSize: 512
1535
+ });
1536
+ }
1537
+ async initializeEmbeddingWithRecovery() {
1538
+ logger5.info("Loading embedding model:", this.embeddingModelPath);
1539
+ try {
1540
+ await this.loadEmbeddingModel();
1541
+ logger5.success("Embedding model initialized successfully");
1542
+ return;
1543
+ } catch (error) {
1544
+ if (!isCorruptedModelLoadError(error)) {
1545
+ throw error;
1546
+ }
1547
+ logger5.warn(
1548
+ {
1549
+ error: getErrorMessage(error),
1550
+ embeddingModelPath: this.embeddingModelPath
1551
+ },
1552
+ "Embedding model appears corrupted/incomplete; deleting and re-downloading"
1553
+ );
1554
+ this.embeddingModel = void 0;
1555
+ this.embeddingContext = void 0;
1556
+ safeUnlink(this.embeddingModelPath);
1557
+ await this.downloadModel(ModelType.TEXT_EMBEDDING, void 0, true);
1558
+ this.ensureEmbeddingModelFileIsValid();
1559
+ await this.loadEmbeddingModel();
1560
+ logger5.success("Embedding model recovered after re-download");
1561
+ }
1562
+ }
1301
1563
  /**
1302
1564
  * Asynchronously initializes the embedding model.
1303
1565
  *
@@ -1316,35 +1578,35 @@ var LocalAIManager = class _LocalAIManager {
1316
1578
  fs2.mkdirSync(this.modelsDir, { recursive: true });
1317
1579
  }
1318
1580
  await this.downloadModel(ModelType.TEXT_EMBEDDING);
1319
- if (!this.llama) {
1320
- this.llama = await getLlama();
1321
- }
1581
+ this.ensureEmbeddingModelFileIsValid();
1582
+ await this.ensureLlama();
1322
1583
  if (!this.embeddingModel) {
1323
- logger5.info("Loading embedding model:", this.embeddingModelPath);
1324
- this.embeddingModel = await this.llama.loadModel({
1325
- modelPath: this.embeddingModelPath,
1326
- // Use the correct path
1327
- gpuLayers: 0,
1328
- // Embedding models are typically small enough to run on CPU
1329
- vocabOnly: false
1330
- });
1331
- this.embeddingContext = await this.embeddingModel.createEmbeddingContext({
1332
- contextSize: this.embeddingModelConfig.contextSize,
1333
- batchSize: 512
1334
- });
1335
- logger5.success("Embedding model initialized successfully");
1584
+ await this.initializeEmbeddingWithRecovery();
1336
1585
  }
1337
1586
  } catch (error) {
1338
- logger5.error(
1339
- {
1340
- error: error instanceof Error ? error.message : String(error),
1341
- stack: error instanceof Error ? error.stack : void 0,
1342
- modelsDir: this.modelsDir,
1343
- embeddingModelPath: this.embeddingModelPath
1344
- // Log the path being used
1345
- },
1346
- "Embedding initialization failed with details"
1347
- );
1587
+ if (isCorruptedModelLoadError(error)) {
1588
+ logger5.warn(
1589
+ {
1590
+ error: error instanceof Error ? error.message : String(error),
1591
+ stack: error instanceof Error ? error.stack : void 0,
1592
+ modelsDir: this.modelsDir,
1593
+ embeddingModelPath: this.embeddingModelPath
1594
+ },
1595
+ "Embedding initialization failed due to model corruption"
1596
+ );
1597
+ safeUnlink(this.embeddingModelPath);
1598
+ } else {
1599
+ logger5.error(
1600
+ {
1601
+ error: error instanceof Error ? error.message : String(error),
1602
+ stack: error instanceof Error ? error.stack : void 0,
1603
+ modelsDir: this.modelsDir,
1604
+ embeddingModelPath: this.embeddingModelPath
1605
+ // Log the path being used
1606
+ },
1607
+ "Embedding initialization failed with details"
1608
+ );
1609
+ }
1348
1610
  throw error;
1349
1611
  }
1350
1612
  }
@@ -1358,24 +1620,69 @@ var LocalAIManager = class _LocalAIManager {
1358
1620
  throw new Error("Failed to initialize embedding model");
1359
1621
  }
1360
1622
  logger5.info({ textLength: text.length }, "Generating embedding for text");
1361
- const embeddingResult = await this.embeddingContext.getEmbeddingFor(text);
1362
- const mutableEmbedding = [...embeddingResult.vector];
1363
- const normalizedEmbedding = this.normalizeEmbedding(mutableEmbedding);
1364
- logger5.info(
1365
- { dimensions: normalizedEmbedding.length },
1366
- "Embedding generation complete"
1367
- );
1368
- return normalizedEmbedding;
1623
+ let candidateText = text;
1624
+ let attempt = 0;
1625
+ while (true) {
1626
+ try {
1627
+ const embeddingResult = await this.embeddingContext.getEmbeddingFor(candidateText);
1628
+ logger5.info({
1629
+ hasResult: !!embeddingResult,
1630
+ hasVector: !!embeddingResult?.vector
1631
+ }, "Debug: embeddingResult");
1632
+ const mutableEmbedding = [...embeddingResult.vector];
1633
+ const sizedEmbedding = this.alignEmbeddingDimensions(mutableEmbedding);
1634
+ const normalizedEmbedding = this.normalizeEmbedding(sizedEmbedding);
1635
+ logger5.info(
1636
+ { dimensions: normalizedEmbedding.length },
1637
+ "Embedding generation complete"
1638
+ );
1639
+ return normalizedEmbedding;
1640
+ } catch (error) {
1641
+ if (!isContextLimitError(error)) {
1642
+ throw error;
1643
+ }
1644
+ const nextCandidate = shrinkEmbeddingInput(candidateText);
1645
+ if (nextCandidate === candidateText) {
1646
+ throw error;
1647
+ }
1648
+ attempt += 1;
1649
+ logger5.warn(
1650
+ {
1651
+ attempt,
1652
+ currentChars: candidateText.length,
1653
+ nextChars: nextCandidate.length
1654
+ },
1655
+ "Embedding input exceeded context window; retrying with truncated text"
1656
+ );
1657
+ candidateText = nextCandidate;
1658
+ }
1659
+ }
1369
1660
  } catch (error) {
1370
- logger5.error(
1371
- {
1372
- error: error instanceof Error ? error.message : String(error),
1373
- stack: error instanceof Error ? error.stack : void 0,
1374
- textLength: text?.length ?? "text is null"
1375
- },
1376
- "Embedding generation failed"
1377
- );
1378
- const zeroDimensions = this.config?.LOCAL_EMBEDDING_DIMENSIONS ? this.config.LOCAL_EMBEDDING_DIMENSIONS : this.embeddingModelConfig.dimensions;
1661
+ if (isCorruptedModelLoadError(error)) {
1662
+ logger5.warn(
1663
+ {
1664
+ error: error instanceof Error ? error.message : String(error),
1665
+ stack: error instanceof Error ? error.stack : void 0,
1666
+ textLength: text?.length ?? "text is null",
1667
+ embeddingModelPath: this.embeddingModelPath
1668
+ },
1669
+ "Embedding generation failed due to model corruption; model file removed"
1670
+ );
1671
+ safeUnlink(this.embeddingModelPath);
1672
+ this.embeddingModel = void 0;
1673
+ this.embeddingContext = void 0;
1674
+ this.embeddingInitialized = false;
1675
+ } else {
1676
+ logger5.error(
1677
+ {
1678
+ error: error instanceof Error ? error.message : String(error),
1679
+ stack: error instanceof Error ? error.stack : void 0,
1680
+ textLength: text?.length ?? "text is null"
1681
+ },
1682
+ "Embedding generation failed"
1683
+ );
1684
+ }
1685
+ const zeroDimensions = this.getEmbeddingDimensions();
1379
1686
  return new Array(zeroDimensions).fill(0);
1380
1687
  }
1381
1688
  }
@@ -1385,6 +1692,26 @@ var LocalAIManager = class _LocalAIManager {
1385
1692
  * @param {number[]} embedding - The embedding vector to normalize
1386
1693
  * @returns {number[]} - The normalized embedding vector
1387
1694
  */
1695
+ alignEmbeddingDimensions(embedding) {
1696
+ const targetDimensions = this.getEmbeddingDimensions();
1697
+ if (targetDimensions <= 0 || embedding.length === targetDimensions) {
1698
+ return embedding;
1699
+ }
1700
+ logger5.warn(
1701
+ {
1702
+ observedDimensions: embedding.length,
1703
+ targetDimensions
1704
+ },
1705
+ "Embedding dimensions mismatch; adjusting output dimensions"
1706
+ );
1707
+ if (embedding.length > targetDimensions) {
1708
+ return embedding.slice(0, targetDimensions);
1709
+ }
1710
+ return [
1711
+ ...embedding,
1712
+ ...new Array(targetDimensions - embedding.length).fill(0)
1713
+ ];
1714
+ }
1388
1715
  normalizeEmbedding(embedding) {
1389
1716
  const squareSum = embedding.reduce((sum, val) => sum + val * val, 0);
1390
1717
  const norm = Math.sqrt(squareSum);
@@ -1403,26 +1730,24 @@ var LocalAIManager = class _LocalAIManager {
1403
1730
  try {
1404
1731
  await this.initializeEnvironment();
1405
1732
  await this.downloadModel(ModelType.TEXT_EMBEDDING);
1406
- if (!this.llama) {
1407
- this.llama = await getLlama();
1408
- }
1409
- this.embeddingModel = await this.llama.loadModel({
1410
- modelPath: this.embeddingModelPath,
1411
- gpuLayers: 0,
1412
- // Embedding models are typically small enough to run on CPU
1413
- vocabOnly: false
1414
- });
1415
- this.embeddingContext = await this.embeddingModel.createEmbeddingContext({
1416
- contextSize: this.embeddingModelConfig.contextSize,
1417
- batchSize: 512
1418
- });
1733
+ this.ensureEmbeddingModelFileIsValid();
1734
+ await this.ensureLlama();
1735
+ await this.initializeEmbeddingWithRecovery();
1419
1736
  this.embeddingInitialized = true;
1420
1737
  logger5.info("Embedding model initialized successfully");
1421
1738
  } catch (error) {
1422
- logger5.error(
1423
- error instanceof Error ? error : String(error),
1424
- "Failed to initialize embedding model"
1425
- );
1739
+ if (isCorruptedModelLoadError(error)) {
1740
+ logger5.warn(
1741
+ error instanceof Error ? error : String(error),
1742
+ "Failed to initialize embedding model due to corruption"
1743
+ );
1744
+ safeUnlink(this.embeddingModelPath);
1745
+ } else {
1746
+ logger5.error(
1747
+ error instanceof Error ? error : String(error),
1748
+ "Failed to initialize embedding model"
1749
+ );
1750
+ }
1426
1751
  this.embeddingInitializingPromise = null;
1427
1752
  throw error;
1428
1753
  }
@@ -1454,85 +1779,38 @@ var localAiPlugin = {
1454
1779
  // providers (e.g. ElizaCloud, OpenAI) even when plugins register in
1455
1780
  // parallel and the registration order is non-deterministic.
1456
1781
  priority: 10,
1457
- async init(_config, runtime) {
1458
- logger5.info("\u{1F680} Initializing Local AI plugin...");
1782
+ async init(_config, _runtime) {
1783
+ logger5.info("Initializing local embedding plugin...");
1459
1784
  try {
1460
1785
  await localAIManager.initializeEnvironment();
1786
+ await localAIManager.checkPlatformCapabilities();
1461
1787
  const config = validateConfig();
1462
1788
  const modelsDir = config.MODELS_DIR || path2.join(os2.homedir(), ".eliza", "models");
1463
- if (!fs2.existsSync(modelsDir)) {
1464
- logger5.warn(`\u26A0\uFE0F Models directory does not exist: ${modelsDir}`);
1465
- logger5.warn(
1466
- "The directory will be created, but you need to download model files"
1467
- );
1468
- logger5.warn(
1469
- "Visit https://huggingface.co/models to download compatible GGUF models"
1470
- );
1471
- }
1472
- logger5.info("\u{1F50D} Testing Local AI initialization...");
1473
- try {
1474
- await localAIManager.checkPlatformCapabilities();
1475
- const llamaInstance = await getLlama();
1476
- if (llamaInstance) {
1477
- logger5.success("\u2705 Local AI: llama.cpp library loaded successfully");
1478
- } else {
1479
- throw new Error("Failed to load llama.cpp library");
1480
- }
1481
- const embeddingModelPath = path2.join(
1482
- modelsDir,
1483
- config.LOCAL_EMBEDDING_MODEL
1789
+ const embeddingModelPath = path2.join(
1790
+ modelsDir,
1791
+ config.LOCAL_EMBEDDING_MODEL
1792
+ );
1793
+ if (fs2.existsSync(embeddingModelPath)) {
1794
+ logger5.info(
1795
+ { embeddingModelPath: basename(embeddingModelPath) },
1796
+ "Embedding model file is present"
1484
1797
  );
1485
- const modelsExist = {
1486
- embedding: fs2.existsSync(embeddingModelPath)
1487
- };
1488
- if (!modelsExist.embedding) {
1489
- logger5.warn("\u26A0\uFE0F No model files found in models directory");
1490
- logger5.warn(
1491
- "Models will be downloaded on first use, which may take time"
1492
- );
1493
- logger5.warn(
1494
- "To pre-download models, run the plugin and it will fetch them automatically"
1495
- );
1496
- } else {
1497
- logger5.info(
1498
- { embedding: modelsExist.embedding ? "\u2713" : "\u2717" },
1499
- "\u{1F4E6} Found model files"
1500
- );
1501
- }
1502
- logger5.success("\u2705 Local AI plugin initialized successfully");
1503
- logger5.info("\u{1F4A1} Models will be loaded on-demand when first used");
1504
- } catch (testError) {
1505
- logger5.error(
1506
- testError instanceof Error ? testError : String(testError),
1507
- "\u274C Local AI initialization test failed"
1798
+ } else {
1799
+ logger5.info(
1800
+ { embeddingModelPath: basename(embeddingModelPath) },
1801
+ "Embedding model file not present yet; it will be downloaded on first use"
1508
1802
  );
1509
- logger5.warn("The plugin may not function correctly");
1510
- logger5.warn("Please check:");
1511
- logger5.warn("1. Your system has sufficient memory (8GB+ recommended)");
1512
- logger5.warn("2. C++ build tools are installed (for node-llama-cpp)");
1513
- logger5.warn("3. Your CPU supports the required instruction sets");
1514
1803
  }
1804
+ logger5.success("Local embedding plugin initialized");
1515
1805
  } catch (error) {
1516
1806
  logger5.error(
1517
1807
  {
1518
1808
  error: error instanceof Error ? error.message : String(error),
1519
1809
  stack: error instanceof Error ? error.stack : void 0
1520
1810
  },
1521
- "\u274C Failed to initialize Local AI plugin"
1811
+ "Failed to initialize local embedding plugin"
1522
1812
  );
1523
- if (error instanceof Error) {
1524
- if (error.message.includes("Cannot find module")) {
1525
- logger5.error("\u{1F4DA} Missing dependencies detected");
1526
- logger5.error("Please run: npm install or bun install");
1527
- } else if (error.message.includes("node-llama-cpp")) {
1528
- logger5.error("\u{1F527} node-llama-cpp build issue detected");
1529
- logger5.error("Please ensure C++ build tools are installed:");
1530
- logger5.error("- Windows: Install Visual Studio Build Tools");
1531
- logger5.error("- macOS: Install Xcode Command Line Tools");
1532
- logger5.error("- Linux: Install build-essential package");
1533
- }
1534
- }
1535
- logger5.warn("\u26A0\uFE0F Local AI plugin will not be available");
1813
+ logger5.warn("Local embedding plugin may be unavailable");
1536
1814
  }
1537
1815
  },
1538
1816
  models: {
@@ -1548,7 +1826,7 @@ var localAiPlugin = {
1548
1826
  logger5.debug(
1549
1827
  "Null or empty text input for embedding, returning zero vector"
1550
1828
  );
1551
- return new Array(384).fill(0);
1829
+ return new Array(localAIManager.getEmbeddingDimensions()).fill(0);
1552
1830
  }
1553
1831
  return await localAIManager.generateEmbedding(text);
1554
1832
  } catch (error) {
@@ -1561,7 +1839,7 @@ var localAiPlugin = {
1561
1839
  },
1562
1840
  "Error in TEXT_EMBEDDING handler"
1563
1841
  );
1564
- return new Array(384).fill(0);
1842
+ return new Array(localAIManager.getEmbeddingDimensions()).fill(0);
1565
1843
  }
1566
1844
  },
1567
1845
  [ModelType.TEXT_TOKENIZER_ENCODE]: async (_runtime, params) => {