@elizaos/plugin-knowledge 1.6.0 → 1.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -257,12 +257,13 @@ function validateModelConfig(runtime) {
  EMBEDDING_DIMENSION: embeddingDimension,
  LOAD_DOCS_ON_STARTUP: parseBooleanEnv(getSetting("LOAD_DOCS_ON_STARTUP")),
  CTX_KNOWLEDGE_ENABLED: ctxKnowledgeEnabled,
- // Rate limiting settings - disable for fast uploads with APIs without limits
- // High defaults optimized for Vercel gateway / high-throughput APIs
+ // Rate limiting settings - optimized for batch embeddings
+ // With batch embeddings, we send 100 texts in ONE API call
+ // 935 chunks / 100 = ~10 API calls instead of 935!
  RATE_LIMIT_ENABLED: parseBooleanEnv(getSetting("RATE_LIMIT_ENABLED", "true")),
- MAX_CONCURRENT_REQUESTS: getSetting("MAX_CONCURRENT_REQUESTS", "150"),
- REQUESTS_PER_MINUTE: getSetting("REQUESTS_PER_MINUTE", "300"),
- TOKENS_PER_MINUTE: getSetting("TOKENS_PER_MINUTE", "750000"),
+ MAX_CONCURRENT_REQUESTS: getSetting("MAX_CONCURRENT_REQUESTS", "100"),
+ REQUESTS_PER_MINUTE: getSetting("REQUESTS_PER_MINUTE", "500"),
+ TOKENS_PER_MINUTE: getSetting("TOKENS_PER_MINUTE", "1000000"),
  BATCH_DELAY_MS: getSetting("BATCH_DELAY_MS", "100")
  });
  validateConfigRequirements(config, assumePluginOpenAI);
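
Note on the hunk above: only the comments and the fallback values handed to getSetting change; each setting can still be overridden through the environment. A minimal sketch of how the new defaults resolve, assuming getSetting reads process.env and falls back to the given default, and parseBooleanEnv treats only the string "true" as true (the plugin's real helpers also consult runtime settings):

// Sketch only: stand-ins for the plugin's getSetting / parseBooleanEnv helpers (assumed shapes).
const getSetting = (name, fallback) => process.env[name] ?? fallback;
const parseBooleanEnv = (value) => String(value).toLowerCase() === "true";

// With no overrides set, this resolves to the new 1.6.1 defaults shown in the hunk above.
const rateLimitConfig = {
  RATE_LIMIT_ENABLED: parseBooleanEnv(getSetting("RATE_LIMIT_ENABLED", "true")),
  MAX_CONCURRENT_REQUESTS: getSetting("MAX_CONCURRENT_REQUESTS", "100"),
  REQUESTS_PER_MINUTE: getSetting("REQUESTS_PER_MINUTE", "500"),
  TOKENS_PER_MINUTE: getSetting("TOKENS_PER_MINUTE", "1000000"),
  BATCH_DELAY_MS: getSetting("BATCH_DELAY_MS", "100")
};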
@@ -1716,67 +1717,165 @@ async function processAndSaveFragments({
  }
  return { savedCount, failedCount, failedChunks };
  }
+ var EMBEDDING_BATCH_SIZE = 100;
  async function generateEmbeddingsForChunks(runtime, contextualizedChunks, rateLimiter) {
  const validChunks = contextualizedChunks.filter((chunk) => chunk.success);
  const failedChunks = contextualizedChunks.filter((chunk) => !chunk.success);
- if (validChunks.length === 0) {
- return failedChunks.map((chunk) => ({
+ const results = [];
+ for (const chunk of failedChunks) {
+ results.push({
  success: false,
  index: chunk.index,
  error: new Error("Chunk processing failed"),
  text: chunk.contextualizedText
- }));
+ });
  }
- return await Promise.all(
- contextualizedChunks.map(async (contextualizedChunk) => {
- if (!contextualizedChunk.success) {
- return {
- success: false,
- index: contextualizedChunk.index,
- error: new Error("Chunk processing failed"),
- text: contextualizedChunk.contextualizedText
- };
+ if (validChunks.length === 0) {
+ return results;
+ }
+ const useBatchEmbeddings = shouldUseBatchEmbeddings(runtime);
+ if (useBatchEmbeddings) {
+ logger4.info(`[Document Processor] Using BATCH embeddings for ${validChunks.length} chunks`);
+ return await generateEmbeddingsBatch(runtime, validChunks, rateLimiter, results);
+ } else {
+ logger4.info(`[Document Processor] Using individual embeddings for ${validChunks.length} chunks`);
+ return await generateEmbeddingsIndividual(runtime, validChunks, rateLimiter, results);
+ }
+ }
+ function shouldUseBatchEmbeddings(runtime) {
+ const setting = runtime.getSetting("BATCH_EMBEDDINGS") ?? process.env.BATCH_EMBEDDINGS;
+ if (setting === "false" || setting === false) {
+ return false;
+ }
+ return true;
+ }
+ async function generateEmbeddingsBatch(runtime, validChunks, rateLimiter, results) {
+ for (let batchStart = 0; batchStart < validChunks.length; batchStart += EMBEDDING_BATCH_SIZE) {
+ const batchEnd = Math.min(batchStart + EMBEDDING_BATCH_SIZE, validChunks.length);
+ const batch = validChunks.slice(batchStart, batchEnd);
+ const batchTexts = batch.map((c) => c.contextualizedText);
+ const totalTokens = batchTexts.reduce((sum, text) => sum + estimateTokens(text), 0);
+ await rateLimiter(totalTokens);
+ logger4.info(
+ `[Document Processor] Batch ${Math.floor(batchStart / EMBEDDING_BATCH_SIZE) + 1}/${Math.ceil(validChunks.length / EMBEDDING_BATCH_SIZE)}: ${batch.length} texts, ~${totalTokens} tokens`
+ );
+ try {
+ const embeddings = await generateBatchEmbeddingsViaRuntime(runtime, batchTexts);
+ for (let i = 0; i < batch.length; i++) {
+ const chunk = batch[i];
+ const embedding = embeddings[i];
+ if (embedding && embedding.length > 0 && embedding[0] !== 0) {
+ results.push({
+ embedding,
+ success: true,
+ index: chunk.index,
+ text: chunk.contextualizedText
+ });
+ } else {
+ results.push({
+ success: false,
+ index: chunk.index,
+ error: new Error("Empty or invalid embedding returned"),
+ text: chunk.contextualizedText
+ });
+ }
  }
- const embeddingTokens = estimateTokens(contextualizedChunk.contextualizedText);
- await rateLimiter(embeddingTokens);
- try {
- const generateEmbeddingOperation = async () => {
- return await generateEmbeddingWithValidation(
- runtime,
- contextualizedChunk.contextualizedText
- );
- };
- const { embedding, success, error } = await withRateLimitRetry(
- generateEmbeddingOperation,
- `embedding generation for chunk ${contextualizedChunk.index}`
- );
- if (!success) {
- return {
+ } catch (error) {
+ logger4.error(`[Document Processor] Batch embedding error: ${error.message}`);
+ for (const chunk of batch) {
+ try {
+ const result = await generateEmbeddingWithValidation(runtime, chunk.contextualizedText);
+ if (result.success && result.embedding) {
+ results.push({
+ embedding: result.embedding,
+ success: true,
+ index: chunk.index,
+ text: chunk.contextualizedText
+ });
+ } else {
+ results.push({
+ success: false,
+ index: chunk.index,
+ error: result.error || new Error("Embedding failed"),
+ text: chunk.contextualizedText
+ });
+ }
+ } catch (fallbackError) {
+ results.push({
  success: false,
- index: contextualizedChunk.index,
- error,
- text: contextualizedChunk.contextualizedText
- };
+ index: chunk.index,
+ error: fallbackError,
+ text: chunk.contextualizedText
+ });
  }
- return {
- embedding,
- success: true,
- index: contextualizedChunk.index,
- text: contextualizedChunk.contextualizedText
- };
- } catch (error) {
- logger4.error(
- `Error generating embedding for chunk ${contextualizedChunk.index}: ${error.message}`
- );
- return {
+ }
+ }
+ }
+ return results;
+ }
+ async function generateBatchEmbeddingsViaRuntime(runtime, texts) {
+ const batchResult = await runtime.useModel(
+ ModelType.TEXT_EMBEDDING,
+ { texts }
+ // Handler supports { texts: string[] } for batch mode
+ );
+ if (Array.isArray(batchResult) && Array.isArray(batchResult[0])) {
+ return batchResult;
+ }
+ if (Array.isArray(batchResult) && typeof batchResult[0] === "number") {
+ logger4.warn("[Document Processor] Runtime returned single embedding for batch request - falling back to individual calls");
+ const embeddings = await Promise.all(
+ texts.map(async (text) => {
+ const result = await runtime.useModel(ModelType.TEXT_EMBEDDING, { text });
+ if (Array.isArray(result)) {
+ return result;
+ }
+ return result?.embedding || [];
+ })
+ );
+ return embeddings;
+ }
+ logger4.error("[Document Processor] Unexpected batch result format:", typeof batchResult);
+ throw new Error("Unexpected batch embedding result format");
+ }
+ async function generateEmbeddingsIndividual(runtime, validChunks, rateLimiter, results) {
+ for (const chunk of validChunks) {
+ const embeddingTokens = estimateTokens(chunk.contextualizedText);
+ await rateLimiter(embeddingTokens);
+ try {
+ const generateEmbeddingOperation = async () => {
+ return await generateEmbeddingWithValidation(runtime, chunk.contextualizedText);
+ };
+ const { embedding, success, error } = await withRateLimitRetry(
+ generateEmbeddingOperation,
+ `embedding generation for chunk ${chunk.index}`
+ );
+ if (!success) {
+ results.push({
  success: false,
- index: contextualizedChunk.index,
+ index: chunk.index,
  error,
- text: contextualizedChunk.contextualizedText
- };
+ text: chunk.contextualizedText
+ });
+ } else {
+ results.push({
+ embedding,
+ success: true,
+ index: chunk.index,
+ text: chunk.contextualizedText
+ });
  }
- })
- );
+ } catch (error) {
+ logger4.error(`Error generating embedding for chunk ${chunk.index}: ${error.message}`);
+ results.push({
+ success: false,
+ index: chunk.index,
+ error,
+ text: chunk.contextualizedText
+ });
+ }
+ }
+ return results;
  }
  async function getContextualizedChunks(runtime, fullDocumentText, chunks, contentType, batchOriginalIndices, documentTitle) {
  const ctxEnabled = getCtxKnowledgeEnabled(runtime);
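
The new generateEmbeddingsBatch above walks the valid chunks in slices of EMBEDDING_BATCH_SIZE, so the comment "935 chunks / 100 = ~10 API calls" is just Math.ceil(935 / 100). A self-contained sketch of that slicing (the helper name toBatches is illustrative, not part of the plugin):

// Sketch of the batching arithmetic: N chunks => Math.ceil(N / EMBEDDING_BATCH_SIZE) embedding calls.
const EMBEDDING_BATCH_SIZE = 100;

function toBatches(items, size = EMBEDDING_BATCH_SIZE) {
  const batches = [];
  for (let start = 0; start < items.length; start += size) {
    batches.push(items.slice(start, start + size));
  }
  return batches;
}

// 935 chunks -> 10 batches (9 of 100 plus a final batch of 35), i.e. ~10 API calls instead of 935.
console.log(toBatches(Array.from({ length: 935 }, (_, i) => `chunk ${i}`)).length); // 10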
@@ -1965,17 +2064,11 @@ function prepareContextPrompts(chunks, fullDocumentText, contentType, batchIndic
  }
  async function generateEmbeddingWithValidation(runtime, text) {
  try {
- const embeddingResult = await runtime.useModel(ModelType.TEXT_EMBEDDING, {
- text
- });
+ const embeddingResult = await runtime.useModel(ModelType.TEXT_EMBEDDING, { text });
  const embedding = Array.isArray(embeddingResult) ? embeddingResult : embeddingResult?.embedding;
  if (!embedding || embedding.length === 0) {
- logger4.warn(`Zero vector detected. Embedding result: ${JSON.stringify(embedding)}`);
- return {
- embedding: null,
- success: false,
- error: new Error("Zero vector detected")
- };
+ logger4.warn(`Zero vector detected`);
+ return { embedding: null, success: false, error: new Error("Zero vector detected") };
  }
  return { embedding, success: true };
  } catch (error) {
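
generateEmbeddingWithValidation above accepts either a raw number[] or an object carrying an embedding field from runtime.useModel, and treats an empty result as a zero vector. A standalone sketch of just that normalization step (the function name is illustrative):

// Sketch: normalize the two result shapes handled above into { embedding, success, error }.
function normalizeEmbeddingResult(embeddingResult) {
  const embedding = Array.isArray(embeddingResult) ? embeddingResult : embeddingResult?.embedding;
  if (!embedding || embedding.length === 0) {
    return { embedding: null, success: false, error: new Error("Zero vector detected") };
  }
  return { embedding, success: true };
}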
@@ -2005,9 +2098,7 @@ function createRateLimiter(requestsPerMinute, tokensPerMinute, rateLimitEnabled
  const tokenUsage = [];
  const intervalMs = 60 * 1e3;
  return async function rateLimiter(estimatedTokens = 1e3) {
- if (!rateLimitEnabled) {
- return;
- }
+ if (!rateLimitEnabled) return;
  const now = Date.now();
  while (requestTimes.length > 0 && now - requestTimes[0] > intervalMs) {
  requestTimes.shift();
@@ -2021,23 +2112,15 @@ function createRateLimiter(requestsPerMinute, tokensPerMinute, rateLimitEnabled
  if (requestLimitExceeded || tokenLimitExceeded) {
  let timeToWait = 0;
  if (requestLimitExceeded) {
- const oldestRequest = requestTimes[0];
- timeToWait = Math.max(timeToWait, oldestRequest + intervalMs - now);
+ timeToWait = Math.max(timeToWait, requestTimes[0] + intervalMs - now);
  }
  if (tokenLimitExceeded && tokenUsage.length > 0) {
- const oldestTokenUsage = tokenUsage[0];
- timeToWait = Math.max(timeToWait, oldestTokenUsage.timestamp + intervalMs - now);
+ timeToWait = Math.max(timeToWait, tokenUsage[0].timestamp + intervalMs - now);
  }
  if (timeToWait > 0) {
  const reason = requestLimitExceeded ? "request" : "token";
  if (timeToWait > 5e3) {
- logger4.info(
- `[Document Processor] Rate limiting: waiting ${Math.round(timeToWait / 1e3)}s due to ${reason} limit`
- );
- } else {
- logger4.debug(
- `[Document Processor] Rate limiting: ${timeToWait}ms wait (${reason} limit)`
- );
+ logger4.info(`[Rate Limiter] Waiting ${Math.round(timeToWait / 1e3)}s (${reason} limit)`);
  }
  await new Promise((resolve2) => setTimeout(resolve2, timeToWait));
  }
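
The two rate limiter hunks above simplify the code without changing its behavior: when the per-minute request or token budget is exhausted, it waits until the oldest entry ages out of the 60-second window. A self-contained sketch of that wait calculation (the function name and parameter list are illustrative):

// Sketch of the sliding-window wait computed in the hunk above.
const intervalMs = 60 * 1000;

function computeWaitMs(now, requestTimes, tokenUsage, requestLimitExceeded, tokenLimitExceeded) {
  let timeToWait = 0;
  if (requestLimitExceeded && requestTimes.length > 0) {
    // Wait until the oldest request timestamp leaves the rolling one-minute window.
    timeToWait = Math.max(timeToWait, requestTimes[0] + intervalMs - now);
  }
  if (tokenLimitExceeded && tokenUsage.length > 0) {
    // Same idea for token-usage entries, which carry a timestamp field.
    timeToWait = Math.max(timeToWait, tokenUsage[0].timestamp + intervalMs - now);
  }
  return timeToWait;
}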