@elizaos/plugin-knowledge 1.6.0 → 1.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -257,12 +257,13 @@ function validateModelConfig(runtime) {
  EMBEDDING_DIMENSION: embeddingDimension,
  LOAD_DOCS_ON_STARTUP: parseBooleanEnv(getSetting("LOAD_DOCS_ON_STARTUP")),
  CTX_KNOWLEDGE_ENABLED: ctxKnowledgeEnabled,
- // Rate limiting settings - disable for fast uploads with APIs without limits
- // High defaults optimized for Vercel gateway / high-throughput APIs
+ // Rate limiting settings - optimized for batch embeddings
+ // With batch embeddings, we send 100 texts in ONE API call
+ // 935 chunks / 100 = ~10 API calls instead of 935!
  RATE_LIMIT_ENABLED: parseBooleanEnv(getSetting("RATE_LIMIT_ENABLED", "true")),
- MAX_CONCURRENT_REQUESTS: getSetting("MAX_CONCURRENT_REQUESTS", "150"),
- REQUESTS_PER_MINUTE: getSetting("REQUESTS_PER_MINUTE", "300"),
- TOKENS_PER_MINUTE: getSetting("TOKENS_PER_MINUTE", "750000"),
+ MAX_CONCURRENT_REQUESTS: getSetting("MAX_CONCURRENT_REQUESTS", "100"),
+ REQUESTS_PER_MINUTE: getSetting("REQUESTS_PER_MINUTE", "500"),
+ TOKENS_PER_MINUTE: getSetting("TOKENS_PER_MINUTE", "1000000"),
  BATCH_DELAY_MS: getSetting("BATCH_DELAY_MS", "100")
  });
  validateConfigRequirements(config, assumePluginOpenAI);
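
Note on the hunk above: only the comments and the fallback values handed to getSetting change; each setting can still be overridden through the environment. A minimal sketch of how the new defaults resolve, assuming getSetting reads process.env and falls back to the given default, and parseBooleanEnv treats only the string "true" as true (the plugin's real helpers also consult runtime settings):

// Sketch only: stand-ins for the plugin's getSetting / parseBooleanEnv helpers (assumed shapes).
const getSetting = (name, fallback) => process.env[name] ?? fallback;
const parseBooleanEnv = (value) => String(value).toLowerCase() === "true";

// With no overrides set, this resolves to the new 1.6.1 defaults shown in the hunk above.
const rateLimitConfig = {
  RATE_LIMIT_ENABLED: parseBooleanEnv(getSetting("RATE_LIMIT_ENABLED", "true")),
  MAX_CONCURRENT_REQUESTS: getSetting("MAX_CONCURRENT_REQUESTS", "100"),
  REQUESTS_PER_MINUTE: getSetting("REQUESTS_PER_MINUTE", "500"),
  TOKENS_PER_MINUTE: getSetting("TOKENS_PER_MINUTE", "1000000"),
  BATCH_DELAY_MS: getSetting("BATCH_DELAY_MS", "100")
};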
@@ -1716,67 +1717,165 @@ async function processAndSaveFragments({
  }
  return { savedCount, failedCount, failedChunks };
  }
+ var EMBEDDING_BATCH_SIZE = 100;
  async function generateEmbeddingsForChunks(runtime, contextualizedChunks, rateLimiter) {
  const validChunks = contextualizedChunks.filter((chunk) => chunk.success);
  const failedChunks = contextualizedChunks.filter((chunk) => !chunk.success);
- if (validChunks.length === 0) {
- return failedChunks.map((chunk) => ({
+ const results = [];
+ for (const chunk of failedChunks) {
+ results.push({
  success: false,
  index: chunk.index,
  error: new Error("Chunk processing failed"),
  text: chunk.contextualizedText
- }));
+ });
  }
- return await Promise.all(
- contextualizedChunks.map(async (contextualizedChunk) => {
- if (!contextualizedChunk.success) {
- return {
- success: false,
- index: contextualizedChunk.index,
- error: new Error("Chunk processing failed"),
- text: contextualizedChunk.contextualizedText
- };
+ if (validChunks.length === 0) {
+ return results;
+ }
+ const useBatchEmbeddings = shouldUseBatchEmbeddings(runtime);
+ if (useBatchEmbeddings) {
+ logger4.info(`[Document Processor] Using BATCH embeddings for ${validChunks.length} chunks`);
+ return await generateEmbeddingsBatch(runtime, validChunks, rateLimiter, results);
+ } else {
+ logger4.info(`[Document Processor] Using individual embeddings for ${validChunks.length} chunks`);
+ return await generateEmbeddingsIndividual(runtime, validChunks, rateLimiter, results);
+ }
+ }
+ function shouldUseBatchEmbeddings(runtime) {
+ const setting = runtime.getSetting("BATCH_EMBEDDINGS") ?? process.env.BATCH_EMBEDDINGS;
+ if (setting === "false" || setting === false) {
+ return false;
+ }
+ return true;
+ }
+ async function generateEmbeddingsBatch(runtime, validChunks, rateLimiter, results) {
+ for (let batchStart = 0; batchStart < validChunks.length; batchStart += EMBEDDING_BATCH_SIZE) {
+ const batchEnd = Math.min(batchStart + EMBEDDING_BATCH_SIZE, validChunks.length);
+ const batch = validChunks.slice(batchStart, batchEnd);
+ const batchTexts = batch.map((c) => c.contextualizedText);
+ const totalTokens = batchTexts.reduce((sum, text) => sum + estimateTokens(text), 0);
+ await rateLimiter(totalTokens);
+ logger4.info(
+ `[Document Processor] Batch ${Math.floor(batchStart / EMBEDDING_BATCH_SIZE) + 1}/${Math.ceil(validChunks.length / EMBEDDING_BATCH_SIZE)}: ${batch.length} texts, ~${totalTokens} tokens`
+ );
+ try {
+ const embeddings = await generateBatchEmbeddingsViaRuntime(runtime, batchTexts);
+ for (let i = 0; i < batch.length; i++) {
+ const chunk = batch[i];
+ const embedding = embeddings[i];
+ if (embedding && embedding.length > 0 && embedding[0] !== 0) {
+ results.push({
+ embedding,
+ success: true,
+ index: chunk.index,
+ text: chunk.contextualizedText
+ });
+ } else {
+ results.push({
+ success: false,
+ index: chunk.index,
+ error: new Error("Empty or invalid embedding returned"),
+ text: chunk.contextualizedText
+ });
+ }
  }
- const embeddingTokens = estimateTokens(contextualizedChunk.contextualizedText);
- await rateLimiter(embeddingTokens);
- try {
- const generateEmbeddingOperation = async () => {
- return await generateEmbeddingWithValidation(
- runtime,
- contextualizedChunk.contextualizedText
- );
- };
- const { embedding, success, error } = await withRateLimitRetry(
- generateEmbeddingOperation,
- `embedding generation for chunk ${contextualizedChunk.index}`
- );
- if (!success) {
- return {
+ } catch (error) {
+ logger4.error(`[Document Processor] Batch embedding error: ${error.message}`);
+ for (const chunk of batch) {
+ try {
+ const result = await generateEmbeddingWithValidation(runtime, chunk.contextualizedText);
+ if (result.success && result.embedding) {
+ results.push({
+ embedding: result.embedding,
+ success: true,
+ index: chunk.index,
+ text: chunk.contextualizedText
+ });
+ } else {
+ results.push({
+ success: false,
+ index: chunk.index,
+ error: result.error || new Error("Embedding failed"),
+ text: chunk.contextualizedText
+ });
+ }
+ } catch (fallbackError) {
+ results.push({
  success: false,
- index: contextualizedChunk.index,
- error,
- text: contextualizedChunk.contextualizedText
- };
+ index: chunk.index,
+ error: fallbackError,
+ text: chunk.contextualizedText
+ });
  }
- return {
- embedding,
- success: true,
- index: contextualizedChunk.index,
- text: contextualizedChunk.contextualizedText
- };
- } catch (error) {
- logger4.error(
- `Error generating embedding for chunk ${contextualizedChunk.index}: ${error.message}`
- );
- return {
+ }
+ }
+ }
+ return results;
+ }
+ async function generateBatchEmbeddingsViaRuntime(runtime, texts) {
+ const batchResult = await runtime.useModel(
+ ModelType.TEXT_EMBEDDING,
+ { texts }
+ // Handler supports { texts: string[] } for batch mode
+ );
+ if (Array.isArray(batchResult) && Array.isArray(batchResult[0])) {
+ return batchResult;
+ }
+ if (Array.isArray(batchResult) && typeof batchResult[0] === "number") {
+ logger4.warn("[Document Processor] Runtime returned single embedding for batch request - falling back to individual calls");
+ const embeddings = await Promise.all(
+ texts.map(async (text) => {
+ const result = await runtime.useModel(ModelType.TEXT_EMBEDDING, { text });
+ if (Array.isArray(result)) {
+ return result;
+ }
+ return result?.embedding || [];
+ })
+ );
+ return embeddings;
+ }
+ logger4.error("[Document Processor] Unexpected batch result format:", typeof batchResult);
+ throw new Error("Unexpected batch embedding result format");
+ }
+ async function generateEmbeddingsIndividual(runtime, validChunks, rateLimiter, results) {
+ for (const chunk of validChunks) {
+ const embeddingTokens = estimateTokens(chunk.contextualizedText);
+ await rateLimiter(embeddingTokens);
+ try {
+ const generateEmbeddingOperation = async () => {
+ return await generateEmbeddingWithValidation(runtime, chunk.contextualizedText);
+ };
+ const { embedding, success, error } = await withRateLimitRetry(
+ generateEmbeddingOperation,
+ `embedding generation for chunk ${chunk.index}`
+ );
+ if (!success) {
+ results.push({
  success: false,
- index: contextualizedChunk.index,
+ index: chunk.index,
  error,
- text: contextualizedChunk.contextualizedText
- };
+ text: chunk.contextualizedText
+ });
+ } else {
+ results.push({
+ embedding,
+ success: true,
+ index: chunk.index,
+ text: chunk.contextualizedText
+ });
  }
- })
- );
+ } catch (error) {
+ logger4.error(`Error generating embedding for chunk ${chunk.index}: ${error.message}`);
+ results.push({
+ success: false,
+ index: chunk.index,
+ error,
+ text: chunk.contextualizedText
+ });
+ }
+ }
+ return results;
  }
  async function getContextualizedChunks(runtime, fullDocumentText, chunks, contentType, batchOriginalIndices, documentTitle) {
  const ctxEnabled = getCtxKnowledgeEnabled(runtime);
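
The new generateEmbeddingsBatch above walks the valid chunks in slices of EMBEDDING_BATCH_SIZE, so the comment "935 chunks / 100 = ~10 API calls" is just Math.ceil(935 / 100). A self-contained sketch of that slicing (the helper name toBatches is illustrative, not part of the plugin):

// Sketch of the batching arithmetic: N chunks => Math.ceil(N / EMBEDDING_BATCH_SIZE) embedding calls.
const EMBEDDING_BATCH_SIZE = 100;

function toBatches(items, size = EMBEDDING_BATCH_SIZE) {
  const batches = [];
  for (let start = 0; start < items.length; start += size) {
    batches.push(items.slice(start, start + size));
  }
  return batches;
}

// 935 chunks -> 10 batches (9 of 100 plus a final batch of 35), i.e. ~10 API calls instead of 935.
console.log(toBatches(Array.from({ length: 935 }, (_, i) => `chunk ${i}`)).length); // 10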
@@ -1965,17 +2064,11 @@ function prepareContextPrompts(chunks, fullDocumentText, contentType, batchIndic
  }
  async function generateEmbeddingWithValidation(runtime, text) {
  try {
- const embeddingResult = await runtime.useModel(ModelType.TEXT_EMBEDDING, {
- text
- });
+ const embeddingResult = await runtime.useModel(ModelType.TEXT_EMBEDDING, { text });
  const embedding = Array.isArray(embeddingResult) ? embeddingResult : embeddingResult?.embedding;
  if (!embedding || embedding.length === 0) {
- logger4.warn(`Zero vector detected. Embedding result: ${JSON.stringify(embedding)}`);
- return {
- embedding: null,
- success: false,
- error: new Error("Zero vector detected")
- };
+ logger4.warn(`Zero vector detected`);
+ return { embedding: null, success: false, error: new Error("Zero vector detected") };
  }
  return { embedding, success: true };
  } catch (error) {
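
generateEmbeddingWithValidation above accepts either a raw number[] or an object carrying an embedding field from runtime.useModel, and treats an empty result as a zero vector. A standalone sketch of just that normalization step (the function name is illustrative):

// Sketch: normalize the two result shapes handled above into { embedding, success, error }.
function normalizeEmbeddingResult(embeddingResult) {
  const embedding = Array.isArray(embeddingResult) ? embeddingResult : embeddingResult?.embedding;
  if (!embedding || embedding.length === 0) {
    return { embedding: null, success: false, error: new Error("Zero vector detected") };
  }
  return { embedding, success: true };
}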
@@ -2005,9 +2098,7 @@ function createRateLimiter(requestsPerMinute, tokensPerMinute, rateLimitEnabled
  const tokenUsage = [];
  const intervalMs = 60 * 1e3;
  return async function rateLimiter(estimatedTokens = 1e3) {
- if (!rateLimitEnabled) {
- return;
- }
+ if (!rateLimitEnabled) return;
  const now = Date.now();
  while (requestTimes.length > 0 && now - requestTimes[0] > intervalMs) {
  requestTimes.shift();
@@ -2021,23 +2112,15 @@ function createRateLimiter(requestsPerMinute, tokensPerMinute, rateLimitEnabled
  if (requestLimitExceeded || tokenLimitExceeded) {
  let timeToWait = 0;
  if (requestLimitExceeded) {
- const oldestRequest = requestTimes[0];
- timeToWait = Math.max(timeToWait, oldestRequest + intervalMs - now);
+ timeToWait = Math.max(timeToWait, requestTimes[0] + intervalMs - now);
  }
  if (tokenLimitExceeded && tokenUsage.length > 0) {
- const oldestTokenUsage = tokenUsage[0];
- timeToWait = Math.max(timeToWait, oldestTokenUsage.timestamp + intervalMs - now);
+ timeToWait = Math.max(timeToWait, tokenUsage[0].timestamp + intervalMs - now);
  }
  if (timeToWait > 0) {
  const reason = requestLimitExceeded ? "request" : "token";
  if (timeToWait > 5e3) {
- logger4.info(
- `[Document Processor] Rate limiting: waiting ${Math.round(timeToWait / 1e3)}s due to ${reason} limit`
- );
- } else {
- logger4.debug(
- `[Document Processor] Rate limiting: ${timeToWait}ms wait (${reason} limit)`
- );
+ logger4.info(`[Rate Limiter] Waiting ${Math.round(timeToWait / 1e3)}s (${reason} limit)`);
  }
  await new Promise((resolve2) => setTimeout(resolve2, timeToWait));
  }
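
The two rate limiter hunks above simplify the code without changing its behavior: when the per-minute request or token budget is exhausted, it waits until the oldest entry ages out of the 60-second window. A self-contained sketch of that wait calculation (the function name and parameter list are illustrative):

// Sketch of the sliding-window wait computed in the hunk above.
const intervalMs = 60 * 1000;

function computeWaitMs(now, requestTimes, tokenUsage, requestLimitExceeded, tokenLimitExceeded) {
  let timeToWait = 0;
  if (requestLimitExceeded && requestTimes.length > 0) {
    // Wait until the oldest request timestamp leaves the rolling one-minute window.
    timeToWait = Math.max(timeToWait, requestTimes[0] + intervalMs - now);
  }
  if (tokenLimitExceeded && tokenUsage.length > 0) {
    // Same idea for token-usage entries, which carry a timestamp field.
    timeToWait = Math.max(timeToWait, tokenUsage[0].timestamp + intervalMs - now);
  }
  return timeToWait;
}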