@elizaos/plugin-knowledge 1.6.0 → 1.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +158 -75
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
@@ -257,12 +257,13 @@ function validateModelConfig(runtime) {
     EMBEDDING_DIMENSION: embeddingDimension,
     LOAD_DOCS_ON_STARTUP: parseBooleanEnv(getSetting("LOAD_DOCS_ON_STARTUP")),
     CTX_KNOWLEDGE_ENABLED: ctxKnowledgeEnabled,
-    // Rate limiting settings -
-    //
+    // Rate limiting settings - optimized for batch embeddings
+    // With batch embeddings, we send 100 texts in ONE API call
+    // 935 chunks / 100 = ~10 API calls instead of 935!
     RATE_LIMIT_ENABLED: parseBooleanEnv(getSetting("RATE_LIMIT_ENABLED", "true")),
-    MAX_CONCURRENT_REQUESTS: getSetting("MAX_CONCURRENT_REQUESTS", "
-    REQUESTS_PER_MINUTE: getSetting("REQUESTS_PER_MINUTE", "
-    TOKENS_PER_MINUTE: getSetting("TOKENS_PER_MINUTE", "
+    MAX_CONCURRENT_REQUESTS: getSetting("MAX_CONCURRENT_REQUESTS", "100"),
+    REQUESTS_PER_MINUTE: getSetting("REQUESTS_PER_MINUTE", "500"),
+    TOKENS_PER_MINUTE: getSetting("TOKENS_PER_MINUTE", "1000000"),
     BATCH_DELAY_MS: getSetting("BATCH_DELAY_MS", "100")
   });
   validateConfigRequirements(config, assumePluginOpenAI);
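The arithmetic in the new comments is easy to verify. A minimal sketch (the `apiCallsFor` helper is hypothetical and not part of the package; `EMBEDDING_BATCH_SIZE` matches the constant this version introduces below):

```js
// Hypothetical helper illustrating the batch-size arithmetic from the comments above.
const EMBEDDING_BATCH_SIZE = 100;

function apiCallsFor(chunkCount) {
  // One API call per batch of up to 100 texts.
  return Math.ceil(chunkCount / EMBEDDING_BATCH_SIZE);
}

console.log(apiCallsFor(935)); // 10 calls instead of 935 individual requests
```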
@@ -1716,67 +1717,165 @@ async function processAndSaveFragments({
   }
   return { savedCount, failedCount, failedChunks };
 }
+var EMBEDDING_BATCH_SIZE = 100;
 async function generateEmbeddingsForChunks(runtime, contextualizedChunks, rateLimiter) {
   const validChunks = contextualizedChunks.filter((chunk) => chunk.success);
   const failedChunks = contextualizedChunks.filter((chunk) => !chunk.success);
-
-
+  const results = [];
+  for (const chunk of failedChunks) {
+    results.push({
       success: false,
       index: chunk.index,
       error: new Error("Chunk processing failed"),
       text: chunk.contextualizedText
-    })
+    });
   }
-
-
-
-
-
-
-
-
-
+  if (validChunks.length === 0) {
+    return results;
+  }
+  const useBatchEmbeddings = shouldUseBatchEmbeddings(runtime);
+  if (useBatchEmbeddings) {
+    logger4.info(`[Document Processor] Using BATCH embeddings for ${validChunks.length} chunks`);
+    return await generateEmbeddingsBatch(runtime, validChunks, rateLimiter, results);
+  } else {
+    logger4.info(`[Document Processor] Using individual embeddings for ${validChunks.length} chunks`);
+    return await generateEmbeddingsIndividual(runtime, validChunks, rateLimiter, results);
+  }
+}
+function shouldUseBatchEmbeddings(runtime) {
+  const setting = runtime.getSetting("BATCH_EMBEDDINGS") ?? process.env.BATCH_EMBEDDINGS;
+  if (setting === "false" || setting === false) {
+    return false;
+  }
+  return true;
+}
+async function generateEmbeddingsBatch(runtime, validChunks, rateLimiter, results) {
+  for (let batchStart = 0; batchStart < validChunks.length; batchStart += EMBEDDING_BATCH_SIZE) {
+    const batchEnd = Math.min(batchStart + EMBEDDING_BATCH_SIZE, validChunks.length);
+    const batch = validChunks.slice(batchStart, batchEnd);
+    const batchTexts = batch.map((c) => c.contextualizedText);
+    const totalTokens = batchTexts.reduce((sum, text) => sum + estimateTokens(text), 0);
+    await rateLimiter(totalTokens);
+    logger4.info(
+      `[Document Processor] Batch ${Math.floor(batchStart / EMBEDDING_BATCH_SIZE) + 1}/${Math.ceil(validChunks.length / EMBEDDING_BATCH_SIZE)}: ${batch.length} texts, ~${totalTokens} tokens`
+    );
+    try {
+      const embeddings = await generateBatchEmbeddingsViaRuntime(runtime, batchTexts);
+      for (let i = 0; i < batch.length; i++) {
+        const chunk = batch[i];
+        const embedding = embeddings[i];
+        if (embedding && embedding.length > 0 && embedding[0] !== 0) {
+          results.push({
+            embedding,
+            success: true,
+            index: chunk.index,
+            text: chunk.contextualizedText
+          });
+        } else {
+          results.push({
+            success: false,
+            index: chunk.index,
+            error: new Error("Empty or invalid embedding returned"),
+            text: chunk.contextualizedText
+          });
+        }
       }
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    } catch (error) {
+      logger4.error(`[Document Processor] Batch embedding error: ${error.message}`);
+      for (const chunk of batch) {
+        try {
+          const result = await generateEmbeddingWithValidation(runtime, chunk.contextualizedText);
+          if (result.success && result.embedding) {
+            results.push({
+              embedding: result.embedding,
+              success: true,
+              index: chunk.index,
+              text: chunk.contextualizedText
+            });
+          } else {
+            results.push({
+              success: false,
+              index: chunk.index,
+              error: result.error || new Error("Embedding failed"),
+              text: chunk.contextualizedText
+            });
+          }
+        } catch (fallbackError) {
+          results.push({
             success: false,
-            index:
-            error,
-            text:
-          };
+            index: chunk.index,
+            error: fallbackError,
+            text: chunk.contextualizedText
+          });
         }
-
-
-
-
-
-
-
-
-
-
-
+      }
+    }
+  }
+  return results;
+}
+async function generateBatchEmbeddingsViaRuntime(runtime, texts) {
+  const batchResult = await runtime.useModel(
+    ModelType.TEXT_EMBEDDING,
+    { texts }
+    // Handler supports { texts: string[] } for batch mode
+  );
+  if (Array.isArray(batchResult) && Array.isArray(batchResult[0])) {
+    return batchResult;
+  }
+  if (Array.isArray(batchResult) && typeof batchResult[0] === "number") {
+    logger4.warn("[Document Processor] Runtime returned single embedding for batch request - falling back to individual calls");
+    const embeddings = await Promise.all(
+      texts.map(async (text) => {
+        const result = await runtime.useModel(ModelType.TEXT_EMBEDDING, { text });
+        if (Array.isArray(result)) {
+          return result;
+        }
+        return result?.embedding || [];
+      })
+    );
+    return embeddings;
+  }
+  logger4.error("[Document Processor] Unexpected batch result format:", typeof batchResult);
+  throw new Error("Unexpected batch embedding result format");
+}
+async function generateEmbeddingsIndividual(runtime, validChunks, rateLimiter, results) {
+  for (const chunk of validChunks) {
+    const embeddingTokens = estimateTokens(chunk.contextualizedText);
+    await rateLimiter(embeddingTokens);
+    try {
+      const generateEmbeddingOperation = async () => {
+        return await generateEmbeddingWithValidation(runtime, chunk.contextualizedText);
+      };
+      const { embedding, success, error } = await withRateLimitRetry(
+        generateEmbeddingOperation,
+        `embedding generation for chunk ${chunk.index}`
+      );
+      if (!success) {
+        results.push({
           success: false,
-          index:
+          index: chunk.index,
           error,
-          text:
-        };
+          text: chunk.contextualizedText
+        });
+      } else {
+        results.push({
+          embedding,
+          success: true,
+          index: chunk.index,
+          text: chunk.contextualizedText
+        });
       }
-      })
-
+    } catch (error) {
+      logger4.error(`Error generating embedding for chunk ${chunk.index}: ${error.message}`);
+      results.push({
+        success: false,
+        index: chunk.index,
+        error,
+        text: chunk.contextualizedText
+      });
+    }
+  }
+  return results;
 }
 async function getContextualizedChunks(runtime, fullDocumentText, chunks, contentType, batchOriginalIndices, documentTitle) {
   const ctxEnabled = getCtxKnowledgeEnabled(runtime);
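A usage sketch of the new entry point, illustrative only: `runtime` is assumed to be an agent runtime with a `TEXT_EMBEDDING` handler registered, and the chunk shape is inferred from the fields the code reads (`index`, `success`, `contextualizedText`). Note that `shouldUseBatchEmbeddings` treats anything other than an explicit `"false"` as true, so setting `BATCH_EMBEDDINGS=false` (runtime setting or env var) is the only way to force the individual path.

```js
// Illustrative sketch, not from the package. `runtime` is assumed to exist.
const chunks = [
  { index: 0, success: true, contextualizedText: "First contextualized chunk..." },
  { index: 1, success: false, contextualizedText: "Chunk whose contextualization failed" }
];

// Hypothetical no-op limiter standing in for createRateLimiter(...).
const noopRateLimiter = async (_estimatedTokens) => {};

const results = await generateEmbeddingsForChunks(runtime, chunks, noopRateLimiter);
// Success entries: { embedding, success: true, index, text }
// Failure entries: { success: false, index, error, text }
```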
@@ -1965,17 +2064,11 @@ function prepareContextPrompts(chunks, fullDocumentText, contentType, batchIndic
 }
 async function generateEmbeddingWithValidation(runtime, text) {
   try {
-    const embeddingResult = await runtime.useModel(ModelType.TEXT_EMBEDDING, {
-      text
-    });
+    const embeddingResult = await runtime.useModel(ModelType.TEXT_EMBEDDING, { text });
     const embedding = Array.isArray(embeddingResult) ? embeddingResult : embeddingResult?.embedding;
     if (!embedding || embedding.length === 0) {
-      logger4.warn(`Zero vector detected
-      return {
-        embedding: null,
-        success: false,
-        error: new Error("Zero vector detected")
-      };
+      logger4.warn(`Zero vector detected`);
+      return { embedding: null, success: false, error: new Error("Zero vector detected") };
     }
     return { embedding, success: true };
   } catch (error) {
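The surviving normalization line accepts both result shapes a `TEXT_EMBEDDING` handler might return; a small sketch with made-up values:

```js
// Made-up values showing the two shapes the normalization above tolerates.
const direct = [0.12, -0.03, 0.87];            // handler returns the vector itself
const wrapped = { embedding: [0.12, -0.03] };  // or wraps it in an object

for (const embeddingResult of [direct, wrapped]) {
  const embedding = Array.isArray(embeddingResult) ? embeddingResult : embeddingResult?.embedding;
  console.log(Array.isArray(embedding) && embedding.length > 0); // true; empty/missing => "Zero vector detected"
}
```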
@@ -2005,9 +2098,7 @@ function createRateLimiter(requestsPerMinute, tokensPerMinute, rateLimitEnabled
   const tokenUsage = [];
   const intervalMs = 60 * 1e3;
   return async function rateLimiter(estimatedTokens = 1e3) {
-    if (!rateLimitEnabled)
-      return;
-    }
+    if (!rateLimitEnabled) return;
     const now = Date.now();
     while (requestTimes.length > 0 && now - requestTimes[0] > intervalMs) {
       requestTimes.shift();
@@ -2021,23 +2112,15 @@ function createRateLimiter(requestsPerMinute, tokensPerMinute, rateLimitEnabled
     if (requestLimitExceeded || tokenLimitExceeded) {
       let timeToWait = 0;
       if (requestLimitExceeded) {
-
-        timeToWait = Math.max(timeToWait, oldestRequest + intervalMs - now);
+        timeToWait = Math.max(timeToWait, requestTimes[0] + intervalMs - now);
       }
       if (tokenLimitExceeded && tokenUsage.length > 0) {
-
-        timeToWait = Math.max(timeToWait, oldestTokenUsage.timestamp + intervalMs - now);
+        timeToWait = Math.max(timeToWait, tokenUsage[0].timestamp + intervalMs - now);
       }
       if (timeToWait > 0) {
         const reason = requestLimitExceeded ? "request" : "token";
         if (timeToWait > 5e3) {
-          logger4.info(
-            `[Document Processor] Rate limiting: waiting ${Math.round(timeToWait / 1e3)}s due to ${reason} limit`
-          );
-        } else {
-          logger4.debug(
-            `[Document Processor] Rate limiting: ${timeToWait}ms wait (${reason} limit)`
-          );
+          logger4.info(`[Rate Limiter] Waiting ${Math.round(timeToWait / 1e3)}s (${reason} limit)`);
         }
         await new Promise((resolve2) => setTimeout(resolve2, timeToWait));
       }
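Both rate-limiter hunks keep the same sliding 60-second window over request timestamps and token usage, just with less code. A usage sketch under this version's defaults (500 requests/min, 1,000,000 tokens/min; argument order taken from the signature in the hunk header):

```js
// Illustrative only: the returned function is awaited before each embedding call.
const rateLimiter = createRateLimiter(500, 1000000, true);

for (const batchTokens of [400000, 400000, 400000]) {
  // Resolves immediately while both limits have headroom; once the token window
  // fills, it sleeps until the oldest usage entry ages out of the 60s interval.
  await rateLimiter(batchTokens);
}
```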