@elizaos/plugin-knowledge 1.5.15 → 1.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -19,6 +19,11 @@ declare const ModelConfigSchema: z.ZodObject<{
  EMBEDDING_DIMENSION: z.ZodEffects<z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>, number, string | number | undefined>;
  LOAD_DOCS_ON_STARTUP: z.ZodDefault<z.ZodBoolean>;
  CTX_KNOWLEDGE_ENABLED: z.ZodDefault<z.ZodBoolean>;
+ RATE_LIMIT_ENABLED: z.ZodDefault<z.ZodBoolean>;
+ MAX_CONCURRENT_REQUESTS: z.ZodEffects<z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>, number, string | number | undefined>;
+ REQUESTS_PER_MINUTE: z.ZodEffects<z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>, number, string | number | undefined>;
+ TOKENS_PER_MINUTE: z.ZodEffects<z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>, number, string | number | undefined>;
+ BATCH_DELAY_MS: z.ZodEffects<z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>, number, string | number | undefined>;
  }, "strip", z.ZodTypeAny, {
  TEXT_EMBEDDING_MODEL: string;
  MAX_INPUT_TOKENS: number;
@@ -26,6 +31,11 @@ declare const ModelConfigSchema: z.ZodObject<{
  EMBEDDING_DIMENSION: number;
  LOAD_DOCS_ON_STARTUP: boolean;
  CTX_KNOWLEDGE_ENABLED: boolean;
+ RATE_LIMIT_ENABLED: boolean;
+ MAX_CONCURRENT_REQUESTS: number;
+ REQUESTS_PER_MINUTE: number;
+ TOKENS_PER_MINUTE: number;
+ BATCH_DELAY_MS: number;
  EMBEDDING_PROVIDER?: "openai" | "google" | undefined;
  TEXT_PROVIDER?: "openai" | "google" | "anthropic" | "openrouter" | undefined;
  OPENAI_API_KEY?: string | undefined;
@@ -55,6 +65,11 @@ declare const ModelConfigSchema: z.ZodObject<{
  EMBEDDING_DIMENSION?: string | number | undefined;
  LOAD_DOCS_ON_STARTUP?: boolean | undefined;
  CTX_KNOWLEDGE_ENABLED?: boolean | undefined;
+ RATE_LIMIT_ENABLED?: boolean | undefined;
+ MAX_CONCURRENT_REQUESTS?: string | number | undefined;
+ REQUESTS_PER_MINUTE?: string | number | undefined;
+ TOKENS_PER_MINUTE?: string | number | undefined;
+ BATCH_DELAY_MS?: string | number | undefined;
  }>;
  type ModelConfig = z.infer<typeof ModelConfigSchema>;
  /**
@@ -65,6 +80,8 @@ interface ProviderRateLimits {
  requestsPerMinute: number;
  tokensPerMinute?: number;
  provider: string;
+ rateLimitEnabled: boolean;
+ batchDelayMs: number;
  }
  /**
  * Options for text generation overrides
@@ -149,6 +166,11 @@ interface KnowledgeConfig {
  EMBEDDING_PROVIDER?: string;
  TEXT_PROVIDER?: string;
  TEXT_EMBEDDING_MODEL?: string;
+ RATE_LIMIT_ENABLED?: boolean;
+ MAX_CONCURRENT_REQUESTS?: number;
+ REQUESTS_PER_MINUTE?: number;
+ TOKENS_PER_MINUTE?: number;
+ BATCH_DELAY_MS?: number;
  }
  interface LoadResult {
  successful: number;
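
Flattened, the declaration changes add the same five settings in three places: the schema's output type, its input type (the numeric ones accept string | number), and the KnowledgeConfig interface, while ProviderRateLimits gains rateLimitEnabled and batchDelayMs. A hand-written summary of the resolved shape (RateLimitSettings is an illustrative name, not an identifier from the package):

    // Illustrative summary, not the emitted .d.ts: how the five new settings resolve.
    interface RateLimitSettings {
      RATE_LIMIT_ENABLED: boolean;     // schema input: boolean, defaulted
      MAX_CONCURRENT_REQUESTS: number; // schema input: string | number | undefined
      REQUESTS_PER_MINUTE: number;
      TOKENS_PER_MINUTE: number;
      BATCH_DELAY_MS: number;
    }
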
package/dist/index.html CHANGED
@@ -5,7 +5,7 @@
  <link rel="icon" type="image/svg+xml" href="/vite.svg" />
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
  <title>Agent Plugin View</title>
- <script type="module" crossorigin src="./assets/index-DRqE0iU1.js"></script>
+ <script type="module" crossorigin src="./assets/index-DcxhjPNa.js"></script>
  <link rel="stylesheet" crossorigin href="./assets/index-CFqKS0Ch.css">
  </head>
  <body>
package/dist/index.js CHANGED
@@ -181,7 +181,20 @@ var ModelConfigSchema = z.object({
  // config setting
  LOAD_DOCS_ON_STARTUP: z.boolean().default(false),
  // Contextual Knowledge settings
- CTX_KNOWLEDGE_ENABLED: z.boolean().default(false)
+ CTX_KNOWLEDGE_ENABLED: z.boolean().default(false),
+ // Rate limiting settings
+ // Set RATE_LIMIT_ENABLED=false to disable all rate limiting for fast uploads
+ // Useful when using APIs without rate limits (e.g., self-hosted models)
+ // High defaults optimized for Vercel gateway / high-throughput APIs
+ RATE_LIMIT_ENABLED: z.boolean().default(true),
+ // Maximum concurrent requests (default: 150, set higher for faster processing)
+ MAX_CONCURRENT_REQUESTS: z.string().or(z.number()).optional().transform((val) => val ? typeof val === "string" ? parseInt(val, 10) : val : 150),
+ // Requests per minute limit (default: 300)
+ REQUESTS_PER_MINUTE: z.string().or(z.number()).optional().transform((val) => val ? typeof val === "string" ? parseInt(val, 10) : val : 300),
+ // Tokens per minute limit (default: 750000)
+ TOKENS_PER_MINUTE: z.string().or(z.number()).optional().transform((val) => val ? typeof val === "string" ? parseInt(val, 10) : val : 75e4),
+ // Delay between batches in milliseconds (default: 100, set to 0 for no delay)
+ BATCH_DELAY_MS: z.string().or(z.number()).optional().transform((val) => val ? typeof val === "string" ? parseInt(val, 10) : val : 100)
  });
  var KnowledgeServiceType = {
  KNOWLEDGE: "knowledge"
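
The four numeric settings share one coercion pattern: accept a string or number, parse strings with parseInt, and fall back to a default when unset. A minimal standalone sketch of that transform (default value taken from MAX_CONCURRENT_REQUESTS above). Note the truthiness check: a numeric 0 falls back to the default, while the string "0", which is what environment variables actually deliver, still parses to 0:

    import { z } from "zod";

    // Coerce string | number | undefined to a number with a default.
    const maxConcurrent = z
      .string()
      .or(z.number())
      .optional()
      .transform((val) => (val ? (typeof val === "string" ? parseInt(val, 10) : val) : 150));

    maxConcurrent.parse("200");     // 200 (env values arrive as strings)
    maxConcurrent.parse(undefined); // 150 (the default)
    maxConcurrent.parse(0);         // 150 (numeric 0 is falsy); parse("0") returns 0
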
@@ -243,7 +256,15 @@ function validateModelConfig(runtime) {
  MAX_OUTPUT_TOKENS: getSetting("MAX_OUTPUT_TOKENS", "4096"),
  EMBEDDING_DIMENSION: embeddingDimension,
  LOAD_DOCS_ON_STARTUP: parseBooleanEnv(getSetting("LOAD_DOCS_ON_STARTUP")),
- CTX_KNOWLEDGE_ENABLED: ctxKnowledgeEnabled
+ CTX_KNOWLEDGE_ENABLED: ctxKnowledgeEnabled,
+ // Rate limiting settings - optimized for batch embeddings
+ // With batch embeddings, we send 100 texts in ONE API call
+ // 935 chunks / 100 = ~10 API calls instead of 935!
+ RATE_LIMIT_ENABLED: parseBooleanEnv(getSetting("RATE_LIMIT_ENABLED", "true")),
+ MAX_CONCURRENT_REQUESTS: getSetting("MAX_CONCURRENT_REQUESTS", "100"),
+ REQUESTS_PER_MINUTE: getSetting("REQUESTS_PER_MINUTE", "500"),
+ TOKENS_PER_MINUTE: getSetting("TOKENS_PER_MINUTE", "1000000"),
+ BATCH_DELAY_MS: getSetting("BATCH_DELAY_MS", "100")
  });
  validateConfigRequirements(config, assumePluginOpenAI);
  return config;
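
All five knobs resolve through getSetting, so they can be overridden per agent or via the environment. A hypothetical override sketch, assuming getSetting falls back to process.env the way the removed local helper in getProviderRateLimits below did:

    // Hypothetical environment overrides for document uploads.
    process.env.RATE_LIMIT_ENABLED = "false";    // skip throttling (e.g., self-hosted models)
    process.env.MAX_CONCURRENT_REQUESTS = "200"; // more parallel embedding requests
    process.env.REQUESTS_PER_MINUTE = "1000";
    process.env.TOKENS_PER_MINUTE = "2000000";
    process.env.BATCH_DELAY_MS = "0";            // no pause between fragment batches
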
@@ -309,49 +330,36 @@ function validateConfigRequirements(config, assumePluginOpenAI) {
  }
  async function getProviderRateLimits(runtime) {
  const config = validateModelConfig(runtime);
- const getSetting = (key, defaultValue) => {
- if (runtime) {
- return runtime.getSetting(key) || defaultValue;
- }
- return process.env[key] || defaultValue;
- };
- const maxConcurrentRequests = parseInt(getSetting("MAX_CONCURRENT_REQUESTS", "30"), 10);
- const requestsPerMinute = parseInt(getSetting("REQUESTS_PER_MINUTE", "60"), 10);
- const tokensPerMinute = parseInt(getSetting("TOKENS_PER_MINUTE", "150000"), 10);
+ const rateLimitEnabled = config.RATE_LIMIT_ENABLED;
+ const maxConcurrentRequests = config.MAX_CONCURRENT_REQUESTS;
+ const requestsPerMinute = config.REQUESTS_PER_MINUTE;
+ const tokensPerMinute = config.TOKENS_PER_MINUTE;
+ const batchDelayMs = config.BATCH_DELAY_MS;
  const primaryProvider = config.TEXT_PROVIDER || config.EMBEDDING_PROVIDER;
+ if (!rateLimitEnabled) {
+ logger.info(
+ `[Document Processor] Rate limiting DISABLED - unlimited throughput mode (concurrent: ${maxConcurrentRequests}, batch delay: ${batchDelayMs}ms)`
+ );
+ return {
+ maxConcurrentRequests,
+ requestsPerMinute: Number.MAX_SAFE_INTEGER,
+ tokensPerMinute: Number.MAX_SAFE_INTEGER,
+ provider: primaryProvider || "unlimited",
+ rateLimitEnabled: false,
+ batchDelayMs
+ };
+ }
  logger.debug(
- `[Document Processor] Rate limiting for ${primaryProvider}: ${requestsPerMinute} RPM, ${tokensPerMinute} TPM, ${maxConcurrentRequests} concurrent`
+ `[Document Processor] Rate limiting for ${primaryProvider}: ${requestsPerMinute} RPM, ${tokensPerMinute} TPM, ${maxConcurrentRequests} concurrent, ${batchDelayMs}ms batch delay`
  );
- switch (primaryProvider) {
- case "anthropic":
- return {
- maxConcurrentRequests,
- requestsPerMinute,
- tokensPerMinute,
- provider: "anthropic"
- };
- case "openai":
- return {
- maxConcurrentRequests,
- requestsPerMinute: Math.min(requestsPerMinute, 3e3),
- tokensPerMinute: Math.min(tokensPerMinute, 15e4),
- provider: "openai"
- };
- case "google":
- return {
- maxConcurrentRequests,
- requestsPerMinute: Math.min(requestsPerMinute, 60),
- tokensPerMinute: Math.min(tokensPerMinute, 1e5),
- provider: "google"
- };
- default:
- return {
- maxConcurrentRequests,
- requestsPerMinute,
- tokensPerMinute,
- provider: primaryProvider || "unknown"
- };
- }
+ return {
+ maxConcurrentRequests,
+ requestsPerMinute,
+ tokensPerMinute,
+ provider: primaryProvider || "unknown",
+ rateLimitEnabled: true,
+ batchDelayMs
+ };
  }

  // src/ctx-embeddings.ts
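
Callers now see one of two shapes from this function. A usage sketch (the concrete limit values depend on the config above):

    const limits = await getProviderRateLimits(runtime);
    if (!limits.rateLimitEnabled) {
      // requestsPerMinute and tokensPerMinute are Number.MAX_SAFE_INTEGER here,
      // so the rate limiter never sleeps; only batchDelayMs still throttles anything.
    }
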
@@ -1433,8 +1441,9 @@ function getCtxKnowledgeEnabled(runtime) {
  let source;
  let rawValue;
  if (runtime) {
- rawValue = runtime.getSetting("CTX_KNOWLEDGE_ENABLED");
- const cleanValue = rawValue?.toString().trim().toLowerCase();
+ const settingValue = runtime.getSetting("CTX_KNOWLEDGE_ENABLED");
+ rawValue = typeof settingValue === "string" ? settingValue : settingValue?.toString();
+ const cleanValue = rawValue?.trim().toLowerCase();
  result = cleanValue === "true";
  source = "runtime.getSetting()";
  } else {
@@ -1490,15 +1499,22 @@ async function processFragmentsSynchronously({
  }
  const docName = documentTitle || documentId.substring(0, 8);
  logger4.info(`[Document Processor] "${docName}": Split into ${chunks.length} chunks`);
- const providerLimits = await getProviderRateLimits();
- const CONCURRENCY_LIMIT = Math.min(30, providerLimits.maxConcurrentRequests || 30);
+ const providerLimits = await getProviderRateLimits(runtime);
+ const CONCURRENCY_LIMIT = providerLimits.maxConcurrentRequests || 30;
  const rateLimiter = createRateLimiter(
  providerLimits.requestsPerMinute || 60,
- providerLimits.tokensPerMinute
- );
- logger4.debug(
- `[Document Processor] Rate limits: ${providerLimits.requestsPerMinute} RPM, ${providerLimits.tokensPerMinute} TPM (${providerLimits.provider}, concurrency: ${CONCURRENCY_LIMIT})`
+ providerLimits.tokensPerMinute,
+ providerLimits.rateLimitEnabled
  );
+ if (!providerLimits.rateLimitEnabled) {
+ logger4.info(
+ `[Document Processor] UNLIMITED MODE: concurrency ${CONCURRENCY_LIMIT}, batch delay ${providerLimits.batchDelayMs}ms`
+ );
+ } else {
+ logger4.debug(
+ `[Document Processor] Rate limits: ${providerLimits.requestsPerMinute} RPM, ${providerLimits.tokensPerMinute} TPM (${providerLimits.provider}, concurrency: ${CONCURRENCY_LIMIT})`
+ );
+ }
  const { savedCount, failedCount } = await processAndSaveFragments({
  runtime,
  documentId,
@@ -1511,7 +1527,8 @@ async function processFragmentsSynchronously({
  worldId: worldId || agentId,
  concurrencyLimit: CONCURRENCY_LIMIT,
  rateLimiter,
- documentTitle
+ documentTitle,
+ batchDelayMs: providerLimits.batchDelayMs
  });
  const successRate = (savedCount / chunks.length * 100).toFixed(1);
  if (failedCount > 0) {
@@ -1617,7 +1634,8 @@ async function processAndSaveFragments({
  worldId,
  concurrencyLimit,
  rateLimiter,
- documentTitle
+ documentTitle,
+ batchDelayMs = 500
  }) {
  let savedCount = 0;
  let failedCount = 0;
@@ -1693,73 +1711,171 @@ async function processAndSaveFragments({
  failedChunks.push(originalChunkIndex);
  }
  }
- if (i + concurrencyLimit < chunks.length) {
- await new Promise((resolve2) => setTimeout(resolve2, 500));
+ if (i + concurrencyLimit < chunks.length && batchDelayMs > 0) {
+ await new Promise((resolve2) => setTimeout(resolve2, batchDelayMs));
  }
  }
  return { savedCount, failedCount, failedChunks };
  }
+ var EMBEDDING_BATCH_SIZE = 100;
  async function generateEmbeddingsForChunks(runtime, contextualizedChunks, rateLimiter) {
  const validChunks = contextualizedChunks.filter((chunk) => chunk.success);
  const failedChunks = contextualizedChunks.filter((chunk) => !chunk.success);
- if (validChunks.length === 0) {
- return failedChunks.map((chunk) => ({
+ const results = [];
+ for (const chunk of failedChunks) {
+ results.push({
  success: false,
  index: chunk.index,
  error: new Error("Chunk processing failed"),
  text: chunk.contextualizedText
- }));
+ });
  }
- return await Promise.all(
- contextualizedChunks.map(async (contextualizedChunk) => {
- if (!contextualizedChunk.success) {
- return {
- success: false,
- index: contextualizedChunk.index,
- error: new Error("Chunk processing failed"),
- text: contextualizedChunk.contextualizedText
- };
+ if (validChunks.length === 0) {
+ return results;
+ }
+ const useBatchEmbeddings = shouldUseBatchEmbeddings(runtime);
+ if (useBatchEmbeddings) {
+ logger4.info(`[Document Processor] Using BATCH embeddings for ${validChunks.length} chunks`);
+ return await generateEmbeddingsBatch(runtime, validChunks, rateLimiter, results);
+ } else {
+ logger4.info(`[Document Processor] Using individual embeddings for ${validChunks.length} chunks`);
+ return await generateEmbeddingsIndividual(runtime, validChunks, rateLimiter, results);
+ }
+ }
+ function shouldUseBatchEmbeddings(runtime) {
+ const setting = runtime.getSetting("BATCH_EMBEDDINGS") ?? process.env.BATCH_EMBEDDINGS;
+ if (setting === "false" || setting === false) {
+ return false;
+ }
+ return true;
+ }
+ async function generateEmbeddingsBatch(runtime, validChunks, rateLimiter, results) {
+ for (let batchStart = 0; batchStart < validChunks.length; batchStart += EMBEDDING_BATCH_SIZE) {
+ const batchEnd = Math.min(batchStart + EMBEDDING_BATCH_SIZE, validChunks.length);
+ const batch = validChunks.slice(batchStart, batchEnd);
+ const batchTexts = batch.map((c) => c.contextualizedText);
+ const totalTokens = batchTexts.reduce((sum, text) => sum + estimateTokens(text), 0);
+ await rateLimiter(totalTokens);
+ logger4.info(
+ `[Document Processor] Batch ${Math.floor(batchStart / EMBEDDING_BATCH_SIZE) + 1}/${Math.ceil(validChunks.length / EMBEDDING_BATCH_SIZE)}: ${batch.length} texts, ~${totalTokens} tokens`
+ );
+ try {
+ const embeddings = await generateBatchEmbeddingsViaRuntime(runtime, batchTexts);
+ for (let i = 0; i < batch.length; i++) {
+ const chunk = batch[i];
+ const embedding = embeddings[i];
+ if (embedding && embedding.length > 0 && embedding[0] !== 0) {
+ results.push({
+ embedding,
+ success: true,
+ index: chunk.index,
+ text: chunk.contextualizedText
+ });
+ } else {
+ results.push({
+ success: false,
+ index: chunk.index,
+ error: new Error("Empty or invalid embedding returned"),
+ text: chunk.contextualizedText
+ });
+ }
  }
- const embeddingTokens = estimateTokens(contextualizedChunk.contextualizedText);
- await rateLimiter(embeddingTokens);
- try {
- const generateEmbeddingOperation = async () => {
- return await generateEmbeddingWithValidation(
- runtime,
- contextualizedChunk.contextualizedText
- );
- };
- const { embedding, success, error } = await withRateLimitRetry(
- generateEmbeddingOperation,
- `embedding generation for chunk ${contextualizedChunk.index}`
- );
- if (!success) {
- return {
+ } catch (error) {
+ logger4.error(`[Document Processor] Batch embedding error: ${error.message}`);
+ for (const chunk of batch) {
+ try {
+ const result = await generateEmbeddingWithValidation(runtime, chunk.contextualizedText);
+ if (result.success && result.embedding) {
+ results.push({
+ embedding: result.embedding,
+ success: true,
+ index: chunk.index,
+ text: chunk.contextualizedText
+ });
+ } else {
+ results.push({
+ success: false,
+ index: chunk.index,
+ error: result.error || new Error("Embedding failed"),
+ text: chunk.contextualizedText
+ });
+ }
+ } catch (fallbackError) {
+ results.push({
  success: false,
- index: contextualizedChunk.index,
- error,
- text: contextualizedChunk.contextualizedText
- };
+ index: chunk.index,
+ error: fallbackError,
+ text: chunk.contextualizedText
+ });
  }
- return {
- embedding,
- success: true,
- index: contextualizedChunk.index,
- text: contextualizedChunk.contextualizedText
- };
- } catch (error) {
- logger4.error(
- `Error generating embedding for chunk ${contextualizedChunk.index}: ${error.message}`
- );
- return {
+ }
+ }
+ }
+ return results;
+ }
+ async function generateBatchEmbeddingsViaRuntime(runtime, texts) {
+ const batchResult = await runtime.useModel(
+ ModelType.TEXT_EMBEDDING,
+ { texts }
+ // Handler supports { texts: string[] } for batch mode
+ );
+ if (Array.isArray(batchResult) && Array.isArray(batchResult[0])) {
+ return batchResult;
+ }
+ if (Array.isArray(batchResult) && typeof batchResult[0] === "number") {
+ logger4.warn("[Document Processor] Runtime returned single embedding for batch request - falling back to individual calls");
+ const embeddings = await Promise.all(
+ texts.map(async (text) => {
+ const result = await runtime.useModel(ModelType.TEXT_EMBEDDING, { text });
+ if (Array.isArray(result)) {
+ return result;
+ }
+ return result?.embedding || [];
+ })
+ );
+ return embeddings;
+ }
+ logger4.error("[Document Processor] Unexpected batch result format:", typeof batchResult);
+ throw new Error("Unexpected batch embedding result format");
+ }
+ async function generateEmbeddingsIndividual(runtime, validChunks, rateLimiter, results) {
+ for (const chunk of validChunks) {
+ const embeddingTokens = estimateTokens(chunk.contextualizedText);
+ await rateLimiter(embeddingTokens);
+ try {
+ const generateEmbeddingOperation = async () => {
+ return await generateEmbeddingWithValidation(runtime, chunk.contextualizedText);
+ };
+ const { embedding, success, error } = await withRateLimitRetry(
+ generateEmbeddingOperation,
+ `embedding generation for chunk ${chunk.index}`
+ );
+ if (!success) {
+ results.push({
  success: false,
- index: contextualizedChunk.index,
+ index: chunk.index,
  error,
- text: contextualizedChunk.contextualizedText
- };
+ text: chunk.contextualizedText
+ });
+ } else {
+ results.push({
+ embedding,
+ success: true,
+ index: chunk.index,
+ text: chunk.contextualizedText
+ });
  }
- })
- );
+ } catch (error) {
+ logger4.error(`Error generating embedding for chunk ${chunk.index}: ${error.message}`);
+ results.push({
+ success: false,
+ index: chunk.index,
+ error,
+ text: chunk.contextualizedText
+ });
+ }
+ }
+ return results;
  }
  async function getContextualizedChunks(runtime, fullDocumentText, chunks, contentType, batchOriginalIndices, documentTitle) {
  const ctxEnabled = getCtxKnowledgeEnabled(runtime);
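
The batch path assumes the runtime's TEXT_EMBEDDING handler accepts { texts: string[] }; a handler that only understands { text } comes back with a single vector, which the code detects by shape. A standalone sketch of that detection, using only the calls that appear above:

    // Shape check from generateBatchEmbeddingsViaRuntime.
    const out = await runtime.useModel(ModelType.TEXT_EMBEDDING, { texts: ["alpha", "beta"] });
    if (Array.isArray(out) && Array.isArray(out[0])) {
      // number[][]: one embedding per input, batch mode worked
    } else if (Array.isArray(out) && typeof out[0] === "number") {
      // number[]: a single vector came back, so retry with one { text } call per input
    }
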
@@ -1795,10 +1911,11 @@ async function generateContextsInBatch(runtime, fullDocumentText, chunks, conten
  if (!chunks || chunks.length === 0) {
  return [];
  }
- const providerLimits = await getProviderRateLimits();
+ const providerLimits = await getProviderRateLimits(runtime);
  const rateLimiter = createRateLimiter(
  providerLimits.requestsPerMinute || 60,
- providerLimits.tokensPerMinute
+ providerLimits.tokensPerMinute,
+ providerLimits.rateLimitEnabled
  );
  const config = validateModelConfig(runtime);
  const isUsingOpenRouter = config.TEXT_PROVIDER === "openrouter";
@@ -1839,9 +1956,11 @@ async function generateContextsInBatch(runtime, fullDocumentText, chunks, conten
  }
  } else {
  if (item.usesCaching) {
+ const combinedPrompt = item.systemPrompt ? `${item.systemPrompt}
+
+ ${item.promptText}` : item.promptText;
  return await runtime.useModel(ModelType.TEXT_LARGE, {
- prompt: item.promptText,
- system: item.systemPrompt
+ prompt: combinedPrompt
  });
  } else {
  return await runtime.useModel(ModelType.TEXT_LARGE, {
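
On the caching path the system prompt is now folded into the prompt string itself, separated by a blank line. The two call shapes, as a sketch:

    // Before: separate fields.
    await runtime.useModel(ModelType.TEXT_LARGE, {
      prompt: item.promptText,
      system: item.systemPrompt
    });
    // After, when item.usesCaching: one combined string, system text first.
    await runtime.useModel(ModelType.TEXT_LARGE, {
      prompt: `${item.systemPrompt}\n\n${item.promptText}`
    });
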
@@ -1945,17 +2064,11 @@ function prepareContextPrompts(chunks, fullDocumentText, contentType, batchIndic
  }
  async function generateEmbeddingWithValidation(runtime, text) {
  try {
- const embeddingResult = await runtime.useModel(ModelType.TEXT_EMBEDDING, {
- text
- });
+ const embeddingResult = await runtime.useModel(ModelType.TEXT_EMBEDDING, { text });
  const embedding = Array.isArray(embeddingResult) ? embeddingResult : embeddingResult?.embedding;
  if (!embedding || embedding.length === 0) {
- logger4.warn(`Zero vector detected. Embedding result: ${JSON.stringify(embedding)}`);
- return {
- embedding: null,
- success: false,
- error: new Error("Zero vector detected")
- };
+ logger4.warn(`Zero vector detected`);
+ return { embedding: null, success: false, error: new Error("Zero vector detected") };
  }
  return { embedding, success: true };
  } catch (error) {
@@ -1980,11 +2093,12 @@ async function withRateLimitRetry(operation, errorContext, retryDelay) {
  throw error;
  }
  }
- function createRateLimiter(requestsPerMinute, tokensPerMinute) {
+ function createRateLimiter(requestsPerMinute, tokensPerMinute, rateLimitEnabled = true) {
  const requestTimes = [];
  const tokenUsage = [];
  const intervalMs = 60 * 1e3;
  return async function rateLimiter(estimatedTokens = 1e3) {
+ if (!rateLimitEnabled) return;
  const now = Date.now();
  while (requestTimes.length > 0 && now - requestTimes[0] > intervalMs) {
  requestTimes.shift();
@@ -1998,23 +2112,15 @@ function createRateLimiter(requestsPerMinute, tokensPerMinute) {
  if (requestLimitExceeded || tokenLimitExceeded) {
  let timeToWait = 0;
  if (requestLimitExceeded) {
- const oldestRequest = requestTimes[0];
- timeToWait = Math.max(timeToWait, oldestRequest + intervalMs - now);
+ timeToWait = Math.max(timeToWait, requestTimes[0] + intervalMs - now);
  }
  if (tokenLimitExceeded && tokenUsage.length > 0) {
- const oldestTokenUsage = tokenUsage[0];
- timeToWait = Math.max(timeToWait, oldestTokenUsage.timestamp + intervalMs - now);
+ timeToWait = Math.max(timeToWait, tokenUsage[0].timestamp + intervalMs - now);
  }
  if (timeToWait > 0) {
  const reason = requestLimitExceeded ? "request" : "token";
  if (timeToWait > 5e3) {
- logger4.info(
- `[Document Processor] Rate limiting: waiting ${Math.round(timeToWait / 1e3)}s due to ${reason} limit`
- );
- } else {
- logger4.debug(
- `[Document Processor] Rate limiting: ${timeToWait}ms wait (${reason} limit)`
- );
+ logger4.info(`[Rate Limiter] Waiting ${Math.round(timeToWait / 1e3)}s (${reason} limit)`);
  }
  await new Promise((resolve2) => setTimeout(resolve2, timeToWait));
  }
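
The limiter keeps a sliding one-minute window over both request timestamps and estimated token usage, sleeping just long enough for the oldest entry to age out. A usage sketch (chunkText is a placeholder; estimateTokens is the plugin's own estimator seen above):

    // One limiter per processing run.
    const limiter = createRateLimiter(500, 1e6);  // 500 requests/min, 1,000,000 tokens/min
    await limiter(estimateTokens(chunkText));     // waits only when a window is full

    // With rate limiting disabled, calls return immediately and track nothing.
    const unlimited = createRateLimiter(500, 1e6, false);
    await unlimited(5e4);
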
@@ -2274,7 +2380,8 @@ var KnowledgeService = class _KnowledgeService extends Service {
  );
  try {
  await new Promise((resolve2) => setTimeout(resolve2, 1e3));
- const knowledgePath = this.runtime.getSetting("KNOWLEDGE_PATH");
+ const knowledgePathSetting = this.runtime.getSetting("KNOWLEDGE_PATH");
+ const knowledgePath = typeof knowledgePathSetting === "string" ? knowledgePathSetting : void 0;
  const result = await loadDocsFromPath(
  this,
  this.runtime.agentId,