@elizaos/plugin-knowledge 1.5.15 → 1.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/.vite/manifest.json +1 -1
- package/dist/assets/{index-DRqE0iU1.js → index-DcxhjPNa.js} +4 -4
- package/dist/index.d.ts +22 -0
- package/dist/index.html +1 -1
- package/dist/index.js +234 -127
- package/dist/index.js.map +1 -1
- package/package.json +2 -2
package/dist/index.d.ts
CHANGED
@@ -19,6 +19,11 @@ declare const ModelConfigSchema: z.ZodObject<{
     EMBEDDING_DIMENSION: z.ZodEffects<z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>, number, string | number | undefined>;
     LOAD_DOCS_ON_STARTUP: z.ZodDefault<z.ZodBoolean>;
     CTX_KNOWLEDGE_ENABLED: z.ZodDefault<z.ZodBoolean>;
+    RATE_LIMIT_ENABLED: z.ZodDefault<z.ZodBoolean>;
+    MAX_CONCURRENT_REQUESTS: z.ZodEffects<z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>, number, string | number | undefined>;
+    REQUESTS_PER_MINUTE: z.ZodEffects<z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>, number, string | number | undefined>;
+    TOKENS_PER_MINUTE: z.ZodEffects<z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>, number, string | number | undefined>;
+    BATCH_DELAY_MS: z.ZodEffects<z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>, number, string | number | undefined>;
 }, "strip", z.ZodTypeAny, {
     TEXT_EMBEDDING_MODEL: string;
     MAX_INPUT_TOKENS: number;
@@ -26,6 +31,11 @@ declare const ModelConfigSchema: z.ZodObject<{
     EMBEDDING_DIMENSION: number;
     LOAD_DOCS_ON_STARTUP: boolean;
     CTX_KNOWLEDGE_ENABLED: boolean;
+    RATE_LIMIT_ENABLED: boolean;
+    MAX_CONCURRENT_REQUESTS: number;
+    REQUESTS_PER_MINUTE: number;
+    TOKENS_PER_MINUTE: number;
+    BATCH_DELAY_MS: number;
     EMBEDDING_PROVIDER?: "openai" | "google" | undefined;
     TEXT_PROVIDER?: "openai" | "google" | "anthropic" | "openrouter" | undefined;
     OPENAI_API_KEY?: string | undefined;
@@ -55,6 +65,11 @@ declare const ModelConfigSchema: z.ZodObject<{
     EMBEDDING_DIMENSION?: string | number | undefined;
     LOAD_DOCS_ON_STARTUP?: boolean | undefined;
     CTX_KNOWLEDGE_ENABLED?: boolean | undefined;
+    RATE_LIMIT_ENABLED?: boolean | undefined;
+    MAX_CONCURRENT_REQUESTS?: string | number | undefined;
+    REQUESTS_PER_MINUTE?: string | number | undefined;
+    TOKENS_PER_MINUTE?: string | number | undefined;
+    BATCH_DELAY_MS?: string | number | undefined;
 }>;
 type ModelConfig = z.infer<typeof ModelConfigSchema>;
 /**
@@ -65,6 +80,8 @@ interface ProviderRateLimits {
     requestsPerMinute: number;
     tokensPerMinute?: number;
     provider: string;
+    rateLimitEnabled: boolean;
+    batchDelayMs: number;
 }
 /**
  * Options for text generation overrides
@@ -149,6 +166,11 @@ interface KnowledgeConfig {
     EMBEDDING_PROVIDER?: string;
     TEXT_PROVIDER?: string;
    TEXT_EMBEDDING_MODEL?: string;
+    RATE_LIMIT_ENABLED?: boolean;
+    MAX_CONCURRENT_REQUESTS?: number;
+    REQUESTS_PER_MINUTE?: number;
+    TOKENS_PER_MINUTE?: number;
+    BATCH_DELAY_MS?: number;
 }
 interface LoadResult {
     successful: number;
package/dist/index.html
CHANGED
@@ -5,7 +5,7 @@
     <link rel="icon" type="image/svg+xml" href="/vite.svg" />
     <meta name="viewport" content="width=device-width, initial-scale=1.0" />
     <title>Agent Plugin View</title>
-    <script type="module" crossorigin src="./assets/index-DRqE0iU1.js"></script>
+    <script type="module" crossorigin src="./assets/index-DcxhjPNa.js"></script>
     <link rel="stylesheet" crossorigin href="./assets/index-CFqKS0Ch.css">
   </head>
   <body>
package/dist/index.js
CHANGED
@@ -181,7 +181,20 @@ var ModelConfigSchema = z.object({
   // config setting
   LOAD_DOCS_ON_STARTUP: z.boolean().default(false),
   // Contextual Knowledge settings
-  CTX_KNOWLEDGE_ENABLED: z.boolean().default(false)
+  CTX_KNOWLEDGE_ENABLED: z.boolean().default(false),
+  // Rate limiting settings
+  // Set RATE_LIMIT_ENABLED=false to disable all rate limiting for fast uploads
+  // Useful when using APIs without rate limits (e.g., self-hosted models)
+  // High defaults optimized for Vercel gateway / high-throughput APIs
+  RATE_LIMIT_ENABLED: z.boolean().default(true),
+  // Maximum concurrent requests (default: 150, set higher for faster processing)
+  MAX_CONCURRENT_REQUESTS: z.string().or(z.number()).optional().transform((val) => val ? typeof val === "string" ? parseInt(val, 10) : val : 150),
+  // Requests per minute limit (default: 300)
+  REQUESTS_PER_MINUTE: z.string().or(z.number()).optional().transform((val) => val ? typeof val === "string" ? parseInt(val, 10) : val : 300),
+  // Tokens per minute limit (default: 750000)
+  TOKENS_PER_MINUTE: z.string().or(z.number()).optional().transform((val) => val ? typeof val === "string" ? parseInt(val, 10) : val : 75e4),
+  // Delay between batches in milliseconds (default: 100, set to 0 for no delay)
+  BATCH_DELAY_MS: z.string().or(z.number()).optional().transform((val) => val ? typeof val === "string" ? parseInt(val, 10) : val : 100)
 });
 var KnowledgeServiceType = {
   KNOWLEDGE: "knowledge"
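Note on the pattern above: each numeric setting is declared as z.string().or(z.number()).optional() plus a transform, so values can arrive as env-var strings or plain numbers and still come out typed as number. A minimal standalone sketch of the same idea, assuming zod is installed (numericSetting is a hypothetical helper, not part of the plugin):

import { z } from "zod";

// Accept "450" or 450; fall back to a default when unset.
const numericSetting = (fallback: number) =>
  z.string().or(z.number()).optional().transform(
    (val) => val ? (typeof val === "string" ? parseInt(val, 10) : val) : fallback
  );

const RateLimitSchema = z.object({
  RATE_LIMIT_ENABLED: z.boolean().default(true),
  REQUESTS_PER_MINUTE: numericSetting(300),
});

console.log(RateLimitSchema.parse({ REQUESTS_PER_MINUTE: "450" }).REQUESTS_PER_MINUTE); // 450
console.log(RateLimitSchema.parse({}).REQUESTS_PER_MINUTE); // 300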
@@ -243,7 +256,15 @@ function validateModelConfig(runtime) {
     MAX_OUTPUT_TOKENS: getSetting("MAX_OUTPUT_TOKENS", "4096"),
     EMBEDDING_DIMENSION: embeddingDimension,
     LOAD_DOCS_ON_STARTUP: parseBooleanEnv(getSetting("LOAD_DOCS_ON_STARTUP")),
-    CTX_KNOWLEDGE_ENABLED: ctxKnowledgeEnabled
+    CTX_KNOWLEDGE_ENABLED: ctxKnowledgeEnabled,
+    // Rate limiting settings - optimized for batch embeddings
+    // With batch embeddings, we send 100 texts in ONE API call
+    // 935 chunks / 100 = ~10 API calls instead of 935!
+    RATE_LIMIT_ENABLED: parseBooleanEnv(getSetting("RATE_LIMIT_ENABLED", "true")),
+    MAX_CONCURRENT_REQUESTS: getSetting("MAX_CONCURRENT_REQUESTS", "100"),
+    REQUESTS_PER_MINUTE: getSetting("REQUESTS_PER_MINUTE", "500"),
+    TOKENS_PER_MINUTE: getSetting("TOKENS_PER_MINUTE", "1000000"),
+    BATCH_DELAY_MS: getSetting("BATCH_DELAY_MS", "100")
   });
   validateConfigRequirements(config, assumePluginOpenAI);
   return config;
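Two layers of defaults are in play here: the schema transforms above fall back to 150/300/750000/100, but validateModelConfig always passes a string default through getSetting, so those schema fallbacks are never reached on this path and the effective defaults become 100 concurrent requests, 500 RPM, 1,000,000 TPM, and a 100 ms batch delay. A small sketch of that precedence, using hypothetical standalone helpers:

const getSetting = (key: string, fallback: string): string => process.env[key] ?? fallback;

// Mirrors the schema transform: parse strings, pass numbers through, else use the schema fallback.
const toNumber = (val: string | number | undefined, schemaFallback: number): number =>
  val ? (typeof val === "string" ? parseInt(val, 10) : val) : schemaFallback;

// getSetting always returns a defined string, so the schema fallback (300) never applies here:
console.log(toNumber(getSetting("REQUESTS_PER_MINUTE", "500"), 300)); // 500 when the env var is unset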
@@ -309,49 +330,36 @@ function validateConfigRequirements(config, assumePluginOpenAI) {
 }
 async function getProviderRateLimits(runtime) {
   const config = validateModelConfig(runtime);
-  const
-  };
-  const maxConcurrentRequests = parseInt(getSetting("MAX_CONCURRENT_REQUESTS", "30"), 10);
-  const requestsPerMinute = parseInt(getSetting("REQUESTS_PER_MINUTE", "60"), 10);
-  const tokensPerMinute = parseInt(getSetting("TOKENS_PER_MINUTE", "150000"), 10);
+  const rateLimitEnabled = config.RATE_LIMIT_ENABLED;
+  const maxConcurrentRequests = config.MAX_CONCURRENT_REQUESTS;
+  const requestsPerMinute = config.REQUESTS_PER_MINUTE;
+  const tokensPerMinute = config.TOKENS_PER_MINUTE;
+  const batchDelayMs = config.BATCH_DELAY_MS;
   const primaryProvider = config.TEXT_PROVIDER || config.EMBEDDING_PROVIDER;
+  if (!rateLimitEnabled) {
+    logger.info(
+      `[Document Processor] Rate limiting DISABLED - unlimited throughput mode (concurrent: ${maxConcurrentRequests}, batch delay: ${batchDelayMs}ms)`
+    );
+    return {
+      maxConcurrentRequests,
+      requestsPerMinute: Number.MAX_SAFE_INTEGER,
+      tokensPerMinute: Number.MAX_SAFE_INTEGER,
+      provider: primaryProvider || "unlimited",
+      rateLimitEnabled: false,
+      batchDelayMs
+    };
+  }
   logger.debug(
-    `[Document Processor] Rate limiting for ${primaryProvider}: ${requestsPerMinute} RPM, ${tokensPerMinute} TPM, ${maxConcurrentRequests} concurrent`
+    `[Document Processor] Rate limiting for ${primaryProvider}: ${requestsPerMinute} RPM, ${tokensPerMinute} TPM, ${maxConcurrentRequests} concurrent, ${batchDelayMs}ms batch delay`
   );
-  switch (primaryProvider) {
-    case "openai":
-      return {
-        maxConcurrentRequests,
-        requestsPerMinute: Math.min(requestsPerMinute, 3e3),
-        tokensPerMinute: Math.min(tokensPerMinute, 15e4),
-        provider: "openai"
-      };
-    case "google":
-      return {
-        maxConcurrentRequests,
-        requestsPerMinute: Math.min(requestsPerMinute, 60),
-        tokensPerMinute: Math.min(tokensPerMinute, 1e5),
-        provider: "google"
-      };
-    default:
-      return {
-        maxConcurrentRequests,
-        requestsPerMinute,
-        tokensPerMinute,
-        provider: primaryProvider || "unknown"
-      };
-  }
+  return {
+    maxConcurrentRequests,
+    requestsPerMinute,
+    tokensPerMinute,
+    provider: primaryProvider || "unknown",
+    rateLimitEnabled: true,
+    batchDelayMs
+  };
 }

 // src/ctx-embeddings.ts
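When RATE_LIMIT_ENABLED is false, the RPM/TPM budgets are set to Number.MAX_SAFE_INTEGER so the sliding-window checks never trip, while maxConcurrentRequests still bounds parallelism. A sketch of the returned shape, reconstructed from the .d.ts hunk above plus usage (the maxConcurrentRequests field is inferred from the code, and the values are illustrative):

interface ProviderRateLimits {
  maxConcurrentRequests: number;
  requestsPerMinute: number;
  tokensPerMinute?: number;
  provider: string;
  rateLimitEnabled: boolean;
  batchDelayMs: number;
}

// "Unlimited" mode is still bounded by concurrency, never truly unbounded:
const unlimited: ProviderRateLimits = {
  maxConcurrentRequests: 150,
  requestsPerMinute: Number.MAX_SAFE_INTEGER,
  tokensPerMinute: Number.MAX_SAFE_INTEGER,
  provider: "unlimited",
  rateLimitEnabled: false,
  batchDelayMs: 100,
};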
@@ -1433,8 +1441,9 @@ function getCtxKnowledgeEnabled(runtime) {
   let source;
   let rawValue;
   if (runtime) {
-  …
+    const settingValue = runtime.getSetting("CTX_KNOWLEDGE_ENABLED");
+    rawValue = typeof settingValue === "string" ? settingValue : settingValue?.toString();
+    const cleanValue = rawValue?.trim().toLowerCase();
     result = cleanValue === "true";
     source = "runtime.getSetting()";
   } else {
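The rewritten branch normalizes whatever runtime.getSetting() returns (string or boolean) before comparing, so only a value that stringifies to "true" enables contextual knowledge. A minimal sketch of the same normalization (parseFlag is a hypothetical name):

function parseFlag(settingValue: string | number | boolean | null | undefined): boolean {
  const raw = typeof settingValue === "string" ? settingValue : settingValue?.toString();
  return raw?.trim().toLowerCase() === "true";
}

console.log(parseFlag(" TRUE ")); // true - trimmed and lowercased
console.log(parseFlag(true));     // true - boolean true stringifies to "true"
console.log(parseFlag("1"));      // false - only "true" counts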
@@ -1490,15 +1499,22 @@ async function processFragmentsSynchronously({
   }
   const docName = documentTitle || documentId.substring(0, 8);
   logger4.info(`[Document Processor] "${docName}": Split into ${chunks.length} chunks`);
-  const providerLimits = await getProviderRateLimits();
-  const CONCURRENCY_LIMIT =
+  const providerLimits = await getProviderRateLimits(runtime);
+  const CONCURRENCY_LIMIT = providerLimits.maxConcurrentRequests || 30;
   const rateLimiter = createRateLimiter(
     providerLimits.requestsPerMinute || 60,
-    providerLimits.tokensPerMinute
-  );
-  logger4.debug(
-    `[Document Processor] Rate limits: ${providerLimits.requestsPerMinute} RPM, ${providerLimits.tokensPerMinute} TPM (${providerLimits.provider}, concurrency: ${CONCURRENCY_LIMIT})`
+    providerLimits.tokensPerMinute,
+    providerLimits.rateLimitEnabled
   );
+  if (!providerLimits.rateLimitEnabled) {
+    logger4.info(
+      `[Document Processor] UNLIMITED MODE: concurrency ${CONCURRENCY_LIMIT}, batch delay ${providerLimits.batchDelayMs}ms`
+    );
+  } else {
+    logger4.debug(
+      `[Document Processor] Rate limits: ${providerLimits.requestsPerMinute} RPM, ${providerLimits.tokensPerMinute} TPM (${providerLimits.provider}, concurrency: ${CONCURRENCY_LIMIT})`
+    );
+  }
   const { savedCount, failedCount } = await processAndSaveFragments({
     runtime,
     documentId,
@@ -1511,7 +1527,8 @@ async function processFragmentsSynchronously({
     worldId: worldId || agentId,
     concurrencyLimit: CONCURRENCY_LIMIT,
     rateLimiter,
-    documentTitle
+    documentTitle,
+    batchDelayMs: providerLimits.batchDelayMs
   });
   const successRate = (savedCount / chunks.length * 100).toFixed(1);
   if (failedCount > 0) {
@@ -1617,7 +1634,8 @@ async function processAndSaveFragments({
   worldId,
   concurrencyLimit,
   rateLimiter,
-  documentTitle
+  documentTitle,
+  batchDelayMs = 500
 }) {
   let savedCount = 0;
   let failedCount = 0;
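The destructuring default batchDelayMs = 500 only applies when a caller omits the parameter entirely; processFragmentsSynchronously now always passes providerLimits.batchDelayMs (100 ms by default), and an explicit 0 is respected because the loop below only sleeps when batchDelayMs > 0. For illustration (delayFor is hypothetical):

function delayFor({ batchDelayMs = 500 }: { batchDelayMs?: number }): number {
  return batchDelayMs;
}

console.log(delayFor({}));                  // 500 - parameter omitted
console.log(delayFor({ batchDelayMs: 0 })); // 0 - explicit zero wins, so no delay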
@@ -1693,73 +1711,171 @@ async function processAndSaveFragments({
         failedChunks.push(originalChunkIndex);
       }
     }
-    if (i + concurrencyLimit < chunks.length) {
-      await new Promise((resolve2) => setTimeout(resolve2,
+    if (i + concurrencyLimit < chunks.length && batchDelayMs > 0) {
+      await new Promise((resolve2) => setTimeout(resolve2, batchDelayMs));
     }
   }
   return { savedCount, failedCount, failedChunks };
 }
+var EMBEDDING_BATCH_SIZE = 100;
 async function generateEmbeddingsForChunks(runtime, contextualizedChunks, rateLimiter) {
   const validChunks = contextualizedChunks.filter((chunk) => chunk.success);
   const failedChunks = contextualizedChunks.filter((chunk) => !chunk.success);
-  …
+  const results = [];
+  for (const chunk of failedChunks) {
+    results.push({
       success: false,
       index: chunk.index,
       error: new Error("Chunk processing failed"),
       text: chunk.contextualizedText
-    })
+    });
   }
-  …
+  if (validChunks.length === 0) {
+    return results;
+  }
+  const useBatchEmbeddings = shouldUseBatchEmbeddings(runtime);
+  if (useBatchEmbeddings) {
+    logger4.info(`[Document Processor] Using BATCH embeddings for ${validChunks.length} chunks`);
+    return await generateEmbeddingsBatch(runtime, validChunks, rateLimiter, results);
+  } else {
+    logger4.info(`[Document Processor] Using individual embeddings for ${validChunks.length} chunks`);
+    return await generateEmbeddingsIndividual(runtime, validChunks, rateLimiter, results);
+  }
+}
+function shouldUseBatchEmbeddings(runtime) {
+  const setting = runtime.getSetting("BATCH_EMBEDDINGS") ?? process.env.BATCH_EMBEDDINGS;
+  if (setting === "false" || setting === false) {
+    return false;
+  }
+  return true;
+}
+async function generateEmbeddingsBatch(runtime, validChunks, rateLimiter, results) {
+  for (let batchStart = 0; batchStart < validChunks.length; batchStart += EMBEDDING_BATCH_SIZE) {
+    const batchEnd = Math.min(batchStart + EMBEDDING_BATCH_SIZE, validChunks.length);
+    const batch = validChunks.slice(batchStart, batchEnd);
+    const batchTexts = batch.map((c) => c.contextualizedText);
+    const totalTokens = batchTexts.reduce((sum, text) => sum + estimateTokens(text), 0);
+    await rateLimiter(totalTokens);
+    logger4.info(
+      `[Document Processor] Batch ${Math.floor(batchStart / EMBEDDING_BATCH_SIZE) + 1}/${Math.ceil(validChunks.length / EMBEDDING_BATCH_SIZE)}: ${batch.length} texts, ~${totalTokens} tokens`
+    );
+    try {
+      const embeddings = await generateBatchEmbeddingsViaRuntime(runtime, batchTexts);
+      for (let i = 0; i < batch.length; i++) {
+        const chunk = batch[i];
+        const embedding = embeddings[i];
+        if (embedding && embedding.length > 0 && embedding[0] !== 0) {
+          results.push({
+            embedding,
+            success: true,
+            index: chunk.index,
+            text: chunk.contextualizedText
+          });
+        } else {
+          results.push({
+            success: false,
+            index: chunk.index,
+            error: new Error("Empty or invalid embedding returned"),
+            text: chunk.contextualizedText
+          });
+        }
       }
-  …
+    } catch (error) {
+      logger4.error(`[Document Processor] Batch embedding error: ${error.message}`);
+      for (const chunk of batch) {
+        try {
+          const result = await generateEmbeddingWithValidation(runtime, chunk.contextualizedText);
+          if (result.success && result.embedding) {
+            results.push({
+              embedding: result.embedding,
+              success: true,
+              index: chunk.index,
+              text: chunk.contextualizedText
+            });
+          } else {
+            results.push({
+              success: false,
+              index: chunk.index,
+              error: result.error || new Error("Embedding failed"),
+              text: chunk.contextualizedText
+            });
+          }
+        } catch (fallbackError) {
+          results.push({
             success: false,
-            index:
-            error,
-            text:
-          };
+            index: chunk.index,
+            error: fallbackError,
+            text: chunk.contextualizedText
+          });
         }
-  …
+      }
+    }
+  }
+  return results;
+}
+async function generateBatchEmbeddingsViaRuntime(runtime, texts) {
+  const batchResult = await runtime.useModel(
+    ModelType.TEXT_EMBEDDING,
+    { texts }
+    // Handler supports { texts: string[] } for batch mode
+  );
+  if (Array.isArray(batchResult) && Array.isArray(batchResult[0])) {
+    return batchResult;
+  }
+  if (Array.isArray(batchResult) && typeof batchResult[0] === "number") {
+    logger4.warn("[Document Processor] Runtime returned single embedding for batch request - falling back to individual calls");
+    const embeddings = await Promise.all(
+      texts.map(async (text) => {
+        const result = await runtime.useModel(ModelType.TEXT_EMBEDDING, { text });
+        if (Array.isArray(result)) {
+          return result;
+        }
+        return result?.embedding || [];
+      })
+    );
+    return embeddings;
+  }
+  logger4.error("[Document Processor] Unexpected batch result format:", typeof batchResult);
+  throw new Error("Unexpected batch embedding result format");
+}
+async function generateEmbeddingsIndividual(runtime, validChunks, rateLimiter, results) {
+  for (const chunk of validChunks) {
+    const embeddingTokens = estimateTokens(chunk.contextualizedText);
+    await rateLimiter(embeddingTokens);
+    try {
+      const generateEmbeddingOperation = async () => {
+        return await generateEmbeddingWithValidation(runtime, chunk.contextualizedText);
+      };
+      const { embedding, success, error } = await withRateLimitRetry(
+        generateEmbeddingOperation,
+        `embedding generation for chunk ${chunk.index}`
+      );
+      if (!success) {
+        results.push({
          success: false,
-          index:
+          index: chunk.index,
          error,
-          text:
-        };
+          text: chunk.contextualizedText
+        });
+      } else {
+        results.push({
+          embedding,
+          success: true,
+          index: chunk.index,
+          text: chunk.contextualizedText
+        });
       }
-    })
-  …
+    } catch (error) {
+      logger4.error(`Error generating embedding for chunk ${chunk.index}: ${error.message}`);
+      results.push({
+        success: false,
+        index: chunk.index,
+        error,
+        text: chunk.contextualizedText
+      });
+    }
+  }
+  return results;
 }
 async function getContextualizedChunks(runtime, fullDocumentText, chunks, contentType, batchOriginalIndices, documentTitle) {
   const ctxEnabled = getCtxKnowledgeEnabled(runtime);
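The batch path is where the speedup comes from: with EMBEDDING_BATCH_SIZE = 100, a 935-chunk document needs Math.ceil(935 / 100) = 10 embedding calls instead of 935, and the rate limiter is charged once per batch with the summed token estimate. A standalone sketch of that batching arithmetic, assuming a hypothetical embedBatch callback rather than the runtime's useModel:

const EMBEDDING_BATCH_SIZE = 100;

async function embedInBatches(
  texts: string[],
  embedBatch: (batch: string[]) => Promise<number[][]>
): Promise<number[][]> {
  const out: number[][] = [];
  for (let start = 0; start < texts.length; start += EMBEDDING_BATCH_SIZE) {
    // 935 texts -> batches of 100, 100, ..., 35: Math.ceil(935 / 100) = 10 calls
    const batch = texts.slice(start, start + EMBEDDING_BATCH_SIZE);
    out.push(...(await embedBatch(batch)));
  }
  return out;
}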
@@ -1795,10 +1911,11 @@ async function generateContextsInBatch(runtime, fullDocumentText, chunks, conten
   if (!chunks || chunks.length === 0) {
     return [];
   }
-  const providerLimits = await getProviderRateLimits();
+  const providerLimits = await getProviderRateLimits(runtime);
   const rateLimiter = createRateLimiter(
     providerLimits.requestsPerMinute || 60,
-    providerLimits.tokensPerMinute
+    providerLimits.tokensPerMinute,
+    providerLimits.rateLimitEnabled
   );
   const config = validateModelConfig(runtime);
   const isUsingOpenRouter = config.TEXT_PROVIDER === "openrouter";
@@ -1839,9 +1956,11 @@ async function generateContextsInBatch(runtime, fullDocumentText, chunks, conten
       }
     } else {
       if (item.usesCaching) {
+        const combinedPrompt = item.systemPrompt ? `${item.systemPrompt}
+
+${item.promptText}` : item.promptText;
         return await runtime.useModel(ModelType.TEXT_LARGE, {
-          prompt:
-          system: item.systemPrompt
+          prompt: combinedPrompt
         });
       } else {
         return await runtime.useModel(ModelType.TEXT_LARGE, {
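For the prompt-caching path, the system prompt is now folded into the user prompt (separated by a blank line) rather than sent as a separate system field. A sketch of the concatenation with hypothetical values:

const systemPrompt: string | undefined = "You are a document analyst.";
const promptText = "Situate this chunk within the overall document.";
const combinedPrompt = systemPrompt ? `${systemPrompt}\n\n${promptText}` : promptText;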
@@ -1945,17 +2064,11 @@ function prepareContextPrompts(chunks, fullDocumentText, contentType, batchIndic
 }
 async function generateEmbeddingWithValidation(runtime, text) {
   try {
-    const embeddingResult = await runtime.useModel(ModelType.TEXT_EMBEDDING, {
-      text
-    });
+    const embeddingResult = await runtime.useModel(ModelType.TEXT_EMBEDDING, { text });
     const embedding = Array.isArray(embeddingResult) ? embeddingResult : embeddingResult?.embedding;
     if (!embedding || embedding.length === 0) {
-      logger4.warn(`Zero vector detected
-      return {
-        embedding: null,
-        success: false,
-        error: new Error("Zero vector detected")
-      };
+      logger4.warn(`Zero vector detected`);
+      return { embedding: null, success: false, error: new Error("Zero vector detected") };
     }
     return { embedding, success: true };
   } catch (error) {
@@ -1980,11 +2093,12 @@ async function withRateLimitRetry(operation, errorContext, retryDelay) {
     throw error;
   }
 }
-function createRateLimiter(requestsPerMinute, tokensPerMinute) {
+function createRateLimiter(requestsPerMinute, tokensPerMinute, rateLimitEnabled = true) {
   const requestTimes = [];
   const tokenUsage = [];
   const intervalMs = 60 * 1e3;
   return async function rateLimiter(estimatedTokens = 1e3) {
+    if (!rateLimitEnabled) return;
     const now = Date.now();
     while (requestTimes.length > 0 && now - requestTimes[0] > intervalMs) {
       requestTimes.shift();
@@ -1998,23 +2112,15 @@ function createRateLimiter(requestsPerMinute, tokensPerMinute) {
     if (requestLimitExceeded || tokenLimitExceeded) {
       let timeToWait = 0;
       if (requestLimitExceeded) {
-        const oldestRequest = requestTimes[0];
-        timeToWait = Math.max(timeToWait, oldestRequest + intervalMs - now);
+        timeToWait = Math.max(timeToWait, requestTimes[0] + intervalMs - now);
       }
       if (tokenLimitExceeded && tokenUsage.length > 0) {
-        const oldestTokenUsage = tokenUsage[0];
-        timeToWait = Math.max(timeToWait, oldestTokenUsage.timestamp + intervalMs - now);
+        timeToWait = Math.max(timeToWait, tokenUsage[0].timestamp + intervalMs - now);
       }
       if (timeToWait > 0) {
         const reason = requestLimitExceeded ? "request" : "token";
         if (timeToWait > 5e3) {
-          logger4.info(
-            `[Document Processor] Rate limiting: waiting ${Math.round(timeToWait / 1e3)}s due to ${reason} limit`
-          );
-        } else {
-          logger4.debug(
-            `[Document Processor] Rate limiting: ${timeToWait}ms wait (${reason} limit)`
-          );
+          logger4.info(`[Rate Limiter] Waiting ${Math.round(timeToWait / 1e3)}s (${reason} limit)`);
         }
         await new Promise((resolve2) => setTimeout(resolve2, timeToWait));
       }
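Taken together, createRateLimiter implements a 60-second sliding window over both request count and estimated tokens, and now short-circuits entirely when rate limiting is disabled. A minimal standalone sketch of the technique; the post-wait bookkeeping (recording the new request and its tokens) is not visible in these hunks, so that part is an assumption:

function createRateLimiter(requestsPerMinute: number, tokensPerMinute?: number, rateLimitEnabled = true) {
  const requestTimes: number[] = [];
  const tokenUsage: { timestamp: number; tokens: number }[] = [];
  const intervalMs = 60_000;
  return async function rateLimiter(estimatedTokens = 1000): Promise<void> {
    if (!rateLimitEnabled) return; // disabled: no waiting, no bookkeeping
    const now = Date.now();
    // Drop window entries older than 60s.
    while (requestTimes.length > 0 && now - requestTimes[0] > intervalMs) requestTimes.shift();
    while (tokenUsage.length > 0 && now - tokenUsage[0].timestamp > intervalMs) tokenUsage.shift();
    const tokensInWindow = tokenUsage.reduce((sum, u) => sum + u.tokens, 0);
    const requestLimitExceeded = requestTimes.length >= requestsPerMinute;
    const tokenLimitExceeded =
      tokensPerMinute !== undefined && tokensInWindow + estimatedTokens > tokensPerMinute;
    if (requestLimitExceeded || tokenLimitExceeded) {
      let timeToWait = 0;
      if (requestLimitExceeded) timeToWait = Math.max(timeToWait, requestTimes[0] + intervalMs - now);
      if (tokenLimitExceeded && tokenUsage.length > 0)
        timeToWait = Math.max(timeToWait, tokenUsage[0].timestamp + intervalMs - now);
      if (timeToWait > 0) await new Promise((r) => setTimeout(r, timeToWait));
    }
    // Assumed bookkeeping: charge this request against the window.
    requestTimes.push(Date.now());
    tokenUsage.push({ timestamp: Date.now(), tokens: estimatedTokens });
  };
}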
@@ -2274,7 +2380,8 @@ var KnowledgeService = class _KnowledgeService extends Service {
     );
     try {
       await new Promise((resolve2) => setTimeout(resolve2, 1e3));
-      const
+      const knowledgePathSetting = this.runtime.getSetting("KNOWLEDGE_PATH");
+      const knowledgePath = typeof knowledgePathSetting === "string" ? knowledgePathSetting : void 0;
       const result = await loadDocsFromPath(
         this,
         this.runtime.agentId,