@elizaos/plugin-knowledge 1.0.10 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +29 -9
- package/dist/.vite/manifest.json +2 -2
- package/dist/assets/index-B5VEkqpw.css +1 -0
- package/dist/assets/index-DlOvU00u.js +169 -0
- package/dist/index.d.ts +15 -43
- package/dist/index.html +2 -2
- package/dist/index.js +1355 -511
- package/dist/index.js.map +1 -1
- package/package.json +20 -22
- package/dist/assets/index-CBT93PUU.css +0 -1
- package/dist/assets/index-ChiOWvZU.js +0 -165
- package/dist/chunk-UOE4LEMH.js +0 -695
- package/dist/chunk-UOE4LEMH.js.map +0 -1
- package/dist/docs-loader-PF5X4UMB.js +0 -9
- package/dist/docs-loader-PF5X4UMB.js.map +0 -1
package/dist/index.js
CHANGED
@@ -1,17 +1,147 @@
+// src/service.ts
 import {
-
-
-
-
-
-
-
-
-  v4_default
-} from "./chunk-UOE4LEMH.js";
+  createUniqueUuid,
+  logger as logger6,
+  MemoryType as MemoryType2,
+  ModelType as ModelType2,
+  Semaphore,
+  Service,
+  splitChunks as splitChunks2
+} from "@elizaos/core";
 
-// src/
-import {
+// src/document-processor.ts
+import {
+  MemoryType,
+  ModelType,
+  logger as logger4,
+  splitChunks
+} from "@elizaos/core";
+
+// node_modules/uuid/dist/esm/regex.js
+var regex_default = /^(?:[0-9a-f]{8}-[0-9a-f]{4}-[1-8][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}|00000000-0000-0000-0000-000000000000|ffffffff-ffff-ffff-ffff-ffffffffffff)$/i;
+
+// node_modules/uuid/dist/esm/validate.js
+function validate(uuid) {
+  return typeof uuid === "string" && regex_default.test(uuid);
+}
+var validate_default = validate;
+
+// node_modules/uuid/dist/esm/parse.js
+function parse(uuid) {
+  if (!validate_default(uuid)) {
+    throw TypeError("Invalid UUID");
+  }
+  let v;
+  return Uint8Array.of((v = parseInt(uuid.slice(0, 8), 16)) >>> 24, v >>> 16 & 255, v >>> 8 & 255, v & 255, (v = parseInt(uuid.slice(9, 13), 16)) >>> 8, v & 255, (v = parseInt(uuid.slice(14, 18), 16)) >>> 8, v & 255, (v = parseInt(uuid.slice(19, 23), 16)) >>> 8, v & 255, (v = parseInt(uuid.slice(24, 36), 16)) / 1099511627776 & 255, v / 4294967296 & 255, v >>> 24 & 255, v >>> 16 & 255, v >>> 8 & 255, v & 255);
+}
+var parse_default = parse;
+
+// node_modules/uuid/dist/esm/stringify.js
+var byteToHex = [];
+for (let i = 0; i < 256; ++i) {
+  byteToHex.push((i + 256).toString(16).slice(1));
+}
+function unsafeStringify(arr, offset = 0) {
+  return (byteToHex[arr[offset + 0]] + byteToHex[arr[offset + 1]] + byteToHex[arr[offset + 2]] + byteToHex[arr[offset + 3]] + "-" + byteToHex[arr[offset + 4]] + byteToHex[arr[offset + 5]] + "-" + byteToHex[arr[offset + 6]] + byteToHex[arr[offset + 7]] + "-" + byteToHex[arr[offset + 8]] + byteToHex[arr[offset + 9]] + "-" + byteToHex[arr[offset + 10]] + byteToHex[arr[offset + 11]] + byteToHex[arr[offset + 12]] + byteToHex[arr[offset + 13]] + byteToHex[arr[offset + 14]] + byteToHex[arr[offset + 15]]).toLowerCase();
+}
+
+// node_modules/uuid/dist/esm/rng.js
+import { randomFillSync } from "crypto";
+var rnds8Pool = new Uint8Array(256);
+var poolPtr = rnds8Pool.length;
+function rng() {
+  if (poolPtr > rnds8Pool.length - 16) {
+    randomFillSync(rnds8Pool);
+    poolPtr = 0;
+  }
+  return rnds8Pool.slice(poolPtr, poolPtr += 16);
+}
+
+// node_modules/uuid/dist/esm/v35.js
+function stringToBytes(str) {
+  str = unescape(encodeURIComponent(str));
+  const bytes = new Uint8Array(str.length);
+  for (let i = 0; i < str.length; ++i) {
+    bytes[i] = str.charCodeAt(i);
+  }
+  return bytes;
+}
+var DNS = "6ba7b810-9dad-11d1-80b4-00c04fd430c8";
+var URL2 = "6ba7b811-9dad-11d1-80b4-00c04fd430c8";
+function v35(version, hash, value, namespace, buf, offset) {
+  const valueBytes = typeof value === "string" ? stringToBytes(value) : value;
+  const namespaceBytes = typeof namespace === "string" ? parse_default(namespace) : namespace;
+  if (typeof namespace === "string") {
+    namespace = parse_default(namespace);
+  }
+  if (namespace?.length !== 16) {
+    throw TypeError("Namespace must be array-like (16 iterable integer values, 0-255)");
+  }
+  let bytes = new Uint8Array(16 + valueBytes.length);
+  bytes.set(namespaceBytes);
+  bytes.set(valueBytes, namespaceBytes.length);
+  bytes = hash(bytes);
+  bytes[6] = bytes[6] & 15 | version;
+  bytes[8] = bytes[8] & 63 | 128;
+  if (buf) {
+    offset = offset || 0;
+    for (let i = 0; i < 16; ++i) {
+      buf[offset + i] = bytes[i];
+    }
+    return buf;
+  }
+  return unsafeStringify(bytes);
+}
+
+// node_modules/uuid/dist/esm/native.js
+import { randomUUID } from "crypto";
+var native_default = { randomUUID };
+
+// node_modules/uuid/dist/esm/v4.js
+function v4(options, buf, offset) {
+  if (native_default.randomUUID && !buf && !options) {
+    return native_default.randomUUID();
+  }
+  options = options || {};
+  const rnds = options.random ?? options.rng?.() ?? rng();
+  if (rnds.length < 16) {
+    throw new Error("Random bytes length must be >= 16");
+  }
+  rnds[6] = rnds[6] & 15 | 64;
+  rnds[8] = rnds[8] & 63 | 128;
+  if (buf) {
+    offset = offset || 0;
+    if (offset < 0 || offset + 16 > buf.length) {
+      throw new RangeError(`UUID byte range ${offset}:${offset + 15} is out of buffer bounds`);
+    }
+    for (let i = 0; i < 16; ++i) {
+      buf[offset + i] = rnds[i];
+    }
+    return buf;
+  }
+  return unsafeStringify(rnds);
+}
+var v4_default = v4;
+
+// node_modules/uuid/dist/esm/sha1.js
+import { createHash } from "crypto";
+function sha1(bytes) {
+  if (Array.isArray(bytes)) {
+    bytes = Buffer.from(bytes);
+  } else if (typeof bytes === "string") {
+    bytes = Buffer.from(bytes, "utf8");
+  }
+  return createHash("sha1").update(bytes).digest();
+}
+var sha1_default = sha1;
+
+// node_modules/uuid/dist/esm/v5.js
+function v5(value, namespace, buf, offset) {
+  return v35(80, sha1_default, value, namespace, buf, offset);
+}
+v5.DNS = DNS;
+v5.URL = URL2;
+var v5_default = v5;
 
 // src/types.ts
 import z from "zod";
@@ -42,6 +172,8 @@ var ModelConfigSchema = z.object({
   // For OpenAI: Only applies to text-embedding-3-small and text-embedding-3-large models
   // Default: 1536 dimensions
   EMBEDDING_DIMENSION: z.string().or(z.number()).optional().transform((val) => val ? typeof val === "string" ? parseInt(val, 10) : val : 1536),
+  // config setting
+  LOAD_DOCS_ON_STARTUP: z.boolean().default(false),
   // Contextual Knowledge settings
   CTX_KNOWLEDGE_ENABLED: z.boolean().default(false)
 });
@@ -52,26 +184,35 @@ var KnowledgeServiceType = {
 // src/config.ts
 import z2 from "zod";
 import { logger } from "@elizaos/core";
+var parseBooleanEnv = (value) => {
+  if (typeof value === "boolean") return value;
+  if (typeof value === "string") return value.toLowerCase() === "true";
+  return false;
+};
 function validateModelConfig(runtime) {
   try {
     const getSetting = (key, defaultValue) => {
       if (runtime) {
-        return runtime.getSetting(key) || defaultValue;
+        return runtime.getSetting(key) || process.env[key] || defaultValue;
       }
       return process.env[key] || defaultValue;
     };
-    const
-    logger.debug(
+    const ctxKnowledgeEnabled = parseBooleanEnv(getSetting("CTX_KNOWLEDGE_ENABLED", "false"));
+    logger.debug(
+      `[Document Processor] CTX_KNOWLEDGE_ENABLED: '${ctxKnowledgeEnabled} (runtime: ${!!runtime})`
+    );
     const embeddingProvider = getSetting("EMBEDDING_PROVIDER");
     const assumePluginOpenAI = !embeddingProvider;
     if (assumePluginOpenAI) {
       const openaiApiKey2 = getSetting("OPENAI_API_KEY");
       const openaiEmbeddingModel = getSetting("OPENAI_EMBEDDING_MODEL");
       if (openaiApiKey2 && openaiEmbeddingModel) {
-        logger.debug(
+        logger.debug(
+          "[Document Processor] EMBEDDING_PROVIDER not specified, using configuration from plugin-openai"
+        );
       } else {
         logger.debug(
-          "EMBEDDING_PROVIDER not specified. Assuming embeddings are provided by another plugin (e.g., plugin-google-genai)."
+          "[Document Processor] EMBEDDING_PROVIDER not specified. Assuming embeddings are provided by another plugin (e.g., plugin-google-genai)."
         );
       }
     }
@@ -95,7 +236,8 @@ function validateModelConfig(runtime) {
       MAX_INPUT_TOKENS: getSetting("MAX_INPUT_TOKENS", "4000"),
       MAX_OUTPUT_TOKENS: getSetting("MAX_OUTPUT_TOKENS", "4096"),
       EMBEDDING_DIMENSION: embeddingDimension,
-
+      LOAD_DOCS_ON_STARTUP: parseBooleanEnv(getSetting("LOAD_DOCS_ON_STARTUP")),
+      CTX_KNOWLEDGE_ENABLED: ctxKnowledgeEnabled
     });
     validateConfigRequirements(config, assumePluginOpenAI);
     return config;
@@ -116,13 +258,15 @@ function validateConfigRequirements(config, assumePluginOpenAI) {
     throw new Error('GOOGLE_API_KEY is required when EMBEDDING_PROVIDER is set to "google"');
   }
   if (!embeddingProvider) {
-    logger.debug(
+    logger.debug(
+      "[Document Processor] No EMBEDDING_PROVIDER specified. Embeddings will be handled by the runtime."
+    );
   }
   if (assumePluginOpenAI && config.OPENAI_API_KEY && !config.TEXT_EMBEDDING_MODEL) {
     throw new Error("OPENAI_EMBEDDING_MODEL is required when using plugin-openai configuration");
   }
   if (config.CTX_KNOWLEDGE_ENABLED) {
-    logger.debug("
+    logger.debug("[Document Processor] CTX validation: Checking text generation settings...");
     if (config.TEXT_PROVIDER === "openai" && !config.OPENAI_API_KEY) {
       throw new Error('OPENAI_API_KEY is required when TEXT_PROVIDER is set to "openai"');
     }
@@ -139,17 +283,21 @@ function validateConfigRequirements(config, assumePluginOpenAI) {
       const modelName = config.TEXT_MODEL?.toLowerCase() || "";
       if (modelName.includes("claude") || modelName.includes("gemini")) {
         logger.debug(
-          `Using ${modelName} with OpenRouter. This configuration supports document caching for improved performance.`
+          `[Document Processor] Using ${modelName} with OpenRouter. This configuration supports document caching for improved performance.`
         );
       }
     }
   } else {
+    logger.info("[Document Processor] Contextual Knowledge is DISABLED!");
+    logger.info("[Document Processor] This means documents will NOT be enriched with context.");
    if (assumePluginOpenAI) {
-      logger.
-      "
+      logger.info(
+        "[Document Processor] Embeddings will be handled by the runtime (e.g., plugin-openai, plugin-google-genai)."
       );
     } else {
-      logger.
+      logger.info(
+        "[Document Processor] Using configured embedding provider for basic embeddings only."
+      );
     }
   }
 }
@@ -164,7 +312,18 @@ async function getProviderRateLimits(runtime) {
   const maxConcurrentRequests = parseInt(getSetting("MAX_CONCURRENT_REQUESTS", "30"), 10);
   const requestsPerMinute = parseInt(getSetting("REQUESTS_PER_MINUTE", "60"), 10);
   const tokensPerMinute = parseInt(getSetting("TOKENS_PER_MINUTE", "150000"), 10);
-
+  const primaryProvider = config.TEXT_PROVIDER || config.EMBEDDING_PROVIDER;
+  logger.debug(
+    `[Document Processor] Rate limiting for ${primaryProvider}: ${requestsPerMinute} RPM, ${tokensPerMinute} TPM, ${maxConcurrentRequests} concurrent`
+  );
+  switch (primaryProvider) {
+    case "anthropic":
+      return {
+        maxConcurrentRequests,
+        requestsPerMinute,
+        tokensPerMinute,
+        provider: "anthropic"
+      };
     case "openai":
       return {
         maxConcurrentRequests,
@@ -184,30 +343,11 @@ async function getProviderRateLimits(runtime) {
         maxConcurrentRequests,
         requestsPerMinute,
         tokensPerMinute,
-        provider:
+        provider: primaryProvider || "unknown"
       };
   }
 }
 
-// src/service.ts
-import {
-  createUniqueUuid,
-  logger as logger4,
-  MemoryType as MemoryType2,
-  ModelType as ModelType2,
-  Semaphore,
-  Service,
-  splitChunks as splitChunks2
-} from "@elizaos/core";
-
-// src/document-processor.ts
-import {
-  MemoryType,
-  ModelType,
-  logger as logger3,
-  splitChunks
-} from "@elizaos/core";
-
 // src/ctx-embeddings.ts
 var DEFAULT_CHUNK_TOKEN_SIZE = 500;
 var DEFAULT_CHUNK_OVERLAP_TOKENS = 100;
@@ -405,9 +545,7 @@ Create an enriched version of this chunk by adding critical surrounding context.
 Provide ONLY the enriched chunk text in your response:`;
 function getContextualizationPrompt(docContent, chunkContent, minTokens = CONTEXT_TARGETS.DEFAULT.MIN_TOKENS, maxTokens = CONTEXT_TARGETS.DEFAULT.MAX_TOKENS, promptTemplate = CONTEXTUAL_CHUNK_ENRICHMENT_PROMPT_TEMPLATE) {
   if (!docContent || !chunkContent) {
-    console.warn(
-      "Document content or chunk content is missing for contextualization."
-    );
+    console.warn("Document content or chunk content is missing for contextualization.");
     return "Error: Document or chunk content missing.";
   }
   const chunkTokens = Math.ceil(chunkContent.length / DEFAULT_CHARS_PER_TOKEN);
@@ -478,15 +616,8 @@ function getPromptForMimeType(mimeType, docContent, chunkContent) {
     minTokens = CONTEXT_TARGETS.TECHNICAL.MIN_TOKENS;
     maxTokens = CONTEXT_TARGETS.TECHNICAL.MAX_TOKENS;
     promptTemplate = TECHNICAL_PROMPT_TEMPLATE;
-    console.debug("Using technical documentation prompt template");
   }
-  return getContextualizationPrompt(
-    docContent,
-    chunkContent,
-    minTokens,
-    maxTokens,
-    promptTemplate
-  );
+  return getContextualizationPrompt(docContent, chunkContent, minTokens, maxTokens, promptTemplate);
 }
 function getCachingPromptForMimeType(mimeType, chunkContent) {
   let minTokens = CONTEXT_TARGETS.DEFAULT.MIN_TOKENS;
@@ -506,12 +637,7 @@ function getCachingPromptForMimeType(mimeType, chunkContent) {
     minTokens = CONTEXT_TARGETS.TECHNICAL.MIN_TOKENS;
     maxTokens = CONTEXT_TARGETS.TECHNICAL.MAX_TOKENS;
   }
-  return getCachingContextualizationPrompt(
-    chunkContent,
-    mimeType,
-    minTokens,
-    maxTokens
-  );
+  return getCachingContextualizationPrompt(chunkContent, mimeType, minTokens, maxTokens);
 }
 function containsMathematicalContent(content) {
   const latexMathPatterns = [
@@ -575,9 +701,7 @@ function containsMathematicalContent(content) {
     "coefficient"
   ];
   const contentLower = content.toLowerCase();
-  const mathKeywordCount = mathKeywords.filter(
-    (keyword) => contentLower.includes(keyword)
-  ).length;
+  const mathKeywordCount = mathKeywords.filter((keyword) => contentLower.includes(keyword)).length;
   return mathKeywordCount >= 2;
 }
 function isTechnicalDocumentation(content) {
@@ -626,9 +750,7 @@ function isTechnicalDocumentation(content) {
 }
 function getChunkWithContext(chunkContent, generatedContext) {
   if (!generatedContext || generatedContext.trim() === "") {
-    console.warn(
-      "Generated context is empty. Falling back to original chunk content."
-    );
+    console.warn("Generated context is empty. Falling back to original chunk content.");
     return chunkContent;
   }
   return generatedContext.trim();
@@ -641,8 +763,8 @@ import { createAnthropic } from "@ai-sdk/anthropic";
 import { createOpenRouter } from "@openrouter/ai-sdk-provider";
 import { google } from "@ai-sdk/google";
 import { logger as logger2 } from "@elizaos/core";
-async function generateText(prompt, system, overrideConfig) {
-  const config = validateModelConfig();
+async function generateText(runtime, prompt, system, overrideConfig) {
+  const config = validateModelConfig(runtime);
   const provider = overrideConfig?.provider || config.TEXT_PROVIDER;
   const modelName = overrideConfig?.modelName || config.TEXT_MODEL;
   const maxTokens = overrideConfig?.maxTokens || config.MAX_OUTPUT_TOKENS;
@@ -650,11 +772,12 @@ async function generateText(prompt, system, overrideConfig) {
   try {
     switch (provider) {
       case "anthropic":
-        return await generateAnthropicText(prompt, system, modelName, maxTokens);
+        return await generateAnthropicText(config, prompt, system, modelName, maxTokens);
       case "openai":
-        return await generateOpenAIText(prompt, system, modelName, maxTokens);
+        return await generateOpenAIText(config, prompt, system, modelName, maxTokens);
       case "openrouter":
         return await generateOpenRouterText(
+          config,
           prompt,
           system,
           modelName,
@@ -669,31 +792,47 @@ async function generateText(prompt, system, overrideConfig) {
         throw new Error(`Unsupported text provider: ${provider}`);
     }
   } catch (error) {
-    logger2.error(`[
+    logger2.error(`[Document Processor] ${provider} ${modelName} error:`, error);
     throw error;
   }
 }
-async function generateAnthropicText(prompt, system, modelName, maxTokens) {
-  const config = validateModelConfig();
+async function generateAnthropicText(config, prompt, system, modelName, maxTokens) {
   const anthropic = createAnthropic({
     apiKey: config.ANTHROPIC_API_KEY,
     baseURL: config.ANTHROPIC_BASE_URL
   });
   const modelInstance = anthropic(modelName);
-  const
-
-
-
-
-
-
-
-
-
-
+  const maxRetries = 3;
+  for (let attempt = 0; attempt < maxRetries; attempt++) {
+    try {
+      const result = await aiGenerateText({
+        model: modelInstance,
+        prompt,
+        system,
+        temperature: 0.3,
+        maxTokens
+      });
+      const totalTokens = result.usage.promptTokens + result.usage.completionTokens;
+      logger2.debug(
+        `[Document Processor] ${modelName}: ${totalTokens} tokens (${result.usage.promptTokens}\u2192${result.usage.completionTokens})`
+      );
+      return result;
+    } catch (error) {
+      const isRateLimit = error?.status === 429 || error?.message?.includes("rate limit") || error?.message?.includes("429");
+      if (isRateLimit && attempt < maxRetries - 1) {
+        const delay = Math.pow(2, attempt + 1) * 1e3;
+        logger2.warn(
+          `[Document Processor] Rate limit hit (${modelName}): attempt ${attempt + 1}/${maxRetries}, retrying in ${Math.round(delay / 1e3)}s`
+        );
+        await new Promise((resolve2) => setTimeout(resolve2, delay));
+        continue;
+      }
+      throw error;
+    }
+  }
+  throw new Error("Max retries exceeded for Anthropic text generation");
 }
-async function generateOpenAIText(prompt, system, modelName, maxTokens) {
-  const config = validateModelConfig();
+async function generateOpenAIText(config, prompt, system, modelName, maxTokens) {
   const openai = createOpenAI({
     apiKey: config.OPENAI_API_KEY,
     baseURL: config.OPENAI_BASE_URL
@@ -706,8 +845,9 @@ async function generateOpenAIText(prompt, system, modelName, maxTokens) {
     temperature: 0.3,
     maxTokens
   });
+  const totalTokens = result.usage.promptTokens + result.usage.completionTokens;
   logger2.debug(
-    `[
+    `[Document Processor] OpenAI ${modelName}: ${totalTokens} tokens (${result.usage.promptTokens}\u2192${result.usage.completionTokens})`
   );
   return result;
 }
@@ -724,13 +864,13 @@ async function generateGoogleText(prompt, system, modelName, maxTokens, config)
     temperature: 0.3,
     maxTokens
   });
+  const totalTokens = result.usage.promptTokens + result.usage.completionTokens;
   logger2.debug(
-    `[
+    `[Document Processor] Google ${modelName}: ${totalTokens} tokens (${result.usage.promptTokens}\u2192${result.usage.completionTokens})`
   );
   return result;
 }
-async function generateOpenRouterText(prompt, system, modelName, maxTokens, cacheDocument, cacheOptions, autoCacheContextualRetrieval = true) {
-  const config = validateModelConfig();
+async function generateOpenRouterText(config, prompt, system, modelName, maxTokens, cacheDocument, cacheOptions, autoCacheContextualRetrieval = true) {
   const openrouter = createOpenRouter({
     apiKey: config.OPENROUTER_API_KEY,
     baseURL: config.OPENROUTER_BASE_URL
@@ -746,7 +886,7 @@ async function generateOpenRouterText(prompt, system, modelName, maxTokens, cach
     if (docMatch && docMatch[1]) {
       documentForCaching = docMatch[1].trim();
       logger2.debug(
-        `[
+        `[Document Processor] Auto-detected document for caching (${documentForCaching.length} chars)`
       );
     }
   }
@@ -777,13 +917,11 @@ async function generateOpenRouterText(prompt, system, modelName, maxTokens, cach
       );
     }
   }
-  logger2.debug("[
+  logger2.debug("[Document Processor] Using standard request without caching");
   return await generateStandardOpenRouterText(prompt, system, modelInstance, modelName, maxTokens);
 }
 async function generateClaudeWithCaching(promptText, system, modelInstance, modelName, maxTokens, documentForCaching) {
-  logger2.debug(
-    `[LLM Service - OpenRouter] Using explicit prompt caching with Claude model ${modelName}`
-  );
+  logger2.debug(`[Document Processor] Using explicit prompt caching with Claude ${modelName}`);
   const messages = [
     // System message with cached document (if system is provided)
     system ? {
@@ -835,7 +973,7 @@ async function generateClaudeWithCaching(promptText, system, modelInstance, mode
       ]
     } : null
   ].filter(Boolean);
-  logger2.debug("[
+  logger2.debug("[Document Processor] Using Claude-specific caching structure");
   const result = await aiGenerateText({
     model: modelInstance,
     messages,
@@ -850,8 +988,9 @@ async function generateClaudeWithCaching(promptText, system, modelInstance, mode
     }
   });
   logCacheMetrics(result);
+  const totalTokens = result.usage.promptTokens + result.usage.completionTokens;
   logger2.debug(
-    `[
+    `[Document Processor] OpenRouter ${modelName}: ${totalTokens} tokens (${result.usage.promptTokens}\u2192${result.usage.completionTokens})`
   );
   return result;
 }
@@ -861,27 +1000,23 @@ async function generateGeminiWithCaching(promptText, system, modelInstance, mode
   const minTokensForImplicitCache = modelName.toLowerCase().includes("flash") ? 1028 : 2048;
   const likelyTriggersCaching = estimatedDocTokens >= minTokensForImplicitCache;
   if (usingImplicitCaching) {
+    logger2.debug(`[Document Processor] Using Gemini 2.5 implicit caching with ${modelName}`);
     logger2.debug(
-      `[
-    );
-    logger2.debug(
-      `[LLM Service - OpenRouter] Gemini 2.5 models automatically cache large prompts (no cache_control needed)`
+      `[Document Processor] Gemini 2.5 models automatically cache large prompts (no cache_control needed)`
     );
     if (likelyTriggersCaching) {
       logger2.debug(
-        `[
+        `[Document Processor] Document ~${estimatedDocTokens} tokens exceeds ${minTokensForImplicitCache} token threshold for caching`
       );
     } else {
       logger2.debug(
-        `[
+        `[Document Processor] Document ~${estimatedDocTokens} tokens may not meet ${minTokensForImplicitCache} token threshold for caching`
      );
     }
   } else {
+    logger2.debug(`[Document Processor] Using standard prompt format with Gemini ${modelName}`);
     logger2.debug(
-      `[
-    );
-    logger2.debug(
-      `[LLM Service - OpenRouter] Note: Only Gemini 2.5 models support automatic implicit caching`
+      `[Document Processor] Note: Only Gemini 2.5 models support automatic implicit caching`
     );
   }
   const geminiSystemPrefix = system ? `${system}
@@ -905,8 +1040,10 @@ ${promptText}`;
     }
   });
   logCacheMetrics(result);
+  const totalTokens = result.usage.promptTokens + result.usage.completionTokens;
+  const cachingType = usingImplicitCaching ? "implicit" : "standard";
   logger2.debug(
-    `[
+    `[Document Processor] OpenRouter ${modelName} (${cachingType} caching): ${totalTokens} tokens (${result.usage.promptTokens}\u2192${result.usage.completionTokens})`
   );
   return result;
 }
@@ -926,21 +1063,397 @@ async function generateStandardOpenRouterText(prompt, system, modelInstance, mod
       }
     }
   });
+  const totalTokens = result.usage.promptTokens + result.usage.completionTokens;
   logger2.debug(
-    `[
+    `[Document Processor] OpenRouter ${modelName}: ${totalTokens} tokens (${result.usage.promptTokens}\u2192${result.usage.completionTokens})`
   );
   return result;
 }
 function logCacheMetrics(result) {
   if (result.usage && result.usage.cacheTokens) {
     logger2.debug(
-      `[
+      `[Document Processor] Cache metrics - tokens: ${result.usage.cacheTokens}, discount: ${result.usage.cacheDiscount}`
+    );
+  }
+}
+
+// src/utils.ts
+import { Buffer as Buffer2 } from "buffer";
+import * as mammoth from "mammoth";
+import { logger as logger3 } from "@elizaos/core";
+import { getDocument } from "pdfjs-dist/legacy/build/pdf.mjs";
+import { createHash as createHash2 } from "crypto";
+var PLAIN_TEXT_CONTENT_TYPES = [
+  "application/typescript",
+  "text/typescript",
+  "text/x-python",
+  "application/x-python-code",
+  "application/yaml",
+  "text/yaml",
+  "application/x-yaml",
+  "application/json",
+  "text/markdown",
+  "text/csv"
+];
+var MAX_FALLBACK_SIZE_BYTES = 5 * 1024 * 1024;
+var BINARY_CHECK_BYTES = 1024;
+async function extractTextFromFileBuffer(fileBuffer, contentType, originalFilename) {
+  const lowerContentType = contentType.toLowerCase();
+  logger3.debug(
+    `[TextUtil] Attempting to extract text from ${originalFilename} (type: ${contentType})`
+  );
+  if (lowerContentType === "application/vnd.openxmlformats-officedocument.wordprocessingml.document") {
+    logger3.debug(`[TextUtil] Extracting text from DOCX ${originalFilename} via mammoth.`);
+    try {
+      const result = await mammoth.extractRawText({ buffer: fileBuffer });
+      logger3.debug(
+        `[TextUtil] DOCX text extraction complete for ${originalFilename}. Text length: ${result.value.length}`
+      );
+      return result.value;
+    } catch (docxError) {
+      const errorMsg = `[TextUtil] Failed to parse DOCX file ${originalFilename}: ${docxError.message}`;
+      logger3.error(errorMsg, docxError.stack);
+      throw new Error(errorMsg);
+    }
+  } else if (lowerContentType === "application/msword" || originalFilename.toLowerCase().endsWith(".doc")) {
+    logger3.debug(`[TextUtil] Handling Microsoft Word .doc file: ${originalFilename}`);
+    return `[Microsoft Word Document: ${originalFilename}]
+
+This document was indexed for search but cannot be displayed directly in the browser. The original document content is preserved for retrieval purposes.`;
+  } else if (lowerContentType.startsWith("text/") || PLAIN_TEXT_CONTENT_TYPES.includes(lowerContentType)) {
+    logger3.debug(
+      `[TextUtil] Extracting text from plain text compatible file ${originalFilename} (type: ${contentType})`
+    );
+    return fileBuffer.toString("utf-8");
+  } else {
+    logger3.warn(
+      `[TextUtil] Unsupported content type: "${contentType}" for ${originalFilename}. Attempting fallback to plain text.`
+    );
+    if (fileBuffer.length > MAX_FALLBACK_SIZE_BYTES) {
+      const sizeErrorMsg = `[TextUtil] File ${originalFilename} (type: ${contentType}) exceeds maximum size for fallback (${MAX_FALLBACK_SIZE_BYTES} bytes). Cannot process as plain text.`;
+      logger3.error(sizeErrorMsg);
+      throw new Error(sizeErrorMsg);
+    }
+    const initialBytes = fileBuffer.subarray(0, Math.min(fileBuffer.length, BINARY_CHECK_BYTES));
+    if (initialBytes.includes(0)) {
+      const binaryHeuristicMsg = `[TextUtil] File ${originalFilename} (type: ${contentType}) appears to be binary based on initial byte check. Cannot process as plain text.`;
+      logger3.error(binaryHeuristicMsg);
+      throw new Error(binaryHeuristicMsg);
+    }
+    try {
+      const textContent = fileBuffer.toString("utf-8");
+      if (textContent.includes("\uFFFD")) {
+        const binaryErrorMsg = `[TextUtil] File ${originalFilename} (type: ${contentType}) seems to be binary or has encoding issues after fallback to plain text (detected \uFFFD).`;
+        logger3.error(binaryErrorMsg);
+        throw new Error(binaryErrorMsg);
+      }
+      logger3.debug(
+        `[TextUtil] Successfully processed unknown type ${contentType} as plain text after fallback for ${originalFilename}.`
+      );
+      return textContent;
+    } catch (fallbackError) {
+      const finalErrorMsg = `[TextUtil] Unsupported content type: ${contentType} for ${originalFilename}. Fallback to plain text also failed or indicated binary content.`;
+      logger3.error(finalErrorMsg, fallbackError.message ? fallbackError.stack : void 0);
+      throw new Error(finalErrorMsg);
+    }
+  }
+}
+async function convertPdfToTextFromBuffer(pdfBuffer, filename) {
+  const docName = filename || "unnamed-document";
+  logger3.debug(`[PdfService] Starting conversion for ${docName}`);
+  try {
+    const uint8Array = new Uint8Array(pdfBuffer);
+    const pdf = await getDocument({ data: uint8Array }).promise;
+    const numPages = pdf.numPages;
+    const textPages = [];
+    for (let pageNum = 1; pageNum <= numPages; pageNum++) {
+      logger3.debug(`[PdfService] Processing page ${pageNum}/${numPages}`);
+      const page = await pdf.getPage(pageNum);
+      const textContent = await page.getTextContent();
+      const lineMap = /* @__PURE__ */ new Map();
+      textContent.items.filter(isTextItem).forEach((item) => {
+        const yPos = Math.round(item.transform[5]);
+        if (!lineMap.has(yPos)) {
+          lineMap.set(yPos, []);
+        }
+        lineMap.get(yPos).push(item);
+      });
+      const sortedLines = Array.from(lineMap.entries()).sort((a, b) => b[0] - a[0]).map(
+        ([_, items]) => items.sort((a, b) => a.transform[4] - b.transform[4]).map((item) => item.str).join(" ")
+      );
+      textPages.push(sortedLines.join("\n"));
+    }
+    const fullText = textPages.join("\n\n").replace(/\s+/g, " ").trim();
+    logger3.debug(`[PdfService] Conversion complete for ${docName}, length: ${fullText.length}`);
+    return fullText;
+  } catch (error) {
+    logger3.error(`[PdfService] Error converting PDF ${docName}:`, error.message);
+    throw new Error(`Failed to convert PDF to text: ${error.message}`);
+  }
+}
+function isBinaryContentType(contentType, filename) {
+  const textContentTypes = [
+    "text/",
+    "application/json",
+    "application/xml",
+    "application/javascript",
+    "application/typescript",
+    "application/x-yaml",
+    "application/x-sh"
+  ];
+  const isTextMimeType = textContentTypes.some((type) => contentType.includes(type));
+  if (isTextMimeType) {
+    return false;
+  }
+  const binaryContentTypes = [
+    "application/pdf",
+    "application/msword",
+    "application/vnd.openxmlformats-officedocument",
+    "application/vnd.ms-excel",
+    "application/vnd.ms-powerpoint",
+    "application/zip",
+    "application/x-zip-compressed",
+    "application/octet-stream",
+    "image/",
+    "audio/",
+    "video/"
+  ];
+  const isBinaryMimeType = binaryContentTypes.some((type) => contentType.includes(type));
+  if (isBinaryMimeType) {
+    return true;
+  }
+  const fileExt = filename.split(".").pop()?.toLowerCase() || "";
+  const textExtensions = [
+    "txt",
+    "md",
+    "markdown",
+    "json",
+    "xml",
+    "html",
+    "htm",
+    "css",
+    "js",
+    "ts",
+    "jsx",
+    "tsx",
+    "yaml",
+    "yml",
+    "toml",
+    "ini",
+    "cfg",
+    "conf",
+    "sh",
+    "bash",
+    "zsh",
+    "fish",
+    "py",
+    "rb",
+    "go",
+    "rs",
+    "java",
+    "c",
+    "cpp",
+    "h",
+    "hpp",
+    "cs",
+    "php",
+    "sql",
+    "r",
+    "swift",
+    "kt",
+    "scala",
+    "clj",
+    "ex",
+    "exs",
+    "vim",
+    "env",
+    "gitignore",
+    "dockerignore",
+    "editorconfig",
+    "log",
+    "csv",
+    "tsv",
+    "properties",
+    "gradle",
+    "sbt",
+    "makefile",
+    "dockerfile",
+    "vagrantfile",
+    "gemfile",
+    "rakefile",
+    "podfile",
+    "csproj",
+    "vbproj",
+    "fsproj",
+    "sln",
+    "pom"
+  ];
+  if (textExtensions.includes(fileExt)) {
+    return false;
+  }
+  const binaryExtensions = [
+    "pdf",
+    "docx",
+    "doc",
+    "xls",
+    "xlsx",
+    "ppt",
+    "pptx",
+    "zip",
+    "rar",
+    "7z",
+    "tar",
+    "gz",
+    "bz2",
+    "xz",
+    "jpg",
+    "jpeg",
+    "png",
+    "gif",
+    "bmp",
+    "svg",
+    "ico",
+    "webp",
+    "mp3",
+    "mp4",
+    "avi",
+    "mov",
+    "wmv",
+    "flv",
+    "wav",
+    "flac",
+    "ogg",
+    "exe",
+    "dll",
+    "so",
+    "dylib",
+    "bin",
+    "dat",
+    "db",
+    "sqlite"
+  ];
+  return binaryExtensions.includes(fileExt);
+}
+function isTextItem(item) {
+  return "str" in item;
+}
+function normalizeS3Url(url) {
+  try {
+    const urlObj = new URL(url);
+    return `${urlObj.origin}${urlObj.pathname}`;
+  } catch (error) {
+    logger3.warn(`[URL NORMALIZER] Failed to parse URL: ${url}. Returning original.`);
+    return url;
+  }
+}
+async function fetchUrlContent(url) {
+  logger3.debug(`[URL FETCHER] Fetching content from URL: ${url}`);
+  try {
+    const controller = new AbortController();
+    const timeoutId = setTimeout(() => controller.abort(), 3e4);
+    const response = await fetch(url, {
+      signal: controller.signal,
+      headers: {
+        "User-Agent": "Eliza-Knowledge-Plugin/1.0"
+      }
+    });
+    clearTimeout(timeoutId);
+    if (!response.ok) {
+      throw new Error(`Failed to fetch URL: ${response.status} ${response.statusText}`);
+    }
+    const contentType = response.headers.get("content-type") || "application/octet-stream";
+    logger3.debug(`[URL FETCHER] Content type from server: ${contentType} for URL: ${url}`);
+    const arrayBuffer = await response.arrayBuffer();
+    const buffer = Buffer2.from(arrayBuffer);
+    const base64Content = buffer.toString("base64");
+    logger3.debug(
+      `[URL FETCHER] Successfully fetched content from URL: ${url} (${buffer.length} bytes)`
     );
+    return {
+      content: base64Content,
+      contentType
+    };
+  } catch (error) {
+    logger3.error(`[URL FETCHER] Error fetching content from URL ${url}: ${error.message}`);
+    throw new Error(`Failed to fetch content from URL: ${error.message}`);
+  }
+}
+function looksLikeBase64(content) {
+  if (!content || content.length === 0) return false;
+  const cleanContent = content.replace(/\s/g, "");
+  if (cleanContent.length < 16) return false;
+  if (cleanContent.length % 4 !== 0) return false;
+  const base64Regex = /^[A-Za-z0-9+/]*={0,2}$/;
+  if (!base64Regex.test(cleanContent)) return false;
+  const hasNumbers = /\d/.test(cleanContent);
+  const hasUpperCase = /[A-Z]/.test(cleanContent);
+  const hasLowerCase = /[a-z]/.test(cleanContent);
+  return (hasNumbers || hasUpperCase) && hasLowerCase;
+}
+function generateContentBasedId(content, agentId, options) {
+  const {
+    maxChars = 2e3,
+    // Use first 2000 chars by default
+    includeFilename,
+    contentType
+  } = options || {};
+  let contentForHashing;
+  if (looksLikeBase64(content)) {
+    try {
+      const decoded = Buffer2.from(content, "base64").toString("utf8");
+      if (!decoded.includes("\uFFFD") || contentType?.includes("pdf")) {
+        contentForHashing = content.slice(0, maxChars);
+      } else {
+        contentForHashing = decoded.slice(0, maxChars);
+      }
+    } catch {
+      contentForHashing = content.slice(0, maxChars);
+    }
+  } else {
+    contentForHashing = content.slice(0, maxChars);
   }
+  contentForHashing = contentForHashing.replace(/\r\n/g, "\n").replace(/\r/g, "\n").trim();
+  const componentsToHash = [
+    agentId,
+    // Namespace by agent
+    contentForHashing,
+    // The actual content
+    includeFilename || ""
+    // Optional filename for additional uniqueness
+  ].filter(Boolean).join("::");
+  const hash = createHash2("sha256").update(componentsToHash).digest("hex");
+  const DOCUMENT_NAMESPACE = "6ba7b810-9dad-11d1-80b4-00c04fd430c8";
+  const uuid = v5_default(hash, DOCUMENT_NAMESPACE);
+  logger3.debug(
+    `[generateContentBasedId] Generated UUID ${uuid} for document with content hash ${hash.slice(0, 8)}...`
+  );
+  return uuid;
 }
 
 // src/document-processor.ts
-
+function estimateTokens(text) {
+  return Math.ceil(text.length / 4);
+}
+function getCtxKnowledgeEnabled(runtime) {
+  let result;
+  let source;
+  let rawValue;
+  if (runtime) {
+    rawValue = runtime.getSetting("CTX_KNOWLEDGE_ENABLED");
+    const cleanValue = rawValue?.toString().trim().toLowerCase();
+    result = cleanValue === "true";
+    source = "runtime.getSetting()";
+  } else {
+    rawValue = process.env.CTX_KNOWLEDGE_ENABLED;
+    const cleanValue = rawValue?.toString().trim().toLowerCase();
+    result = cleanValue === "true";
+    source = "process.env";
+  }
+  if (process.env.NODE_ENV === "development" && rawValue && !result) {
+    logger4.debug(`[Document Processor] CTX config mismatch - ${source}: '${rawValue}' \u2192 ${result}`);
+  }
+  return result;
+}
 function shouldUseCustomLLM() {
   const textProvider = process.env.TEXT_PROVIDER;
   const textModel = process.env.TEXT_MODEL;
@@ -961,18 +1474,6 @@ function shouldUseCustomLLM() {
   }
 }
 var useCustomLLM = shouldUseCustomLLM();
-if (ctxKnowledgeEnabled) {
-  logger3.info(`Document processor starting with Contextual Knowledge ENABLED`);
-  if (useCustomLLM) {
-    logger3.info(
-      `Using Custom LLM with provider: ${process.env.TEXT_PROVIDER}, model: ${process.env.TEXT_MODEL}`
-    );
-  } else {
-    logger3.info(`Using ElizaOS Runtime LLM (default behavior)`);
-  }
-} else {
-  logger3.info(`Document processor starting with Contextual Knowledge DISABLED`);
-}
 async function processFragmentsSynchronously({
   runtime,
   documentId,
@@ -981,21 +1482,29 @@ async function processFragmentsSynchronously({
   contentType,
   roomId,
   entityId,
-  worldId
+  worldId,
+  documentTitle
 }) {
   if (!fullDocumentText || fullDocumentText.trim() === "") {
-
+    logger4.warn(`No text content available to chunk for document ${documentId}.`);
     return 0;
   }
   const chunks = await splitDocumentIntoChunks(fullDocumentText);
   if (chunks.length === 0) {
-
+    logger4.warn(`No chunks generated from text for ${documentId}. No fragments to save.`);
     return 0;
   }
-
+  const docName = documentTitle || documentId.substring(0, 8);
+  logger4.info(`[Document Processor] "${docName}": Split into ${chunks.length} chunks`);
   const providerLimits = await getProviderRateLimits();
   const CONCURRENCY_LIMIT = Math.min(30, providerLimits.maxConcurrentRequests || 30);
-  const rateLimiter = createRateLimiter(
+  const rateLimiter = createRateLimiter(
+    providerLimits.requestsPerMinute || 60,
+    providerLimits.tokensPerMinute
+  );
+  logger4.debug(
+    `[Document Processor] Rate limits: ${providerLimits.requestsPerMinute} RPM, ${providerLimits.tokensPerMinute} TPM (${providerLimits.provider}, concurrency: ${CONCURRENCY_LIMIT})`
+  );
   const { savedCount, failedCount } = await processAndSaveFragments({
     runtime,
     documentId,
@@ -1007,14 +1516,27 @@ async function processFragmentsSynchronously({
     entityId: entityId || agentId,
     worldId: worldId || agentId,
     concurrencyLimit: CONCURRENCY_LIMIT,
-    rateLimiter
+    rateLimiter,
+    documentTitle
   });
+  const successRate = (savedCount / chunks.length * 100).toFixed(1);
   if (failedCount > 0) {
-
-    `
+    logger4.warn(
+      `[Document Processor] "${docName}": ${failedCount}/${chunks.length} chunks failed processing`
     );
   }
-
+  logger4.info(
+    `[Document Processor] "${docName}" complete: ${savedCount}/${chunks.length} fragments saved (${successRate}% success)`
+  );
+  logKnowledgeGenerationSummary({
+    documentId,
+    totalChunks: chunks.length,
+    savedCount,
+    failedCount,
+    successRate: parseFloat(successRate),
+    ctxEnabled: getCtxKnowledgeEnabled(runtime),
+    providerLimits
+  });
   return savedCount;
 }
 async function extractTextFromDocument(fileBuffer, contentType, originalFilename) {
@@ -1023,15 +1545,15 @@ async function extractTextFromDocument(fileBuffer, contentType, originalFilename
   }
   try {
     if (contentType === "application/pdf") {
-
+      logger4.debug(`Extracting text from PDF: ${originalFilename}`);
       return await convertPdfToTextFromBuffer(fileBuffer, originalFilename);
     } else {
-
+      logger4.debug(`Extracting text from non-PDF: ${originalFilename} (Type: ${contentType})`);
      if (contentType.includes("text/") || contentType.includes("application/json") || contentType.includes("application/xml")) {
        try {
          return fileBuffer.toString("utf8");
        } catch (textError) {
-
+          logger4.warn(
             `Failed to decode ${originalFilename} as UTF-8, falling back to binary extraction`
           );
         }
@@ -1039,7 +1561,7 @@
       return await extractTextFromFileBuffer(fileBuffer, contentType, originalFilename);
     }
   } catch (error) {
-
+    logger4.error(`Error extracting text from ${originalFilename}: ${error.message}`);
     throw new Error(`Failed to extract text from ${originalFilename}: ${error.message}`);
   }
 }
@@ -1084,7 +1606,7 @@ async function splitDocumentIntoChunks(documentText) {
   const tokenChunkOverlap = DEFAULT_CHUNK_OVERLAP_TOKENS;
   const targetCharChunkSize = Math.round(tokenChunkSize * DEFAULT_CHARS_PER_TOKEN);
   const targetCharChunkOverlap = Math.round(tokenChunkOverlap * DEFAULT_CHARS_PER_TOKEN);
-
+  logger4.debug(
     `Using core splitChunks with settings: tokenChunkSize=${tokenChunkSize}, tokenChunkOverlap=${tokenChunkOverlap}, charChunkSize=${targetCharChunkSize}, charChunkOverlap=${targetCharChunkOverlap}`
   );
   return await splitChunks(documentText, tokenChunkSize, tokenChunkOverlap);
@@ -1100,7 +1622,8 @@ async function processAndSaveFragments({
   entityId,
   worldId,
   concurrencyLimit,
-  rateLimiter
+  rateLimiter,
+  documentTitle
 }) {
   let savedCount = 0;
   let failedCount = 0;
@@ -1108,15 +1631,16 @@
   for (let i = 0; i < chunks.length; i += concurrencyLimit) {
     const batchChunks = chunks.slice(i, i + concurrencyLimit);
     const batchOriginalIndices = Array.from({ length: batchChunks.length }, (_, k) => i + k);
-
-    `
+    logger4.debug(
+      `[Document Processor] Batch ${Math.floor(i / concurrencyLimit) + 1}/${Math.ceil(chunks.length / concurrencyLimit)}: processing ${batchChunks.length} chunks (${batchOriginalIndices[0]}-${batchOriginalIndices[batchOriginalIndices.length - 1]})`
     );
     const contextualizedChunks = await getContextualizedChunks(
       runtime,
       fullDocumentText,
       batchChunks,
       contentType,
-      batchOriginalIndices
+      batchOriginalIndices,
+      documentTitle
     );
     const embeddingResults = await generateEmbeddingsForChunks(
       runtime,
@@ -1128,13 +1652,13 @@
       if (!result.success) {
         failedCount++;
         failedChunks.push(originalChunkIndex);
-
+        logger4.warn(`Failed to process chunk ${originalChunkIndex} for document ${documentId}`);
         continue;
       }
       const contextualizedChunkText = result.text;
       const embedding = result.embedding;
       if (!embedding || embedding.length === 0) {
-
+        logger4.warn(
           `Zero vector detected for chunk ${originalChunkIndex} (document ${documentId}). Embedding: ${JSON.stringify(result.embedding)}`
         );
         failedCount++;
@@ -1159,12 +1683,15 @@
           }
         };
         await runtime.createMemory(fragmentMemory, "knowledge");
-
-
-
+        if (originalChunkIndex === chunks.length - 1) {
+          const docName = documentTitle || documentId.substring(0, 8);
+          logger4.info(
+            `[Document Processor] "${docName}": All ${chunks.length} chunks processed successfully`
+          );
+        }
         savedCount++;
       } catch (saveError) {
-
+        logger4.error(
           `Error saving chunk ${originalChunkIndex} to database: ${saveError.message}`,
           saveError.stack
         );
@@ -1173,7 +1700,7 @@
       }
     }
     if (i + concurrencyLimit < chunks.length) {
-      await new Promise((
+      await new Promise((resolve2) => setTimeout(resolve2, 500));
     }
   }
   return { savedCount, failedCount, failedChunks };
@@ -1199,7 +1726,8 @@ async function generateEmbeddingsForChunks(runtime, contextualizedChunks, rateLi
           text: contextualizedChunk.contextualizedText
         };
       }
-
+      const embeddingTokens = estimateTokens(contextualizedChunk.contextualizedText);
+      await rateLimiter(embeddingTokens);
       try {
         const generateEmbeddingOperation = async () => {
           return await generateEmbeddingWithValidation(
@@ -1226,7 +1754,7 @@
           text: contextualizedChunk.contextualizedText
         };
       } catch (error) {
-
+        logger4.error(
           `Error generating embedding for chunk ${contextualizedChunk.index}: ${error.message}`
         );
         return {
@@ -1239,37 +1767,50 @@
     })
   );
 }
-async function getContextualizedChunks(runtime, fullDocumentText, chunks, contentType, batchOriginalIndices) {
-
-
+async function getContextualizedChunks(runtime, fullDocumentText, chunks, contentType, batchOriginalIndices, documentTitle) {
+  const ctxEnabled = getCtxKnowledgeEnabled(runtime);
+  if (batchOriginalIndices[0] === 0) {
+    const docName = documentTitle || "Document";
+    const provider = runtime?.getSetting("TEXT_PROVIDER") || process.env.TEXT_PROVIDER;
+    const model = runtime?.getSetting("TEXT_MODEL") || process.env.TEXT_MODEL;
+    logger4.info(
+      `[Document Processor] "${docName}": CTX enrichment ${ctxEnabled ? "ENABLED" : "DISABLED"}${ctxEnabled ? ` (${provider}/${model})` : ""}`
+    );
+  }
+  if (ctxEnabled && fullDocumentText) {
     return await generateContextsInBatch(
       runtime,
       fullDocumentText,
       chunks,
       contentType,
-      batchOriginalIndices
+      batchOriginalIndices,
+      documentTitle
+    );
+  } else if (!ctxEnabled && batchOriginalIndices[0] === 0) {
+    logger4.debug(
+      `[Document Processor] To enable CTX: Set CTX_KNOWLEDGE_ENABLED=true and configure TEXT_PROVIDER/TEXT_MODEL`
     );
-  } else {
-    return chunks.map((chunkText, idx) => ({
-      contextualizedText: chunkText,
-      index: batchOriginalIndices[idx],
-      success: true
-    }));
   }
+  return chunks.map((chunkText, idx) => ({
+    contextualizedText: chunkText,
+    index: batchOriginalIndices[idx],
+    success: true
+  }));
 }
-async function generateContextsInBatch(runtime, fullDocumentText, chunks, contentType, batchIndices) {
-  console.log("####### generateContextsInBatch FULLL DOCUMENT", fullDocumentText);
-  console.log("####### generateContextsInBatch CHUNKS", chunks);
+async function generateContextsInBatch(runtime, fullDocumentText, chunks, contentType, batchIndices, documentTitle) {
   if (!chunks || chunks.length === 0) {
     return [];
   }
   const providerLimits = await getProviderRateLimits();
-  const rateLimiter = createRateLimiter(
-
+  const rateLimiter = createRateLimiter(
+    providerLimits.requestsPerMinute || 60,
+    providerLimits.tokensPerMinute
+  );
+  const config = validateModelConfig(runtime);
   const isUsingOpenRouter = config.TEXT_PROVIDER === "openrouter";
   const isUsingCacheCapableModel = isUsingOpenRouter && (config.TEXT_MODEL?.toLowerCase().includes("claude") || config.TEXT_MODEL?.toLowerCase().includes("gemini"));
-
-  `
+  logger4.debug(
+    `[Document Processor] Contextualizing ${chunks.length} chunks with ${config.TEXT_PROVIDER}/${config.TEXT_MODEL} (cache: ${isUsingCacheCapableModel})`
  );
   const promptConfigs = prepareContextPrompts(
     chunks,
@@ -1287,19 +1828,20 @@ async function generateContextsInBatch(runtime, fullDocumentText, chunks, conten
        index: item.originalIndex
      };
    }
-
+    const llmTokens = estimateTokens(item.chunkText + (item.prompt || ""));
+    await rateLimiter(llmTokens);
    try {
      let llmResponse;
      const generateTextOperation = async () => {
        if (useCustomLLM) {
          if (item.usesCaching) {
-            return await generateText(item.promptText, item.systemPrompt, {
+            return await generateText(runtime, item.promptText, item.systemPrompt, {
              cacheDocument: item.fullDocumentTextForContext,
              cacheOptions: { type: "ephemeral" },
              autoCacheContextualRetrieval: true
            });
          } else {
-            return await generateText(item.prompt);
+            return await generateText(runtime, item.prompt);
          }
        } else {
          if (item.usesCaching) {
@@ -1320,16 +1862,19 @@ async function generateContextsInBatch(runtime, fullDocumentText, chunks, conten
    );
    const generatedContext = typeof llmResponse === "string" ? llmResponse : llmResponse.text;
    const contextualizedText = getChunkWithContext(item.chunkText, generatedContext);
-
-
-
+    if ((item.originalIndex + 1) % Math.max(1, Math.floor(chunks.length / 3)) === 0 || item.originalIndex === chunks.length - 1) {
+      const docName = documentTitle || "Document";
+      logger4.debug(
+        `[Document Processor] "${docName}": Context added for ${item.originalIndex + 1}/${chunks.length} chunks`
+      );
+    }
    return {
      contextualizedText,
      success: true,
      index: item.originalIndex
    };
  } catch (error) {
-
+    logger4.error(
      `Error generating context for chunk ${item.originalIndex}: ${error.message}`,
      error.stack
    );
@@ -1350,7 +1895,7 @@ function prepareContextPrompts(chunks, fullDocumentText, contentType, batchIndic
    if (isUsingCacheCapableModel) {
      const cachingPromptInfo = contentType ? getCachingPromptForMimeType(contentType, chunkText) : getCachingContextualizationPrompt(chunkText);
      if (cachingPromptInfo.prompt.startsWith("Error:")) {
-
+        logger4.warn(
          `Skipping contextualization for chunk ${originalIndex} due to: ${cachingPromptInfo.prompt}`
        );
        return {
@@ -1372,7 +1917,7 @@ function prepareContextPrompts(chunks, fullDocumentText, contentType, batchIndic
    } else {
      const prompt = contentType ? getPromptForMimeType(contentType, fullDocumentText, chunkText) : getContextualizationPrompt(fullDocumentText, chunkText);
      if (prompt.startsWith("Error:")) {
-
+        logger4.warn(`Skipping contextualization for chunk ${originalIndex} due to: ${prompt}`);
        return {
          prompt: null,
          originalIndex,
@@ -1390,7 +1935,7 @@ function prepareContextPrompts(chunks, fullDocumentText, contentType, batchIndic
      };
    }
  } catch (error) {
-
+    logger4.error(
      `Error preparing prompt for chunk ${originalIndex}: ${error.message}`,
      error.stack
    );
@@ -1411,7 +1956,7 @@ async function generateEmbeddingWithValidation(runtime, text) {
  });
  const embedding = Array.isArray(embeddingResult) ? embeddingResult : embeddingResult?.embedding;
  if (!embedding || embedding.length === 0) {
-
+    logger4.warn(`Zero vector detected. Embedding result: ${JSON.stringify(embedding)}`);
    return {
      embedding: null,
      success: false,
@@ -1429,43 +1974,298 @@ async function withRateLimitRetry(operation, errorContext, retryDelay) {
  } catch (error) {
    if (error.status === 429) {
      const delay = retryDelay || error.headers?.["retry-after"] || 5;
-
-      await new Promise((
+      logger4.warn(`Rate limit hit for ${errorContext}. Retrying after ${delay}s`);
+      await new Promise((resolve2) => setTimeout(resolve2, delay * 1e3));
      try {
        return await operation();
      } catch (retryError) {
-
+        logger4.error(`Failed after retry for ${errorContext}: ${retryError.message}`);
        throw retryError;
      }
    }
    throw error;
  }
}
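The hunk above shows `withRateLimitRetry` gaining logging around its single retry on HTTP 429, waiting `retry-after` seconds (default 5) before re-running the operation once. A minimal standalone sketch of that pattern, assuming an error shape with `status` and `headers` as in the code above (the helper name `retryOn429` is ours, not the package's):

// Sketch only: one retry on 429, honoring the server's retry-after header.
async function retryOn429(operation, retryDelaySec) {
  try {
    return await operation();
  } catch (error) {
    if (error.status === 429) {
      const delaySec = retryDelaySec || Number(error.headers?.["retry-after"]) || 5;
      await new Promise((resolve) => setTimeout(resolve, delaySec * 1000));
      return await operation(); // a second 429 here propagates to the caller
    }
    throw error;
  }
}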
-function createRateLimiter(requestsPerMinute) {
+function createRateLimiter(requestsPerMinute, tokensPerMinute) {
  const requestTimes = [];
+  const tokenUsage = [];
  const intervalMs = 60 * 1e3;
-  return async function rateLimiter() {
+  return async function rateLimiter(estimatedTokens = 1e3) {
    const now = Date.now();
    while (requestTimes.length > 0 && now - requestTimes[0] > intervalMs) {
      requestTimes.shift();
    }
-
-
-
+    while (tokenUsage.length > 0 && now - tokenUsage[0].timestamp > intervalMs) {
+      tokenUsage.shift();
+    }
+    const currentTokens = tokenUsage.reduce((sum, usage) => sum + usage.tokens, 0);
+    const requestLimitExceeded = requestTimes.length >= requestsPerMinute;
+    const tokenLimitExceeded = tokensPerMinute && currentTokens + estimatedTokens > tokensPerMinute;
+    if (requestLimitExceeded || tokenLimitExceeded) {
+      let timeToWait = 0;
+      if (requestLimitExceeded) {
+        const oldestRequest = requestTimes[0];
+        timeToWait = Math.max(timeToWait, oldestRequest + intervalMs - now);
+      }
+      if (tokenLimitExceeded && tokenUsage.length > 0) {
+        const oldestTokenUsage = tokenUsage[0];
+        timeToWait = Math.max(timeToWait, oldestTokenUsage.timestamp + intervalMs - now);
+      }
      if (timeToWait > 0) {
-
-
+        const reason = requestLimitExceeded ? "request" : "token";
+        if (timeToWait > 5e3) {
+          logger4.info(
+            `[Document Processor] Rate limiting: waiting ${Math.round(timeToWait / 1e3)}s due to ${reason} limit`
+          );
+        } else {
+          logger4.debug(
+            `[Document Processor] Rate limiting: ${timeToWait}ms wait (${reason} limit)`
+          );
+        }
+        await new Promise((resolve2) => setTimeout(resolve2, timeToWait));
+      }
+    }
+    requestTimes.push(now);
+    if (tokensPerMinute) {
+      tokenUsage.push({ timestamp: now, tokens: estimatedTokens });
+    }
+  };
+}
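The rewritten limiter tracks two sliding one-minute windows at once: a request count and, when `tokensPerMinute` is supplied, a token budget; a call sleeps until the oldest entry ages out of whichever window is exhausted. A hedged usage sketch (the limits and the chars/4 token heuristic are illustrative; the plugin derives real values from `getProviderRateLimits()` and `estimateTokens()`):

// Illustrative usage of createRateLimiter above; numbers are examples only.
const limiter = createRateLimiter(60, 1e4); // 60 requests/min, 10k tokens/min
async function embedAll(chunks) {
  for (const chunk of chunks) {
    const estimatedTokens = Math.ceil(chunk.length / 4); // rough heuristic, not estimateTokens()
    await limiter(estimatedTokens); // may sleep to keep both budgets intact
    // ...call the embedding model for `chunk` here...
  }
}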
+function logKnowledgeGenerationSummary({
+  totalChunks,
+  savedCount,
+  failedCount,
+  ctxEnabled,
+  providerLimits
+}) {
+  if (failedCount > 0 || process.env.NODE_ENV === "development") {
+    const status = failedCount > 0 ? "PARTIAL" : "SUCCESS";
+    logger4.info(
+      `[Document Processor] ${status}: ${savedCount}/${totalChunks} chunks, CTX: ${ctxEnabled ? "ON" : "OFF"}, Provider: ${providerLimits.provider}`
+    );
+  }
+  if (failedCount > 0) {
+    logger4.warn(`[Document Processor] ${failedCount} chunks failed processing`);
+  }
+}
+
+// src/docs-loader.ts
+import { logger as logger5 } from "@elizaos/core";
+import * as fs from "fs";
+import * as path from "path";
+function getKnowledgePath() {
+  const envPath = process.env.KNOWLEDGE_PATH;
+  if (envPath) {
+    const resolvedPath = path.resolve(envPath);
+    if (!fs.existsSync(resolvedPath)) {
+      logger5.warn(`Knowledge path from environment variable does not exist: ${resolvedPath}`);
+      logger5.warn("Please create the directory or update KNOWLEDGE_PATH environment variable");
+    }
+    return resolvedPath;
+  }
+  const defaultPath = path.join(process.cwd(), "docs");
+  if (!fs.existsSync(defaultPath)) {
+    logger5.info(`Default docs folder does not exist at: ${defaultPath}`);
+    logger5.info("To use the knowledge plugin, either:");
+    logger5.info('1. Create a "docs" folder in your project root');
+    logger5.info("2. Set KNOWLEDGE_PATH environment variable to your documents folder");
+  }
+  return defaultPath;
+}
+async function loadDocsFromPath(service, agentId, worldId) {
+  const docsPath = getKnowledgePath();
+  if (!fs.existsSync(docsPath)) {
+    logger5.warn(`Knowledge path does not exist: ${docsPath}`);
+    return { total: 0, successful: 0, failed: 0 };
+  }
+  logger5.info(`Loading documents from: ${docsPath}`);
+  const files = getAllFiles(docsPath);
+  if (files.length === 0) {
+    logger5.info("No files found in knowledge path");
+    return { total: 0, successful: 0, failed: 0 };
+  }
+  logger5.info(`Found ${files.length} files to process`);
+  let successful = 0;
+  let failed = 0;
+  for (const filePath of files) {
+    try {
+      const fileName = path.basename(filePath);
+      const fileExt = path.extname(filePath).toLowerCase();
+      if (fileName.startsWith(".")) {
+        continue;
+      }
+      const contentType = getContentType(fileExt);
+      if (!contentType) {
+        logger5.debug(`Skipping unsupported file type: ${filePath}`);
+        continue;
+      }
+      const fileBuffer = fs.readFileSync(filePath);
+      const isBinary = isBinaryContentType(contentType, fileName);
+      const content = isBinary ? fileBuffer.toString("base64") : fileBuffer.toString("utf-8");
+      const knowledgeOptions = {
+        clientDocumentId: "",
+        // Will be generated by the service based on content
+        contentType,
+        originalFilename: fileName,
+        worldId: worldId || agentId,
+        content,
+        roomId: agentId,
+        entityId: agentId
+      };
+      logger5.debug(`Processing document: ${fileName}`);
+      const result = await service.addKnowledge(knowledgeOptions);
+      logger5.info(`\u2705 "${fileName}": ${result.fragmentCount} fragments created`);
+      successful++;
+    } catch (error) {
+      logger5.error(`Failed to process file ${filePath}:`, error);
+      failed++;
+    }
+  }
+  logger5.info(
+    `Document loading complete: ${successful} successful, ${failed} failed out of ${files.length} total`
+  );
+  return {
+    total: files.length,
+    successful,
+    failed
+  };
+}
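The new loader resolves its folder via `getKnowledgePath()` (KNOWLEDGE_PATH, else ./docs), walks it recursively, skips dotfiles and unknown extensions, base64-encodes binary formats, and funnels everything through `service.addKnowledge`. A hedged sketch of driving it directly (the path and service variable are illustrative, not package defaults):

// Illustrative only: ingest a custom folder once the service is running.
process.env.KNOWLEDGE_PATH = "/srv/agent-docs"; // hypothetical location
const stats = await loadDocsFromPath(knowledgeService, runtime.agentId);
console.log(`${stats.successful}/${stats.total} documents ingested, ${stats.failed} failed`);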
+function getAllFiles(dirPath, files = []) {
+  try {
+    const entries = fs.readdirSync(dirPath, { withFileTypes: true });
+    for (const entry of entries) {
+      const fullPath = path.join(dirPath, entry.name);
+      if (entry.isDirectory()) {
+        if (!["node_modules", ".git", ".vscode", "dist", "build"].includes(entry.name)) {
+          getAllFiles(fullPath, files);
+        }
+      } else if (entry.isFile()) {
+        files.push(fullPath);
      }
    }
-
+  } catch (error) {
+    logger5.error(`Error reading directory ${dirPath}:`, error);
+  }
+  return files;
+}
+function getContentType(extension) {
+  const contentTypes = {
+    // Text documents
+    ".txt": "text/plain",
+    ".md": "text/markdown",
+    ".markdown": "text/markdown",
+    ".tson": "text/plain",
+    ".xml": "application/xml",
+    ".csv": "text/csv",
+    ".tsv": "text/tab-separated-values",
+    ".log": "text/plain",
+    // Web files
+    ".html": "text/html",
+    ".htm": "text/html",
+    ".css": "text/css",
+    ".scss": "text/x-scss",
+    ".sass": "text/x-sass",
+    ".less": "text/x-less",
+    // JavaScript/TypeScript
+    ".js": "text/javascript",
+    ".jsx": "text/javascript",
+    ".ts": "text/typescript",
+    ".tsx": "text/typescript",
+    ".mjs": "text/javascript",
+    ".cjs": "text/javascript",
+    ".vue": "text/x-vue",
+    ".svelte": "text/x-svelte",
+    ".astro": "text/x-astro",
+    // Python
+    ".py": "text/x-python",
+    ".pyw": "text/x-python",
+    ".pyi": "text/x-python",
+    // Java/Kotlin/Scala
+    ".java": "text/x-java",
+    ".kt": "text/x-kotlin",
+    ".kts": "text/x-kotlin",
+    ".scala": "text/x-scala",
+    // C/C++/C#
+    ".c": "text/x-c",
+    ".cpp": "text/x-c++",
+    ".cc": "text/x-c++",
+    ".cxx": "text/x-c++",
+    ".h": "text/x-c",
+    ".hpp": "text/x-c++",
+    ".cs": "text/x-csharp",
+    // Other languages
+    ".php": "text/x-php",
+    ".rb": "text/x-ruby",
+    ".go": "text/x-go",
+    ".rs": "text/x-rust",
+    ".swift": "text/x-swift",
+    ".r": "text/x-r",
+    ".R": "text/x-r",
+    ".m": "text/x-objectivec",
+    ".mm": "text/x-objectivec",
+    ".clj": "text/x-clojure",
+    ".cljs": "text/x-clojure",
+    ".ex": "text/x-elixir",
+    ".exs": "text/x-elixir",
+    ".lua": "text/x-lua",
+    ".pl": "text/x-perl",
+    ".pm": "text/x-perl",
+    ".dart": "text/x-dart",
+    ".hs": "text/x-haskell",
+    ".elm": "text/x-elm",
+    ".ml": "text/x-ocaml",
+    ".fs": "text/x-fsharp",
+    ".fsx": "text/x-fsharp",
+    ".vb": "text/x-vb",
+    ".pas": "text/x-pascal",
+    ".d": "text/x-d",
+    ".nim": "text/x-nim",
+    ".zig": "text/x-zig",
+    ".jl": "text/x-julia",
+    ".tcl": "text/x-tcl",
+    ".awk": "text/x-awk",
+    ".sed": "text/x-sed",
+    // Shell scripts
+    ".sh": "text/x-sh",
+    ".bash": "text/x-sh",
+    ".zsh": "text/x-sh",
+    ".fish": "text/x-fish",
+    ".ps1": "text/x-powershell",
+    ".bat": "text/x-batch",
+    ".cmd": "text/x-batch",
+    // Config files
+    ".json": "application/json",
+    ".yaml": "text/x-yaml",
+    ".yml": "text/x-yaml",
+    ".toml": "text/x-toml",
+    ".ini": "text/x-ini",
+    ".cfg": "text/x-ini",
+    ".conf": "text/x-ini",
+    ".env": "text/plain",
+    ".gitignore": "text/plain",
+    ".dockerignore": "text/plain",
+    ".editorconfig": "text/plain",
+    ".properties": "text/x-properties",
+    // Database
+    ".sql": "text/x-sql",
+    // Binary documents
+    ".pdf": "application/pdf",
+    ".doc": "application/msword",
+    ".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
  };
+  return contentTypes[extension] || null;
}
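`getContentType` is a plain extension-to-MIME lookup; anything not in the table returns null and is skipped by the loader. Expected behavior, for example:

// Behavior of the lookup above; extensions include the leading dot.
getContentType(".md");  // "text/markdown"
getContentType(".pdf"); // "application/pdf" (binary, so the loader base64-encodes it)
getContentType(".exe"); // null -> the docs loader skips the file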
 
 // src/service.ts
+var parseBooleanEnv2 = (value) => {
+  if (typeof value === "boolean") return value;
+  if (typeof value === "string") return value.toLowerCase() === "true";
+  return false;
+};
 var KnowledgeService = class _KnowledgeService extends Service {
   static serviceType = "knowledge";
-  config;
-  knowledgeConfig;
+  config = {};
+  knowledgeConfig = {};
   capabilityDescription = "Provides Retrieval Augmented Generation capabilities, including knowledge upload and querying.";
   knowledgeProcessingSemaphore;
   /**
@@ -1475,53 +2275,25 @@ var KnowledgeService = class _KnowledgeService extends Service {
  constructor(runtime, config) {
    super(runtime);
    this.knowledgeProcessingSemaphore = new Semaphore(10);
-    const parseBooleanEnv = (value) => {
-      if (typeof value === "boolean") return value;
-      if (typeof value === "string") return value.toLowerCase() === "true";
-      return false;
-    };
-    const loadDocsOnStartup = parseBooleanEnv(config?.LOAD_DOCS_ON_STARTUP) || process.env.LOAD_DOCS_ON_STARTUP === "true";
-    this.knowledgeConfig = {
-      CTX_KNOWLEDGE_ENABLED: parseBooleanEnv(config?.CTX_KNOWLEDGE_ENABLED),
-      LOAD_DOCS_ON_STARTUP: loadDocsOnStartup,
-      MAX_INPUT_TOKENS: config?.MAX_INPUT_TOKENS,
-      MAX_OUTPUT_TOKENS: config?.MAX_OUTPUT_TOKENS,
-      EMBEDDING_PROVIDER: config?.EMBEDDING_PROVIDER,
-      TEXT_PROVIDER: config?.TEXT_PROVIDER,
-      TEXT_EMBEDDING_MODEL: config?.TEXT_EMBEDDING_MODEL
-    };
-    this.config = { ...this.knowledgeConfig };
-    logger4.info(
-      `KnowledgeService initialized for agent ${this.runtime.agentId} with config:`,
-      this.knowledgeConfig
-    );
-    if (this.knowledgeConfig.LOAD_DOCS_ON_STARTUP) {
-      logger4.info("LOAD_DOCS_ON_STARTUP is enabled. Loading documents from docs folder...");
-      this.loadInitialDocuments().catch((error) => {
-        logger4.error("Error during initial document loading in KnowledgeService:", error);
-      });
-    } else {
-      logger4.info("LOAD_DOCS_ON_STARTUP is disabled. Skipping automatic document loading.");
-    }
  }
  async loadInitialDocuments() {
-
+    logger6.info(
      `KnowledgeService: Checking for documents to load on startup for agent ${this.runtime.agentId}`
    );
    try {
-      await new Promise((
+      await new Promise((resolve2) => setTimeout(resolve2, 1e3));
      const result = await loadDocsFromPath(this, this.runtime.agentId);
      if (result.successful > 0) {
-
+        logger6.info(
          `KnowledgeService: Loaded ${result.successful} documents from docs folder on startup for agent ${this.runtime.agentId}`
        );
      } else {
-
+        logger6.info(
          `KnowledgeService: No new documents found to load on startup for agent ${this.runtime.agentId}`
        );
      }
    } catch (error) {
-
+      logger6.error(
        `KnowledgeService: Error loading documents on startup for agent ${this.runtime.agentId}:`,
        error
      );
@@ -1533,23 +2305,90 @@ var KnowledgeService = class _KnowledgeService extends Service {
   * @returns Initialized Knowledge service
   */
  static async start(runtime) {
-
+    logger6.info(`Starting Knowledge service for agent: ${runtime.agentId}`);
+    logger6.info("Initializing Knowledge Plugin...");
+    let validatedConfig = {};
+    try {
+      logger6.info("Validating model configuration for Knowledge plugin...");
+      logger6.debug(`[Knowledge Plugin] INIT DEBUG:`);
+      logger6.debug(
+        `[Knowledge Plugin] - process.env.CTX_KNOWLEDGE_ENABLED: '${process.env.CTX_KNOWLEDGE_ENABLED}'`
+      );
+      const config = {
+        CTX_KNOWLEDGE_ENABLED: parseBooleanEnv2(runtime.getSetting("CTX_KNOWLEDGE_ENABLED"))
+      };
+      logger6.debug(
+        `[Knowledge Plugin] - config.CTX_KNOWLEDGE_ENABLED: '${config.CTX_KNOWLEDGE_ENABLED}'`
+      );
+      logger6.debug(
+        `[Knowledge Plugin] - runtime.getSetting('CTX_KNOWLEDGE_ENABLED'): '${runtime.getSetting("CTX_KNOWLEDGE_ENABLED")}'`
+      );
+      validatedConfig = validateModelConfig(runtime);
+      const ctxEnabledFromEnv = parseBooleanEnv2(process.env.CTX_KNOWLEDGE_ENABLED);
+      const ctxEnabledFromRuntime = parseBooleanEnv2(runtime.getSetting("CTX_KNOWLEDGE_ENABLED"));
+      const ctxEnabledFromValidated = validatedConfig.CTX_KNOWLEDGE_ENABLED;
+      const finalCtxEnabled = ctxEnabledFromValidated;
+      logger6.debug(`[Knowledge Plugin] CTX_KNOWLEDGE_ENABLED sources:`);
+      logger6.debug(`[Knowledge Plugin] - From env: ${ctxEnabledFromEnv}`);
+      logger6.debug(`[Knowledge Plugin] - From runtime: ${ctxEnabledFromRuntime}`);
+      logger6.debug(`[Knowledge Plugin] - FINAL RESULT: ${finalCtxEnabled}`);
+      if (finalCtxEnabled) {
+        logger6.info("Running in Contextual Knowledge mode with text generation capabilities.");
+        logger6.info(
+          `Using ${validatedConfig.EMBEDDING_PROVIDER || "auto-detected"} for embeddings and ${validatedConfig.TEXT_PROVIDER} for text generation.`
+        );
+        logger6.info(`Text model: ${validatedConfig.TEXT_MODEL}`);
+      } else {
+        const usingPluginOpenAI = !process.env.EMBEDDING_PROVIDER;
+        logger6.warn(
+          "Running in Basic Embedding mode - documents will NOT be enriched with context!"
+        );
+        logger6.info("To enable contextual enrichment:");
+        logger6.info(" - Set CTX_KNOWLEDGE_ENABLED=true");
+        logger6.info(" - Configure TEXT_PROVIDER (anthropic/openai/openrouter/google)");
+        logger6.info(" - Configure TEXT_MODEL and API key");
+        if (usingPluginOpenAI) {
+          logger6.info("Using auto-detected configuration from plugin-openai for embeddings.");
+        } else {
+          logger6.info(
+            `Using ${validatedConfig.EMBEDDING_PROVIDER} for embeddings with ${validatedConfig.TEXT_EMBEDDING_MODEL}.`
+          );
+        }
+      }
+      logger6.success("Model configuration validated successfully.");
+      logger6.success(`Knowledge Plugin initialized for agent: ${runtime.character.name}`);
+      logger6.info(
+        "Knowledge Plugin initialized. Frontend panel should be discoverable via its public route."
+      );
+    } catch (error) {
+      logger6.error("Failed to initialize Knowledge plugin:", error);
+      throw error;
+    }
    const service = new _KnowledgeService(runtime);
+    service.config = validatedConfig;
+    if (service.config.LOAD_DOCS_ON_STARTUP) {
+      logger6.info("LOAD_DOCS_ON_STARTUP is enabled. Loading documents from docs folder...");
+      service.loadInitialDocuments().catch((error) => {
+        logger6.error("Error during initial document loading in KnowledgeService:", error);
+      });
+    } else {
+      logger6.info("LOAD_DOCS_ON_STARTUP is disabled. Skipping automatic document loading.");
+    }
    if (service.runtime.character?.knowledge && service.runtime.character.knowledge.length > 0) {
-
+      logger6.info(
        `KnowledgeService: Processing ${service.runtime.character.knowledge.length} character knowledge items.`
      );
      const stringKnowledge = service.runtime.character.knowledge.filter(
        (item) => typeof item === "string"
      );
      await service.processCharacterKnowledge(stringKnowledge).catch((err) => {
-
+        logger6.error(
          `KnowledgeService: Error processing character knowledge during startup: ${err.message}`,
          err
        );
      });
    } else {
-
+      logger6.info(
        `KnowledgeService: No character knowledge to process for agent ${runtime.agentId}.`
      );
    }
@@ -1560,10 +2399,10 @@ var KnowledgeService = class _KnowledgeService extends Service {
   * @param runtime Agent runtime
   */
  static async stop(runtime) {
-
+    logger6.info(`Stopping Knowledge service for agent: ${runtime.agentId}`);
    const service = runtime.getService(_KnowledgeService.serviceType);
    if (!service) {
-
+      logger6.warn(`KnowledgeService not found for agent ${runtime.agentId} during stop.`);
    }
    if (service instanceof _KnowledgeService) {
      await service.stop();
@@ -1573,7 +2412,7 @@ var KnowledgeService = class _KnowledgeService extends Service {
   * Stop the service
   */
  async stop() {
-
+    logger6.info(`Knowledge service stopping for agent: ${this.runtime.character?.name}`);
  }
  /**
   * Add knowledge to the system
@@ -1588,15 +2427,11 @@ var KnowledgeService = class _KnowledgeService extends Service {
      maxChars: 2e3
      // Use first 2KB of content for ID generation
    });
-
-      `KnowledgeService processing document for agent: ${agentId}, file: ${options.originalFilename}, type: ${options.contentType}, generated ID: ${contentBasedId}`
-    );
+    logger6.info(`Processing "${options.originalFilename}" (${options.contentType})`);
    try {
      const existingDocument = await this.runtime.getMemoryById(contentBasedId);
      if (existingDocument && existingDocument.metadata?.type === MemoryType2.DOCUMENT) {
-
-          `Document ${options.originalFilename} with ID ${contentBasedId} already exists. Skipping processing.`
-        );
+        logger6.info(`"${options.originalFilename}" already exists - skipping`);
        const fragments = await this.runtime.getMemories({
          tableName: "knowledge"
        });
@@ -1610,7 +2445,7 @@ var KnowledgeService = class _KnowledgeService extends Service {
      };
    }
  } catch (error) {
-
+    logger6.debug(
      `Document ${contentBasedId} not found or error checking existence, proceeding with processing: ${error instanceof Error ? error.message : String(error)}`
    );
  }
@@ -1637,7 +2472,7 @@ var KnowledgeService = class _KnowledgeService extends Service {
  }) {
    const agentId = passedAgentId || this.runtime.agentId;
    try {
-
+      logger6.debug(
        `KnowledgeService: Processing document ${originalFilename} (type: ${contentType}) via processDocument for agent: ${agentId}`
      );
      let fileBuffer = null;
@@ -1648,7 +2483,7 @@ var KnowledgeService = class _KnowledgeService extends Service {
      try {
        fileBuffer = Buffer.from(content, "base64");
      } catch (e) {
-
+        logger6.error(
          `KnowledgeService: Failed to convert base64 to buffer for ${originalFilename}: ${e.message}`
        );
        throw new Error(`Invalid base64 content for PDF file ${originalFilename}`);
@@ -1659,7 +2494,7 @@ var KnowledgeService = class _KnowledgeService extends Service {
      try {
        fileBuffer = Buffer.from(content, "base64");
      } catch (e) {
-
+        logger6.error(
          `KnowledgeService: Failed to convert base64 to buffer for ${originalFilename}: ${e.message}`
        );
        throw new Error(`Invalid base64 content for binary file ${originalFilename}`);
@@ -1676,11 +2511,11 @@ var KnowledgeService = class _KnowledgeService extends Service {
      if (invalidCharCount > 0 && invalidCharCount / textLength > 0.1) {
        throw new Error("Decoded content contains too many invalid characters");
      }
-
+      logger6.debug(`Successfully decoded base64 content for text file: ${originalFilename}`);
      extractedText = decodedText;
      documentContentToStore = decodedText;
    } catch (e) {
-
+      logger6.error(
        `Failed to decode base64 for ${originalFilename}: ${e instanceof Error ? e.message : String(e)}`
      );
      throw new Error(
@@ -1688,7 +2523,7 @@ var KnowledgeService = class _KnowledgeService extends Service {
      );
    }
  } else {
-
+    logger6.debug(`Treating content as plain text for file: ${originalFilename}`);
    extractedText = content;
    documentContentToStore = content;
  }
@@ -1697,7 +2532,7 @@ var KnowledgeService = class _KnowledgeService extends Service {
    const noTextError = new Error(
      `KnowledgeService: No text content extracted from ${originalFilename} (type: ${contentType}).`
    );
-
+    logger6.warn(noTextError.message);
    throw noTextError;
  }
  const documentMemory = createDocumentMemory({
@@ -1723,14 +2558,14 @@ var KnowledgeService = class _KnowledgeService extends Service {
    roomId: roomId || agentId,
    entityId: entityId || agentId
  };
-
+  logger6.debug(
    `KnowledgeService: Creating memory with agentId=${agentId}, entityId=${entityId}, roomId=${roomId}, this.runtime.agentId=${this.runtime.agentId}`
  );
-
+  logger6.debug(
    `KnowledgeService: memoryWithScope agentId=${memoryWithScope.agentId}, entityId=${memoryWithScope.entityId}`
  );
  await this.runtime.createMemory(memoryWithScope, "documents");
-
+  logger6.debug(
    `KnowledgeService: Stored document ${originalFilename} (Memory ID: ${memoryWithScope.id})`
  );
  const fragmentCount = await processFragmentsSynchronously({
@@ -1742,18 +2577,17 @@ var KnowledgeService = class _KnowledgeService extends Service {
    contentType,
    roomId: roomId || agentId,
    entityId: entityId || agentId,
-    worldId: worldId || agentId
+    worldId: worldId || agentId,
+    documentTitle: originalFilename
  });
-
-    `KnowledgeService: Document ${originalFilename} processed with ${fragmentCount} fragments for agent ${agentId}`
-  );
+  logger6.debug(`"${originalFilename}" stored with ${fragmentCount} fragments`);
  return {
    clientDocumentId,
    storedDocumentMemoryId: memoryWithScope.id,
    fragmentCount
  };
} catch (error) {
-
+  logger6.error(
    `KnowledgeService: Error processing document ${originalFilename}: ${error.message}`,
    error.stack
  );
@@ -1762,7 +2596,7 @@ var KnowledgeService = class _KnowledgeService extends Service {
  }
  // --- Knowledge methods moved from AgentRuntime ---
  async handleProcessingError(error, context) {
-
+    logger6.error(`KnowledgeService: Error ${context}:`, error?.message || error || "Unknown error");
    throw error;
  }
  async checkExistingKnowledge(knowledgeId) {
@@ -1770,9 +2604,9 @@ var KnowledgeService = class _KnowledgeService extends Service {
    return !!existingDocument;
  }
  async getKnowledge(message, scope) {
-
+    logger6.debug("KnowledgeService: getKnowledge called for message id: " + message.id);
    if (!message?.content?.text || message?.content?.text.trim().length === 0) {
-
+      logger6.warn("KnowledgeService: Invalid or empty message content for knowledge query.");
      return [];
    }
    const embedding = await this.runtime.useModel(ModelType2.TEXT_EMBEDDING, {
@@ -1801,9 +2635,104 @@ var KnowledgeService = class _KnowledgeService extends Service {
    worldId: fragment.worldId
  }));
  }
+  /**
+   * Enrich a conversation memory with RAG metadata
+   * This can be called after response generation to add RAG tracking data
+   * @param memoryId The ID of the conversation memory to enrich
+   * @param ragMetadata The RAG metadata to add
+   */
+  async enrichConversationMemoryWithRAG(memoryId, ragMetadata) {
+    try {
+      const existingMemory = await this.runtime.getMemoryById(memoryId);
+      if (!existingMemory) {
+        logger6.warn(`Cannot enrich memory ${memoryId} - memory not found`);
+        return;
+      }
+      const updatedMetadata = {
+        ...existingMemory.metadata,
+        knowledgeUsed: true,
+        // Simple flag for UI to detect RAG usage
+        ragUsage: {
+          retrievedFragments: ragMetadata.retrievedFragments,
+          queryText: ragMetadata.queryText,
+          totalFragments: ragMetadata.totalFragments,
+          retrievalTimestamp: ragMetadata.retrievalTimestamp,
+          usedInResponse: true
+        },
+        timestamp: existingMemory.metadata?.timestamp || Date.now(),
+        type: existingMemory.metadata?.type || "message"
+      };
+      await this.runtime.updateMemory({
+        id: memoryId,
+        metadata: updatedMetadata
+      });
+      logger6.debug(
+        `Enriched conversation memory ${memoryId} with RAG data: ${ragMetadata.totalFragments} fragments`
+      );
+    } catch (error) {
+      logger6.warn(
+        `Failed to enrich conversation memory ${memoryId} with RAG data: ${error.message}`
+      );
+    }
+  }
+  /**
+   * Set the current response memory ID for RAG tracking
+   * This is called by the knowledge provider to track which response memory to enrich
+   */
+  pendingRAGEnrichment = [];
+  /**
+   * Store RAG metadata for the next conversation memory that gets created
+   * @param ragMetadata The RAG metadata to associate with the next memory
+   */
+  setPendingRAGMetadata(ragMetadata) {
+    const now = Date.now();
+    this.pendingRAGEnrichment = this.pendingRAGEnrichment.filter(
+      (entry) => now - entry.timestamp < 3e4
+    );
+    this.pendingRAGEnrichment.push({
+      ragMetadata,
+      timestamp: now
+    });
+    logger6.debug(`Stored pending RAG metadata for next conversation memory`);
+  }
+  /**
+   * Try to enrich recent conversation memories with pending RAG metadata
+   * This is called periodically to catch memories that were created after RAG retrieval
+   */
+  async enrichRecentMemoriesWithPendingRAG() {
+    if (this.pendingRAGEnrichment.length === 0) {
+      return;
+    }
+    try {
+      const recentMemories = await this.runtime.getMemories({
+        tableName: "messages",
+        count: 10
+      });
+      const now = Date.now();
+      const recentConversationMemories = recentMemories.filter(
+        (memory) => memory.metadata?.type === "message" && now - (memory.createdAt || 0) < 1e4 && // Created in last 10 seconds
+        !memory.metadata?.ragUsage
+        // Doesn't already have RAG data
+      ).sort((a, b) => (b.createdAt || 0) - (a.createdAt || 0));
+      for (const pendingEntry of this.pendingRAGEnrichment) {
+        const matchingMemory = recentConversationMemories.find(
+          (memory) => (memory.createdAt || 0) > pendingEntry.timestamp
+        );
+        if (matchingMemory && matchingMemory.id) {
+          await this.enrichConversationMemoryWithRAG(matchingMemory.id, pendingEntry.ragMetadata);
+          const index = this.pendingRAGEnrichment.indexOf(pendingEntry);
+          if (index > -1) {
+            this.pendingRAGEnrichment.splice(index, 1);
+          }
+        }
+      }
+    } catch (error) {
+      logger6.warn(`Error enriching recent memories with RAG data: ${error.message}`);
+    }
+  }
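Together the three members added above form a loosely-coupled handoff: the provider parks retrieval metadata on the service, pending entries expire after 30 s, and a later sweep attaches the metadata to the first "message" memory created after retrieval (within 10 s, not yet tagged with ragUsage). A condensed sketch of the intended sequence, using only the methods defined above:

// Condensed timeline of the RAG-tracking handoff (methods as defined above):
// 1. the provider records what was just retrieved
knowledgeService.setPendingRAGMetadata(ragMetadata);
// 2. the runtime persists the assistant's reply as a "message" memory
// 3. shortly afterwards a sweep matches that memory and tags it
setTimeout(() => knowledgeService.enrichRecentMemoriesWithPendingRAG(), 2e3);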
  async processCharacterKnowledge(items) {
-    await new Promise((
-
+    await new Promise((resolve2) => setTimeout(resolve2, 1e3));
+    logger6.info(
      `KnowledgeService: Processing ${items.length} character knowledge items for agent ${this.runtime.agentId}`
    );
    const processingPromises = items.map(async (item) => {
@@ -1816,12 +2745,12 @@ var KnowledgeService = class _KnowledgeService extends Service {
        // A constant identifier for character knowledge
      });
      if (await this.checkExistingKnowledge(knowledgeId)) {
-
+        logger6.debug(
          `KnowledgeService: Character knowledge item with ID ${knowledgeId} already exists. Skipping.`
        );
        return;
      }
-
+      logger6.debug(
        `KnowledgeService: Processing character knowledge for ${this.runtime.character?.name} - ${item.slice(0, 100)}`
      );
      let metadata = {
@@ -1872,7 +2801,7 @@ var KnowledgeService = class _KnowledgeService extends Service {
      }
    });
    await Promise.all(processingPromises);
-
+    logger6.info(
      `KnowledgeService: Finished processing character knowledge for agent ${this.runtime.agentId}.`
    );
  }
@@ -1892,7 +2821,7 @@ var KnowledgeService = class _KnowledgeService extends Service {
      worldId: scope?.worldId ?? this.runtime.agentId,
      entityId: scope?.entityId ?? this.runtime.agentId
    };
-
+    logger6.debug(`KnowledgeService: _internalAddKnowledge called for item ID ${item.id}`);
    const documentMemory = {
      id: item.id,
      // This ID should be the unique ID for the document being added.
@@ -1914,7 +2843,7 @@ var KnowledgeService = class _KnowledgeService extends Service {
    };
    const existingDocument = await this.runtime.getMemoryById(item.id);
    if (existingDocument) {
-
+      logger6.debug(
        `KnowledgeService: Document ${item.id} already exists in _internalAddKnowledge, updating...`
      );
      await this.runtime.updateMemory({
@@ -1938,13 +2867,13 @@ var KnowledgeService = class _KnowledgeService extends Service {
        await this.processDocumentFragment(fragment);
        fragmentsProcessed++;
      } catch (error) {
-
+        logger6.error(
          `KnowledgeService: Error processing fragment ${fragment.id} for document ${item.id}:`,
          error
        );
      }
    }
-
+    logger6.debug(
      `KnowledgeService: Processed ${fragmentsProcessed}/${fragments.length} fragments for document ${item.id}.`
    );
  }
@@ -1953,7 +2882,7 @@ var KnowledgeService = class _KnowledgeService extends Service {
    await this.runtime.addEmbeddingToMemory(fragment);
    await this.runtime.createMemory(fragment, "knowledge");
  } catch (error) {
-
+    logger6.error(
      `KnowledgeService: Error processing fragment ${fragment.id}:`,
      error instanceof Error ? error.message : String(error)
    );
@@ -2018,7 +2947,7 @@ var KnowledgeService = class _KnowledgeService extends Service {
   */
  async deleteMemory(memoryId) {
    await this.runtime.deleteMemory(memoryId);
-
+    logger6.info(
      `KnowledgeService: Deleted memory ${memoryId} for agent ${this.runtime.agentId}. Assumed it was a document or related fragment.`
    );
  }
@@ -2026,13 +2955,14 @@ var KnowledgeService = class _KnowledgeService extends Service {
};
 
// src/provider.ts
-import { addHeader } from "@elizaos/core";
+import { addHeader, logger as logger7 } from "@elizaos/core";
var knowledgeProvider = {
  name: "KNOWLEDGE",
  description: "Knowledge from the knowledge base that the agent knows, retrieved whenever the agent needs to answer a question about their expertise.",
  dynamic: true,
  get: async (runtime, message) => {
-    const
+    const knowledgeService = runtime.getService("knowledge");
+    const knowledgeData = await knowledgeService?.getKnowledge(message);
    const firstFiveKnowledgeItems = knowledgeData?.slice(0, 5);
    let knowledge = (firstFiveKnowledgeItems && firstFiveKnowledgeItems.length > 0 ? addHeader(
      "# Knowledge",
@@ -2042,23 +2972,61 @@ var knowledgeProvider = {
    if (knowledge.length > 4e3 * tokenLength) {
      knowledge = knowledge.slice(0, 4e3 * tokenLength);
    }
+    let ragMetadata = null;
+    if (knowledgeData && knowledgeData.length > 0) {
+      ragMetadata = {
+        retrievedFragments: knowledgeData.map((fragment) => ({
+          fragmentId: fragment.id,
+          documentTitle: fragment.metadata?.filename || fragment.metadata?.title || "Unknown Document",
+          similarityScore: fragment.similarity,
+          contentPreview: (fragment.content?.text || "No content").substring(0, 100) + "..."
+        })),
+        queryText: message.content?.text || "Unknown query",
+        totalFragments: knowledgeData.length,
+        retrievalTimestamp: Date.now()
+      };
+    }
+    if (knowledgeData && knowledgeData.length > 0 && knowledgeService && ragMetadata) {
+      try {
+        knowledgeService.setPendingRAGMetadata(ragMetadata);
+        setTimeout(async () => {
+          try {
+            await knowledgeService.enrichRecentMemoriesWithPendingRAG();
+          } catch (error) {
+            logger7.warn("RAG memory enrichment failed:", error.message);
+          }
+        }, 2e3);
+      } catch (error) {
+        logger7.warn("RAG memory enrichment failed:", error.message);
+      }
+    }
    return {
      data: {
-        knowledge
+        knowledge,
+        ragMetadata,
+        // 🎯 Include RAG metadata for memory tracking
+        knowledgeUsed: knowledgeData && knowledgeData.length > 0
+        // Simple flag for easy detection
      },
      values: {
-        knowledge
+        knowledge,
+        knowledgeUsed: knowledgeData && knowledgeData.length > 0
+        // Simple flag for easy detection
      },
-      text: knowledge
+      text: knowledge,
+      ragMetadata,
+      // 🎯 Also include at top level for easy access
+      knowledgeUsed: knowledgeData && knowledgeData.length > 0
+      // 🎯 Simple flag at top level too
    };
  }
};
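The provider result now exposes the retrieval metadata and a `knowledgeUsed` flag at three levels (data, values, top level), so consumers can detect RAG usage without parsing the rendered text. A hedged sketch of reading it (field names as added above; the calling code is illustrative):

// Illustrative consumer of the provider result shape added above.
const result = await knowledgeProvider.get(runtime, message);
if (result.knowledgeUsed) {
  for (const frag of result.ragMetadata.retrievedFragments) {
    console.log(frag.documentTitle, frag.similarityScore, frag.contentPreview);
  }
}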
 
// src/tests.ts
import { MemoryType as MemoryType3, ModelType as ModelType3 } from "@elizaos/core";
-import { Buffer as
-import * as
-import * as
+import { Buffer as Buffer3 } from "buffer";
+import * as fs2 from "fs";
+import * as path2 from "path";
var mockLogger = {
  info: (() => {
    const fn = (...args) => {
@@ -2192,9 +3160,7 @@ function createMockRuntime(overrides) {
    return ids.map((id) => memories.get(id)).filter(Boolean);
  },
  async getMemoriesByRoomIds(params) {
-    return Array.from(memories.values()).filter(
-      (m) => params.roomIds.includes(m.roomId)
-    );
+    return Array.from(memories.values()).filter((m) => params.roomIds.includes(m.roomId));
  },
  async searchMemories(params) {
    const fragments = Array.from(memories.values()).filter(
@@ -2457,9 +3423,9 @@ trailer
startxref
${465 + content.length}
%%EOF`;
-    return
+    return Buffer3.from(pdfContent);
  }
-  return
+  return Buffer3.from(content, "utf-8");
}
var KnowledgeTestSuite = class {
  name = "knowledge";
@@ -2472,10 +3438,10 @@ var KnowledgeTestSuite = class {
  const originalEnv = { ...process.env };
  delete process.env.KNOWLEDGE_PATH;
  try {
-    const docsPath =
-    const docsExists =
+    const docsPath = path2.join(process.cwd(), "docs");
+    const docsExists = fs2.existsSync(docsPath);
    if (!docsExists) {
-
+      fs2.mkdirSync(docsPath, { recursive: true });
    }
    await index_default.init({}, runtime);
    const errorCalls = mockLogger.error.calls;
@@ -2483,7 +3449,7 @@ var KnowledgeTestSuite = class {
      throw new Error(`Unexpected error during init: ${errorCalls[0]}`);
    }
    if (!docsExists) {
-
+      fs2.rmSync(docsPath, { recursive: true, force: true });
    }
  } finally {
    process.env = originalEnv;
@@ -2496,13 +3462,13 @@ var KnowledgeTestSuite = class {
  const originalEnv = { ...process.env };
  delete process.env.KNOWLEDGE_PATH;
  try {
-    const docsPath =
-    if (
-
+    const docsPath = path2.join(process.cwd(), "docs");
+    if (fs2.existsSync(docsPath)) {
+      fs2.renameSync(docsPath, docsPath + ".backup");
    }
    await index_default.init({}, runtime);
-    if (
-
+    if (fs2.existsSync(docsPath + ".backup")) {
+      fs2.renameSync(docsPath + ".backup", docsPath);
    }
  } finally {
    process.env = originalEnv;
@@ -2521,9 +3487,7 @@ var KnowledgeTestSuite = class {
      throw new Error("Incorrect service capability description");
    }
    runtime.services.set(KnowledgeService.serviceType, service);
-    const retrievedService = runtime.getService(
-      KnowledgeService.serviceType
-    );
+    const retrievedService = runtime.getService(KnowledgeService.serviceType);
    if (retrievedService !== service) {
      throw new Error("Service not properly registered with runtime");
    }
@@ -2536,11 +3500,7 @@ var KnowledgeTestSuite = class {
  fn: async (runtime) => {
    const testContent = "This is a test document with some content.";
    const buffer = createTestFileBuffer(testContent);
-    const extractedText = await extractTextFromDocument(
-      buffer,
-      "text/plain",
-      "test.txt"
-    );
+    const extractedText = await extractTextFromDocument(buffer, "text/plain", "test.txt");
    if (extractedText !== testContent) {
      throw new Error(`Expected "${testContent}", got "${extractedText}"`);
    }
@@ -2549,7 +3509,7 @@ var KnowledgeTestSuite = class {
{
  name: "Should handle empty file buffer",
  fn: async (runtime) => {
-    const emptyBuffer =
+    const emptyBuffer = Buffer3.alloc(0);
    try {
      await extractTextFromDocument(emptyBuffer, "text/plain", "empty.txt");
      throw new Error("Should have thrown error for empty buffer");
@@ -2612,9 +3572,7 @@ var KnowledgeTestSuite = class {
    if (result.fragmentCount === 0) {
      throw new Error("No fragments created");
    }
-    const storedDoc = await runtime.getMemoryById(
-      result.storedDocumentMemoryId
-    );
+    const storedDoc = await runtime.getMemoryById(result.storedDocumentMemoryId);
    if (!storedDoc) {
      throw new Error("Document not found in storage");
    }
@@ -2759,19 +3717,15 @@ var KnowledgeTestSuite = class {
  }
});
const service = await KnowledgeService.start(knowledgeRuntime);
-await new Promise((
+await new Promise((resolve2) => setTimeout(resolve2, 2e3));
const memories = await knowledgeRuntime.getMemories({
  tableName: "documents",
  entityId: knowledgeRuntime.agentId
});
if (memories.length < 3) {
-  throw new Error(
-    `Expected at least 3 character knowledge items, got ${memories.length}`
-  );
+  throw new Error(`Expected at least 3 character knowledge items, got ${memories.length}`);
}
-const pathKnowledge = memories.find(
-  (m) => m.content.text?.includes("markdown content")
-);
+const pathKnowledge = memories.find((m) => m.content.text?.includes("markdown content"));
if (!pathKnowledge) {
  throw new Error("Path-based knowledge not found");
}
@@ -2873,11 +3827,7 @@ var KnowledgeTestSuite = class {
  data: {},
  text: ""
};
-const providerResult = await knowledgeProvider.get(
-  runtime,
-  queryMessage,
-  state
-);
+const providerResult = await knowledgeProvider.get(runtime, queryMessage, state);
if (!providerResult.text || !providerResult.text.includes("qubit")) {
  throw new Error("Provider did not return relevant knowledge");
}
@@ -2907,9 +3857,7 @@ var KnowledgeTestSuite = class {
};
const result = await service.addKnowledge(document);
if (result.fragmentCount < 2) {
-  throw new Error(
-    "Large document should be split into multiple fragments"
-  );
+  throw new Error("Large document should be split into multiple fragments");
}
const fragments = await runtime.getMemories({
  tableName: "knowledge",
@@ -2961,9 +3909,9 @@ var KnowledgeTestSuite = class {
|
|
|
2961
3909
|
var tests_default = new KnowledgeTestSuite();
|
|
2962
3910
|
|
|
2963
3911
|
// src/actions.ts
|
|
2964
|
-
import { logger as
|
|
2965
|
-
import * as
|
|
2966
|
-
import * as
|
|
3912
|
+
import { logger as logger8, stringToUuid } from "@elizaos/core";
|
|
3913
|
+
import * as fs3 from "fs";
|
|
3914
|
+
import * as path3 from "path";
|
|
2967
3915
|
var processKnowledgeAction = {
|
|
2968
3916
|
name: "PROCESS_KNOWLEDGE",
|
|
2969
3917
|
description: "Process and store knowledge from a file path or text content into the knowledge base",
|
|
@@ -3014,25 +3962,19 @@ var processKnowledgeAction = {
       "ingest",
       "file"
     ];
-    const hasKeyword = knowledgeKeywords.some(
-      (keyword) => text.includes(keyword)
-    );
+    const hasKeyword = knowledgeKeywords.some((keyword) => text.includes(keyword));
     const pathPattern = /(?:\/[\w.-]+)+|(?:[a-zA-Z]:[\\/][\w\s.-]+(?:[\\/][\w\s.-]+)*)/;
     const hasPath = pathPattern.test(text);
     const service = runtime.getService(KnowledgeService.serviceType);
     if (!service) {
-
-        "Knowledge service not available for PROCESS_KNOWLEDGE action"
-      );
+      logger8.warn("Knowledge service not available for PROCESS_KNOWLEDGE action");
       return false;
     }
     return hasKeyword || hasPath;
   },
   handler: async (runtime, message, state, options, callback) => {
     try {
-      const service = runtime.getService(
-        KnowledgeService.serviceType
-      );
+      const service = runtime.getService(KnowledgeService.serviceType);
       if (!service) {
         throw new Error("Knowledge service not available");
       }
@@ -3042,7 +3984,7 @@ var processKnowledgeAction = {
       let response;
       if (pathMatch) {
         const filePath = pathMatch[0];
-        if (!
+        if (!fs3.existsSync(filePath)) {
           response = {
             text: `I couldn't find the file at ${filePath}. Please check the path and try again.`
           };
@@ -3051,9 +3993,9 @@ var processKnowledgeAction = {
           }
           return;
         }
-        const fileBuffer =
-        const fileName =
-        const fileExt =
+        const fileBuffer = fs3.readFileSync(filePath);
+        const fileName = path3.basename(filePath);
+        const fileExt = path3.extname(filePath).toLowerCase();
         let contentType = "text/plain";
         if (fileExt === ".pdf") contentType = "application/pdf";
         else if (fileExt === ".docx")
@@ -3075,10 +4017,7 @@ var processKnowledgeAction = {
           text: `I've successfully processed the document "${fileName}". It has been split into ${result.fragmentCount} searchable fragments and added to my knowledge base.`
         };
       } else {
-        const knowledgeContent = text.replace(
-          /^(add|store|remember|process|learn)\s+(this|that|the following)?:?\s*/i,
-          ""
-        ).trim();
+        const knowledgeContent = text.replace(/^(add|store|remember|process|learn)\s+(this|that|the following)?:?\s*/i, "").trim();
         if (!knowledgeContent) {
           response = {
             text: "I need some content to add to my knowledge base. Please provide text or a file path."
@@ -3106,7 +4045,7 @@ var processKnowledgeAction = {
         await callback(response);
       }
     } catch (error) {
-
+      logger8.error("Error in PROCESS_KNOWLEDGE action:", error);
       const errorResponse = {
         text: `I encountered an error while processing the knowledge: ${error instanceof Error ? error.message : "Unknown error"}`
       };
@@ -3146,25 +4085,10 @@ var searchKnowledgeAction = {
   ],
   validate: async (runtime, message, state) => {
     const text = message.content.text?.toLowerCase() || "";
-    const searchKeywords = [
-
-
-
-      "query",
-      "what do you know about"
-    ];
-    const knowledgeKeywords = [
-      "knowledge",
-      "information",
-      "document",
-      "database"
-    ];
-    const hasSearchKeyword = searchKeywords.some(
-      (keyword) => text.includes(keyword)
-    );
-    const hasKnowledgeKeyword = knowledgeKeywords.some(
-      (keyword) => text.includes(keyword)
-    );
+    const searchKeywords = ["search", "find", "look up", "query", "what do you know about"];
+    const knowledgeKeywords = ["knowledge", "information", "document", "database"];
+    const hasSearchKeyword = searchKeywords.some((keyword) => text.includes(keyword));
+    const hasKnowledgeKeyword = knowledgeKeywords.some((keyword) => text.includes(keyword));
     const service = runtime.getService(KnowledgeService.serviceType);
     if (!service) {
       return false;
@@ -3173,17 +4097,12 @@ var searchKnowledgeAction = {
   },
   handler: async (runtime, message, state, options, callback) => {
     try {
-      const service = runtime.getService(
-        KnowledgeService.serviceType
-      );
+      const service = runtime.getService(KnowledgeService.serviceType);
       if (!service) {
         throw new Error("Knowledge service not available");
       }
       const text = message.content.text || "";
-      const query = text.replace(
-        /^(search|find|look up|query)\s+(your\s+)?knowledge\s+(base\s+)?(for\s+)?/i,
-        ""
-      ).trim();
+      const query = text.replace(/^(search|find|look up|query)\s+(your\s+)?knowledge\s+(base\s+)?(for\s+)?/i, "").trim();
       if (!query) {
         const response2 = {
           text: "What would you like me to search for in my knowledge base?"
@@ -3217,7 +4136,7 @@ ${formattedResults}`
         await callback(response);
       }
     } catch (error) {
-
+      logger8.error("Error in SEARCH_KNOWLEDGE action:", error);
       const errorResponse = {
         text: `I encountered an error while searching the knowledge base: ${error instanceof Error ? error.message : "Unknown error"}`
       };
@@ -3230,9 +4149,9 @@ ${formattedResults}`
 var knowledgeActions = [processKnowledgeAction, searchKnowledgeAction];
 
 // src/routes.ts
-import { createUniqueUuid as createUniqueUuid2, logger as
-import
-import
+import { createUniqueUuid as createUniqueUuid2, logger as logger9, ModelType as ModelType4 } from "@elizaos/core";
+import fs4 from "fs";
+import path4 from "path";
 import multer from "multer";
 var createUploadMiddleware = (runtime) => {
   const uploadDir = runtime.getSetting("KNOWLEDGE_UPLOAD_DIR") || "/tmp/uploads/";
@@ -3277,11 +4196,11 @@ function sendError(res, status, code, message, details) {
   res.end(JSON.stringify({ success: false, error: { code, message, details } }));
 }
 var cleanupFile = (filePath) => {
-  if (filePath &&
+  if (filePath && fs4.existsSync(filePath)) {
     try {
-
+      fs4.unlinkSync(filePath);
     } catch (error) {
-
+      logger9.error(`Error cleaning up file ${filePath}:`, error);
     }
   }
 };
@@ -3308,15 +4227,15 @@ async function uploadKnowledgeHandler(req, res, runtime) {
   }
   const invalidFiles = files.filter((file) => {
     if (file.size === 0) {
-
+      logger9.warn(`File ${file.originalname} is empty`);
       return true;
     }
     if (!file.originalname || file.originalname.trim() === "") {
-
+      logger9.warn(`File has no name`);
       return true;
     }
     if (!file.path) {
-
+      logger9.warn(`File ${file.originalname} has no path`);
       return true;
     }
     return false;
@@ -3333,7 +4252,7 @@ async function uploadKnowledgeHandler(req, res, runtime) {
   }
   const agentId = req.body.agentId || req.query.agentId;
   if (!agentId) {
-
+    logger9.error("[Document Processor] \u274C No agent ID provided in upload request");
     return sendError(
       res,
       400,
@@ -3342,15 +4261,15 @@ async function uploadKnowledgeHandler(req, res, runtime) {
     );
   }
   const worldId = req.body.worldId || agentId;
-
+  logger9.info(`[Document Processor] \u{1F4E4} Processing file upload for agent: ${agentId}`);
   const processingPromises = files.map(async (file, index) => {
     const originalFilename = file.originalname;
     const filePath = file.path;
-
-      `[
+    logger9.debug(
+      `[Document Processor] \u{1F4C4} Processing file: ${originalFilename} (agent: ${agentId})`
     );
     try {
-      const fileBuffer = await
+      const fileBuffer = await fs4.promises.readFile(filePath);
       const base64Content = fileBuffer.toString("base64");
       const addKnowledgeOpts = {
         agentId,
@@ -3381,8 +4300,9 @@ async function uploadKnowledgeHandler(req, res, runtime) {
         status: "success"
       };
     } catch (fileError) {
-
-        `[
+      logger9.error(
+        `[Document Processor] \u274C Error processing file ${file.originalname}:`,
+        fileError
       );
       cleanupFile(filePath);
       return {
@@ -3403,7 +4323,7 @@ async function uploadKnowledgeHandler(req, res, runtime) {
   }
   const agentId = req.body.agentId || req.query.agentId;
   if (!agentId) {
-
+    logger9.error("[Document Processor] \u274C No agent ID provided in URL request");
     return sendError(
       res,
       400,
@@ -3411,7 +4331,7 @@ async function uploadKnowledgeHandler(req, res, runtime) {
       "Agent ID is required for uploading knowledge from URLs"
     );
   }
-
+  logger9.info(`[Document Processor] \u{1F4E4} Processing URL upload for agent: ${agentId}`);
   const processingPromises = fileUrls.map(async (fileUrl) => {
     try {
       const normalizedUrl = normalizeS3Url(fileUrl);
@@ -3419,7 +4339,7 @@ async function uploadKnowledgeHandler(req, res, runtime) {
       const pathSegments = urlObject.pathname.split("/");
       const encodedFilename = pathSegments[pathSegments.length - 1] || "document.pdf";
       const originalFilename = decodeURIComponent(encodedFilename);
-
+      logger9.debug(`[Document Processor] \u{1F310} Fetching content from URL: ${fileUrl}`);
       const { content, contentType: fetchedContentType } = await fetchUrlContent(fileUrl);
       let contentType = fetchedContentType;
       if (contentType === "application/octet-stream") {
@@ -3459,8 +4379,8 @@ async function uploadKnowledgeHandler(req, res, runtime) {
           url: normalizedUrl
         }
       };
-
-        `[
+      logger9.debug(
+        `[Document Processor] \u{1F4C4} Processing knowledge from URL: ${originalFilename} (type: ${contentType})`
       );
       const result = await service.addKnowledge(addKnowledgeOpts);
       return {
@@ -3474,7 +4394,7 @@ async function uploadKnowledgeHandler(req, res, runtime) {
         status: "success"
       };
     } catch (urlError) {
-
+      logger9.error(`[Document Processor] \u274C Error processing URL ${fileUrl}:`, urlError);
       return {
         fileUrl,
         status: "error_processing",
@@ -3486,7 +4406,7 @@ async function uploadKnowledgeHandler(req, res, runtime) {
       sendSuccess(res, results);
     }
   } catch (error) {
-
+    logger9.error("[Document Processor] \u274C Error processing knowledge:", error);
     if (hasUploadedFiles) {
       cleanupFiles(req.files);
     }
@@ -3504,7 +4424,7 @@ async function getKnowledgeDocumentsHandler(req, res, runtime) {
     );
   }
   try {
-    const limit = req.query.limit ? Number.parseInt(req.query.limit, 10) :
+    const limit = req.query.limit ? Number.parseInt(req.query.limit, 10) : 1e4;
     const before = req.query.before ? Number.parseInt(req.query.before, 10) : Date.now();
     const includeEmbedding = req.query.includeEmbedding === "true";
     const agentId = req.query.agentId;
@@ -3525,8 +4445,8 @@ async function getKnowledgeDocumentsHandler(req, res, runtime) {
         // Or if the URL is stored in the metadata (check if it exists)
         memory.metadata && "url" in memory.metadata && typeof memory.metadata.url === "string" && normalizedRequestUrls.includes(normalizeS3Url(memory.metadata.url))
       );
-
-        `[
+      logger9.debug(
+        `[Document Processor] \u{1F50D} Filtered documents by URLs: ${fileUrls.length} URLs, found ${filteredMemories.length} matching documents`
       );
     }
     const cleanMemories = includeEmbedding ? filteredMemories : filteredMemories.map((memory) => ({
@@ -3540,15 +4460,12 @@ async function getKnowledgeDocumentsHandler(req, res, runtime) {
       totalRequested: fileUrls ? fileUrls.length : 0
     });
   } catch (error) {
-
+    logger9.error("[Document Processor] \u274C Error retrieving documents:", error);
     sendError(res, 500, "RETRIEVAL_ERROR", "Failed to retrieve documents", error.message);
   }
 }
 async function deleteKnowledgeDocumentHandler(req, res, runtime) {
-
-    - path: ${req.path}
-    - params: ${JSON.stringify(req.params)}
-  `);
+  logger9.debug(`[Document Processor] \u{1F5D1}\uFE0F DELETE request for document: ${req.params.knowledgeId}`);
   const service = runtime.getService(KnowledgeService.serviceType);
   if (!service) {
     return sendError(
@@ -3560,29 +4477,22 @@ async function deleteKnowledgeDocumentHandler(req, res, runtime) {
   }
   const knowledgeId = req.params.knowledgeId;
   if (!knowledgeId || knowledgeId.length < 36) {
-
+    logger9.error(`[Document Processor] \u274C Invalid knowledge ID format: ${knowledgeId}`);
     return sendError(res, 400, "INVALID_ID", "Invalid Knowledge ID format");
   }
   try {
     const typedKnowledgeId = knowledgeId;
-
-      `[KNOWLEDGE DELETE HANDLER] Attempting to delete document with ID: ${typedKnowledgeId}`
-    );
+    logger9.debug(`[Document Processor] \u{1F5D1}\uFE0F Deleting document: ${typedKnowledgeId}`);
     await service.deleteMemory(typedKnowledgeId);
-
-      `[KNOWLEDGE DELETE HANDLER] Successfully deleted document with ID: ${typedKnowledgeId}`
-    );
+    logger9.info(`[Document Processor] \u2705 Successfully deleted document: ${typedKnowledgeId}`);
     sendSuccess(res, null, 204);
   } catch (error) {
-
+    logger9.error(`[Document Processor] \u274C Error deleting document ${knowledgeId}:`, error);
     sendError(res, 500, "DELETE_ERROR", "Failed to delete document", error.message);
   }
 }
 async function getKnowledgeByIdHandler(req, res, runtime) {
-
-    - path: ${req.path}
-    - params: ${JSON.stringify(req.params)}
-  `);
+  logger9.debug(`[Document Processor] \u{1F50D} GET request for document: ${req.params.knowledgeId}`);
   const service = runtime.getService(KnowledgeService.serviceType);
   if (!service) {
     return sendError(
@@ -3594,15 +4504,15 @@ async function getKnowledgeByIdHandler(req, res, runtime) {
   }
   const knowledgeId = req.params.knowledgeId;
   if (!knowledgeId || knowledgeId.length < 36) {
-
+    logger9.error(`[Document Processor] \u274C Invalid knowledge ID format: ${knowledgeId}`);
     return sendError(res, 400, "INVALID_ID", "Invalid Knowledge ID format");
   }
   try {
-
+    logger9.debug(`[Document Processor] \u{1F50D} Retrieving document: ${knowledgeId}`);
     const agentId = req.query.agentId;
     const memories = await service.getMemories({
       tableName: "documents",
-      count:
+      count: 1e4
     });
     const typedKnowledgeId = knowledgeId;
     const document = memories.find((memory) => memory.id === typedKnowledgeId);
@@ -3615,19 +4525,19 @@ async function getKnowledgeByIdHandler(req, res, runtime) {
     };
     sendSuccess(res, { document: cleanDocument });
   } catch (error) {
-
+    logger9.error(`[Document Processor] \u274C Error retrieving document ${knowledgeId}:`, error);
     sendError(res, 500, "RETRIEVAL_ERROR", "Failed to retrieve document", error.message);
   }
 }
 async function knowledgePanelHandler(req, res, runtime) {
   const agentId = runtime.agentId;
-
+  logger9.debug(`[Document Processor] \u{1F310} Serving knowledge panel for agent ${agentId}`);
   try {
-    const currentDir =
-    const frontendPath =
-
-    if (
-    const html = await
+    const currentDir = path4.dirname(new URL(import.meta.url).pathname);
+    const frontendPath = path4.join(currentDir, "../dist/index.html");
+    logger9.debug(`[Document Processor] \u{1F310} Looking for frontend at: ${frontendPath}`);
+    if (fs4.existsSync(frontendPath)) {
+      const html = await fs4.promises.readFile(frontendPath, "utf8");
       const injectedHtml = html.replace(
         "<head>",
         `<head>
@@ -3643,10 +4553,10 @@ async function knowledgePanelHandler(req, res, runtime) {
     } else {
       let cssFile = "index.css";
       let jsFile = "index.js";
-      const manifestPath =
-      if (
+      const manifestPath = path4.join(currentDir, "../dist/manifest.json");
+      if (fs4.existsSync(manifestPath)) {
         try {
-          const manifestContent = await
+          const manifestContent = await fs4.promises.readFile(manifestPath, "utf8");
           const manifest = JSON.parse(manifestContent);
           for (const [key, value] of Object.entries(manifest)) {
             if (typeof value === "object" && value !== null) {
@@ -3659,10 +4569,10 @@ async function knowledgePanelHandler(req, res, runtime) {
             }
           }
         } catch (manifestError) {
-
+          logger9.error("[Document Processor] \u274C Error reading manifest:", manifestError);
         }
       }
-
+      logger9.debug(`[Document Processor] \u{1F310} Using fallback with CSS: ${cssFile}, JS: ${jsFile}`);
       const html = `
 <!DOCTYPE html>
 <html lang="en">
@@ -3696,16 +4606,14 @@ async function knowledgePanelHandler(req, res, runtime) {
       res.end(html);
     }
   } catch (error) {
-
+    logger9.error("[Document Processor] \u274C Error serving frontend:", error);
     sendError(res, 500, "FRONTEND_ERROR", "Failed to load knowledge panel", error.message);
   }
 }
 async function frontendAssetHandler(req, res, runtime) {
   try {
-
-
-    );
-    const currentDir = path3.dirname(new URL(import.meta.url).pathname);
+    logger9.debug(`[Document Processor] \u{1F310} Asset request: ${req.path}`);
+    const currentDir = path4.dirname(new URL(import.meta.url).pathname);
     const assetRequestPath = req.path;
     const assetsMarker = "/assets/";
     const assetsStartIndex = assetRequestPath.indexOf(assetsMarker);
@@ -3721,10 +4629,10 @@ async function frontendAssetHandler(req, res, runtime) {
         `Invalid asset name: '${assetName}' from path ${assetRequestPath}`
       );
     }
-    const assetPath =
-
-    if (
-    const fileStream =
+    const assetPath = path4.join(currentDir, "../dist/assets", assetName);
+    logger9.debug(`[Document Processor] \u{1F310} Serving asset: ${assetPath}`);
+    if (fs4.existsSync(assetPath)) {
+      const fileStream = fs4.createReadStream(assetPath);
       let contentType = "application/octet-stream";
       if (assetPath.endsWith(".js")) {
         contentType = "application/javascript";
@@ -3737,7 +4645,7 @@ async function frontendAssetHandler(req, res, runtime) {
       sendError(res, 404, "NOT_FOUND", `Asset not found: ${req.url}`);
     }
   } catch (error) {
-
+    logger9.error(`[Document Processor] \u274C Error serving asset ${req.url}:`, error);
     sendError(res, 500, "ASSET_ERROR", `Failed to load asset ${req.url}`, error.message);
   }
 }
@@ -3751,8 +4659,8 @@ async function getKnowledgeChunksHandler(req, res, runtime) {
     const documentsOnly = req.query.documentsOnly === "true";
     const documents = await service.getMemories({
       tableName: "documents",
-      count:
-      //
+      count: 1e4,
+      // High limit to get all documents
       end: Date.now()
     });
     if (documentsOnly) {
@@ -3798,7 +4706,7 @@ async function getKnowledgeChunksHandler(req, res, runtime) {
       }
     });
   } catch (error) {
-
+    logger9.error("[Document Processor] \u274C Error retrieving chunks:", error);
     sendError(res, 500, "RETRIEVAL_ERROR", "Failed to retrieve knowledge chunks", error.message);
   }
 }
@@ -3820,15 +4728,15 @@ async function searchKnowledgeHandler(req, res, runtime) {
     return sendError(res, 400, "INVALID_QUERY", "Search query cannot be empty");
   }
   if (req.query.threshold && (parsedThreshold < 0 || parsedThreshold > 1)) {
-
-      `[
+    logger9.debug(
+      `[Document Processor] \u{1F50D} Threshold value ${parsedThreshold} was clamped to ${matchThreshold}`
     );
   }
   if (req.query.limit && (parsedLimit < 1 || parsedLimit > 100)) {
-
+    logger9.debug(`[Document Processor] \u{1F50D} Limit value ${parsedLimit} was clamped to ${limit}`);
   }
-
-    `[
+  logger9.debug(
+    `[Document Processor] \u{1F50D} Searching: "${searchText}" (threshold: ${matchThreshold}, limit: ${limit})`
   );
   const embedding = await runtime.useModel(ModelType4.TEXT_EMBEDDING, {
     text: searchText
@@ -3854,7 +4762,7 @@ async function searchKnowledgeHandler(req, res, runtime) {
             documentFilename = document.metadata.filename || documentFilename;
           }
         } catch (e) {
-
+          logger9.debug(`Could not fetch document ${documentId} for fragment`);
         }
       }
       return {
@@ -3869,8 +4777,8 @@ async function searchKnowledgeHandler(req, res, runtime) {
         };
       })
     );
-
-      `[
+    logger9.info(
+      `[Document Processor] \u{1F50D} Found ${enhancedResults.length} results for: "${searchText}"`
     );
     sendSuccess(res, {
       query: searchText,
@@ -3879,7 +4787,7 @@ async function searchKnowledgeHandler(req, res, runtime) {
       count: enhancedResults.length
     });
   } catch (error) {
-
+    logger9.error("[Document Processor] \u274C Error searching knowledge:", error);
     sendError(res, 500, "SEARCH_ERROR", "Failed to search knowledge", error.message);
   }
 }
@@ -3891,7 +4799,7 @@ async function uploadKnowledgeWithMulter(req, res, runtime) {
   );
   uploadArray(req, res, (err) => {
     if (err) {
-
+      logger9.error("[Document Processor] \u274C File upload error:", err);
       return sendError(res, 400, "UPLOAD_ERROR", err.message);
     }
     uploadKnowledgeHandler(req, res, runtime);
@@ -3946,70 +4854,6 @@ var knowledgeRoutes = [
 var knowledgePlugin = {
   name: "knowledge",
   description: "Plugin for Retrieval Augmented Generation, including knowledge management and embedding.",
-  config: {
-    // Token limits - these will be read from runtime settings during init
-    MAX_INPUT_TOKENS: "4000",
-    MAX_OUTPUT_TOKENS: "4096",
-    // Contextual Knowledge settings
-    CTX_KNOWLEDGE_ENABLED: "false"
-  },
-  async init(config, runtime) {
-    logger7.info("Initializing Knowledge Plugin...");
-    try {
-      logger7.info("Validating model configuration for Knowledge plugin...");
-      const validatedConfig = validateModelConfig(runtime);
-      if (validatedConfig.CTX_KNOWLEDGE_ENABLED) {
-        logger7.info("Running in Contextual Knowledge mode with text generation capabilities.");
-        logger7.info(
-          `Using ${validatedConfig.EMBEDDING_PROVIDER} for embeddings and ${validatedConfig.TEXT_PROVIDER} for text generation.`
-        );
-      } else {
-        const usingPluginOpenAI = !process.env.EMBEDDING_PROVIDER;
-        if (usingPluginOpenAI) {
-          logger7.info(
-            "Running in Basic Embedding mode with auto-detected configuration from plugin-openai."
-          );
-        } else {
-          logger7.info(
-            "Running in Basic Embedding mode (CTX_KNOWLEDGE_ENABLED=false). TEXT_PROVIDER and TEXT_MODEL not required."
-          );
-        }
-        logger7.info(
-          `Using ${validatedConfig.EMBEDDING_PROVIDER} for embeddings with ${validatedConfig.TEXT_EMBEDDING_MODEL}.`
-        );
-      }
-      logger7.info("Model configuration validated successfully.");
-      if (runtime) {
-        logger7.info(`Knowledge Plugin initialized for agent: ${runtime.agentId}`);
-        const loadDocsOnStartup = config.LOAD_DOCS_ON_STARTUP === "true" || process.env.LOAD_DOCS_ON_STARTUP === "true";
-        if (loadDocsOnStartup) {
-          logger7.info("LOAD_DOCS_ON_STARTUP is enabled. Scheduling document loading...");
-          setTimeout(async () => {
-            try {
-              const service = runtime.getService(KnowledgeService.serviceType);
-              if (service instanceof KnowledgeService) {
-                const { loadDocsFromPath: loadDocsFromPath2 } = await import("./docs-loader-PF5X4UMB.js");
-                const result = await loadDocsFromPath2(service, runtime.agentId);
-                if (result.successful > 0) {
-                  logger7.info(`Loaded ${result.successful} documents from docs folder on startup`);
-                }
-              }
-            } catch (error) {
-              logger7.error("Error loading documents on startup:", error);
-            }
-          }, 5e3);
-        } else {
-          logger7.info("LOAD_DOCS_ON_STARTUP is not enabled. Skipping automatic document loading.");
-        }
-      }
-      logger7.info(
-        "Knowledge Plugin initialized. Frontend panel should be discoverable via its public route."
-      );
-    } catch (error) {
-      logger7.error("Failed to initialize Knowledge plugin:", error);
-      throw error;
-    }
-  },
   services: [KnowledgeService],
   providers: [knowledgeProvider],
   routes: knowledgeRoutes,