@elizaos/plugin-knowledge 1.0.11 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/.vite/manifest.json +2 -2
- package/dist/assets/{index-CzI8hR5q.css → index-B5VEkqpw.css} +1 -1
- package/dist/assets/index-YT4-1nM5.js +169 -0
- package/dist/index.d.ts +8 -5
- package/dist/index.html +2 -2
- package/dist/index.js +974 -341
- package/dist/index.js.map +1 -1
- package/package.json +20 -22
- package/dist/assets/index-DimDNB3w.js +0 -160
- package/dist/chunk-RFXW7QQK.js +0 -695
- package/dist/chunk-RFXW7QQK.js.map +0 -1
- package/dist/docs-loader-5H4HRYEE.js +0 -9
- package/dist/docs-loader-5H4HRYEE.js.map +0 -1
package/dist/index.js
CHANGED
|
@@ -1,17 +1,147 @@
|
|
|
1
|
+
// src/service.ts
|
|
1
2
|
import {
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
v4_default
|
|
11
|
-
} from "./chunk-RFXW7QQK.js";
|
|
3
|
+
createUniqueUuid,
|
|
4
|
+
logger as logger6,
|
|
5
|
+
MemoryType as MemoryType2,
|
|
6
|
+
ModelType as ModelType2,
|
|
7
|
+
Semaphore,
|
|
8
|
+
Service,
|
|
9
|
+
splitChunks as splitChunks2
|
|
10
|
+
} from "@elizaos/core";
|
|
12
11
|
|
|
13
|
-
// src/
|
|
14
|
-
import {
|
|
12
|
+
// src/document-processor.ts
|
|
13
|
+
import {
|
|
14
|
+
MemoryType,
|
|
15
|
+
ModelType,
|
|
16
|
+
logger as logger4,
|
|
17
|
+
splitChunks
|
|
18
|
+
} from "@elizaos/core";
|
|
19
|
+
|
|
20
|
+
// node_modules/uuid/dist/esm/regex.js
|
|
21
|
+
// Accepts the 8-4-4-4-12 hex layout whose version digit is 1-8 and whose
// variant digit is 8, 9, a or b, plus the all-zero and all-f UUIDs.
// Matching is case-insensitive.
var regex_default = /^(?:[0-9a-f]{8}-[0-9a-f]{4}-[1-8][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}|00000000-0000-0000-0000-000000000000|ffffffff-ffff-ffff-ffff-ffffffffffff)$/i;

// node_modules/uuid/dist/esm/validate.js
/**
 * Tells whether the argument is a string in canonical UUID form.
 *
 * @param {unknown} uuid - Candidate value; anything that is not a string is rejected.
 * @returns {boolean} true only for strings matched by `regex_default`.
 */
function validate(uuid) {
  if (typeof uuid !== "string") {
    return false;
  }
  return regex_default.test(uuid);
}
var validate_default = validate;
|
|
28
|
+
|
|
29
|
+
// node_modules/uuid/dist/esm/parse.js
|
|
30
|
+
/**
 * Converts a UUID string into its 16 raw bytes.
 *
 * @param {string} uuid - UUID text; must be accepted by `validate_default`.
 * @returns {Uint8Array} The 16 bytes, in the order the hex digits appear.
 * @throws {TypeError} "Invalid UUID" when validation fails.
 */
function parse(uuid) {
  if (!validate_default(uuid)) {
    throw TypeError("Invalid UUID");
  }
  // Each dash-separated hex group is read as one integer and then split
  // into bytes with shifts and masks.
  const group1 = parseInt(uuid.slice(0, 8), 16);
  const group2 = parseInt(uuid.slice(9, 13), 16);
  const group3 = parseInt(uuid.slice(14, 18), 16);
  const group4 = parseInt(uuid.slice(19, 23), 16);
  // The last group is 48 bits wide, so its two top bytes are extracted by
  // division rather than by 32-bit shifts.
  const group5 = parseInt(uuid.slice(24, 36), 16);
  return Uint8Array.of(
    group1 >>> 24,
    group1 >>> 16 & 255,
    group1 >>> 8 & 255,
    group1 & 255,
    group2 >>> 8,
    group2 & 255,
    group3 >>> 8,
    group3 & 255,
    group4 >>> 8,
    group4 & 255,
    group5 / 1099511627776 & 255,
    group5 / 4294967296 & 255,
    group5 >>> 24 & 255,
    group5 >>> 16 & 255,
    group5 >>> 8 & 255,
    group5 & 255
  );
}
var parse_default = parse;
|
|
38
|
+
|
|
39
|
+
// node_modules/uuid/dist/esm/stringify.js
|
|
40
|
+
// Lookup table: byteToHex[n] is the two-digit lowercase hex form of n (0-255).
// Adding 256 before converting and dropping the first digit keeps the leading zero.
var byteToHex = [];
for (let value = 0; value < 256; value += 1) {
  byteToHex[value] = (value + 256).toString(16).slice(1);
}

/**
 * Formats 16 consecutive bytes of `arr` as an 8-4-4-4-12 hex string.
 * Nothing is validated: neither the bytes nor the array length are checked.
 *
 * @param {ArrayLike<number>} arr - Source bytes.
 * @param {number} [offset=0] - Index of the first byte to format.
 * @returns {string} Lowercase dashed hex string.
 */
function unsafeStringify(arr, offset = 0) {
  const hexAt = (position) => byteToHex[arr[offset + position]];
  // One left-to-right `+` chain, so coercion of the pieces is the same as
  // writing all sixteen lookups inline.
  const text = hexAt(0) + hexAt(1) + hexAt(2) + hexAt(3) + "-" +
    hexAt(4) + hexAt(5) + "-" +
    hexAt(6) + hexAt(7) + "-" +
    hexAt(8) + hexAt(9) + "-" +
    hexAt(10) + hexAt(11) + hexAt(12) + hexAt(13) + hexAt(14) + hexAt(15);
  return text.toLowerCase();
}
|
|
47
|
+
|
|
48
|
+
// node_modules/uuid/dist/esm/rng.js
|
|
49
|
+
import { randomFillSync } from "crypto";
|
|
50
|
+
// Random bytes are drawn 256 at a time and handed out in 16-byte slices.
var rnds8Pool = new Uint8Array(256);
// Starts at the end of the pool so the first call triggers a fill.
var poolPtr = rnds8Pool.length;

/**
 * Returns the next 16 random bytes, refilling the pool via `randomFillSync`
 * when fewer than 16 unread bytes remain.
 *
 * @returns {Uint8Array} A fresh 16-byte copy (not a view into the pool).
 */
function rng() {
  const needsRefill = poolPtr > rnds8Pool.length - 16;
  if (needsRefill) {
    randomFillSync(rnds8Pool);
    poolPtr = 0;
  }
  const start = poolPtr;
  poolPtr = start + 16;
  return rnds8Pool.slice(start, poolPtr);
}
|
|
59
|
+
|
|
60
|
+
// node_modules/uuid/dist/esm/v35.js
|
|
61
|
+
/**
 * Encodes a string as UTF-8 bytes.
 *
 * @param {string} str - Text to encode.
 * @returns {Uint8Array} One element per UTF-8 byte.
 */
function stringToBytes(str) {
  // encodeURIComponent emits %XX escapes for the UTF-8 encoding; unescape
  // turns every escape back into a single character whose code is that byte.
  const binary = unescape(encodeURIComponent(str));
  const out = new Uint8Array(binary.length);
  let index = 0;
  while (index < binary.length) {
    out[index] = binary.charCodeAt(index);
    index += 1;
  }
  return out;
}
|
|
69
|
+
// Namespace UUID strings; exposed to callers as `v5.DNS` and `v5.URL`.
var DNS = "6ba7b810-9dad-11d1-80b4-00c04fd430c8";
var URL2 = "6ba7b811-9dad-11d1-80b4-00c04fd430c8";

/**
 * Builds a name-based UUID: hashes `namespace bytes + value bytes`, then
 * stamps the version and variant bits into the first 16 bytes of the digest.
 *
 * @param {number} version - Value OR-ed into the high nibble of byte 6 (e.g. 80 = 0x50).
 * @param {(bytes: Uint8Array) => Uint8Array} hash - Digest function returning at least 16 bytes.
 * @param {string|Uint8Array} value - Name to hash; strings go through `stringToBytes`.
 * @param {string|ArrayLike<number>} namespace - UUID string (parsed with `parse_default`) or 16 bytes.
 * @param {ArrayLike<number>} [buf] - When given, the 16 bytes are written here and `buf` is returned.
 * @param {number} [offset=0] - Start index in `buf`.
 * @returns {string|ArrayLike<number>} Dashed UUID string, or `buf` when one was supplied.
 * @throws {TypeError} When the namespace is not 16 bytes long (or the string is not a valid UUID).
 */
function v35(version, hash, value, namespace, buf, offset) {
  const valueBytes = typeof value === "string" ? stringToBytes(value) : value;
  // Parse the namespace once and validate the very value that is hashed
  // below, instead of re-parsing and reassigning the parameter.
  const namespaceBytes = typeof namespace === "string" ? parse_default(namespace) : namespace;
  if (namespaceBytes?.length !== 16) {
    throw TypeError("Namespace must be array-like (16 iterable integer values, 0-255)");
  }
  const input = new Uint8Array(16 + valueBytes.length);
  input.set(namespaceBytes);
  input.set(valueBytes, namespaceBytes.length);
  const bytes = hash(input);
  // Keep the low nibble of byte 6 and overwrite the high nibble with the version.
  bytes[6] = bytes[6] & 15 | version;
  // Force the top two bits of byte 8 to binary 10.
  bytes[8] = bytes[8] & 63 | 128;
  if (buf) {
    const start = offset || 0;
    for (let i = 0; i < 16; ++i) {
      buf[start + i] = bytes[i];
    }
    return buf;
  }
  return unsafeStringify(bytes);
}
|
|
95
|
+
|
|
96
|
+
// node_modules/uuid/dist/esm/native.js
|
|
97
|
+
import { randomUUID } from "crypto";
|
|
98
|
+
var native_default = { randomUUID };

// node_modules/uuid/dist/esm/v4.js
/**
 * Produces a random UUID.
 *
 * With no `options` and no `buf` the call is delegated to `crypto.randomUUID`.
 * Otherwise 16 random bytes are taken from `options.random`, else
 * `options.rng()`, else the module's `rng()`. The version/variant bits are
 * then set on those bytes in place.
 *
 * @param {{random?: ArrayLike<number>, rng?: () => ArrayLike<number>}} [options]
 * @param {ArrayLike<number>} [buf] - When given, receives the 16 bytes and is returned.
 * @param {number} [offset=0] - Start index in `buf`.
 * @returns {string|ArrayLike<number>} Dashed UUID string, or `buf`.
 * @throws {Error} When fewer than 16 random bytes are supplied.
 * @throws {RangeError} When the 16 bytes would not fit in `buf` at `offset`.
 */
function v4(options, buf, offset) {
  const useNative = native_default.randomUUID && !buf && !options;
  if (useNative) {
    return native_default.randomUUID();
  }
  options = options || {};
  const rnds = options.random ?? options.rng?.() ?? rng();
  if (rnds.length < 16) {
    throw new Error("Random bytes length must be >= 16");
  }
  // High nibble of byte 6 becomes 4; top two bits of byte 8 become binary 10.
  rnds[6] = rnds[6] & 15 | 64;
  rnds[8] = rnds[8] & 63 | 128;
  if (!buf) {
    return unsafeStringify(rnds);
  }
  offset = offset || 0;
  const outOfBounds = offset < 0 || offset + 16 > buf.length;
  if (outOfBounds) {
    throw new RangeError(`UUID byte range ${offset}:${offset + 15} is out of buffer bounds`);
  }
  for (let i = 0; i < 16; i += 1) {
    buf[offset + i] = rnds[i];
  }
  return buf;
}
var v4_default = v4;
|
|
125
|
+
|
|
126
|
+
// node_modules/uuid/dist/esm/sha1.js
|
|
127
|
+
import { createHash } from "crypto";
|
|
128
|
+
/**
 * Computes the SHA-1 digest of the input.
 *
 * @param {number[]|string|Uint8Array} bytes - Plain arrays are wrapped in a
 *   Buffer, strings are encoded as UTF-8, anything else is passed to the
 *   hasher unchanged.
 * @returns {Buffer} The raw digest.
 */
function sha1(bytes) {
  let input = bytes;
  if (Array.isArray(input)) {
    input = Buffer.from(input);
  } else if (typeof input === "string") {
    input = Buffer.from(input, "utf8");
  }
  const hasher = createHash("sha1");
  hasher.update(input);
  return hasher.digest();
}
var sha1_default = sha1;
|
|
137
|
+
|
|
138
|
+
// node_modules/uuid/dist/esm/v5.js
|
|
139
|
+
function v5(value, namespace, buf, offset) {
|
|
140
|
+
return v35(80, sha1_default, value, namespace, buf, offset);
|
|
141
|
+
}
|
|
142
|
+
v5.DNS = DNS;
|
|
143
|
+
v5.URL = URL2;
|
|
144
|
+
var v5_default = v5;
|
|
15
145
|
|
|
16
146
|
// src/types.ts
|
|
17
147
|
import z from "zod";
|
|
@@ -42,6 +172,8 @@ var ModelConfigSchema = z.object({
|
|
|
42
172
|
// For OpenAI: Only applies to text-embedding-3-small and text-embedding-3-large models
|
|
43
173
|
// Default: 1536 dimensions
|
|
44
174
|
EMBEDDING_DIMENSION: z.string().or(z.number()).optional().transform((val) => val ? typeof val === "string" ? parseInt(val, 10) : val : 1536),
|
|
175
|
+
// config setting
|
|
176
|
+
LOAD_DOCS_ON_STARTUP: z.boolean().default(false),
|
|
45
177
|
// Contextual Knowledge settings
|
|
46
178
|
CTX_KNOWLEDGE_ENABLED: z.boolean().default(false)
|
|
47
179
|
});
|
|
@@ -52,19 +184,22 @@ var KnowledgeServiceType = {
|
|
|
52
184
|
// src/config.ts
|
|
53
185
|
import z2 from "zod";
|
|
54
186
|
import { logger } from "@elizaos/core";
|
|
187
|
+
/**
 * Interprets a setting value as a boolean flag.
 *
 * Booleans pass through unchanged. Strings count as true only when they
 * spell "true", ignoring case and surrounding whitespace (values read from
 * .env files or shells often carry stray spaces or newlines). Every other
 * value, including undefined, yields false.
 *
 * @param {unknown} value - Raw setting value.
 * @returns {boolean}
 */
var parseBooleanEnv = (value) => {
  if (typeof value === "boolean") return value;
  if (typeof value === "string") return value.trim().toLowerCase() === "true";
  return false;
};
|
|
55
192
|
function validateModelConfig(runtime) {
|
|
56
193
|
try {
|
|
57
194
|
const getSetting = (key, defaultValue) => {
|
|
58
195
|
if (runtime) {
|
|
59
|
-
return runtime.getSetting(key) || defaultValue;
|
|
196
|
+
return runtime.getSetting(key) || process.env[key] || defaultValue;
|
|
60
197
|
}
|
|
61
198
|
return process.env[key] || defaultValue;
|
|
62
199
|
};
|
|
63
|
-
const
|
|
64
|
-
const cleanSetting = ctxKnowledgeEnabledSetting?.toString().trim().toLowerCase();
|
|
65
|
-
const ctxKnowledgeEnabled = cleanSetting === "true";
|
|
200
|
+
const ctxKnowledgeEnabled = parseBooleanEnv(getSetting("CTX_KNOWLEDGE_ENABLED", "false"));
|
|
66
201
|
logger.debug(
|
|
67
|
-
`[Document Processor] CTX_KNOWLEDGE_ENABLED: '${
|
|
202
|
+
`[Document Processor] CTX_KNOWLEDGE_ENABLED: '${ctxKnowledgeEnabled} (runtime: ${!!runtime})`
|
|
68
203
|
);
|
|
69
204
|
const embeddingProvider = getSetting("EMBEDDING_PROVIDER");
|
|
70
205
|
const assumePluginOpenAI = !embeddingProvider;
|
|
@@ -101,6 +236,7 @@ function validateModelConfig(runtime) {
|
|
|
101
236
|
MAX_INPUT_TOKENS: getSetting("MAX_INPUT_TOKENS", "4000"),
|
|
102
237
|
MAX_OUTPUT_TOKENS: getSetting("MAX_OUTPUT_TOKENS", "4096"),
|
|
103
238
|
EMBEDDING_DIMENSION: embeddingDimension,
|
|
239
|
+
LOAD_DOCS_ON_STARTUP: parseBooleanEnv(getSetting("LOAD_DOCS_ON_STARTUP")),
|
|
104
240
|
CTX_KNOWLEDGE_ENABLED: ctxKnowledgeEnabled
|
|
105
241
|
});
|
|
106
242
|
validateConfigRequirements(config, assumePluginOpenAI);
|
|
@@ -212,25 +348,6 @@ async function getProviderRateLimits(runtime) {
|
|
|
212
348
|
}
|
|
213
349
|
}
|
|
214
350
|
|
|
215
|
-
// src/service.ts
|
|
216
|
-
import {
|
|
217
|
-
createUniqueUuid,
|
|
218
|
-
logger as logger4,
|
|
219
|
-
MemoryType as MemoryType2,
|
|
220
|
-
ModelType as ModelType2,
|
|
221
|
-
Semaphore,
|
|
222
|
-
Service,
|
|
223
|
-
splitChunks as splitChunks2
|
|
224
|
-
} from "@elizaos/core";
|
|
225
|
-
|
|
226
|
-
// src/document-processor.ts
|
|
227
|
-
import {
|
|
228
|
-
MemoryType,
|
|
229
|
-
ModelType,
|
|
230
|
-
logger as logger3,
|
|
231
|
-
splitChunks
|
|
232
|
-
} from "@elizaos/core";
|
|
233
|
-
|
|
234
351
|
// src/ctx-embeddings.ts
|
|
235
352
|
var DEFAULT_CHUNK_TOKEN_SIZE = 500;
|
|
236
353
|
var DEFAULT_CHUNK_OVERLAP_TOKENS = 100;
|
|
@@ -646,8 +763,8 @@ import { createAnthropic } from "@ai-sdk/anthropic";
|
|
|
646
763
|
import { createOpenRouter } from "@openrouter/ai-sdk-provider";
|
|
647
764
|
import { google } from "@ai-sdk/google";
|
|
648
765
|
import { logger as logger2 } from "@elizaos/core";
|
|
649
|
-
async function generateText(prompt, system, overrideConfig) {
|
|
650
|
-
const config = validateModelConfig();
|
|
766
|
+
async function generateText(runtime, prompt, system, overrideConfig) {
|
|
767
|
+
const config = validateModelConfig(runtime);
|
|
651
768
|
const provider = overrideConfig?.provider || config.TEXT_PROVIDER;
|
|
652
769
|
const modelName = overrideConfig?.modelName || config.TEXT_MODEL;
|
|
653
770
|
const maxTokens = overrideConfig?.maxTokens || config.MAX_OUTPUT_TOKENS;
|
|
@@ -655,11 +772,12 @@ async function generateText(prompt, system, overrideConfig) {
|
|
|
655
772
|
try {
|
|
656
773
|
switch (provider) {
|
|
657
774
|
case "anthropic":
|
|
658
|
-
return await generateAnthropicText(prompt, system, modelName, maxTokens);
|
|
775
|
+
return await generateAnthropicText(config, prompt, system, modelName, maxTokens);
|
|
659
776
|
case "openai":
|
|
660
|
-
return await generateOpenAIText(prompt, system, modelName, maxTokens);
|
|
777
|
+
return await generateOpenAIText(config, prompt, system, modelName, maxTokens);
|
|
661
778
|
case "openrouter":
|
|
662
779
|
return await generateOpenRouterText(
|
|
780
|
+
config,
|
|
663
781
|
prompt,
|
|
664
782
|
system,
|
|
665
783
|
modelName,
|
|
@@ -678,8 +796,7 @@ async function generateText(prompt, system, overrideConfig) {
|
|
|
678
796
|
throw error;
|
|
679
797
|
}
|
|
680
798
|
}
|
|
681
|
-
async function generateAnthropicText(prompt, system, modelName, maxTokens) {
|
|
682
|
-
const config = validateModelConfig();
|
|
799
|
+
async function generateAnthropicText(config, prompt, system, modelName, maxTokens) {
|
|
683
800
|
const anthropic = createAnthropic({
|
|
684
801
|
apiKey: config.ANTHROPIC_API_KEY,
|
|
685
802
|
baseURL: config.ANTHROPIC_BASE_URL
|
|
@@ -707,7 +824,7 @@ async function generateAnthropicText(prompt, system, modelName, maxTokens) {
|
|
|
707
824
|
logger2.warn(
|
|
708
825
|
`[Document Processor] Rate limit hit (${modelName}): attempt ${attempt + 1}/${maxRetries}, retrying in ${Math.round(delay / 1e3)}s`
|
|
709
826
|
);
|
|
710
|
-
await new Promise((
|
|
827
|
+
await new Promise((resolve2) => setTimeout(resolve2, delay));
|
|
711
828
|
continue;
|
|
712
829
|
}
|
|
713
830
|
throw error;
|
|
@@ -715,8 +832,7 @@ async function generateAnthropicText(prompt, system, modelName, maxTokens) {
|
|
|
715
832
|
}
|
|
716
833
|
throw new Error("Max retries exceeded for Anthropic text generation");
|
|
717
834
|
}
|
|
718
|
-
async function generateOpenAIText(prompt, system, modelName, maxTokens) {
|
|
719
|
-
const config = validateModelConfig();
|
|
835
|
+
async function generateOpenAIText(config, prompt, system, modelName, maxTokens) {
|
|
720
836
|
const openai = createOpenAI({
|
|
721
837
|
apiKey: config.OPENAI_API_KEY,
|
|
722
838
|
baseURL: config.OPENAI_BASE_URL
|
|
@@ -754,8 +870,7 @@ async function generateGoogleText(prompt, system, modelName, maxTokens, config)
|
|
|
754
870
|
);
|
|
755
871
|
return result;
|
|
756
872
|
}
|
|
757
|
-
async function generateOpenRouterText(prompt, system, modelName, maxTokens, cacheDocument, cacheOptions, autoCacheContextualRetrieval = true) {
|
|
758
|
-
const config = validateModelConfig();
|
|
873
|
+
async function generateOpenRouterText(config, prompt, system, modelName, maxTokens, cacheDocument, cacheOptions, autoCacheContextualRetrieval = true) {
|
|
759
874
|
const openrouter = createOpenRouter({
|
|
760
875
|
apiKey: config.OPENROUTER_API_KEY,
|
|
761
876
|
baseURL: config.OPENROUTER_BASE_URL
|
|
@@ -962,6 +1077,359 @@ function logCacheMetrics(result) {
|
|
|
962
1077
|
}
|
|
963
1078
|
}
|
|
964
1079
|
|
|
1080
|
+
// src/utils.ts
|
|
1081
|
+
import { Buffer as Buffer2 } from "buffer";
|
|
1082
|
+
import * as mammoth from "mammoth";
|
|
1083
|
+
import { logger as logger3 } from "@elizaos/core";
|
|
1084
|
+
import { getDocument } from "pdfjs-dist/legacy/build/pdf.mjs";
|
|
1085
|
+
import { createHash as createHash2 } from "crypto";
|
|
1086
|
+
// Media types outside the text/* family that are nevertheless decoded as UTF-8 text.
var PLAIN_TEXT_CONTENT_TYPES = [
  "application/typescript",
  "text/typescript",
  "text/x-python",
  "application/x-python-code",
  "application/yaml",
  "text/yaml",
  "application/x-yaml",
  "application/json",
  "text/markdown",
  "text/csv"
];
// Upper size bound (5 MiB) for attempting the plain-text fallback on unknown types.
var MAX_FALLBACK_SIZE_BYTES = 5 * 1024 * 1024;
// Number of leading bytes scanned for a NUL byte by the binary heuristic.
var BINARY_CHECK_BYTES = 1024;

/**
 * Extracts text from an in-memory file according to its content type.
 *
 * - DOCX: parsed with mammoth.
 * - Legacy Word (.doc by media type or file extension): returns a fixed placeholder.
 * - text/* and the types in PLAIN_TEXT_CONTENT_TYPES: decoded as UTF-8.
 * - Anything else: decoded as UTF-8 only if the file is within the fallback size
 *   limit, has no NUL byte in its first BINARY_CHECK_BYTES bytes and decodes
 *   without U+FFFD replacement characters.
 *
 * The content type is matched on its bare media type: parameters such as
 * "; charset=utf-8" are ignored and case does not matter.
 *
 * @param {Buffer} fileBuffer - File contents.
 * @param {string} contentType - Content-Type value, optionally with parameters.
 * @param {string} originalFilename - Used for the .doc check and in log/error messages.
 * @returns {Promise<string>} Extracted text.
 * @throws {Error} When DOCX parsing fails or the fallback rejects the file; the
 *   underlying error, when there is one, is attached as `cause`.
 */
async function extractTextFromFileBuffer(fileBuffer, contentType, originalFilename) {
  // Drop any "; param=value" suffix so exact comparisons below still match
  // headers like "application/json; charset=utf-8".
  const lowerContentType = contentType.split(";")[0].trim().toLowerCase();
  logger3.debug(
    `[TextUtil] Attempting to extract text from ${originalFilename} (type: ${contentType})`
  );

  if (lowerContentType === "application/vnd.openxmlformats-officedocument.wordprocessingml.document") {
    logger3.debug(`[TextUtil] Extracting text from DOCX ${originalFilename} via mammoth.`);
    try {
      const result = await mammoth.extractRawText({ buffer: fileBuffer });
      logger3.debug(
        `[TextUtil] DOCX text extraction complete for ${originalFilename}. Text length: ${result.value.length}`
      );
      return result.value;
    } catch (docxError) {
      const errorMsg = `[TextUtil] Failed to parse DOCX file ${originalFilename}: ${docxError.message}`;
      logger3.error(errorMsg, docxError.stack);
      throw new Error(errorMsg, { cause: docxError });
    }
  }

  if (lowerContentType === "application/msword" || originalFilename.toLowerCase().endsWith(".doc")) {
    // No parser is applied to legacy .doc files here; a placeholder is returned instead.
    logger3.debug(`[TextUtil] Handling Microsoft Word .doc file: ${originalFilename}`);
    return `[Microsoft Word Document: ${originalFilename}]

This document was indexed for search but cannot be displayed directly in the browser. The original document content is preserved for retrieval purposes.`;
  }

  if (lowerContentType.startsWith("text/") || PLAIN_TEXT_CONTENT_TYPES.includes(lowerContentType)) {
    logger3.debug(
      `[TextUtil] Extracting text from plain text compatible file ${originalFilename} (type: ${contentType})`
    );
    return fileBuffer.toString("utf-8");
  }

  logger3.warn(
    `[TextUtil] Unsupported content type: "${contentType}" for ${originalFilename}. Attempting fallback to plain text.`
  );
  if (fileBuffer.length > MAX_FALLBACK_SIZE_BYTES) {
    const sizeErrorMsg = `[TextUtil] File ${originalFilename} (type: ${contentType}) exceeds maximum size for fallback (${MAX_FALLBACK_SIZE_BYTES} bytes). Cannot process as plain text.`;
    logger3.error(sizeErrorMsg);
    throw new Error(sizeErrorMsg);
  }
  // Cheap binary heuristic: a NUL byte near the start means "not text".
  const initialBytes = fileBuffer.subarray(0, Math.min(fileBuffer.length, BINARY_CHECK_BYTES));
  if (initialBytes.includes(0)) {
    const binaryHeuristicMsg = `[TextUtil] File ${originalFilename} (type: ${contentType}) appears to be binary based on initial byte check. Cannot process as plain text.`;
    logger3.error(binaryHeuristicMsg);
    throw new Error(binaryHeuristicMsg);
  }
  try {
    const textContent = fileBuffer.toString("utf-8");
    // U+FFFD appears wherever the bytes were not valid UTF-8.
    if (textContent.includes("\uFFFD")) {
      const binaryErrorMsg = `[TextUtil] File ${originalFilename} (type: ${contentType}) seems to be binary or has encoding issues after fallback to plain text (detected \uFFFD).`;
      logger3.error(binaryErrorMsg);
      throw new Error(binaryErrorMsg);
    }
    logger3.debug(
      `[TextUtil] Successfully processed unknown type ${contentType} as plain text after fallback for ${originalFilename}.`
    );
    return textContent;
  } catch (fallbackError) {
    // Also reached for the replacement-character error thrown just above;
    // callers always see the generic message, with the detail kept as `cause`.
    const finalErrorMsg = `[TextUtil] Unsupported content type: ${contentType} for ${originalFilename}. Fallback to plain text also failed or indicated binary content.`;
    logger3.error(finalErrorMsg, fallbackError.message ? fallbackError.stack : void 0);
    throw new Error(finalErrorMsg, { cause: fallbackError });
  }
}
|
|
1161
|
+
/**
 * Extracts the text of a PDF held in memory.
 *
 * For every page, text items are grouped by their rounded `transform[5]`
 * value, groups are ordered from the largest value to the smallest, and the
 * items inside a group are ordered by ascending `transform[4]`. All
 * whitespace runs in the final result are collapsed to single spaces.
 *
 * @param {Buffer|Uint8Array|ArrayBuffer} pdfBuffer - Raw PDF bytes.
 * @param {string} [filename] - Name used in log messages only.
 * @returns {Promise<string>} The trimmed, whitespace-normalised text.
 * @throws {Error} "Failed to convert PDF to text: ..." on any failure.
 */
async function convertPdfToTextFromBuffer(pdfBuffer, filename) {
  const docName = filename || "unnamed-document";
  logger3.debug(`[PdfService] Starting conversion for ${docName}`);
  try {
    const data = new Uint8Array(pdfBuffer);
    const pdf = await getDocument({ data }).promise;
    const pageCount = pdf.numPages;
    const pageTexts = [];
    for (let pageNum = 1; pageNum <= pageCount; pageNum++) {
      logger3.debug(`[PdfService] Processing page ${pageNum}/${pageCount}`);
      const page = await pdf.getPage(pageNum);
      const textContent = await page.getTextContent();

      // Bucket the text items by rounded vertical coordinate.
      const rows = /* @__PURE__ */ new Map();
      for (const item of textContent.items.filter(isTextItem)) {
        const rowKey = Math.round(item.transform[5]);
        if (!rows.has(rowKey)) {
          rows.set(rowKey, []);
        }
        rows.get(rowKey).push(item);
      }

      const orderedRows = Array.from(rows.entries());
      orderedRows.sort((rowA, rowB) => rowB[0] - rowA[0]);
      const lines = orderedRows.map(([, rowItems]) => {
        rowItems.sort((left, right) => left.transform[4] - right.transform[4]);
        return rowItems.map((entry) => entry.str).join(" ");
      });
      pageTexts.push(lines.join("\n"));
    }
    // Note: the line and page separators added above are themselves
    // whitespace, so they end up as single spaces in the result.
    const fullText = pageTexts.join("\n\n").replace(/\s+/g, " ").trim();
    logger3.debug(`[PdfService] Conversion complete for ${docName}, length: ${fullText.length}`);
    return fullText;
  } catch (error) {
    logger3.error(`[PdfService] Error converting PDF ${docName}:`, error.message);
    throw new Error(`Failed to convert PDF to text: ${error.message}`);
  }
}
|
|
1194
|
+
/**
 * Decides whether a file should be treated as binary.
 *
 * Checks run in this order and the first match wins:
 *   1. content type contains a known text marker        -> not binary
 *   2. content type contains a known binary marker      -> binary
 *   3. file extension is in the text list               -> not binary
 *   4. file extension is in the binary list             -> binary
 *   5. otherwise                                        -> not binary
 *
 * Matching is case-insensitive for both the content type and the extension.
 * A missing content type or filename is treated as an empty string. For a
 * name without a dot the whole name is used as the "extension", which is how
 * entries such as "makefile" and "dockerfile" match.
 *
 * @param {string} contentType - Content-Type value (parameters are allowed).
 * @param {string} filename - File name used for the extension checks.
 * @returns {boolean} true when the file is considered binary.
 */
function isBinaryContentType(contentType, filename) {
  // Media types are case-insensitive, so compare in lowercase.
  const mime = String(contentType ?? "").toLowerCase();
  const name = String(filename ?? "");

  const textContentTypes = [
    "text/",
    "application/json",
    "application/xml",
    "application/javascript",
    "application/typescript",
    "application/x-yaml",
    "application/x-sh"
  ];
  if (textContentTypes.some((type) => mime.includes(type))) {
    return false;
  }

  const binaryContentTypes = [
    "application/pdf",
    "application/msword",
    "application/vnd.openxmlformats-officedocument",
    "application/vnd.ms-excel",
    "application/vnd.ms-powerpoint",
    "application/zip",
    "application/x-zip-compressed",
    "application/octet-stream",
    "image/",
    "audio/",
    "video/"
  ];
  if (binaryContentTypes.some((type) => mime.includes(type))) {
    return true;
  }

  // The content type was inconclusive; fall back to the file extension.
  const fileExt = name.split(".").pop()?.toLowerCase() || "";
  const textExtensions = [
    "txt",
    "md",
    "markdown",
    "json",
    "xml",
    "html",
    "htm",
    "css",
    "js",
    "ts",
    "jsx",
    "tsx",
    "yaml",
    "yml",
    "toml",
    "ini",
    "cfg",
    "conf",
    "sh",
    "bash",
    "zsh",
    "fish",
    "py",
    "rb",
    "go",
    "rs",
    "java",
    "c",
    "cpp",
    "h",
    "hpp",
    "cs",
    "php",
    "sql",
    "r",
    "swift",
    "kt",
    "scala",
    "clj",
    "ex",
    "exs",
    "vim",
    "env",
    "gitignore",
    "dockerignore",
    "editorconfig",
    "log",
    "csv",
    "tsv",
    "properties",
    "gradle",
    "sbt",
    "makefile",
    "dockerfile",
    "vagrantfile",
    "gemfile",
    "rakefile",
    "podfile",
    "csproj",
    "vbproj",
    "fsproj",
    "sln",
    "pom"
  ];
  if (textExtensions.includes(fileExt)) {
    return false;
  }

  const binaryExtensions = [
    "pdf",
    "docx",
    "doc",
    "xls",
    "xlsx",
    "ppt",
    "pptx",
    "zip",
    "rar",
    "7z",
    "tar",
    "gz",
    "bz2",
    "xz",
    "jpg",
    "jpeg",
    "png",
    "gif",
    "bmp",
    "svg",
    "ico",
    "webp",
    "mp3",
    "mp4",
    "avi",
    "mov",
    "wmv",
    "flv",
    "wav",
    "flac",
    "ogg",
    "exe",
    "dll",
    "so",
    "dylib",
    "bin",
    "dat",
    "db",
    "sqlite"
  ];
  return binaryExtensions.includes(fileExt);
}
|
|
1337
|
+
/**
 * Tells whether a PDF content entry carries text, i.e. exposes a `str`
 * property (own or inherited).
 *
 * @param {object} item - Entry from a page's text content.
 * @returns {boolean}
 */
function isTextItem(item) {
  const carriesText = "str" in item;
  return carriesText;
}
|
|
1340
|
+
/**
 * Reduces a URL to its origin plus path, dropping the query string and
 * fragment.
 *
 * @param {string} url - URL to normalise.
 * @returns {string} `origin + pathname`, or the input unchanged (with a
 *   warning logged) when it cannot be parsed as a URL.
 */
function normalizeS3Url(url) {
  let parsed;
  try {
    parsed = new URL(url);
  } catch (error) {
    logger3.warn(`[URL NORMALIZER] Failed to parse URL: ${url}. Returning original.`);
    return url;
  }
  const { origin, pathname } = parsed;
  return `${origin}${pathname}`;
}
|
|
1349
|
+
/**
 * Downloads a URL and returns its body as base64 together with the
 * server-reported content type.
 *
 * The request is aborted if no response arrives within 30 seconds. The
 * abort timer is always cleared once the request settles, whether it
 * succeeded or failed, so a failed request does not leave a pending timer.
 *
 * @param {string} url - Address to fetch.
 * @returns {Promise<{content: string, contentType: string}>} Base64 body and
 *   the Content-Type header ("application/octet-stream" when absent).
 * @throws {Error} "Failed to fetch content from URL: ..." on network errors,
 *   timeouts and non-2xx responses; the original error is attached as `cause`.
 */
async function fetchUrlContent(url) {
  logger3.debug(`[URL FETCHER] Fetching content from URL: ${url}`);
  try {
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), 3e4);
    let response;
    try {
      response = await fetch(url, {
        signal: controller.signal,
        headers: {
          "User-Agent": "Eliza-Knowledge-Plugin/1.0"
        }
      });
    } finally {
      // Runs on success and on rejection alike.
      clearTimeout(timeoutId);
    }
    if (!response.ok) {
      throw new Error(`Failed to fetch URL: ${response.status} ${response.statusText}`);
    }
    const contentType = response.headers.get("content-type") || "application/octet-stream";
    logger3.debug(`[URL FETCHER] Content type from server: ${contentType} for URL: ${url}`);
    const arrayBuffer = await response.arrayBuffer();
    const buffer = Buffer2.from(arrayBuffer);
    const base64Content = buffer.toString("base64");
    logger3.debug(
      `[URL FETCHER] Successfully fetched content from URL: ${url} (${buffer.length} bytes)`
    );
    return {
      content: base64Content,
      contentType
    };
  } catch (error) {
    logger3.error(`[URL FETCHER] Error fetching content from URL ${url}: ${error.message}`);
    throw new Error(`Failed to fetch content from URL: ${error.message}`, { cause: error });
  }
}
|
|
1381
|
+
/**
 * Heuristic check for base64-encoded content.
 *
 * After whitespace is removed the text must be at least 16 characters long,
 * have a length divisible by 4, consist only of base64 alphabet characters
 * with at most two trailing "=", and contain at least one lowercase letter
 * together with at least one digit or uppercase letter.
 *
 * @param {string} content - Text to inspect.
 * @returns {boolean} true when the text passes every check above.
 */
function looksLikeBase64(content) {
  if (!content || content.length === 0) {
    return false;
  }
  const compact = content.replace(/\s/g, "");
  const hasValidShape =
    compact.length >= 16 &&
    compact.length % 4 === 0 &&
    /^[A-Za-z0-9+/]*={0,2}$/.test(compact);
  if (!hasValidShape) {
    return false;
  }
  // Require a mix of character classes so plain lowercase words or
  // all-caps identifiers are not mistaken for base64.
  if (!/[a-z]/.test(compact)) {
    return false;
  }
  return /\d/.test(compact) || /[A-Z]/.test(compact);
}
|
|
1393
|
+
/**
 * Derives a deterministic document UUID from the agent id, the leading part
 * of the content and, optionally, a filename.
 *
 * The text that gets hashed is chosen as follows:
 *   - content that does not look like base64: the content itself;
 *   - base64 content whose decoded UTF-8 form has no U+FFFD, or whose
 *     `contentType` mentions "pdf": the base64 text itself;
 *   - any other base64 content: the decoded text.
 * Only the first `maxChars` characters are used; line endings are normalised
 * to "\n" and the result is trimmed. The non-empty components are joined
 * with "::", hashed with SHA-256, and the hex digest is fed to `v5_default`.
 *
 * @param {string} content - Document content (plain text or base64).
 * @param {string} agentId - Agent identifier, so ids are scoped per agent.
 * @param {{maxChars?: number, includeFilename?: string, contentType?: string}} [options]
 *   `maxChars` defaults to 2000.
 * @returns {string} The UUID produced by `v5_default`.
 */
function generateContentBasedId(content, agentId, options) {
  const { maxChars = 2e3, includeFilename, contentType } = options || {};

  // Stays null unless the decoded form is what should be hashed.
  let decodedForHashing = null;
  if (looksLikeBase64(content)) {
    try {
      const decoded = Buffer2.from(content, "base64").toString("utf8");
      const keepEncoded = !decoded.includes("\uFFFD") || contentType?.includes("pdf");
      if (!keepEncoded) {
        decodedForHashing = decoded;
      }
    } catch {
      // Any failure while decoding falls back to hashing the raw content.
    }
  }
  const hashSource = decodedForHashing === null ? content : decodedForHashing;
  // "\r\n" and lone "\r" both become "\n".
  const contentForHashing = hashSource.slice(0, maxChars).replace(/\r\n?/g, "\n").trim();

  const componentsToHash = [agentId, contentForHashing, includeFilename || ""]
    .filter((part) => Boolean(part))
    .join("::");
  const hash = createHash2("sha256").update(componentsToHash).digest("hex");
  const DOCUMENT_NAMESPACE = "6ba7b810-9dad-11d1-80b4-00c04fd430c8";
  const uuid = v5_default(hash, DOCUMENT_NAMESPACE);
  logger3.debug(
    `[generateContentBasedId] Generated UUID ${uuid} for document with content hash ${hash.slice(0, 8)}...`
  );
  return uuid;
}
|
|
1432
|
+
|
|
965
1433
|
// src/document-processor.ts
|
|
966
1434
|
function estimateTokens(text) {
|
|
967
1435
|
return Math.ceil(text.length / 4);
|
|
@@ -982,7 +1450,7 @@ function getCtxKnowledgeEnabled(runtime) {
|
|
|
982
1450
|
source = "process.env";
|
|
983
1451
|
}
|
|
984
1452
|
if (process.env.NODE_ENV === "development" && rawValue && !result) {
|
|
985
|
-
|
|
1453
|
+
logger4.debug(`[Document Processor] CTX config mismatch - ${source}: '${rawValue}' \u2192 ${result}`);
|
|
986
1454
|
}
|
|
987
1455
|
return result;
|
|
988
1456
|
}
|
|
@@ -1018,23 +1486,23 @@ async function processFragmentsSynchronously({
|
|
|
1018
1486
|
documentTitle
|
|
1019
1487
|
}) {
|
|
1020
1488
|
if (!fullDocumentText || fullDocumentText.trim() === "") {
|
|
1021
|
-
|
|
1489
|
+
logger4.warn(`No text content available to chunk for document ${documentId}.`);
|
|
1022
1490
|
return 0;
|
|
1023
1491
|
}
|
|
1024
1492
|
const chunks = await splitDocumentIntoChunks(fullDocumentText);
|
|
1025
1493
|
if (chunks.length === 0) {
|
|
1026
|
-
|
|
1494
|
+
logger4.warn(`No chunks generated from text for ${documentId}. No fragments to save.`);
|
|
1027
1495
|
return 0;
|
|
1028
1496
|
}
|
|
1029
1497
|
const docName = documentTitle || documentId.substring(0, 8);
|
|
1030
|
-
|
|
1498
|
+
logger4.info(`[Document Processor] "${docName}": Split into ${chunks.length} chunks`);
|
|
1031
1499
|
const providerLimits = await getProviderRateLimits();
|
|
1032
1500
|
const CONCURRENCY_LIMIT = Math.min(30, providerLimits.maxConcurrentRequests || 30);
|
|
1033
1501
|
const rateLimiter = createRateLimiter(
|
|
1034
1502
|
providerLimits.requestsPerMinute || 60,
|
|
1035
1503
|
providerLimits.tokensPerMinute
|
|
1036
1504
|
);
|
|
1037
|
-
|
|
1505
|
+
logger4.debug(
|
|
1038
1506
|
`[Document Processor] Rate limits: ${providerLimits.requestsPerMinute} RPM, ${providerLimits.tokensPerMinute} TPM (${providerLimits.provider}, concurrency: ${CONCURRENCY_LIMIT})`
|
|
1039
1507
|
);
|
|
1040
1508
|
const { savedCount, failedCount } = await processAndSaveFragments({
|
|
@@ -1053,11 +1521,11 @@ async function processFragmentsSynchronously({
|
|
|
1053
1521
|
});
|
|
1054
1522
|
const successRate = (savedCount / chunks.length * 100).toFixed(1);
|
|
1055
1523
|
if (failedCount > 0) {
|
|
1056
|
-
|
|
1524
|
+
logger4.warn(
|
|
1057
1525
|
`[Document Processor] "${docName}": ${failedCount}/${chunks.length} chunks failed processing`
|
|
1058
1526
|
);
|
|
1059
1527
|
}
|
|
1060
|
-
|
|
1528
|
+
logger4.info(
|
|
1061
1529
|
`[Document Processor] "${docName}" complete: ${savedCount}/${chunks.length} fragments saved (${successRate}% success)`
|
|
1062
1530
|
);
|
|
1063
1531
|
logKnowledgeGenerationSummary({
|
|
@@ -1077,15 +1545,15 @@ async function extractTextFromDocument(fileBuffer, contentType, originalFilename
|
|
|
1077
1545
|
}
|
|
1078
1546
|
try {
|
|
1079
1547
|
if (contentType === "application/pdf") {
|
|
1080
|
-
|
|
1548
|
+
logger4.debug(`Extracting text from PDF: ${originalFilename}`);
|
|
1081
1549
|
return await convertPdfToTextFromBuffer(fileBuffer, originalFilename);
|
|
1082
1550
|
} else {
|
|
1083
|
-
|
|
1551
|
+
logger4.debug(`Extracting text from non-PDF: ${originalFilename} (Type: ${contentType})`);
|
|
1084
1552
|
if (contentType.includes("text/") || contentType.includes("application/json") || contentType.includes("application/xml")) {
|
|
1085
1553
|
try {
|
|
1086
1554
|
return fileBuffer.toString("utf8");
|
|
1087
1555
|
} catch (textError) {
|
|
1088
|
-
|
|
1556
|
+
logger4.warn(
|
|
1089
1557
|
`Failed to decode ${originalFilename} as UTF-8, falling back to binary extraction`
|
|
1090
1558
|
);
|
|
1091
1559
|
}
|
|
@@ -1093,7 +1561,7 @@ async function extractTextFromDocument(fileBuffer, contentType, originalFilename
|
|
|
1093
1561
|
return await extractTextFromFileBuffer(fileBuffer, contentType, originalFilename);
|
|
1094
1562
|
}
|
|
1095
1563
|
} catch (error) {
|
|
1096
|
-
|
|
1564
|
+
logger4.error(`Error extracting text from ${originalFilename}: ${error.message}`);
|
|
1097
1565
|
throw new Error(`Failed to extract text from ${originalFilename}: ${error.message}`);
|
|
1098
1566
|
}
|
|
1099
1567
|
}
|
|
@@ -1138,7 +1606,7 @@ async function splitDocumentIntoChunks(documentText) {
|
|
|
1138
1606
|
const tokenChunkOverlap = DEFAULT_CHUNK_OVERLAP_TOKENS;
|
|
1139
1607
|
const targetCharChunkSize = Math.round(tokenChunkSize * DEFAULT_CHARS_PER_TOKEN);
|
|
1140
1608
|
const targetCharChunkOverlap = Math.round(tokenChunkOverlap * DEFAULT_CHARS_PER_TOKEN);
|
|
1141
|
-
|
|
1609
|
+
logger4.debug(
|
|
1142
1610
|
`Using core splitChunks with settings: tokenChunkSize=${tokenChunkSize}, tokenChunkOverlap=${tokenChunkOverlap}, charChunkSize=${targetCharChunkSize}, charChunkOverlap=${targetCharChunkOverlap}`
|
|
1143
1611
|
);
|
|
1144
1612
|
return await splitChunks(documentText, tokenChunkSize, tokenChunkOverlap);
|
|
@@ -1163,7 +1631,7 @@ async function processAndSaveFragments({
|
|
|
1163
1631
|
for (let i = 0; i < chunks.length; i += concurrencyLimit) {
|
|
1164
1632
|
const batchChunks = chunks.slice(i, i + concurrencyLimit);
|
|
1165
1633
|
const batchOriginalIndices = Array.from({ length: batchChunks.length }, (_, k) => i + k);
|
|
1166
|
-
|
|
1634
|
+
logger4.debug(
|
|
1167
1635
|
`[Document Processor] Batch ${Math.floor(i / concurrencyLimit) + 1}/${Math.ceil(chunks.length / concurrencyLimit)}: processing ${batchChunks.length} chunks (${batchOriginalIndices[0]}-${batchOriginalIndices[batchOriginalIndices.length - 1]})`
|
|
1168
1636
|
);
|
|
1169
1637
|
const contextualizedChunks = await getContextualizedChunks(
|
|
@@ -1184,13 +1652,13 @@ async function processAndSaveFragments({
|
|
|
1184
1652
|
if (!result.success) {
|
|
1185
1653
|
failedCount++;
|
|
1186
1654
|
failedChunks.push(originalChunkIndex);
|
|
1187
|
-
|
|
1655
|
+
logger4.warn(`Failed to process chunk ${originalChunkIndex} for document ${documentId}`);
|
|
1188
1656
|
continue;
|
|
1189
1657
|
}
|
|
1190
1658
|
const contextualizedChunkText = result.text;
|
|
1191
1659
|
const embedding = result.embedding;
|
|
1192
1660
|
if (!embedding || embedding.length === 0) {
|
|
1193
|
-
|
|
1661
|
+
logger4.warn(
|
|
1194
1662
|
`Zero vector detected for chunk ${originalChunkIndex} (document ${documentId}). Embedding: ${JSON.stringify(result.embedding)}`
|
|
1195
1663
|
);
|
|
1196
1664
|
failedCount++;
|
|
@@ -1217,13 +1685,13 @@ async function processAndSaveFragments({
|
|
|
1217
1685
|
await runtime.createMemory(fragmentMemory, "knowledge");
|
|
1218
1686
|
if (originalChunkIndex === chunks.length - 1) {
|
|
1219
1687
|
const docName = documentTitle || documentId.substring(0, 8);
|
|
1220
|
-
|
|
1688
|
+
logger4.info(
|
|
1221
1689
|
`[Document Processor] "${docName}": All ${chunks.length} chunks processed successfully`
|
|
1222
1690
|
);
|
|
1223
1691
|
}
|
|
1224
1692
|
savedCount++;
|
|
1225
1693
|
} catch (saveError) {
|
|
1226
|
-
|
|
1694
|
+
logger4.error(
|
|
1227
1695
|
`Error saving chunk ${originalChunkIndex} to database: ${saveError.message}`,
|
|
1228
1696
|
saveError.stack
|
|
1229
1697
|
);
|
|
@@ -1232,7 +1700,7 @@ async function processAndSaveFragments({
|
|
|
1232
1700
|
}
|
|
1233
1701
|
}
|
|
1234
1702
|
if (i + concurrencyLimit < chunks.length) {
|
|
1235
|
-
await new Promise((
|
|
1703
|
+
await new Promise((resolve2) => setTimeout(resolve2, 500));
|
|
1236
1704
|
}
|
|
1237
1705
|
}
|
|
1238
1706
|
return { savedCount, failedCount, failedChunks };
|
|
@@ -1286,7 +1754,7 @@ async function generateEmbeddingsForChunks(runtime, contextualizedChunks, rateLi
|
|
|
1286
1754
|
text: contextualizedChunk.contextualizedText
|
|
1287
1755
|
};
|
|
1288
1756
|
} catch (error) {
|
|
1289
|
-
|
|
1757
|
+
logger4.error(
|
|
1290
1758
|
`Error generating embedding for chunk ${contextualizedChunk.index}: ${error.message}`
|
|
1291
1759
|
);
|
|
1292
1760
|
return {
|
|
@@ -1305,7 +1773,7 @@ async function getContextualizedChunks(runtime, fullDocumentText, chunks, conten
|
|
|
1305
1773
|
const docName = documentTitle || "Document";
|
|
1306
1774
|
const provider = runtime?.getSetting("TEXT_PROVIDER") || process.env.TEXT_PROVIDER;
|
|
1307
1775
|
const model = runtime?.getSetting("TEXT_MODEL") || process.env.TEXT_MODEL;
|
|
1308
|
-
|
|
1776
|
+
logger4.info(
|
|
1309
1777
|
`[Document Processor] "${docName}": CTX enrichment ${ctxEnabled ? "ENABLED" : "DISABLED"}${ctxEnabled ? ` (${provider}/${model})` : ""}`
|
|
1310
1778
|
);
|
|
1311
1779
|
}
|
|
@@ -1319,7 +1787,7 @@ async function getContextualizedChunks(runtime, fullDocumentText, chunks, conten
|
|
|
1319
1787
|
documentTitle
|
|
1320
1788
|
);
|
|
1321
1789
|
} else if (!ctxEnabled && batchOriginalIndices[0] === 0) {
|
|
1322
|
-
|
|
1790
|
+
logger4.debug(
|
|
1323
1791
|
`[Document Processor] To enable CTX: Set CTX_KNOWLEDGE_ENABLED=true and configure TEXT_PROVIDER/TEXT_MODEL`
|
|
1324
1792
|
);
|
|
1325
1793
|
}
|
|
@@ -1338,10 +1806,10 @@ async function generateContextsInBatch(runtime, fullDocumentText, chunks, conten
|
|
|
1338
1806
|
providerLimits.requestsPerMinute || 60,
|
|
1339
1807
|
providerLimits.tokensPerMinute
|
|
1340
1808
|
);
|
|
1341
|
-
const config = validateModelConfig();
|
|
1809
|
+
const config = validateModelConfig(runtime);
|
|
1342
1810
|
const isUsingOpenRouter = config.TEXT_PROVIDER === "openrouter";
|
|
1343
1811
|
const isUsingCacheCapableModel = isUsingOpenRouter && (config.TEXT_MODEL?.toLowerCase().includes("claude") || config.TEXT_MODEL?.toLowerCase().includes("gemini"));
|
|
1344
|
-
|
|
1812
|
+
logger4.debug(
|
|
1345
1813
|
`[Document Processor] Contextualizing ${chunks.length} chunks with ${config.TEXT_PROVIDER}/${config.TEXT_MODEL} (cache: ${isUsingCacheCapableModel})`
|
|
1346
1814
|
);
|
|
1347
1815
|
const promptConfigs = prepareContextPrompts(
|
|
@@ -1367,13 +1835,13 @@ async function generateContextsInBatch(runtime, fullDocumentText, chunks, conten
|
|
|
1367
1835
|
const generateTextOperation = async () => {
|
|
1368
1836
|
if (useCustomLLM) {
|
|
1369
1837
|
if (item.usesCaching) {
|
|
1370
|
-
return await generateText(item.promptText, item.systemPrompt, {
|
|
1838
|
+
return await generateText(runtime, item.promptText, item.systemPrompt, {
|
|
1371
1839
|
cacheDocument: item.fullDocumentTextForContext,
|
|
1372
1840
|
cacheOptions: { type: "ephemeral" },
|
|
1373
1841
|
autoCacheContextualRetrieval: true
|
|
1374
1842
|
});
|
|
1375
1843
|
} else {
|
|
1376
|
-
return await generateText(item.prompt);
|
|
1844
|
+
return await generateText(runtime, item.prompt);
|
|
1377
1845
|
}
|
|
1378
1846
|
} else {
|
|
1379
1847
|
if (item.usesCaching) {
|
|
@@ -1396,7 +1864,7 @@ async function generateContextsInBatch(runtime, fullDocumentText, chunks, conten
|
|
|
1396
1864
|
const contextualizedText = getChunkWithContext(item.chunkText, generatedContext);
|
|
1397
1865
|
if ((item.originalIndex + 1) % Math.max(1, Math.floor(chunks.length / 3)) === 0 || item.originalIndex === chunks.length - 1) {
|
|
1398
1866
|
const docName = documentTitle || "Document";
|
|
1399
|
-
|
|
1867
|
+
logger4.debug(
|
|
1400
1868
|
`[Document Processor] "${docName}": Context added for ${item.originalIndex + 1}/${chunks.length} chunks`
|
|
1401
1869
|
);
|
|
1402
1870
|
}
|
|
@@ -1406,7 +1874,7 @@ async function generateContextsInBatch(runtime, fullDocumentText, chunks, conten
|
|
|
1406
1874
|
index: item.originalIndex
|
|
1407
1875
|
};
|
|
1408
1876
|
} catch (error) {
|
|
1409
|
-
|
|
1877
|
+
logger4.error(
|
|
1410
1878
|
`Error generating context for chunk ${item.originalIndex}: ${error.message}`,
|
|
1411
1879
|
error.stack
|
|
1412
1880
|
);
|
|
@@ -1427,7 +1895,7 @@ function prepareContextPrompts(chunks, fullDocumentText, contentType, batchIndic
|
|
|
1427
1895
|
if (isUsingCacheCapableModel) {
|
|
1428
1896
|
const cachingPromptInfo = contentType ? getCachingPromptForMimeType(contentType, chunkText) : getCachingContextualizationPrompt(chunkText);
|
|
1429
1897
|
if (cachingPromptInfo.prompt.startsWith("Error:")) {
|
|
1430
|
-
|
|
1898
|
+
logger4.warn(
|
|
1431
1899
|
`Skipping contextualization for chunk ${originalIndex} due to: ${cachingPromptInfo.prompt}`
|
|
1432
1900
|
);
|
|
1433
1901
|
return {
|
|
@@ -1449,7 +1917,7 @@ function prepareContextPrompts(chunks, fullDocumentText, contentType, batchIndic
|
|
|
1449
1917
|
} else {
|
|
1450
1918
|
const prompt = contentType ? getPromptForMimeType(contentType, fullDocumentText, chunkText) : getContextualizationPrompt(fullDocumentText, chunkText);
|
|
1451
1919
|
if (prompt.startsWith("Error:")) {
|
|
1452
|
-
|
|
1920
|
+
logger4.warn(`Skipping contextualization for chunk ${originalIndex} due to: ${prompt}`);
|
|
1453
1921
|
return {
|
|
1454
1922
|
prompt: null,
|
|
1455
1923
|
originalIndex,
|
|
@@ -1467,7 +1935,7 @@ function prepareContextPrompts(chunks, fullDocumentText, contentType, batchIndic
|
|
|
1467
1935
|
};
|
|
1468
1936
|
}
|
|
1469
1937
|
} catch (error) {
|
|
1470
|
-
|
|
1938
|
+
logger4.error(
|
|
1471
1939
|
`Error preparing prompt for chunk ${originalIndex}: ${error.message}`,
|
|
1472
1940
|
error.stack
|
|
1473
1941
|
);
|
|
@@ -1488,7 +1956,7 @@ async function generateEmbeddingWithValidation(runtime, text) {
|
|
|
1488
1956
|
});
|
|
1489
1957
|
const embedding = Array.isArray(embeddingResult) ? embeddingResult : embeddingResult?.embedding;
|
|
1490
1958
|
if (!embedding || embedding.length === 0) {
|
|
1491
|
-
|
|
1959
|
+
logger4.warn(`Zero vector detected. Embedding result: ${JSON.stringify(embedding)}`);
|
|
1492
1960
|
return {
|
|
1493
1961
|
embedding: null,
|
|
1494
1962
|
success: false,
|
|
@@ -1506,12 +1974,12 @@ async function withRateLimitRetry(operation, errorContext, retryDelay) {
|
|
|
1506
1974
|
} catch (error) {
|
|
1507
1975
|
if (error.status === 429) {
|
|
1508
1976
|
const delay = retryDelay || error.headers?.["retry-after"] || 5;
|
|
1509
|
-
|
|
1510
|
-
await new Promise((
|
|
1977
|
+
logger4.warn(`Rate limit hit for ${errorContext}. Retrying after ${delay}s`);
|
|
1978
|
+
await new Promise((resolve2) => setTimeout(resolve2, delay * 1e3));
|
|
1511
1979
|
try {
|
|
1512
1980
|
return await operation();
|
|
1513
1981
|
} catch (retryError) {
|
|
1514
|
-
|
|
1982
|
+
logger4.error(`Failed after retry for ${errorContext}: ${retryError.message}`);
|
|
1515
1983
|
throw retryError;
|
|
1516
1984
|
}
|
|
1517
1985
|
}
|
|
@@ -1546,15 +2014,15 @@ function createRateLimiter(requestsPerMinute, tokensPerMinute) {
|
|
|
1546
2014
|
if (timeToWait > 0) {
|
|
1547
2015
|
const reason = requestLimitExceeded ? "request" : "token";
|
|
1548
2016
|
if (timeToWait > 5e3) {
|
|
1549
|
-
|
|
2017
|
+
logger4.info(
|
|
1550
2018
|
`[Document Processor] Rate limiting: waiting ${Math.round(timeToWait / 1e3)}s due to ${reason} limit`
|
|
1551
2019
|
);
|
|
1552
2020
|
} else {
|
|
1553
|
-
|
|
2021
|
+
logger4.debug(
|
|
1554
2022
|
`[Document Processor] Rate limiting: ${timeToWait}ms wait (${reason} limit)`
|
|
1555
2023
|
);
|
|
1556
2024
|
}
|
|
1557
|
-
await new Promise((
|
|
2025
|
+
await new Promise((resolve2) => setTimeout(resolve2, timeToWait));
|
|
1558
2026
|
}
|
|
1559
2027
|
}
|
|
1560
2028
|
requestTimes.push(now);
|
|
@@ -1572,20 +2040,230 @@ function logKnowledgeGenerationSummary({
|
|
|
1572
2040
|
}) {
|
|
1573
2041
|
if (failedCount > 0 || process.env.NODE_ENV === "development") {
|
|
1574
2042
|
const status = failedCount > 0 ? "PARTIAL" : "SUCCESS";
|
|
1575
|
-
|
|
2043
|
+
logger4.info(
|
|
1576
2044
|
`[Document Processor] ${status}: ${savedCount}/${totalChunks} chunks, CTX: ${ctxEnabled ? "ON" : "OFF"}, Provider: ${providerLimits.provider}`
|
|
1577
2045
|
);
|
|
1578
2046
|
}
|
|
1579
2047
|
if (failedCount > 0) {
|
|
1580
|
-
|
|
2048
|
+
logger4.warn(`[Document Processor] ${failedCount} chunks failed processing`);
|
|
1581
2049
|
}
|
|
1582
2050
|
}
|
|
1583
2051
|
|
|
2052
|
+
// src/docs-loader.ts
|
|
2053
|
+
import { logger as logger5 } from "@elizaos/core";
|
|
2054
|
+
import * as fs from "fs";
|
|
2055
|
+
import * as path from "path";
|
|
2056
|
+
function getKnowledgePath(runtimePath) {
|
|
2057
|
+
const knowledgePath = runtimePath || process.env.KNOWLEDGE_PATH || path.join(process.cwd(), "docs");
|
|
2058
|
+
const resolvedPath = path.resolve(knowledgePath);
|
|
2059
|
+
if (!fs.existsSync(resolvedPath)) {
|
|
2060
|
+
logger5.warn(`Knowledge path does not exist: ${resolvedPath}`);
|
|
2061
|
+
if (runtimePath) {
|
|
2062
|
+
logger5.warn("Please create the directory or update KNOWLEDGE_PATH in agent settings");
|
|
2063
|
+
} else if (process.env.KNOWLEDGE_PATH) {
|
|
2064
|
+
logger5.warn("Please create the directory or update KNOWLEDGE_PATH environment variable");
|
|
2065
|
+
} else {
|
|
2066
|
+
logger5.info("To use the knowledge plugin, either:");
|
|
2067
|
+
logger5.info('1. Create a "docs" folder in your project root');
|
|
2068
|
+
logger5.info("2. Set KNOWLEDGE_PATH in agent settings or environment variable");
|
|
2069
|
+
}
|
|
2070
|
+
}
|
|
2071
|
+
return resolvedPath;
|
|
2072
|
+
}
|
|
2073
|
+
async function loadDocsFromPath(service, agentId, worldId, knowledgePath) {
|
|
2074
|
+
const docsPath = getKnowledgePath(knowledgePath);
|
|
2075
|
+
if (!fs.existsSync(docsPath)) {
|
|
2076
|
+
logger5.warn(`Knowledge path does not exist: ${docsPath}`);
|
|
2077
|
+
return { total: 0, successful: 0, failed: 0 };
|
|
2078
|
+
}
|
|
2079
|
+
logger5.info(`Loading documents from: ${docsPath}`);
|
|
2080
|
+
const files = getAllFiles(docsPath);
|
|
2081
|
+
if (files.length === 0) {
|
|
2082
|
+
logger5.info("No files found in knowledge path");
|
|
2083
|
+
return { total: 0, successful: 0, failed: 0 };
|
|
2084
|
+
}
|
|
2085
|
+
logger5.info(`Found ${files.length} files to process`);
|
|
2086
|
+
let successful = 0;
|
|
2087
|
+
let failed = 0;
|
|
2088
|
+
for (const filePath of files) {
|
|
2089
|
+
try {
|
|
2090
|
+
const fileName = path.basename(filePath);
|
|
2091
|
+
const fileExt = path.extname(filePath).toLowerCase();
|
|
2092
|
+
if (fileName.startsWith(".")) {
|
|
2093
|
+
continue;
|
|
2094
|
+
}
|
|
2095
|
+
const contentType = getContentType(fileExt);
|
|
2096
|
+
if (!contentType) {
|
|
2097
|
+
logger5.debug(`Skipping unsupported file type: ${filePath}`);
|
|
2098
|
+
continue;
|
|
2099
|
+
}
|
|
2100
|
+
const fileBuffer = fs.readFileSync(filePath);
|
|
2101
|
+
const isBinary = isBinaryContentType(contentType, fileName);
|
|
2102
|
+
const content = isBinary ? fileBuffer.toString("base64") : fileBuffer.toString("utf-8");
|
|
2103
|
+
const knowledgeOptions = {
|
|
2104
|
+
clientDocumentId: "",
|
|
2105
|
+
// Will be generated by the service based on content
|
|
2106
|
+
contentType,
|
|
2107
|
+
originalFilename: fileName,
|
|
2108
|
+
worldId: worldId || agentId,
|
|
2109
|
+
content,
|
|
2110
|
+
roomId: agentId,
|
|
2111
|
+
entityId: agentId
|
|
2112
|
+
};
|
|
2113
|
+
logger5.debug(`Processing document: ${fileName}`);
|
|
2114
|
+
const result = await service.addKnowledge(knowledgeOptions);
|
|
2115
|
+
logger5.info(`\u2705 "${fileName}": ${result.fragmentCount} fragments created`);
|
|
2116
|
+
successful++;
|
|
2117
|
+
} catch (error) {
|
|
2118
|
+
logger5.error(`Failed to process file ${filePath}:`, error);
|
|
2119
|
+
failed++;
|
|
2120
|
+
}
|
|
2121
|
+
}
|
|
2122
|
+
logger5.info(
|
|
2123
|
+
`Document loading complete: ${successful} successful, ${failed} failed out of ${files.length} total`
|
|
2124
|
+
);
|
|
2125
|
+
return {
|
|
2126
|
+
total: files.length,
|
|
2127
|
+
successful,
|
|
2128
|
+
failed
|
|
2129
|
+
};
|
|
2130
|
+
}
|
|
2131
|
+
function getAllFiles(dirPath, files = []) {
|
|
2132
|
+
try {
|
|
2133
|
+
const entries = fs.readdirSync(dirPath, { withFileTypes: true });
|
|
2134
|
+
for (const entry of entries) {
|
|
2135
|
+
const fullPath = path.join(dirPath, entry.name);
|
|
2136
|
+
if (entry.isDirectory()) {
|
|
2137
|
+
if (!["node_modules", ".git", ".vscode", "dist", "build"].includes(entry.name)) {
|
|
2138
|
+
getAllFiles(fullPath, files);
|
|
2139
|
+
}
|
|
2140
|
+
} else if (entry.isFile()) {
|
|
2141
|
+
files.push(fullPath);
|
|
2142
|
+
}
|
|
2143
|
+
}
|
|
2144
|
+
} catch (error) {
|
|
2145
|
+
logger5.error(`Error reading directory ${dirPath}:`, error);
|
|
2146
|
+
}
|
|
2147
|
+
return files;
|
|
2148
|
+
}
|
|
2149
|
+
function getContentType(extension) {
|
|
2150
|
+
const contentTypes = {
|
|
2151
|
+
// Text documents
|
|
2152
|
+
".txt": "text/plain",
|
|
2153
|
+
".md": "text/markdown",
|
|
2154
|
+
".markdown": "text/markdown",
|
|
2155
|
+
".tson": "text/plain",
|
|
2156
|
+
".xml": "application/xml",
|
|
2157
|
+
".csv": "text/csv",
|
|
2158
|
+
".tsv": "text/tab-separated-values",
|
|
2159
|
+
".log": "text/plain",
|
|
2160
|
+
// Web files
|
|
2161
|
+
".html": "text/html",
|
|
2162
|
+
".htm": "text/html",
|
|
2163
|
+
".css": "text/css",
|
|
2164
|
+
".scss": "text/x-scss",
|
|
2165
|
+
".sass": "text/x-sass",
|
|
2166
|
+
".less": "text/x-less",
|
|
2167
|
+
// JavaScript/TypeScript
|
|
2168
|
+
".js": "text/javascript",
|
|
2169
|
+
".jsx": "text/javascript",
|
|
2170
|
+
".ts": "text/typescript",
|
|
2171
|
+
".tsx": "text/typescript",
|
|
2172
|
+
".mjs": "text/javascript",
|
|
2173
|
+
".cjs": "text/javascript",
|
|
2174
|
+
".vue": "text/x-vue",
|
|
2175
|
+
".svelte": "text/x-svelte",
|
|
2176
|
+
".astro": "text/x-astro",
|
|
2177
|
+
// Python
|
|
2178
|
+
".py": "text/x-python",
|
|
2179
|
+
".pyw": "text/x-python",
|
|
2180
|
+
".pyi": "text/x-python",
|
|
2181
|
+
// Java/Kotlin/Scala
|
|
2182
|
+
".java": "text/x-java",
|
|
2183
|
+
".kt": "text/x-kotlin",
|
|
2184
|
+
".kts": "text/x-kotlin",
|
|
2185
|
+
".scala": "text/x-scala",
|
|
2186
|
+
// C/C++/C#
|
|
2187
|
+
".c": "text/x-c",
|
|
2188
|
+
".cpp": "text/x-c++",
|
|
2189
|
+
".cc": "text/x-c++",
|
|
2190
|
+
".cxx": "text/x-c++",
|
|
2191
|
+
".h": "text/x-c",
|
|
2192
|
+
".hpp": "text/x-c++",
|
|
2193
|
+
".cs": "text/x-csharp",
|
|
2194
|
+
// Other languages
|
|
2195
|
+
".php": "text/x-php",
|
|
2196
|
+
".rb": "text/x-ruby",
|
|
2197
|
+
".go": "text/x-go",
|
|
2198
|
+
".rs": "text/x-rust",
|
|
2199
|
+
".swift": "text/x-swift",
|
|
2200
|
+
".r": "text/x-r",
|
|
2201
|
+
".R": "text/x-r",
|
|
2202
|
+
".m": "text/x-objectivec",
|
|
2203
|
+
".mm": "text/x-objectivec",
|
|
2204
|
+
".clj": "text/x-clojure",
|
|
2205
|
+
".cljs": "text/x-clojure",
|
|
2206
|
+
".ex": "text/x-elixir",
|
|
2207
|
+
".exs": "text/x-elixir",
|
|
2208
|
+
".lua": "text/x-lua",
|
|
2209
|
+
".pl": "text/x-perl",
|
|
2210
|
+
".pm": "text/x-perl",
|
|
2211
|
+
".dart": "text/x-dart",
|
|
2212
|
+
".hs": "text/x-haskell",
|
|
2213
|
+
".elm": "text/x-elm",
|
|
2214
|
+
".ml": "text/x-ocaml",
|
|
2215
|
+
".fs": "text/x-fsharp",
|
|
2216
|
+
".fsx": "text/x-fsharp",
|
|
2217
|
+
".vb": "text/x-vb",
|
|
2218
|
+
".pas": "text/x-pascal",
|
|
2219
|
+
".d": "text/x-d",
|
|
2220
|
+
".nim": "text/x-nim",
|
|
2221
|
+
".zig": "text/x-zig",
|
|
2222
|
+
".jl": "text/x-julia",
|
|
2223
|
+
".tcl": "text/x-tcl",
|
|
2224
|
+
".awk": "text/x-awk",
|
|
2225
|
+
".sed": "text/x-sed",
|
|
2226
|
+
// Shell scripts
|
|
2227
|
+
".sh": "text/x-sh",
|
|
2228
|
+
".bash": "text/x-sh",
|
|
2229
|
+
".zsh": "text/x-sh",
|
|
2230
|
+
".fish": "text/x-fish",
|
|
2231
|
+
".ps1": "text/x-powershell",
|
|
2232
|
+
".bat": "text/x-batch",
|
|
2233
|
+
".cmd": "text/x-batch",
|
|
2234
|
+
// Config files
|
|
2235
|
+
".json": "application/json",
|
|
2236
|
+
".yaml": "text/x-yaml",
|
|
2237
|
+
".yml": "text/x-yaml",
|
|
2238
|
+
".toml": "text/x-toml",
|
|
2239
|
+
".ini": "text/x-ini",
|
|
2240
|
+
".cfg": "text/x-ini",
|
|
2241
|
+
".conf": "text/x-ini",
|
|
2242
|
+
".env": "text/plain",
|
|
2243
|
+
".gitignore": "text/plain",
|
|
2244
|
+
".dockerignore": "text/plain",
|
|
2245
|
+
".editorconfig": "text/plain",
|
|
2246
|
+
".properties": "text/x-properties",
|
|
2247
|
+
// Database
|
|
2248
|
+
".sql": "text/x-sql",
|
|
2249
|
+
// Binary documents
|
|
2250
|
+
".pdf": "application/pdf",
|
|
2251
|
+
".doc": "application/msword",
|
|
2252
|
+
".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
|
2253
|
+
};
|
|
2254
|
+
return contentTypes[extension] || null;
|
|
2255
|
+
}
|
|
2256
|
+
|
|
1584
2257
|
// src/service.ts
|
|
2258
|
+
var parseBooleanEnv2 = (value) => {
|
|
2259
|
+
if (typeof value === "boolean") return value;
|
|
2260
|
+
if (typeof value === "string") return value.toLowerCase() === "true";
|
|
2261
|
+
return false;
|
|
2262
|
+
};
|
|
1585
2263
|
var KnowledgeService = class _KnowledgeService extends Service {
|
|
1586
2264
|
static serviceType = "knowledge";
|
|
1587
|
-
config;
|
|
1588
|
-
knowledgeConfig;
|
|
2265
|
+
config = {};
|
|
2266
|
+
knowledgeConfig = {};
|
|
1589
2267
|
capabilityDescription = "Provides Retrieval Augmented Generation capabilities, including knowledge upload and querying.";
|
|
1590
2268
|
knowledgeProcessingSemaphore;
|
|
1591
2269
|
/**
|
|
@@ -1595,53 +2273,32 @@ var KnowledgeService = class _KnowledgeService extends Service {
|
|
|
1595
2273
|
constructor(runtime, config) {
|
|
1596
2274
|
super(runtime);
|
|
1597
2275
|
this.knowledgeProcessingSemaphore = new Semaphore(10);
|
|
1598
|
-
const parseBooleanEnv = (value) => {
|
|
1599
|
-
if (typeof value === "boolean") return value;
|
|
1600
|
-
if (typeof value === "string") return value.toLowerCase() === "true";
|
|
1601
|
-
return false;
|
|
1602
|
-
};
|
|
1603
|
-
const loadDocsOnStartup = parseBooleanEnv(config?.LOAD_DOCS_ON_STARTUP) || process.env.LOAD_DOCS_ON_STARTUP === "true";
|
|
1604
|
-
this.knowledgeConfig = {
|
|
1605
|
-
CTX_KNOWLEDGE_ENABLED: parseBooleanEnv(config?.CTX_KNOWLEDGE_ENABLED),
|
|
1606
|
-
LOAD_DOCS_ON_STARTUP: loadDocsOnStartup,
|
|
1607
|
-
MAX_INPUT_TOKENS: config?.MAX_INPUT_TOKENS,
|
|
1608
|
-
MAX_OUTPUT_TOKENS: config?.MAX_OUTPUT_TOKENS,
|
|
1609
|
-
EMBEDDING_PROVIDER: config?.EMBEDDING_PROVIDER,
|
|
1610
|
-
TEXT_PROVIDER: config?.TEXT_PROVIDER,
|
|
1611
|
-
TEXT_EMBEDDING_MODEL: config?.TEXT_EMBEDDING_MODEL
|
|
1612
|
-
};
|
|
1613
|
-
this.config = { ...this.knowledgeConfig };
|
|
1614
|
-
logger4.info(
|
|
1615
|
-
`KnowledgeService initialized for agent ${this.runtime.agentId} with config:`,
|
|
1616
|
-
this.knowledgeConfig
|
|
1617
|
-
);
|
|
1618
|
-
if (this.knowledgeConfig.LOAD_DOCS_ON_STARTUP) {
|
|
1619
|
-
logger4.info("LOAD_DOCS_ON_STARTUP is enabled. Loading documents from docs folder...");
|
|
1620
|
-
this.loadInitialDocuments().catch((error) => {
|
|
1621
|
-
logger4.error("Error during initial document loading in KnowledgeService:", error);
|
|
1622
|
-
});
|
|
1623
|
-
} else {
|
|
1624
|
-
logger4.info("LOAD_DOCS_ON_STARTUP is disabled. Skipping automatic document loading.");
|
|
1625
|
-
}
|
|
1626
2276
|
}
|
|
1627
2277
|
async loadInitialDocuments() {
|
|
1628
|
-
|
|
2278
|
+
logger6.info(
|
|
1629
2279
|
`KnowledgeService: Checking for documents to load on startup for agent ${this.runtime.agentId}`
|
|
1630
2280
|
);
|
|
1631
2281
|
try {
|
|
1632
|
-
await new Promise((
|
|
1633
|
-
const
|
|
2282
|
+
await new Promise((resolve2) => setTimeout(resolve2, 1e3));
|
|
2283
|
+
const knowledgePath = this.runtime.getSetting("KNOWLEDGE_PATH");
|
|
2284
|
+
const result = await loadDocsFromPath(
|
|
2285
|
+
this,
|
|
2286
|
+
this.runtime.agentId,
|
|
2287
|
+
void 0,
|
|
2288
|
+
// worldId
|
|
2289
|
+
knowledgePath
|
|
2290
|
+
);
|
|
1634
2291
|
if (result.successful > 0) {
|
|
1635
|
-
|
|
2292
|
+
logger6.info(
|
|
1636
2293
|
`KnowledgeService: Loaded ${result.successful} documents from docs folder on startup for agent ${this.runtime.agentId}`
|
|
1637
2294
|
);
|
|
1638
2295
|
} else {
|
|
1639
|
-
|
|
2296
|
+
logger6.info(
|
|
1640
2297
|
`KnowledgeService: No new documents found to load on startup for agent ${this.runtime.agentId}`
|
|
1641
2298
|
);
|
|
1642
2299
|
}
|
|
1643
2300
|
} catch (error) {
|
|
1644
|
-
|
|
2301
|
+
logger6.error(
|
|
1645
2302
|
`KnowledgeService: Error loading documents on startup for agent ${this.runtime.agentId}:`,
|
|
1646
2303
|
error
|
|
1647
2304
|
);
|
|
@@ -1653,23 +2310,90 @@ var KnowledgeService = class _KnowledgeService extends Service {
|
|
|
1653
2310
|
* @returns Initialized Knowledge service
|
|
1654
2311
|
*/
|
|
1655
2312
|
static async start(runtime) {
|
|
1656
|
-
|
|
2313
|
+
logger6.info(`Starting Knowledge service for agent: ${runtime.agentId}`);
|
|
2314
|
+
logger6.info("Initializing Knowledge Plugin...");
|
|
2315
|
+
let validatedConfig = {};
|
|
2316
|
+
try {
|
|
2317
|
+
logger6.info("Validating model configuration for Knowledge plugin...");
|
|
2318
|
+
logger6.debug(`[Knowledge Plugin] INIT DEBUG:`);
|
|
2319
|
+
logger6.debug(
|
|
2320
|
+
`[Knowledge Plugin] - process.env.CTX_KNOWLEDGE_ENABLED: '${process.env.CTX_KNOWLEDGE_ENABLED}'`
|
|
2321
|
+
);
|
|
2322
|
+
const config = {
|
|
2323
|
+
CTX_KNOWLEDGE_ENABLED: parseBooleanEnv2(runtime.getSetting("CTX_KNOWLEDGE_ENABLED"))
|
|
2324
|
+
};
|
|
2325
|
+
logger6.debug(
|
|
2326
|
+
`[Knowledge Plugin] - config.CTX_KNOWLEDGE_ENABLED: '${config.CTX_KNOWLEDGE_ENABLED}'`
|
|
2327
|
+
);
|
|
2328
|
+
logger6.debug(
|
|
2329
|
+
`[Knowledge Plugin] - runtime.getSetting('CTX_KNOWLEDGE_ENABLED'): '${runtime.getSetting("CTX_KNOWLEDGE_ENABLED")}'`
|
|
2330
|
+
);
|
|
2331
|
+
validatedConfig = validateModelConfig(runtime);
|
|
2332
|
+
const ctxEnabledFromEnv = parseBooleanEnv2(process.env.CTX_KNOWLEDGE_ENABLED);
|
|
2333
|
+
const ctxEnabledFromRuntime = parseBooleanEnv2(runtime.getSetting("CTX_KNOWLEDGE_ENABLED"));
|
|
2334
|
+
const ctxEnabledFromValidated = validatedConfig.CTX_KNOWLEDGE_ENABLED;
|
|
2335
|
+
const finalCtxEnabled = ctxEnabledFromValidated;
|
|
2336
|
+
logger6.debug(`[Knowledge Plugin] CTX_KNOWLEDGE_ENABLED sources:`);
|
|
2337
|
+
logger6.debug(`[Knowledge Plugin] - From env: ${ctxEnabledFromEnv}`);
|
|
2338
|
+
logger6.debug(`[Knowledge Plugin] - From runtime: ${ctxEnabledFromRuntime}`);
|
|
2339
|
+
logger6.debug(`[Knowledge Plugin] - FINAL RESULT: ${finalCtxEnabled}`);
|
|
2340
|
+
if (finalCtxEnabled) {
|
|
2341
|
+
logger6.info("Running in Contextual Knowledge mode with text generation capabilities.");
|
|
2342
|
+
logger6.info(
|
|
2343
|
+
`Using ${validatedConfig.EMBEDDING_PROVIDER || "auto-detected"} for embeddings and ${validatedConfig.TEXT_PROVIDER} for text generation.`
|
|
2344
|
+
);
|
|
2345
|
+
logger6.info(`Text model: ${validatedConfig.TEXT_MODEL}`);
|
|
2346
|
+
} else {
|
|
2347
|
+
const usingPluginOpenAI = !process.env.EMBEDDING_PROVIDER;
|
|
2348
|
+
logger6.warn(
|
|
2349
|
+
"Running in Basic Embedding mode - documents will NOT be enriched with context!"
|
|
2350
|
+
);
|
|
2351
|
+
logger6.info("To enable contextual enrichment:");
|
|
2352
|
+
logger6.info(" - Set CTX_KNOWLEDGE_ENABLED=true");
|
|
2353
|
+
logger6.info(" - Configure TEXT_PROVIDER (anthropic/openai/openrouter/google)");
|
|
2354
|
+
logger6.info(" - Configure TEXT_MODEL and API key");
|
|
2355
|
+
if (usingPluginOpenAI) {
|
|
2356
|
+
logger6.info("Using auto-detected configuration from plugin-openai for embeddings.");
|
|
2357
|
+
} else {
|
|
2358
|
+
logger6.info(
|
|
2359
|
+
`Using ${validatedConfig.EMBEDDING_PROVIDER} for embeddings with ${validatedConfig.TEXT_EMBEDDING_MODEL}.`
|
|
2360
|
+
);
|
|
2361
|
+
}
|
|
2362
|
+
}
|
|
2363
|
+
logger6.success("Model configuration validated successfully.");
|
|
2364
|
+
logger6.success(`Knowledge Plugin initialized for agent: ${runtime.character.name}`);
|
|
2365
|
+
logger6.info(
|
|
2366
|
+
"Knowledge Plugin initialized. Frontend panel should be discoverable via its public route."
|
|
2367
|
+
);
|
|
2368
|
+
} catch (error) {
|
|
2369
|
+
logger6.error("Failed to initialize Knowledge plugin:", error);
|
|
2370
|
+
throw error;
|
|
2371
|
+
}
|
|
1657
2372
|
const service = new _KnowledgeService(runtime);
|
|
2373
|
+
service.config = validatedConfig;
|
|
2374
|
+
if (service.config.LOAD_DOCS_ON_STARTUP) {
|
|
2375
|
+
logger6.info("LOAD_DOCS_ON_STARTUP is enabled. Loading documents from docs folder...");
|
|
2376
|
+
service.loadInitialDocuments().catch((error) => {
|
|
2377
|
+
logger6.error("Error during initial document loading in KnowledgeService:", error);
|
|
2378
|
+
});
|
|
2379
|
+
} else {
|
|
2380
|
+
logger6.info("LOAD_DOCS_ON_STARTUP is disabled. Skipping automatic document loading.");
|
|
2381
|
+
}
|
|
1658
2382
|
if (service.runtime.character?.knowledge && service.runtime.character.knowledge.length > 0) {
|
|
1659
|
-
|
|
2383
|
+
logger6.info(
|
|
1660
2384
|
`KnowledgeService: Processing ${service.runtime.character.knowledge.length} character knowledge items.`
|
|
1661
2385
|
);
|
|
1662
2386
|
const stringKnowledge = service.runtime.character.knowledge.filter(
|
|
1663
2387
|
(item) => typeof item === "string"
|
|
1664
2388
|
);
|
|
1665
2389
|
await service.processCharacterKnowledge(stringKnowledge).catch((err) => {
|
|
1666
|
-
|
|
2390
|
+
logger6.error(
|
|
1667
2391
|
`KnowledgeService: Error processing character knowledge during startup: ${err.message}`,
|
|
1668
2392
|
err
|
|
1669
2393
|
);
|
|
1670
2394
|
});
|
|
1671
2395
|
} else {
|
|
1672
|
-
|
|
2396
|
+
logger6.info(
|
|
1673
2397
|
`KnowledgeService: No character knowledge to process for agent ${runtime.agentId}.`
|
|
1674
2398
|
);
|
|
1675
2399
|
}
|
|
@@ -1680,10 +2404,10 @@ var KnowledgeService = class _KnowledgeService extends Service {
|
|
|
1680
2404
|
* @param runtime Agent runtime
|
|
1681
2405
|
*/
|
|
1682
2406
|
static async stop(runtime) {
|
|
1683
|
-
|
|
2407
|
+
logger6.info(`Stopping Knowledge service for agent: ${runtime.agentId}`);
|
|
1684
2408
|
const service = runtime.getService(_KnowledgeService.serviceType);
|
|
1685
2409
|
if (!service) {
|
|
1686
|
-
|
|
2410
|
+
logger6.warn(`KnowledgeService not found for agent ${runtime.agentId} during stop.`);
|
|
1687
2411
|
}
|
|
1688
2412
|
if (service instanceof _KnowledgeService) {
|
|
1689
2413
|
await service.stop();
|
|
@@ -1693,7 +2417,7 @@ var KnowledgeService = class _KnowledgeService extends Service {
|
|
|
1693
2417
|
* Stop the service
|
|
1694
2418
|
*/
|
|
1695
2419
|
async stop() {
|
|
1696
|
-
|
|
2420
|
+
logger6.info(`Knowledge service stopping for agent: ${this.runtime.character?.name}`);
|
|
1697
2421
|
}
|
|
1698
2422
|
/**
|
|
1699
2423
|
* Add knowledge to the system
|
|
@@ -1708,11 +2432,11 @@ var KnowledgeService = class _KnowledgeService extends Service {
|
|
|
1708
2432
|
maxChars: 2e3
|
|
1709
2433
|
// Use first 2KB of content for ID generation
|
|
1710
2434
|
});
|
|
1711
|
-
|
|
2435
|
+
logger6.info(`Processing "${options.originalFilename}" (${options.contentType})`);
|
|
1712
2436
|
try {
|
|
1713
2437
|
const existingDocument = await this.runtime.getMemoryById(contentBasedId);
|
|
1714
2438
|
if (existingDocument && existingDocument.metadata?.type === MemoryType2.DOCUMENT) {
|
|
1715
|
-
|
|
2439
|
+
logger6.info(`"${options.originalFilename}" already exists - skipping`);
|
|
1716
2440
|
const fragments = await this.runtime.getMemories({
|
|
1717
2441
|
tableName: "knowledge"
|
|
1718
2442
|
});
|
|
@@ -1726,7 +2450,7 @@ var KnowledgeService = class _KnowledgeService extends Service {
|
|
|
1726
2450
|
};
|
|
1727
2451
|
}
|
|
1728
2452
|
} catch (error) {
|
|
1729
|
-
|
|
2453
|
+
logger6.debug(
|
|
1730
2454
|
`Document ${contentBasedId} not found or error checking existence, proceeding with processing: ${error instanceof Error ? error.message : String(error)}`
|
|
1731
2455
|
);
|
|
1732
2456
|
}
|
|
@@ -1753,7 +2477,7 @@ var KnowledgeService = class _KnowledgeService extends Service {
|
|
|
1753
2477
|
}) {
|
|
1754
2478
|
const agentId = passedAgentId || this.runtime.agentId;
|
|
1755
2479
|
try {
|
|
1756
|
-
|
|
2480
|
+
logger6.debug(
|
|
1757
2481
|
`KnowledgeService: Processing document ${originalFilename} (type: ${contentType}) via processDocument for agent: ${agentId}`
|
|
1758
2482
|
);
|
|
1759
2483
|
let fileBuffer = null;
|
|
@@ -1764,7 +2488,7 @@ var KnowledgeService = class _KnowledgeService extends Service {
|
|
|
1764
2488
|
try {
|
|
1765
2489
|
fileBuffer = Buffer.from(content, "base64");
|
|
1766
2490
|
} catch (e) {
|
|
1767
|
-
|
|
2491
|
+
logger6.error(
|
|
1768
2492
|
`KnowledgeService: Failed to convert base64 to buffer for ${originalFilename}: ${e.message}`
|
|
1769
2493
|
);
|
|
1770
2494
|
throw new Error(`Invalid base64 content for PDF file ${originalFilename}`);
|
|
@@ -1775,7 +2499,7 @@ var KnowledgeService = class _KnowledgeService extends Service {
|
|
|
1775
2499
|
try {
|
|
1776
2500
|
fileBuffer = Buffer.from(content, "base64");
|
|
1777
2501
|
} catch (e) {
|
|
1778
|
-
|
|
2502
|
+
logger6.error(
|
|
1779
2503
|
`KnowledgeService: Failed to convert base64 to buffer for ${originalFilename}: ${e.message}`
|
|
1780
2504
|
);
|
|
1781
2505
|
throw new Error(`Invalid base64 content for binary file ${originalFilename}`);
|
|
@@ -1792,11 +2516,11 @@ var KnowledgeService = class _KnowledgeService extends Service {
|
|
|
1792
2516
|
if (invalidCharCount > 0 && invalidCharCount / textLength > 0.1) {
|
|
1793
2517
|
throw new Error("Decoded content contains too many invalid characters");
|
|
1794
2518
|
}
|
|
1795
|
-
|
|
2519
|
+
logger6.debug(`Successfully decoded base64 content for text file: ${originalFilename}`);
|
|
1796
2520
|
extractedText = decodedText;
|
|
1797
2521
|
documentContentToStore = decodedText;
|
|
1798
2522
|
} catch (e) {
|
|
1799
|
-
|
|
2523
|
+
logger6.error(
|
|
1800
2524
|
`Failed to decode base64 for ${originalFilename}: ${e instanceof Error ? e.message : String(e)}`
|
|
1801
2525
|
);
|
|
1802
2526
|
throw new Error(
|
|
@@ -1804,7 +2528,7 @@ var KnowledgeService = class _KnowledgeService extends Service {
|
|
|
1804
2528
|
);
|
|
1805
2529
|
}
|
|
1806
2530
|
} else {
|
|
1807
|
-
|
|
2531
|
+
logger6.debug(`Treating content as plain text for file: ${originalFilename}`);
|
|
1808
2532
|
extractedText = content;
|
|
1809
2533
|
documentContentToStore = content;
|
|
1810
2534
|
}
|
|
@@ -1813,7 +2537,7 @@ var KnowledgeService = class _KnowledgeService extends Service {
|
|
|
1813
2537
|
const noTextError = new Error(
|
|
1814
2538
|
`KnowledgeService: No text content extracted from ${originalFilename} (type: ${contentType}).`
|
|
1815
2539
|
);
|
|
1816
|
-
|
|
2540
|
+
logger6.warn(noTextError.message);
|
|
1817
2541
|
throw noTextError;
|
|
1818
2542
|
}
|
|
1819
2543
|
const documentMemory = createDocumentMemory({
|
|
@@ -1839,14 +2563,14 @@ var KnowledgeService = class _KnowledgeService extends Service {
|
|
|
1839
2563
|
roomId: roomId || agentId,
|
|
1840
2564
|
entityId: entityId || agentId
|
|
1841
2565
|
};
|
|
1842
|
-
|
|
2566
|
+
logger6.debug(
|
|
1843
2567
|
`KnowledgeService: Creating memory with agentId=${agentId}, entityId=${entityId}, roomId=${roomId}, this.runtime.agentId=${this.runtime.agentId}`
|
|
1844
2568
|
);
|
|
1845
|
-
|
|
2569
|
+
logger6.debug(
|
|
1846
2570
|
`KnowledgeService: memoryWithScope agentId=${memoryWithScope.agentId}, entityId=${memoryWithScope.entityId}`
|
|
1847
2571
|
);
|
|
1848
2572
|
await this.runtime.createMemory(memoryWithScope, "documents");
|
|
1849
|
-
|
|
2573
|
+
logger6.debug(
|
|
1850
2574
|
`KnowledgeService: Stored document ${originalFilename} (Memory ID: ${memoryWithScope.id})`
|
|
1851
2575
|
);
|
|
1852
2576
|
const fragmentCount = await processFragmentsSynchronously({
|
|
@@ -1861,14 +2585,14 @@ var KnowledgeService = class _KnowledgeService extends Service {
|
|
|
1861
2585
|
worldId: worldId || agentId,
|
|
1862
2586
|
documentTitle: originalFilename
|
|
1863
2587
|
});
|
|
1864
|
-
|
|
2588
|
+
logger6.debug(`"${originalFilename}" stored with ${fragmentCount} fragments`);
|
|
1865
2589
|
return {
|
|
1866
2590
|
clientDocumentId,
|
|
1867
2591
|
storedDocumentMemoryId: memoryWithScope.id,
|
|
1868
2592
|
fragmentCount
|
|
1869
2593
|
};
|
|
1870
2594
|
} catch (error) {
|
|
1871
|
-
|
|
2595
|
+
logger6.error(
|
|
1872
2596
|
`KnowledgeService: Error processing document ${originalFilename}: ${error.message}`,
|
|
1873
2597
|
error.stack
|
|
1874
2598
|
);
|
|
@@ -1877,7 +2601,7 @@ var KnowledgeService = class _KnowledgeService extends Service {
|
|
|
1877
2601
|
}
|
|
1878
2602
|
// --- Knowledge methods moved from AgentRuntime ---
|
|
1879
2603
|
async handleProcessingError(error, context) {
|
|
1880
|
-
|
|
2604
|
+
logger6.error(`KnowledgeService: Error ${context}:`, error?.message || error || "Unknown error");
|
|
1881
2605
|
throw error;
|
|
1882
2606
|
}
|
|
1883
2607
|
async checkExistingKnowledge(knowledgeId) {
|
|
@@ -1885,9 +2609,9 @@ var KnowledgeService = class _KnowledgeService extends Service {
|
|
|
1885
2609
|
return !!existingDocument;
|
|
1886
2610
|
}
|
|
1887
2611
|
async getKnowledge(message, scope) {
|
|
1888
|
-
|
|
2612
|
+
logger6.debug("KnowledgeService: getKnowledge called for message id: " + message.id);
|
|
1889
2613
|
if (!message?.content?.text || message?.content?.text.trim().length === 0) {
|
|
1890
|
-
|
|
2614
|
+
logger6.warn("KnowledgeService: Invalid or empty message content for knowledge query.");
|
|
1891
2615
|
return [];
|
|
1892
2616
|
}
|
|
1893
2617
|
const embedding = await this.runtime.useModel(ModelType2.TEXT_EMBEDDING, {
|
|
@@ -1926,7 +2650,7 @@ var KnowledgeService = class _KnowledgeService extends Service {
|
|
|
1926
2650
|
try {
|
|
1927
2651
|
const existingMemory = await this.runtime.getMemoryById(memoryId);
|
|
1928
2652
|
if (!existingMemory) {
|
|
1929
|
-
|
|
2653
|
+
logger6.warn(`Cannot enrich memory ${memoryId} - memory not found`);
|
|
1930
2654
|
return;
|
|
1931
2655
|
}
|
|
1932
2656
|
const updatedMetadata = {
|
|
@@ -1947,11 +2671,11 @@ var KnowledgeService = class _KnowledgeService extends Service {
|
|
|
1947
2671
|
id: memoryId,
|
|
1948
2672
|
metadata: updatedMetadata
|
|
1949
2673
|
});
|
|
1950
|
-
|
|
2674
|
+
logger6.debug(
|
|
1951
2675
|
`Enriched conversation memory ${memoryId} with RAG data: ${ragMetadata.totalFragments} fragments`
|
|
1952
2676
|
);
|
|
1953
2677
|
} catch (error) {
|
|
1954
|
-
|
|
2678
|
+
logger6.warn(
|
|
1955
2679
|
`Failed to enrich conversation memory ${memoryId} with RAG data: ${error.message}`
|
|
1956
2680
|
);
|
|
1957
2681
|
}
|
|
@@ -1974,7 +2698,7 @@ var KnowledgeService = class _KnowledgeService extends Service {
|
|
|
1974
2698
|
ragMetadata,
|
|
1975
2699
|
timestamp: now
|
|
1976
2700
|
});
|
|
1977
|
-
|
|
2701
|
+
logger6.debug(`Stored pending RAG metadata for next conversation memory`);
|
|
1978
2702
|
}
|
|
1979
2703
|
/**
|
|
1980
2704
|
* Try to enrich recent conversation memories with pending RAG metadata
|
|
@@ -2008,12 +2732,12 @@ var KnowledgeService = class _KnowledgeService extends Service {
|
|
|
2008
2732
|
}
|
|
2009
2733
|
}
|
|
2010
2734
|
} catch (error) {
|
|
2011
|
-
|
|
2735
|
+
logger6.warn(`Error enriching recent memories with RAG data: ${error.message}`);
|
|
2012
2736
|
}
|
|
2013
2737
|
}
|
|
2014
2738
|
async processCharacterKnowledge(items) {
|
|
2015
|
-
await new Promise((
|
|
2016
|
-
|
|
2739
|
+
await new Promise((resolve2) => setTimeout(resolve2, 1e3));
|
|
2740
|
+
logger6.info(
|
|
2017
2741
|
`KnowledgeService: Processing ${items.length} character knowledge items for agent ${this.runtime.agentId}`
|
|
2018
2742
|
);
|
|
2019
2743
|
const processingPromises = items.map(async (item) => {
|
|
@@ -2026,12 +2750,12 @@ var KnowledgeService = class _KnowledgeService extends Service {
|
|
|
2026
2750
|
// A constant identifier for character knowledge
|
|
2027
2751
|
});
|
|
2028
2752
|
if (await this.checkExistingKnowledge(knowledgeId)) {
|
|
2029
|
-
|
|
2753
|
+
logger6.debug(
|
|
2030
2754
|
`KnowledgeService: Character knowledge item with ID ${knowledgeId} already exists. Skipping.`
|
|
2031
2755
|
);
|
|
2032
2756
|
return;
|
|
2033
2757
|
}
|
|
2034
|
-
|
|
2758
|
+
logger6.debug(
|
|
2035
2759
|
`KnowledgeService: Processing character knowledge for ${this.runtime.character?.name} - ${item.slice(0, 100)}`
|
|
2036
2760
|
);
|
|
2037
2761
|
let metadata = {
|
|
@@ -2082,7 +2806,7 @@ var KnowledgeService = class _KnowledgeService extends Service {
|
|
|
2082
2806
|
}
|
|
2083
2807
|
});
|
|
2084
2808
|
await Promise.all(processingPromises);
|
|
2085
|
-
|
|
2809
|
+
logger6.info(
|
|
2086
2810
|
`KnowledgeService: Finished processing character knowledge for agent ${this.runtime.agentId}.`
|
|
2087
2811
|
);
|
|
2088
2812
|
}
|
|
@@ -2102,7 +2826,7 @@ var KnowledgeService = class _KnowledgeService extends Service {
|
|
|
2102
2826
|
worldId: scope?.worldId ?? this.runtime.agentId,
|
|
2103
2827
|
entityId: scope?.entityId ?? this.runtime.agentId
|
|
2104
2828
|
};
|
|
2105
|
-
|
|
2829
|
+
logger6.debug(`KnowledgeService: _internalAddKnowledge called for item ID ${item.id}`);
|
|
2106
2830
|
const documentMemory = {
|
|
2107
2831
|
id: item.id,
|
|
2108
2832
|
// This ID should be the unique ID for the document being added.
|
|
@@ -2124,7 +2848,7 @@ var KnowledgeService = class _KnowledgeService extends Service {
|
|
|
2124
2848
|
};
|
|
2125
2849
|
const existingDocument = await this.runtime.getMemoryById(item.id);
|
|
2126
2850
|
if (existingDocument) {
|
|
2127
|
-
|
|
2851
|
+
logger6.debug(
|
|
2128
2852
|
`KnowledgeService: Document ${item.id} already exists in _internalAddKnowledge, updating...`
|
|
2129
2853
|
);
|
|
2130
2854
|
await this.runtime.updateMemory({
|
|
@@ -2148,13 +2872,13 @@ var KnowledgeService = class _KnowledgeService extends Service {
|
|
|
2148
2872
|
await this.processDocumentFragment(fragment);
|
|
2149
2873
|
fragmentsProcessed++;
|
|
2150
2874
|
} catch (error) {
|
|
2151
|
-
|
|
2875
|
+
logger6.error(
|
|
2152
2876
|
`KnowledgeService: Error processing fragment ${fragment.id} for document ${item.id}:`,
|
|
2153
2877
|
error
|
|
2154
2878
|
);
|
|
2155
2879
|
}
|
|
2156
2880
|
}
|
|
2157
|
-
|
|
2881
|
+
logger6.debug(
|
|
2158
2882
|
`KnowledgeService: Processed ${fragmentsProcessed}/${fragments.length} fragments for document ${item.id}.`
|
|
2159
2883
|
);
|
|
2160
2884
|
}
|
|
@@ -2163,7 +2887,7 @@ var KnowledgeService = class _KnowledgeService extends Service {
|
|
|
2163
2887
|
await this.runtime.addEmbeddingToMemory(fragment);
|
|
2164
2888
|
await this.runtime.createMemory(fragment, "knowledge");
|
|
2165
2889
|
} catch (error) {
|
|
2166
|
-
|
|
2890
|
+
logger6.error(
|
|
2167
2891
|
`KnowledgeService: Error processing fragment ${fragment.id}:`,
|
|
2168
2892
|
error instanceof Error ? error.message : String(error)
|
|
2169
2893
|
);
|
|
@@ -2228,7 +2952,7 @@ var KnowledgeService = class _KnowledgeService extends Service {
|
|
|
2228
2952
|
*/
|
|
2229
2953
|
async deleteMemory(memoryId) {
|
|
2230
2954
|
await this.runtime.deleteMemory(memoryId);
|
|
2231
|
-
|
|
2955
|
+
logger6.info(
|
|
2232
2956
|
`KnowledgeService: Deleted memory ${memoryId} for agent ${this.runtime.agentId}. Assumed it was a document or related fragment.`
|
|
2233
2957
|
);
|
|
2234
2958
|
}
|
|
@@ -2236,7 +2960,7 @@ var KnowledgeService = class _KnowledgeService extends Service {
|
|
|
2236
2960
|
};
|
|
2237
2961
|
|
|
2238
2962
|
// src/provider.ts
|
|
2239
|
-
import { addHeader, logger as
|
|
2963
|
+
import { addHeader, logger as logger7 } from "@elizaos/core";
|
|
2240
2964
|
var knowledgeProvider = {
|
|
2241
2965
|
name: "KNOWLEDGE",
|
|
2242
2966
|
description: "Knowledge from the knowledge base that the agent knows, retrieved whenever the agent needs to answer a question about their expertise.",
|
|
@@ -2274,11 +2998,11 @@ var knowledgeProvider = {
|
|
|
2274
2998
|
try {
|
|
2275
2999
|
await knowledgeService.enrichRecentMemoriesWithPendingRAG();
|
|
2276
3000
|
} catch (error) {
|
|
2277
|
-
|
|
3001
|
+
logger7.warn("RAG memory enrichment failed:", error.message);
|
|
2278
3002
|
}
|
|
2279
3003
|
}, 2e3);
|
|
2280
3004
|
} catch (error) {
|
|
2281
|
-
|
|
3005
|
+
logger7.warn("RAG memory enrichment failed:", error.message);
|
|
2282
3006
|
}
|
|
2283
3007
|
}
|
|
2284
3008
|
return {
|
|
@@ -2305,9 +3029,9 @@ var knowledgeProvider = {
|
|
|
2305
3029
|
|
|
2306
3030
|
// src/tests.ts
|
|
2307
3031
|
import { MemoryType as MemoryType3, ModelType as ModelType3 } from "@elizaos/core";
|
|
2308
|
-
import { Buffer as
|
|
2309
|
-
import * as
|
|
2310
|
-
import * as
|
|
3032
|
+
import { Buffer as Buffer3 } from "buffer";
|
|
3033
|
+
import * as fs2 from "fs";
|
|
3034
|
+
import * as path2 from "path";
|
|
2311
3035
|
var mockLogger = {
|
|
2312
3036
|
info: (() => {
|
|
2313
3037
|
const fn = (...args) => {
|
|
@@ -2704,9 +3428,9 @@ trailer
|
|
|
2704
3428
|
startxref
|
|
2705
3429
|
${465 + content.length}
|
|
2706
3430
|
%%EOF`;
|
|
2707
|
-
return
|
|
3431
|
+
return Buffer3.from(pdfContent);
|
|
2708
3432
|
}
|
|
2709
|
-
return
|
|
3433
|
+
return Buffer3.from(content, "utf-8");
|
|
2710
3434
|
}
|
|
2711
3435
|
var KnowledgeTestSuite = class {
|
|
2712
3436
|
name = "knowledge";
|
|
@@ -2719,10 +3443,10 @@ var KnowledgeTestSuite = class {
|
|
|
2719
3443
|
const originalEnv = { ...process.env };
|
|
2720
3444
|
delete process.env.KNOWLEDGE_PATH;
|
|
2721
3445
|
try {
|
|
2722
|
-
const docsPath =
|
|
2723
|
-
const docsExists =
|
|
3446
|
+
const docsPath = path2.join(process.cwd(), "docs");
|
|
3447
|
+
const docsExists = fs2.existsSync(docsPath);
|
|
2724
3448
|
if (!docsExists) {
|
|
2725
|
-
|
|
3449
|
+
fs2.mkdirSync(docsPath, { recursive: true });
|
|
2726
3450
|
}
|
|
2727
3451
|
await index_default.init({}, runtime);
|
|
2728
3452
|
const errorCalls = mockLogger.error.calls;
|
|
@@ -2730,7 +3454,7 @@ var KnowledgeTestSuite = class {
|
|
|
2730
3454
|
throw new Error(`Unexpected error during init: ${errorCalls[0]}`);
|
|
2731
3455
|
}
|
|
2732
3456
|
if (!docsExists) {
|
|
2733
|
-
|
|
3457
|
+
fs2.rmSync(docsPath, { recursive: true, force: true });
|
|
2734
3458
|
}
|
|
2735
3459
|
} finally {
|
|
2736
3460
|
process.env = originalEnv;
|
|
@@ -2743,13 +3467,13 @@ var KnowledgeTestSuite = class {
|
|
|
2743
3467
|
const originalEnv = { ...process.env };
|
|
2744
3468
|
delete process.env.KNOWLEDGE_PATH;
|
|
2745
3469
|
try {
|
|
2746
|
-
const docsPath =
|
|
2747
|
-
if (
|
|
2748
|
-
|
|
3470
|
+
const docsPath = path2.join(process.cwd(), "docs");
|
|
3471
|
+
if (fs2.existsSync(docsPath)) {
|
|
3472
|
+
fs2.renameSync(docsPath, docsPath + ".backup");
|
|
2749
3473
|
}
|
|
2750
3474
|
await index_default.init({}, runtime);
|
|
2751
|
-
if (
|
|
2752
|
-
|
|
3475
|
+
if (fs2.existsSync(docsPath + ".backup")) {
|
|
3476
|
+
fs2.renameSync(docsPath + ".backup", docsPath);
|
|
2753
3477
|
}
|
|
2754
3478
|
} finally {
|
|
2755
3479
|
process.env = originalEnv;
|
|
@@ -2790,7 +3514,7 @@ var KnowledgeTestSuite = class {
|
|
|
2790
3514
|
{
|
|
2791
3515
|
name: "Should handle empty file buffer",
|
|
2792
3516
|
fn: async (runtime) => {
|
|
2793
|
-
const emptyBuffer =
|
|
3517
|
+
const emptyBuffer = Buffer3.alloc(0);
|
|
2794
3518
|
try {
|
|
2795
3519
|
await extractTextFromDocument(emptyBuffer, "text/plain", "empty.txt");
|
|
2796
3520
|
throw new Error("Should have thrown error for empty buffer");
|
|
@@ -2998,7 +3722,7 @@ var KnowledgeTestSuite = class {
|
|
|
2998
3722
|
}
|
|
2999
3723
|
});
|
|
3000
3724
|
const service = await KnowledgeService.start(knowledgeRuntime);
|
|
3001
|
-
await new Promise((
|
|
3725
|
+
await new Promise((resolve2) => setTimeout(resolve2, 2e3));
|
|
3002
3726
|
const memories = await knowledgeRuntime.getMemories({
|
|
3003
3727
|
tableName: "documents",
|
|
3004
3728
|
entityId: knowledgeRuntime.agentId
|
|
@@ -3190,9 +3914,9 @@ var KnowledgeTestSuite = class {
|
|
|
3190
3914
|
var tests_default = new KnowledgeTestSuite();
|
|
3191
3915
|
|
|
3192
3916
|
// src/actions.ts
|
|
3193
|
-
import { logger as
|
|
3194
|
-
import * as
|
|
3195
|
-
import * as
|
|
3917
|
+
import { logger as logger8, stringToUuid } from "@elizaos/core";
|
|
3918
|
+
import * as fs3 from "fs";
|
|
3919
|
+
import * as path3 from "path";
|
|
3196
3920
|
var processKnowledgeAction = {
|
|
3197
3921
|
name: "PROCESS_KNOWLEDGE",
|
|
3198
3922
|
description: "Process and store knowledge from a file path or text content into the knowledge base",
|
|
@@ -3248,7 +3972,7 @@ var processKnowledgeAction = {
|
|
|
3248
3972
|
const hasPath = pathPattern.test(text);
|
|
3249
3973
|
const service = runtime.getService(KnowledgeService.serviceType);
|
|
3250
3974
|
if (!service) {
|
|
3251
|
-
|
|
3975
|
+
logger8.warn("Knowledge service not available for PROCESS_KNOWLEDGE action");
|
|
3252
3976
|
return false;
|
|
3253
3977
|
}
|
|
3254
3978
|
return hasKeyword || hasPath;
|
|
@@ -3265,7 +3989,7 @@ var processKnowledgeAction = {
|
|
|
3265
3989
|
let response;
|
|
3266
3990
|
if (pathMatch) {
|
|
3267
3991
|
const filePath = pathMatch[0];
|
|
3268
|
-
if (!
|
|
3992
|
+
if (!fs3.existsSync(filePath)) {
|
|
3269
3993
|
response = {
|
|
3270
3994
|
text: `I couldn't find the file at ${filePath}. Please check the path and try again.`
|
|
3271
3995
|
};
|
|
@@ -3274,9 +3998,9 @@ var processKnowledgeAction = {
|
|
|
3274
3998
|
}
|
|
3275
3999
|
return;
|
|
3276
4000
|
}
|
|
3277
|
-
const fileBuffer =
|
|
3278
|
-
const fileName =
|
|
3279
|
-
const fileExt =
|
|
4001
|
+
const fileBuffer = fs3.readFileSync(filePath);
|
|
4002
|
+
const fileName = path3.basename(filePath);
|
|
4003
|
+
const fileExt = path3.extname(filePath).toLowerCase();
|
|
3280
4004
|
let contentType = "text/plain";
|
|
3281
4005
|
if (fileExt === ".pdf") contentType = "application/pdf";
|
|
3282
4006
|
else if (fileExt === ".docx")
|
|
@@ -3326,7 +4050,7 @@ var processKnowledgeAction = {
|
|
|
3326
4050
|
await callback(response);
|
|
3327
4051
|
}
|
|
3328
4052
|
} catch (error) {
|
|
3329
|
-
|
|
4053
|
+
logger8.error("Error in PROCESS_KNOWLEDGE action:", error);
|
|
3330
4054
|
const errorResponse = {
|
|
3331
4055
|
text: `I encountered an error while processing the knowledge: ${error instanceof Error ? error.message : "Unknown error"}`
|
|
3332
4056
|
};
|
|
@@ -3417,7 +4141,7 @@ ${formattedResults}`
|
|
|
3417
4141
|
await callback(response);
|
|
3418
4142
|
}
|
|
3419
4143
|
} catch (error) {
|
|
3420
|
-
|
|
4144
|
+
logger8.error("Error in SEARCH_KNOWLEDGE action:", error);
|
|
3421
4145
|
const errorResponse = {
|
|
3422
4146
|
text: `I encountered an error while searching the knowledge base: ${error instanceof Error ? error.message : "Unknown error"}`
|
|
3423
4147
|
};
|
|
@@ -3430,9 +4154,9 @@ ${formattedResults}`
|
|
|
3430
4154
|
var knowledgeActions = [processKnowledgeAction, searchKnowledgeAction];
|
|
3431
4155
|
|
|
3432
4156
|
// src/routes.ts
|
|
3433
|
-
import { createUniqueUuid as createUniqueUuid2, logger as
|
|
3434
|
-
import
|
|
3435
|
-
import
|
|
4157
|
+
import { createUniqueUuid as createUniqueUuid2, logger as logger9, ModelType as ModelType4 } from "@elizaos/core";
|
|
4158
|
+
import fs4 from "fs";
|
|
4159
|
+
import path4 from "path";
|
|
3436
4160
|
import multer from "multer";
|
|
3437
4161
|
var createUploadMiddleware = (runtime) => {
|
|
3438
4162
|
const uploadDir = runtime.getSetting("KNOWLEDGE_UPLOAD_DIR") || "/tmp/uploads/";
|
|
@@ -3477,11 +4201,11 @@ function sendError(res, status, code, message, details) {
|
|
|
3477
4201
|
res.end(JSON.stringify({ success: false, error: { code, message, details } }));
|
|
3478
4202
|
}
|
|
3479
4203
|
var cleanupFile = (filePath) => {
|
|
3480
|
-
if (filePath &&
|
|
4204
|
+
if (filePath && fs4.existsSync(filePath)) {
|
|
3481
4205
|
try {
|
|
3482
|
-
|
|
4206
|
+
fs4.unlinkSync(filePath);
|
|
3483
4207
|
} catch (error) {
|
|
3484
|
-
|
|
4208
|
+
logger9.error(`Error cleaning up file ${filePath}:`, error);
|
|
3485
4209
|
}
|
|
3486
4210
|
}
|
|
3487
4211
|
};
|
|
@@ -3508,15 +4232,15 @@ async function uploadKnowledgeHandler(req, res, runtime) {
|
|
|
3508
4232
|
}
|
|
3509
4233
|
const invalidFiles = files.filter((file) => {
|
|
3510
4234
|
if (file.size === 0) {
|
|
3511
|
-
|
|
4235
|
+
logger9.warn(`File ${file.originalname} is empty`);
|
|
3512
4236
|
return true;
|
|
3513
4237
|
}
|
|
3514
4238
|
if (!file.originalname || file.originalname.trim() === "") {
|
|
3515
|
-
|
|
4239
|
+
logger9.warn(`File has no name`);
|
|
3516
4240
|
return true;
|
|
3517
4241
|
}
|
|
3518
4242
|
if (!file.path) {
|
|
3519
|
-
|
|
4243
|
+
logger9.warn(`File ${file.originalname} has no path`);
|
|
3520
4244
|
return true;
|
|
3521
4245
|
}
|
|
3522
4246
|
return false;
|
|
@@ -3533,7 +4257,7 @@ async function uploadKnowledgeHandler(req, res, runtime) {
|
|
|
3533
4257
|
}
|
|
3534
4258
|
const agentId = req.body.agentId || req.query.agentId;
|
|
3535
4259
|
if (!agentId) {
|
|
3536
|
-
|
|
4260
|
+
logger9.error("[Document Processor] \u274C No agent ID provided in upload request");
|
|
3537
4261
|
return sendError(
|
|
3538
4262
|
res,
|
|
3539
4263
|
400,
|
|
@@ -3542,15 +4266,15 @@ async function uploadKnowledgeHandler(req, res, runtime) {
|
|
|
3542
4266
|
);
|
|
3543
4267
|
}
|
|
3544
4268
|
const worldId = req.body.worldId || agentId;
|
|
3545
|
-
|
|
4269
|
+
logger9.info(`[Document Processor] \u{1F4E4} Processing file upload for agent: ${agentId}`);
|
|
3546
4270
|
const processingPromises = files.map(async (file, index) => {
|
|
3547
4271
|
const originalFilename = file.originalname;
|
|
3548
4272
|
const filePath = file.path;
|
|
3549
|
-
|
|
4273
|
+
logger9.debug(
|
|
3550
4274
|
`[Document Processor] \u{1F4C4} Processing file: ${originalFilename} (agent: ${agentId})`
|
|
3551
4275
|
);
|
|
3552
4276
|
try {
|
|
3553
|
-
const fileBuffer = await
|
|
4277
|
+
const fileBuffer = await fs4.promises.readFile(filePath);
|
|
3554
4278
|
const base64Content = fileBuffer.toString("base64");
|
|
3555
4279
|
const addKnowledgeOpts = {
|
|
3556
4280
|
agentId,
|
|
@@ -3581,7 +4305,7 @@ async function uploadKnowledgeHandler(req, res, runtime) {
|
|
|
3581
4305
|
status: "success"
|
|
3582
4306
|
};
|
|
3583
4307
|
} catch (fileError) {
|
|
3584
|
-
|
|
4308
|
+
logger9.error(
|
|
3585
4309
|
`[Document Processor] \u274C Error processing file ${file.originalname}:`,
|
|
3586
4310
|
fileError
|
|
3587
4311
|
);
|
|
@@ -3604,7 +4328,7 @@ async function uploadKnowledgeHandler(req, res, runtime) {
|
|
|
3604
4328
|
}
|
|
3605
4329
|
const agentId = req.body.agentId || req.query.agentId;
|
|
3606
4330
|
if (!agentId) {
|
|
3607
|
-
|
|
4331
|
+
logger9.error("[Document Processor] \u274C No agent ID provided in URL request");
|
|
3608
4332
|
return sendError(
|
|
3609
4333
|
res,
|
|
3610
4334
|
400,
|
|
@@ -3612,7 +4336,7 @@ async function uploadKnowledgeHandler(req, res, runtime) {
|
|
|
3612
4336
|
"Agent ID is required for uploading knowledge from URLs"
|
|
3613
4337
|
);
|
|
3614
4338
|
}
|
|
3615
|
-
|
|
4339
|
+
logger9.info(`[Document Processor] \u{1F4E4} Processing URL upload for agent: ${agentId}`);
|
|
3616
4340
|
const processingPromises = fileUrls.map(async (fileUrl) => {
|
|
3617
4341
|
try {
|
|
3618
4342
|
const normalizedUrl = normalizeS3Url(fileUrl);
|
|
@@ -3620,7 +4344,7 @@ async function uploadKnowledgeHandler(req, res, runtime) {
|
|
|
3620
4344
|
const pathSegments = urlObject.pathname.split("/");
|
|
3621
4345
|
const encodedFilename = pathSegments[pathSegments.length - 1] || "document.pdf";
|
|
3622
4346
|
const originalFilename = decodeURIComponent(encodedFilename);
|
|
3623
|
-
|
|
4347
|
+
logger9.debug(`[Document Processor] \u{1F310} Fetching content from URL: ${fileUrl}`);
|
|
3624
4348
|
const { content, contentType: fetchedContentType } = await fetchUrlContent(fileUrl);
|
|
3625
4349
|
let contentType = fetchedContentType;
|
|
3626
4350
|
if (contentType === "application/octet-stream") {
|
|
@@ -3660,7 +4384,7 @@ async function uploadKnowledgeHandler(req, res, runtime) {
|
|
|
3660
4384
|
url: normalizedUrl
|
|
3661
4385
|
}
|
|
3662
4386
|
};
|
|
3663
|
-
|
|
4387
|
+
logger9.debug(
|
|
3664
4388
|
`[Document Processor] \u{1F4C4} Processing knowledge from URL: ${originalFilename} (type: ${contentType})`
|
|
3665
4389
|
);
|
|
3666
4390
|
const result = await service.addKnowledge(addKnowledgeOpts);
|
|
@@ -3675,7 +4399,7 @@ async function uploadKnowledgeHandler(req, res, runtime) {
|
|
|
3675
4399
|
status: "success"
|
|
3676
4400
|
};
|
|
3677
4401
|
} catch (urlError) {
|
|
3678
|
-
|
|
4402
|
+
logger9.error(`[Document Processor] \u274C Error processing URL ${fileUrl}:`, urlError);
|
|
3679
4403
|
return {
|
|
3680
4404
|
fileUrl,
|
|
3681
4405
|
status: "error_processing",
|
|
@@ -3687,7 +4411,7 @@ async function uploadKnowledgeHandler(req, res, runtime) {
|
|
|
3687
4411
|
sendSuccess(res, results);
|
|
3688
4412
|
}
|
|
3689
4413
|
} catch (error) {
|
|
3690
|
-
|
|
4414
|
+
logger9.error("[Document Processor] \u274C Error processing knowledge:", error);
|
|
3691
4415
|
if (hasUploadedFiles) {
|
|
3692
4416
|
cleanupFiles(req.files);
|
|
3693
4417
|
}
|
|
@@ -3726,7 +4450,7 @@ async function getKnowledgeDocumentsHandler(req, res, runtime) {
|
|
|
3726
4450
|
// Or if the URL is stored in the metadata (check if it exists)
|
|
3727
4451
|
memory.metadata && "url" in memory.metadata && typeof memory.metadata.url === "string" && normalizedRequestUrls.includes(normalizeS3Url(memory.metadata.url))
|
|
3728
4452
|
);
|
|
3729
|
-
|
|
4453
|
+
logger9.debug(
|
|
3730
4454
|
`[Document Processor] \u{1F50D} Filtered documents by URLs: ${fileUrls.length} URLs, found ${filteredMemories.length} matching documents`
|
|
3731
4455
|
);
|
|
3732
4456
|
}
|
|
@@ -3741,12 +4465,12 @@ async function getKnowledgeDocumentsHandler(req, res, runtime) {
|
|
|
3741
4465
|
totalRequested: fileUrls ? fileUrls.length : 0
|
|
3742
4466
|
});
|
|
3743
4467
|
} catch (error) {
|
|
3744
|
-
|
|
4468
|
+
logger9.error("[Document Processor] \u274C Error retrieving documents:", error);
|
|
3745
4469
|
sendError(res, 500, "RETRIEVAL_ERROR", "Failed to retrieve documents", error.message);
|
|
3746
4470
|
}
|
|
3747
4471
|
}
|
|
3748
4472
|
async function deleteKnowledgeDocumentHandler(req, res, runtime) {
|
|
3749
|
-
|
|
4473
|
+
logger9.debug(`[Document Processor] \u{1F5D1}\uFE0F DELETE request for document: ${req.params.knowledgeId}`);
|
|
3750
4474
|
const service = runtime.getService(KnowledgeService.serviceType);
|
|
3751
4475
|
if (!service) {
|
|
3752
4476
|
return sendError(
|
|
@@ -3758,22 +4482,22 @@ async function deleteKnowledgeDocumentHandler(req, res, runtime) {
|
|
|
3758
4482
|
}
|
|
3759
4483
|
const knowledgeId = req.params.knowledgeId;
|
|
3760
4484
|
if (!knowledgeId || knowledgeId.length < 36) {
|
|
3761
|
-
|
|
4485
|
+
logger9.error(`[Document Processor] \u274C Invalid knowledge ID format: ${knowledgeId}`);
|
|
3762
4486
|
return sendError(res, 400, "INVALID_ID", "Invalid Knowledge ID format");
|
|
3763
4487
|
}
|
|
3764
4488
|
try {
|
|
3765
4489
|
const typedKnowledgeId = knowledgeId;
|
|
3766
|
-
|
|
4490
|
+
logger9.debug(`[Document Processor] \u{1F5D1}\uFE0F Deleting document: ${typedKnowledgeId}`);
|
|
3767
4491
|
await service.deleteMemory(typedKnowledgeId);
|
|
3768
|
-
|
|
4492
|
+
logger9.info(`[Document Processor] \u2705 Successfully deleted document: ${typedKnowledgeId}`);
|
|
3769
4493
|
sendSuccess(res, null, 204);
|
|
3770
4494
|
} catch (error) {
|
|
3771
|
-
|
|
4495
|
+
logger9.error(`[Document Processor] \u274C Error deleting document ${knowledgeId}:`, error);
|
|
3772
4496
|
sendError(res, 500, "DELETE_ERROR", "Failed to delete document", error.message);
|
|
3773
4497
|
}
|
|
3774
4498
|
}
|
|
3775
4499
|
async function getKnowledgeByIdHandler(req, res, runtime) {
|
|
3776
|
-
|
|
4500
|
+
logger9.debug(`[Document Processor] \u{1F50D} GET request for document: ${req.params.knowledgeId}`);
|
|
3777
4501
|
const service = runtime.getService(KnowledgeService.serviceType);
|
|
3778
4502
|
if (!service) {
|
|
3779
4503
|
return sendError(
|
|
@@ -3785,11 +4509,11 @@ async function getKnowledgeByIdHandler(req, res, runtime) {
|
|
|
3785
4509
|
}
|
|
3786
4510
|
const knowledgeId = req.params.knowledgeId;
|
|
3787
4511
|
if (!knowledgeId || knowledgeId.length < 36) {
|
|
3788
|
-
|
|
4512
|
+
logger9.error(`[Document Processor] \u274C Invalid knowledge ID format: ${knowledgeId}`);
|
|
3789
4513
|
return sendError(res, 400, "INVALID_ID", "Invalid Knowledge ID format");
|
|
3790
4514
|
}
|
|
3791
4515
|
try {
|
|
3792
|
-
|
|
4516
|
+
logger9.debug(`[Document Processor] \u{1F50D} Retrieving document: ${knowledgeId}`);
|
|
3793
4517
|
const agentId = req.query.agentId;
|
|
3794
4518
|
const memories = await service.getMemories({
|
|
3795
4519
|
tableName: "documents",
|
|
@@ -3806,19 +4530,19 @@ async function getKnowledgeByIdHandler(req, res, runtime) {
|
|
|
3806
4530
|
};
|
|
3807
4531
|
sendSuccess(res, { document: cleanDocument });
|
|
3808
4532
|
} catch (error) {
|
|
3809
|
-
|
|
4533
|
+
logger9.error(`[Document Processor] \u274C Error retrieving document ${knowledgeId}:`, error);
|
|
3810
4534
|
sendError(res, 500, "RETRIEVAL_ERROR", "Failed to retrieve document", error.message);
|
|
3811
4535
|
}
|
|
3812
4536
|
}
|
|
3813
4537
|
async function knowledgePanelHandler(req, res, runtime) {
|
|
3814
4538
|
const agentId = runtime.agentId;
|
|
3815
|
-
|
|
4539
|
+
logger9.debug(`[Document Processor] \u{1F310} Serving knowledge panel for agent ${agentId}`);
|
|
3816
4540
|
try {
|
|
3817
|
-
const currentDir =
|
|
3818
|
-
const frontendPath =
|
|
3819
|
-
|
|
3820
|
-
if (
|
|
3821
|
-
const html = await
|
|
4541
|
+
const currentDir = path4.dirname(new URL(import.meta.url).pathname);
|
|
4542
|
+
const frontendPath = path4.join(currentDir, "../dist/index.html");
|
|
4543
|
+
logger9.debug(`[Document Processor] \u{1F310} Looking for frontend at: ${frontendPath}`);
|
|
4544
|
+
if (fs4.existsSync(frontendPath)) {
|
|
4545
|
+
const html = await fs4.promises.readFile(frontendPath, "utf8");
|
|
3822
4546
|
const injectedHtml = html.replace(
|
|
3823
4547
|
"<head>",
|
|
3824
4548
|
`<head>
|
|
@@ -3834,10 +4558,10 @@ async function knowledgePanelHandler(req, res, runtime) {
|
|
|
3834
4558
|
} else {
|
|
3835
4559
|
let cssFile = "index.css";
|
|
3836
4560
|
let jsFile = "index.js";
|
|
3837
|
-
const manifestPath =
|
|
3838
|
-
if (
|
|
4561
|
+
const manifestPath = path4.join(currentDir, "../dist/manifest.json");
|
|
4562
|
+
if (fs4.existsSync(manifestPath)) {
|
|
3839
4563
|
try {
|
|
3840
|
-
const manifestContent = await
|
|
4564
|
+
const manifestContent = await fs4.promises.readFile(manifestPath, "utf8");
|
|
3841
4565
|
const manifest = JSON.parse(manifestContent);
|
|
3842
4566
|
for (const [key, value] of Object.entries(manifest)) {
|
|
3843
4567
|
if (typeof value === "object" && value !== null) {
|
|
@@ -3850,10 +4574,10 @@ async function knowledgePanelHandler(req, res, runtime) {
|
|
|
3850
4574
|
}
|
|
3851
4575
|
}
|
|
3852
4576
|
} catch (manifestError) {
|
|
3853
|
-
|
|
4577
|
+
logger9.error("[Document Processor] \u274C Error reading manifest:", manifestError);
|
|
3854
4578
|
}
|
|
3855
4579
|
}
|
|
3856
|
-
|
|
4580
|
+
logger9.debug(`[Document Processor] \u{1F310} Using fallback with CSS: ${cssFile}, JS: ${jsFile}`);
|
|
3857
4581
|
const html = `
|
|
3858
4582
|
<!DOCTYPE html>
|
|
3859
4583
|
<html lang="en">
|
|
@@ -3887,14 +4611,14 @@ async function knowledgePanelHandler(req, res, runtime) {
|
|
|
3887
4611
|
res.end(html);
|
|
3888
4612
|
}
|
|
3889
4613
|
} catch (error) {
|
|
3890
|
-
|
|
4614
|
+
logger9.error("[Document Processor] \u274C Error serving frontend:", error);
|
|
3891
4615
|
sendError(res, 500, "FRONTEND_ERROR", "Failed to load knowledge panel", error.message);
|
|
3892
4616
|
}
|
|
3893
4617
|
}
|
|
3894
4618
|
async function frontendAssetHandler(req, res, runtime) {
|
|
3895
4619
|
try {
|
|
3896
|
-
|
|
3897
|
-
const currentDir =
|
|
4620
|
+
logger9.debug(`[Document Processor] \u{1F310} Asset request: ${req.path}`);
|
|
4621
|
+
const currentDir = path4.dirname(new URL(import.meta.url).pathname);
|
|
3898
4622
|
const assetRequestPath = req.path;
|
|
3899
4623
|
const assetsMarker = "/assets/";
|
|
3900
4624
|
const assetsStartIndex = assetRequestPath.indexOf(assetsMarker);
|
|
@@ -3910,10 +4634,10 @@ async function frontendAssetHandler(req, res, runtime) {
|
|
|
3910
4634
|
`Invalid asset name: '${assetName}' from path ${assetRequestPath}`
|
|
3911
4635
|
);
|
|
3912
4636
|
}
|
|
3913
|
-
const assetPath =
|
|
3914
|
-
|
|
3915
|
-
if (
|
|
3916
|
-
const fileStream =
|
|
4637
|
+
const assetPath = path4.join(currentDir, "../dist/assets", assetName);
|
|
4638
|
+
logger9.debug(`[Document Processor] \u{1F310} Serving asset: ${assetPath}`);
|
|
4639
|
+
if (fs4.existsSync(assetPath)) {
|
|
4640
|
+
const fileStream = fs4.createReadStream(assetPath);
|
|
3917
4641
|
let contentType = "application/octet-stream";
|
|
3918
4642
|
if (assetPath.endsWith(".js")) {
|
|
3919
4643
|
contentType = "application/javascript";
|
|
@@ -3926,7 +4650,7 @@ async function frontendAssetHandler(req, res, runtime) {
|
|
|
3926
4650
|
sendError(res, 404, "NOT_FOUND", `Asset not found: ${req.url}`);
|
|
3927
4651
|
}
|
|
3928
4652
|
} catch (error) {
|
|
3929
|
-
|
|
4653
|
+
logger9.error(`[Document Processor] \u274C Error serving asset ${req.url}:`, error);
|
|
3930
4654
|
sendError(res, 500, "ASSET_ERROR", `Failed to load asset ${req.url}`, error.message);
|
|
3931
4655
|
}
|
|
3932
4656
|
}
|
|
@@ -3987,7 +4711,7 @@ async function getKnowledgeChunksHandler(req, res, runtime) {
|
|
|
3987
4711
|
}
|
|
3988
4712
|
});
|
|
3989
4713
|
} catch (error) {
|
|
3990
|
-
|
|
4714
|
+
logger9.error("[Document Processor] \u274C Error retrieving chunks:", error);
|
|
3991
4715
|
sendError(res, 500, "RETRIEVAL_ERROR", "Failed to retrieve knowledge chunks", error.message);
|
|
3992
4716
|
}
|
|
3993
4717
|
}
|
|
@@ -4009,14 +4733,14 @@ async function searchKnowledgeHandler(req, res, runtime) {
|
|
|
4009
4733
|
return sendError(res, 400, "INVALID_QUERY", "Search query cannot be empty");
|
|
4010
4734
|
}
|
|
4011
4735
|
if (req.query.threshold && (parsedThreshold < 0 || parsedThreshold > 1)) {
|
|
4012
|
-
|
|
4736
|
+
logger9.debug(
|
|
4013
4737
|
`[Document Processor] \u{1F50D} Threshold value ${parsedThreshold} was clamped to ${matchThreshold}`
|
|
4014
4738
|
);
|
|
4015
4739
|
}
|
|
4016
4740
|
if (req.query.limit && (parsedLimit < 1 || parsedLimit > 100)) {
|
|
4017
|
-
|
|
4741
|
+
logger9.debug(`[Document Processor] \u{1F50D} Limit value ${parsedLimit} was clamped to ${limit}`);
|
|
4018
4742
|
}
|
|
4019
|
-
|
|
4743
|
+
logger9.debug(
|
|
4020
4744
|
`[Document Processor] \u{1F50D} Searching: "${searchText}" (threshold: ${matchThreshold}, limit: ${limit})`
|
|
4021
4745
|
);
|
|
4022
4746
|
const embedding = await runtime.useModel(ModelType4.TEXT_EMBEDDING, {
|
|
@@ -4043,7 +4767,7 @@ async function searchKnowledgeHandler(req, res, runtime) {
|
|
|
4043
4767
|
documentFilename = document.metadata.filename || documentFilename;
|
|
4044
4768
|
}
|
|
4045
4769
|
} catch (e) {
|
|
4046
|
-
|
|
4770
|
+
logger9.debug(`Could not fetch document ${documentId} for fragment`);
|
|
4047
4771
|
}
|
|
4048
4772
|
}
|
|
4049
4773
|
return {
|
|
@@ -4058,7 +4782,7 @@ async function searchKnowledgeHandler(req, res, runtime) {
|
|
|
4058
4782
|
};
|
|
4059
4783
|
})
|
|
4060
4784
|
);
|
|
4061
|
-
|
|
4785
|
+
logger9.info(
|
|
4062
4786
|
`[Document Processor] \u{1F50D} Found ${enhancedResults.length} results for: "${searchText}"`
|
|
4063
4787
|
);
|
|
4064
4788
|
sendSuccess(res, {
|
|
@@ -4068,7 +4792,7 @@ async function searchKnowledgeHandler(req, res, runtime) {
|
|
|
4068
4792
|
count: enhancedResults.length
|
|
4069
4793
|
});
|
|
4070
4794
|
} catch (error) {
|
|
4071
|
-
|
|
4795
|
+
logger9.error("[Document Processor] \u274C Error searching knowledge:", error);
|
|
4072
4796
|
sendError(res, 500, "SEARCH_ERROR", "Failed to search knowledge", error.message);
|
|
4073
4797
|
}
|
|
4074
4798
|
}
|
|
@@ -4080,7 +4804,7 @@ async function uploadKnowledgeWithMulter(req, res, runtime) {
|
|
|
4080
4804
|
);
|
|
4081
4805
|
uploadArray(req, res, (err) => {
|
|
4082
4806
|
if (err) {
|
|
4083
|
-
|
|
4807
|
+
logger9.error("[Document Processor] \u274C File upload error:", err);
|
|
4084
4808
|
return sendError(res, 400, "UPLOAD_ERROR", err.message);
|
|
4085
4809
|
}
|
|
4086
4810
|
uploadKnowledgeHandler(req, res, runtime);
|
|
@@ -4135,97 +4859,6 @@ var knowledgeRoutes = [
|
|
|
4135
4859
|
var knowledgePlugin = {
|
|
4136
4860
|
name: "knowledge",
|
|
4137
4861
|
description: "Plugin for Retrieval Augmented Generation, including knowledge management and embedding.",
|
|
4138
|
-
config: {
|
|
4139
|
-
// Token limits - these will be read from runtime settings during init
|
|
4140
|
-
MAX_INPUT_TOKENS: "4000",
|
|
4141
|
-
MAX_OUTPUT_TOKENS: "4096",
|
|
4142
|
-
// Contextual Knowledge settings
|
|
4143
|
-
CTX_KNOWLEDGE_ENABLED: "false"
|
|
4144
|
-
},
|
|
4145
|
-
async init(config, runtime) {
|
|
4146
|
-
logger8.info("Initializing Knowledge Plugin...");
|
|
4147
|
-
try {
|
|
4148
|
-
logger8.info("Validating model configuration for Knowledge plugin...");
|
|
4149
|
-
logger8.info(`[Knowledge Plugin] INIT DEBUG:`);
|
|
4150
|
-
logger8.info(`[Knowledge Plugin] - Runtime available: ${!!runtime}`);
|
|
4151
|
-
logger8.info(
|
|
4152
|
-
`[Knowledge Plugin] - process.env.CTX_KNOWLEDGE_ENABLED: '${process.env.CTX_KNOWLEDGE_ENABLED}'`
|
|
4153
|
-
);
|
|
4154
|
-
logger8.info(
|
|
4155
|
-
`[Knowledge Plugin] - config.CTX_KNOWLEDGE_ENABLED: '${config.CTX_KNOWLEDGE_ENABLED}'`
|
|
4156
|
-
);
|
|
4157
|
-
if (runtime) {
|
|
4158
|
-
logger8.info(
|
|
4159
|
-
`[Knowledge Plugin] - runtime.getSetting('CTX_KNOWLEDGE_ENABLED'): '${runtime.getSetting("CTX_KNOWLEDGE_ENABLED")}'`
|
|
4160
|
-
);
|
|
4161
|
-
}
|
|
4162
|
-
const validatedConfig = validateModelConfig(runtime);
|
|
4163
|
-
const ctxEnabledFromEnv = process.env.CTX_KNOWLEDGE_ENABLED === "true" || process.env.CTX_KNOWLEDGE_ENABLED === "True";
|
|
4164
|
-
const ctxEnabledFromConfig = config.CTX_KNOWLEDGE_ENABLED === "true" || config.CTX_KNOWLEDGE_ENABLED === "True";
|
|
4165
|
-
const ctxEnabledFromValidated = validatedConfig.CTX_KNOWLEDGE_ENABLED;
|
|
4166
|
-
const ctxEnabledFromRuntime = runtime ? runtime.getSetting("CTX_KNOWLEDGE_ENABLED") === "true" || runtime.getSetting("CTX_KNOWLEDGE_ENABLED") === "True" : false;
|
|
4167
|
-
const finalCtxEnabled = ctxEnabledFromEnv || ctxEnabledFromConfig || ctxEnabledFromValidated || ctxEnabledFromRuntime;
|
|
4168
|
-
logger8.info(`[Knowledge Plugin] CTX_KNOWLEDGE_ENABLED sources:`);
|
|
4169
|
-
logger8.info(`[Knowledge Plugin] - From env: ${ctxEnabledFromEnv}`);
|
|
4170
|
-
logger8.info(`[Knowledge Plugin] - From config: ${ctxEnabledFromConfig}`);
|
|
4171
|
-
logger8.info(`[Knowledge Plugin] - From validated: ${ctxEnabledFromValidated}`);
|
|
4172
|
-
logger8.info(`[Knowledge Plugin] - From runtime: ${ctxEnabledFromRuntime}`);
|
|
4173
|
-
logger8.info(`[Knowledge Plugin] - FINAL RESULT: ${finalCtxEnabled}`);
|
|
4174
|
-
if (finalCtxEnabled) {
|
|
4175
|
-
logger8.info("Running in Contextual Knowledge mode with text generation capabilities.");
|
|
4176
|
-
logger8.info(
|
|
4177
|
-
`Using ${validatedConfig.EMBEDDING_PROVIDER || "auto-detected"} for embeddings and ${validatedConfig.TEXT_PROVIDER || process.env.TEXT_PROVIDER} for text generation.`
|
|
4178
|
-
);
|
|
4179
|
-
logger8.info(`Text model: ${validatedConfig.TEXT_MODEL || process.env.TEXT_MODEL}`);
|
|
4180
|
-
} else {
|
|
4181
|
-
const usingPluginOpenAI = !process.env.EMBEDDING_PROVIDER;
|
|
4182
|
-
logger8.warn(
|
|
4183
|
-
"Running in Basic Embedding mode - documents will NOT be enriched with context!"
|
|
4184
|
-
);
|
|
4185
|
-
logger8.info("To enable contextual enrichment:");
|
|
4186
|
-
logger8.info(" - Set CTX_KNOWLEDGE_ENABLED=true");
|
|
4187
|
-
logger8.info(" - Configure TEXT_PROVIDER (anthropic/openai/openrouter/google)");
|
|
4188
|
-
logger8.info(" - Configure TEXT_MODEL and API key");
|
|
4189
|
-
if (usingPluginOpenAI) {
|
|
4190
|
-
logger8.info("Using auto-detected configuration from plugin-openai for embeddings.");
|
|
4191
|
-
} else {
|
|
4192
|
-
logger8.info(
|
|
4193
|
-
`Using ${validatedConfig.EMBEDDING_PROVIDER} for embeddings with ${validatedConfig.TEXT_EMBEDDING_MODEL}.`
|
|
4194
|
-
);
|
|
4195
|
-
}
|
|
4196
|
-
}
|
|
4197
|
-
logger8.info("Model configuration validated successfully.");
|
|
4198
|
-
if (runtime) {
|
|
4199
|
-
logger8.info(`Knowledge Plugin initialized for agent: ${runtime.agentId}`);
|
|
4200
|
-
const loadDocsOnStartup = config.LOAD_DOCS_ON_STARTUP === "true" || process.env.LOAD_DOCS_ON_STARTUP === "true";
|
|
4201
|
-
if (loadDocsOnStartup) {
|
|
4202
|
-
logger8.info("LOAD_DOCS_ON_STARTUP is enabled. Scheduling document loading...");
|
|
4203
|
-
setTimeout(async () => {
|
|
4204
|
-
try {
|
|
4205
|
-
const service = runtime.getService(KnowledgeService.serviceType);
|
|
4206
|
-
if (service instanceof KnowledgeService) {
|
|
4207
|
-
const { loadDocsFromPath: loadDocsFromPath2 } = await import("./docs-loader-5H4HRYEE.js");
|
|
4208
|
-
const result = await loadDocsFromPath2(service, runtime.agentId);
|
|
4209
|
-
if (result.successful > 0) {
|
|
4210
|
-
logger8.info(`Loaded ${result.successful} documents from docs folder on startup`);
|
|
4211
|
-
}
|
|
4212
|
-
}
|
|
4213
|
-
} catch (error) {
|
|
4214
|
-
logger8.error("Error loading documents on startup:", error);
|
|
4215
|
-
}
|
|
4216
|
-
}, 5e3);
|
|
4217
|
-
} else {
|
|
4218
|
-
logger8.info("LOAD_DOCS_ON_STARTUP is not enabled. Skipping automatic document loading.");
|
|
4219
|
-
}
|
|
4220
|
-
}
|
|
4221
|
-
logger8.info(
|
|
4222
|
-
"Knowledge Plugin initialized. Frontend panel should be discoverable via its public route."
|
|
4223
|
-
);
|
|
4224
|
-
} catch (error) {
|
|
4225
|
-
logger8.error("Failed to initialize Knowledge plugin:", error);
|
|
4226
|
-
throw error;
|
|
4227
|
-
}
|
|
4228
|
-
},
|
|
4229
4862
|
services: [KnowledgeService],
|
|
4230
4863
|
providers: [knowledgeProvider],
|
|
4231
4864
|
routes: knowledgeRoutes,
|