@elizaos/plugin-knowledge 1.0.11 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/.vite/manifest.json +2 -2
- package/dist/assets/{index-CzI8hR5q.css → index-B5VEkqpw.css} +1 -1
- package/dist/assets/index-DlOvU00u.js +169 -0
- package/dist/index.d.ts +15 -40
- package/dist/index.html +2 -2
- package/dist/index.js +968 -340
- package/dist/index.js.map +1 -1
- package/package.json +20 -22
- package/dist/assets/index-DimDNB3w.js +0 -160
- package/dist/chunk-RFXW7QQK.js +0 -695
- package/dist/chunk-RFXW7QQK.js.map +0 -1
- package/dist/docs-loader-5H4HRYEE.js +0 -9
- package/dist/docs-loader-5H4HRYEE.js.map +0 -1
package/dist/index.js
CHANGED
|
@@ -1,17 +1,147 @@
|
|
|
1
|
+
// src/service.ts
|
|
1
2
|
import {
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
v4_default
|
|
11
|
-
} from "./chunk-RFXW7QQK.js";
|
|
3
|
+
createUniqueUuid,
|
|
4
|
+
logger as logger6,
|
|
5
|
+
MemoryType as MemoryType2,
|
|
6
|
+
ModelType as ModelType2,
|
|
7
|
+
Semaphore,
|
|
8
|
+
Service,
|
|
9
|
+
splitChunks as splitChunks2
|
|
10
|
+
} from "@elizaos/core";
|
|
12
11
|
|
|
13
|
-
// src/
|
|
14
|
-
import {
|
|
12
|
+
// src/document-processor.ts
|
|
13
|
+
import {
|
|
14
|
+
MemoryType,
|
|
15
|
+
ModelType,
|
|
16
|
+
logger as logger4,
|
|
17
|
+
splitChunks
|
|
18
|
+
} from "@elizaos/core";
|
|
19
|
+
|
|
20
|
+
// node_modules/uuid/dist/esm/regex.js
|
|
21
|
+
var regex_default = /^(?:[0-9a-f]{8}-[0-9a-f]{4}-[1-8][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}|00000000-0000-0000-0000-000000000000|ffffffff-ffff-ffff-ffff-ffffffffffff)$/i;
|
|
22
|
+
|
|
23
|
+
// node_modules/uuid/dist/esm/validate.js
|
|
24
|
+
/**
 * Check whether a value is a UUID string.
 *
 * @param {*} uuid - Candidate value of any type.
 * @returns {boolean} `true` only when `uuid` is a string accepted by `regex_default`.
 */
function validate(uuid) {
  if (typeof uuid !== "string") {
    return false;
  }
  return regex_default.test(uuid);
}
|
|
27
|
+
var validate_default = validate;
|
|
28
|
+
|
|
29
|
+
// node_modules/uuid/dist/esm/parse.js
|
|
30
|
+
/**
 * Convert a UUID string into its 16 raw bytes.
 *
 * @param {string} uuid - UUID text; must pass `validate_default`.
 * @returns {Uint8Array} The 16 bytes encoded by the five hex groups, in order.
 * @throws {TypeError} "Invalid UUID" when validation fails.
 */
function parse(uuid) {
  if (!validate_default(uuid)) {
    throw TypeError("Invalid UUID");
  }
  // Each hex group is parsed once; bytes are then peeled off high-to-low.
  const group1 = parseInt(uuid.slice(0, 8), 16);
  const group2 = parseInt(uuid.slice(9, 13), 16);
  const group3 = parseInt(uuid.slice(14, 18), 16);
  const group4 = parseInt(uuid.slice(19, 23), 16);
  // The last group is 48 bits wide, so its two top bytes are reached by
  // division (bit shifts only see the low 32 bits).
  const group5 = parseInt(uuid.slice(24, 36), 16);
  return Uint8Array.of(
    group1 >>> 24,
    group1 >>> 16 & 255,
    group1 >>> 8 & 255,
    group1 & 255,
    group2 >>> 8,
    group2 & 255,
    group3 >>> 8,
    group3 & 255,
    group4 >>> 8,
    group4 & 255,
    group5 / 1099511627776 & 255,
    group5 / 4294967296 & 255,
    group5 >>> 24 & 255,
    group5 >>> 16 & 255,
    group5 >>> 8 & 255,
    group5 & 255
  );
}
|
|
37
|
+
var parse_default = parse;
|
|
38
|
+
|
|
39
|
+
// node_modules/uuid/dist/esm/stringify.js
|
|
40
|
+
// Lookup table: index 0..255 -> two-character lowercase hex ("00".."ff").
// Adding 256 before toString(16) forces a leading digit that slice(1) drops,
// which yields zero padding.
var byteToHex = Array.from({ length: 256 }, (_, byte) => (byte + 256).toString(16).slice(1));

/**
 * Format 16 bytes as an 8-4-4-4-12 hex UUID string without validating them.
 *
 * @param {ArrayLike<number>} arr - Source bytes.
 * @param {number} [offset=0] - Index of the first of the 16 bytes within `arr`.
 * @returns {string} Lowercase hyphenated UUID text.
 */
function unsafeStringify(arr, offset = 0) {
  const hexAt = (position) => byteToHex[arr[offset + position]];
  return (
    hexAt(0) + hexAt(1) + hexAt(2) + hexAt(3) + "-" +
    hexAt(4) + hexAt(5) + "-" +
    hexAt(6) + hexAt(7) + "-" +
    hexAt(8) + hexAt(9) + "-" +
    hexAt(10) + hexAt(11) + hexAt(12) + hexAt(13) + hexAt(14) + hexAt(15)
  ).toLowerCase();
}
|
|
47
|
+
|
|
48
|
+
// node_modules/uuid/dist/esm/rng.js
|
|
49
|
+
import { randomFillSync } from "crypto";
|
|
50
|
+
// Pool of random bytes that rng() hands out 16 at a time.
var rnds8Pool = new Uint8Array(256);
// Starts at the pool length so the very first rng() call triggers a fill.
var poolPtr = rnds8Pool.length;

/**
 * Return 16 random bytes taken from the shared pool.
 *
 * The pool is refilled with `randomFillSync` whenever fewer than 16 unread
 * bytes remain.
 *
 * @returns {Uint8Array} A fresh 16-byte copy (produced with `slice`).
 */
function rng() {
  const poolExhausted = poolPtr > rnds8Pool.length - 16;
  if (poolExhausted) {
    randomFillSync(rnds8Pool);
    poolPtr = 0;
  }
  const start = poolPtr;
  poolPtr = start + 16;
  return rnds8Pool.slice(start, poolPtr);
}
|
|
59
|
+
|
|
60
|
+
// node_modules/uuid/dist/esm/v35.js
|
|
61
|
+
/**
 * Encode a string as UTF-8 bytes.
 *
 * Uses `TextEncoder` instead of the deprecated `unescape(encodeURIComponent())`
 * idiom. Output is identical for well-formed strings; strings containing a
 * lone surrogate no longer throw `URIError` and are encoded with U+FFFD in
 * place of the invalid code unit.
 *
 * @param {string} str - Text to encode.
 * @returns {Uint8Array} UTF-8 encoding of `str`.
 */
function stringToBytes(str) {
  return new TextEncoder().encode(str);
}
|
|
69
|
+
var DNS = "6ba7b810-9dad-11d1-80b4-00c04fd430c8";
|
|
70
|
+
var URL2 = "6ba7b811-9dad-11d1-80b4-00c04fd430c8";
|
|
71
|
+
/**
 * Shared implementation for name-based UUIDs (called by `v5` in this file).
 *
 * @param {number} version - Bits OR-ed into byte 6 after its high nibble is cleared.
 * @param {Function} hash - Digest function applied to namespace bytes followed by value bytes.
 * @param {string|ArrayLike<number>} value - Name to hash; strings go through `stringToBytes`.
 * @param {string|ArrayLike<number>} namespace - Namespace as UUID text or as 16 bytes.
 * @param {ArrayLike<number>} [buf] - When given, receives the 16 bytes and is returned.
 * @param {number} [offset] - Start index in `buf`; falsy values mean 0.
 * @returns {string|ArrayLike<number>} UUID text, or `buf` when one was supplied.
 * @throws {TypeError} When the namespace does not resolve to 16 bytes.
 */
function v35(version, hash, value, namespace, buf, offset) {
  const valueBytes = typeof value === "string" ? stringToBytes(value) : value;
  const namespaceBytes = typeof namespace === "string" ? parse_default(namespace) : namespace;
  if (namespaceBytes?.length !== 16) {
    throw TypeError("Namespace must be array-like (16 iterable integer values, 0-255)");
  }

  // Hash input is the namespace bytes immediately followed by the value bytes.
  const hashInput = new Uint8Array(16 + valueBytes.length);
  hashInput.set(namespaceBytes);
  hashInput.set(valueBytes, namespaceBytes.length);

  const digest = hash(hashInput);
  digest[6] = digest[6] & 15 | version; // replace the high nibble with the version bits
  digest[8] = digest[8] & 63 | 128; // force the two top bits to 10

  if (!buf) {
    return unsafeStringify(digest);
  }
  const start = offset || 0;
  for (let i = 0; i < 16; ++i) {
    buf[start + i] = digest[i];
  }
  return buf;
}
|
|
95
|
+
|
|
96
|
+
// node_modules/uuid/dist/esm/native.js
|
|
97
|
+
import { randomUUID } from "crypto";
|
|
98
|
+
var native_default = { randomUUID };
|
|
99
|
+
|
|
100
|
+
// node_modules/uuid/dist/esm/v4.js
|
|
101
|
+
/**
 * Generate a random (version 4) UUID.
 *
 * @param {{random?: ArrayLike<number>, rng?: Function}} [options] - Optional byte
 *   source: `random` is used as-is, otherwise `rng()` is called, otherwise the
 *   module-level `rng` is used. The chosen byte array is modified in place.
 * @param {ArrayLike<number>} [buf] - When given, receives the 16 bytes and is returned.
 * @param {number} [offset] - Start index in `buf`; falsy values mean 0.
 * @returns {string|ArrayLike<number>} UUID text, or `buf` when one was supplied.
 * @throws {Error} When fewer than 16 random bytes are available.
 * @throws {RangeError} When the 16 bytes do not fit in `buf` at `offset`.
 */
function v4(options, buf, offset) {
  // Fast path: with no options and no output buffer, defer to crypto.randomUUID.
  const canUseNative = native_default.randomUUID && !buf && !options;
  if (canUseNative) {
    return native_default.randomUUID();
  }

  const settings = options || {};
  const rnds = settings.random ?? settings.rng?.() ?? rng();
  if (rnds.length < 16) {
    throw new Error("Random bytes length must be >= 16");
  }
  rnds[6] = rnds[6] & 15 | 64; // high nibble of byte 6 becomes 4
  rnds[8] = rnds[8] & 63 | 128; // two top bits of byte 8 become 10

  if (!buf) {
    return unsafeStringify(rnds);
  }

  const start = offset || 0;
  if (start < 0 || start + 16 > buf.length) {
    throw new RangeError(`UUID byte range ${start}:${start + 15} is out of buffer bounds`);
  }
  for (let i = 0; i < 16; ++i) {
    buf[start + i] = rnds[i];
  }
  return buf;
}
|
|
124
|
+
var v4_default = v4;
|
|
125
|
+
|
|
126
|
+
// node_modules/uuid/dist/esm/sha1.js
|
|
127
|
+
import { createHash } from "crypto";
|
|
128
|
+
/**
 * Compute the SHA-1 digest of the given data.
 *
 * @param {Array<number>|string|Uint8Array|Buffer} bytes - Plain arrays are wrapped
 *   in a Buffer, strings are encoded as UTF-8, anything else is passed to
 *   `update()` unchanged.
 * @returns {Buffer} The raw digest returned by `digest()`.
 */
function sha1(bytes) {
  let input = bytes;
  if (Array.isArray(input)) {
    input = Buffer.from(input);
  } else if (typeof input === "string") {
    input = Buffer.from(input, "utf8");
  }
  const hasher = createHash("sha1");
  hasher.update(input);
  return hasher.digest();
}
|
|
136
|
+
var sha1_default = sha1;
|
|
137
|
+
|
|
138
|
+
// node_modules/uuid/dist/esm/v5.js
|
|
139
|
+
/**
 * Generate a name-based UUID by delegating to `v35` with the SHA-1 hash.
 *
 * @param {string|ArrayLike<number>} value - Name to hash.
 * @param {string|ArrayLike<number>} namespace - Namespace UUID (text or 16 bytes).
 * @param {ArrayLike<number>} [buf] - Optional output buffer, forwarded to `v35`.
 * @param {number} [offset] - Optional start index in `buf`, forwarded to `v35`.
 * @returns {string|ArrayLike<number>} Whatever `v35` returns.
 */
function v5(value, namespace, buf, offset) {
  const versionBits = 0x50; // 80: OR-ed into byte 6 by v35
  return v35(versionBits, sha1_default, value, namespace, buf, offset);
}
// Expose the predefined namespace constants as properties of the function.
Object.assign(v5, { DNS, URL: URL2 });
var v5_default = v5;
|
|
15
145
|
|
|
16
146
|
// src/types.ts
|
|
17
147
|
import z from "zod";
|
|
@@ -42,6 +172,8 @@ var ModelConfigSchema = z.object({
|
|
|
42
172
|
// For OpenAI: Only applies to text-embedding-3-small and text-embedding-3-large models
|
|
43
173
|
// Default: 1536 dimensions
|
|
44
174
|
EMBEDDING_DIMENSION: z.string().or(z.number()).optional().transform((val) => val ? typeof val === "string" ? parseInt(val, 10) : val : 1536),
|
|
175
|
+
// config setting
|
|
176
|
+
LOAD_DOCS_ON_STARTUP: z.boolean().default(false),
|
|
45
177
|
// Contextual Knowledge settings
|
|
46
178
|
CTX_KNOWLEDGE_ENABLED: z.boolean().default(false)
|
|
47
179
|
});
|
|
@@ -52,19 +184,22 @@ var KnowledgeServiceType = {
|
|
|
52
184
|
// src/config.ts
|
|
53
185
|
import z2 from "zod";
|
|
54
186
|
import { logger } from "@elizaos/core";
|
|
187
|
+
/**
 * Interpret a setting or environment value as a boolean.
 *
 * Booleans pass through unchanged. Strings are true only when they equal
 * "true" after trimming surrounding whitespace and ignoring case, so values
 * such as " true " or "TRUE\n" (common in .env files) are honoured. Every
 * other value, including `undefined`, `null`, and numbers, is false.
 *
 * @param {*} value - Raw setting value.
 * @returns {boolean} Parsed flag.
 */
var parseBooleanEnv = (value) => {
  if (typeof value === "boolean") return value;
  if (typeof value === "string") return value.trim().toLowerCase() === "true";
  return false;
};
|
|
55
192
|
function validateModelConfig(runtime) {
|
|
56
193
|
try {
|
|
57
194
|
const getSetting = (key, defaultValue) => {
|
|
58
195
|
if (runtime) {
|
|
59
|
-
return runtime.getSetting(key) || defaultValue;
|
|
196
|
+
return runtime.getSetting(key) || process.env[key] || defaultValue;
|
|
60
197
|
}
|
|
61
198
|
return process.env[key] || defaultValue;
|
|
62
199
|
};
|
|
63
|
-
const
|
|
64
|
-
const cleanSetting = ctxKnowledgeEnabledSetting?.toString().trim().toLowerCase();
|
|
65
|
-
const ctxKnowledgeEnabled = cleanSetting === "true";
|
|
200
|
+
const ctxKnowledgeEnabled = parseBooleanEnv(getSetting("CTX_KNOWLEDGE_ENABLED", "false"));
|
|
66
201
|
logger.debug(
|
|
67
|
-
`[Document Processor] CTX_KNOWLEDGE_ENABLED: '${
|
|
202
|
+
`[Document Processor] CTX_KNOWLEDGE_ENABLED: '${ctxKnowledgeEnabled} (runtime: ${!!runtime})`
|
|
68
203
|
);
|
|
69
204
|
const embeddingProvider = getSetting("EMBEDDING_PROVIDER");
|
|
70
205
|
const assumePluginOpenAI = !embeddingProvider;
|
|
@@ -101,6 +236,7 @@ function validateModelConfig(runtime) {
|
|
|
101
236
|
MAX_INPUT_TOKENS: getSetting("MAX_INPUT_TOKENS", "4000"),
|
|
102
237
|
MAX_OUTPUT_TOKENS: getSetting("MAX_OUTPUT_TOKENS", "4096"),
|
|
103
238
|
EMBEDDING_DIMENSION: embeddingDimension,
|
|
239
|
+
LOAD_DOCS_ON_STARTUP: parseBooleanEnv(getSetting("LOAD_DOCS_ON_STARTUP")),
|
|
104
240
|
CTX_KNOWLEDGE_ENABLED: ctxKnowledgeEnabled
|
|
105
241
|
});
|
|
106
242
|
validateConfigRequirements(config, assumePluginOpenAI);
|
|
@@ -212,25 +348,6 @@ async function getProviderRateLimits(runtime) {
|
|
|
212
348
|
}
|
|
213
349
|
}
|
|
214
350
|
|
|
215
|
-
// src/service.ts
|
|
216
|
-
import {
|
|
217
|
-
createUniqueUuid,
|
|
218
|
-
logger as logger4,
|
|
219
|
-
MemoryType as MemoryType2,
|
|
220
|
-
ModelType as ModelType2,
|
|
221
|
-
Semaphore,
|
|
222
|
-
Service,
|
|
223
|
-
splitChunks as splitChunks2
|
|
224
|
-
} from "@elizaos/core";
|
|
225
|
-
|
|
226
|
-
// src/document-processor.ts
|
|
227
|
-
import {
|
|
228
|
-
MemoryType,
|
|
229
|
-
ModelType,
|
|
230
|
-
logger as logger3,
|
|
231
|
-
splitChunks
|
|
232
|
-
} from "@elizaos/core";
|
|
233
|
-
|
|
234
351
|
// src/ctx-embeddings.ts
|
|
235
352
|
var DEFAULT_CHUNK_TOKEN_SIZE = 500;
|
|
236
353
|
var DEFAULT_CHUNK_OVERLAP_TOKENS = 100;
|
|
@@ -646,8 +763,8 @@ import { createAnthropic } from "@ai-sdk/anthropic";
|
|
|
646
763
|
import { createOpenRouter } from "@openrouter/ai-sdk-provider";
|
|
647
764
|
import { google } from "@ai-sdk/google";
|
|
648
765
|
import { logger as logger2 } from "@elizaos/core";
|
|
649
|
-
async function generateText(prompt, system, overrideConfig) {
|
|
650
|
-
const config = validateModelConfig();
|
|
766
|
+
async function generateText(runtime, prompt, system, overrideConfig) {
|
|
767
|
+
const config = validateModelConfig(runtime);
|
|
651
768
|
const provider = overrideConfig?.provider || config.TEXT_PROVIDER;
|
|
652
769
|
const modelName = overrideConfig?.modelName || config.TEXT_MODEL;
|
|
653
770
|
const maxTokens = overrideConfig?.maxTokens || config.MAX_OUTPUT_TOKENS;
|
|
@@ -655,11 +772,12 @@ async function generateText(prompt, system, overrideConfig) {
|
|
|
655
772
|
try {
|
|
656
773
|
switch (provider) {
|
|
657
774
|
case "anthropic":
|
|
658
|
-
return await generateAnthropicText(prompt, system, modelName, maxTokens);
|
|
775
|
+
return await generateAnthropicText(config, prompt, system, modelName, maxTokens);
|
|
659
776
|
case "openai":
|
|
660
|
-
return await generateOpenAIText(prompt, system, modelName, maxTokens);
|
|
777
|
+
return await generateOpenAIText(config, prompt, system, modelName, maxTokens);
|
|
661
778
|
case "openrouter":
|
|
662
779
|
return await generateOpenRouterText(
|
|
780
|
+
config,
|
|
663
781
|
prompt,
|
|
664
782
|
system,
|
|
665
783
|
modelName,
|
|
@@ -678,8 +796,7 @@ async function generateText(prompt, system, overrideConfig) {
|
|
|
678
796
|
throw error;
|
|
679
797
|
}
|
|
680
798
|
}
|
|
681
|
-
async function generateAnthropicText(prompt, system, modelName, maxTokens) {
|
|
682
|
-
const config = validateModelConfig();
|
|
799
|
+
async function generateAnthropicText(config, prompt, system, modelName, maxTokens) {
|
|
683
800
|
const anthropic = createAnthropic({
|
|
684
801
|
apiKey: config.ANTHROPIC_API_KEY,
|
|
685
802
|
baseURL: config.ANTHROPIC_BASE_URL
|
|
@@ -707,7 +824,7 @@ async function generateAnthropicText(prompt, system, modelName, maxTokens) {
|
|
|
707
824
|
logger2.warn(
|
|
708
825
|
`[Document Processor] Rate limit hit (${modelName}): attempt ${attempt + 1}/${maxRetries}, retrying in ${Math.round(delay / 1e3)}s`
|
|
709
826
|
);
|
|
710
|
-
await new Promise((
|
|
827
|
+
await new Promise((resolve2) => setTimeout(resolve2, delay));
|
|
711
828
|
continue;
|
|
712
829
|
}
|
|
713
830
|
throw error;
|
|
@@ -715,8 +832,7 @@ async function generateAnthropicText(prompt, system, modelName, maxTokens) {
|
|
|
715
832
|
}
|
|
716
833
|
throw new Error("Max retries exceeded for Anthropic text generation");
|
|
717
834
|
}
|
|
718
|
-
async function generateOpenAIText(prompt, system, modelName, maxTokens) {
|
|
719
|
-
const config = validateModelConfig();
|
|
835
|
+
async function generateOpenAIText(config, prompt, system, modelName, maxTokens) {
|
|
720
836
|
const openai = createOpenAI({
|
|
721
837
|
apiKey: config.OPENAI_API_KEY,
|
|
722
838
|
baseURL: config.OPENAI_BASE_URL
|
|
@@ -754,8 +870,7 @@ async function generateGoogleText(prompt, system, modelName, maxTokens, config)
|
|
|
754
870
|
);
|
|
755
871
|
return result;
|
|
756
872
|
}
|
|
757
|
-
async function generateOpenRouterText(prompt, system, modelName, maxTokens, cacheDocument, cacheOptions, autoCacheContextualRetrieval = true) {
|
|
758
|
-
const config = validateModelConfig();
|
|
873
|
+
async function generateOpenRouterText(config, prompt, system, modelName, maxTokens, cacheDocument, cacheOptions, autoCacheContextualRetrieval = true) {
|
|
759
874
|
const openrouter = createOpenRouter({
|
|
760
875
|
apiKey: config.OPENROUTER_API_KEY,
|
|
761
876
|
baseURL: config.OPENROUTER_BASE_URL
|
|
@@ -962,6 +1077,359 @@ function logCacheMetrics(result) {
|
|
|
962
1077
|
}
|
|
963
1078
|
}
|
|
964
1079
|
|
|
1080
|
+
// src/utils.ts
|
|
1081
|
+
import { Buffer as Buffer2 } from "buffer";
|
|
1082
|
+
import * as mammoth from "mammoth";
|
|
1083
|
+
import { logger as logger3 } from "@elizaos/core";
|
|
1084
|
+
import { getDocument } from "pdfjs-dist/legacy/build/pdf.mjs";
|
|
1085
|
+
import { createHash as createHash2 } from "crypto";
|
|
1086
|
+
var PLAIN_TEXT_CONTENT_TYPES = [
|
|
1087
|
+
"application/typescript",
|
|
1088
|
+
"text/typescript",
|
|
1089
|
+
"text/x-python",
|
|
1090
|
+
"application/x-python-code",
|
|
1091
|
+
"application/yaml",
|
|
1092
|
+
"text/yaml",
|
|
1093
|
+
"application/x-yaml",
|
|
1094
|
+
"application/json",
|
|
1095
|
+
"text/markdown",
|
|
1096
|
+
"text/csv"
|
|
1097
|
+
];
|
|
1098
|
+
var MAX_FALLBACK_SIZE_BYTES = 5 * 1024 * 1024;
|
|
1099
|
+
var BINARY_CHECK_BYTES = 1024;
|
|
1100
|
+
/**
 * Extract text from an uploaded file held in memory.
 *
 * Handling by content type (compared in lowercase):
 *  - DOCX: raw text via `mammoth.extractRawText`.
 *  - `application/msword` or a filename ending in ".doc": a fixed placeholder
 *    string is returned; the file is not parsed.
 *  - `text/*` or any entry of `PLAIN_TEXT_CONTENT_TYPES`: decoded as UTF-8.
 *  - anything else: decoded as UTF-8 only if the buffer is no larger than
 *    `MAX_FALLBACK_SIZE_BYTES`, has no NUL byte in its first
 *    `BINARY_CHECK_BYTES` bytes, and decodes without U+FFFD.
 *
 * @param {Buffer} fileBuffer - File contents.
 * @param {string} contentType - MIME type reported for the file.
 * @param {string} originalFilename - Name used in log messages and the ".doc" check.
 * @returns {Promise<string>} Extracted text, or the placeholder for .doc files.
 * @throws {Error} When DOCX parsing fails or the fallback rejects the content.
 */
async function extractTextFromFileBuffer(fileBuffer, contentType, originalFilename) {
  const mimeType = contentType.toLowerCase();
  logger3.debug(
    `[TextUtil] Attempting to extract text from ${originalFilename} (type: ${contentType})`
  );

  // DOCX
  if (mimeType === "application/vnd.openxmlformats-officedocument.wordprocessingml.document") {
    logger3.debug(`[TextUtil] Extracting text from DOCX ${originalFilename} via mammoth.`);
    try {
      const extraction = await mammoth.extractRawText({ buffer: fileBuffer });
      logger3.debug(
        `[TextUtil] DOCX text extraction complete for ${originalFilename}. Text length: ${extraction.value.length}`
      );
      return extraction.value;
    } catch (docxError) {
      const errorMsg = `[TextUtil] Failed to parse DOCX file ${originalFilename}: ${docxError.message}`;
      logger3.error(errorMsg, docxError.stack);
      throw new Error(errorMsg);
    }
  }

  // Legacy .doc: not parsed, a placeholder is returned instead.
  if (mimeType === "application/msword" || originalFilename.toLowerCase().endsWith(".doc")) {
    logger3.debug(`[TextUtil] Handling Microsoft Word .doc file: ${originalFilename}`);
    return `[Microsoft Word Document: ${originalFilename}]\n\nThis document was indexed for search but cannot be displayed directly in the browser. The original document content is preserved for retrieval purposes.`;
  }

  // Known plain-text types
  if (mimeType.startsWith("text/") || PLAIN_TEXT_CONTENT_TYPES.includes(mimeType)) {
    logger3.debug(
      `[TextUtil] Extracting text from plain text compatible file ${originalFilename} (type: ${contentType})`
    );
    return fileBuffer.toString("utf-8");
  }

  // Unknown type: guarded attempt to treat the bytes as UTF-8 text.
  logger3.warn(
    `[TextUtil] Unsupported content type: "${contentType}" for ${originalFilename}. Attempting fallback to plain text.`
  );

  if (fileBuffer.length > MAX_FALLBACK_SIZE_BYTES) {
    const sizeErrorMsg = `[TextUtil] File ${originalFilename} (type: ${contentType}) exceeds maximum size for fallback (${MAX_FALLBACK_SIZE_BYTES} bytes). Cannot process as plain text.`;
    logger3.error(sizeErrorMsg);
    throw new Error(sizeErrorMsg);
  }

  // A NUL byte near the start is taken as a sign of binary content.
  const probeLength = Math.min(fileBuffer.length, BINARY_CHECK_BYTES);
  const leadingBytes = fileBuffer.subarray(0, probeLength);
  if (leadingBytes.includes(0)) {
    const binaryHeuristicMsg = `[TextUtil] File ${originalFilename} (type: ${contentType}) appears to be binary based on initial byte check. Cannot process as plain text.`;
    logger3.error(binaryHeuristicMsg);
    throw new Error(binaryHeuristicMsg);
  }

  try {
    const decodedText = fileBuffer.toString("utf-8");
    if (decodedText.includes("\uFFFD")) {
      // This throw is caught by the catch below, which reports the generic
      // fallback-failure message to the caller.
      const binaryErrorMsg = `[TextUtil] File ${originalFilename} (type: ${contentType}) seems to be binary or has encoding issues after fallback to plain text (detected \uFFFD).`;
      logger3.error(binaryErrorMsg);
      throw new Error(binaryErrorMsg);
    }
    logger3.debug(
      `[TextUtil] Successfully processed unknown type ${contentType} as plain text after fallback for ${originalFilename}.`
    );
    return decodedText;
  } catch (fallbackError) {
    const finalErrorMsg = `[TextUtil] Unsupported content type: ${contentType} for ${originalFilename}. Fallback to plain text also failed or indicated binary content.`;
    const stackForLog = fallbackError.message ? fallbackError.stack : void 0;
    logger3.error(finalErrorMsg, stackForLog);
    throw new Error(finalErrorMsg);
  }
}
|
|
1161
|
+
/**
 * Convert a PDF held in memory to a single string of text.
 *
 * For each page, text items are grouped by their rounded `transform[5]` value,
 * groups are ordered from the largest value to the smallest, and items inside
 * a group are ordered by ascending `transform[4]`. All whitespace runs in the
 * combined result are then collapsed to single spaces.
 *
 * @param {Buffer|ArrayBuffer|ArrayLike<number>} pdfBuffer - PDF bytes.
 * @param {string} [filename] - Name used in log messages; defaults to "unnamed-document".
 * @returns {Promise<string>} Extracted, whitespace-normalised text.
 * @throws {Error} "Failed to convert PDF to text: ..." on any failure.
 */
async function convertPdfToTextFromBuffer(pdfBuffer, filename) {
  const docName = filename || "unnamed-document";
  logger3.debug(`[PdfService] Starting conversion for ${docName}`);
  try {
    const pdf = await getDocument({ data: new Uint8Array(pdfBuffer) }).promise;
    const pageCount = pdf.numPages;
    const pageTexts = [];

    for (let pageNum = 1; pageNum <= pageCount; pageNum++) {
      logger3.debug(`[PdfService] Processing page ${pageNum}/${pageCount}`);
      const page = await pdf.getPage(pageNum);
      const textContent = await page.getTextContent();

      // Bucket the text items by rounded vertical coordinate.
      const rows = /* @__PURE__ */ new Map();
      for (const item of textContent.items.filter(isTextItem)) {
        const rowKey = Math.round(item.transform[5]);
        const row = rows.get(rowKey);
        if (row) {
          row.push(item);
        } else {
          rows.set(rowKey, [item]);
        }
      }

      const lines = [...rows.entries()]
        .sort(([keyA], [keyB]) => keyB - keyA)
        .map(([, rowItems]) =>
          rowItems
            .sort((left, right) => left.transform[4] - right.transform[4])
            .map(({ str }) => str)
            .join(" ")
        );
      pageTexts.push(lines.join("\n"));
    }

    const fullText = pageTexts.join("\n\n").replace(/\s+/g, " ").trim();
    logger3.debug(`[PdfService] Conversion complete for ${docName}, length: ${fullText.length}`);
    return fullText;
  } catch (error) {
    logger3.error(`[PdfService] Error converting PDF ${docName}:`, error.message);
    throw new Error(`Failed to convert PDF to text: ${error.message}`);
  }
}
|
|
1194
|
+
/**
 * Decide whether a file should be treated as binary.
 *
 * Checks run in this order and the first match wins:
 *  1. content type contains a known text marker      -> not binary
 *  2. content type contains a known binary marker    -> binary
 *  3. filename extension is a known text extension   -> not binary
 *  4. filename extension is a known binary extension -> binary
 *  5. otherwise                                      -> not binary
 *
 * The content type is compared in lowercase because media type names are
 * case-insensitive; previously "Application/PDF" or "TEXT/Plain" skipped
 * steps 1-2 and were classified by extension alone. A missing content type or
 * filename is treated as an empty string instead of throwing.
 *
 * @param {string} contentType - MIME type, optionally with parameters.
 * @param {string} filename - File name; the text after the last "." (or the
 *   whole name when it has no ".") is used as the extension.
 * @returns {boolean} `true` when the file is considered binary.
 */
function isBinaryContentType(contentType, filename) {
  const mimeType = String(contentType ?? "").toLowerCase();

  const textContentTypes = [
    "text/",
    "application/json",
    "application/xml",
    "application/javascript",
    "application/typescript",
    "application/x-yaml",
    "application/x-sh"
  ];
  if (textContentTypes.some((type) => mimeType.includes(type))) {
    return false;
  }

  const binaryContentTypes = [
    "application/pdf",
    "application/msword",
    "application/vnd.openxmlformats-officedocument",
    "application/vnd.ms-excel",
    "application/vnd.ms-powerpoint",
    "application/zip",
    "application/x-zip-compressed",
    "application/octet-stream",
    "image/",
    "audio/",
    "video/"
  ];
  if (binaryContentTypes.some((type) => mimeType.includes(type))) {
    return true;
  }

  // For a name without a dot (e.g. "Makefile") the whole name is the
  // "extension", which is why such names appear in the text list below.
  const fileExt = String(filename ?? "").split(".").pop()?.toLowerCase() || "";

  const textExtensions = [
    "txt", "md", "markdown", "json", "xml", "html", "htm", "css",
    "js", "ts", "jsx", "tsx", "yaml", "yml", "toml", "ini", "cfg", "conf",
    "sh", "bash", "zsh", "fish",
    "py", "rb", "go", "rs", "java", "c", "cpp", "h", "hpp", "cs", "php",
    "sql", "r", "swift", "kt", "scala", "clj", "ex", "exs", "vim",
    "env", "gitignore", "dockerignore", "editorconfig",
    "log", "csv", "tsv", "properties", "gradle", "sbt",
    "makefile", "dockerfile", "vagrantfile", "gemfile", "rakefile", "podfile",
    "csproj", "vbproj", "fsproj", "sln", "pom"
  ];
  if (textExtensions.includes(fileExt)) {
    return false;
  }

  const binaryExtensions = [
    "pdf", "docx", "doc", "xls", "xlsx", "ppt", "pptx",
    "zip", "rar", "7z", "tar", "gz", "bz2", "xz",
    "jpg", "jpeg", "png", "gif", "bmp", "svg", "ico", "webp",
    "mp3", "mp4", "avi", "mov", "wmv", "flv", "wav", "flac", "ogg",
    "exe", "dll", "so", "dylib", "bin", "dat", "db", "sqlite"
  ];
  return binaryExtensions.includes(fileExt);
}
|
|
1337
|
+
/**
 * Report whether a PDF content item exposes a `str` property.
 *
 * @param {object} item - Entry from a page's text content `items` list.
 * @returns {boolean} `true` when `str` exists on the item or its prototype chain.
 */
function isTextItem(item) {
  return Reflect.has(item, "str");
}
|
|
1340
|
+
/**
 * Reduce a URL to its origin and path, dropping the query string and fragment.
 *
 * @param {string} url - URL to normalise.
 * @returns {string} `origin + pathname`, or the input unchanged when it cannot
 *   be parsed as a URL (a warning is logged in that case).
 */
function normalizeS3Url(url) {
  let parsed;
  try {
    parsed = new URL(url);
  } catch (error) {
    logger3.warn(`[URL NORMALIZER] Failed to parse URL: ${url}. Returning original.`);
    return url;
  }
  return `${parsed.origin}${parsed.pathname}`;
}
|
|
1349
|
+
/**
 * Download a URL and return its body as base64 along with the content type.
 *
 * The request is aborted if no response arrives within 30 seconds. The abort
 * timer is cleared in a `finally` block so it is released when `fetch`
 * rejects as well as when it succeeds; previously a failed request left the
 * timer pending for the full 30 seconds.
 *
 * @param {string} url - Address to fetch.
 * @returns {Promise<{content: string, contentType: string}>} Base64 body and the
 *   `content-type` response header ("application/octet-stream" when absent).
 * @throws {Error} "Failed to fetch content from URL: ..." on network errors,
 *   timeouts, or non-2xx responses.
 */
async function fetchUrlContent(url) {
  logger3.debug(`[URL FETCHER] Fetching content from URL: ${url}`);
  try {
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), 3e4);
    let response;
    try {
      response = await fetch(url, {
        signal: controller.signal,
        headers: {
          "User-Agent": "Eliza-Knowledge-Plugin/1.0"
        }
      });
    } finally {
      // Always release the timer, including when fetch rejects.
      clearTimeout(timeoutId);
    }
    if (!response.ok) {
      throw new Error(`Failed to fetch URL: ${response.status} ${response.statusText}`);
    }
    const contentType = response.headers.get("content-type") || "application/octet-stream";
    logger3.debug(`[URL FETCHER] Content type from server: ${contentType} for URL: ${url}`);
    const arrayBuffer = await response.arrayBuffer();
    const buffer = Buffer2.from(arrayBuffer);
    const base64Content = buffer.toString("base64");
    logger3.debug(
      `[URL FETCHER] Successfully fetched content from URL: ${url} (${buffer.length} bytes)`
    );
    return {
      content: base64Content,
      contentType
    };
  } catch (error) {
    logger3.error(`[URL FETCHER] Error fetching content from URL ${url}: ${error.message}`);
    throw new Error(`Failed to fetch content from URL: ${error.message}`);
  }
}
|
|
1381
|
+
/**
 * Heuristically decide whether a string is base64-encoded data.
 *
 * After whitespace is removed, the text must be at least 16 characters long,
 * have a length divisible by 4, consist only of base64 alphabet characters
 * with at most two trailing "=", and contain a lowercase letter together with
 * either a digit or an uppercase letter.
 *
 * @param {string} content - Text to inspect.
 * @returns {boolean} `true` when every condition above holds.
 */
function looksLikeBase64(content) {
  if (!content || content.length === 0) return false;

  const compact = content.replace(/\s/g, "");
  const hasValidShape =
    compact.length >= 16 &&
    compact.length % 4 === 0 &&
    /^[A-Za-z0-9+/]*={0,2}$/.test(compact);
  if (!hasValidShape) return false;

  // Require mixed character classes so plain lowercase words are rejected.
  if (!/[a-z]/.test(compact)) return false;
  return /\d/.test(compact) || /[A-Z]/.test(compact);
}
|
|
1393
|
+
/**
 * Derive a deterministic UUID for a document from its content.
 *
 * A prefix of the content (see `maxChars`) has its line endings normalised to
 * "\n" and is trimmed, then joined with the agent id and optional filename
 * using "::". That string is hashed with SHA-256 and the hex digest is turned
 * into a UUID with `v5_default` under a fixed namespace.
 *
 * @param {string} content - Document content, as plain text or base64.
 * @param {string} agentId - Agent identifier mixed into the hash.
 * @param {{maxChars?: number, includeFilename?: string, contentType?: string}} [options]
 *   `maxChars` (default 2000) limits how much content is hashed;
 *   `includeFilename` is appended to the hash input when truthy;
 *   `contentType` affects how base64-looking content is handled.
 * @returns {string} The generated UUID.
 */
function generateContentBasedId(content, agentId, options) {
  const { maxChars = 2e3, includeFilename, contentType } = options || {};
  const rawPrefix = () => content.slice(0, maxChars);

  let contentForHashing;
  if (!looksLikeBase64(content)) {
    contentForHashing = rawPrefix();
  } else {
    try {
      const decoded = Buffer2.from(content, "base64").toString("utf8");
      // NOTE(review): the decoded text is hashed only when decoding produced
      // U+FFFD and the type is not PDF; cleanly decoded text hashes the base64
      // prefix instead. This looks inverted, but changing it would change every
      // id generated so far - confirm intent before touching it.
      const hashDecodedText = decoded.includes("\uFFFD") && !contentType?.includes("pdf");
      contentForHashing = hashDecodedText ? decoded.slice(0, maxChars) : rawPrefix();
    } catch {
      contentForHashing = rawPrefix();
    }
  }

  const normalized = contentForHashing.replace(/\r\n/g, "\n").replace(/\r/g, "\n").trim();

  // Empty components are dropped before joining.
  const hashParts = [agentId, normalized, includeFilename || ""];
  const componentsToHash = hashParts.filter(Boolean).join("::");

  const hash = createHash2("sha256").update(componentsToHash).digest("hex");
  const DOCUMENT_NAMESPACE = "6ba7b810-9dad-11d1-80b4-00c04fd430c8";
  const uuid = v5_default(hash, DOCUMENT_NAMESPACE);
  logger3.debug(
    `[generateContentBasedId] Generated UUID ${uuid} for document with content hash ${hash.slice(0, 8)}...`
  );
  return uuid;
}
|
|
1432
|
+
|
|
965
1433
|
// src/document-processor.ts
|
|
966
1434
|
function estimateTokens(text) {
|
|
967
1435
|
return Math.ceil(text.length / 4);
|
|
@@ -982,7 +1450,7 @@ function getCtxKnowledgeEnabled(runtime) {
|
|
|
982
1450
|
source = "process.env";
|
|
983
1451
|
}
|
|
984
1452
|
if (process.env.NODE_ENV === "development" && rawValue && !result) {
|
|
985
|
-
|
|
1453
|
+
logger4.debug(`[Document Processor] CTX config mismatch - ${source}: '${rawValue}' \u2192 ${result}`);
|
|
986
1454
|
}
|
|
987
1455
|
return result;
|
|
988
1456
|
}
|
|
@@ -1018,23 +1486,23 @@ async function processFragmentsSynchronously({
|
|
|
1018
1486
|
documentTitle
|
|
1019
1487
|
}) {
|
|
1020
1488
|
if (!fullDocumentText || fullDocumentText.trim() === "") {
|
|
1021
|
-
|
|
1489
|
+
logger4.warn(`No text content available to chunk for document ${documentId}.`);
|
|
1022
1490
|
return 0;
|
|
1023
1491
|
}
|
|
1024
1492
|
const chunks = await splitDocumentIntoChunks(fullDocumentText);
|
|
1025
1493
|
if (chunks.length === 0) {
|
|
1026
|
-
|
|
1494
|
+
logger4.warn(`No chunks generated from text for ${documentId}. No fragments to save.`);
|
|
1027
1495
|
return 0;
|
|
1028
1496
|
}
|
|
1029
1497
|
const docName = documentTitle || documentId.substring(0, 8);
|
|
1030
|
-
|
|
1498
|
+
logger4.info(`[Document Processor] "${docName}": Split into ${chunks.length} chunks`);
|
|
1031
1499
|
const providerLimits = await getProviderRateLimits();
|
|
1032
1500
|
const CONCURRENCY_LIMIT = Math.min(30, providerLimits.maxConcurrentRequests || 30);
|
|
1033
1501
|
const rateLimiter = createRateLimiter(
|
|
1034
1502
|
providerLimits.requestsPerMinute || 60,
|
|
1035
1503
|
providerLimits.tokensPerMinute
|
|
1036
1504
|
);
|
|
1037
|
-
|
|
1505
|
+
logger4.debug(
|
|
1038
1506
|
`[Document Processor] Rate limits: ${providerLimits.requestsPerMinute} RPM, ${providerLimits.tokensPerMinute} TPM (${providerLimits.provider}, concurrency: ${CONCURRENCY_LIMIT})`
|
|
1039
1507
|
);
|
|
1040
1508
|
const { savedCount, failedCount } = await processAndSaveFragments({
|
|
@@ -1053,11 +1521,11 @@ async function processFragmentsSynchronously({
|
|
|
1053
1521
|
});
|
|
1054
1522
|
const successRate = (savedCount / chunks.length * 100).toFixed(1);
|
|
1055
1523
|
if (failedCount > 0) {
|
|
1056
|
-
|
|
1524
|
+
logger4.warn(
|
|
1057
1525
|
`[Document Processor] "${docName}": ${failedCount}/${chunks.length} chunks failed processing`
|
|
1058
1526
|
);
|
|
1059
1527
|
}
|
|
1060
|
-
|
|
1528
|
+
logger4.info(
|
|
1061
1529
|
`[Document Processor] "${docName}" complete: ${savedCount}/${chunks.length} fragments saved (${successRate}% success)`
|
|
1062
1530
|
);
|
|
1063
1531
|
logKnowledgeGenerationSummary({
|
|
@@ -1077,15 +1545,15 @@ async function extractTextFromDocument(fileBuffer, contentType, originalFilename
|
|
|
1077
1545
|
}
|
|
1078
1546
|
try {
|
|
1079
1547
|
if (contentType === "application/pdf") {
|
|
1080
|
-
|
|
1548
|
+
logger4.debug(`Extracting text from PDF: ${originalFilename}`);
|
|
1081
1549
|
return await convertPdfToTextFromBuffer(fileBuffer, originalFilename);
|
|
1082
1550
|
} else {
|
|
1083
|
-
|
|
1551
|
+
logger4.debug(`Extracting text from non-PDF: ${originalFilename} (Type: ${contentType})`);
|
|
1084
1552
|
if (contentType.includes("text/") || contentType.includes("application/json") || contentType.includes("application/xml")) {
|
|
1085
1553
|
try {
|
|
1086
1554
|
return fileBuffer.toString("utf8");
|
|
1087
1555
|
} catch (textError) {
|
|
1088
|
-
|
|
1556
|
+
logger4.warn(
|
|
1089
1557
|
`Failed to decode ${originalFilename} as UTF-8, falling back to binary extraction`
|
|
1090
1558
|
);
|
|
1091
1559
|
}
|
|
@@ -1093,7 +1561,7 @@ async function extractTextFromDocument(fileBuffer, contentType, originalFilename
|
|
|
1093
1561
|
return await extractTextFromFileBuffer(fileBuffer, contentType, originalFilename);
|
|
1094
1562
|
}
|
|
1095
1563
|
} catch (error) {
|
|
1096
|
-
|
|
1564
|
+
logger4.error(`Error extracting text from ${originalFilename}: ${error.message}`);
|
|
1097
1565
|
throw new Error(`Failed to extract text from ${originalFilename}: ${error.message}`);
|
|
1098
1566
|
}
|
|
1099
1567
|
}
|
|
@@ -1138,7 +1606,7 @@ async function splitDocumentIntoChunks(documentText) {
|
|
|
1138
1606
|
const tokenChunkOverlap = DEFAULT_CHUNK_OVERLAP_TOKENS;
|
|
1139
1607
|
const targetCharChunkSize = Math.round(tokenChunkSize * DEFAULT_CHARS_PER_TOKEN);
|
|
1140
1608
|
const targetCharChunkOverlap = Math.round(tokenChunkOverlap * DEFAULT_CHARS_PER_TOKEN);
|
|
1141
|
-
|
|
1609
|
+
logger4.debug(
|
|
1142
1610
|
`Using core splitChunks with settings: tokenChunkSize=${tokenChunkSize}, tokenChunkOverlap=${tokenChunkOverlap}, charChunkSize=${targetCharChunkSize}, charChunkOverlap=${targetCharChunkOverlap}`
|
|
1143
1611
|
);
|
|
1144
1612
|
return await splitChunks(documentText, tokenChunkSize, tokenChunkOverlap);
|
|
@@ -1163,7 +1631,7 @@ async function processAndSaveFragments({
|
|
|
1163
1631
|
for (let i = 0; i < chunks.length; i += concurrencyLimit) {
|
|
1164
1632
|
const batchChunks = chunks.slice(i, i + concurrencyLimit);
|
|
1165
1633
|
const batchOriginalIndices = Array.from({ length: batchChunks.length }, (_, k) => i + k);
|
|
1166
|
-
|
|
1634
|
+
logger4.debug(
|
|
1167
1635
|
`[Document Processor] Batch ${Math.floor(i / concurrencyLimit) + 1}/${Math.ceil(chunks.length / concurrencyLimit)}: processing ${batchChunks.length} chunks (${batchOriginalIndices[0]}-${batchOriginalIndices[batchOriginalIndices.length - 1]})`
|
|
1168
1636
|
);
|
|
1169
1637
|
const contextualizedChunks = await getContextualizedChunks(
|
|
@@ -1184,13 +1652,13 @@ async function processAndSaveFragments({
|
|
|
1184
1652
|
if (!result.success) {
|
|
1185
1653
|
failedCount++;
|
|
1186
1654
|
failedChunks.push(originalChunkIndex);
|
|
1187
|
-
|
|
1655
|
+
logger4.warn(`Failed to process chunk ${originalChunkIndex} for document ${documentId}`);
|
|
1188
1656
|
continue;
|
|
1189
1657
|
}
|
|
1190
1658
|
const contextualizedChunkText = result.text;
|
|
1191
1659
|
const embedding = result.embedding;
|
|
1192
1660
|
if (!embedding || embedding.length === 0) {
|
|
1193
|
-
|
|
1661
|
+
logger4.warn(
|
|
1194
1662
|
`Zero vector detected for chunk ${originalChunkIndex} (document ${documentId}). Embedding: ${JSON.stringify(result.embedding)}`
|
|
1195
1663
|
);
|
|
1196
1664
|
failedCount++;
|
|
@@ -1217,13 +1685,13 @@ async function processAndSaveFragments({
|
|
|
1217
1685
|
await runtime.createMemory(fragmentMemory, "knowledge");
|
|
1218
1686
|
if (originalChunkIndex === chunks.length - 1) {
|
|
1219
1687
|
const docName = documentTitle || documentId.substring(0, 8);
|
|
1220
|
-
|
|
1688
|
+
logger4.info(
|
|
1221
1689
|
`[Document Processor] "${docName}": All ${chunks.length} chunks processed successfully`
|
|
1222
1690
|
);
|
|
1223
1691
|
}
|
|
1224
1692
|
savedCount++;
|
|
1225
1693
|
} catch (saveError) {
|
|
1226
|
-
|
|
1694
|
+
logger4.error(
|
|
1227
1695
|
`Error saving chunk ${originalChunkIndex} to database: ${saveError.message}`,
|
|
1228
1696
|
saveError.stack
|
|
1229
1697
|
);
|
|
@@ -1232,7 +1700,7 @@ async function processAndSaveFragments({
|
|
|
1232
1700
|
}
|
|
1233
1701
|
}
|
|
1234
1702
|
if (i + concurrencyLimit < chunks.length) {
|
|
1235
|
-
await new Promise((
|
|
1703
|
+
await new Promise((resolve2) => setTimeout(resolve2, 500));
|
|
1236
1704
|
}
|
|
1237
1705
|
}
|
|
1238
1706
|
return { savedCount, failedCount, failedChunks };
|
|
@@ -1286,7 +1754,7 @@ async function generateEmbeddingsForChunks(runtime, contextualizedChunks, rateLi
|
|
|
1286
1754
|
text: contextualizedChunk.contextualizedText
|
|
1287
1755
|
};
|
|
1288
1756
|
} catch (error) {
|
|
1289
|
-
|
|
1757
|
+
logger4.error(
|
|
1290
1758
|
`Error generating embedding for chunk ${contextualizedChunk.index}: ${error.message}`
|
|
1291
1759
|
);
|
|
1292
1760
|
return {
|
|
@@ -1305,7 +1773,7 @@ async function getContextualizedChunks(runtime, fullDocumentText, chunks, conten
|
|
|
1305
1773
|
const docName = documentTitle || "Document";
|
|
1306
1774
|
const provider = runtime?.getSetting("TEXT_PROVIDER") || process.env.TEXT_PROVIDER;
|
|
1307
1775
|
const model = runtime?.getSetting("TEXT_MODEL") || process.env.TEXT_MODEL;
|
|
1308
|
-
|
|
1776
|
+
logger4.info(
|
|
1309
1777
|
`[Document Processor] "${docName}": CTX enrichment ${ctxEnabled ? "ENABLED" : "DISABLED"}${ctxEnabled ? ` (${provider}/${model})` : ""}`
|
|
1310
1778
|
);
|
|
1311
1779
|
}
|
|
@@ -1319,7 +1787,7 @@ async function getContextualizedChunks(runtime, fullDocumentText, chunks, conten
|
|
|
1319
1787
|
documentTitle
|
|
1320
1788
|
);
|
|
1321
1789
|
} else if (!ctxEnabled && batchOriginalIndices[0] === 0) {
|
|
1322
|
-
|
|
1790
|
+
logger4.debug(
|
|
1323
1791
|
`[Document Processor] To enable CTX: Set CTX_KNOWLEDGE_ENABLED=true and configure TEXT_PROVIDER/TEXT_MODEL`
|
|
1324
1792
|
);
|
|
1325
1793
|
}
|
|
@@ -1338,10 +1806,10 @@ async function generateContextsInBatch(runtime, fullDocumentText, chunks, conten
|
|
|
1338
1806
|
providerLimits.requestsPerMinute || 60,
|
|
1339
1807
|
providerLimits.tokensPerMinute
|
|
1340
1808
|
);
|
|
1341
|
-
const config = validateModelConfig();
|
|
1809
|
+
const config = validateModelConfig(runtime);
|
|
1342
1810
|
const isUsingOpenRouter = config.TEXT_PROVIDER === "openrouter";
|
|
1343
1811
|
const isUsingCacheCapableModel = isUsingOpenRouter && (config.TEXT_MODEL?.toLowerCase().includes("claude") || config.TEXT_MODEL?.toLowerCase().includes("gemini"));
|
|
1344
|
-
|
|
1812
|
+
logger4.debug(
|
|
1345
1813
|
`[Document Processor] Contextualizing ${chunks.length} chunks with ${config.TEXT_PROVIDER}/${config.TEXT_MODEL} (cache: ${isUsingCacheCapableModel})`
|
|
1346
1814
|
);
|
|
1347
1815
|
const promptConfigs = prepareContextPrompts(
|
|
@@ -1367,13 +1835,13 @@ async function generateContextsInBatch(runtime, fullDocumentText, chunks, conten
|
|
|
1367
1835
|
const generateTextOperation = async () => {
|
|
1368
1836
|
if (useCustomLLM) {
|
|
1369
1837
|
if (item.usesCaching) {
|
|
1370
|
-
return await generateText(item.promptText, item.systemPrompt, {
|
|
1838
|
+
return await generateText(runtime, item.promptText, item.systemPrompt, {
|
|
1371
1839
|
cacheDocument: item.fullDocumentTextForContext,
|
|
1372
1840
|
cacheOptions: { type: "ephemeral" },
|
|
1373
1841
|
autoCacheContextualRetrieval: true
|
|
1374
1842
|
});
|
|
1375
1843
|
} else {
|
|
1376
|
-
return await generateText(item.prompt);
|
|
1844
|
+
return await generateText(runtime, item.prompt);
|
|
1377
1845
|
}
|
|
1378
1846
|
} else {
|
|
1379
1847
|
if (item.usesCaching) {
|
|
@@ -1396,7 +1864,7 @@ async function generateContextsInBatch(runtime, fullDocumentText, chunks, conten
|
|
|
1396
1864
|
const contextualizedText = getChunkWithContext(item.chunkText, generatedContext);
|
|
1397
1865
|
if ((item.originalIndex + 1) % Math.max(1, Math.floor(chunks.length / 3)) === 0 || item.originalIndex === chunks.length - 1) {
|
|
1398
1866
|
const docName = documentTitle || "Document";
|
|
1399
|
-
|
|
1867
|
+
logger4.debug(
|
|
1400
1868
|
`[Document Processor] "${docName}": Context added for ${item.originalIndex + 1}/${chunks.length} chunks`
|
|
1401
1869
|
);
|
|
1402
1870
|
}
|
|
@@ -1406,7 +1874,7 @@ async function generateContextsInBatch(runtime, fullDocumentText, chunks, conten
|
|
|
1406
1874
|
index: item.originalIndex
|
|
1407
1875
|
};
|
|
1408
1876
|
} catch (error) {
|
|
1409
|
-
|
|
1877
|
+
logger4.error(
|
|
1410
1878
|
`Error generating context for chunk ${item.originalIndex}: ${error.message}`,
|
|
1411
1879
|
error.stack
|
|
1412
1880
|
);
|
|
@@ -1427,7 +1895,7 @@ function prepareContextPrompts(chunks, fullDocumentText, contentType, batchIndic
|
|
|
1427
1895
|
if (isUsingCacheCapableModel) {
|
|
1428
1896
|
const cachingPromptInfo = contentType ? getCachingPromptForMimeType(contentType, chunkText) : getCachingContextualizationPrompt(chunkText);
|
|
1429
1897
|
if (cachingPromptInfo.prompt.startsWith("Error:")) {
|
|
1430
|
-
|
|
1898
|
+
logger4.warn(
|
|
1431
1899
|
`Skipping contextualization for chunk ${originalIndex} due to: ${cachingPromptInfo.prompt}`
|
|
1432
1900
|
);
|
|
1433
1901
|
return {
|
|
@@ -1449,7 +1917,7 @@ function prepareContextPrompts(chunks, fullDocumentText, contentType, batchIndic
|
|
|
1449
1917
|
} else {
|
|
1450
1918
|
const prompt = contentType ? getPromptForMimeType(contentType, fullDocumentText, chunkText) : getContextualizationPrompt(fullDocumentText, chunkText);
|
|
1451
1919
|
if (prompt.startsWith("Error:")) {
|
|
1452
|
-
|
|
1920
|
+
logger4.warn(`Skipping contextualization for chunk ${originalIndex} due to: ${prompt}`);
|
|
1453
1921
|
return {
|
|
1454
1922
|
prompt: null,
|
|
1455
1923
|
originalIndex,
|
|
@@ -1467,7 +1935,7 @@ function prepareContextPrompts(chunks, fullDocumentText, contentType, batchIndic
|
|
|
1467
1935
|
};
|
|
1468
1936
|
}
|
|
1469
1937
|
} catch (error) {
|
|
1470
|
-
|
|
1938
|
+
logger4.error(
|
|
1471
1939
|
`Error preparing prompt for chunk ${originalIndex}: ${error.message}`,
|
|
1472
1940
|
error.stack
|
|
1473
1941
|
);
|
|
@@ -1488,7 +1956,7 @@ async function generateEmbeddingWithValidation(runtime, text) {
|
|
|
1488
1956
|
});
|
|
1489
1957
|
const embedding = Array.isArray(embeddingResult) ? embeddingResult : embeddingResult?.embedding;
|
|
1490
1958
|
if (!embedding || embedding.length === 0) {
|
|
1491
|
-
|
|
1959
|
+
logger4.warn(`Zero vector detected. Embedding result: ${JSON.stringify(embedding)}`);
|
|
1492
1960
|
return {
|
|
1493
1961
|
embedding: null,
|
|
1494
1962
|
success: false,
|
|
@@ -1506,12 +1974,12 @@ async function withRateLimitRetry(operation, errorContext, retryDelay) {
|
|
|
1506
1974
|
} catch (error) {
|
|
1507
1975
|
if (error.status === 429) {
|
|
1508
1976
|
const delay = retryDelay || error.headers?.["retry-after"] || 5;
|
|
1509
|
-
|
|
1510
|
-
await new Promise((
|
|
1977
|
+
logger4.warn(`Rate limit hit for ${errorContext}. Retrying after ${delay}s`);
|
|
1978
|
+
await new Promise((resolve2) => setTimeout(resolve2, delay * 1e3));
|
|
1511
1979
|
try {
|
|
1512
1980
|
return await operation();
|
|
1513
1981
|
} catch (retryError) {
|
|
1514
|
-
|
|
1982
|
+
logger4.error(`Failed after retry for ${errorContext}: ${retryError.message}`);
|
|
1515
1983
|
throw retryError;
|
|
1516
1984
|
}
|
|
1517
1985
|
}
|
|
@@ -1546,15 +2014,15 @@ function createRateLimiter(requestsPerMinute, tokensPerMinute) {
|
|
|
1546
2014
|
if (timeToWait > 0) {
|
|
1547
2015
|
const reason = requestLimitExceeded ? "request" : "token";
|
|
1548
2016
|
if (timeToWait > 5e3) {
|
|
1549
|
-
|
|
2017
|
+
logger4.info(
|
|
1550
2018
|
`[Document Processor] Rate limiting: waiting ${Math.round(timeToWait / 1e3)}s due to ${reason} limit`
|
|
1551
2019
|
);
|
|
1552
2020
|
} else {
|
|
1553
|
-
|
|
2021
|
+
logger4.debug(
|
|
1554
2022
|
`[Document Processor] Rate limiting: ${timeToWait}ms wait (${reason} limit)`
|
|
1555
2023
|
);
|
|
1556
2024
|
}
|
|
1557
|
-
await new Promise((
|
|
2025
|
+
await new Promise((resolve2) => setTimeout(resolve2, timeToWait));
|
|
1558
2026
|
}
|
|
1559
2027
|
}
|
|
1560
2028
|
requestTimes.push(now);
|
|
@@ -1572,20 +2040,232 @@ function logKnowledgeGenerationSummary({
|
|
|
1572
2040
|
}) {
|
|
1573
2041
|
if (failedCount > 0 || process.env.NODE_ENV === "development") {
|
|
1574
2042
|
const status = failedCount > 0 ? "PARTIAL" : "SUCCESS";
|
|
1575
|
-
|
|
2043
|
+
logger4.info(
|
|
1576
2044
|
`[Document Processor] ${status}: ${savedCount}/${totalChunks} chunks, CTX: ${ctxEnabled ? "ON" : "OFF"}, Provider: ${providerLimits.provider}`
|
|
1577
2045
|
);
|
|
1578
2046
|
}
|
|
1579
2047
|
if (failedCount > 0) {
|
|
1580
|
-
|
|
2048
|
+
logger4.warn(`[Document Processor] ${failedCount} chunks failed processing`);
|
|
2049
|
+
}
|
|
2050
|
+
}
|
|
2051
|
+
|
|
2052
|
+
// src/docs-loader.ts
|
|
2053
|
+
import { logger as logger5 } from "@elizaos/core";
|
|
2054
|
+
import * as fs from "fs";
|
|
2055
|
+
import * as path from "path";
|
|
2056
|
+
/**
 * Work out which folder knowledge documents should be loaded from.
 *
 * The KNOWLEDGE_PATH environment variable wins when it is set (resolved to an
 * absolute path); otherwise the folder is "<current working directory>/docs".
 * The chosen path is returned even when it does not exist on disk - in that
 * case guidance is logged and the caller is expected to do its own existence
 * check.
 *
 * @returns {string} Path of the knowledge folder (may not exist).
 */
function getKnowledgePath() {
  const configured = process.env.KNOWLEDGE_PATH;
  const usingDefault = !configured;
  const target = usingDefault ? path.join(process.cwd(), "docs") : path.resolve(configured);

  // Happy path: nothing to report when the folder is already there.
  if (fs.existsSync(target)) {
    return target;
  }

  if (usingDefault) {
    // Missing default folder is informational only: explain how to set one up.
    const hints = [
      `Default docs folder does not exist at: ${target}`,
      "To use the knowledge plugin, either:",
      '1. Create a "docs" folder in your project root',
      "2. Set KNOWLEDGE_PATH environment variable to your documents folder"
    ];
    for (const hint of hints) {
      logger5.info(hint);
    }
  } else {
    // An explicitly configured path that is missing is worth a warning.
    logger5.warn(`Knowledge path from environment variable does not exist: ${target}`);
    logger5.warn("Please create the directory or update KNOWLEDGE_PATH environment variable");
  }
  return target;
}
|
|
2075
|
+
/**
 * Load every supported file found under the knowledge folder into the
 * knowledge service.
 *
 * Files are processed one at a time. Dot-files and files whose extension has
 * no known content type are skipped; a failure on one file is logged and
 * counted but does not stop the remaining files.
 *
 * @param service Object exposing `addKnowledge(options)`; the resolved value's
 *   `fragmentCount` is used for logging.
 * @param agentId Agent identifier, used as roomId and entityId of each
 *   document and as the worldId fallback.
 * @param worldId Optional world identifier; falls back to `agentId` when falsy.
 * @returns {Promise<{total: number, successful: number, failed: number}>}
 *   `total` is the number of files discovered (including skipped ones), so
 *   `successful + failed` can be smaller than `total`.
 */
async function loadDocsFromPath(service, agentId, worldId) {
  const docsPath = getKnowledgePath();
  if (!fs.existsSync(docsPath)) {
    logger5.warn(`Knowledge path does not exist: ${docsPath}`);
    return { total: 0, successful: 0, failed: 0 };
  }
  logger5.info(`Loading documents from: ${docsPath}`);
  const files = getAllFiles(docsPath);
  if (files.length === 0) {
    logger5.info("No files found in knowledge path");
    return { total: 0, successful: 0, failed: 0 };
  }
  logger5.info(`Found ${files.length} files to process`);
  let successful = 0;
  let failed = 0;
  for (const filePath of files) {
    try {
      const fileName = path.basename(filePath);
      const fileExt = path.extname(filePath).toLowerCase();
      // Hidden files are never ingested.
      if (fileName.startsWith(".")) {
        continue;
      }
      const contentType = getContentType(fileExt);
      if (!contentType) {
        logger5.debug(`Skipping unsupported file type: ${filePath}`);
        continue;
      }
      // Read asynchronously: this function is async and is run in the
      // background at startup, so a blocking read would stall the event loop
      // for every file in the folder.
      const fileBuffer = await fs.promises.readFile(filePath);
      // Binary documents are handed over base64-encoded, text as UTF-8.
      const isBinary = isBinaryContentType(contentType, fileName);
      const content = isBinary ? fileBuffer.toString("base64") : fileBuffer.toString("utf-8");
      const knowledgeOptions = {
        clientDocumentId: "",
        // Will be generated by the service based on content
        contentType,
        originalFilename: fileName,
        worldId: worldId || agentId,
        content,
        roomId: agentId,
        entityId: agentId
      };
      logger5.debug(`Processing document: ${fileName}`);
      const result = await service.addKnowledge(knowledgeOptions);
      logger5.info(`\u2705 "${fileName}": ${result.fragmentCount} fragments created`);
      successful++;
    } catch (error) {
      // Best effort: record the failure and carry on with the next file.
      logger5.error(`Failed to process file ${filePath}:`, error);
      failed++;
    }
  }
  logger5.info(
    `Document loading complete: ${successful} successful, ${failed} failed out of ${files.length} total`
  );
  return {
    total: files.length,
    successful,
    failed
  };
}
|
|
2133
|
+
/**
 * Recursively collect the paths of all regular files below a directory.
 *
 * Sub-directories named node_modules, .git, .vscode, dist or build are not
 * descended into. Entries that are neither regular files nor directories are
 * ignored. A directory that cannot be read is logged and skipped; whatever
 * was collected up to that point is still returned.
 *
 * @param {string} dirPath Directory to scan.
 * @param {string[]} [files] Accumulator that results are appended to; the
 *   same array is returned.
 * @returns {string[]} The accumulator, holding one joined path per file found.
 */
function getAllFiles(dirPath, files = []) {
  const ignoredDirs = ["node_modules", ".git", ".vscode", "dist", "build"];
  try {
    fs.readdirSync(dirPath, { withFileTypes: true }).forEach((dirent) => {
      const childPath = path.join(dirPath, dirent.name);
      if (dirent.isFile()) {
        files.push(childPath);
        return;
      }
      if (dirent.isDirectory() && !ignoredDirs.includes(dirent.name)) {
        // Depth-first: the nested call appends into the shared accumulator.
        getAllFiles(childPath, files);
      }
    });
  } catch (error) {
    logger5.error(`Error reading directory ${dirPath}:`, error);
  }
  return files;
}
|
|
2151
|
+
/**
 * Map a file extension to the content type used when ingesting the file.
 *
 * The lookup is case-insensitive (".PDF" and ".pdf" are equivalent) and only
 * consults the table's own entries, so names inherited from Object.prototype
 * such as "constructor" or "__proto__" are never returned as a content type.
 *
 * @param {string} extension File extension including the leading dot, e.g. ".md".
 * @returns {string|null} The content type, or null when the extension is not
 *   in the table.
 */
function getContentType(extension) {
  const contentTypes = {
    // Text documents
    ".txt": "text/plain",
    ".md": "text/markdown",
    ".markdown": "text/markdown",
    ".tson": "text/plain",
    ".xml": "application/xml",
    ".csv": "text/csv",
    ".tsv": "text/tab-separated-values",
    ".log": "text/plain",
    // Web files
    ".html": "text/html",
    ".htm": "text/html",
    ".css": "text/css",
    ".scss": "text/x-scss",
    ".sass": "text/x-sass",
    ".less": "text/x-less",
    // JavaScript/TypeScript
    ".js": "text/javascript",
    ".jsx": "text/javascript",
    ".ts": "text/typescript",
    ".tsx": "text/typescript",
    ".mjs": "text/javascript",
    ".cjs": "text/javascript",
    ".vue": "text/x-vue",
    ".svelte": "text/x-svelte",
    ".astro": "text/x-astro",
    // Python
    ".py": "text/x-python",
    ".pyw": "text/x-python",
    ".pyi": "text/x-python",
    // Java/Kotlin/Scala
    ".java": "text/x-java",
    ".kt": "text/x-kotlin",
    ".kts": "text/x-kotlin",
    ".scala": "text/x-scala",
    // C/C++/C#
    ".c": "text/x-c",
    ".cpp": "text/x-c++",
    ".cc": "text/x-c++",
    ".cxx": "text/x-c++",
    ".h": "text/x-c",
    ".hpp": "text/x-c++",
    ".cs": "text/x-csharp",
    // Other languages
    ".php": "text/x-php",
    ".rb": "text/x-ruby",
    ".go": "text/x-go",
    ".rs": "text/x-rust",
    ".swift": "text/x-swift",
    // ".R" is covered by this entry because the lookup key is lower-cased.
    ".r": "text/x-r",
    ".m": "text/x-objectivec",
    ".mm": "text/x-objectivec",
    ".clj": "text/x-clojure",
    ".cljs": "text/x-clojure",
    ".ex": "text/x-elixir",
    ".exs": "text/x-elixir",
    ".lua": "text/x-lua",
    ".pl": "text/x-perl",
    ".pm": "text/x-perl",
    ".dart": "text/x-dart",
    ".hs": "text/x-haskell",
    ".elm": "text/x-elm",
    ".ml": "text/x-ocaml",
    ".fs": "text/x-fsharp",
    ".fsx": "text/x-fsharp",
    ".vb": "text/x-vb",
    ".pas": "text/x-pascal",
    ".d": "text/x-d",
    ".nim": "text/x-nim",
    ".zig": "text/x-zig",
    ".jl": "text/x-julia",
    ".tcl": "text/x-tcl",
    ".awk": "text/x-awk",
    ".sed": "text/x-sed",
    // Shell scripts
    ".sh": "text/x-sh",
    ".bash": "text/x-sh",
    ".zsh": "text/x-sh",
    ".fish": "text/x-fish",
    ".ps1": "text/x-powershell",
    ".bat": "text/x-batch",
    ".cmd": "text/x-batch",
    // Config files
    ".json": "application/json",
    ".yaml": "text/x-yaml",
    ".yml": "text/x-yaml",
    ".toml": "text/x-toml",
    ".ini": "text/x-ini",
    ".cfg": "text/x-ini",
    ".conf": "text/x-ini",
    ".env": "text/plain",
    ".gitignore": "text/plain",
    ".dockerignore": "text/plain",
    ".editorconfig": "text/plain",
    ".properties": "text/x-properties",
    // Database
    ".sql": "text/x-sql",
    // Binary documents
    ".pdf": "application/pdf",
    ".doc": "application/msword",
    ".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
  };
  // Every table key is lower-case, so normalising the input makes the lookup
  // case-insensitive. String() keeps non-string inputs resolving to null
  // instead of throwing on .toLowerCase().
  const key = String(extension).toLowerCase();
  // Own-property check: a bare `contentTypes[key]` would also find members
  // inherited from Object.prototype.
  return Object.hasOwn(contentTypes, key) ? contentTypes[key] : null;
}
|
|
1583
2258
|
|
|
1584
2259
|
// src/service.ts
|
|
2260
|
+
/**
 * Interpret a configuration value as a boolean.
 *
 * Booleans pass through unchanged; a string counts as true only when it
 * equals "true" ignoring case; every other value is false.
 *
 * @param {*} value Raw setting value.
 * @returns {boolean}
 */
var parseBooleanEnv2 = (value) => {
  switch (typeof value) {
    case "boolean":
      return value;
    case "string":
      return value.toLowerCase() === "true";
    default:
      return false;
  }
};
|
|
1585
2265
|
var KnowledgeService = class _KnowledgeService extends Service {
|
|
1586
2266
|
static serviceType = "knowledge";
|
|
1587
|
-
config;
|
|
1588
|
-
knowledgeConfig;
|
|
2267
|
+
config = {};
|
|
2268
|
+
knowledgeConfig = {};
|
|
1589
2269
|
capabilityDescription = "Provides Retrieval Augmented Generation capabilities, including knowledge upload and querying.";
|
|
1590
2270
|
knowledgeProcessingSemaphore;
|
|
1591
2271
|
/**
|
|
@@ -1595,53 +2275,25 @@ var KnowledgeService = class _KnowledgeService extends Service {
|
|
|
1595
2275
|
constructor(runtime, config) {
|
|
1596
2276
|
super(runtime);
|
|
1597
2277
|
this.knowledgeProcessingSemaphore = new Semaphore(10);
|
|
1598
|
-
const parseBooleanEnv = (value) => {
|
|
1599
|
-
if (typeof value === "boolean") return value;
|
|
1600
|
-
if (typeof value === "string") return value.toLowerCase() === "true";
|
|
1601
|
-
return false;
|
|
1602
|
-
};
|
|
1603
|
-
const loadDocsOnStartup = parseBooleanEnv(config?.LOAD_DOCS_ON_STARTUP) || process.env.LOAD_DOCS_ON_STARTUP === "true";
|
|
1604
|
-
this.knowledgeConfig = {
|
|
1605
|
-
CTX_KNOWLEDGE_ENABLED: parseBooleanEnv(config?.CTX_KNOWLEDGE_ENABLED),
|
|
1606
|
-
LOAD_DOCS_ON_STARTUP: loadDocsOnStartup,
|
|
1607
|
-
MAX_INPUT_TOKENS: config?.MAX_INPUT_TOKENS,
|
|
1608
|
-
MAX_OUTPUT_TOKENS: config?.MAX_OUTPUT_TOKENS,
|
|
1609
|
-
EMBEDDING_PROVIDER: config?.EMBEDDING_PROVIDER,
|
|
1610
|
-
TEXT_PROVIDER: config?.TEXT_PROVIDER,
|
|
1611
|
-
TEXT_EMBEDDING_MODEL: config?.TEXT_EMBEDDING_MODEL
|
|
1612
|
-
};
|
|
1613
|
-
this.config = { ...this.knowledgeConfig };
|
|
1614
|
-
logger4.info(
|
|
1615
|
-
`KnowledgeService initialized for agent ${this.runtime.agentId} with config:`,
|
|
1616
|
-
this.knowledgeConfig
|
|
1617
|
-
);
|
|
1618
|
-
if (this.knowledgeConfig.LOAD_DOCS_ON_STARTUP) {
|
|
1619
|
-
logger4.info("LOAD_DOCS_ON_STARTUP is enabled. Loading documents from docs folder...");
|
|
1620
|
-
this.loadInitialDocuments().catch((error) => {
|
|
1621
|
-
logger4.error("Error during initial document loading in KnowledgeService:", error);
|
|
1622
|
-
});
|
|
1623
|
-
} else {
|
|
1624
|
-
logger4.info("LOAD_DOCS_ON_STARTUP is disabled. Skipping automatic document loading.");
|
|
1625
|
-
}
|
|
1626
2278
|
}
|
|
1627
2279
|
async loadInitialDocuments() {
|
|
1628
|
-
|
|
2280
|
+
logger6.info(
|
|
1629
2281
|
`KnowledgeService: Checking for documents to load on startup for agent ${this.runtime.agentId}`
|
|
1630
2282
|
);
|
|
1631
2283
|
try {
|
|
1632
|
-
await new Promise((
|
|
2284
|
+
await new Promise((resolve2) => setTimeout(resolve2, 1e3));
|
|
1633
2285
|
const result = await loadDocsFromPath(this, this.runtime.agentId);
|
|
1634
2286
|
if (result.successful > 0) {
|
|
1635
|
-
|
|
2287
|
+
logger6.info(
|
|
1636
2288
|
`KnowledgeService: Loaded ${result.successful} documents from docs folder on startup for agent ${this.runtime.agentId}`
|
|
1637
2289
|
);
|
|
1638
2290
|
} else {
|
|
1639
|
-
|
|
2291
|
+
logger6.info(
|
|
1640
2292
|
`KnowledgeService: No new documents found to load on startup for agent ${this.runtime.agentId}`
|
|
1641
2293
|
);
|
|
1642
2294
|
}
|
|
1643
2295
|
} catch (error) {
|
|
1644
|
-
|
|
2296
|
+
logger6.error(
|
|
1645
2297
|
`KnowledgeService: Error loading documents on startup for agent ${this.runtime.agentId}:`,
|
|
1646
2298
|
error
|
|
1647
2299
|
);
|
|
@@ -1653,23 +2305,90 @@ var KnowledgeService = class _KnowledgeService extends Service {
|
|
|
1653
2305
|
* @returns Initialized Knowledge service
|
|
1654
2306
|
*/
|
|
1655
2307
|
static async start(runtime) {
|
|
1656
|
-
|
|
2308
|
+
logger6.info(`Starting Knowledge service for agent: ${runtime.agentId}`);
|
|
2309
|
+
logger6.info("Initializing Knowledge Plugin...");
|
|
2310
|
+
let validatedConfig = {};
|
|
2311
|
+
try {
|
|
2312
|
+
logger6.info("Validating model configuration for Knowledge plugin...");
|
|
2313
|
+
logger6.debug(`[Knowledge Plugin] INIT DEBUG:`);
|
|
2314
|
+
logger6.debug(
|
|
2315
|
+
`[Knowledge Plugin] - process.env.CTX_KNOWLEDGE_ENABLED: '${process.env.CTX_KNOWLEDGE_ENABLED}'`
|
|
2316
|
+
);
|
|
2317
|
+
const config = {
|
|
2318
|
+
CTX_KNOWLEDGE_ENABLED: parseBooleanEnv2(runtime.getSetting("CTX_KNOWLEDGE_ENABLED"))
|
|
2319
|
+
};
|
|
2320
|
+
logger6.debug(
|
|
2321
|
+
`[Knowledge Plugin] - config.CTX_KNOWLEDGE_ENABLED: '${config.CTX_KNOWLEDGE_ENABLED}'`
|
|
2322
|
+
);
|
|
2323
|
+
logger6.debug(
|
|
2324
|
+
`[Knowledge Plugin] - runtime.getSetting('CTX_KNOWLEDGE_ENABLED'): '${runtime.getSetting("CTX_KNOWLEDGE_ENABLED")}'`
|
|
2325
|
+
);
|
|
2326
|
+
validatedConfig = validateModelConfig(runtime);
|
|
2327
|
+
const ctxEnabledFromEnv = parseBooleanEnv2(process.env.CTX_KNOWLEDGE_ENABLED);
|
|
2328
|
+
const ctxEnabledFromRuntime = parseBooleanEnv2(runtime.getSetting("CTX_KNOWLEDGE_ENABLED"));
|
|
2329
|
+
const ctxEnabledFromValidated = validatedConfig.CTX_KNOWLEDGE_ENABLED;
|
|
2330
|
+
const finalCtxEnabled = ctxEnabledFromValidated;
|
|
2331
|
+
logger6.debug(`[Knowledge Plugin] CTX_KNOWLEDGE_ENABLED sources:`);
|
|
2332
|
+
logger6.debug(`[Knowledge Plugin] - From env: ${ctxEnabledFromEnv}`);
|
|
2333
|
+
logger6.debug(`[Knowledge Plugin] - From runtime: ${ctxEnabledFromRuntime}`);
|
|
2334
|
+
logger6.debug(`[Knowledge Plugin] - FINAL RESULT: ${finalCtxEnabled}`);
|
|
2335
|
+
if (finalCtxEnabled) {
|
|
2336
|
+
logger6.info("Running in Contextual Knowledge mode with text generation capabilities.");
|
|
2337
|
+
logger6.info(
|
|
2338
|
+
`Using ${validatedConfig.EMBEDDING_PROVIDER || "auto-detected"} for embeddings and ${validatedConfig.TEXT_PROVIDER} for text generation.`
|
|
2339
|
+
);
|
|
2340
|
+
logger6.info(`Text model: ${validatedConfig.TEXT_MODEL}`);
|
|
2341
|
+
} else {
|
|
2342
|
+
const usingPluginOpenAI = !process.env.EMBEDDING_PROVIDER;
|
|
2343
|
+
logger6.warn(
|
|
2344
|
+
"Running in Basic Embedding mode - documents will NOT be enriched with context!"
|
|
2345
|
+
);
|
|
2346
|
+
logger6.info("To enable contextual enrichment:");
|
|
2347
|
+
logger6.info(" - Set CTX_KNOWLEDGE_ENABLED=true");
|
|
2348
|
+
logger6.info(" - Configure TEXT_PROVIDER (anthropic/openai/openrouter/google)");
|
|
2349
|
+
logger6.info(" - Configure TEXT_MODEL and API key");
|
|
2350
|
+
if (usingPluginOpenAI) {
|
|
2351
|
+
logger6.info("Using auto-detected configuration from plugin-openai for embeddings.");
|
|
2352
|
+
} else {
|
|
2353
|
+
logger6.info(
|
|
2354
|
+
`Using ${validatedConfig.EMBEDDING_PROVIDER} for embeddings with ${validatedConfig.TEXT_EMBEDDING_MODEL}.`
|
|
2355
|
+
);
|
|
2356
|
+
}
|
|
2357
|
+
}
|
|
2358
|
+
logger6.success("Model configuration validated successfully.");
|
|
2359
|
+
logger6.success(`Knowledge Plugin initialized for agent: ${runtime.character.name}`);
|
|
2360
|
+
logger6.info(
|
|
2361
|
+
"Knowledge Plugin initialized. Frontend panel should be discoverable via its public route."
|
|
2362
|
+
);
|
|
2363
|
+
} catch (error) {
|
|
2364
|
+
logger6.error("Failed to initialize Knowledge plugin:", error);
|
|
2365
|
+
throw error;
|
|
2366
|
+
}
|
|
1657
2367
|
const service = new _KnowledgeService(runtime);
|
|
2368
|
+
service.config = validatedConfig;
|
|
2369
|
+
if (service.config.LOAD_DOCS_ON_STARTUP) {
|
|
2370
|
+
logger6.info("LOAD_DOCS_ON_STARTUP is enabled. Loading documents from docs folder...");
|
|
2371
|
+
service.loadInitialDocuments().catch((error) => {
|
|
2372
|
+
logger6.error("Error during initial document loading in KnowledgeService:", error);
|
|
2373
|
+
});
|
|
2374
|
+
} else {
|
|
2375
|
+
logger6.info("LOAD_DOCS_ON_STARTUP is disabled. Skipping automatic document loading.");
|
|
2376
|
+
}
|
|
1658
2377
|
if (service.runtime.character?.knowledge && service.runtime.character.knowledge.length > 0) {
|
|
1659
|
-
|
|
2378
|
+
logger6.info(
|
|
1660
2379
|
`KnowledgeService: Processing ${service.runtime.character.knowledge.length} character knowledge items.`
|
|
1661
2380
|
);
|
|
1662
2381
|
const stringKnowledge = service.runtime.character.knowledge.filter(
|
|
1663
2382
|
(item) => typeof item === "string"
|
|
1664
2383
|
);
|
|
1665
2384
|
await service.processCharacterKnowledge(stringKnowledge).catch((err) => {
|
|
1666
|
-
|
|
2385
|
+
logger6.error(
|
|
1667
2386
|
`KnowledgeService: Error processing character knowledge during startup: ${err.message}`,
|
|
1668
2387
|
err
|
|
1669
2388
|
);
|
|
1670
2389
|
});
|
|
1671
2390
|
} else {
|
|
1672
|
-
|
|
2391
|
+
logger6.info(
|
|
1673
2392
|
`KnowledgeService: No character knowledge to process for agent ${runtime.agentId}.`
|
|
1674
2393
|
);
|
|
1675
2394
|
}
|
|
@@ -1680,10 +2399,10 @@ var KnowledgeService = class _KnowledgeService extends Service {
|
|
|
1680
2399
|
* @param runtime Agent runtime
|
|
1681
2400
|
*/
|
|
1682
2401
|
static async stop(runtime) {
|
|
1683
|
-
|
|
2402
|
+
logger6.info(`Stopping Knowledge service for agent: ${runtime.agentId}`);
|
|
1684
2403
|
const service = runtime.getService(_KnowledgeService.serviceType);
|
|
1685
2404
|
if (!service) {
|
|
1686
|
-
|
|
2405
|
+
logger6.warn(`KnowledgeService not found for agent ${runtime.agentId} during stop.`);
|
|
1687
2406
|
}
|
|
1688
2407
|
if (service instanceof _KnowledgeService) {
|
|
1689
2408
|
await service.stop();
|
|
@@ -1693,7 +2412,7 @@ var KnowledgeService = class _KnowledgeService extends Service {
|
|
|
1693
2412
|
* Stop the service
|
|
1694
2413
|
*/
|
|
1695
2414
|
async stop() {
|
|
1696
|
-
|
|
2415
|
+
logger6.info(`Knowledge service stopping for agent: ${this.runtime.character?.name}`);
|
|
1697
2416
|
}
|
|
1698
2417
|
/**
|
|
1699
2418
|
* Add knowledge to the system
|
|
@@ -1708,11 +2427,11 @@ var KnowledgeService = class _KnowledgeService extends Service {
|
|
|
1708
2427
|
maxChars: 2e3
|
|
1709
2428
|
// Use first 2KB of content for ID generation
|
|
1710
2429
|
});
|
|
1711
|
-
|
|
2430
|
+
logger6.info(`Processing "${options.originalFilename}" (${options.contentType})`);
|
|
1712
2431
|
try {
|
|
1713
2432
|
const existingDocument = await this.runtime.getMemoryById(contentBasedId);
|
|
1714
2433
|
if (existingDocument && existingDocument.metadata?.type === MemoryType2.DOCUMENT) {
|
|
1715
|
-
|
|
2434
|
+
logger6.info(`"${options.originalFilename}" already exists - skipping`);
|
|
1716
2435
|
const fragments = await this.runtime.getMemories({
|
|
1717
2436
|
tableName: "knowledge"
|
|
1718
2437
|
});
|
|
@@ -1726,7 +2445,7 @@ var KnowledgeService = class _KnowledgeService extends Service {
|
|
|
1726
2445
|
};
|
|
1727
2446
|
}
|
|
1728
2447
|
} catch (error) {
|
|
1729
|
-
|
|
2448
|
+
logger6.debug(
|
|
1730
2449
|
`Document ${contentBasedId} not found or error checking existence, proceeding with processing: ${error instanceof Error ? error.message : String(error)}`
|
|
1731
2450
|
);
|
|
1732
2451
|
}
|
|
@@ -1753,7 +2472,7 @@ var KnowledgeService = class _KnowledgeService extends Service {
|
|
|
1753
2472
|
}) {
|
|
1754
2473
|
const agentId = passedAgentId || this.runtime.agentId;
|
|
1755
2474
|
try {
|
|
1756
|
-
|
|
2475
|
+
logger6.debug(
|
|
1757
2476
|
`KnowledgeService: Processing document ${originalFilename} (type: ${contentType}) via processDocument for agent: ${agentId}`
|
|
1758
2477
|
);
|
|
1759
2478
|
let fileBuffer = null;
|
|
@@ -1764,7 +2483,7 @@ var KnowledgeService = class _KnowledgeService extends Service {
|
|
|
1764
2483
|
try {
|
|
1765
2484
|
fileBuffer = Buffer.from(content, "base64");
|
|
1766
2485
|
} catch (e) {
|
|
1767
|
-
|
|
2486
|
+
logger6.error(
|
|
1768
2487
|
`KnowledgeService: Failed to convert base64 to buffer for ${originalFilename}: ${e.message}`
|
|
1769
2488
|
);
|
|
1770
2489
|
throw new Error(`Invalid base64 content for PDF file ${originalFilename}`);
|
|
@@ -1775,7 +2494,7 @@ var KnowledgeService = class _KnowledgeService extends Service {
|
|
|
1775
2494
|
try {
|
|
1776
2495
|
fileBuffer = Buffer.from(content, "base64");
|
|
1777
2496
|
} catch (e) {
|
|
1778
|
-
|
|
2497
|
+
logger6.error(
|
|
1779
2498
|
`KnowledgeService: Failed to convert base64 to buffer for ${originalFilename}: ${e.message}`
|
|
1780
2499
|
);
|
|
1781
2500
|
throw new Error(`Invalid base64 content for binary file ${originalFilename}`);
|
|
@@ -1792,11 +2511,11 @@ var KnowledgeService = class _KnowledgeService extends Service {
|
|
|
1792
2511
|
if (invalidCharCount > 0 && invalidCharCount / textLength > 0.1) {
|
|
1793
2512
|
throw new Error("Decoded content contains too many invalid characters");
|
|
1794
2513
|
}
|
|
1795
|
-
|
|
2514
|
+
logger6.debug(`Successfully decoded base64 content for text file: ${originalFilename}`);
|
|
1796
2515
|
extractedText = decodedText;
|
|
1797
2516
|
documentContentToStore = decodedText;
|
|
1798
2517
|
} catch (e) {
|
|
1799
|
-
|
|
2518
|
+
logger6.error(
|
|
1800
2519
|
`Failed to decode base64 for ${originalFilename}: ${e instanceof Error ? e.message : String(e)}`
|
|
1801
2520
|
);
|
|
1802
2521
|
throw new Error(
|
|
@@ -1804,7 +2523,7 @@ var KnowledgeService = class _KnowledgeService extends Service {
|
|
|
1804
2523
|
);
|
|
1805
2524
|
}
|
|
1806
2525
|
} else {
|
|
1807
|
-
|
|
2526
|
+
logger6.debug(`Treating content as plain text for file: ${originalFilename}`);
|
|
1808
2527
|
extractedText = content;
|
|
1809
2528
|
documentContentToStore = content;
|
|
1810
2529
|
}
|
|
@@ -1813,7 +2532,7 @@ var KnowledgeService = class _KnowledgeService extends Service {
|
|
|
1813
2532
|
const noTextError = new Error(
|
|
1814
2533
|
`KnowledgeService: No text content extracted from ${originalFilename} (type: ${contentType}).`
|
|
1815
2534
|
);
|
|
1816
|
-
|
|
2535
|
+
logger6.warn(noTextError.message);
|
|
1817
2536
|
throw noTextError;
|
|
1818
2537
|
}
|
|
1819
2538
|
const documentMemory = createDocumentMemory({
|
|
@@ -1839,14 +2558,14 @@ var KnowledgeService = class _KnowledgeService extends Service {
|
|
|
1839
2558
|
roomId: roomId || agentId,
|
|
1840
2559
|
entityId: entityId || agentId
|
|
1841
2560
|
};
|
|
1842
|
-
|
|
2561
|
+
logger6.debug(
|
|
1843
2562
|
`KnowledgeService: Creating memory with agentId=${agentId}, entityId=${entityId}, roomId=${roomId}, this.runtime.agentId=${this.runtime.agentId}`
|
|
1844
2563
|
);
|
|
1845
|
-
|
|
2564
|
+
logger6.debug(
|
|
1846
2565
|
`KnowledgeService: memoryWithScope agentId=${memoryWithScope.agentId}, entityId=${memoryWithScope.entityId}`
|
|
1847
2566
|
);
|
|
1848
2567
|
await this.runtime.createMemory(memoryWithScope, "documents");
|
|
1849
|
-
|
|
2568
|
+
logger6.debug(
|
|
1850
2569
|
`KnowledgeService: Stored document ${originalFilename} (Memory ID: ${memoryWithScope.id})`
|
|
1851
2570
|
);
|
|
1852
2571
|
const fragmentCount = await processFragmentsSynchronously({
|
|
@@ -1861,14 +2580,14 @@ var KnowledgeService = class _KnowledgeService extends Service {
|
|
|
1861
2580
|
worldId: worldId || agentId,
|
|
1862
2581
|
documentTitle: originalFilename
|
|
1863
2582
|
});
|
|
1864
|
-
|
|
2583
|
+
logger6.debug(`"${originalFilename}" stored with ${fragmentCount} fragments`);
|
|
1865
2584
|
return {
|
|
1866
2585
|
clientDocumentId,
|
|
1867
2586
|
storedDocumentMemoryId: memoryWithScope.id,
|
|
1868
2587
|
fragmentCount
|
|
1869
2588
|
};
|
|
1870
2589
|
} catch (error) {
|
|
1871
|
-
|
|
2590
|
+
logger6.error(
|
|
1872
2591
|
`KnowledgeService: Error processing document ${originalFilename}: ${error.message}`,
|
|
1873
2592
|
error.stack
|
|
1874
2593
|
);
|
|
@@ -1877,7 +2596,7 @@ var KnowledgeService = class _KnowledgeService extends Service {
|
|
|
1877
2596
|
}
|
|
1878
2597
|
// --- Knowledge methods moved from AgentRuntime ---
|
|
1879
2598
|
async handleProcessingError(error, context) {
|
|
1880
|
-
|
|
2599
|
+
logger6.error(`KnowledgeService: Error ${context}:`, error?.message || error || "Unknown error");
|
|
1881
2600
|
throw error;
|
|
1882
2601
|
}
|
|
1883
2602
|
async checkExistingKnowledge(knowledgeId) {
|
|
@@ -1885,9 +2604,9 @@ var KnowledgeService = class _KnowledgeService extends Service {
|
|
|
1885
2604
|
return !!existingDocument;
|
|
1886
2605
|
}
|
|
1887
2606
|
async getKnowledge(message, scope) {
|
|
1888
|
-
|
|
2607
|
+
logger6.debug("KnowledgeService: getKnowledge called for message id: " + message.id);
|
|
1889
2608
|
if (!message?.content?.text || message?.content?.text.trim().length === 0) {
|
|
1890
|
-
|
|
2609
|
+
logger6.warn("KnowledgeService: Invalid or empty message content for knowledge query.");
|
|
1891
2610
|
return [];
|
|
1892
2611
|
}
|
|
1893
2612
|
const embedding = await this.runtime.useModel(ModelType2.TEXT_EMBEDDING, {
|
|
@@ -1926,7 +2645,7 @@ var KnowledgeService = class _KnowledgeService extends Service {
|
|
|
1926
2645
|
try {
|
|
1927
2646
|
const existingMemory = await this.runtime.getMemoryById(memoryId);
|
|
1928
2647
|
if (!existingMemory) {
|
|
1929
|
-
|
|
2648
|
+
logger6.warn(`Cannot enrich memory ${memoryId} - memory not found`);
|
|
1930
2649
|
return;
|
|
1931
2650
|
}
|
|
1932
2651
|
const updatedMetadata = {
|
|
@@ -1947,11 +2666,11 @@ var KnowledgeService = class _KnowledgeService extends Service {
|
|
|
1947
2666
|
id: memoryId,
|
|
1948
2667
|
metadata: updatedMetadata
|
|
1949
2668
|
});
|
|
1950
|
-
|
|
2669
|
+
logger6.debug(
|
|
1951
2670
|
`Enriched conversation memory ${memoryId} with RAG data: ${ragMetadata.totalFragments} fragments`
|
|
1952
2671
|
);
|
|
1953
2672
|
} catch (error) {
|
|
1954
|
-
|
|
2673
|
+
logger6.warn(
|
|
1955
2674
|
`Failed to enrich conversation memory ${memoryId} with RAG data: ${error.message}`
|
|
1956
2675
|
);
|
|
1957
2676
|
}
|
|
@@ -1974,7 +2693,7 @@ var KnowledgeService = class _KnowledgeService extends Service {
|
|
|
1974
2693
|
ragMetadata,
|
|
1975
2694
|
timestamp: now
|
|
1976
2695
|
});
|
|
1977
|
-
|
|
2696
|
+
logger6.debug(`Stored pending RAG metadata for next conversation memory`);
|
|
1978
2697
|
}
|
|
1979
2698
|
/**
|
|
1980
2699
|
* Try to enrich recent conversation memories with pending RAG metadata
|
|
@@ -2008,12 +2727,12 @@ var KnowledgeService = class _KnowledgeService extends Service {
|
|
|
2008
2727
|
}
|
|
2009
2728
|
}
|
|
2010
2729
|
} catch (error) {
|
|
2011
|
-
|
|
2730
|
+
logger6.warn(`Error enriching recent memories with RAG data: ${error.message}`);
|
|
2012
2731
|
}
|
|
2013
2732
|
}
|
|
2014
2733
|
async processCharacterKnowledge(items) {
|
|
2015
|
-
await new Promise((
|
|
2016
|
-
|
|
2734
|
+
await new Promise((resolve2) => setTimeout(resolve2, 1e3));
|
|
2735
|
+
logger6.info(
|
|
2017
2736
|
`KnowledgeService: Processing ${items.length} character knowledge items for agent ${this.runtime.agentId}`
|
|
2018
2737
|
);
|
|
2019
2738
|
const processingPromises = items.map(async (item) => {
|
|
@@ -2026,12 +2745,12 @@ var KnowledgeService = class _KnowledgeService extends Service {
|
|
|
2026
2745
|
// A constant identifier for character knowledge
|
|
2027
2746
|
});
|
|
2028
2747
|
if (await this.checkExistingKnowledge(knowledgeId)) {
|
|
2029
|
-
|
|
2748
|
+
logger6.debug(
|
|
2030
2749
|
`KnowledgeService: Character knowledge item with ID ${knowledgeId} already exists. Skipping.`
|
|
2031
2750
|
);
|
|
2032
2751
|
return;
|
|
2033
2752
|
}
|
|
2034
|
-
|
|
2753
|
+
logger6.debug(
|
|
2035
2754
|
`KnowledgeService: Processing character knowledge for ${this.runtime.character?.name} - ${item.slice(0, 100)}`
|
|
2036
2755
|
);
|
|
2037
2756
|
let metadata = {
|
|
@@ -2082,7 +2801,7 @@ var KnowledgeService = class _KnowledgeService extends Service {
|
|
|
2082
2801
|
}
|
|
2083
2802
|
});
|
|
2084
2803
|
await Promise.all(processingPromises);
|
|
2085
|
-
|
|
2804
|
+
logger6.info(
|
|
2086
2805
|
`KnowledgeService: Finished processing character knowledge for agent ${this.runtime.agentId}.`
|
|
2087
2806
|
);
|
|
2088
2807
|
}
|
|
@@ -2102,7 +2821,7 @@ var KnowledgeService = class _KnowledgeService extends Service {
|
|
|
2102
2821
|
worldId: scope?.worldId ?? this.runtime.agentId,
|
|
2103
2822
|
entityId: scope?.entityId ?? this.runtime.agentId
|
|
2104
2823
|
};
|
|
2105
|
-
|
|
2824
|
+
logger6.debug(`KnowledgeService: _internalAddKnowledge called for item ID ${item.id}`);
|
|
2106
2825
|
const documentMemory = {
|
|
2107
2826
|
id: item.id,
|
|
2108
2827
|
// This ID should be the unique ID for the document being added.
|
|
@@ -2124,7 +2843,7 @@ var KnowledgeService = class _KnowledgeService extends Service {
|
|
|
2124
2843
|
};
|
|
2125
2844
|
const existingDocument = await this.runtime.getMemoryById(item.id);
|
|
2126
2845
|
if (existingDocument) {
|
|
2127
|
-
|
|
2846
|
+
logger6.debug(
|
|
2128
2847
|
`KnowledgeService: Document ${item.id} already exists in _internalAddKnowledge, updating...`
|
|
2129
2848
|
);
|
|
2130
2849
|
await this.runtime.updateMemory({
|
|
@@ -2148,13 +2867,13 @@ var KnowledgeService = class _KnowledgeService extends Service {
|
|
|
2148
2867
|
await this.processDocumentFragment(fragment);
|
|
2149
2868
|
fragmentsProcessed++;
|
|
2150
2869
|
} catch (error) {
|
|
2151
|
-
|
|
2870
|
+
logger6.error(
|
|
2152
2871
|
`KnowledgeService: Error processing fragment ${fragment.id} for document ${item.id}:`,
|
|
2153
2872
|
error
|
|
2154
2873
|
);
|
|
2155
2874
|
}
|
|
2156
2875
|
}
|
|
2157
|
-
|
|
2876
|
+
logger6.debug(
|
|
2158
2877
|
`KnowledgeService: Processed ${fragmentsProcessed}/${fragments.length} fragments for document ${item.id}.`
|
|
2159
2878
|
);
|
|
2160
2879
|
}
|
|
@@ -2163,7 +2882,7 @@ var KnowledgeService = class _KnowledgeService extends Service {
|
|
|
2163
2882
|
await this.runtime.addEmbeddingToMemory(fragment);
|
|
2164
2883
|
await this.runtime.createMemory(fragment, "knowledge");
|
|
2165
2884
|
} catch (error) {
|
|
2166
|
-
|
|
2885
|
+
logger6.error(
|
|
2167
2886
|
`KnowledgeService: Error processing fragment ${fragment.id}:`,
|
|
2168
2887
|
error instanceof Error ? error.message : String(error)
|
|
2169
2888
|
);
|
|
@@ -2228,7 +2947,7 @@ var KnowledgeService = class _KnowledgeService extends Service {
|
|
|
2228
2947
|
*/
|
|
2229
2948
|
async deleteMemory(memoryId) {
|
|
2230
2949
|
await this.runtime.deleteMemory(memoryId);
|
|
2231
|
-
|
|
2950
|
+
logger6.info(
|
|
2232
2951
|
`KnowledgeService: Deleted memory ${memoryId} for agent ${this.runtime.agentId}. Assumed it was a document or related fragment.`
|
|
2233
2952
|
);
|
|
2234
2953
|
}
|
|
@@ -2236,7 +2955,7 @@ var KnowledgeService = class _KnowledgeService extends Service {
|
|
|
2236
2955
|
};
|
|
2237
2956
|
|
|
2238
2957
|
// src/provider.ts
|
|
2239
|
-
import { addHeader, logger as
|
|
2958
|
+
import { addHeader, logger as logger7 } from "@elizaos/core";
|
|
2240
2959
|
var knowledgeProvider = {
|
|
2241
2960
|
name: "KNOWLEDGE",
|
|
2242
2961
|
description: "Knowledge from the knowledge base that the agent knows, retrieved whenever the agent needs to answer a question about their expertise.",
|
|
@@ -2274,11 +2993,11 @@ var knowledgeProvider = {
|
|
|
2274
2993
|
try {
|
|
2275
2994
|
await knowledgeService.enrichRecentMemoriesWithPendingRAG();
|
|
2276
2995
|
} catch (error) {
|
|
2277
|
-
|
|
2996
|
+
logger7.warn("RAG memory enrichment failed:", error.message);
|
|
2278
2997
|
}
|
|
2279
2998
|
}, 2e3);
|
|
2280
2999
|
} catch (error) {
|
|
2281
|
-
|
|
3000
|
+
logger7.warn("RAG memory enrichment failed:", error.message);
|
|
2282
3001
|
}
|
|
2283
3002
|
}
|
|
2284
3003
|
return {
|
|
@@ -2305,9 +3024,9 @@ var knowledgeProvider = {
|
|
|
2305
3024
|
|
|
2306
3025
|
// src/tests.ts
|
|
2307
3026
|
import { MemoryType as MemoryType3, ModelType as ModelType3 } from "@elizaos/core";
|
|
2308
|
-
import { Buffer as
|
|
2309
|
-
import * as
|
|
2310
|
-
import * as
|
|
3027
|
+
import { Buffer as Buffer3 } from "buffer";
|
|
3028
|
+
import * as fs2 from "fs";
|
|
3029
|
+
import * as path2 from "path";
|
|
2311
3030
|
var mockLogger = {
|
|
2312
3031
|
info: (() => {
|
|
2313
3032
|
const fn = (...args) => {
|
|
@@ -2704,9 +3423,9 @@ trailer
|
|
|
2704
3423
|
startxref
|
|
2705
3424
|
${465 + content.length}
|
|
2706
3425
|
%%EOF`;
|
|
2707
|
-
return
|
|
3426
|
+
return Buffer3.from(pdfContent);
|
|
2708
3427
|
}
|
|
2709
|
-
return
|
|
3428
|
+
return Buffer3.from(content, "utf-8");
|
|
2710
3429
|
}
|
|
2711
3430
|
var KnowledgeTestSuite = class {
|
|
2712
3431
|
name = "knowledge";
|
|
@@ -2719,10 +3438,10 @@ var KnowledgeTestSuite = class {
|
|
|
2719
3438
|
const originalEnv = { ...process.env };
|
|
2720
3439
|
delete process.env.KNOWLEDGE_PATH;
|
|
2721
3440
|
try {
|
|
2722
|
-
const docsPath =
|
|
2723
|
-
const docsExists =
|
|
3441
|
+
const docsPath = path2.join(process.cwd(), "docs");
|
|
3442
|
+
const docsExists = fs2.existsSync(docsPath);
|
|
2724
3443
|
if (!docsExists) {
|
|
2725
|
-
|
|
3444
|
+
fs2.mkdirSync(docsPath, { recursive: true });
|
|
2726
3445
|
}
|
|
2727
3446
|
await index_default.init({}, runtime);
|
|
2728
3447
|
const errorCalls = mockLogger.error.calls;
|
|
@@ -2730,7 +3449,7 @@ var KnowledgeTestSuite = class {
|
|
|
2730
3449
|
throw new Error(`Unexpected error during init: ${errorCalls[0]}`);
|
|
2731
3450
|
}
|
|
2732
3451
|
if (!docsExists) {
|
|
2733
|
-
|
|
3452
|
+
fs2.rmSync(docsPath, { recursive: true, force: true });
|
|
2734
3453
|
}
|
|
2735
3454
|
} finally {
|
|
2736
3455
|
process.env = originalEnv;
|
|
@@ -2743,13 +3462,13 @@ var KnowledgeTestSuite = class {
|
|
|
2743
3462
|
const originalEnv = { ...process.env };
|
|
2744
3463
|
delete process.env.KNOWLEDGE_PATH;
|
|
2745
3464
|
try {
|
|
2746
|
-
const docsPath =
|
|
2747
|
-
if (
|
|
2748
|
-
|
|
3465
|
+
const docsPath = path2.join(process.cwd(), "docs");
|
|
3466
|
+
if (fs2.existsSync(docsPath)) {
|
|
3467
|
+
fs2.renameSync(docsPath, docsPath + ".backup");
|
|
2749
3468
|
}
|
|
2750
3469
|
await index_default.init({}, runtime);
|
|
2751
|
-
if (
|
|
2752
|
-
|
|
3470
|
+
if (fs2.existsSync(docsPath + ".backup")) {
|
|
3471
|
+
fs2.renameSync(docsPath + ".backup", docsPath);
|
|
2753
3472
|
}
|
|
2754
3473
|
} finally {
|
|
2755
3474
|
process.env = originalEnv;
|
|
@@ -2790,7 +3509,7 @@ var KnowledgeTestSuite = class {
|
|
|
2790
3509
|
{
|
|
2791
3510
|
name: "Should handle empty file buffer",
|
|
2792
3511
|
fn: async (runtime) => {
|
|
2793
|
-
const emptyBuffer =
|
|
3512
|
+
const emptyBuffer = Buffer3.alloc(0);
|
|
2794
3513
|
try {
|
|
2795
3514
|
await extractTextFromDocument(emptyBuffer, "text/plain", "empty.txt");
|
|
2796
3515
|
throw new Error("Should have thrown error for empty buffer");
|
|
@@ -2998,7 +3717,7 @@ var KnowledgeTestSuite = class {
|
|
|
2998
3717
|
}
|
|
2999
3718
|
});
|
|
3000
3719
|
const service = await KnowledgeService.start(knowledgeRuntime);
|
|
3001
|
-
await new Promise((
|
|
3720
|
+
await new Promise((resolve2) => setTimeout(resolve2, 2e3));
|
|
3002
3721
|
const memories = await knowledgeRuntime.getMemories({
|
|
3003
3722
|
tableName: "documents",
|
|
3004
3723
|
entityId: knowledgeRuntime.agentId
|
|
@@ -3190,9 +3909,9 @@ var KnowledgeTestSuite = class {
|
|
|
3190
3909
|
var tests_default = new KnowledgeTestSuite();
|
|
3191
3910
|
|
|
3192
3911
|
// src/actions.ts
|
|
3193
|
-
import { logger as
|
|
3194
|
-
import * as
|
|
3195
|
-
import * as
|
|
3912
|
+
import { logger as logger8, stringToUuid } from "@elizaos/core";
|
|
3913
|
+
import * as fs3 from "fs";
|
|
3914
|
+
import * as path3 from "path";
|
|
3196
3915
|
var processKnowledgeAction = {
|
|
3197
3916
|
name: "PROCESS_KNOWLEDGE",
|
|
3198
3917
|
description: "Process and store knowledge from a file path or text content into the knowledge base",
|
|
@@ -3248,7 +3967,7 @@ var processKnowledgeAction = {
|
|
|
3248
3967
|
const hasPath = pathPattern.test(text);
|
|
3249
3968
|
const service = runtime.getService(KnowledgeService.serviceType);
|
|
3250
3969
|
if (!service) {
|
|
3251
|
-
|
|
3970
|
+
logger8.warn("Knowledge service not available for PROCESS_KNOWLEDGE action");
|
|
3252
3971
|
return false;
|
|
3253
3972
|
}
|
|
3254
3973
|
return hasKeyword || hasPath;
|
|
@@ -3265,7 +3984,7 @@ var processKnowledgeAction = {
|
|
|
3265
3984
|
let response;
|
|
3266
3985
|
if (pathMatch) {
|
|
3267
3986
|
const filePath = pathMatch[0];
|
|
3268
|
-
if (!
|
|
3987
|
+
if (!fs3.existsSync(filePath)) {
|
|
3269
3988
|
response = {
|
|
3270
3989
|
text: `I couldn't find the file at ${filePath}. Please check the path and try again.`
|
|
3271
3990
|
};
|
|
@@ -3274,9 +3993,9 @@ var processKnowledgeAction = {
|
|
|
3274
3993
|
}
|
|
3275
3994
|
return;
|
|
3276
3995
|
}
|
|
3277
|
-
const fileBuffer =
|
|
3278
|
-
const fileName =
|
|
3279
|
-
const fileExt =
|
|
3996
|
+
const fileBuffer = fs3.readFileSync(filePath);
|
|
3997
|
+
const fileName = path3.basename(filePath);
|
|
3998
|
+
const fileExt = path3.extname(filePath).toLowerCase();
|
|
3280
3999
|
let contentType = "text/plain";
|
|
3281
4000
|
if (fileExt === ".pdf") contentType = "application/pdf";
|
|
3282
4001
|
else if (fileExt === ".docx")
|
|
@@ -3326,7 +4045,7 @@ var processKnowledgeAction = {
|
|
|
3326
4045
|
await callback(response);
|
|
3327
4046
|
}
|
|
3328
4047
|
} catch (error) {
|
|
3329
|
-
|
|
4048
|
+
logger8.error("Error in PROCESS_KNOWLEDGE action:", error);
|
|
3330
4049
|
const errorResponse = {
|
|
3331
4050
|
text: `I encountered an error while processing the knowledge: ${error instanceof Error ? error.message : "Unknown error"}`
|
|
3332
4051
|
};
|
|
@@ -3417,7 +4136,7 @@ ${formattedResults}`
|
|
|
3417
4136
|
await callback(response);
|
|
3418
4137
|
}
|
|
3419
4138
|
} catch (error) {
|
|
3420
|
-
|
|
4139
|
+
logger8.error("Error in SEARCH_KNOWLEDGE action:", error);
|
|
3421
4140
|
const errorResponse = {
|
|
3422
4141
|
text: `I encountered an error while searching the knowledge base: ${error instanceof Error ? error.message : "Unknown error"}`
|
|
3423
4142
|
};
|
|
@@ -3430,9 +4149,9 @@ ${formattedResults}`
|
|
|
3430
4149
|
var knowledgeActions = [processKnowledgeAction, searchKnowledgeAction];
|
|
3431
4150
|
|
|
3432
4151
|
// src/routes.ts
|
|
3433
|
-
import { createUniqueUuid as createUniqueUuid2, logger as
|
|
3434
|
-
import
|
|
3435
|
-
import
|
|
4152
|
+
import { createUniqueUuid as createUniqueUuid2, logger as logger9, ModelType as ModelType4 } from "@elizaos/core";
|
|
4153
|
+
import fs4 from "fs";
|
|
4154
|
+
import path4 from "path";
|
|
3436
4155
|
import multer from "multer";
|
|
3437
4156
|
var createUploadMiddleware = (runtime) => {
|
|
3438
4157
|
const uploadDir = runtime.getSetting("KNOWLEDGE_UPLOAD_DIR") || "/tmp/uploads/";
|
|
@@ -3477,11 +4196,11 @@ function sendError(res, status, code, message, details) {
|
|
|
3477
4196
|
res.end(JSON.stringify({ success: false, error: { code, message, details } }));
|
|
3478
4197
|
}
|
|
3479
4198
|
var cleanupFile = (filePath) => {
|
|
3480
|
-
if (filePath &&
|
|
4199
|
+
if (filePath && fs4.existsSync(filePath)) {
|
|
3481
4200
|
try {
|
|
3482
|
-
|
|
4201
|
+
fs4.unlinkSync(filePath);
|
|
3483
4202
|
} catch (error) {
|
|
3484
|
-
|
|
4203
|
+
logger9.error(`Error cleaning up file ${filePath}:`, error);
|
|
3485
4204
|
}
|
|
3486
4205
|
}
|
|
3487
4206
|
};
|
|
@@ -3508,15 +4227,15 @@ async function uploadKnowledgeHandler(req, res, runtime) {
|
|
|
3508
4227
|
}
|
|
3509
4228
|
const invalidFiles = files.filter((file) => {
|
|
3510
4229
|
if (file.size === 0) {
|
|
3511
|
-
|
|
4230
|
+
logger9.warn(`File ${file.originalname} is empty`);
|
|
3512
4231
|
return true;
|
|
3513
4232
|
}
|
|
3514
4233
|
if (!file.originalname || file.originalname.trim() === "") {
|
|
3515
|
-
|
|
4234
|
+
logger9.warn(`File has no name`);
|
|
3516
4235
|
return true;
|
|
3517
4236
|
}
|
|
3518
4237
|
if (!file.path) {
|
|
3519
|
-
|
|
4238
|
+
logger9.warn(`File ${file.originalname} has no path`);
|
|
3520
4239
|
return true;
|
|
3521
4240
|
}
|
|
3522
4241
|
return false;
|
|
@@ -3533,7 +4252,7 @@ async function uploadKnowledgeHandler(req, res, runtime) {
|
|
|
3533
4252
|
}
|
|
3534
4253
|
const agentId = req.body.agentId || req.query.agentId;
|
|
3535
4254
|
if (!agentId) {
|
|
3536
|
-
|
|
4255
|
+
logger9.error("[Document Processor] \u274C No agent ID provided in upload request");
|
|
3537
4256
|
return sendError(
|
|
3538
4257
|
res,
|
|
3539
4258
|
400,
|
|
@@ -3542,15 +4261,15 @@ async function uploadKnowledgeHandler(req, res, runtime) {
|
|
|
3542
4261
|
);
|
|
3543
4262
|
}
|
|
3544
4263
|
const worldId = req.body.worldId || agentId;
|
|
3545
|
-
|
|
4264
|
+
logger9.info(`[Document Processor] \u{1F4E4} Processing file upload for agent: ${agentId}`);
|
|
3546
4265
|
const processingPromises = files.map(async (file, index) => {
|
|
3547
4266
|
const originalFilename = file.originalname;
|
|
3548
4267
|
const filePath = file.path;
|
|
3549
|
-
|
|
4268
|
+
logger9.debug(
|
|
3550
4269
|
`[Document Processor] \u{1F4C4} Processing file: ${originalFilename} (agent: ${agentId})`
|
|
3551
4270
|
);
|
|
3552
4271
|
try {
|
|
3553
|
-
const fileBuffer = await
|
|
4272
|
+
const fileBuffer = await fs4.promises.readFile(filePath);
|
|
3554
4273
|
const base64Content = fileBuffer.toString("base64");
|
|
3555
4274
|
const addKnowledgeOpts = {
|
|
3556
4275
|
agentId,
|
|
@@ -3581,7 +4300,7 @@ async function uploadKnowledgeHandler(req, res, runtime) {
|
|
|
3581
4300
|
status: "success"
|
|
3582
4301
|
};
|
|
3583
4302
|
} catch (fileError) {
|
|
3584
|
-
|
|
4303
|
+
logger9.error(
|
|
3585
4304
|
`[Document Processor] \u274C Error processing file ${file.originalname}:`,
|
|
3586
4305
|
fileError
|
|
3587
4306
|
);
|
|
@@ -3604,7 +4323,7 @@ async function uploadKnowledgeHandler(req, res, runtime) {
|
|
|
3604
4323
|
}
|
|
3605
4324
|
const agentId = req.body.agentId || req.query.agentId;
|
|
3606
4325
|
if (!agentId) {
|
|
3607
|
-
|
|
4326
|
+
logger9.error("[Document Processor] \u274C No agent ID provided in URL request");
|
|
3608
4327
|
return sendError(
|
|
3609
4328
|
res,
|
|
3610
4329
|
400,
|
|
@@ -3612,7 +4331,7 @@ async function uploadKnowledgeHandler(req, res, runtime) {
|
|
|
3612
4331
|
"Agent ID is required for uploading knowledge from URLs"
|
|
3613
4332
|
);
|
|
3614
4333
|
}
|
|
3615
|
-
|
|
4334
|
+
logger9.info(`[Document Processor] \u{1F4E4} Processing URL upload for agent: ${agentId}`);
|
|
3616
4335
|
const processingPromises = fileUrls.map(async (fileUrl) => {
|
|
3617
4336
|
try {
|
|
3618
4337
|
const normalizedUrl = normalizeS3Url(fileUrl);
|
|
@@ -3620,7 +4339,7 @@ async function uploadKnowledgeHandler(req, res, runtime) {
|
|
|
3620
4339
|
const pathSegments = urlObject.pathname.split("/");
|
|
3621
4340
|
const encodedFilename = pathSegments[pathSegments.length - 1] || "document.pdf";
|
|
3622
4341
|
const originalFilename = decodeURIComponent(encodedFilename);
|
|
3623
|
-
|
|
4342
|
+
logger9.debug(`[Document Processor] \u{1F310} Fetching content from URL: ${fileUrl}`);
|
|
3624
4343
|
const { content, contentType: fetchedContentType } = await fetchUrlContent(fileUrl);
|
|
3625
4344
|
let contentType = fetchedContentType;
|
|
3626
4345
|
if (contentType === "application/octet-stream") {
|
|
@@ -3660,7 +4379,7 @@ async function uploadKnowledgeHandler(req, res, runtime) {
|
|
|
3660
4379
|
url: normalizedUrl
|
|
3661
4380
|
}
|
|
3662
4381
|
};
|
|
3663
|
-
|
|
4382
|
+
logger9.debug(
|
|
3664
4383
|
`[Document Processor] \u{1F4C4} Processing knowledge from URL: ${originalFilename} (type: ${contentType})`
|
|
3665
4384
|
);
|
|
3666
4385
|
const result = await service.addKnowledge(addKnowledgeOpts);
|
|
@@ -3675,7 +4394,7 @@ async function uploadKnowledgeHandler(req, res, runtime) {
|
|
|
3675
4394
|
status: "success"
|
|
3676
4395
|
};
|
|
3677
4396
|
} catch (urlError) {
|
|
3678
|
-
|
|
4397
|
+
logger9.error(`[Document Processor] \u274C Error processing URL ${fileUrl}:`, urlError);
|
|
3679
4398
|
return {
|
|
3680
4399
|
fileUrl,
|
|
3681
4400
|
status: "error_processing",
|
|
@@ -3687,7 +4406,7 @@ async function uploadKnowledgeHandler(req, res, runtime) {
|
|
|
3687
4406
|
sendSuccess(res, results);
|
|
3688
4407
|
}
|
|
3689
4408
|
} catch (error) {
|
|
3690
|
-
|
|
4409
|
+
logger9.error("[Document Processor] \u274C Error processing knowledge:", error);
|
|
3691
4410
|
if (hasUploadedFiles) {
|
|
3692
4411
|
cleanupFiles(req.files);
|
|
3693
4412
|
}
|
|
@@ -3726,7 +4445,7 @@ async function getKnowledgeDocumentsHandler(req, res, runtime) {
|
|
|
3726
4445
|
// Or if the URL is stored in the metadata (check if it exists)
|
|
3727
4446
|
memory.metadata && "url" in memory.metadata && typeof memory.metadata.url === "string" && normalizedRequestUrls.includes(normalizeS3Url(memory.metadata.url))
|
|
3728
4447
|
);
|
|
3729
|
-
|
|
4448
|
+
logger9.debug(
|
|
3730
4449
|
`[Document Processor] \u{1F50D} Filtered documents by URLs: ${fileUrls.length} URLs, found ${filteredMemories.length} matching documents`
|
|
3731
4450
|
);
|
|
3732
4451
|
}
|
|
@@ -3741,12 +4460,12 @@ async function getKnowledgeDocumentsHandler(req, res, runtime) {
|
|
|
3741
4460
|
totalRequested: fileUrls ? fileUrls.length : 0
|
|
3742
4461
|
});
|
|
3743
4462
|
} catch (error) {
|
|
3744
|
-
|
|
4463
|
+
logger9.error("[Document Processor] \u274C Error retrieving documents:", error);
|
|
3745
4464
|
sendError(res, 500, "RETRIEVAL_ERROR", "Failed to retrieve documents", error.message);
|
|
3746
4465
|
}
|
|
3747
4466
|
}
|
|
3748
4467
|
async function deleteKnowledgeDocumentHandler(req, res, runtime) {
|
|
3749
|
-
|
|
4468
|
+
logger9.debug(`[Document Processor] \u{1F5D1}\uFE0F DELETE request for document: ${req.params.knowledgeId}`);
|
|
3750
4469
|
const service = runtime.getService(KnowledgeService.serviceType);
|
|
3751
4470
|
if (!service) {
|
|
3752
4471
|
return sendError(
|
|
@@ -3758,22 +4477,22 @@ async function deleteKnowledgeDocumentHandler(req, res, runtime) {
|
|
|
3758
4477
|
}
|
|
3759
4478
|
const knowledgeId = req.params.knowledgeId;
|
|
3760
4479
|
if (!knowledgeId || knowledgeId.length < 36) {
|
|
3761
|
-
|
|
4480
|
+
logger9.error(`[Document Processor] \u274C Invalid knowledge ID format: ${knowledgeId}`);
|
|
3762
4481
|
return sendError(res, 400, "INVALID_ID", "Invalid Knowledge ID format");
|
|
3763
4482
|
}
|
|
3764
4483
|
try {
|
|
3765
4484
|
const typedKnowledgeId = knowledgeId;
|
|
3766
|
-
|
|
4485
|
+
logger9.debug(`[Document Processor] \u{1F5D1}\uFE0F Deleting document: ${typedKnowledgeId}`);
|
|
3767
4486
|
await service.deleteMemory(typedKnowledgeId);
|
|
3768
|
-
|
|
4487
|
+
logger9.info(`[Document Processor] \u2705 Successfully deleted document: ${typedKnowledgeId}`);
|
|
3769
4488
|
sendSuccess(res, null, 204);
|
|
3770
4489
|
} catch (error) {
|
|
3771
|
-
|
|
4490
|
+
logger9.error(`[Document Processor] \u274C Error deleting document ${knowledgeId}:`, error);
|
|
3772
4491
|
sendError(res, 500, "DELETE_ERROR", "Failed to delete document", error.message);
|
|
3773
4492
|
}
|
|
3774
4493
|
}
|
|
3775
4494
|
async function getKnowledgeByIdHandler(req, res, runtime) {
|
|
3776
|
-
|
|
4495
|
+
logger9.debug(`[Document Processor] \u{1F50D} GET request for document: ${req.params.knowledgeId}`);
|
|
3777
4496
|
const service = runtime.getService(KnowledgeService.serviceType);
|
|
3778
4497
|
if (!service) {
|
|
3779
4498
|
return sendError(
|
|
@@ -3785,11 +4504,11 @@ async function getKnowledgeByIdHandler(req, res, runtime) {
|
|
|
3785
4504
|
}
|
|
3786
4505
|
const knowledgeId = req.params.knowledgeId;
|
|
3787
4506
|
if (!knowledgeId || knowledgeId.length < 36) {
|
|
3788
|
-
|
|
4507
|
+
logger9.error(`[Document Processor] \u274C Invalid knowledge ID format: ${knowledgeId}`);
|
|
3789
4508
|
return sendError(res, 400, "INVALID_ID", "Invalid Knowledge ID format");
|
|
3790
4509
|
}
|
|
3791
4510
|
try {
|
|
3792
|
-
|
|
4511
|
+
logger9.debug(`[Document Processor] \u{1F50D} Retrieving document: ${knowledgeId}`);
|
|
3793
4512
|
const agentId = req.query.agentId;
|
|
3794
4513
|
const memories = await service.getMemories({
|
|
3795
4514
|
tableName: "documents",
|
|
@@ -3806,19 +4525,19 @@ async function getKnowledgeByIdHandler(req, res, runtime) {
|
|
|
3806
4525
|
};
|
|
3807
4526
|
sendSuccess(res, { document: cleanDocument });
|
|
3808
4527
|
} catch (error) {
|
|
3809
|
-
|
|
4528
|
+
logger9.error(`[Document Processor] \u274C Error retrieving document ${knowledgeId}:`, error);
|
|
3810
4529
|
sendError(res, 500, "RETRIEVAL_ERROR", "Failed to retrieve document", error.message);
|
|
3811
4530
|
}
|
|
3812
4531
|
}
|
|
3813
4532
|
async function knowledgePanelHandler(req, res, runtime) {
|
|
3814
4533
|
const agentId = runtime.agentId;
|
|
3815
|
-
|
|
4534
|
+
logger9.debug(`[Document Processor] \u{1F310} Serving knowledge panel for agent ${agentId}`);
|
|
3816
4535
|
try {
|
|
3817
|
-
const currentDir =
|
|
3818
|
-
const frontendPath =
|
|
3819
|
-
|
|
3820
|
-
if (
|
|
3821
|
-
const html = await
|
|
4536
|
+
const currentDir = path4.dirname(new URL(import.meta.url).pathname);
|
|
4537
|
+
const frontendPath = path4.join(currentDir, "../dist/index.html");
|
|
4538
|
+
logger9.debug(`[Document Processor] \u{1F310} Looking for frontend at: ${frontendPath}`);
|
|
4539
|
+
if (fs4.existsSync(frontendPath)) {
|
|
4540
|
+
const html = await fs4.promises.readFile(frontendPath, "utf8");
|
|
3822
4541
|
const injectedHtml = html.replace(
|
|
3823
4542
|
"<head>",
|
|
3824
4543
|
`<head>
|
|
@@ -3834,10 +4553,10 @@ async function knowledgePanelHandler(req, res, runtime) {
|
|
|
3834
4553
|
} else {
|
|
3835
4554
|
let cssFile = "index.css";
|
|
3836
4555
|
let jsFile = "index.js";
|
|
3837
|
-
const manifestPath =
|
|
3838
|
-
if (
|
|
4556
|
+
const manifestPath = path4.join(currentDir, "../dist/manifest.json");
|
|
4557
|
+
if (fs4.existsSync(manifestPath)) {
|
|
3839
4558
|
try {
|
|
3840
|
-
const manifestContent = await
|
|
4559
|
+
const manifestContent = await fs4.promises.readFile(manifestPath, "utf8");
|
|
3841
4560
|
const manifest = JSON.parse(manifestContent);
|
|
3842
4561
|
for (const [key, value] of Object.entries(manifest)) {
|
|
3843
4562
|
if (typeof value === "object" && value !== null) {
|
|
@@ -3850,10 +4569,10 @@ async function knowledgePanelHandler(req, res, runtime) {
|
|
|
3850
4569
|
}
|
|
3851
4570
|
}
|
|
3852
4571
|
} catch (manifestError) {
|
|
3853
|
-
|
|
4572
|
+
logger9.error("[Document Processor] \u274C Error reading manifest:", manifestError);
|
|
3854
4573
|
}
|
|
3855
4574
|
}
|
|
3856
|
-
|
|
4575
|
+
logger9.debug(`[Document Processor] \u{1F310} Using fallback with CSS: ${cssFile}, JS: ${jsFile}`);
|
|
3857
4576
|
const html = `
|
|
3858
4577
|
<!DOCTYPE html>
|
|
3859
4578
|
<html lang="en">
|
|
@@ -3887,14 +4606,14 @@ async function knowledgePanelHandler(req, res, runtime) {
|
|
|
3887
4606
|
res.end(html);
|
|
3888
4607
|
}
|
|
3889
4608
|
} catch (error) {
|
|
3890
|
-
|
|
4609
|
+
logger9.error("[Document Processor] \u274C Error serving frontend:", error);
|
|
3891
4610
|
sendError(res, 500, "FRONTEND_ERROR", "Failed to load knowledge panel", error.message);
|
|
3892
4611
|
}
|
|
3893
4612
|
}
|
|
3894
4613
|
async function frontendAssetHandler(req, res, runtime) {
|
|
3895
4614
|
try {
|
|
3896
|
-
|
|
3897
|
-
const currentDir =
|
|
4615
|
+
logger9.debug(`[Document Processor] \u{1F310} Asset request: ${req.path}`);
|
|
4616
|
+
const currentDir = path4.dirname(new URL(import.meta.url).pathname);
|
|
3898
4617
|
const assetRequestPath = req.path;
|
|
3899
4618
|
const assetsMarker = "/assets/";
|
|
3900
4619
|
const assetsStartIndex = assetRequestPath.indexOf(assetsMarker);
|
|
@@ -3910,10 +4629,10 @@ async function frontendAssetHandler(req, res, runtime) {
|
|
|
3910
4629
|
`Invalid asset name: '${assetName}' from path ${assetRequestPath}`
|
|
3911
4630
|
);
|
|
3912
4631
|
}
|
|
3913
|
-
const assetPath =
|
|
3914
|
-
|
|
3915
|
-
if (
|
|
3916
|
-
const fileStream =
|
|
4632
|
+
const assetPath = path4.join(currentDir, "../dist/assets", assetName);
|
|
4633
|
+
logger9.debug(`[Document Processor] \u{1F310} Serving asset: ${assetPath}`);
|
|
4634
|
+
if (fs4.existsSync(assetPath)) {
|
|
4635
|
+
const fileStream = fs4.createReadStream(assetPath);
|
|
3917
4636
|
let contentType = "application/octet-stream";
|
|
3918
4637
|
if (assetPath.endsWith(".js")) {
|
|
3919
4638
|
contentType = "application/javascript";
|
|
@@ -3926,7 +4645,7 @@ async function frontendAssetHandler(req, res, runtime) {
|
|
|
3926
4645
|
sendError(res, 404, "NOT_FOUND", `Asset not found: ${req.url}`);
|
|
3927
4646
|
}
|
|
3928
4647
|
} catch (error) {
|
|
3929
|
-
|
|
4648
|
+
logger9.error(`[Document Processor] \u274C Error serving asset ${req.url}:`, error);
|
|
3930
4649
|
sendError(res, 500, "ASSET_ERROR", `Failed to load asset ${req.url}`, error.message);
|
|
3931
4650
|
}
|
|
3932
4651
|
}
|
|
@@ -3987,7 +4706,7 @@ async function getKnowledgeChunksHandler(req, res, runtime) {
|
|
|
3987
4706
|
}
|
|
3988
4707
|
});
|
|
3989
4708
|
} catch (error) {
|
|
3990
|
-
|
|
4709
|
+
logger9.error("[Document Processor] \u274C Error retrieving chunks:", error);
|
|
3991
4710
|
sendError(res, 500, "RETRIEVAL_ERROR", "Failed to retrieve knowledge chunks", error.message);
|
|
3992
4711
|
}
|
|
3993
4712
|
}
|
|
@@ -4009,14 +4728,14 @@ async function searchKnowledgeHandler(req, res, runtime) {
|
|
|
4009
4728
|
return sendError(res, 400, "INVALID_QUERY", "Search query cannot be empty");
|
|
4010
4729
|
}
|
|
4011
4730
|
if (req.query.threshold && (parsedThreshold < 0 || parsedThreshold > 1)) {
|
|
4012
|
-
|
|
4731
|
+
logger9.debug(
|
|
4013
4732
|
`[Document Processor] \u{1F50D} Threshold value ${parsedThreshold} was clamped to ${matchThreshold}`
|
|
4014
4733
|
);
|
|
4015
4734
|
}
|
|
4016
4735
|
if (req.query.limit && (parsedLimit < 1 || parsedLimit > 100)) {
|
|
4017
|
-
|
|
4736
|
+
logger9.debug(`[Document Processor] \u{1F50D} Limit value ${parsedLimit} was clamped to ${limit}`);
|
|
4018
4737
|
}
|
|
4019
|
-
|
|
4738
|
+
logger9.debug(
|
|
4020
4739
|
`[Document Processor] \u{1F50D} Searching: "${searchText}" (threshold: ${matchThreshold}, limit: ${limit})`
|
|
4021
4740
|
);
|
|
4022
4741
|
const embedding = await runtime.useModel(ModelType4.TEXT_EMBEDDING, {
|
|
@@ -4043,7 +4762,7 @@ async function searchKnowledgeHandler(req, res, runtime) {
|
|
|
4043
4762
|
documentFilename = document.metadata.filename || documentFilename;
|
|
4044
4763
|
}
|
|
4045
4764
|
} catch (e) {
|
|
4046
|
-
|
|
4765
|
+
logger9.debug(`Could not fetch document ${documentId} for fragment`);
|
|
4047
4766
|
}
|
|
4048
4767
|
}
|
|
4049
4768
|
return {
|
|
@@ -4058,7 +4777,7 @@ async function searchKnowledgeHandler(req, res, runtime) {
|
|
|
4058
4777
|
};
|
|
4059
4778
|
})
|
|
4060
4779
|
);
|
|
4061
|
-
|
|
4780
|
+
logger9.info(
|
|
4062
4781
|
`[Document Processor] \u{1F50D} Found ${enhancedResults.length} results for: "${searchText}"`
|
|
4063
4782
|
);
|
|
4064
4783
|
sendSuccess(res, {
|
|
@@ -4068,7 +4787,7 @@ async function searchKnowledgeHandler(req, res, runtime) {
|
|
|
4068
4787
|
count: enhancedResults.length
|
|
4069
4788
|
});
|
|
4070
4789
|
} catch (error) {
|
|
4071
|
-
|
|
4790
|
+
logger9.error("[Document Processor] \u274C Error searching knowledge:", error);
|
|
4072
4791
|
sendError(res, 500, "SEARCH_ERROR", "Failed to search knowledge", error.message);
|
|
4073
4792
|
}
|
|
4074
4793
|
}
|
|
@@ -4080,7 +4799,7 @@ async function uploadKnowledgeWithMulter(req, res, runtime) {
|
|
|
4080
4799
|
);
|
|
4081
4800
|
uploadArray(req, res, (err) => {
|
|
4082
4801
|
if (err) {
|
|
4083
|
-
|
|
4802
|
+
logger9.error("[Document Processor] \u274C File upload error:", err);
|
|
4084
4803
|
return sendError(res, 400, "UPLOAD_ERROR", err.message);
|
|
4085
4804
|
}
|
|
4086
4805
|
uploadKnowledgeHandler(req, res, runtime);
|
|
@@ -4135,97 +4854,6 @@ var knowledgeRoutes = [
|
|
|
4135
4854
|
var knowledgePlugin = {
|
|
4136
4855
|
name: "knowledge",
|
|
4137
4856
|
description: "Plugin for Retrieval Augmented Generation, including knowledge management and embedding.",
|
|
4138
|
-
config: {
|
|
4139
|
-
// Token limits - these will be read from runtime settings during init
|
|
4140
|
-
MAX_INPUT_TOKENS: "4000",
|
|
4141
|
-
MAX_OUTPUT_TOKENS: "4096",
|
|
4142
|
-
// Contextual Knowledge settings
|
|
4143
|
-
CTX_KNOWLEDGE_ENABLED: "false"
|
|
4144
|
-
},
|
|
4145
|
-
async init(config, runtime) {
|
|
4146
|
-
logger8.info("Initializing Knowledge Plugin...");
|
|
4147
|
-
try {
|
|
4148
|
-
logger8.info("Validating model configuration for Knowledge plugin...");
|
|
4149
|
-
logger8.info(`[Knowledge Plugin] INIT DEBUG:`);
|
|
4150
|
-
logger8.info(`[Knowledge Plugin] - Runtime available: ${!!runtime}`);
|
|
4151
|
-
logger8.info(
|
|
4152
|
-
`[Knowledge Plugin] - process.env.CTX_KNOWLEDGE_ENABLED: '${process.env.CTX_KNOWLEDGE_ENABLED}'`
|
|
4153
|
-
);
|
|
4154
|
-
logger8.info(
|
|
4155
|
-
`[Knowledge Plugin] - config.CTX_KNOWLEDGE_ENABLED: '${config.CTX_KNOWLEDGE_ENABLED}'`
|
|
4156
|
-
);
|
|
4157
|
-
if (runtime) {
|
|
4158
|
-
logger8.info(
|
|
4159
|
-
`[Knowledge Plugin] - runtime.getSetting('CTX_KNOWLEDGE_ENABLED'): '${runtime.getSetting("CTX_KNOWLEDGE_ENABLED")}'`
|
|
4160
|
-
);
|
|
4161
|
-
}
|
|
4162
|
-
const validatedConfig = validateModelConfig(runtime);
|
|
4163
|
-
const ctxEnabledFromEnv = process.env.CTX_KNOWLEDGE_ENABLED === "true" || process.env.CTX_KNOWLEDGE_ENABLED === "True";
|
|
4164
|
-
const ctxEnabledFromConfig = config.CTX_KNOWLEDGE_ENABLED === "true" || config.CTX_KNOWLEDGE_ENABLED === "True";
|
|
4165
|
-
const ctxEnabledFromValidated = validatedConfig.CTX_KNOWLEDGE_ENABLED;
|
|
4166
|
-
const ctxEnabledFromRuntime = runtime ? runtime.getSetting("CTX_KNOWLEDGE_ENABLED") === "true" || runtime.getSetting("CTX_KNOWLEDGE_ENABLED") === "True" : false;
|
|
4167
|
-
const finalCtxEnabled = ctxEnabledFromEnv || ctxEnabledFromConfig || ctxEnabledFromValidated || ctxEnabledFromRuntime;
|
|
4168
|
-
logger8.info(`[Knowledge Plugin] CTX_KNOWLEDGE_ENABLED sources:`);
|
|
4169
|
-
logger8.info(`[Knowledge Plugin] - From env: ${ctxEnabledFromEnv}`);
|
|
4170
|
-
logger8.info(`[Knowledge Plugin] - From config: ${ctxEnabledFromConfig}`);
|
|
4171
|
-
logger8.info(`[Knowledge Plugin] - From validated: ${ctxEnabledFromValidated}`);
|
|
4172
|
-
logger8.info(`[Knowledge Plugin] - From runtime: ${ctxEnabledFromRuntime}`);
|
|
4173
|
-
logger8.info(`[Knowledge Plugin] - FINAL RESULT: ${finalCtxEnabled}`);
|
|
4174
|
-
if (finalCtxEnabled) {
|
|
4175
|
-
logger8.info("Running in Contextual Knowledge mode with text generation capabilities.");
|
|
4176
|
-
logger8.info(
|
|
4177
|
-
`Using ${validatedConfig.EMBEDDING_PROVIDER || "auto-detected"} for embeddings and ${validatedConfig.TEXT_PROVIDER || process.env.TEXT_PROVIDER} for text generation.`
|
|
4178
|
-
);
|
|
4179
|
-
logger8.info(`Text model: ${validatedConfig.TEXT_MODEL || process.env.TEXT_MODEL}`);
|
|
4180
|
-
} else {
|
|
4181
|
-
const usingPluginOpenAI = !process.env.EMBEDDING_PROVIDER;
|
|
4182
|
-
logger8.warn(
|
|
4183
|
-
"Running in Basic Embedding mode - documents will NOT be enriched with context!"
|
|
4184
|
-
);
|
|
4185
|
-
logger8.info("To enable contextual enrichment:");
|
|
4186
|
-
logger8.info(" - Set CTX_KNOWLEDGE_ENABLED=true");
|
|
4187
|
-
logger8.info(" - Configure TEXT_PROVIDER (anthropic/openai/openrouter/google)");
|
|
4188
|
-
logger8.info(" - Configure TEXT_MODEL and API key");
|
|
4189
|
-
if (usingPluginOpenAI) {
|
|
4190
|
-
logger8.info("Using auto-detected configuration from plugin-openai for embeddings.");
|
|
4191
|
-
} else {
|
|
4192
|
-
logger8.info(
|
|
4193
|
-
`Using ${validatedConfig.EMBEDDING_PROVIDER} for embeddings with ${validatedConfig.TEXT_EMBEDDING_MODEL}.`
|
|
4194
|
-
);
|
|
4195
|
-
}
|
|
4196
|
-
}
|
|
4197
|
-
logger8.info("Model configuration validated successfully.");
|
|
4198
|
-
if (runtime) {
|
|
4199
|
-
logger8.info(`Knowledge Plugin initialized for agent: ${runtime.agentId}`);
|
|
4200
|
-
const loadDocsOnStartup = config.LOAD_DOCS_ON_STARTUP === "true" || process.env.LOAD_DOCS_ON_STARTUP === "true";
|
|
4201
|
-
if (loadDocsOnStartup) {
|
|
4202
|
-
logger8.info("LOAD_DOCS_ON_STARTUP is enabled. Scheduling document loading...");
|
|
4203
|
-
setTimeout(async () => {
|
|
4204
|
-
try {
|
|
4205
|
-
const service = runtime.getService(KnowledgeService.serviceType);
|
|
4206
|
-
if (service instanceof KnowledgeService) {
|
|
4207
|
-
const { loadDocsFromPath: loadDocsFromPath2 } = await import("./docs-loader-5H4HRYEE.js");
|
|
4208
|
-
const result = await loadDocsFromPath2(service, runtime.agentId);
|
|
4209
|
-
if (result.successful > 0) {
|
|
4210
|
-
logger8.info(`Loaded ${result.successful} documents from docs folder on startup`);
|
|
4211
|
-
}
|
|
4212
|
-
}
|
|
4213
|
-
} catch (error) {
|
|
4214
|
-
logger8.error("Error loading documents on startup:", error);
|
|
4215
|
-
}
|
|
4216
|
-
}, 5e3);
|
|
4217
|
-
} else {
|
|
4218
|
-
logger8.info("LOAD_DOCS_ON_STARTUP is not enabled. Skipping automatic document loading.");
|
|
4219
|
-
}
|
|
4220
|
-
}
|
|
4221
|
-
logger8.info(
|
|
4222
|
-
"Knowledge Plugin initialized. Frontend panel should be discoverable via its public route."
|
|
4223
|
-
);
|
|
4224
|
-
} catch (error) {
|
|
4225
|
-
logger8.error("Failed to initialize Knowledge plugin:", error);
|
|
4226
|
-
throw error;
|
|
4227
|
-
}
|
|
4228
|
-
},
|
|
4229
4857
|
services: [KnowledgeService],
|
|
4230
4858
|
providers: [knowledgeProvider],
|
|
4231
4859
|
routes: knowledgeRoutes,
|