@elizaos/plugin-knowledge 2.0.0-alpha.5 → 2.0.0-alpha.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/actions.d.ts +5 -0
- package/dist/actions.d.ts.map +1 -0
- package/dist/browser/index.browser.js +191 -0
- package/dist/browser/index.browser.js.map +12 -0
- package/dist/cjs/index.node.cjs +3756 -0
- package/dist/cjs/index.node.cjs.map +32 -0
- package/dist/config.d.ts +5 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/ctx-embeddings.d.ts +53 -0
- package/dist/ctx-embeddings.d.ts.map +1 -0
- package/dist/docs-loader.d.ts +9 -0
- package/dist/docs-loader.d.ts.map +1 -0
- package/dist/document-processor.d.ts +26 -0
- package/dist/document-processor.d.ts.map +1 -0
- package/dist/documents-provider.d.ts +3 -0
- package/dist/documents-provider.d.ts.map +1 -0
- package/dist/index.browser.d.ts +5 -0
- package/dist/index.browser.d.ts.map +1 -0
- package/dist/index.d.ts +17 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.node.d.ts +2 -0
- package/dist/index.node.d.ts.map +1 -0
- package/dist/llm.d.ts +27 -0
- package/dist/llm.d.ts.map +1 -0
- package/dist/node/index.node.js +3724 -0
- package/dist/node/index.node.js.map +32 -0
- package/dist/provider.d.ts +3 -0
- package/dist/provider.d.ts.map +1 -0
- package/dist/routes.d.ts +3 -0
- package/dist/routes.d.ts.map +1 -0
- package/dist/service.d.ts +76 -0
- package/dist/service.d.ts.map +1 -0
- package/dist/types.d.ts +138 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/utils.d.ts +17 -0
- package/dist/utils.d.ts.map +1 -0
- package/package.json +2 -2
|
@@ -0,0 +1,3756 @@
|
|
|
1
|
+
// Bundler runtime: short aliases for the Object reflection primitives that the
// module-interop helpers in this file call.
var {
  create: __create,
  getPrototypeOf: __getProtoOf,
  defineProperty: __defProp,
  getOwnPropertyNames: __getOwnPropNames,
  getOwnPropertyDescriptor: __getOwnPropDesc
} = Object;
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
7
|
+
/**
 * Wraps a required module value in a namespace-like object.
 *
 * The result inherits from the prototype of `mod` (or is a plain object when
 * `mod` is null/undefined). A `default` property holding `mod` itself is added
 * when `isNodeMode` is truthy, when `mod` is falsy, or when `mod` is not marked
 * `__esModule`. Every own property name of `mod` that the result does not
 * already own is exposed through an enumerable live getter.
 *
 * @param {*} mod - The value returned by `require`.
 * @param {boolean} [isNodeMode] - Force `default` to be `mod` itself.
 * @param {*} [target] - Ignored on input; always reassigned before use.
 * @returns {object} The namespace-like wrapper.
 */
var __toESM = (mod, isNodeMode, target) => {
  target = mod == null ? {} : __create(__getProtoOf(mod));
  const needsDefault = isNodeMode || !mod || !mod.__esModule;
  const to = needsDefault
    ? __defProp(target, "default", { value: mod, enumerable: true })
    : target;
  for (const key of __getOwnPropNames(mod)) {
    if (__hasOwnProp.call(to, key)) {
      continue;
    }
    // Getter (not a copied value) so later changes on `mod` stay visible.
    __defProp(to, key, {
      get: () => mod[key],
      enumerable: true
    });
  }
  return to;
};
|
|
18
|
+
// Remembers the CommonJS facade already built for each export container.
var __moduleCache = /* @__PURE__ */ new WeakMap();

/**
 * Builds (once per input) an object marked `__esModule: true` that mirrors the
 * own properties of `from` through live getters, keeping each property's
 * original enumerability.
 *
 * @param {object|Function} from - The export container to mirror.
 * @returns {object} The cached or newly created facade.
 */
var __toCommonJS = (from) => {
  const cached = __moduleCache.get(from);
  if (cached) {
    return cached;
  }
  const entry = __defProp({}, "__esModule", { value: true });
  const isObjectLike = from && typeof from === "object" || typeof from === "function";
  if (isObjectLike) {
    for (const key of __getOwnPropNames(from)) {
      if (__hasOwnProp.call(entry, key)) {
        continue;
      }
      const desc = __getOwnPropDesc(from, key);
      __defProp(entry, key, {
        get: () => from[key],
        enumerable: !desc || desc.enumerable
      });
    }
  }
  __moduleCache.set(from, entry);
  return entry;
};
|
|
32
|
+
/**
 * Defines one accessor property on `target` for every enumerable key of `all`.
 *
 * The getter installed on `target` is the function stored in `all[name]` at
 * definition time. The setter stores a new constant-returning function back
 * into `all` under the same key.
 *
 * Fix: the loop variable is now block-scoped. With `var`, every setter closure
 * shared one binding, so assigning to any export overwrote the entry of the
 * last key enumerated instead of its own.
 *
 * @param {object} target - Object receiving the accessor properties.
 * @param {Record<string, Function>} all - Map of export name to getter function.
 * @returns {void}
 */
var __export = (target, all) => {
  for (const name in all) {
    __defProp(target, name, {
      get: all[name],
      enumerable: true,
      configurable: true,
      set: (newValue) => {
        all[name] = () => newValue;
      }
    });
  }
};
|
|
41
|
+
|
|
42
|
+
// index.node.ts
|
|
43
|
+
var exports_index_node = {};
|
|
44
|
+
__export(exports_index_node, {
|
|
45
|
+
knowledgeProvider: () => knowledgeProvider,
|
|
46
|
+
knowledgePluginHeadless: () => knowledgePluginHeadless,
|
|
47
|
+
knowledgePluginCore: () => knowledgePluginCore,
|
|
48
|
+
knowledgePlugin: () => knowledgePlugin,
|
|
49
|
+
documentsProvider: () => documentsProvider,
|
|
50
|
+
createKnowledgePlugin: () => createKnowledgePlugin,
|
|
51
|
+
ModelConfigSchema: () => ModelConfigSchema,
|
|
52
|
+
KnowledgeServiceType: () => KnowledgeServiceType,
|
|
53
|
+
KnowledgeService: () => KnowledgeService
|
|
54
|
+
});
|
|
55
|
+
module.exports = __toCommonJS(exports_index_node);
|
|
56
|
+
|
|
57
|
+
// actions.ts
|
|
58
|
+
var fs2 = __toESM(require("node:fs"));
|
|
59
|
+
var path2 = __toESM(require("node:path"));
|
|
60
|
+
var import_core5 = require("@elizaos/core");
|
|
61
|
+
|
|
62
|
+
// service.ts
|
|
63
|
+
var import_core4 = require("@elizaos/core");
|
|
64
|
+
|
|
65
|
+
// config.ts
|
|
66
|
+
var import_zod2 = __toESM(require("zod"));
|
|
67
|
+
|
|
68
|
+
// types.ts
|
|
69
|
+
var import_zod = __toESM(require("zod"));
|
|
70
|
+
/**
 * Zod schema for the plugin's model configuration.
 *
 * Numeric settings accept either a string or a number; strings are converted
 * with `parseInt(value, 10)`. Optional numeric settings fall back to the listed
 * constant whenever the supplied value is falsy.
 */
var ModelConfigSchema = (() => {
  const z = import_zod.default;
  const toInteger = (val) => typeof val === "string" ? parseInt(val, 10) : val;
  const optionalString = () => z.string().optional();
  const stringOrNumber = () => z.string().or(z.number());
  const integerWithFallback = (fallback) =>
    stringOrNumber().optional().transform((val) => val ? toInteger(val) : fallback);
  const flag = (initial) => z.boolean().default(initial);

  return z.object({
    EMBEDDING_PROVIDER: z.enum(["openai", "google"]).optional(),
    TEXT_PROVIDER: z.enum(["openai", "anthropic", "openrouter", "google"]).optional(),
    OPENAI_API_KEY: optionalString(),
    ANTHROPIC_API_KEY: optionalString(),
    OPENROUTER_API_KEY: optionalString(),
    GOOGLE_API_KEY: optionalString(),
    OPENAI_BASE_URL: optionalString(),
    ANTHROPIC_BASE_URL: optionalString(),
    OPENROUTER_BASE_URL: optionalString(),
    GOOGLE_BASE_URL: optionalString(),
    TEXT_EMBEDDING_MODEL: z.string(),
    TEXT_MODEL: optionalString(),
    // Required (no fallback), unlike the other numeric settings.
    MAX_INPUT_TOKENS: stringOrNumber().transform((val) => toInteger(val)),
    MAX_OUTPUT_TOKENS: integerWithFallback(4096),
    EMBEDDING_DIMENSION: integerWithFallback(1536),
    LOAD_DOCS_ON_STARTUP: flag(false),
    CTX_KNOWLEDGE_ENABLED: flag(false),
    RATE_LIMIT_ENABLED: flag(true),
    MAX_CONCURRENT_REQUESTS: integerWithFallback(150),
    REQUESTS_PER_MINUTE: integerWithFallback(300),
    TOKENS_PER_MINUTE: integerWithFallback(750000),
    BATCH_DELAY_MS: integerWithFallback(100)
  });
})();
|
|
94
|
+
// Service-type identifiers exposed by this plugin.
var KnowledgeServiceType = { KNOWLEDGE: "knowledge" };
|
|
97
|
+
|
|
98
|
+
// config.ts
|
|
99
|
+
/**
 * Interprets a setting value as a boolean flag.
 *
 * - booleans are returned unchanged;
 * - numbers are true unless they are exactly `0`;
 * - strings are true when, after trimming and lower-casing, they equal
 *   `"true"` or `"1"`;
 * - anything else (including `undefined` and `null`) is false.
 *
 * Fix: strings are now trimmed and `"1"` is accepted. Environment variables
 * are always strings, so the string form `"1"` now agrees with the numeric
 * form `1`, and stray whitespace no longer turns a flag off.
 *
 * @param {*} value - Raw setting value.
 * @returns {boolean} The parsed flag.
 */
var parseBooleanEnv = (value) => {
  switch (typeof value) {
    case "boolean":
      return value;
    case "number":
      return value !== 0;
    case "string": {
      const normalized = value.trim().toLowerCase();
      return normalized === "true" || normalized === "1";
    }
    default:
      return false;
  }
};
|
|
108
|
+
/**
 * Collects the model settings and validates them against `ModelConfigSchema`.
 *
 * Each setting is looked up in `runtime.getSetting(key)` first (when a runtime
 * is supplied), then in `process.env`, then falls back to the default given
 * at the call site.
 *
 * Fix: boolean flags are now resolved by a dedicated helper. The previous
 * `runtime || env || default` chain threw away an explicit `false` or `0`
 * returned by the runtime, so e.g. `RATE_LIMIT_ENABLED: false` in the agent
 * settings fell through to the `"true"` default. An explicit boolean or number
 * from the runtime is now authoritative for the three flags.
 *
 * @param {{ getSetting: (key: string) => * }} [runtime] - Optional settings source.
 * @returns {object} The parsed configuration produced by the schema.
 * @throws {Error} With a "Model configuration validation failed" message when
 *   the schema rejects the input; any other error is rethrown as-is.
 */
function validateModelConfig(runtime) {
  try {
    const readRuntime = (key) => runtime ? runtime.getSetting(key) : undefined;
    // Generic lookup: any falsy value counts as "not set".
    const getSetting = (key, defaultValue) => readRuntime(key) || process.env[key] || defaultValue;
    // Flag lookup: an explicit runtime boolean/number wins, even when falsy.
    const getBooleanSetting = (key, defaultValue) => {
      const fromRuntime = readRuntime(key);
      if (typeof fromRuntime === "boolean" || typeof fromRuntime === "number") {
        return parseBooleanEnv(fromRuntime);
      }
      return parseBooleanEnv(fromRuntime || process.env[key] || defaultValue);
    };

    const ctxKnowledgeEnabled = getBooleanSetting("CTX_KNOWLEDGE_ENABLED", "false");
    const embeddingProvider = getSetting("EMBEDDING_PROVIDER");
    // No explicit embedding provider: the settings are read as plugin-openai style.
    const assumePluginOpenAI = !embeddingProvider;
    const textEmbeddingModel = getSetting("TEXT_EMBEDDING_MODEL") || getSetting("OPENAI_EMBEDDING_MODEL") || "text-embedding-3-small";
    const embeddingDimension = getSetting("EMBEDDING_DIMENSION") || getSetting("OPENAI_EMBEDDING_DIMENSIONS") || "1536";
    const openaiApiKey = getSetting("OPENAI_API_KEY");

    const config = ModelConfigSchema.parse({
      EMBEDDING_PROVIDER: embeddingProvider,
      TEXT_PROVIDER: getSetting("TEXT_PROVIDER"),
      OPENAI_API_KEY: openaiApiKey,
      ANTHROPIC_API_KEY: getSetting("ANTHROPIC_API_KEY"),
      OPENROUTER_API_KEY: getSetting("OPENROUTER_API_KEY"),
      GOOGLE_API_KEY: getSetting("GOOGLE_API_KEY"),
      OPENAI_BASE_URL: getSetting("OPENAI_BASE_URL"),
      ANTHROPIC_BASE_URL: getSetting("ANTHROPIC_BASE_URL"),
      OPENROUTER_BASE_URL: getSetting("OPENROUTER_BASE_URL"),
      GOOGLE_BASE_URL: getSetting("GOOGLE_BASE_URL"),
      TEXT_EMBEDDING_MODEL: textEmbeddingModel,
      TEXT_MODEL: getSetting("TEXT_MODEL"),
      MAX_INPUT_TOKENS: getSetting("MAX_INPUT_TOKENS", "4000"),
      MAX_OUTPUT_TOKENS: getSetting("MAX_OUTPUT_TOKENS", "4096"),
      EMBEDDING_DIMENSION: embeddingDimension,
      LOAD_DOCS_ON_STARTUP: getBooleanSetting("LOAD_DOCS_ON_STARTUP"),
      CTX_KNOWLEDGE_ENABLED: ctxKnowledgeEnabled,
      RATE_LIMIT_ENABLED: getBooleanSetting("RATE_LIMIT_ENABLED", "true"),
      MAX_CONCURRENT_REQUESTS: getSetting("MAX_CONCURRENT_REQUESTS", "100"),
      REQUESTS_PER_MINUTE: getSetting("REQUESTS_PER_MINUTE", "500"),
      TOKENS_PER_MINUTE: getSetting("TOKENS_PER_MINUTE", "1000000"),
      BATCH_DELAY_MS: getSetting("BATCH_DELAY_MS", "100")
    });
    validateConfigRequirements(config, assumePluginOpenAI);
    return config;
  } catch (error) {
    if (error instanceof import_zod2.default.ZodError) {
      const issues = error.issues.map((issue) => `${issue.path.join(".")}: ${issue.message}`).join(", ");
      // `cause` keeps the original ZodError reachable for debugging.
      throw new Error(`Model configuration validation failed: ${issues}`, { cause: error });
    }
    throw error;
  }
}
|
|
156
|
+
/**
 * Verifies that the API keys required by the selected providers are present.
 *
 * Embedding-provider keys are always checked. Text-provider keys are checked
 * only when `config.CTX_KNOWLEDGE_ENABLED` is truthy.
 *
 * @param {object} config - Parsed model configuration.
 * @param {boolean} assumePluginOpenAI - True when no embedding provider was set explicitly.
 * @returns {void}
 * @throws {Error} Naming the first missing setting.
 */
function validateConfigRequirements(config, assumePluginOpenAI) {
  // Throws when `config[settingName]` selects `provider` but its key is missing.
  const requireApiKey = (settingName, provider, keyName) => {
    if (config[settingName] === provider && !config[keyName]) {
      throw new Error(`${keyName} is required when ${settingName} is set to "${provider}"`);
    }
  };

  requireApiKey("EMBEDDING_PROVIDER", "openai", "OPENAI_API_KEY");
  requireApiKey("EMBEDDING_PROVIDER", "google", "GOOGLE_API_KEY");

  if (assumePluginOpenAI && config.OPENAI_API_KEY && !config.TEXT_EMBEDDING_MODEL) {
    throw new Error("OPENAI_EMBEDDING_MODEL is required when using plugin-openai configuration");
  }

  if (!config.CTX_KNOWLEDGE_ENABLED) {
    return;
  }
  requireApiKey("TEXT_PROVIDER", "openai", "OPENAI_API_KEY");
  requireApiKey("TEXT_PROVIDER", "anthropic", "ANTHROPIC_API_KEY");
  requireApiKey("TEXT_PROVIDER", "openrouter", "OPENROUTER_API_KEY");
  requireApiKey("TEXT_PROVIDER", "google", "GOOGLE_API_KEY");
}
|
|
182
|
+
/**
 * Derives the rate-limit settings from the validated model configuration.
 *
 * When rate limiting is disabled, the per-minute request and token limits are
 * reported as `Number.MAX_SAFE_INTEGER`; the concurrency limit and batch delay
 * are passed through unchanged in both cases.
 *
 * @param {object} [runtime] - Forwarded to `validateModelConfig`.
 * @returns {Promise<object>} Limits plus the provider label and enabled flag.
 */
async function getProviderRateLimits(runtime) {
  const settings = validateModelConfig(runtime);
  const enabled = Boolean(settings.RATE_LIMIT_ENABLED);
  // The text provider takes precedence; "unlimited" is the label when neither is set.
  const providerLabel = settings.TEXT_PROVIDER || settings.EMBEDDING_PROVIDER || "unlimited";

  return {
    maxConcurrentRequests: settings.MAX_CONCURRENT_REQUESTS,
    requestsPerMinute: enabled ? settings.REQUESTS_PER_MINUTE : Number.MAX_SAFE_INTEGER,
    tokensPerMinute: enabled ? settings.TOKENS_PER_MINUTE : Number.MAX_SAFE_INTEGER,
    provider: providerLabel,
    rateLimitEnabled: enabled,
    batchDelayMs: settings.BATCH_DELAY_MS
  };
}
|
|
209
|
+
|
|
210
|
+
// docs-loader.ts
|
|
211
|
+
var fs = __toESM(require("node:fs"));
|
|
212
|
+
var path = __toESM(require("node:path"));
|
|
213
|
+
var import_core = require("@elizaos/core");
|
|
214
|
+
|
|
215
|
+
// utils.ts
|
|
216
|
+
var import_node_buffer = require("node:buffer");
|
|
217
|
+
var import_node_crypto4 = require("node:crypto");
|
|
218
|
+
var mammoth = __toESM(require("mammoth"));
|
|
219
|
+
var import_unpdf = require("unpdf");
|
|
220
|
+
|
|
221
|
+
// node_modules/uuid/dist-node/regex.js
|
|
222
|
+
// Matches canonical UUID strings with version digit 1-8 and variant 8/9/a/b,
// plus the all-zero and all-f special values. Case-insensitive.
var regex_default = /^(?:[0-9a-f]{8}-[0-9a-f]{4}-[1-8][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}|00000000-0000-0000-0000-000000000000|ffffffff-ffff-ffff-ffff-ffffffffffff)$/i;

/**
 * Reports whether `uuid` is a string in canonical UUID form.
 *
 * @param {*} uuid - Candidate value.
 * @returns {boolean} True only for strings accepted by `regex_default`.
 */
function validate(uuid) {
  if (typeof uuid !== "string") {
    return false;
  }
  return regex_default.test(uuid);
}
var validate_default = validate;
|
|
229
|
+
|
|
230
|
+
// node_modules/uuid/dist-node/parse.js
|
|
231
|
+
/**
 * Converts a canonical UUID string into its 16 raw bytes.
 *
 * @param {string} uuid - UUID in 8-4-4-4-12 hexadecimal form.
 * @returns {Uint8Array} The 16 bytes, in the order they appear in the string.
 * @throws {TypeError} When `uuid` fails validation.
 */
function parse(uuid) {
  if (!validate_default(uuid)) {
    throw TypeError("Invalid UUID");
  }
  // After validation the string is exactly 32 hex digits plus four hyphens,
  // so each byte is simply the next pair of digits.
  const hexDigits = uuid.replace(/-/g, "");
  const bytes = new Uint8Array(16);
  for (let index = 0; index < 16; index += 1) {
    bytes[index] = parseInt(hexDigits.slice(index * 2, index * 2 + 2), 16);
  }
  return bytes;
}
var parse_default = parse;
|
|
239
|
+
|
|
240
|
+
// node_modules/uuid/dist-node/stringify.js
|
|
241
|
+
// Lookup table: byteToHex[n] is the two-digit lowercase hex form of byte n.
var byteToHex = Array.from({ length: 256 }, (_, value) => (value + 256).toString(16).slice(1));

/**
 * Formats 16 bytes as a UUID string without validating the input.
 *
 * @param {ArrayLike<number>} arr - Source bytes.
 * @param {number} [offset=0] - Index of the first byte to read.
 * @returns {string} Lowercase 8-4-4-4-12 text.
 */
function unsafeStringify(arr, offset = 0) {
  const hex = (position) => byteToHex[arr[offset + position]];
  const text =
    hex(0) + hex(1) + hex(2) + hex(3) + "-" +
    hex(4) + hex(5) + "-" +
    hex(6) + hex(7) + "-" +
    hex(8) + hex(9) + "-" +
    hex(10) + hex(11) + hex(12) + hex(13) + hex(14) + hex(15);
  return text.toLowerCase();
}
|
|
248
|
+
|
|
249
|
+
// node_modules/uuid/dist-node/rng.js
|
|
250
|
+
var import_node_crypto = require("node:crypto");
|
|
251
|
+
// Pool of random bytes handed out 16 at a time; starts "empty" so the first
// call fills it.
var rnds8Pool = new Uint8Array(256);
var poolPtr = rnds8Pool.length;

/**
 * Returns 16 random bytes, refilling the pool via `randomFillSync` whenever
 * fewer than 16 unread bytes remain.
 *
 * @returns {Uint8Array} A fresh copy of the next 16 pool bytes.
 */
function rng() {
  const remaining = rnds8Pool.length - poolPtr;
  if (remaining < 16) {
    import_node_crypto.randomFillSync(rnds8Pool);
    poolPtr = 0;
  }
  const start = poolPtr;
  poolPtr = start + 16;
  return rnds8Pool.slice(start, poolPtr);
}
|
|
260
|
+
|
|
261
|
+
// node_modules/uuid/dist-node/v35.js
|
|
262
|
+
/**
 * Encodes a string as UTF-8 bytes.
 *
 * Fix: replaces the deprecated `unescape(encodeURIComponent(str))` idiom with
 * `TextEncoder`. Output is identical for well-formed strings; a lone surrogate
 * is now encoded as U+FFFD instead of raising `URIError`.
 *
 * @param {string} str - Text to encode (coerced with `String`).
 * @returns {Uint8Array} The UTF-8 encoding of `str`.
 */
function stringToBytes(str) {
  return new TextEncoder().encode(String(str));
}
|
|
270
|
+
// Namespace UUID strings that the v5 generator exposes as `v5.DNS` and `v5.URL`.
var DNS = "6ba7b810-9dad-11d1-80b4-00c04fd430c8";
var URL2 = "6ba7b811-9dad-11d1-80b4-00c04fd430c8";

/**
 * Shared implementation for name-based UUIDs: hashes namespace bytes followed
 * by value bytes, then stamps the version and variant bits.
 *
 * @param {number} version - Value OR-ed into byte 6 (high nibble carries the version).
 * @param {(bytes: Uint8Array) => ArrayLike<number>} hash - Digest function.
 * @param {string|ArrayLike<number>} value - Name; strings are UTF-8 encoded.
 * @param {string|ArrayLike<number>} namespace - UUID string or 16 bytes.
 * @param {ArrayLike<number>} [buf] - When given, receives the 16 result bytes.
 * @param {number} [offset] - Start index in `buf` (defaults to 0).
 * @returns {string|ArrayLike<number>} `buf` when supplied, otherwise the UUID string.
 * @throws {TypeError} When the namespace does not resolve to 16 bytes.
 */
function v35(version, hash, value, namespace, buf, offset) {
  const valueBytes = typeof value === "string" ? stringToBytes(value) : value;
  const namespaceBytes = typeof namespace === "string" ? parse_default(namespace) : namespace;
  if (namespaceBytes?.length !== 16) {
    throw TypeError("Namespace must be array-like (16 iterable integer values, 0-255)");
  }

  const input = new Uint8Array(16 + valueBytes.length);
  input.set(namespaceBytes);
  input.set(valueBytes, namespaceBytes.length);

  const digest = hash(input);
  digest[6] = digest[6] & 15 | version;
  digest[8] = digest[8] & 63 | 128;

  if (!buf) {
    return unsafeStringify(digest);
  }
  const start = offset || 0;
  for (let i = 0; i < 16; ++i) {
    buf[start + i] = digest[i];
  }
  return buf;
}
|
|
296
|
+
|
|
297
|
+
// node_modules/uuid/dist-node/native.js
|
|
298
|
+
var import_node_crypto2 = require("node:crypto");
|
|
299
|
+
// Holds node:crypto's randomUUID; v4() calls it when invoked without options or an output buffer.
var native_default = { randomUUID: import_node_crypto2.randomUUID };
|
|
300
|
+
|
|
301
|
+
// node_modules/uuid/dist-node/v4.js
|
|
302
|
+
/**
 * Builds a version-4 UUID from random bytes.
 *
 * Random bytes come from `options.random`, else `options.rng()`, else the
 * module's `rng()`. The version and variant bits are written into the
 * random-byte array itself (so a caller-supplied `options.random` is modified).
 *
 * @param {{ random?: ArrayLike<number>, rng?: () => ArrayLike<number> }} [options]
 * @param {ArrayLike<number>} [buf] - When given, receives the 16 bytes.
 * @param {number} [offset] - Start index in `buf` (defaults to 0).
 * @returns {string|ArrayLike<number>} `buf` when supplied, otherwise the UUID string.
 * @throws {Error} When fewer than 16 random bytes are available.
 * @throws {RangeError} When the 16 bytes would not fit in `buf` at `offset`.
 */
function _v4(options, buf, offset) {
  const opts = options || {};
  const rnds = opts.random ?? opts.rng?.() ?? rng();
  if (rnds.length < 16) {
    throw new Error("Random bytes length must be >= 16");
  }
  rnds[6] = rnds[6] & 15 | 64;
  rnds[8] = rnds[8] & 63 | 128;

  if (!buf) {
    return unsafeStringify(rnds);
  }
  const start = offset || 0;
  const end = start + 16;
  if (start < 0 || end > buf.length) {
    throw new RangeError(`UUID byte range ${start}:${start + 15} is out of buffer bounds`);
  }
  for (let i = 0; i < 16; ++i) {
    buf[start + i] = rnds[i];
  }
  return buf;
}
|
|
322
|
+
/**
 * Generates a version-4 UUID, using the native generator when one is available
 * and the caller passed neither options nor an output buffer.
 *
 * @param {object} [options] - Forwarded to `_v4`.
 * @param {ArrayLike<number>} [buf] - Forwarded to `_v4`.
 * @param {number} [offset] - Forwarded to `_v4`.
 * @returns {string|ArrayLike<number>} The UUID string, or `buf` when supplied.
 */
function v4(options, buf, offset) {
  const useNative = native_default.randomUUID && !buf && !options;
  return useNative ? native_default.randomUUID() : _v4(options, buf, offset);
}
var v4_default = v4;
|
|
329
|
+
// node_modules/uuid/dist-node/sha1.js
|
|
330
|
+
var import_node_crypto3 = require("node:crypto");
|
|
331
|
+
/**
 * Computes the SHA-1 digest of the input.
 *
 * @param {number[]|string|Uint8Array} bytes - Plain arrays and strings are
 *   first converted to a Buffer (strings as UTF-8); anything else is passed to
 *   the hash unchanged.
 * @returns {Buffer} The digest returned by `hash.digest()`.
 */
function sha1(bytes) {
  let input = bytes;
  if (Array.isArray(input)) {
    input = Buffer.from(input);
  } else if (typeof input === "string") {
    input = Buffer.from(input, "utf8");
  }
  const hasher = import_node_crypto3.createHash("sha1");
  hasher.update(input);
  return hasher.digest();
}
var sha1_default = sha1;
|
|
340
|
+
|
|
341
|
+
// node_modules/uuid/dist-node/v5.js
|
|
342
|
+
/**
 * Generates a name-based UUID by delegating to `v35` with the value 80
 * (0x50) and the SHA-1 hash.
 *
 * @param {string|ArrayLike<number>} value - Name to hash.
 * @param {string|ArrayLike<number>} namespace - Namespace UUID.
 * @param {ArrayLike<number>} [buf] - Optional output buffer.
 * @param {number} [offset] - Start index in `buf`.
 * @returns {string|ArrayLike<number>} Whatever `v35` returns.
 */
function v5(value, namespace, buf, offset) {
  const versionBits = 0x50;
  return v35(versionBits, sha1_default, value, namespace, buf, offset);
}
// Expose the namespace constants as properties of the function.
Object.assign(v5, { DNS, URL: URL2 });
var v5_default = v5;
|
|
348
|
+
// utils.ts
|
|
349
|
+
// Non-"text/*" MIME types whose payload is decoded directly as UTF-8 text.
var PLAIN_TEXT_CONTENT_TYPES = [
  "application/typescript",
  "text/typescript",
  "text/x-python",
  "application/x-python-code",
  "application/yaml",
  "text/yaml",
  "application/x-yaml",
  "application/json",
  "text/markdown",
  "text/csv"
];
// Upper bound for the "unknown type, try it as text" fallback.
var MAX_FALLBACK_SIZE_BYTES = 5 * 1024 * 1024;
// Number of leading bytes scanned for a NUL byte in the fallback.
var BINARY_CHECK_BYTES = 1024;

/**
 * Extracts plain text from a file buffer according to its content type.
 *
 * - DOCX: text is extracted with mammoth.
 * - Legacy Word (`application/msword` or a `.doc` filename): a fixed
 *   placeholder string is returned; the buffer is not parsed.
 * - `text/*` and the types in `PLAIN_TEXT_CONTENT_TYPES`: decoded as UTF-8.
 * - Anything else: decoded as UTF-8 only if it is at most
 *   `MAX_FALLBACK_SIZE_BYTES`, has no NUL in its first `BINARY_CHECK_BYTES`
 *   bytes, and decodes without U+FFFD replacement characters.
 *
 * Fixes:
 * - MIME parameters (`"application/json; charset=utf-8"`) are stripped before
 *   matching, so parameterised types take the intended branch.
 * - A missing `contentType` / `originalFilename` no longer throws a TypeError.
 * - The fallback's specific failure reason used to be thrown and immediately
 *   swallowed by its own `catch`; it is now attached as `cause` to the
 *   (unchanged) error message.
 *
 * @param {Buffer} fileBuffer - Raw file content.
 * @param {string} contentType - MIME type, optionally with parameters.
 * @param {string} originalFilename - Used for the `.doc` check and in messages.
 * @returns {Promise<string>} The extracted text.
 * @throws {Error} When DOCX parsing fails or the fallback rejects the content.
 */
async function extractTextFromFileBuffer(fileBuffer, contentType, originalFilename) {
  const mimeType = String(contentType ?? "").split(";")[0].trim().toLowerCase();
  const lowerFilename = String(originalFilename ?? "").toLowerCase();

  if (mimeType === "application/vnd.openxmlformats-officedocument.wordprocessingml.document") {
    try {
      const result = await mammoth.extractRawText({ buffer: fileBuffer });
      return result.value;
    } catch (docxError) {
      const errorMessage = docxError instanceof Error ? docxError.message : String(docxError);
      throw new Error(`Failed to parse DOCX file ${originalFilename}: ${errorMessage}`, { cause: docxError });
    }
  }

  if (mimeType === "application/msword" || lowerFilename.endsWith(".doc")) {
    return `[Microsoft Word Document: ${originalFilename}]\n\nThis document was indexed for search but cannot be displayed directly in the browser. The original document content is preserved for retrieval purposes.`;
  }

  if (mimeType.startsWith("text/") || PLAIN_TEXT_CONTENT_TYPES.includes(mimeType)) {
    return fileBuffer.toString("utf-8");
  }

  // Unknown type: accept it only if it plausibly is UTF-8 text.
  if (fileBuffer.length > MAX_FALLBACK_SIZE_BYTES) {
    throw new Error(`File ${originalFilename} exceeds maximum size for fallback (${MAX_FALLBACK_SIZE_BYTES} bytes)`);
  }
  const initialBytes = fileBuffer.subarray(0, Math.min(fileBuffer.length, BINARY_CHECK_BYTES));
  if (initialBytes.includes(0)) {
    throw new Error(`File ${originalFilename} appears to be binary based on initial byte check`);
  }
  const textContent = fileBuffer.toString("utf-8");
  if (textContent.includes("\uFFFD")) {
    // Outer message unchanged for callers; the precise reason rides along as `cause`.
    throw new Error(`Unsupported content type: ${contentType} for ${originalFilename}. Fallback to plain text failed`, {
      cause: new Error(`File ${originalFilename} seems to be binary or has encoding issues (detected �)`)
    });
  }
  return textContent;
}
|
|
398
|
+
/**
 * Extracts the text of a PDF and tidies it line by line.
 *
 * The buffer's bytes are copied into a standalone Uint8Array before being
 * handed to unpdf's `extractText` with `mergePages: true`. Each line of the
 * result is trimmed and empty lines are dropped.
 *
 * @param {Buffer} pdfBuffer - Raw PDF bytes.
 * @param {string} [_filename] - Unused.
 * @returns {Promise<string>} Cleaned text, or "" when no text was extracted.
 * @throws {Error} "Failed to convert PDF to text: …" wrapping any failure.
 */
async function convertPdfToTextFromBuffer(pdfBuffer, _filename) {
  try {
    const copied = pdfBuffer.buffer.slice(pdfBuffer.byteOffset, pdfBuffer.byteOffset + pdfBuffer.byteLength);
    const extraction = await import_unpdf.extractText(new Uint8Array(copied), { mergePages: true });
    const hasText = extraction.text && extraction.text.trim().length !== 0;
    if (!hasText) {
      return "";
    }
    const keptLines = [];
    for (const rawLine of extraction.text.split("\n")) {
      const line = rawLine.trim();
      if (line.length > 0) {
        keptLines.push(line);
      }
    }
    return keptLines.join("\n").replace(/\n{3,}/g, "\n\n");
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    throw new Error(`Failed to convert PDF to text: ${reason}`);
  }
}
|
|
418
|
+
/**
 * Decides whether a file should be treated as binary.
 *
 * Checks, in order: known text MIME fragments (→ false), known binary MIME
 * fragments (→ true), known text extensions (→ false), and finally known
 * binary extensions. Unknown types and extensions are reported as not binary.
 *
 * Fix: MIME matching is now case-insensitive (media type names are
 * case-insensitive, so `"Application/PDF"` must match), and nullish arguments
 * are treated as empty strings instead of throwing.
 *
 * @param {string} contentType - MIME type; matched by substring.
 * @param {string} filename - File name; the text after the last "." is the extension.
 * @returns {boolean} True when the file is classified as binary.
 */
function isBinaryContentType(contentType, filename) {
  const mimeType = String(contentType ?? "").toLowerCase();
  const matchesAny = (fragments) => fragments.some((fragment) => mimeType.includes(fragment));

  const textContentTypes = [
    "text/",
    "application/json",
    "application/xml",
    "application/javascript",
    "application/typescript",
    "application/x-yaml",
    "application/x-sh"
  ];
  if (matchesAny(textContentTypes)) {
    return false;
  }

  const binaryContentTypes = [
    "application/pdf",
    "application/msword",
    "application/vnd.openxmlformats-officedocument",
    "application/vnd.ms-excel",
    "application/vnd.ms-powerpoint",
    "application/zip",
    "application/x-zip-compressed",
    "application/octet-stream",
    "image/",
    "audio/",
    "video/"
  ];
  if (matchesAny(binaryContentTypes)) {
    return true;
  }

  // MIME type was inconclusive: fall back to the file extension.
  const fileExt = String(filename ?? "").split(".").pop()?.toLowerCase() || "";
  const textExtensions = [
    "txt",
    "md",
    "markdown",
    "json",
    "xml",
    "html",
    "htm",
    "css",
    "js",
    "ts",
    "jsx",
    "tsx",
    "yaml",
    "yml",
    "toml",
    "ini",
    "cfg",
    "conf",
    "sh",
    "bash",
    "zsh",
    "fish",
    "py",
    "rb",
    "go",
    "rs",
    "java",
    "c",
    "cpp",
    "h",
    "hpp",
    "cs",
    "php",
    "sql",
    "r",
    "swift",
    "kt",
    "scala",
    "clj",
    "ex",
    "exs",
    "vim",
    "env",
    "gitignore",
    "dockerignore",
    "editorconfig",
    "log",
    "csv",
    "tsv",
    "properties",
    "gradle",
    "sbt",
    "makefile",
    "dockerfile",
    "vagrantfile",
    "gemfile",
    "rakefile",
    "podfile",
    "csproj",
    "vbproj",
    "fsproj",
    "sln",
    "pom"
  ];
  if (textExtensions.includes(fileExt)) {
    return false;
  }

  const binaryExtensions = [
    "pdf",
    "docx",
    "doc",
    "xls",
    "xlsx",
    "ppt",
    "pptx",
    "zip",
    "rar",
    "7z",
    "tar",
    "gz",
    "bz2",
    "xz",
    "jpg",
    "jpeg",
    "png",
    "gif",
    "bmp",
    "svg",
    "ico",
    "webp",
    "mp3",
    "mp4",
    "avi",
    "mov",
    "wmv",
    "flv",
    "wav",
    "flac",
    "ogg",
    "exe",
    "dll",
    "so",
    "dylib",
    "bin",
    "dat",
    "db",
    "sqlite"
  ];
  return binaryExtensions.includes(fileExt);
}
|
|
561
|
+
/**
 * Reduces a URL to its origin and path, dropping the query string and fragment.
 *
 * @param {string} url - URL to normalise.
 * @returns {string} `origin + pathname`, or the input unchanged when it
 *   cannot be parsed as a URL.
 */
function normalizeS3Url(url) {
  let parsed;
  try {
    parsed = new URL(url);
  } catch {
    // Not a parseable URL: hand the input back untouched.
    return url;
  }
  const { origin, pathname } = parsed;
  return `${origin}${pathname}`;
}
|
|
569
|
+
/**
 * Downloads a URL and returns its body as base64 together with its content type.
 *
 * The request is aborted after 30 seconds.
 *
 * Fixes:
 * - The abort timer is now cleared in `finally`. Previously it was left
 *   pending whenever `fetch` rejected or the status was not OK.
 * - The timeout now stays armed until the body has been read, so a stalled
 *   body download is aborted instead of waiting indefinitely.
 * - The original error is attached as `cause`.
 *
 * @param {string} url - Address to fetch.
 * @returns {Promise<{ content: string, contentType: string }>} Base64 body and
 *   the `content-type` header (or "application/octet-stream" when absent).
 * @throws {Error} "Failed to fetch content from URL: …" for any failure,
 *   including non-2xx responses and timeouts.
 */
async function fetchUrlContent(url) {
  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), 30000);
  try {
    const response = await fetch(url, {
      signal: controller.signal,
      headers: {
        "User-Agent": "Eliza-Knowledge-Plugin/1.0"
      }
    });
    if (!response.ok) {
      throw new Error(`Failed to fetch URL: ${response.status} ${response.statusText}`);
    }
    const contentType = response.headers.get("content-type") || "application/octet-stream";
    const arrayBuffer = await response.arrayBuffer();
    const base64Content = import_node_buffer.Buffer.from(arrayBuffer).toString("base64");
    return {
      content: base64Content,
      contentType
    };
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : String(error);
    throw new Error(`Failed to fetch content from URL: ${errorMessage}`, { cause: error });
  } finally {
    clearTimeout(timeoutId);
  }
}
|
|
596
|
+
/**
 * Heuristically decides whether a string is base64-encoded data.
 *
 * After removing whitespace the text must be at least 16 characters long, a
 * multiple of 4 in length, consist only of base64 alphabet characters with up
 * to two trailing "=", and contain a lowercase letter plus at least one digit
 * or uppercase letter.
 *
 * @param {string} content - Text to inspect.
 * @returns {boolean} True when every check passes.
 */
function looksLikeBase64(content) {
  if (!content || content.length === 0) {
    return false;
  }
  const compact = content.replace(/\s/g, "");
  const wellFormed =
    compact.length >= 16 &&
    compact.length % 4 === 0 &&
    /^[A-Za-z0-9+/]*={0,2}$/.test(compact);
  if (!wellFormed) {
    return false;
  }
  // Require mixed character classes so plain lowercase words are not mistaken for base64.
  const hasLowerCase = /[a-z]/.test(compact);
  const hasDigitOrUpper = /\d/.test(compact) || /[A-Z]/.test(compact);
  return hasDigitOrUpper && hasLowerCase;
}
|
|
612
|
+
/**
 * Derives a deterministic UUID for a document from its content.
 *
 * The agent id, a normalised prefix of the content, and the optional filename
 * are joined with "::", hashed with SHA-256, and the hex digest is turned into
 * a v5 UUID under a fixed namespace.
 *
 * @param {string} content - Document text, or base64 for encoded payloads.
 * @param {string} agentId - Included in the hashed material.
 * @param {{ maxChars?: number, includeFilename?: string, contentType?: string }} [options]
 *   `maxChars` (default 2000) caps how much content is hashed.
 * @returns {string} The generated UUID.
 */
function generateContentBasedId(content, agentId, options) {
  const { maxChars = 2000, includeFilename, contentType } = options || {};
  const rawPrefix = () => content.slice(0, maxChars);

  let contentForHashing;
  if (!looksLikeBase64(content)) {
    contentForHashing = rawPrefix();
  } else {
    try {
      const decoded = import_node_buffer.Buffer.from(content, "base64").toString("utf8");
      // NOTE(review): a cleanly decoded payload is hashed in its base64 form,
      // while a payload that decodes with replacement characters is hashed in
      // decoded form (unless it is a PDF). This looks inverted, but changing
      // it would change every existing document id — confirm before touching.
      const hashEncodedForm = !decoded.includes("�") || contentType?.includes("pdf");
      contentForHashing = hashEncodedForm ? rawPrefix() : decoded.slice(0, maxChars);
    } catch {
      contentForHashing = rawPrefix();
    }
  }

  // Normalise line endings so CRLF and LF copies of a file share one id.
  const normalized = contentForHashing.replace(/\r\n/g, "\n").replace(/\r/g, "\n").trim();
  const parts = [agentId, normalized, includeFilename || ""];
  const componentsToHash = parts.filter(Boolean).join("::");
  const digest = import_node_crypto4.createHash("sha256").update(componentsToHash).digest("hex");
  const DOCUMENT_NAMESPACE = "6ba7b810-9dad-11d1-80b4-00c04fd430c8";
  return v5_default(digest, DOCUMENT_NAMESPACE);
}
|
|
637
|
+
|
|
638
|
+
// docs-loader.ts
|
|
639
|
+
/**
 * Resolves the directory that knowledge documents are loaded from.
 *
 * Precedence: the `runtimePath` argument, then the `KNOWLEDGE_PATH`
 * environment variable, then `<cwd>/docs`. When the resolved directory does
 * not exist, a warning is logged along with a hint that depends on where the
 * path came from; the path is returned either way.
 *
 * @param {string} [runtimePath] - Path taken from the agent settings.
 * @returns {string} The absolute path.
 */
function getKnowledgePath(runtimePath) {
  const envPath = process.env.KNOWLEDGE_PATH;
  const resolvedPath = path.resolve(runtimePath || envPath || path.join(process.cwd(), "docs"));
  if (fs.existsSync(resolvedPath)) {
    return resolvedPath;
  }

  const log = import_core.logger;
  log.warn(`Knowledge path does not exist: ${resolvedPath}`);
  if (runtimePath) {
    log.warn("Please create the directory or update KNOWLEDGE_PATH in agent settings");
    return resolvedPath;
  }
  if (envPath) {
    log.warn("Please create the directory or update KNOWLEDGE_PATH environment variable");
    return resolvedPath;
  }
  // Nothing configured: explain how to enable the plugin.
  log.info("To use the knowledge plugin, either:");
  log.info('1. Create a "docs" folder in your project root');
  log.info("2. Set KNOWLEDGE_PATH in agent settings or environment variable");
  return resolvedPath;
}
|
|
656
|
+
/**
 * Load every supported file under the knowledge directory into the
 * knowledge service, one file at a time.
 *
 * Dot-files and files whose extension has no known content type are
 * skipped; they still count towards `total`, which is the number of files
 * discovered on disk, so `total` can exceed `successful + failed`.
 *
 * @param {object} service - Object exposing `addKnowledge(options)`.
 * @param {string} agentId - Used as `roomId`, `entityId` and the fallback `worldId`.
 * @param {string} [worldId] - World to attach documents to; defaults to `agentId`.
 * @param {string} [knowledgePath] - Optional directory override passed to `getKnowledgePath`.
 * @returns {Promise<{total: number, successful: number, failed: number}>}
 */
async function loadDocsFromPath(service, agentId, worldId, knowledgePath) {
  const nothingLoaded = () => ({ total: 0, successful: 0, failed: 0 });
  const docsPath = getKnowledgePath(knowledgePath);
  if (!fs.existsSync(docsPath)) {
    import_core.logger.warn(`Knowledge path does not exist: ${docsPath}`);
    return nothingLoaded();
  }
  import_core.logger.info(`Loading documents from: ${docsPath}`);
  const files = getAllFiles(docsPath);
  if (files.length === 0) {
    import_core.logger.info("No files found in knowledge path");
    return nothingLoaded();
  }
  import_core.logger.info(`Found ${files.length} files to process`);
  const tally = { successful: 0, failed: 0 };
  for (const filePath of files) {
    try {
      const fileName = path.basename(filePath);
      if (fileName.startsWith(".")) {
        continue;
      }
      const contentType = getContentType(path.extname(filePath).toLowerCase());
      if (!contentType) {
        import_core.logger.debug(`Skipping unsupported file type: ${filePath}`);
        continue;
      }
      const fileBuffer = fs.readFileSync(filePath);
      // Binary formats travel as base64; everything else as UTF-8 text.
      const encoding = isBinaryContentType(contentType, fileName) ? "base64" : "utf-8";
      const content = fileBuffer.toString(encoding);
      import_core.logger.debug(`Processing document: ${fileName}`);
      const result = await service.addKnowledge({
        clientDocumentId: "",
        contentType,
        originalFilename: fileName,
        worldId: worldId || agentId,
        content,
        roomId: agentId,
        entityId: agentId
      });
      import_core.logger.info(`✅ "${fileName}": ${result.fragmentCount} fragments created`);
      tally.successful += 1;
    } catch (error) {
      // One bad file must not abort the whole load.
      import_core.logger.error({ error }, `Failed to process file ${filePath}`);
      tally.failed += 1;
    }
  }
  import_core.logger.info(`Document loading complete: ${tally.successful} successful, ${tally.failed} failed out of ${files.length} total`);
  return {
    total: files.length,
    successful: tally.successful,
    failed: tally.failed
  };
}
|
|
711
|
+
/**
 * Recursively collect the paths of all regular files below `dirPath`.
 *
 * Directories named `node_modules`, `.git`, `.vscode`, `dist` or `build`
 * are not descended into. A directory that cannot be read is logged and
 * contributes nothing; the walk continues with what was gathered so far.
 *
 * @param {string} dirPath - Directory to walk.
 * @param {string[]} [files] - Accumulator; found paths are appended to it.
 * @returns {string[]} The same `files` array, for convenience.
 */
function getAllFiles(dirPath, files = []) {
  const skippedDirectories = ["node_modules", ".git", ".vscode", "dist", "build"];
  try {
    for (const entry of fs.readdirSync(dirPath, { withFileTypes: true })) {
      const fullPath = path.join(dirPath, entry.name);
      if (entry.isFile()) {
        files.push(fullPath);
        continue;
      }
      if (entry.isDirectory() && !skippedDirectories.includes(entry.name)) {
        getAllFiles(fullPath, files);
      }
    }
  } catch (error) {
    import_core.logger.error({ error }, `Error reading directory ${dirPath}`);
  }
  return files;
}
|
|
729
|
+
/**
 * Map a file extension (including the leading dot) to a MIME content type.
 *
 * The lookup is case-insensitive and only consults the table's own keys,
 * so inherited object members such as "constructor" or "__proto__" are
 * never returned.
 *
 * @param {string} extension - File extension such as ".md" or ".PDF".
 * @returns {string|null} The content type, or `null` when the extension is
 *   unknown or not a string.
 */
function getContentType(extension) {
  if (typeof extension !== "string") {
    return null;
  }
  // Keys are lower-case; the former ".R" entry is covered by ".r" now that
  // the lookup key is normalised.
  const contentTypes = {
    // Plain text and markup
    ".txt": "text/plain",
    ".md": "text/markdown",
    ".markdown": "text/markdown",
    ".tson": "text/plain",
    ".xml": "application/xml",
    ".csv": "text/csv",
    ".tsv": "text/tab-separated-values",
    ".log": "text/plain",
    // Web
    ".html": "text/html",
    ".htm": "text/html",
    ".css": "text/css",
    ".scss": "text/x-scss",
    ".sass": "text/x-sass",
    ".less": "text/x-less",
    ".js": "text/javascript",
    ".jsx": "text/javascript",
    ".ts": "text/typescript",
    ".tsx": "text/typescript",
    ".mjs": "text/javascript",
    ".cjs": "text/javascript",
    ".vue": "text/x-vue",
    ".svelte": "text/x-svelte",
    ".astro": "text/x-astro",
    // Programming languages
    ".py": "text/x-python",
    ".pyw": "text/x-python",
    ".pyi": "text/x-python",
    ".java": "text/x-java",
    ".kt": "text/x-kotlin",
    ".kts": "text/x-kotlin",
    ".scala": "text/x-scala",
    ".c": "text/x-c",
    ".cpp": "text/x-c++",
    ".cc": "text/x-c++",
    ".cxx": "text/x-c++",
    ".h": "text/x-c",
    ".hpp": "text/x-c++",
    ".cs": "text/x-csharp",
    ".php": "text/x-php",
    ".rb": "text/x-ruby",
    ".go": "text/x-go",
    ".rs": "text/x-rust",
    ".swift": "text/x-swift",
    ".r": "text/x-r",
    ".m": "text/x-objectivec",
    ".mm": "text/x-objectivec",
    ".clj": "text/x-clojure",
    ".cljs": "text/x-clojure",
    ".ex": "text/x-elixir",
    ".exs": "text/x-elixir",
    ".lua": "text/x-lua",
    ".pl": "text/x-perl",
    ".pm": "text/x-perl",
    ".dart": "text/x-dart",
    ".hs": "text/x-haskell",
    ".elm": "text/x-elm",
    ".ml": "text/x-ocaml",
    ".fs": "text/x-fsharp",
    ".fsx": "text/x-fsharp",
    ".vb": "text/x-vb",
    ".pas": "text/x-pascal",
    ".d": "text/x-d",
    ".nim": "text/x-nim",
    ".zig": "text/x-zig",
    ".jl": "text/x-julia",
    ".tcl": "text/x-tcl",
    ".awk": "text/x-awk",
    ".sed": "text/x-sed",
    // Shells and scripts
    ".sh": "text/x-sh",
    ".bash": "text/x-sh",
    ".zsh": "text/x-sh",
    ".fish": "text/x-fish",
    ".ps1": "text/x-powershell",
    ".bat": "text/x-batch",
    ".cmd": "text/x-batch",
    // Configuration and data
    ".json": "application/json",
    ".yaml": "text/x-yaml",
    ".yml": "text/x-yaml",
    ".toml": "text/x-toml",
    ".ini": "text/x-ini",
    ".cfg": "text/x-ini",
    ".conf": "text/x-ini",
    ".env": "text/plain",
    ".gitignore": "text/plain",
    ".dockerignore": "text/plain",
    ".editorconfig": "text/plain",
    ".properties": "text/x-properties",
    ".sql": "text/x-sql",
    // Binary documents
    ".pdf": "application/pdf",
    ".doc": "application/msword",
    ".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
  };
  const key = extension.toLowerCase();
  // Own-property check: a bare `contentTypes[key]` would resolve names like
  // "constructor" through the prototype chain and return a non-string.
  return Object.prototype.hasOwnProperty.call(contentTypes, key) ? contentTypes[key] : null;
}
|
|
825
|
+
|
|
826
|
+
// document-processor.ts
|
|
827
|
+
var import_core3 = require("@elizaos/core");
|
|
828
|
+
|
|
829
|
+
// ctx-embeddings.ts
|
|
830
|
+
// Default chunk size and overlap, expressed in tokens.
var DEFAULT_CHUNK_TOKEN_SIZE = 500;
var DEFAULT_CHUNK_OVERLAP_TOKENS = 100;
// Rough characters-per-token ratio used to estimate token counts from text length.
var DEFAULT_CHARS_PER_TOKEN = 3.5;
// Target length range (in tokens) of an enriched chunk, per content category.
var CONTEXT_TARGETS = Object.fromEntries(
  [
    ["DEFAULT", 60, 120],
    ["PDF", 80, 150],
    ["MATH_PDF", 100, 180],
    ["CODE", 100, 200],
    ["TECHNICAL", 80, 160]
  ].map(([category, MIN_TOKENS, MAX_TOKENS]) => [category, { MIN_TOKENS, MAX_TOKENS }])
);
|
|
855
|
+
// System prompts for chunk enrichment, keyed by content category. Every
// prompt has the same shape: a role sentence, a task sentence, and four
// numbered rules.
var SYSTEM_PROMPTS = (() => {
  const compose = (role, task, rules) => {
    const numberedRules = rules.map((rule, index) => `${index + 1}) ${rule}`).join("; ");
    return `You are a precision ${role} augmentation tool. Your task is to ${task}. You must: ${numberedRules}.`;
  };
  return {
    DEFAULT: compose(
      "text",
      "expand a given text chunk with its direct context from a larger document",
      [
        "Keep the original chunk intact",
        "Add critical context from surrounding text",
        "Never summarize or rephrase the original chunk",
        "Create contextually rich output for improved semantic retrieval"
      ]
    ),
    CODE: compose(
      "code",
      "expand a given code chunk with necessary context from the larger codebase",
      [
        "Keep the original code chunk intact with exact syntax and indentation",
        "Add relevant imports, function signatures, or class definitions",
        "Include critical surrounding code context",
        "Create contextually rich output that maintains correct syntax"
      ]
    ),
    PDF: compose(
      "document",
      "expand a given PDF text chunk with its direct context from the larger document",
      [
        "Keep the original chunk intact",
        "Add section headings, references, or figure captions",
        "Include text that immediately precedes and follows the chunk",
        "Create contextually rich output that maintains the document's original structure"
      ]
    ),
    MATH_PDF: compose(
      "mathematical content",
      "expand a given mathematical text chunk with essential context",
      [
        "Keep original mathematical notations and expressions exactly as they appear",
        "Add relevant definitions, theorems, or equations from elsewhere in the document",
        "Preserve all LaTeX or mathematical formatting",
        "Create contextually rich output for improved mathematical comprehension"
      ]
    ),
    TECHNICAL: compose(
      "technical documentation",
      "expand a technical document chunk with critical context",
      [
        "Keep the original chunk intact including all technical terminology",
        "Add relevant configuration examples, parameter definitions, or API references",
        "Include any prerequisite information",
        "Create contextually rich output that maintains technical accuracy"
      ]
    )
  };
})();
|
|
862
|
+
// Prompt templates for contextual chunk enrichment.
//
// Placeholders: {doc_content}, {chunk_content}, {min_tokens}, {max_tokens}.
// The "CACHED_*" templates carry only the chunk section; each full template
// is the matching cached template preceded by a <document> section, so the
// guideline text lives in exactly one place per content category.

/**
 * Prefix a chunk-only template with the <document> section.
 * @param {string} cachedTemplate - A CACHED_* template (starts with a newline).
 * @returns {string} The document-bearing variant of the template.
 */
function withDocumentSection(cachedTemplate) {
  return `\n<document>\n{doc_content}\n</document>\n${cachedTemplate}`;
}

var CACHED_CHUNK_PROMPT_TEMPLATE = `
Here is the chunk we want to situate within the whole document:
<chunk>
{chunk_content}
</chunk>

Create an enriched version of this chunk by adding critical surrounding context. Follow these guidelines:

1. Identify the document's main topic and key information relevant to understanding this chunk
2. Include 2-3 sentences before the chunk that provide essential context
3. Include 2-3 sentences after the chunk that complete thoughts or provide resolution
4. For technical documents, include any definitions or explanations of terms used in the chunk
5. For narrative content, include character or setting information needed to understand the chunk
6. Keep the original chunk text COMPLETELY INTACT and UNCHANGED in your response
7. Do not use phrases like "this chunk discusses" - directly present the context
8. The total length should be between {min_tokens} and {max_tokens} tokens
9. Format the response as a single coherent paragraph

Provide ONLY the enriched chunk text in your response:`;
var CACHED_CODE_CHUNK_PROMPT_TEMPLATE = `
Here is the chunk of code we want to situate within the whole document:
<chunk>
{chunk_content}
</chunk>

Create an enriched version of this code chunk by adding critical surrounding context. Follow these guidelines:

1. Preserve ALL code syntax, indentation, and comments exactly as they appear
2. Include any import statements, function definitions, or class declarations that this code depends on
3. Add necessary type definitions or interfaces that are referenced in this chunk
4. Include any crucial comments from elsewhere in the document that explain this code
5. If there are key variable declarations or initializations earlier in the document, include those
6. Keep the original chunk COMPLETELY INTACT and UNCHANGED in your response
7. The total length should be between {min_tokens} and {max_tokens} tokens
8. Do NOT include implementation details for functions that are only called but not defined in this chunk

Provide ONLY the enriched code chunk in your response:`;
var CACHED_MATH_PDF_PROMPT_TEMPLATE = `
Here is the chunk we want to situate within the whole document:
<chunk>
{chunk_content}
</chunk>

Create an enriched version of this chunk by adding critical surrounding context. This document contains mathematical content that requires special handling. Follow these guidelines:

1. Preserve ALL mathematical notation exactly as it appears in the chunk
2. Include any defining equations, variables, or parameters mentioned earlier in the document that relate to this chunk
3. Add section/subsection names or figure references if they help situate the chunk
4. If variables or symbols are defined elsewhere in the document, include these definitions
5. If mathematical expressions appear corrupted, try to infer their meaning from context
6. Keep the original chunk text COMPLETELY INTACT and UNCHANGED in your response
7. The total length should be between {min_tokens} and {max_tokens} tokens
8. Format the response as a coherent mathematical explanation

Provide ONLY the enriched chunk text in your response:`;
var CACHED_TECHNICAL_PROMPT_TEMPLATE = `
Here is the chunk we want to situate within the whole document:
<chunk>
{chunk_content}
</chunk>

Create an enriched version of this chunk by adding critical surrounding context. This appears to be technical documentation that requires special handling. Follow these guidelines:

1. Preserve ALL technical terminology, product names, and version numbers exactly as they appear
2. Include any prerequisite information or requirements mentioned earlier in the document
3. Add section/subsection headings or navigation path to situate this chunk within the document structure
4. Include any definitions of technical terms, acronyms, or jargon used in this chunk
5. If this chunk references specific configurations, include relevant parameter explanations
6. Keep the original chunk text COMPLETELY INTACT and UNCHANGED in your response
7. The total length should be between {min_tokens} and {max_tokens} tokens
8. Format the response maintaining any hierarchical structure present in the original

Provide ONLY the enriched chunk text in your response:`;
var CONTEXTUAL_CHUNK_ENRICHMENT_PROMPT_TEMPLATE = withDocumentSection(CACHED_CHUNK_PROMPT_TEMPLATE);
var MATH_PDF_PROMPT_TEMPLATE = withDocumentSection(CACHED_MATH_PDF_PROMPT_TEMPLATE);
var CODE_PROMPT_TEMPLATE = withDocumentSection(CACHED_CODE_CHUNK_PROMPT_TEMPLATE);
var TECHNICAL_PROMPT_TEMPLATE = withDocumentSection(CACHED_TECHNICAL_PROMPT_TEMPLATE);
|
|
1024
|
+
/**
 * Fill a chunk-enrichment prompt template with the document, the chunk and
 * the target token range.
 *
 * When the chunk alone is estimated to exceed 70% of `maxTokens`, the range
 * is widened to `[chunkTokens, ceil(chunkTokens * 1.3)]` so the model has
 * room to keep the chunk intact and still add context.
 *
 * @param {string} docContent - Full document text.
 * @param {string} chunkContent - The chunk to be enriched.
 * @param {number} [minTokens] - Lower bound of the requested output length.
 * @param {number} [maxTokens] - Upper bound of the requested output length.
 * @param {string} [promptTemplate] - Template containing `{doc_content}`,
 *   `{chunk_content}`, `{min_tokens}` and `{max_tokens}` placeholders.
 * @returns {string} The formatted prompt, or an "Error: ..." string when
 *   either the document or the chunk is empty.
 */
function getContextualizationPrompt(docContent, chunkContent, minTokens = CONTEXT_TARGETS.DEFAULT.MIN_TOKENS, maxTokens = CONTEXT_TARGETS.DEFAULT.MAX_TOKENS, promptTemplate = CONTEXTUAL_CHUNK_ENRICHMENT_PROMPT_TEMPLATE) {
  if (!docContent || !chunkContent) {
    return "Error: Document or chunk content missing.";
  }
  let lowerBound = minTokens;
  let upperBound = maxTokens;
  const chunkTokens = Math.ceil(chunkContent.length / DEFAULT_CHARS_PER_TOKEN);
  if (chunkTokens > upperBound * 0.7) {
    upperBound = Math.ceil(chunkTokens * 1.3);
    lowerBound = chunkTokens;
  }
  const values = {
    doc_content: docContent,
    chunk_content: chunkContent,
    min_tokens: String(lowerBound),
    max_tokens: String(upperBound)
  };
  // Single pass with a replacer function:
  //  - a string replacement would interpret "$$", "$&", "$1"... inside the
  //    inserted text (LaTeX "$$...$$" would silently become "$...$");
  //  - chained replaces would also substitute placeholder-looking text that
  //    happens to appear inside the document or chunk itself.
  return promptTemplate.replace(/\{(doc_content|chunk_content|min_tokens|max_tokens)\}/g, (_match, key) => values[key]);
}
|
|
1035
|
+
/**
 * Build the chunk-only enrichment prompt (no embedded document) together
 * with the system prompt that matches the content type.
 *
 * Template selection:
 *  - code-like content types -> code template / CODE system prompt
 *  - PDF -> math template + MATH_PDF system prompt when the chunk looks
 *    mathematical, otherwise the generic template with the PDF system prompt
 *  - markdown, HTML, or chunks that look like technical docs -> technical
 *  - anything else (or no content type) -> generic
 *
 * When the chunk alone is estimated to exceed 70% of `maxTokens`, the range
 * is widened to `[chunkTokens, ceil(chunkTokens * 1.3)]`.
 *
 * @param {string} chunkContent - The chunk to be enriched.
 * @param {string} [contentType] - MIME type of the source document.
 * @param {number} [minTokens] - Lower bound of the requested output length.
 * @param {number} [maxTokens] - Upper bound of the requested output length.
 * @returns {{prompt: string, systemPrompt: string}} `prompt` is an
 *   "Error: ..." string when the chunk is empty.
 */
function getCachingContextualizationPrompt(chunkContent, contentType, minTokens = CONTEXT_TARGETS.DEFAULT.MIN_TOKENS, maxTokens = CONTEXT_TARGETS.DEFAULT.MAX_TOKENS) {
  if (!chunkContent) {
    return {
      prompt: "Error: Chunk content missing.",
      systemPrompt: SYSTEM_PROMPTS.DEFAULT
    };
  }
  let lowerBound = minTokens;
  let upperBound = maxTokens;
  const chunkTokens = Math.ceil(chunkContent.length / DEFAULT_CHARS_PER_TOKEN);
  if (chunkTokens > upperBound * 0.7) {
    upperBound = Math.ceil(chunkTokens * 1.3);
    lowerBound = chunkTokens;
  }
  let promptTemplate = CACHED_CHUNK_PROMPT_TEMPLATE;
  let systemPrompt = SYSTEM_PROMPTS.DEFAULT;
  if (contentType) {
    if (contentType.includes("javascript") || contentType.includes("typescript") || contentType.includes("python") || contentType.includes("java") || contentType.includes("c++") || contentType.includes("code")) {
      promptTemplate = CACHED_CODE_CHUNK_PROMPT_TEMPLATE;
      systemPrompt = SYSTEM_PROMPTS.CODE;
    } else if (contentType.includes("pdf")) {
      if (containsMathematicalContent(chunkContent)) {
        promptTemplate = CACHED_MATH_PDF_PROMPT_TEMPLATE;
        systemPrompt = SYSTEM_PROMPTS.MATH_PDF;
      } else {
        systemPrompt = SYSTEM_PROMPTS.PDF;
      }
    } else if (contentType.includes("markdown") || contentType.includes("text/html") || isTechnicalDocumentation(chunkContent)) {
      promptTemplate = CACHED_TECHNICAL_PROMPT_TEMPLATE;
      systemPrompt = SYSTEM_PROMPTS.TECHNICAL;
    }
  }
  const values = {
    chunk_content: chunkContent,
    min_tokens: String(lowerBound),
    max_tokens: String(upperBound)
  };
  // Replacer function in a single pass: a string replacement would treat
  // "$$", "$&", "$1"... in the chunk as special patterns and corrupt it, and
  // chained replaces could rewrite placeholder-looking text inside the chunk.
  const prompt = promptTemplate.replace(/\{(chunk_content|min_tokens|max_tokens)\}/g, (_match, key) => values[key]);
  return {
    prompt,
    systemPrompt
  };
}
|
|
1071
|
+
/**
 * Pick the token targets and full-document prompt template that fit a MIME
 * type, then format the enrichment prompt.
 *
 * Order of checks: PDF (math vs. plain, judged on the whole document),
 * then code-like MIME types, then technical documentation (content
 * heuristic, markdown or HTML). Anything else uses the defaults.
 *
 * @param {string} mimeType - MIME type of the source document.
 * @param {string} docContent - Full document text.
 * @param {string} chunkContent - The chunk to be enriched.
 * @returns {string} The formatted prompt from `getContextualizationPrompt`.
 */
function getPromptForMimeType(mimeType, docContent, chunkContent) {
  const codeMarkers = ["javascript", "typescript", "python", "java", "c++", "code"];
  let targets = CONTEXT_TARGETS.DEFAULT;
  let promptTemplate = CONTEXTUAL_CHUNK_ENRICHMENT_PROMPT_TEMPLATE;
  if (mimeType.includes("pdf")) {
    if (containsMathematicalContent(docContent)) {
      targets = CONTEXT_TARGETS.MATH_PDF;
      promptTemplate = MATH_PDF_PROMPT_TEMPLATE;
    } else {
      // Plain PDFs keep the generic template; only the length targets change.
      targets = CONTEXT_TARGETS.PDF;
    }
  } else if (codeMarkers.some((marker) => mimeType.includes(marker))) {
    targets = CONTEXT_TARGETS.CODE;
    promptTemplate = CODE_PROMPT_TEMPLATE;
  } else if (isTechnicalDocumentation(docContent) || mimeType.includes("markdown") || mimeType.includes("text/html")) {
    targets = CONTEXT_TARGETS.TECHNICAL;
    promptTemplate = TECHNICAL_PROMPT_TEMPLATE;
  }
  return getContextualizationPrompt(docContent, chunkContent, targets.MIN_TOKENS, targets.MAX_TOKENS, promptTemplate);
}
|
|
1095
|
+
/**
 * Pick the token targets that fit a MIME type and build the chunk-only
 * (cache-friendly) enrichment prompt.
 *
 * Only the token range is decided here; template and system-prompt
 * selection is delegated to `getCachingContextualizationPrompt`.
 *
 * @param {string} mimeType - MIME type of the source document.
 * @param {string} chunkContent - The chunk to be enriched.
 * @returns {{prompt: string, systemPrompt: string}}
 */
function getCachingPromptForMimeType(mimeType, chunkContent) {
  const codeMarkers = ["javascript", "typescript", "python", "java", "c++", "code"];
  let targets = CONTEXT_TARGETS.DEFAULT;
  if (mimeType.includes("pdf")) {
    targets = containsMathematicalContent(chunkContent) ? CONTEXT_TARGETS.MATH_PDF : CONTEXT_TARGETS.PDF;
  } else if (codeMarkers.some((marker) => mimeType.includes(marker))) {
    targets = CONTEXT_TARGETS.CODE;
  } else if (isTechnicalDocumentation(chunkContent) || mimeType.includes("markdown") || mimeType.includes("text/html")) {
    targets = CONTEXT_TARGETS.TECHNICAL;
  }
  return getCachingContextualizationPrompt(chunkContent, mimeType, targets.MIN_TOKENS, targets.MAX_TOKENS);
}
|
|
1115
|
+
/**
 * Heuristically decide whether text contains mathematical content.
 *
 * Returns true as soon as any LaTeX-style or general math pattern matches;
 * otherwise true only when at least two distinct math keywords occur
 * (case-insensitive substring match).
 *
 * @param {string} content - Text to inspect.
 * @returns {boolean}
 */
function containsMathematicalContent(content) {
  const mathPatterns = [
    // LaTeX markers
    /\$\$.+?\$\$/s,
    /\$.+?\$/g,
    /\\begin\{equation\}/,
    /\\begin\{align\}/,
    /\\sum_/,
    /\\int/,
    /\\frac\{/,
    /\\sqrt\{/,
    /\\alpha|\\beta|\\gamma|\\delta|\\theta|\\lambda|\\sigma/,
    /\\nabla|\\partial/,
    // General notation: symbols, exponents, coordinate pairs, assignments, intervals, products
    /[≠≤≥±∞∫∂∑∏√∈∉⊆⊇⊂⊃∪∩]/,
    /\b[a-zA-Z]\^[0-9]/,
    /\(\s*-?\d+(\.\d+)?\s*,\s*-?\d+(\.\d+)?\s*\)/,
    /\b[xyz]\s*=\s*-?\d+(\.\d+)?/,
    /\[\s*-?\d+(\.\d+)?\s*,\s*-?\d+(\.\d+)?\s*\]/,
    /\b\d+\s*×\s*\d+/
  ];
  if (mathPatterns.some((pattern) => pattern.test(content))) {
    return true;
  }
  const mathKeywords = [
    "theorem",
    "lemma",
    "proof",
    "equation",
    "function",
    "derivative",
    "integral",
    "matrix",
    "vector",
    "algorithm",
    "constraint",
    "coefficient"
  ];
  const lowered = content.toLowerCase();
  let hits = 0;
  for (const keyword of mathKeywords) {
    if (lowered.includes(keyword)) {
      hits += 1;
    }
  }
  return hits >= 2;
}
|
|
1164
|
+
/**
 * Heuristically decide whether text looks like technical documentation.
 *
 * A single match of any pattern is enough: version numbers, API/SDK/CLI
 * mentions, URLs, upper-case HTTP verbs, markup tags, README/CHANGELOG,
 * configuration or parameter vocabulary, common documentation headings, or
 * two consecutive list items.
 *
 * @param {string} content - Text to inspect.
 * @returns {boolean}
 */
function isTechnicalDocumentation(content) {
  const indicators = [
    // Technical vocabulary and syntax
    /\b(version|v)\s*\d+\.\d+(\.\d+)?/i,
    /\b(api|sdk|cli)\b/i,
    /\b(http|https|ftp):\/\//i,
    /\b(GET|POST|PUT|DELETE)\b/,
    /<\/?[a-z][\s\S]*>/i,
    /\bREADME\b|\bCHANGELOG\b/i,
    /\b(config|configuration)\b/i,
    /\b(parameter|param|argument|arg)\b/i,
    // Typical documentation headings
    /\b(Introduction|Overview|Getting Started|Installation|Usage|API Reference|Troubleshooting)\b/i,
    // Two adjacent list items (numbered, bullet, asterisk, dash)
    /\d+\.\s.+\n\d+\.\s.+/,
    /•\s.+\n•\s.+/,
    /\*\s.+\n\*\s.+/,
    /-\s.+\n-\s.+/
  ];
  return indicators.some((indicator) => indicator.test(content));
}
|
|
1191
|
+
/**
 * Choose the text to store for a chunk: the trimmed generated context when
 * it is non-blank, otherwise the original chunk unchanged.
 *
 * @param {string} chunkContent - Original chunk text (fallback).
 * @param {string} [generatedContext] - Model output for the enriched chunk.
 * @returns {string}
 */
function getChunkWithContext(chunkContent, generatedContext) {
  const enriched = generatedContext ? generatedContext.trim() : "";
  return enriched === "" ? chunkContent : enriched;
}
|
|
1197
|
+
|
|
1198
|
+
// llm.ts
|
|
1199
|
+
var import_anthropic = require("@ai-sdk/anthropic");
|
|
1200
|
+
var import_google = require("@ai-sdk/google");
|
|
1201
|
+
var import_openai = require("@ai-sdk/openai");
|
|
1202
|
+
var import_core2 = require("@elizaos/core");
|
|
1203
|
+
var import_ai_sdk_provider = require("@openrouter/ai-sdk-provider");
|
|
1204
|
+
var import_ai = require("ai");
|
|
1205
|
+
/**
 * Generate text with the configured provider.
 *
 * Provider, model name and output-token limit come from `overrideConfig`
 * when given, otherwise from the validated runtime configuration.
 * Any failure (including an unsupported provider) is logged and rethrown.
 *
 * @param {object} runtime - Runtime passed to `validateModelConfig`.
 * @param {string} prompt - User prompt.
 * @param {string} [system] - System prompt.
 * @param {object} [overrideConfig] - Optional overrides: `provider`,
 *   `modelName`, `maxTokens`, `cacheDocument`, `cacheOptions`,
 *   `autoCacheContextualRetrieval` (treated as true unless exactly `false`).
 * @returns {Promise<*>} Whatever the provider-specific generator returns.
 * @throws {Error} When no model name is configured, the provider is
 *   unsupported, or the provider call fails.
 */
async function generateText(runtime, prompt, system, overrideConfig) {
  const config = validateModelConfig(runtime);
  const provider = overrideConfig?.provider || config.TEXT_PROVIDER;
  const modelName = overrideConfig?.modelName || config.TEXT_MODEL;
  const maxTokens = overrideConfig?.maxTokens || config.MAX_OUTPUT_TOKENS;
  const autoCacheContextualRetrieval = overrideConfig?.autoCacheContextualRetrieval !== false;
  if (!modelName) {
    throw new Error(`No model name configured for provider: ${provider}`);
  }
  try {
    if (provider === "anthropic") {
      return await generateAnthropicText(config, prompt, system, modelName, maxTokens);
    }
    if (provider === "openai") {
      return await generateOpenAIText(config, prompt, system, modelName, maxTokens);
    }
    if (provider === "openrouter") {
      return await generateOpenRouterText(config, prompt, system, modelName, maxTokens, overrideConfig?.cacheDocument, overrideConfig?.cacheOptions, autoCacheContextualRetrieval);
    }
    if (provider === "google") {
      return await generateGoogleText(prompt, system, modelName, maxTokens, config);
    }
    // Thrown inside the try so it is logged like any provider failure.
    throw new Error(`Unsupported text provider: ${provider}`);
  } catch (error) {
    import_core2.logger.error({ error }, `${provider} ${modelName} error`);
    throw error;
  }
}
|
|
1232
|
+
/**
 * Generate text through the Anthropic provider, retrying on rate limits.
 *
 * Up to three attempts are made. Only rate-limit failures (status 429, or a
 * message mentioning "rate limit" / "429") are retried, waiting 2s and then
 * 4s between attempts; every other error is rethrown immediately.
 *
 * @param {object} config - Supplies `ANTHROPIC_API_KEY` and `ANTHROPIC_BASE_URL`.
 * @param {string} prompt - User prompt.
 * @param {string} [system] - System prompt.
 * @param {string} modelName - Anthropic model identifier.
 * @param {number} [maxTokens] - Output token limit.
 * @returns {Promise<*>} Result of the AI SDK `generateText` call.
 */
async function generateAnthropicText(config, prompt, system, modelName, maxTokens) {
  const { ANTHROPIC_API_KEY: apiKey, ANTHROPIC_BASE_URL: baseURL } = config;
  const modelInstance = import_anthropic.createAnthropic({ apiKey, baseURL })(modelName);
  const maxRetries = 3;
  let attempt = 0;
  while (attempt < maxRetries) {
    try {
      return await import_ai.generateText({
        model: modelInstance,
        prompt,
        system,
        temperature: 0.3,
        maxOutputTokens: maxTokens
      });
    } catch (error) {
      const errorObj = error;
      const isRateLimit = errorObj?.status === 429 || errorObj?.message?.includes("rate limit") || errorObj?.message?.includes("429");
      const hasAttemptsLeft = attempt < maxRetries - 1;
      if (!(isRateLimit && hasAttemptsLeft)) {
        throw error;
      }
      // Exponential backoff: 2s after the first failure, 4s after the second.
      const delay = 2 ** (attempt + 1) * 1000;
      await new Promise((wake) => setTimeout(wake, delay));
    }
    attempt += 1;
  }
  throw new Error("Max retries exceeded for Anthropic text generation");
}
|
|
1261
|
+
/**
 * Generate text through the OpenAI provider's chat model.
 *
 * @param {object} config - Supplies `OPENAI_API_KEY` and `OPENAI_BASE_URL`.
 * @param {string} prompt - User prompt.
 * @param {string} [system] - System prompt.
 * @param {string} modelName - OpenAI model identifier.
 * @param {number} [maxTokens] - Output token limit.
 * @returns {Promise<*>} Result of the AI SDK `generateText` call.
 */
async function generateOpenAIText(config, prompt, system, modelName, maxTokens) {
  const { OPENAI_API_KEY: apiKey, OPENAI_BASE_URL: baseURL } = config;
  const client = import_openai.createOpenAI({ apiKey, baseURL });
  return await import_ai.generateText({
    model: client.chat(modelName),
    prompt,
    system,
    temperature: 0.3,
    maxOutputTokens: maxTokens
  });
}
|
|
1276
|
+
/**
 * Generate text with a Google model through `import_google.google`.
 *
 * Side effect: when `config.GOOGLE_API_KEY` is truthy it is copied into
 * `process.env.GOOGLE_GENERATIVE_AI_API_KEY` before the model instance is created.
 *
 * @param {string} prompt - User prompt.
 * @param {string} [system] - System prompt.
 * @param {string} modelName - Model identifier passed to the Google provider.
 * @param {number} maxTokens - Forwarded as `maxOutputTokens`.
 * @param {object} config - Reads `GOOGLE_API_KEY`.
 * @returns {Promise<object>} Whatever `import_ai.generateText` resolves to.
 */
async function generateGoogleText(prompt, system, modelName, maxTokens, config) {
  const createModel = import_google.google;
  const apiKey = config.GOOGLE_API_KEY;
  if (apiKey) {
    process.env.GOOGLE_GENERATIVE_AI_API_KEY = apiKey;
  }
  const request = {
    model: createModel(modelName),
    prompt,
    system,
    temperature: 0.3,
    maxOutputTokens: maxTokens
  };
  return await import_ai.generateText(request);
}
|
|
1291
|
+
/**
 * Generate text through OpenRouter, routing to a document-caching variant when possible.
 *
 * Caching is only attempted for models whose name contains "claude" or "gemini"
 * (case-insensitive). The document to cache is `cacheDocument` if given; otherwise,
 * when `autoCacheContextualRetrieval` is on, it is the trimmed content of the first
 * `<document>…</document>` section found in `prompt`. When a document is available the
 * first `<document>` section is removed from the prompt before it is sent. In every
 * other case the untouched prompt goes to the standard OpenRouter path.
 *
 * @param {object} config - Reads `OPENROUTER_API_KEY` and `OPENROUTER_BASE_URL`.
 * @param {string} prompt - User prompt, possibly embedding a `<document>` section.
 * @param {string} [system] - System prompt.
 * @param {string} modelName - OpenRouter model identifier.
 * @param {number} maxTokens - Forwarded as the output token limit.
 * @param {string} [cacheDocument] - Explicit document text to cache.
 * @param {object} [_cacheOptions] - Accepted for interface compatibility; not read.
 * @param {boolean} [autoCacheContextualRetrieval=true] - Extract the document from the prompt.
 * @returns {Promise<object>} Result of the selected generation helper.
 */
async function generateOpenRouterText(config, prompt, system, modelName, maxTokens, cacheDocument, _cacheOptions, autoCacheContextualRetrieval = true) {
  const provider = import_ai_sdk_provider.createOpenRouter({
    apiKey: config.OPENROUTER_API_KEY,
    baseURL: config.OPENROUTER_BASE_URL
  });
  const model = provider.chat(modelName);
  const lowerName = modelName.toLowerCase();
  const isClaude = lowerName.includes("claude");
  const isGemini = lowerName.includes("gemini");
  const sendStandard = () => generateStandardOpenRouterText(prompt, system, model, modelName, maxTokens);

  if (!isClaude && !isGemini) {
    return await sendStandard();
  }

  let cachedDocument = cacheDocument;
  if (!cachedDocument && autoCacheContextualRetrieval) {
    const embedded = prompt.match(/<document>([\s\S]*?)<\/document>/)?.[1];
    if (embedded) {
      cachedDocument = embedded.trim();
    }
  }
  if (!cachedDocument) {
    return await sendStandard();
  }

  // The document travels separately, so strip its first inline copy from the prompt.
  const strippedPrompt = prompt.includes("<document>")
    ? prompt.replace(/<document>[\s\S]*?<\/document>/, "").trim()
    : prompt;

  if (isClaude) {
    return await generateClaudeWithCaching(strippedPrompt, system, model, modelName, maxTokens, cachedDocument);
  }
  return await generateGeminiWithCaching(strippedPrompt, system, model, modelName, maxTokens, cachedDocument, lowerName.includes("gemini-2.5"));
}
|
|
1321
|
+
/**
 * Generate text with a Claude model via OpenRouter, marking the document as cacheable.
 *
 * The document is sent as a text part carrying `cache_control: { type: "ephemeral" }`.
 * With a system prompt, the system message holds the system text plus the document and
 * the user message holds the prompt. Without one, a single user message holds the
 * "Document for context:" label, the document and the prompt.
 *
 * @param {string} promptText - Prompt with the document section already removed.
 * @param {string} [system] - System prompt.
 * @param {object} modelInstance - Model object passed to `generateText`.
 * @param {string} modelName - Used only in the debug log line.
 * @param {number} maxTokens - Forwarded as `maxOutputTokens`.
 * @param {string} documentForCaching - Document text to attach with cache control.
 * @returns {Promise<object>} Whatever `import_ai.generateText` resolves to.
 */
async function generateClaudeWithCaching(promptText, system, modelInstance, modelName, maxTokens, documentForCaching) {
  const textPart = (text) => ({ type: "text", text });
  const cachedDocumentPart = () => ({
    type: "text",
    text: documentForCaching,
    cache_control: {
      type: "ephemeral"
    }
  });

  let messages;
  if (system) {
    messages = [
      { role: "system", content: [textPart(system), cachedDocumentPart()] },
      { role: "user", content: [textPart(promptText)] }
    ];
  } else {
    messages = [
      {
        role: "user",
        content: [textPart("Document for context:"), cachedDocumentPart(), textPart(promptText)]
      }
    ];
  }

  const result = await import_ai.generateText({
    model: modelInstance,
    messages,
    temperature: 0.3,
    maxOutputTokens: maxTokens,
    providerOptions: {
      openrouter: {
        usage: {
          include: true
        }
      }
    }
  });
  logCacheMetrics(result);

  const inputTokens = result.usage.inputTokens || 0;
  const outputTokens = result.usage.outputTokens || 0;
  import_core2.logger.debug(`OpenRouter ${modelName}: ${inputTokens + outputTokens} tokens (${inputTokens}→${outputTokens})`);
  return result;
}
|
|
1386
|
+
/**
 * Generate text with a Gemini model via OpenRouter, placing the document first in the prompt.
 *
 * The prompt sent is: the system text followed by a blank line (only when `system` is
 * truthy), then the document, a blank line, and the prompt text. No separate `system`
 * field is sent.
 *
 * @param {string} promptText - Prompt with the document section already removed.
 * @param {string} [system] - System prompt, folded into the prompt text.
 * @param {object} modelInstance - Model object passed to `generateText`.
 * @param {string} modelName - Used only in the debug log line.
 * @param {number} maxTokens - Forwarded as `maxOutputTokens`.
 * @param {string} documentForCaching - Document text placed ahead of the prompt.
 * @param {boolean} [_isGemini25Model] - Accepted for interface compatibility; not read.
 * @returns {Promise<object>} Whatever `import_ai.generateText` resolves to.
 */
async function generateGeminiWithCaching(promptText, system, modelInstance, modelName, maxTokens, documentForCaching, _isGemini25Model) {
  let systemPrefix = "";
  if (system) {
    systemPrefix = `${system}\n\n`;
  }
  const request = {
    model: modelInstance,
    prompt: `${systemPrefix}${documentForCaching}\n\n${promptText}`,
    temperature: 0.3,
    maxOutputTokens: maxTokens,
    providerOptions: {
      openrouter: {
        usage: {
          include: true
        }
      }
    }
  };
  const result = await import_ai.generateText(request);
  logCacheMetrics(result);

  const inputTokens = result.usage.inputTokens || 0;
  const outputTokens = result.usage.outputTokens || 0;
  import_core2.logger.debug(`OpenRouter ${modelName}: ${inputTokens + outputTokens} tokens (${inputTokens}→${outputTokens})`);
  return result;
}
|
|
1411
|
+
/**
 * Generate text through OpenRouter without any document caching.
 *
 * Usage accounting is requested from OpenRouter and the token totals are written to
 * the debug log.
 *
 * @param {string} prompt - User prompt, sent unchanged.
 * @param {string} [system] - System prompt.
 * @param {object} modelInstance - Model object passed to `generateText`.
 * @param {string} modelName - Used only in the debug log line.
 * @param {number} maxTokens - Forwarded as `maxOutputTokens`.
 * @returns {Promise<object>} Whatever `import_ai.generateText` resolves to.
 */
async function generateStandardOpenRouterText(prompt, system, modelInstance, modelName, maxTokens) {
  const request = {
    model: modelInstance,
    prompt,
    system,
    temperature: 0.3,
    maxOutputTokens: maxTokens,
    providerOptions: {
      openrouter: {
        usage: {
          include: true
        }
      }
    }
  };
  const result = await import_ai.generateText(request);

  const inputTokens = result.usage.inputTokens || 0;
  const outputTokens = result.usage.outputTokens || 0;
  import_core2.logger.debug(`OpenRouter ${modelName}: ${inputTokens + outputTokens} tokens (${inputTokens}→${outputTokens})`);
  return result;
}
|
|
1430
|
+
/**
 * No-op placeholder. The cached Claude and Gemini OpenRouter helpers call it with the
 * `generateText` result, but it currently ignores the argument and does nothing.
 */
function logCacheMetrics(_result) {}
|
|
1431
|
+
|
|
1432
|
+
// document-processor.ts
|
|
1433
|
+
/**
 * Rough token estimate: one token per four characters, rounded up.
 *
 * @param {string} text - Text to measure (its `length` is used).
 * @returns {number} Estimated token count.
 */
function estimateTokens(text) {
  const CHARS_PER_TOKEN = 4;
  const characterCount = text.length;
  return Math.ceil(characterCount / CHARS_PER_TOKEN);
}
|
|
1436
|
+
/**
 * Report whether contextual knowledge (CTX_KNOWLEDGE_ENABLED) is switched on.
 *
 * With a runtime, only `runtime.getSetting("CTX_KNOWLEDGE_ENABLED")` is consulted;
 * without one, `process.env.CTX_KNOWLEDGE_ENABLED` is read. The value is enabled only
 * when, after trimming and lower-casing, it equals "true".
 *
 * @param {object} [runtime] - Agent runtime exposing `getSetting`.
 * @returns {boolean} True when the setting resolves to "true".
 */
function getCtxKnowledgeEnabled(runtime) {
  const isTrue = (raw) => raw?.trim().toLowerCase() === "true";
  if (!runtime) {
    return isTrue(process.env.CTX_KNOWLEDGE_ENABLED?.toString());
  }
  const setting = runtime.getSetting("CTX_KNOWLEDGE_ENABLED");
  // Non-string settings (e.g. a boolean) are stringified before comparison.
  return isTrue(typeof setting === "string" ? setting : setting?.toString());
}
|
|
1452
|
+
/**
 * Decide from environment variables whether a custom text provider is fully configured.
 *
 * Requires `TEXT_PROVIDER` and `TEXT_MODEL` to be set, the provider (case-insensitive)
 * to be one of openrouter / openai / anthropic / google, and that provider's API key
 * variable to be non-empty.
 *
 * @returns {boolean} True when the custom LLM path can be used.
 */
function shouldUseCustomLLM() {
  const apiKeyVariableByProvider = new Map([
    ["openrouter", "OPENROUTER_API_KEY"],
    ["openai", "OPENAI_API_KEY"],
    ["anthropic", "ANTHROPIC_API_KEY"],
    ["google", "GOOGLE_API_KEY"]
  ]);
  const { TEXT_PROVIDER: textProvider, TEXT_MODEL: textModel } = process.env;
  if (!textProvider || !textModel) {
    return false;
  }
  const apiKeyVariable = apiKeyVariableByProvider.get(textProvider.toLowerCase());
  if (apiKeyVariable === undefined) {
    return false;
  }
  return !!process.env[apiKeyVariable];
}
|
|
1471
|
+
// Evaluated once, when this module is loaded, from process.env only (see
// shouldUseCustomLLM). Later changes to the environment are not picked up.
// generateContextsInBatch reads it to choose between generateText and runtime.useModel.
var useCustomLLM = shouldUseCustomLLM();
|
|
1472
|
+
/**
 * Split a document into chunks, then contextualize, embed and store them as fragments.
 *
 * Returns 0 without further work when the text is empty/whitespace or when splitting
 * yields no chunks. Provider rate limits drive the batch size (default 30) and the
 * rate limiter (default 60 requests per minute). `roomId`, `entityId` and `worldId`
 * fall back to `agentId` when not supplied.
 *
 * @param {object} params
 * @param {object} params.runtime - Agent runtime.
 * @param {string} params.documentId - Id of the parent document.
 * @param {string} params.fullDocumentText - Complete document text.
 * @param {string} params.agentId - Owning agent id.
 * @param {string} [params.contentType] - MIME type of the document.
 * @param {string} [params.roomId]
 * @param {string} [params.entityId]
 * @param {string} [params.worldId]
 * @param {string} [params.documentTitle]
 * @returns {Promise<number>} Number of fragments saved.
 */
async function processFragmentsSynchronously({
  runtime,
  documentId,
  fullDocumentText,
  agentId,
  contentType,
  roomId,
  entityId,
  worldId,
  documentTitle
}) {
  const hasContent = Boolean(fullDocumentText) && fullDocumentText.trim() !== "";
  if (!hasContent) {
    import_core3.logger.warn(`No text content available for document ${documentId}`);
    return 0;
  }

  const chunks = await splitDocumentIntoChunks(fullDocumentText);
  if (chunks.length === 0) {
    import_core3.logger.warn(`No chunks generated for document ${documentId}`);
    return 0;
  }
  import_core3.logger.info(`Split into ${chunks.length} chunks`);

  const limits = await getProviderRateLimits(runtime);
  const concurrencyLimit = limits.maxConcurrentRequests || 30;
  const rateLimiter = createRateLimiter(limits.requestsPerMinute || 60, limits.tokensPerMinute, limits.rateLimitEnabled);

  const outcome = await processAndSaveFragments({
    runtime,
    documentId,
    chunks,
    fullDocumentText,
    contentType,
    agentId,
    roomId: roomId || agentId,
    entityId: entityId || agentId,
    worldId: worldId || agentId,
    concurrencyLimit,
    rateLimiter,
    documentTitle,
    batchDelayMs: limits.batchDelayMs
  });
  const { savedCount, failedCount } = outcome;
  if (failedCount > 0) {
    import_core3.logger.warn(`${failedCount}/${chunks.length} chunks failed processing`);
  }
  return savedCount;
}
|
|
1516
|
+
/**
 * Extract plain text from an uploaded file buffer.
 *
 * PDFs go through `convertPdfToTextFromBuffer`. Content types containing "text/",
 * "application/json" or "application/xml" are decoded as UTF-8. Everything else is
 * handed to `extractTextFromFileBuffer`.
 *
 * @param {Buffer} fileBuffer - Raw file bytes; must be non-empty.
 * @param {string} contentType - MIME type of the file.
 * @param {string} originalFilename - Used in log and error messages.
 * @returns {Promise<string>} Extracted text.
 * @throws {Error} When the buffer is empty, or wrapping any extraction failure as
 *   "Failed to extract text from <name>: <reason>".
 */
async function extractTextFromDocument(fileBuffer, contentType, originalFilename) {
  const isEmpty = !fileBuffer || fileBuffer.length === 0;
  if (isEmpty) {
    throw new Error(`Empty file buffer provided for ${originalFilename}`);
  }
  const textualMarkers = ["text/", "application/json", "application/xml"];
  try {
    if (contentType === "application/pdf") {
      import_core3.logger.debug(`Extracting text from PDF: ${originalFilename}`);
      return await convertPdfToTextFromBuffer(fileBuffer, originalFilename);
    }
    const looksTextual = textualMarkers.some((marker) => contentType.includes(marker));
    if (looksTextual) {
      try {
        return fileBuffer.toString("utf8");
      } catch (_decodeError) {
        // Decoding failed: warn and fall through to the generic extractor below.
        import_core3.logger.warn(`Failed to decode ${originalFilename} as UTF-8`);
      }
    }
    return await extractTextFromFileBuffer(fileBuffer, contentType, originalFilename);
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    import_core3.logger.error(`Error extracting text from ${originalFilename}: ${reason}`);
    throw new Error(`Failed to extract text from ${originalFilename}: ${reason}`);
  }
}
|
|
1540
|
+
/**
 * Build the memory record that represents an uploaded document.
 *
 * The extension is whatever follows the LAST dot of the filename, lower-cased, and the
 * title is the filename with that final extension removed. A filename without a dot
 * has an empty extension and is used unchanged as the title.
 *
 * @param {object} params
 * @param {string} params.text - Document text stored as `content.text`.
 * @param {string} params.agentId - Also used as `roomId` and `entityId`.
 * @param {string} params.clientDocumentId - Stored as `metadata.documentId`.
 * @param {string} params.originalFilename - Source filename.
 * @param {string} params.contentType - MIME type.
 * @param {string} params.worldId
 * @param {number} params.fileSize
 * @param {string} [params.documentId] - Memory id; a fresh UUID is generated when omitted.
 * @param {object} [params.customMetadata] - Merged last, so it can override any metadata key.
 * @returns {object} Memory object ready to be stored.
 */
function createDocumentMemory({
  text,
  agentId,
  clientDocumentId,
  originalFilename,
  contentType,
  worldId,
  fileSize,
  documentId,
  customMetadata
}) {
  // Split on the last dot only. The previous `replace(".ext", "")` removed the FIRST
  // occurrence and was case-sensitive, so "Report.PDF" kept its extension in the title
  // and "notes.txt.backup.txt" lost the wrong ".txt".
  const lastDot = originalFilename.lastIndexOf(".");
  const hasExtension = lastDot !== -1;
  const fileExt = hasExtension ? originalFilename.slice(lastDot + 1).toLowerCase() : "";
  const title = hasExtension ? originalFilename.slice(0, lastDot) : originalFilename;
  const docId = documentId || v4_default();
  return {
    id: docId,
    agentId,
    roomId: agentId,
    worldId,
    entityId: agentId,
    content: { text },
    metadata: {
      type: import_core3.MemoryType.CUSTOM,
      documentId: clientDocumentId,
      originalFilename,
      contentType,
      title,
      fileExt,
      fileSize,
      source: "rag-service-main-upload",
      timestamp: Date.now(),
      ...customMetadata || {}
    }
  };
}
|
|
1575
|
+
/**
 * Split document text into chunks using the module's default chunk size and overlap.
 *
 * @param {string} documentText - Full document text.
 * @returns {Promise<string[]>} Result of `import_core3.splitChunks`; presumably an array
 *   of chunk strings — confirm against the core package.
 */
async function splitDocumentIntoChunks(documentText) {
  return await import_core3.splitChunks(
    documentText,
    DEFAULT_CHUNK_TOKEN_SIZE,
    DEFAULT_CHUNK_OVERLAP_TOKENS
  );
}
|
|
1580
|
+
/**
 * Contextualize, embed and persist document chunks, one batch at a time.
 *
 * Chunks are processed in slices of `concurrencyLimit`. A chunk is counted as failed
 * when its embedding result is unsuccessful, when its embedding is missing or empty,
 * or when `runtime.createMemory` throws. Between batches (not after the last one) the
 * function sleeps for `batchDelayMs` if that is greater than 0.
 *
 * @param {object} params
 * @param {object} params.runtime - Agent runtime; `createMemory(memory, "knowledge")` is called on it.
 * @param {string} params.documentId - Parent document id stored in fragment metadata.
 * @param {string[]} params.chunks - Chunk texts in document order.
 * @param {string} params.fullDocumentText
 * @param {string} [params.contentType]
 * @param {string} params.agentId - Fallback for room, world and entity ids.
 * @param {string} [params.roomId]
 * @param {string} [params.entityId]
 * @param {string} [params.worldId]
 * @param {number} params.concurrencyLimit - Batch size.
 * @param {Function} params.rateLimiter - Passed to the embedding step.
 * @param {string} [params.documentTitle]
 * @param {number} [params.batchDelayMs=500]
 * @returns {Promise<{savedCount: number, failedCount: number, failedChunks: number[]}>}
 */
async function processAndSaveFragments({
  runtime,
  documentId,
  chunks,
  fullDocumentText,
  contentType,
  agentId,
  roomId,
  entityId,
  worldId,
  concurrencyLimit,
  rateLimiter,
  documentTitle,
  batchDelayMs = 500
}) {
  const failedChunks = [];
  let savedCount = 0;
  const pause = (ms) => new Promise((resume) => setTimeout(resume, ms));

  for (let batchStart = 0; batchStart < chunks.length; batchStart += concurrencyLimit) {
    const batchEnd = batchStart + concurrencyLimit;
    const batchChunks = chunks.slice(batchStart, batchEnd);
    // Positions of this batch's chunks within the whole document.
    const batchOriginalIndices = Array.from(batchChunks.keys(), (offset) => batchStart + offset);

    const contextualizedChunks = await getContextualizedChunks(runtime, fullDocumentText, batchChunks, contentType, batchOriginalIndices, documentTitle);
    const embeddingResults = await generateEmbeddingsForChunks(runtime, contextualizedChunks, rateLimiter);

    for (const outcome of embeddingResults) {
      const position = outcome.index;
      if (!outcome.success) {
        failedChunks.push(position);
        import_core3.logger.warn(`Failed to process chunk ${position} for document ${documentId}`);
        continue;
      }
      const { text: fragmentText, embedding } = outcome;
      if (!embedding || embedding.length === 0) {
        failedChunks.push(position);
        continue;
      }
      try {
        await runtime.createMemory({
          id: v4_default(),
          agentId,
          roomId: roomId || agentId,
          worldId: worldId || agentId,
          entityId: entityId || agentId,
          embedding,
          content: { text: fragmentText },
          metadata: {
            type: import_core3.MemoryType.FRAGMENT,
            documentId,
            position,
            timestamp: Date.now(),
            source: "rag-service-fragment-sync"
          }
        }, "knowledge");
        savedCount += 1;
      } catch (saveError) {
        const reason = saveError instanceof Error ? saveError.message : String(saveError);
        import_core3.logger.error(`Error saving chunk ${position} to database: ${reason}`);
        failedChunks.push(position);
      }
    }

    const moreBatchesRemain = batchEnd < chunks.length;
    if (moreBatchesRemain && batchDelayMs > 0) {
      await pause(batchDelayMs);
    }
  }
  // Every failure is recorded exactly once in failedChunks, so its length is the count.
  return { savedCount, failedCount: failedChunks.length, failedChunks };
}
|
|
1650
|
+
// Maximum number of chunk texts sent in one batch embedding request
// (the slice size used by generateEmbeddingsBatch).
var EMBEDDING_BATCH_SIZE = 100;
|
|
1651
|
+
/**
 * Produce embedding results for a batch of contextualized chunks.
 *
 * Chunks whose contextualization failed are reported first as failures with the error
 * "Chunk processing failed". The remaining chunks are embedded either in batch mode or
 * one by one, depending on `shouldUseBatchEmbeddings(runtime)`; those helpers append to
 * the same results array.
 *
 * @param {object} runtime - Agent runtime.
 * @param {Array<{contextualizedText: string, index: number, success: boolean}>} contextualizedChunks
 * @param {Function} rateLimiter - Awaited by the embedding helpers before each request.
 * @returns {Promise<object[]>} One result per input chunk.
 */
async function generateEmbeddingsForChunks(runtime, contextualizedChunks, rateLimiter) {
  const results = [];
  const embeddable = [];
  for (const chunk of contextualizedChunks) {
    if (chunk.success) {
      embeddable.push(chunk);
      continue;
    }
    results.push({
      success: false,
      index: chunk.index,
      error: new Error("Chunk processing failed"),
      text: chunk.contextualizedText
    });
  }
  if (embeddable.length === 0) {
    return results;
  }
  const embed = shouldUseBatchEmbeddings(runtime) ? generateEmbeddingsBatch : generateEmbeddingsIndividual;
  return await embed(runtime, embeddable, rateLimiter, results);
}
|
|
1673
|
+
/**
 * Report whether batch embedding requests are enabled.
 *
 * Reads the `BATCH_EMBEDDINGS` runtime setting, falling back to the environment
 * variable of the same name when the setting is null or undefined. Only the exact
 * string "true" or the boolean `true` enables batching.
 *
 * @param {object} runtime - Agent runtime exposing `getSetting`.
 * @returns {boolean}
 */
function shouldUseBatchEmbeddings(runtime) {
  const fromRuntime = runtime.getSetting("BATCH_EMBEDDINGS");
  const effective = fromRuntime ?? process.env.BATCH_EMBEDDINGS;
  return [true, "true"].includes(effective);
}
|
|
1677
|
+
/**
 * Embed chunks in batches of `EMBEDDING_BATCH_SIZE`, appending outcomes to `results`.
 *
 * For each batch the rate limiter is awaited with the batch's estimated token total,
 * then all texts are embedded in one request. An embedding is accepted when it is
 * non-empty and has at least one non-zero component. If the batch request itself
 * throws, every chunk of that batch is retried individually through
 * `generateEmbeddingWithValidation`.
 *
 * @param {object} runtime - Agent runtime.
 * @param {Array<{contextualizedText: string, index: number}>} validChunks - Chunks to embed.
 * @param {Function} rateLimiter - Async function taking an estimated token count.
 * @param {object[]} results - Accumulator; mutated and returned.
 * @returns {Promise<object[]>} The same `results` array.
 */
async function generateEmbeddingsBatch(runtime, validChunks, rateLimiter, results) {
  // A vector is usable only if some component is non-zero. The previous check looked
  // at the first component alone and so rejected valid vectors that start with 0.
  const isUsableEmbedding = (vector) => Boolean(vector) && vector.length > 0 && Array.prototype.some.call(vector, (component) => component !== 0);
  const recordSuccess = (chunk, embedding) => {
    results.push({
      embedding,
      success: true,
      index: chunk.index,
      text: chunk.contextualizedText
    });
  };
  const recordFailure = (chunk, error) => {
    results.push({
      success: false,
      index: chunk.index,
      error,
      text: chunk.contextualizedText
    });
  };

  for (let batchStart = 0; batchStart < validChunks.length; batchStart += EMBEDDING_BATCH_SIZE) {
    const batchEnd = Math.min(batchStart + EMBEDDING_BATCH_SIZE, validChunks.length);
    const batch = validChunks.slice(batchStart, batchEnd);
    const batchTexts = batch.map((c) => c.contextualizedText);
    const totalTokens = batchTexts.reduce((sum, text) => sum + estimateTokens(text), 0);
    await rateLimiter(totalTokens);
    try {
      const embeddings = await generateBatchEmbeddingsViaRuntime(runtime, batchTexts);
      for (let i = 0; i < batch.length; i++) {
        const embedding = embeddings[i];
        if (isUsableEmbedding(embedding)) {
          recordSuccess(batch[i], embedding);
        } else {
          recordFailure(batch[i], new Error("Empty or invalid embedding returned"));
        }
      }
    } catch (error) {
      const errorMessage = error instanceof Error ? error.message : String(error);
      import_core3.logger.error(`Batch embedding error: ${errorMessage}`);
      // The batch request failed as a whole: fall back to one request per chunk.
      for (const chunk of batch) {
        try {
          const result = await generateEmbeddingWithValidation(runtime, chunk.contextualizedText);
          if (result.success && result.embedding) {
            recordSuccess(chunk, result.embedding);
          } else {
            recordFailure(chunk, result.error instanceof Error ? result.error : new Error("Embedding failed"));
          }
        } catch (fallbackError) {
          recordFailure(chunk, fallbackError instanceof Error ? fallbackError : new Error(String(fallbackError)));
        }
      }
    }
  }
  return results;
}
|
|
1739
|
+
/**
 * Request embeddings for several texts through the runtime's TEXT_EMBEDDING model.
 *
 * The runtime is first called once with `{ texts }`. If it answers with an array of
 * numeric vectors, that is returned as is. If it answers with a single numeric vector,
 * each text is embedded with its own `{ text }` call, all started in parallel; a
 * per-text reply that is not a vector falls back to its `embedding` property or `[]`.
 *
 * @param {object} runtime - Agent runtime exposing `useModel`.
 * @param {string[]} texts - Texts to embed.
 * @returns {Promise<number[][]>} One vector per text.
 * @throws {Error} "Unexpected batch embedding result format" for any other reply shape.
 */
async function generateBatchEmbeddingsViaRuntime(runtime, texts) {
  const looksLikeVector = (value) => Array.isArray(value) && value.length > 0 && typeof value[0] === "number";
  const looksLikeBatch = (value) => Array.isArray(value) && value.length > 0 && Array.isArray(value[0]) && typeof value[0][0] === "number";
  const embedSingle = async (text) => {
    const reply = await runtime.useModel(import_core3.ModelType.TEXT_EMBEDDING, { text });
    if (looksLikeVector(reply)) {
      return reply;
    }
    return reply?.embedding ?? [];
  };

  const batchReply = await runtime.useModel(import_core3.ModelType.TEXT_EMBEDDING, { texts });
  if (looksLikeBatch(batchReply)) {
    return batchReply;
  }
  if (!looksLikeVector(batchReply)) {
    throw new Error("Unexpected batch embedding result format");
  }
  // The runtime ignored `texts` and returned one vector: embed each text separately.
  return await Promise.all(texts.map((text) => embedSingle(text)));
}
|
|
1759
|
+
/**
 * Embed chunks one request at a time, appending outcomes to `results`.
 *
 * Before each request the rate limiter is awaited with the chunk's estimated token
 * count. A rate-limit failure (an error with `status === 429`) is retried once through
 * `withRateLimitRetry`; any other failure is recorded immediately.
 *
 * @param {object} runtime - Agent runtime.
 * @param {Array<{contextualizedText: string, index: number}>} validChunks - Chunks to embed.
 * @param {Function} rateLimiter - Async function taking an estimated token count.
 * @param {object[]} results - Accumulator; mutated and returned.
 * @returns {Promise<object[]>} The same `results` array.
 */
async function generateEmbeddingsIndividual(runtime, validChunks, rateLimiter, results) {
  for (const chunk of validChunks) {
    const embeddingTokens = estimateTokens(chunk.contextualizedText);
    await rateLimiter(embeddingTokens);
    try {
      const generateEmbeddingOperation = async () => {
        const outcome = await generateEmbeddingWithValidation(runtime, chunk.contextualizedText);
        // generateEmbeddingWithValidation turns exceptions into { success: false }.
        // Rethrow rate-limit errors, since withRateLimitRetry only retries on a
        // thrown 429 and would otherwise never run its retry.
        if (!outcome.success && outcome.error?.status === 429) {
          throw outcome.error;
        }
        return outcome;
      };
      const { embedding, success, error } = await withRateLimitRetry(generateEmbeddingOperation, `embedding generation for chunk ${chunk.index}`);
      if (!success) {
        results.push({
          success: false,
          index: chunk.index,
          error,
          text: chunk.contextualizedText
        });
      } else {
        results.push({
          embedding: embedding ?? undefined,
          success: true,
          index: chunk.index,
          text: chunk.contextualizedText
        });
      }
    } catch (error) {
      const errorMessage = error instanceof Error ? error.message : String(error);
      import_core3.logger.error(`Error generating embedding for chunk ${chunk.index}: ${errorMessage}`);
      results.push({
        success: false,
        index: chunk.index,
        error: error instanceof Error ? error : new Error(String(error)),
        text: chunk.contextualizedText
      });
    }
  }
  return results;
}
|
|
1796
|
+
/**
 * Return the chunks of a batch, enriched with generated context when that is enabled.
 *
 * When contextual knowledge is enabled for the runtime and document text is available,
 * the work is delegated to `generateContextsInBatch`. Otherwise each chunk is returned
 * unchanged, marked successful, and tagged with its original document position.
 *
 * @param {object} runtime - Agent runtime.
 * @param {string} fullDocumentText - Complete document text.
 * @param {string[]} chunks - Chunk texts of this batch.
 * @param {string} [contentType] - MIME type of the document.
 * @param {number[]} batchOriginalIndices - Document position of each chunk.
 * @param {string} [documentTitle]
 * @returns {Promise<Array<{contextualizedText: string, index: number, success: boolean}>>}
 */
async function getContextualizedChunks(runtime, fullDocumentText, chunks, contentType, batchOriginalIndices, documentTitle) {
  const shouldContextualize = getCtxKnowledgeEnabled(runtime) && fullDocumentText;
  if (!shouldContextualize) {
    return chunks.map((contextualizedText, position) => ({
      contextualizedText,
      index: batchOriginalIndices[position],
      success: true
    }));
  }
  return await generateContextsInBatch(runtime, fullDocumentText, chunks, contentType, batchOriginalIndices, documentTitle);
}
|
|
1807
|
+
/**
 * Ask the language model for a situating context for every chunk of a batch.
 *
 * All chunks are processed concurrently. Each valid chunk first awaits a rate limiter
 * built from the provider limits, then requests its context (with one retry on a 429,
 * via `withRateLimitRetry`) and is combined with it through `getChunkWithContext`.
 * Chunks whose prompt is invalid, or whose generation fails, come back with their
 * original text and `success: false`.
 *
 * The caching prompt form is used only when the configured provider is "openrouter"
 * and the model name contains "claude" or "gemini".
 *
 * @param {object} runtime - Agent runtime.
 * @param {string} fullDocumentText - Complete document text.
 * @param {string[]} chunks - Chunk texts of this batch.
 * @param {string} [contentType] - MIME type of the document.
 * @param {number[]} [batchIndices] - Document position of each chunk.
 * @param {string} [_documentTitle] - Accepted for interface compatibility; not read.
 * @returns {Promise<Array<{contextualizedText: string, success: boolean, index: number}>>}
 */
async function generateContextsInBatch(runtime, fullDocumentText, chunks, contentType, batchIndices, _documentTitle) {
  if (!chunks || chunks.length === 0) {
    return [];
  }
  const limits = await getProviderRateLimits(runtime);
  const throttle = createRateLimiter(limits.requestsPerMinute || 60, limits.tokensPerMinute, limits.rateLimitEnabled);
  const config = validateModelConfig(runtime);
  const modelMentions = (needle) => config.TEXT_MODEL?.toLowerCase().includes(needle);
  const isUsingCacheCapableModel = config.TEXT_PROVIDER === "openrouter" && (modelMentions("claude") || modelMentions("gemini"));
  import_core3.logger.debug(`Contextualizing ${chunks.length} chunks with ${config.TEXT_PROVIDER}/${config.TEXT_MODEL} (cache: ${isUsingCacheCapableModel})`);

  const keepOriginal = (item) => ({
    contextualizedText: item.chunkText,
    success: false,
    index: item.originalIndex
  });

  const requestContext = async (item) => {
    const usesCachedForm = item.usesCaching && item.promptText;
    if (!usesCachedForm && !item.prompt) {
      throw new Error("Missing prompt for text generation");
    }
    if (useCustomLLM) {
      if (usesCachedForm) {
        return await generateText(runtime, item.promptText, item.systemPrompt, {
          cacheDocument: item.fullDocumentTextForContext,
          cacheOptions: { type: "ephemeral" },
          autoCacheContextualRetrieval: true
        });
      }
      return await generateText(runtime, item.prompt);
    }
    let prompt = item.prompt;
    if (usesCachedForm) {
      // NOTE(review): this branch sends only the system prompt and promptText;
      // item.fullDocumentTextForContext is not included — confirm that is intended.
      prompt = item.systemPrompt ? `${item.systemPrompt}\n\n${item.promptText}` : item.promptText;
    }
    return await runtime.useModel(import_core3.ModelType.TEXT_LARGE, { prompt });
  };

  const contextualize = async (item) => {
    if (!item.valid) {
      return keepOriginal(item);
    }
    await throttle(estimateTokens(item.chunkText + (item.prompt || "")));
    try {
      const reply = await withRateLimitRetry(() => requestContext(item), `context generation for chunk ${item.originalIndex}`);
      const generatedContext = typeof reply === "string" ? reply : reply.text;
      return {
        contextualizedText: getChunkWithContext(item.chunkText, generatedContext),
        success: true,
        index: item.originalIndex
      };
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);
      import_core3.logger.error(`Error generating context for chunk ${item.originalIndex}: ${reason}`);
      return keepOriginal(item);
    }
  };

  const promptConfigs = prepareContextPrompts(chunks, fullDocumentText, contentType, batchIndices, isUsingCacheCapableModel);
  return await Promise.all(promptConfigs.map((item) => contextualize(item)));
}
|
|
1877
|
+
/**
 * Build the prompt configuration used to contextualize each chunk.
 *
 * For cache-capable models the entry carries a system prompt and a prompt text, with
 * the full document attached separately. Otherwise it carries a single prompt that
 * already embeds the document. A prompt starting with "Error:" — or any exception
 * while building it — yields an entry with `valid: false`.
 *
 * @param {string[]} chunks - Chunk texts.
 * @param {string} fullDocumentText - Complete document text.
 * @param {string} [contentType] - MIME type; selects the MIME-specific prompt builders.
 * @param {number[]} [batchIndices] - Document position of each chunk; defaults to the array index.
 * @param {boolean} [isUsingCacheCapableModel=false] - Build the caching prompt form.
 * @returns {object[]} One entry per chunk.
 */
function prepareContextPrompts(chunks, fullDocumentText, contentType, batchIndices, isUsingCacheCapableModel = false) {
  const buildCachingEntry = (chunkText, originalIndex) => {
    const info = contentType
      ? getCachingPromptForMimeType(contentType, chunkText)
      : getCachingContextualizationPrompt(chunkText);
    if (info.prompt.startsWith("Error:")) {
      return {
        originalIndex,
        chunkText,
        valid: false,
        usesCaching: false
      };
    }
    return {
      valid: true,
      originalIndex,
      chunkText,
      usesCaching: true,
      systemPrompt: info.systemPrompt,
      promptText: info.prompt,
      fullDocumentTextForContext: fullDocumentText
    };
  };

  const buildPlainEntry = (chunkText, originalIndex) => {
    const prompt = contentType
      ? getPromptForMimeType(contentType, fullDocumentText, chunkText)
      : getContextualizationPrompt(fullDocumentText, chunkText);
    const usable = !prompt.startsWith("Error:");
    return {
      prompt: usable ? prompt : null,
      originalIndex,
      chunkText,
      valid: usable,
      usesCaching: false
    };
  };

  const buildEntry = isUsingCacheCapableModel ? buildCachingEntry : buildPlainEntry;
  return chunks.map((chunkText, position) => {
    const originalIndex = batchIndices ? batchIndices[position] : position;
    try {
      return buildEntry(chunkText, originalIndex);
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);
      import_core3.logger.error(`Error preparing prompt for chunk ${originalIndex}: ${reason}`);
      return {
        prompt: null,
        originalIndex,
        chunkText,
        valid: false,
        usesCaching: false
      };
    }
  });
}
|
|
1932
|
+
/**
 * Embed one text and report the outcome as a result object instead of throwing.
 *
 * The runtime reply may be the vector itself or an object with an `embedding`
 * property. A missing or empty vector is reported as a failure with the error
 * "Zero vector detected". Exceptions are captured and returned in `error`.
 *
 * @param {object} runtime - Agent runtime exposing `useModel`.
 * @param {string} text - Text to embed.
 * @returns {Promise<{embedding: (number[]|null), success: boolean, error?: Error}>}
 */
async function generateEmbeddingWithValidation(runtime, text) {
  const failure = (error) => ({ embedding: null, success: false, error });
  try {
    const reply = await runtime.useModel(import_core3.ModelType.TEXT_EMBEDDING, { text });
    const vector = Array.isArray(reply) ? reply : reply?.embedding;
    const isMissing = !vector || vector.length === 0;
    if (isMissing) {
      return failure(new Error("Zero vector detected"));
    }
    return { embedding: vector, success: true };
  } catch (caught) {
    return failure(caught instanceof Error ? caught : new Error(String(caught)));
  }
}
|
|
1948
|
+
/**
 * Run an async operation and retry it once if it fails with HTTP status 429.
 *
 * The wait before the retry is, in order of preference: `retryDelay` (seconds) when
 * truthy; the error's `Retry-After` header, given either as a number of seconds or as
 * an HTTP date; otherwise 5 seconds. Errors with any other status are rethrown
 * immediately. If the retry fails as well, the failure is logged and rethrown.
 *
 * @param {Function} operation - Async function to execute.
 * @param {string} errorContext - Description used in the retry-failure log line.
 * @param {number} [retryDelay] - Delay in seconds, overriding the header.
 * @returns {Promise<*>} Whatever `operation` resolves to.
 * @throws The original error (non-429) or the error from the retry.
 */
async function withRateLimitRetry(operation, errorContext, retryDelay) {
  const DEFAULT_DELAY_SECONDS = 5;
  // Read Retry-After from a plain header object or from a Headers/Map-like object,
  // and accept both forms the header can take (delay in seconds, or an HTTP date).
  const readRetryAfterSeconds = (headers) => {
    if (!headers) {
      return undefined;
    }
    const raw = typeof headers.get === "function"
      ? headers.get("retry-after")
      : (headers["retry-after"] ?? headers["Retry-After"]);
    if (raw == null || raw === "") {
      return undefined;
    }
    const seconds = Number(raw);
    if (Number.isFinite(seconds)) {
      return Math.max(0, seconds);
    }
    const retryAt = Date.parse(String(raw));
    if (Number.isNaN(retryAt)) {
      return undefined;
    }
    return Math.max(0, (retryAt - Date.now()) / 1000);
  };

  try {
    return await operation();
  } catch (error) {
    // Optional chaining keeps a thrown null/undefined from being masked by a TypeError.
    if (error?.status !== 429) {
      throw error;
    }
    const delaySeconds = retryDelay || (readRetryAfterSeconds(error.headers) ?? DEFAULT_DELAY_SECONDS);
    await new Promise((resolve2) => setTimeout(resolve2, delaySeconds * 1000));
    try {
      return await operation();
    } catch (retryError) {
      const retryErrorMessage = retryError instanceof Error ? retryError.message : String(retryError);
      import_core3.logger.error(`Failed after retry for ${errorContext}: ${retryErrorMessage}`);
      throw retryError;
    }
  }
}
|
|
1967
|
+
/**
 * Create a sliding-window rate limiter covering the last 60 seconds.
 *
 * The returned async function must be awaited before each request. It sleeps until
 * the oldest recorded request (or token usage) leaves the window whenever admitting
 * the new request would exceed `requestsPerMinute` or `tokensPerMinute`, then records
 * the request at the moment it is actually admitted.
 *
 * @param {number} requestsPerMinute - Maximum requests per 60-second window.
 * @param {number} [tokensPerMinute] - Maximum estimated tokens per window; falsy disables the token limit.
 * @param {boolean} [rateLimitEnabled=true] - When false the limiter returns immediately.
 * @returns {function(number=): Promise<void>} Limiter taking the estimated token count (default 1000).
 */
function createRateLimiter(requestsPerMinute, tokensPerMinute, rateLimitEnabled = true) {
  const requestTimes = [];
  const tokenUsage = [];
  const intervalMs = 60 * 1000;
  return async function rateLimiter(estimatedTokens = 1000) {
    if (!rateLimitEnabled)
      return;
    const now = Date.now();
    while (requestTimes.length > 0 && now - requestTimes[0] > intervalMs) {
      requestTimes.shift();
    }
    while (tokenUsage.length > 0 && now - tokenUsage[0].timestamp > intervalMs) {
      tokenUsage.shift();
    }
    const currentTokens = tokenUsage.reduce((sum, usage) => sum + usage.tokens, 0);
    const requestLimitExceeded = requestTimes.length >= requestsPerMinute;
    const tokenLimitExceeded = tokensPerMinute && currentTokens + estimatedTokens > tokensPerMinute;
    let admittedAt = now;
    if (requestLimitExceeded || tokenLimitExceeded) {
      let timeToWait = 0;
      if (requestLimitExceeded) {
        timeToWait = Math.max(timeToWait, requestTimes[0] + intervalMs - now);
      }
      if (tokenLimitExceeded && tokenUsage.length > 0) {
        timeToWait = Math.max(timeToWait, tokenUsage[0].timestamp + intervalMs - now);
      }
      if (timeToWait > 0) {
        await new Promise((resolve2) => setTimeout(resolve2, timeToWait));
        // Record the request when it is really sent. Using the pre-sleep timestamp
        // made the entry expire early and let later calls exceed the limit.
        admittedAt = Date.now();
      }
    }
    requestTimes.push(admittedAt);
    if (tokensPerMinute) {
      tokenUsage.push({ timestamp: admittedAt, tokens: estimatedTokens });
    }
  };
}
|
|
2002
|
+
|
|
2003
|
+
// service.ts
|
|
2004
|
+
/**
 * Runtime service that ingests documents and character knowledge, stores them
 * as "documents" memories plus embedded "knowledge" fragments, and answers
 * similarity queries against those fragments.
 */
class KnowledgeService extends import_core4.Service {
  static serviceType = "knowledge";
  config = {};
  capabilityDescription = "Provides Retrieval Augmented Generation capabilities, including knowledge upload and querying.";
  knowledgeProcessingSemaphore;
  // Queue of { ragMetadata, timestamp } entries waiting to be attached to a
  // conversation memory by enrichRecentMemoriesWithPendingRAG().
  pendingRAGEnrichment = [];

  /**
   * @param runtime - Agent runtime passed through to the base Service.
   * @param _config - Unused; configuration is assigned by `start()`.
   */
  constructor(runtime, _config) {
    super(runtime);
    // Caps concurrent character-knowledge processing at 10 items.
    this.knowledgeProcessingSemaphore = new import_core4.Semaphore(10);
  }

  /**
   * Loads documents from the KNOWLEDGE_PATH setting (or the loader's default
   * when the setting is not a string). Errors are logged, never thrown.
   */
  async loadInitialDocuments() {
    import_core4.logger.info(`Loading documents on startup for agent ${this.runtime.agentId}`);
    try {
      // Fixed 1s delay before touching the runtime.
      // NOTE(review): presumably gives the runtime time to finish initializing — confirm.
      await new Promise((resolve2) => setTimeout(resolve2, 1000));
      const knowledgePathSetting = this.runtime.getSetting("KNOWLEDGE_PATH");
      const knowledgePath = typeof knowledgePathSetting === "string" ? knowledgePathSetting : undefined;
      const result = await loadDocsFromPath(this, this.runtime.agentId, undefined, knowledgePath);
      if (result.successful > 0) {
        import_core4.logger.info(`Loaded ${result.successful} documents on startup`);
      }
    } catch (error) {
      import_core4.logger.error({ error }, "Error loading documents on startup");
    }
  }

  /**
   * Creates and configures the service for `runtime`.
   *
   * Startup document loading is fire-and-forget; character knowledge is
   * awaited, with failures logged rather than propagated.
   *
   * @returns {Promise<KnowledgeService>} The started service.
   */
  static async start(runtime) {
    import_core4.logger.info(`Starting Knowledge service for agent: ${runtime.agentId}`);
    const validatedConfig = validateModelConfig(runtime);
    const ctxEnabled = validatedConfig.CTX_KNOWLEDGE_ENABLED;
    if (ctxEnabled) {
      import_core4.logger.info(`Contextual Knowledge enabled: ${validatedConfig.EMBEDDING_PROVIDER || "auto"} embeddings, ${validatedConfig.TEXT_PROVIDER} text generation`);
      import_core4.logger.info(`Text model: ${validatedConfig.TEXT_MODEL}`);
    } else {
      // Decide from the validated config (the same source the message below
      // prints) instead of process.env, so the log cannot say
      // "Using undefined for embeddings".
      const usingPluginOpenAI = !validatedConfig.EMBEDDING_PROVIDER;
      import_core4.logger.warn("Basic Embedding mode - documents will not be enriched with context");
      import_core4.logger.info("To enable contextual enrichment: Set CTX_KNOWLEDGE_ENABLED=true and configure TEXT_PROVIDER/TEXT_MODEL");
      if (usingPluginOpenAI) {
        import_core4.logger.info("Using plugin-openai configuration for embeddings");
      } else {
        import_core4.logger.info(`Using ${validatedConfig.EMBEDDING_PROVIDER} for embeddings with ${validatedConfig.TEXT_EMBEDDING_MODEL}`);
      }
    }
    const service = new KnowledgeService(runtime);
    service.config = validatedConfig;
    if (service.config.LOAD_DOCS_ON_STARTUP) {
      // Intentionally not awaited: loading continues in the background.
      service.loadInitialDocuments().catch((error) => {
        import_core4.logger.error({ error }, "Error loading initial documents");
      });
    }
    const characterKnowledge = service.runtime.character?.knowledge;
    if (characterKnowledge && characterKnowledge.length > 0) {
      // Entries may be plain strings, `{ path }` objects, or
      // `{ item: { case: "path", value } }` wrappers; anything else is dropped.
      const stringKnowledge = characterKnowledge.map((item) => {
        const itemAny = item;
        if (itemAny?.item?.case === "path" && typeof itemAny.item.value === "string") {
          return itemAny.item.value;
        }
        if (typeof itemAny?.path === "string") {
          return itemAny.path;
        }
        if (typeof item === "string") {
          return item;
        }
        return null;
      }).filter((item) => item !== null);
      await service.processCharacterKnowledge(stringKnowledge).catch((err) => {
        import_core4.logger.error({ error: err }, "Error processing character knowledge");
      });
    }
    return service;
  }

  /** Stops the service registered on `runtime`, warning if none is found. */
  static async stop(runtime) {
    import_core4.logger.info(`Stopping Knowledge service for agent: ${runtime.agentId}`);
    const service = runtime.getService(KnowledgeService.serviceType);
    if (!service) {
      import_core4.logger.warn(`KnowledgeService not found for agent ${runtime.agentId} during stop.`);
    }
    if (service instanceof KnowledgeService) {
      await service.stop();
    }
  }

  /** Instance stop hook; only logs. */
  async stop() {
    import_core4.logger.info(`Knowledge service stopping for agent: ${this.runtime.character?.name}`);
  }

  /**
   * Adds a document unless a DOCUMENT memory with the same content-based id
   * already exists.
   *
   * Note that any `clientDocumentId` supplied in `options` is replaced by the
   * content-based id before processing.
   *
   * @param options - Document payload (content, contentType, originalFilename, scope ids, metadata).
   * @returns {Promise<{clientDocumentId, storedDocumentMemoryId, fragmentCount}>}
   */
  async addKnowledge(options) {
    const agentId = options.agentId || this.runtime.agentId;
    const contentBasedId = generateContentBasedId(options.content, agentId, {
      includeFilename: options.originalFilename,
      contentType: options.contentType,
      maxChars: 2000
    });
    import_core4.logger.info(`Processing "${options.originalFilename}" (${options.contentType})`);
    try {
      const existingDocument = await this.runtime.getMemoryById(contentBasedId);
      if (existingDocument && existingDocument.metadata?.type === import_core4.MemoryType.DOCUMENT) {
        import_core4.logger.info(`"${options.originalFilename}" already exists - skipping`);
        // Fetches the "knowledge" table unfiltered and counts matching
        // fragments in memory.
        const fragments = await this.runtime.getMemories({
          tableName: "knowledge"
        });
        const relatedFragments = fragments.filter((f) => f.metadata?.type === import_core4.MemoryType.FRAGMENT && f.metadata.documentId === contentBasedId);
        return {
          clientDocumentId: contentBasedId,
          storedDocumentMemoryId: existingDocument.id,
          fragmentCount: relatedFragments.length
        };
      }
    } catch (error) {
      // A failed existence check is not fatal: fall through and process.
      import_core4.logger.debug(`Document ${contentBasedId} not found or error checking existence, proceeding with processing: ${error instanceof Error ? error.message : String(error)}`);
    }
    return this.processDocument({
      ...options,
      clientDocumentId: contentBasedId
    });
  }

  /**
   * Extracts text from the supplied content, stores the document memory and
   * creates its fragments.
   *
   * - PDF: content is base64; the base64 string itself is stored.
   * - Other binary types: content is base64; the extracted text is stored.
   * - Text: content is stored as-is, or decoded first if it looks like base64.
   *
   * @throws {Error} When the content cannot be decoded or yields no text.
   *   Every error is logged and rethrown.
   */
  async processDocument({
    agentId: passedAgentId,
    clientDocumentId,
    contentType,
    originalFilename,
    worldId,
    content,
    roomId,
    entityId,
    metadata
  }) {
    const agentId = passedAgentId || this.runtime.agentId;
    try {
      import_core4.logger.debug(`Processing document ${originalFilename} (type: ${contentType}) for agent: ${agentId}`);
      let fileBuffer = null;
      let extractedText;
      let documentContentToStore;
      const isPdfFile = contentType === "application/pdf" || originalFilename.toLowerCase().endsWith(".pdf");
      if (isPdfFile || isBinaryContentType(contentType, originalFilename)) {
        try {
          fileBuffer = Buffer.from(content, "base64");
        } catch (e) {
          import_core4.logger.error({ error: e }, `Failed to convert base64 to buffer for ${originalFilename}`);
          throw new Error(`Invalid base64 content for ${isPdfFile ? "PDF" : "binary"} file ${originalFilename}`);
        }
        extractedText = await extractTextFromDocument(fileBuffer, contentType, originalFilename);
        documentContentToStore = isPdfFile ? content : extractedText;
      } else if (looksLikeBase64(content)) {
        try {
          const decodedText = Buffer.from(content, "base64").toString("utf8");
          // U+FFFD marks bytes that were not valid UTF-8; more than 10% of
          // them means the payload was not base64-encoded text.
          const invalidCharCount = (decodedText.match(/\ufffd/g) || []).length;
          if (invalidCharCount > 0 && invalidCharCount / decodedText.length > 0.1) {
            throw new Error("Decoded content contains too many invalid characters");
          }
          import_core4.logger.debug(`Successfully decoded base64 content for text file: ${originalFilename}`);
          extractedText = decodedText;
          documentContentToStore = decodedText;
        } catch (e) {
          import_core4.logger.error({ error: e instanceof Error ? e : new Error(String(e)) }, `Failed to decode base64 for ${originalFilename}`);
          throw new Error(`File ${originalFilename} appears to be corrupted or incorrectly encoded`);
        }
      } else {
        import_core4.logger.debug(`Treating content as plain text for file: ${originalFilename}`);
        extractedText = content;
        documentContentToStore = content;
      }
      if (!extractedText || extractedText.trim() === "") {
        throw new Error(`No text content extracted from ${originalFilename} (type: ${contentType})`);
      }
      const documentMemory = createDocumentMemory({
        text: documentContentToStore,
        agentId,
        clientDocumentId,
        originalFilename,
        contentType,
        worldId,
        fileSize: fileBuffer ? fileBuffer.length : extractedText.length,
        documentId: clientDocumentId,
        customMetadata: metadata
      });
      // Room and entity fall back to the agent id when not supplied.
      const memoryWithScope = {
        ...documentMemory,
        id: clientDocumentId,
        agentId,
        roomId: roomId || agentId,
        entityId: entityId || agentId
      };
      await this.runtime.createMemory(memoryWithScope, "documents");
      const fragmentCount = await processFragmentsSynchronously({
        runtime: this.runtime,
        documentId: clientDocumentId,
        fullDocumentText: extractedText,
        agentId,
        contentType,
        roomId: roomId || agentId,
        entityId: entityId || agentId,
        worldId: worldId || agentId,
        documentTitle: originalFilename
      });
      import_core4.logger.debug(`"${originalFilename}" stored with ${fragmentCount} fragments`);
      return {
        clientDocumentId,
        storedDocumentMemoryId: memoryWithScope.id,
        fragmentCount
      };
    } catch (error) {
      import_core4.logger.error({ error }, `Error processing document ${originalFilename}`);
      throw error;
    }
  }

  /** @returns {Promise<boolean>} Whether a memory with `knowledgeId` exists. */
  async checkExistingKnowledge(knowledgeId) {
    const existingDocument = await this.runtime.getMemoryById(knowledgeId);
    return !!existingDocument;
  }

  /**
   * Embeds the message text and returns up to 20 matching knowledge fragments.
   *
   * @param message - Memory whose `content.text` is the query.
   * @param scope - Optional `{ roomId, worldId, entityId }` filter; only the
   *   truthy fields are applied.
   * @returns {Promise<Array>} `{ id, content, similarity, metadata, worldId }`
   *   items; empty when the message has no text.
   */
  async getKnowledge(message, scope) {
    if (!message?.content?.text || message?.content?.text.trim().length === 0) {
      import_core4.logger.warn("Invalid or empty message content for knowledge query");
      return [];
    }
    const embedding = await this.runtime.useModel(import_core4.ModelType.TEXT_EMBEDDING, {
      text: message.content.text
    });
    const filterScope = {};
    if (scope?.roomId) {
      filterScope.roomId = scope.roomId;
    }
    if (scope?.worldId) {
      filterScope.worldId = scope.worldId;
    }
    if (scope?.entityId) {
      filterScope.entityId = scope.entityId;
    }
    const fragments = await this.runtime.searchMemories({
      tableName: "knowledge",
      embedding,
      query: message.content.text,
      ...filterScope,
      count: 20,
      match_threshold: 0.1
    });
    return fragments.filter((fragment) => fragment.id !== undefined).map((fragment) => ({
      id: fragment.id,
      content: fragment.content,
      similarity: fragment.similarity,
      metadata: fragment.metadata,
      worldId: fragment.worldId
    }));
  }

  /**
   * Writes RAG usage details into the metadata of an existing memory.
   * Failures (including a missing memory) are logged as warnings, not thrown.
   *
   * Note: the memory's metadata `type` is overwritten with MemoryType.CUSTOM.
   */
  async enrichConversationMemoryWithRAG(memoryId, ragMetadata) {
    try {
      const existingMemory = await this.runtime.getMemoryById(memoryId);
      if (!existingMemory) {
        import_core4.logger.warn(`Cannot enrich memory ${memoryId} - memory not found`);
        return;
      }
      const ragUsageData = {
        retrievedFragments: ragMetadata.retrievedFragments,
        queryText: ragMetadata.queryText,
        totalFragments: ragMetadata.totalFragments,
        retrievalTimestamp: ragMetadata.retrievalTimestamp,
        usedInResponse: true
      };
      const updatedMetadata = {
        ...existingMemory.metadata,
        knowledgeUsed: true,
        ragUsage: JSON.stringify(ragUsageData),
        timestamp: existingMemory.metadata?.timestamp ?? Date.now(),
        type: import_core4.MemoryType.CUSTOM
      };
      await this.runtime.updateMemory({
        id: memoryId,
        metadata: updatedMetadata
      });
    } catch (error) {
      const errorMessage = error instanceof Error ? error.message : String(error);
      import_core4.logger.warn(`Failed to enrich conversation memory ${memoryId} with RAG data: ${errorMessage}`);
    }
  }

  /** Queues RAG metadata for later attachment; entries older than 30s are dropped. */
  setPendingRAGMetadata(ragMetadata) {
    const now = Date.now();
    this.pendingRAGEnrichment = this.pendingRAGEnrichment.filter((entry) => now - entry.timestamp < 30000);
    this.pendingRAGEnrichment.push({
      ragMetadata,
      timestamp: now
    });
  }

  /**
   * Attaches each queued RAG entry to the newest un-enriched "message" memory
   * (created within the last 10s) that was created after the entry was queued.
   * Matched entries leave the queue; unmatched ones stay until they expire.
   * Errors are logged as warnings, not thrown.
   */
  async enrichRecentMemoriesWithPendingRAG() {
    if (this.pendingRAGEnrichment.length === 0) {
      return;
    }
    try {
      const recentMemories = await this.runtime.getMemories({
        tableName: "messages",
        count: 10
      });
      const now = Date.now();
      const recentConversationMemories = recentMemories.filter((memory) => memory.metadata?.type === "message" && now - (memory.createdAt || 0) < 1e4 && !(memory.metadata && ("ragUsage" in memory.metadata) && memory.metadata.ragUsage)).sort((a, b) => (b.createdAt || 0) - (a.createdAt || 0));
      // Iterate over a snapshot: the live array is spliced below (and may be
      // replaced by setPendingRAGMetadata while we await), and mutating an
      // array during for...of skips elements.
      const snapshot = [...this.pendingRAGEnrichment];
      // Memories enriched in this pass; prevents two pending entries from
      // overwriting each other on the same memory.
      const enrichedIds = new Set();
      for (const pendingEntry of snapshot) {
        const matchingMemory = recentConversationMemories.find((memory) => !enrichedIds.has(memory.id) && (memory.createdAt || 0) > pendingEntry.timestamp);
        if (!matchingMemory?.id) {
          continue;
        }
        enrichedIds.add(matchingMemory.id);
        await this.enrichConversationMemoryWithRAG(matchingMemory.id, pendingEntry.ragMetadata);
        const index = this.pendingRAGEnrichment.indexOf(pendingEntry);
        if (index > -1) {
          this.pendingRAGEnrichment.splice(index, 1);
        }
      }
    } catch (error) {
      const errorMessage = error instanceof Error ? error.message : String(error);
      import_core4.logger.warn(`Error enriching recent memories with RAG data: ${errorMessage}`);
    }
  }

  /**
   * Stores each character knowledge string as a document with fragments,
   * skipping items whose content-based id already exists. Items run
   * concurrently, bounded by the semaphore; per-item errors are logged only.
   *
   * @param {string[]} items - Knowledge strings, optionally starting with a
   *   `Path: <file>` line that is used to derive file metadata.
   */
  async processCharacterKnowledge(items) {
    // Fixed 1s delay before processing.
    // NOTE(review): presumably waits for runtime initialization — confirm.
    await new Promise((resolve2) => setTimeout(resolve2, 1000));
    import_core4.logger.info(`Processing ${items.length} character knowledge items`);
    const processingPromises = items.map(async (item) => {
      await this.knowledgeProcessingSemaphore.acquire();
      try {
        const knowledgeId = generateContentBasedId(item, this.runtime.agentId, {
          maxChars: 2000,
          includeFilename: "character-knowledge"
        });
        if (await this.checkExistingKnowledge(knowledgeId)) {
          return;
        }
        let metadata = {
          type: import_core4.MemoryType.CUSTOM,
          timestamp: Date.now(),
          source: "character"
        };
        const pathMatch = item.match(/^Path: (.+?)(?:\n|\r\n)/);
        if (pathMatch) {
          const filePath = pathMatch[1].trim();
          // Accept both POSIX and Windows separators.
          const filename = filePath.split(/[\\/]/).pop() || "";
          // Take the extension from the filename's last dot only. A leading
          // dot (".env") or no dot at all means "no extension", so a dotted
          // directory name or an extension-less file cannot leak into
          // fileExt / fileType.
          const dotIndex = filename.lastIndexOf(".");
          const hasExtension = dotIndex > 0;
          const extension = hasExtension ? filename.slice(dotIndex + 1) : "";
          const title = hasExtension ? filename.slice(0, dotIndex) : filename;
          metadata = {
            ...metadata,
            path: filePath,
            filename,
            fileExt: extension,
            title,
            fileType: `text/${extension || "plain"}`,
            fileSize: item.length
          };
        }
        await this._internalAddKnowledge({
          id: knowledgeId,
          content: {
            text: item
          },
          metadata
        }, undefined, {
          roomId: this.runtime.agentId,
          entityId: this.runtime.agentId,
          worldId: this.runtime.agentId
        });
      } catch (error) {
        import_core4.logger.error({ error }, "Error processing character knowledge");
      } finally {
        this.knowledgeProcessingSemaphore.release();
      }
    });
    await Promise.all(processingPromises);
  }

  /**
   * Upserts `item` as a document memory, then splits its text into fragments
   * and stores each one. A failing fragment is logged and skipped.
   *
   * @param item - `{ id, content: { text }, metadata? }`.
   * @param options - Chunking options; missing fields fall back to
   *   targetTokens 1500 and overlap 200.
   * @param scope - `{ roomId, entityId, worldId }`; missing fields fall back
   *   to the agent id.
   */
  async _internalAddKnowledge(item, options = {
    targetTokens: 1500,
    overlap: 200,
    modelContextSize: 4096
  }, scope = {
    roomId: this.runtime.agentId,
    entityId: this.runtime.agentId,
    worldId: this.runtime.agentId
  }) {
    // Apply per-field defaults so a partial options object (e.g. `{}` or
    // `{ overlap: 50 }`) does not pass `undefined` to the splitter.
    const { targetTokens = 1500, overlap = 200 } = options ?? {};
    const finalScope = {
      roomId: scope?.roomId ?? this.runtime.agentId,
      worldId: scope?.worldId ?? this.runtime.agentId,
      entityId: scope?.entityId ?? this.runtime.agentId
    };
    const documentMetadata = {
      ...item.metadata ?? {},
      type: import_core4.MemoryType.CUSTOM,
      documentId: item.id
    };
    const documentMemory = {
      id: item.id,
      agentId: this.runtime.agentId,
      roomId: finalScope.roomId,
      worldId: finalScope.worldId,
      entityId: finalScope.entityId,
      content: item.content,
      metadata: documentMetadata,
      createdAt: Date.now()
    };
    const existingDocument = await this.runtime.getMemoryById(item.id);
    if (existingDocument) {
      await this.runtime.updateMemory({
        ...documentMemory,
        id: item.id
      });
    } else {
      await this.runtime.createMemory(documentMemory, "documents");
    }
    const fragments = await this.splitAndCreateFragments(item, targetTokens, overlap, finalScope);
    for (const fragment of fragments) {
      try {
        await this.processDocumentFragment(fragment);
      } catch (error) {
        import_core4.logger.error({ error }, `KnowledgeService: Error processing fragment ${fragment.id} for document ${item.id}`);
      }
    }
  }

  /**
   * Embeds a fragment and stores it in the "knowledge" table.
   * @throws Rethrows any embedding/storage error after logging it.
   */
  async processDocumentFragment(fragment) {
    try {
      await this.runtime.addEmbeddingToMemory(fragment);
      await this.runtime.createMemory(fragment, "knowledge");
    } catch (error) {
      import_core4.logger.error({ error }, `Error processing fragment ${fragment.id}`);
      throw error;
    }
  }

  /**
   * Splits a document's text into chunks and builds one FRAGMENT memory per
   * chunk (not yet stored or embedded).
   *
   * @returns {Promise<Array>} Fragment memories; empty when the document has no text.
   */
  async splitAndCreateFragments(document, targetTokens, overlap, scope) {
    if (!document.content.text) {
      return [];
    }
    const text = document.content.text;
    const chunks = await import_core4.splitChunks(text, targetTokens, overlap);
    // One timestamp for the whole batch; it is also part of each fragment id,
    // so ids differ between runs over the same document.
    const now = Date.now();
    return chunks.map((chunk, index) => {
      const fragmentIdContent = `${document.id}-fragment-${index}-${now}`;
      const fragmentId = import_core4.createUniqueUuid(this.runtime, fragmentIdContent);
      return {
        id: fragmentId,
        entityId: scope.entityId,
        agentId: this.runtime.agentId,
        roomId: scope.roomId,
        worldId: scope.worldId,
        content: {
          text: chunk
        },
        metadata: {
          ...document.metadata || {},
          type: import_core4.MemoryType.FRAGMENT,
          documentId: document.id,
          position: index,
          timestamp: now
        },
        createdAt: now
      };
    });
  }

  /** Runtime `getMemories` with `agentId` forced to this agent. */
  async getMemories(params) {
    return this.runtime.getMemories({
      ...params,
      agentId: this.runtime.agentId
    });
  }

  /** Counts memories in `params.tableName`; room defaults to the agent id, `unique` to false. */
  async countMemories(params) {
    const roomId = params.roomId || this.runtime.agentId;
    const unique = params.unique ?? false;
    const tableName = params.tableName;
    return this.runtime.countMemories(roomId, unique, tableName);
  }

  /** Deletes the memory with the given id via the runtime. */
  async deleteMemory(memoryId) {
    await this.runtime.deleteMemory(memoryId);
  }
}
|
|
2471
|
+
|
|
2472
|
+
// actions.ts
|
|
2473
|
+
/**
 * PROCESS_KNOWLEDGE action: ingests either a file referenced by path in the
 * message text, or the message text itself, into the knowledge base.
 */
var processKnowledgeAction = {
  name: "PROCESS_KNOWLEDGE",
  description: "Process and store knowledge from a file path or text content into the knowledge base",
  similes: [],
  examples: [
    [
      {
        name: "user",
        content: {
          text: "Process the document at /path/to/document.pdf"
        }
      },
      {
        name: "assistant",
        content: {
          text: "I'll process the document at /path/to/document.pdf and add it to my knowledge base.",
          actions: ["PROCESS_KNOWLEDGE"]
        }
      }
    ],
    [
      {
        name: "user",
        content: {
          text: "Add this to your knowledge: The capital of France is Paris."
        }
      },
      {
        name: "assistant",
        content: {
          text: "I'll add that information to my knowledge base.",
          actions: ["PROCESS_KNOWLEDGE"]
        }
      }
    ]
  ],
  /**
   * The action applies when the message text contains "process" or
   * "knowledge" as a whole word and the knowledge service is registered.
   *
   * This is the reduced form of the previous generated checks: the
   * substring/source/input gates and the broader legacy keyword list were all
   * implied by the whole-word match plus a successful service lookup.
   *
   * @returns {Promise<boolean>} Never throws; any lookup error yields false.
   */
  validate: async (runtime, message, _state, _options) => {
    const rawText = typeof message?.content?.text === "string" ? message.content.text : "";
    if (!/\b(?:process|knowledge)\b/i.test(rawText.toLowerCase())) {
      return false;
    }
    try {
      const service = runtime.getService(KnowledgeService.serviceType);
      if (!service) {
        import_core5.logger.warn("Knowledge service not available for PROCESS_KNOWLEDGE action");
        return false;
      }
      return true;
    } catch {
      return false;
    }
  },
  /**
   * Adds a file (when the text contains a path) or the text itself to the
   * knowledge base and reports the outcome through `callback`.
   *
   * @returns {Promise<{success: boolean, text?: string, error?: string}>}
   *   Every exit path returns a result object, including the "file not found"
   *   and "no content" cases.
   */
  handler: async (runtime, message, _state, _options, callback) => {
    // Sends `response` through the optional callback.
    const reply = async (response) => {
      if (callback) {
        await callback(response);
      }
    };
    try {
      const service = runtime.getService(KnowledgeService.serviceType);
      if (!service) {
        throw new Error("Knowledge service not available");
      }
      const text = message.content.text || "";
      const pathPattern = /(?:\/[\w.-]+)+|(?:[a-zA-Z]:[\\/][\w\s.-]+(?:[\\/][\w\s.-]+)*)/;
      const pathMatch = text.match(pathPattern);
      let response;
      if (pathMatch) {
        // NOTE(review): the path comes straight from chat text and is read
        // from the local filesystem without any allow-list or base-directory
        // restriction — confirm this is acceptable for the deployment.
        const filePath = pathMatch[0];
        if (!fs2.existsSync(filePath)) {
          response = {
            text: `I couldn't find the file at ${filePath}. Please check the path and try again.`
          };
          await reply(response);
          return { success: false, text: response.text };
        }
        const fileBuffer = fs2.readFileSync(filePath);
        const fileName = path2.basename(filePath);
        const fileExt = path2.extname(filePath).toLowerCase();
        // Everything not listed here (.txt, .md, .json, .xml, .csv, ...) is
        // treated as plain text.
        const contentTypesByExtension = {
          ".pdf": "application/pdf",
          ".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
          ".doc": "application/msword"
        };
        const contentType = contentTypesByExtension[fileExt] ?? "text/plain";
        const knowledgeOptions = {
          clientDocumentId: import_core5.stringToUuid(runtime.agentId + fileName + Date.now()),
          contentType,
          originalFilename: fileName,
          worldId: runtime.agentId,
          content: fileBuffer.toString("base64"),
          roomId: message.roomId,
          entityId: message.entityId
        };
        const result = await service.addKnowledge(knowledgeOptions);
        response = {
          text: `I've successfully processed the document "${fileName}". It has been split into ${result?.fragmentCount || 0} searchable fragments and added to my knowledge base.`
        };
      } else {
        // Strip a leading instruction such as "remember this:" from the text.
        const knowledgeContent = text.replace(/^(add|store|remember|process|learn)\s+(this|that|the following)?:?\s*/i, "").trim();
        if (!knowledgeContent) {
          response = {
            text: "I need some content to add to my knowledge base. Please provide text or a file path."
          };
          await reply(response);
          return { success: false, text: response.text };
        }
        const knowledgeOptions = {
          clientDocumentId: import_core5.stringToUuid(`${runtime.agentId}text${Date.now()}user-knowledge`),
          contentType: "text/plain",
          originalFilename: "user-knowledge.txt",
          worldId: runtime.agentId,
          content: knowledgeContent,
          roomId: message.roomId,
          entityId: message.entityId
        };
        await service.addKnowledge(knowledgeOptions);
        response = {
          text: `I've added that information to my knowledge base. It has been stored and indexed for future reference.`
        };
      }
      await reply(response);
      return { success: true, text: response.text };
    } catch (error) {
      import_core5.logger.error({ error }, "Error in PROCESS_KNOWLEDGE action");
      const errorMessage = error instanceof Error ? error.message : String(error);
      await reply({
        text: `I encountered an error while processing the knowledge: ${errorMessage}`
      });
      return { success: false, error: errorMessage };
    }
  }
};
|
|
2641
|
+
var searchKnowledgeAction = {
|
|
2642
|
+
name: "SEARCH_KNOWLEDGE",
|
|
2643
|
+
description: "Search the knowledge base for specific information",
|
|
2644
|
+
similes: [
|
|
2645
|
+
"search knowledge",
|
|
2646
|
+
"find information",
|
|
2647
|
+
"look up",
|
|
2648
|
+
"query knowledge base",
|
|
2649
|
+
"search documents",
|
|
2650
|
+
"find in knowledge"
|
|
2651
|
+
],
|
|
2652
|
+
examples: [
|
|
2653
|
+
[
|
|
2654
|
+
{
|
|
2655
|
+
name: "user",
|
|
2656
|
+
content: {
|
|
2657
|
+
text: "Search your knowledge for information about quantum computing"
|
|
2658
|
+
}
|
|
2659
|
+
},
|
|
2660
|
+
{
|
|
2661
|
+
name: "assistant",
|
|
2662
|
+
content: {
|
|
2663
|
+
text: "I'll search my knowledge base for information about quantum computing.",
|
|
2664
|
+
actions: ["SEARCH_KNOWLEDGE"]
|
|
2665
|
+
}
|
|
2666
|
+
}
|
|
2667
|
+
]
|
|
2668
|
+
],
|
|
2669
|
+
validate: async (runtime, message, state, options) => {
|
|
2670
|
+
const __avTextRaw = typeof message?.content?.text === "string" ? message.content.text : "";
|
|
2671
|
+
const __avText = __avTextRaw.toLowerCase();
|
|
2672
|
+
const __avKeywords = ["search", "knowledge"];
|
|
2673
|
+
const __avKeywordOk = __avKeywords.length > 0 && __avKeywords.some((kw) => kw.length > 0 && __avText.includes(kw));
|
|
2674
|
+
const __avRegex = /\b(?:search|knowledge)\b/i;
|
|
2675
|
+
const __avRegexOk = __avRegex.test(__avText);
|
|
2676
|
+
const __avSource = String(message?.content?.source ?? message?.source ?? "");
|
|
2677
|
+
const __avExpectedSource = "";
|
|
2678
|
+
const __avSourceOk = __avExpectedSource ? __avSource === __avExpectedSource : Boolean(__avSource || state || runtime?.agentId || runtime?.getService);
|
|
2679
|
+
const __avOptions = options && typeof options === "object" ? options : {};
|
|
2680
|
+
const __avInputOk = __avText.trim().length > 0 || Object.keys(__avOptions).length > 0 || Boolean(message?.content && typeof message.content === "object");
|
|
2681
|
+
if (!(__avKeywordOk && __avRegexOk && __avSourceOk && __avInputOk)) {
|
|
2682
|
+
return false;
|
|
2683
|
+
}
|
|
2684
|
+
const __avLegacyValidate = async (runtime2, message2, _state) => {
|
|
2685
|
+
const text = message2.content.text?.toLowerCase() || "";
|
|
2686
|
+
const searchKeywords = ["search", "find", "look up", "query", "what do you know about"];
|
|
2687
|
+
const knowledgeKeywords = ["knowledge", "information", "document", "database"];
|
|
2688
|
+
const hasSearchKeyword = searchKeywords.some((keyword) => text.includes(keyword));
|
|
2689
|
+
const hasKnowledgeKeyword = knowledgeKeywords.some((keyword) => text.includes(keyword));
|
|
2690
|
+
const service = runtime2.getService(KnowledgeService.serviceType);
|
|
2691
|
+
if (!service) {
|
|
2692
|
+
return false;
|
|
2693
|
+
}
|
|
2694
|
+
return hasSearchKeyword && hasKnowledgeKeyword;
|
|
2695
|
+
};
|
|
2696
|
+
try {
|
|
2697
|
+
return Boolean(await __avLegacyValidate(runtime, message, state, options));
|
|
2698
|
+
} catch {
|
|
2699
|
+
return false;
|
|
2700
|
+
}
|
|
2701
|
+
},
|
|
2702
|
+
handler: async (runtime, message, _state, _options, callback) => {
|
|
2703
|
+
try {
|
|
2704
|
+
const service = runtime.getService(KnowledgeService.serviceType);
|
|
2705
|
+
if (!service) {
|
|
2706
|
+
throw new Error("Knowledge service not available");
|
|
2707
|
+
}
|
|
2708
|
+
const text = message.content.text || "";
|
|
2709
|
+
const query = text.replace(/^(search|find|look up|query)\s+(your\s+)?knowledge\s+(base\s+)?(for\s+)?/i, "").trim();
|
|
2710
|
+
if (!query) {
|
|
2711
|
+
const response2 = {
|
|
2712
|
+
text: "What would you like me to search for in my knowledge base?"
|
|
2713
|
+
};
|
|
2714
|
+
if (callback) {
|
|
2715
|
+
await callback(response2);
|
|
2716
|
+
}
|
|
2717
|
+
return;
|
|
2718
|
+
}
|
|
2719
|
+
const searchMessage = {
|
|
2720
|
+
...message,
|
|
2721
|
+
content: {
|
|
2722
|
+
text: query
|
|
2723
|
+
}
|
|
2724
|
+
};
|
|
2725
|
+
const results = await service.getKnowledge(searchMessage);
|
|
2726
|
+
let response;
|
|
2727
|
+
if (results.length === 0) {
|
|
2728
|
+
response = {
|
|
2729
|
+
text: `I couldn't find any information about "${query}" in my knowledge base.`
|
|
2730
|
+
};
|
|
2731
|
+
} else {
|
|
2732
|
+
const formattedResults = results.slice(0, 3).map((item, index) => `${index + 1}. ${item.content.text}`).join(`
|
|
2733
|
+
|
|
2734
|
+
`);
|
|
2735
|
+
response = {
|
|
2736
|
+
text: `Here's what I found about "${query}":
|
|
2737
|
+
|
|
2738
|
+
${formattedResults}`
|
|
2739
|
+
};
|
|
2740
|
+
}
|
|
2741
|
+
if (callback) {
|
|
2742
|
+
await callback(response);
|
|
2743
|
+
}
|
|
2744
|
+
return { success: true, text: response.text };
|
|
2745
|
+
} catch (error) {
|
|
2746
|
+
import_core5.logger.error({ error }, "Error in SEARCH_KNOWLEDGE action");
|
|
2747
|
+
const errorResponse = {
|
|
2748
|
+
text: `I encountered an error while searching the knowledge base: ${error instanceof Error ? error.message : String(error)}`
|
|
2749
|
+
};
|
|
2750
|
+
if (callback) {
|
|
2751
|
+
await callback(errorResponse);
|
|
2752
|
+
}
|
|
2753
|
+
return { success: false, error: error instanceof Error ? error.message : String(error) };
|
|
2754
|
+
}
|
|
2755
|
+
}
|
|
2756
|
+
};
|
|
2757
|
+
// Array bundling the process-knowledge and search-knowledge action objects.
var knowledgeActions = [processKnowledgeAction, searchKnowledgeAction];
|
|
2758
|
+
|
|
2759
|
+
// documents-provider.ts
|
|
2760
|
+
var import_core6 = require("@elizaos/core");
|
|
2761
|
+
/**
 * Provider that lists the documents stored in the knowledge base.
 *
 * `get` resolves to `{ data, values, text }`. When the knowledge service is
 * missing, no document memories are found, or an error is thrown, the list is
 * empty and `text` is "" (the error message is reported in `data.error`).
 */
var documentsProvider = {
  name: "AVAILABLE_DOCUMENTS",
  description: "List of documents available in the knowledge base. Shows which documents the agent can reference and retrieve information from.",
  dynamic: true,
  get: async (runtime, _message, _state) => {
    // Result returned whenever there is nothing to list.
    const emptyResult = (extraData = {}) => ({
      data: { documents: [], ...extraData },
      values: { documentsCount: 0, documents: "", availableDocuments: "" },
      text: ""
    });

    // One summary line per document: "name - type - size - from source".
    const describeDocument = (doc, position) => {
      const meta = doc.metadata;
      const segments = [meta?.filename || meta?.title || `Document ${position + 1}`];
      const kind = meta?.fileExt || meta?.fileType || "";
      if (kind) {
        segments.push(kind);
      }
      const bytes = meta?.fileSize;
      if (bytes) {
        const kilobytes = Math.round(bytes / 1024);
        segments.push(kilobytes > 1024 ? `${Math.round(kilobytes / 1024)}MB` : `${kilobytes}KB`);
      }
      const origin = meta?.source || "upload";
      if (origin !== "upload") {
        segments.push(`from ${origin}`);
      }
      return segments.join(" - ");
    };

    try {
      const service = runtime.getService("knowledge");
      if (!service) {
        import_core6.logger.warn("Knowledge service not available for documents provider");
        return emptyResult();
      }

      const memories = await service.getMemories({
        tableName: "documents",
        roomId: runtime.agentId,
        count: 100
      });
      const docs = memories.filter((entry) => entry.metadata?.type === import_core6.MemoryType.DOCUMENT);
      if (!docs || docs.length === 0) {
        return emptyResult();
      }

      const documentsList = docs.map(describeDocument).join("\n");
      const documentsText = import_core6.addHeader(
        "# Available Documents",
        `${docs.length} document(s) in knowledge base:\n${documentsList}`
      );
      const summaries = docs.map((doc) => ({
        id: doc.id,
        filename: doc.metadata?.filename || doc.metadata?.title,
        fileType: doc.metadata?.fileType || doc.metadata?.fileExt,
        source: doc.metadata?.source
      }));

      return {
        data: { documents: summaries, count: docs.length },
        values: {
          documentsCount: docs.length,
          documents: documentsList,
          availableDocuments: documentsText
        },
        text: documentsText
      };
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);
      import_core6.logger.error("Error in documents provider:", reason);
      return emptyResult({ error: reason });
    }
  }
};
|
|
2854
|
+
|
|
2855
|
+
// provider.ts
|
|
2856
|
+
var import_core7 = require("@elizaos/core");
|
|
2857
|
+
/**
 * Provider that injects retrieved knowledge fragments into the agent context.
 *
 * `get(runtime, message)` asks the "knowledge" service for fragments matching
 * `message`, renders the first five as a bulleted list under a "# Knowledge"
 * header (truncated to 4000 * 3.5 characters) and records retrieval metadata
 * on the service. When the service is missing or returns nothing, the result
 * carries empty strings and `knowledgeUsed: false`.
 */
var knowledgeProvider = {
  name: "KNOWLEDGE",
  description: "Knowledge from the knowledge base that the agent knows, retrieved whenever the agent needs to answer a question about their expertise.",
  dynamic: true,
  get: async (runtime, message) => {
    const knowledgeService = runtime.getService("knowledge");
    const knowledgeData = await knowledgeService?.getKnowledge(message);
    if (!knowledgeData || knowledgeData.length === 0) {
      return {
        text: "",
        values: { knowledge: "", knowledgeUsed: false },
        data: { knowledge: "", ragMetadata: null, knowledgeUsed: false }
      };
    }

    const firstFiveKnowledgeItems = knowledgeData.slice(0, 5);
    // Guard against fragments without content, as the preview below already does.
    let knowledge = import_core7.addHeader(
      "# Knowledge",
      firstFiveKnowledgeItems.map((item) => `- ${item.content?.text || ""}`).join("\n")
    );

    // Rough character budget: 4000 tokens at ~3.5 characters per token.
    const tokenLength = 3.5;
    const maxChars = 4000 * tokenLength;
    if (knowledge.length > maxChars) {
      knowledge = knowledge.slice(0, maxChars);
    }

    const ragMetadata = {
      retrievedFragments: knowledgeData.map((fragment) => {
        const fragmentMetadata = fragment.metadata;
        return {
          fragmentId: fragment.id,
          documentTitle: fragmentMetadata?.filename || fragmentMetadata?.title || "",
          similarityScore: fragment.similarity,
          contentPreview: `${(fragment.content?.text || "").substring(0, 100)}...`
        };
      }),
      queryText: message.content?.text || "",
      totalFragments: knowledgeData.length,
      retrievalTimestamp: Date.now()
    };
    knowledgeService.setPendingRAGMetadata(ragMetadata);

    // The enrichment runs detached from this call, so a failure must be caught
    // here; otherwise it surfaces as an unhandled promise rejection.
    setTimeout(async () => {
      try {
        await knowledgeService.enrichRecentMemoriesWithPendingRAG();
      } catch (error) {
        import_core7.logger.error({ error }, "Error enriching recent memories with RAG metadata");
      }
    }, 2000);

    return {
      data: {
        knowledge,
        ragMetadata,
        knowledgeUsed: true
      },
      values: {
        knowledge,
        knowledgeUsed: true
      },
      text: knowledge,
      ragMetadata,
      knowledgeUsed: true
    };
  }
};
|
|
2913
|
+
|
|
2914
|
+
// routes.ts
|
|
2915
|
+
var import_node_fs = __toESM(require("node:fs"));
|
|
2916
|
+
var import_node_path = __toESM(require("node:path"));
|
|
2917
|
+
var import_core8 = require("@elizaos/core");
|
|
2918
|
+
var import_multer = __toESM(require("multer"));
|
|
2919
|
+
/**
 * Identity helper: hands back the given response object untouched.
 * It is used as the destination argument of a stream `pipe` call.
 */
function asWritableStream(res) {
  const writable = res;
  return writable;
}
|
|
2922
|
+
/**
 * Builds the multer upload middleware from runtime settings.
 *
 * Settings read (with defaults):
 *  - KNOWLEDGE_UPLOAD_DIR ("/tmp/uploads/")
 *  - KNOWLEDGE_MAX_FILE_SIZE (52428800 bytes)
 *  - KNOWLEDGE_MAX_FILES (10)
 *  - KNOWLEDGE_ALLOWED_MIME_TYPES (comma-separated; built-in list when unset)
 *
 * @param runtime Object exposing `getSetting(key)`.
 * @returns The value returned by `multer(options)`.
 */
var createUploadMiddleware = (runtime) => {
  // A non-numeric setting falls back to the default instead of yielding NaN.
  const readIntSetting = (key, fallback) => {
    const parsed = Number.parseInt(String(runtime.getSetting(key) || fallback), 10);
    return Number.isNaN(parsed) ? fallback : parsed;
  };

  const uploadDir = String(runtime.getSetting("KNOWLEDGE_UPLOAD_DIR") || "/tmp/uploads/");
  const maxFileSize = readIntSetting("KNOWLEDGE_MAX_FILE_SIZE", 52428800);
  const maxFiles = readIntSetting("KNOWLEDGE_MAX_FILES", 10);

  // Trim entries and drop empties so "text/plain, application/pdf," works.
  const configuredMimeTypes = String(runtime.getSetting("KNOWLEDGE_ALLOWED_MIME_TYPES") || "")
    .split(",")
    .map((entry) => entry.trim())
    .filter(Boolean);
  const allowedMimeTypes = configuredMimeTypes.length > 0 ? configuredMimeTypes : [
    "text/plain",
    "text/markdown",
    "application/pdf",
    "application/msword",
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    "text/html",
    "application/json",
    "application/xml",
    "text/csv"
  ];

  return import_multer.default({
    dest: uploadDir,
    limits: {
      fileSize: maxFileSize,
      files: maxFiles
    },
    fileFilter: (_req, file, cb) => {
      if (allowedMimeTypes.includes(file.mimetype)) {
        cb(null, true);
      } else {
        cb(new Error(`File type ${file.mimetype} not allowed. Allowed types: ${allowedMimeTypes.join(", ")}`));
      }
    }
  });
};
|
|
2952
|
+
/**
 * Writes a successful JSON envelope `{ success: true, data }` to the response.
 *
 * @param res Response object exposing `writeHead` and `end`.
 * @param data Payload placed under `data`.
 * @param status HTTP status code (default 200).
 */
function sendSuccess(res, data, status = 200) {
  // 204 and 304 responses must not carry content, so send headers only.
  if (status === 204 || status === 304) {
    res.writeHead(status);
    res.end();
    return;
  }
  res.writeHead(status, { "Content-Type": "application/json" });
  res.end(JSON.stringify({ success: true, data }));
}
|
|
2956
|
+
/**
 * Writes a failure JSON envelope `{ success: false, error: { code, message, details } }`.
 *
 * @param res Response object exposing `writeHead` and `end`.
 * @param status HTTP status code.
 * @param code Error code string.
 * @param message Error message.
 * @param details Optional extra detail (omitted from the JSON when undefined).
 */
function sendError(res, status, code, message, details) {
  const envelope = {
    success: false,
    error: { code, message, details }
  };
  const headers = { "Content-Type": "application/json" };
  res.writeHead(status, headers);
  res.end(JSON.stringify(envelope));
}
|
|
2960
|
+
/**
 * Deletes the file at `filePath` if it exists.
 * A failing delete is logged and not rethrown; an empty or missing path is a no-op.
 */
var cleanupFile = (filePath) => {
  const isPresent = filePath && import_node_fs.default.existsSync(filePath);
  if (!isPresent) {
    return;
  }
  try {
    import_node_fs.default.unlinkSync(filePath);
  } catch (error) {
    import_core8.logger.error({ error }, `Error cleaning up file ${filePath}`);
  }
};
|
|
2969
|
+
/**
 * Runs `cleanupFile` on the `path` of every entry in `files`.
 * A falsy `files` value is a no-op.
 */
var cleanupFiles = (files) => {
  if (!files) {
    return;
  }
  files.forEach(({ path: uploadedPath }) => cleanupFile(uploadedPath));
};
|
|
2976
|
+
// File extension -> content type, used when a fetched URL reports a generic
// "application/octet-stream" type.
var KNOWLEDGE_EXTENSION_CONTENT_TYPES = new Map([
  ["pdf", "application/pdf"],
  ["txt", "text/plain"],
  ["text", "text/plain"],
  ["md", "text/markdown"],
  ["markdown", "text/markdown"],
  ["doc", "application/msword"],
  ["docx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document"],
  ["html", "text/html"],
  ["htm", "text/html"],
  ["json", "application/json"],
  ["xml", "application/xml"],
  ["csv", "text/csv"]
]);

/** Returns the content type implied by the filename's extension, or null when unknown. */
function inferKnowledgeContentType(filename) {
  const extension = filename.split(".").pop()?.toLowerCase();
  return (extension && KNOWLEDGE_EXTENSION_CONTENT_TYPES.get(extension)) || null;
}

/**
 * Handles knowledge uploads, either as multipart files (`req.files`) or as
 * URLs in the JSON body (`req.body.fileUrl` / `req.body.fileUrls`).
 *
 * Every item is processed independently: the success response is an array
 * with one entry per file/URL whose `status` is "success" or
 * "error_processing". Uploaded temp files are removed after processing.
 *
 * Error responses: 500 SERVICE_NOT_FOUND, 400 INVALID_REQUEST,
 * 400 INVALID_FILES, 400 MISSING_URL, 400 MISSING_AGENT_ID,
 * 500 PROCESSING_ERROR.
 *
 * NOTE(review): the agent ID and the URLs to fetch come straight from the
 * request; confirm that callers are authenticated and that `fetchUrlContent`
 * restricts which hosts may be fetched.
 */
async function uploadKnowledgeHandler(req, res, runtime) {
  const service = runtime.getService(KnowledgeService.serviceType);
  if (!service) {
    return sendError(res, 500, "SERVICE_NOT_FOUND", "KnowledgeService not found");
  }
  const hasUploadedFiles = req.files && req.files.length > 0;
  const isJsonRequest = !hasUploadedFiles && req.body && (req.body.fileUrl || req.body.fileUrls);
  if (!hasUploadedFiles && !isJsonRequest) {
    return sendError(res, 400, "INVALID_REQUEST", "Request must contain either files or URLs");
  }
  try {
    if (hasUploadedFiles) {
      const files = req.files;
      const invalidFiles = files.filter((file) => {
        if (file.size === 0) {
          import_core8.logger.warn(`File ${file.originalname} is empty`);
          return true;
        }
        if (!file.originalname || file.originalname.trim() === "") {
          import_core8.logger.warn(`File has no name`);
          return true;
        }
        if (!file.path) {
          import_core8.logger.warn(`File ${file.originalname} has no path`);
          return true;
        }
        return false;
      });
      if (invalidFiles.length > 0) {
        // Reject the whole batch and remove every temp file, valid or not.
        cleanupFiles(files);
        const invalidFileNames = invalidFiles.map((f) => f.originalname || "unnamed").join(", ");
        return sendError(res, 400, "INVALID_FILES", `Invalid or corrupted files: ${invalidFileNames}`);
      }
      const agentId = req.body?.agentId || req.query?.agentId;
      if (!agentId) {
        import_core8.logger.error("No agent ID provided in upload request");
        return sendError(res, 400, "MISSING_AGENT_ID", "Agent ID is required for uploading knowledge");
      }
      const worldId = req.body?.worldId || agentId;
      import_core8.logger.info(`Processing file upload for agent: ${agentId}`);
      const processingPromises = files.map(async (file) => {
        const originalFilename = file.originalname;
        const filePath = file.path;
        try {
          const fileBuffer = await import_node_fs.default.promises.readFile(filePath);
          const result = await service.addKnowledge({
            agentId,
            clientDocumentId: "",
            contentType: file.mimetype,
            originalFilename,
            content: fileBuffer.toString("base64"),
            worldId,
            roomId: agentId,
            entityId: agentId
          });
          cleanupFile(filePath);
          return {
            id: result.clientDocumentId,
            filename: originalFilename,
            type: file.mimetype,
            size: file.size,
            uploadedAt: Date.now(),
            status: "success"
          };
        } catch (fileError) {
          const reason = fileError instanceof Error ? fileError.message : String(fileError);
          import_core8.logger.error(`Error processing file ${file.originalname}: ${reason}`);
          cleanupFile(filePath);
          return {
            id: "",
            filename: originalFilename,
            status: "error_processing",
            error: reason
          };
        }
      });
      const results = await Promise.all(processingPromises);
      sendSuccess(res, results);
    } else {
      const fileUrls = Array.isArray(req.body?.fileUrls) ? req.body.fileUrls : req.body?.fileUrl ? [req.body.fileUrl] : [];
      if (fileUrls.length === 0) {
        return sendError(res, 400, "MISSING_URL", "File URL is required");
      }
      const agentId = req.body?.agentId || req.query?.agentId;
      if (!agentId) {
        import_core8.logger.error("No agent ID provided in URL request");
        return sendError(res, 400, "MISSING_AGENT_ID", "Agent ID is required for uploading knowledge from URLs");
      }
      const processingPromises = fileUrls.map(async (fileUrl) => {
        try {
          const normalizedUrl = normalizeS3Url(fileUrl);
          // The last path segment of the URL serves as the document's filename.
          const pathSegments = new URL(fileUrl).pathname.split("/");
          const encodedFilename = pathSegments[pathSegments.length - 1] || "document.pdf";
          const originalFilename = decodeURIComponent(encodedFilename);
          import_core8.logger.debug(`Fetching content from URL: ${fileUrl}`);
          const { content, contentType: fetchedContentType } = await fetchUrlContent(fileUrl);
          let contentType = fetchedContentType;
          if (contentType === "application/octet-stream") {
            // Generic type reported: fall back to what the extension implies.
            contentType = inferKnowledgeContentType(originalFilename) ?? contentType;
          }
          const result = await service.addKnowledge({
            agentId,
            clientDocumentId: "",
            contentType,
            originalFilename,
            content,
            worldId: agentId,
            roomId: agentId,
            entityId: agentId,
            metadata: {
              url: normalizedUrl
            }
          });
          return {
            id: result.clientDocumentId,
            fileUrl,
            filename: originalFilename,
            message: "Knowledge created successfully",
            createdAt: Date.now(),
            fragmentCount: result.fragmentCount,
            status: "success"
          };
        } catch (urlError) {
          const reason = urlError instanceof Error ? urlError.message : String(urlError);
          import_core8.logger.error(`Error processing URL ${fileUrl}: ${reason}`);
          return {
            fileUrl,
            status: "error_processing",
            error: reason
          };
        }
      });
      const results = await Promise.all(processingPromises);
      sendSuccess(res, results);
    }
  } catch (error) {
    import_core8.logger.error({ error }, "Error processing knowledge");
    if (hasUploadedFiles) {
      cleanupFiles(req.files);
    }
    sendError(res, 500, "PROCESSING_ERROR", "Failed to process knowledge", error instanceof Error ? error.message : String(error));
  }
}
|
|
3141
|
+
/**
 * Lists document memories, optionally restricted to a set of source URLs.
 *
 * Query parameters:
 *  - limit: maximum number of memories (default 10000; non-numeric values fall back to the default)
 *  - before: upper timestamp bound (default: now; non-numeric values fall back to the default)
 *  - includeEmbedding: "true" keeps the embedding field, otherwise it is blanked
 *  - fileUrls: comma-separated and/or repeated URLs; a memory matches when its
 *    id equals the URL-derived id or its `metadata.url` normalises to a requested URL
 *
 * Responds with `{ memories, urlFiltered, totalFound, totalRequested }`, or
 * with 500 SERVICE_NOT_FOUND / 500 RETRIEVAL_ERROR.
 */
async function getKnowledgeDocumentsHandler(req, res, runtime) {
  const service = runtime.getService(KnowledgeService.serviceType);
  if (!service) {
    return sendError(res, 500, "SERVICE_NOT_FOUND", "KnowledgeService not found for getKnowledgeDocumentsHandler");
  }
  try {
    const parsedLimit = req.query?.limit ? Number.parseInt(req.query.limit, 10) : NaN;
    const limit = Number.isNaN(parsedLimit) ? 1e4 : parsedLimit;
    const parsedBefore = req.query?.before ? Number.parseInt(req.query.before, 10) : NaN;
    const before = Number.isNaN(parsedBefore) ? Date.now() : parsedBefore;
    const includeEmbedding = req.query?.includeEmbedding === "true";

    // Query parsers may deliver a repeated parameter as an array; accept both
    // forms and split each value on commas.
    const rawFileUrls = req.query?.fileUrls;
    let fileUrls = null;
    if (rawFileUrls) {
      const candidates = (Array.isArray(rawFileUrls) ? rawFileUrls : [rawFileUrls])
        .flatMap((value) => String(value).split(","))
        .map((value) => value.trim())
        .filter(Boolean);
      // No usable entries is treated the same as no filter at all.
      fileUrls = candidates.length > 0 ? candidates : null;
    }

    const memories = await service.getMemories({
      tableName: "documents",
      count: limit,
      end: before
    });

    let filteredMemories = memories;
    if (fileUrls) {
      const requestedUrls = new Set(fileUrls.map((url) => normalizeS3Url(url)));
      const requestedIds = new Set([...requestedUrls].map((url) => import_core8.createUniqueUuid(runtime, url)));
      filteredMemories = memories.filter((memory) => {
        if (requestedIds.has(memory.id)) {
          return true;
        }
        const storedUrl = memory.metadata?.url;
        return typeof storedUrl === "string" && requestedUrls.has(normalizeS3Url(storedUrl));
      });
    }

    const cleanMemories = includeEmbedding ? filteredMemories : filteredMemories.map((memory) => ({
      ...memory,
      embedding: undefined
    }));
    sendSuccess(res, {
      memories: cleanMemories,
      urlFiltered: !!fileUrls,
      totalFound: cleanMemories.length,
      totalRequested: fileUrls ? fileUrls.length : 0
    });
  } catch (error) {
    import_core8.logger.error({ error }, "Error retrieving documents");
    sendError(res, 500, "RETRIEVAL_ERROR", "Failed to retrieve documents", error instanceof Error ? error.message : String(error));
  }
}
|
|
3177
|
+
/**
 * Deletes the knowledge document named by the `knowledgeId` route parameter.
 *
 * Responds 204 on success, 400 INVALID_ID when the id is missing or shorter
 * than 36 characters, 500 SERVICE_NOT_FOUND when the knowledge service is
 * unavailable, and 500 DELETE_ERROR when the deletion throws.
 */
async function deleteKnowledgeDocumentHandler(req, res, runtime) {
  const knowledgeService = runtime.getService(KnowledgeService.serviceType);
  if (!knowledgeService) {
    return sendError(res, 500, "SERVICE_NOT_FOUND", "KnowledgeService not found for deleteKnowledgeDocumentHandler");
  }

  const knowledgeId = req.params?.knowledgeId;
  const hasAcceptableId = knowledgeId && !(knowledgeId.length < 36);
  if (!hasAcceptableId) {
    import_core8.logger.error(`Invalid knowledge ID format: ${knowledgeId}`);
    return sendError(res, 400, "INVALID_ID", "Invalid Knowledge ID format");
  }

  try {
    import_core8.logger.debug(`Deleting document: ${knowledgeId}`);
    await knowledgeService.deleteMemory(knowledgeId);
    sendSuccess(res, null, 204);
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    import_core8.logger.error({ error }, `Error deleting document ${knowledgeId}`);
    sendError(res, 500, "DELETE_ERROR", "Failed to delete document", reason);
  }
}
|
|
3197
|
+
/**
 * Returns the document whose id equals the `knowledgeId` route parameter.
 *
 * Loads up to 10000 memories from the "documents" table and picks the one
 * with a matching id; the embedding field is blanked in the response.
 * Responds 400 INVALID_ID, 404 NOT_FOUND, 500 SERVICE_NOT_FOUND or
 * 500 RETRIEVAL_ERROR on failure.
 */
async function getKnowledgeByIdHandler(req, res, runtime) {
  const knowledgeService = runtime.getService(KnowledgeService.serviceType);
  if (!knowledgeService) {
    return sendError(res, 500, "SERVICE_NOT_FOUND", "KnowledgeService not found for getKnowledgeByIdHandler");
  }

  const knowledgeId = req.params?.knowledgeId;
  const hasAcceptableId = knowledgeId && !(knowledgeId.length < 36);
  if (!hasAcceptableId) {
    import_core8.logger.error(`Invalid knowledge ID format: ${knowledgeId}`);
    return sendError(res, 400, "INVALID_ID", "Invalid Knowledge ID format");
  }

  try {
    import_core8.logger.debug(`Retrieving document: ${knowledgeId}`);
    const candidates = await knowledgeService.getMemories({
      tableName: "documents",
      count: 1e4
    });
    const match = candidates.find((candidate) => candidate.id === knowledgeId);
    if (!match) {
      return sendError(res, 404, "NOT_FOUND", `Knowledge with ID ${knowledgeId} not found`);
    }
    sendSuccess(res, { document: { ...match, embedding: undefined } });
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    import_core8.logger.error({ error }, `Error retrieving document ${knowledgeId}`);
    sendError(res, 500, "RETRIEVAL_ERROR", "Failed to retrieve document", reason);
  }
}
|
|
3228
|
+
/** Escapes a value for use inside a quoted HTML attribute. */
function escapePanelHtmlAttribute(value) {
  const entities = { "&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;", "'": "&#39;" };
  return String(value).replace(/[&<>"']/g, (character) => entities[character]);
}

/** Serialises a value as JSON that cannot terminate an enclosing inline script element. */
function serializePanelConfig(value) {
  return JSON.stringify(value)
    .replace(/</g, "\\u003c")
    .replace(/>/g, "\\u003e")
    .replace(/\u2028/g, "\\u2028")
    .replace(/\u2029/g, "\\u2029");
}

/**
 * Serves the knowledge panel HTML page.
 *
 * When a built `index.html` exists it is served with `window.ELIZA_CONFIG`
 * injected after `<head>` and its `./assets/` references rewritten to the
 * plugin base path. Otherwise a minimal fallback page is generated, taking
 * the CSS/JS file names from `manifest.json` when that file is readable.
 *
 * The plugin base path is derived from the request URL, so it is treated as
 * untrusted: it is JSON-serialised for the inline script and entity-escaped
 * for HTML attributes.
 *
 * Responds 500 FRONTEND_ERROR when reading or rendering fails.
 */
async function knowledgePanelHandler(req, res, runtime) {
  const agentId = runtime.agentId;
  const requestPath = req.originalUrl || req.url || req.path || "";
  const pluginBasePath = requestPath.replace(/\/display.*$/, "");
  const configScript = `<script>
      window.ELIZA_CONFIG = ${serializePanelConfig({ agentId: String(agentId), apiBase: pluginBasePath })};
    </script>`;
  const assetsBase = `${escapePanelHtmlAttribute(pluginBasePath)}/assets/`;
  try {
    // NOTE(review): this is an absolute path from the build machine baked in by
    // the bundler; on other hosts it will not exist and the fallback page is
    // served. Confirm how the frontend directory should be resolved at runtime.
    const currentDir = import_node_path.default.dirname(new URL("file:///Users/shawwalters/eliza-workspace/milady/plugins/plugin-knowledge/typescript/routes.ts").pathname);
    const frontendPath = import_node_path.default.join(currentDir, "../dist/index.html");
    if (import_node_fs.default.existsSync(frontendPath)) {
      const html = await import_node_fs.default.promises.readFile(frontendPath, "utf8");
      // Replacer functions keep "$" sequences in the inserted text literal.
      let injectedHtml = html.replace("<head>", () => `<head>
    ${configScript}`);
      injectedHtml = injectedHtml.replace(/src="\.\/assets\//g, () => `src="${assetsBase}`);
      injectedHtml = injectedHtml.replace(/href="\.\/assets\//g, () => `href="${assetsBase}`);
      res.writeHead(200, { "Content-Type": "text/html" });
      res.end(injectedHtml);
    } else {
      let cssFile = "index.css";
      let jsFile = "index.js";
      const manifestPath = import_node_path.default.join(currentDir, "../dist/manifest.json");
      if (import_node_fs.default.existsSync(manifestPath)) {
        try {
          const manifestContent = await import_node_fs.default.promises.readFile(manifestPath, "utf8");
          const manifest = JSON.parse(manifestContent);
          for (const [key, value] of Object.entries(manifest)) {
            if (typeof value === "object" && value !== null) {
              const entry = value;
              if (key.endsWith(".css") || entry.file?.endsWith(".css")) {
                cssFile = entry.file || key;
              }
              if (key.endsWith(".js") || entry.file?.endsWith(".js")) {
                jsFile = entry.file || key;
              }
            }
          }
        } catch (manifestError) {
          // Best effort: keep the default asset names, but leave a trace.
          import_core8.logger.warn(`Could not read frontend manifest ${manifestPath}: ${manifestError instanceof Error ? manifestError.message : String(manifestError)}`);
        }
      }
      const html = `
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Knowledge</title>
    ${configScript}
    <link rel="stylesheet" href="${assetsBase}${escapePanelHtmlAttribute(cssFile)}">
    <style>
        body { font-family: system-ui, -apple-system, sans-serif; margin: 0; padding: 20px; }
        .container { max-width: 1200px; margin: 0 auto; }
        .loading { text-align: center; padding: 40px; color: #666; }
    </style>
</head>
<body>
    <div class="container">
        <div id="root">
            <div class="loading">Loading Knowledge Library...</div>
        </div>
    </div>
    <script type="module" src="${assetsBase}${escapePanelHtmlAttribute(jsFile)}"></script>
</body>
</html>`;
      res.writeHead(200, { "Content-Type": "text/html" });
      res.end(html);
    }
  } catch (error) {
    import_core8.logger.error({ error }, "Error serving frontend");
    sendError(res, 500, "FRONTEND_ERROR", "Failed to load knowledge panel", error instanceof Error ? error.message : String(error));
  }
}
|
|
3306
|
+
// Content types for the asset extensions the frontend build may emit.
var FRONTEND_ASSET_CONTENT_TYPES = new Map([
  [".js", "application/javascript"],
  [".mjs", "application/javascript"],
  [".css", "text/css"],
  [".json", "application/json"],
  [".map", "application/json"],
  [".svg", "image/svg+xml"],
  [".png", "image/png"],
  [".jpg", "image/jpeg"],
  [".jpeg", "image/jpeg"],
  [".gif", "image/gif"],
  [".webp", "image/webp"],
  [".ico", "image/x-icon"],
  [".woff", "font/woff"],
  [".woff2", "font/woff2"],
  [".ttf", "font/ttf"]
]);

/**
 * Streams a frontend asset addressed by the part of the request path that
 * follows the last "/assets/" segment.
 *
 * Responds 400 BAD_REQUEST when no asset name can be derived or it contains
 * "..", 404 NOT_FOUND when the path is missing or not a regular file, and
 * 500 ASSET_ERROR on unexpected failures. Unknown extensions are served as
 * "application/octet-stream".
 */
async function frontendAssetHandler(req, res, _runtime) {
  try {
    const fullPath = req.originalUrl || req.url || req.path || "";
    // NOTE(review): absolute build-machine path baked in by the bundler; on
    // other hosts every asset lookup ends in 404. Confirm the intended
    // runtime location of the frontend build.
    const currentDir = import_node_path.default.dirname(new URL("file:///Users/shawwalters/eliza-workspace/milady/plugins/plugin-knowledge/typescript/routes.ts").pathname);
    const assetsMarker = "/assets/";
    // Drop the query string first so a "/assets/" inside it is never matched.
    const pathOnly = fullPath.split("?")[0];
    const assetsStartIndex = pathOnly.lastIndexOf(assetsMarker);
    const assetName = assetsStartIndex === -1 ? null : pathOnly.substring(assetsStartIndex + assetsMarker.length);
    if (!assetName || assetName.includes("..")) {
      return sendError(res, 400, "BAD_REQUEST", `Invalid asset name: '${assetName}' from path ${fullPath}`);
    }
    const assetPath = import_node_path.default.join(currentDir, "../dist/assets", assetName);

    // Only regular files are streamed; opening a directory would otherwise
    // surface later as an unhandled stream error.
    let stats = null;
    try {
      stats = await import_node_fs.default.promises.stat(assetPath);
    } catch (statError) {
      if (statError?.code !== "ENOENT" && statError?.code !== "ENOTDIR") {
        throw statError;
      }
    }
    if (!stats || !stats.isFile()) {
      return sendError(res, 404, "NOT_FOUND", `Asset not found: ${req.url}`);
    }

    const extension = import_node_path.default.extname(assetPath).toLowerCase();
    const contentType = FRONTEND_ASSET_CONTENT_TYPES.get(extension) ?? "application/octet-stream";
    const fileStream = import_node_fs.default.createReadStream(assetPath);
    // A read stream without an "error" listener crashes the process when the
    // read fails; by then the headers are sent, so log and close the response.
    fileStream.on("error", (streamError) => {
      import_core8.logger.error({ error: streamError }, `Error streaming asset ${req.url}`);
      res.end();
    });
    res.writeHead(200, { "Content-Type": contentType });
    fileStream.pipe(asWritableStream(res));
  } catch (error) {
    import_core8.logger.error({ error }, `Error serving asset ${req.url}`);
    sendError(res, 500, "ASSET_ERROR", `Failed to load asset ${req.url}`, error instanceof Error ? error.message : String(error));
  }
}
|
|
3342
|
+
/**
 * Returns knowledge chunks for the panel.
 *
 * Without a `documentId` query parameter, or with `documentsOnly=true`, the
 * response holds only the document memories (mode "documents-only"). With a
 * `documentId`, it holds that document (when found) followed by the fragments
 * whose `metadata.documentId` equals it (mode "single-document").
 *
 * Responds 500 SERVICE_NOT_FOUND or 500 RETRIEVAL_ERROR on failure.
 */
async function getKnowledgeChunksHandler(req, res, runtime) {
  const knowledgeService = runtime.getService(KnowledgeService.serviceType);
  if (!knowledgeService) {
    return sendError(res, 500, "SERVICE_NOT_FOUND", "KnowledgeService not found");
  }

  try {
    const documentId = req.query?.documentId;
    const documentsOnly = req.query?.documentsOnly === "true";
    const documents = await knowledgeService.getMemories({
      tableName: "documents",
      count: 1e4,
      end: Date.now()
    });

    // Both "documents only" and "no document selected" yield the same listing.
    if (documentsOnly || !documentId) {
      sendSuccess(res, {
        chunks: documents,
        stats: {
          documents: documents.length,
          fragments: 0,
          mode: "documents-only"
        }
      });
      return;
    }

    const fragments = await knowledgeService.getMemories({
      tableName: "knowledge",
      count: 50000
    });
    const ownFragments = fragments.filter((fragment) => {
      const owner = fragment.metadata?.documentId;
      return typeof owner === "string" && owner === documentId;
    });
    const selected = documents.find((candidate) => candidate.id === documentId);
    const chunks = selected ? [selected, ...ownFragments] : ownFragments;
    sendSuccess(res, {
      chunks,
      stats: {
        documents: selected ? 1 : 0,
        fragments: ownFragments.length,
        mode: "single-document",
        documentId
      }
    });
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    import_core8.logger.error({ error }, "Error retrieving chunks");
    sendError(res, 500, "RETRIEVAL_ERROR", "Failed to retrieve knowledge chunks", reason);
  }
}
|
|
3401
|
+
/**
 * Route handler: semantic search over stored knowledge fragments.
 *
 * Query parameters read from `req.query`:
 *  - `q`         search text (required, must be a non-blank string)
 *  - `threshold` match threshold, defaults to 0.5 and is clamped to [0, 1]
 *  - `limit`     maximum number of hits, defaults to 20 and is clamped to [1, 100]
 *  - `agentId`   room to search in, defaults to `runtime.agentId`
 *
 * The query text is embedded with the runtime's TEXT_EMBEDDING model, the
 * "knowledge" table is searched, and every hit is decorated with the title
 * and filename found in the metadata of the document it points to.
 *
 * @param req - Request; only `req.query` is read.
 * @param res - Response handle forwarded to sendSuccess / sendError.
 * @param runtime - Agent runtime providing getService, useModel, searchMemories and getMemoryById.
 */
async function searchKnowledgeHandler(req, res, runtime) {
  const service = runtime.getService(KnowledgeService.serviceType);
  if (!service) {
    return sendError(res, 500, "SERVICE_NOT_FOUND", "KnowledgeService not found");
  }
  try {
    const searchText = req.query?.q;
    // Reject anything that is not a non-blank string up front. A non-string value
    // (some query parsers yield arrays for repeated keys) would otherwise throw on
    // `.trim()` and surface as a 500 instead of a client error.
    if (typeof searchText !== "string" || searchText.trim().length === 0) {
      return sendError(res, 400, "INVALID_QUERY", "Search query cannot be empty");
    }

    const parsedThreshold = req.query?.threshold ? Number.parseFloat(req.query.threshold) : NaN;
    const matchThreshold = Math.max(0, Math.min(1, Number.isNaN(parsedThreshold) ? 0.5 : parsedThreshold));
    const parsedLimit = req.query?.limit ? Number.parseInt(req.query.limit, 10) : NaN;
    const limit = Math.max(1, Math.min(100, Number.isNaN(parsedLimit) ? 20 : parsedLimit));
    const agentId = req.query?.agentId || runtime.agentId;

    const embedding = await runtime.useModel(import_core8.ModelType.TEXT_EMBEDDING, {
      text: searchText
    });
    const results = await runtime.searchMemories({
      tableName: "knowledge",
      embedding,
      query: searchText,
      count: limit,
      match_threshold: matchThreshold,
      roomId: agentId
    });

    // Several hits usually belong to the same document, so each parent document
    // is looked up at most once and the pending lookup is shared between hits.
    const metadataByDocumentId = new Map();
    const loadDocumentMetadata = (documentId) => {
      let pending = metadataByDocumentId.get(documentId);
      if (!pending) {
        pending = (async () => runtime.getMemoryById(documentId))()
          .then((document) => document?.metadata)
          .catch((error) => {
            // Enrichment is best effort: a failed lookup leaves title/filename empty.
            import_core8.logger.warn({ error }, `Failed to load document ${documentId} for search result`);
            return undefined;
          });
        metadataByDocumentId.set(documentId, pending);
      }
      return pending;
    };
    const asString = (value) => (typeof value === "string" ? value : "");

    const enhancedResults = await Promise.all(results.map(async (fragment) => {
      let documentTitle = "";
      let documentFilename = "";
      if (fragment.metadata && typeof fragment.metadata === "object" && "documentId" in fragment.metadata) {
        const docMetadata = await loadDocumentMetadata(fragment.metadata.documentId);
        if (docMetadata) {
          documentFilename = asString(docMetadata.filename);
          // The title falls back to the filename when no string title is stored.
          documentTitle = asString(docMetadata.title) || documentFilename;
        }
      }
      return {
        id: fragment.id,
        content: fragment.content,
        similarity: fragment.similarity || 0,
        metadata: {
          ...fragment.metadata || {},
          documentTitle,
          documentFilename
        }
      };
    }));

    sendSuccess(res, {
      query: searchText,
      threshold: matchThreshold,
      results: enhancedResults,
      count: enhancedResults.length
    });
  } catch (error) {
    import_core8.logger.error({ error }, "Error searching knowledge");
    sendError(res, 500, "SEARCH_ERROR", "Failed to search knowledge", error instanceof Error ? error.message : String(error));
  }
}
|
|
3465
|
+
/**
 * Route handler: returns one page of the knowledge graph.
 *
 * Query parameters read from `req.query`:
 *  - `page`    1-based page number (invalid or < 1 falls back to 1)
 *  - `limit`   documents per page (invalid or < 1 falls back to 20, capped at 50)
 *  - `type`    when equal to "document", fragment nodes and links are omitted
 *  - `agentId` room to read from, defaults to `runtime.agentId`
 *
 * The response lists all document nodes of the page first, followed by the
 * fragment nodes of each document (in page order), plus one link per fragment
 * and the pagination summary.
 *
 * @param req - Request; only `req.query` is read.
 * @param res - Response handle forwarded to sendSuccess / sendError.
 * @param runtime - Agent runtime; used to resolve the KnowledgeService.
 */
async function getGraphNodesHandler(req, res, runtime) {
  const service = runtime.getService(KnowledgeService.serviceType);
  if (!service) {
    return sendError(res, 500, "SERVICE_NOT_FOUND", "KnowledgeService not found");
  }

  // A memory counts as a fragment when its metadata type says so (string compare is
  // case-insensitive), or when it has no type at all but does reference a document.
  const isFragmentMemory = (metadata) => {
    const typeString = typeof metadata?.type === "string" ? metadata.type : null;
    return Boolean(
      (typeString && typeString.toLowerCase() === "fragment") ||
      metadata?.type === import_core8.MemoryType.FRAGMENT ||
      (!metadata?.type && metadata?.documentId)
    );
  };

  try {
    const parsedPage = req.query?.page ? Number.parseInt(req.query.page, 10) : 1;
    const parsedLimit = req.query?.limit ? Number.parseInt(req.query.limit, 10) : 20;
    const type = req.query?.type;
    const agentId = req.query?.agentId || runtime.agentId;
    const page = Number.isNaN(parsedPage) || parsedPage < 1 ? 1 : parsedPage;
    const limit = Number.isNaN(parsedLimit) || parsedLimit < 1 ? 20 : Math.min(parsedLimit, 50);
    const offset = (page - 1) * limit;

    const totalDocuments = await service.countMemories({
      tableName: "documents",
      roomId: agentId,
      unique: false
    });
    const totalPages = Math.ceil(totalDocuments / limit);
    const hasMore = page < totalPages;
    const paginatedDocuments = await service.getMemories({
      tableName: "documents",
      roomId: agentId,
      count: limit,
      offset
    });

    const nodes = [];
    const links = [];
    const pageDocumentIds = new Set();
    for (const doc of paginatedDocuments) {
      if (!doc.id) {
        import_core8.logger.warn("Skipping document without ID");
        continue;
      }
      pageDocumentIds.add(doc.id);
      nodes.push({ id: doc.id, type: "document" });
    }

    if (type !== "document") {
      const allFragments = await service.getMemories({
        tableName: "knowledge",
        roomId: agentId,
        count: 50000
      });

      // Group the fragments by owning document in a single pass instead of
      // re-scanning the whole fragment list once per document on the page.
      const fragmentsByDocumentId = new Map();
      for (const fragment of allFragments) {
        const metadata = fragment.metadata;
        const ownerId = metadata?.documentId;
        if (!pageDocumentIds.has(ownerId) || !isFragmentMemory(metadata)) {
          continue;
        }
        const siblings = fragmentsByDocumentId.get(ownerId);
        if (siblings) {
          siblings.push(fragment);
        } else {
          fragmentsByDocumentId.set(ownerId, [fragment]);
        }
      }

      for (const doc of paginatedDocuments) {
        if (!doc.id) {
          continue;
        }
        for (const frag of fragmentsByDocumentId.get(doc.id) ?? []) {
          if (!frag.id) {
            continue;
          }
          nodes.push({ id: frag.id, type: "fragment" });
          links.push({ source: doc.id, target: frag.id });
        }
      }
    }

    sendSuccess(res, {
      nodes,
      links,
      pagination: {
        currentPage: page,
        totalPages,
        hasMore,
        totalDocuments
      }
    });
  } catch (error) {
    import_core8.logger.error({ error }, "Error fetching graph nodes");
    sendError(res, 500, "GRAPH_ERROR", "Failed to fetch graph nodes", error instanceof Error ? error.message : String(error));
  }
}
|
|
3541
|
+
/**
 * Route handler: returns the details of a single graph node.
 *
 * The node id comes from `req.params.nodeId`; ids shorter than 36 characters
 * are rejected with 400. The id is looked up among the "documents" memories
 * first and among the "knowledge" memories second. Within each table a memory
 * whose `roomId` equals the agent id (`req.query.agentId`, defaulting to
 * `runtime.agentId`) is preferred, but any memory with the id is accepted.
 * Responds with 404 when neither table contains the id.
 *
 * @param req - Request; `req.params` and `req.query` are read.
 * @param res - Response handle forwarded to sendSuccess / sendError.
 * @param runtime - Agent runtime; used to resolve the KnowledgeService.
 */
async function getGraphNodeDetailsHandler(req, res, runtime) {
  const knowledgeService = runtime.getService(KnowledgeService.serviceType);
  if (!knowledgeService) {
    return sendError(res, 500, "SERVICE_NOT_FOUND", "KnowledgeService not found");
  }
  const nodeId = req.params?.nodeId;
  const agentId = req.query?.agentId || runtime.agentId;
  if (!nodeId || nodeId.length < 36) {
    return sendError(res, 400, "INVALID_ID", "Invalid node ID format");
  }

  // Prefer the memory living in the agent's room, otherwise take any memory with that id.
  const locate = (memories) =>
    memories.find((memory) => memory.id === nodeId && memory.roomId === agentId) ||
    memories.find((memory) => memory.id === nodeId);

  // Documents and fragments are reported with the same field set; only `type` differs.
  const respondWith = (memory, nodeType) => {
    sendSuccess(res, {
      id: memory.id,
      type: nodeType,
      content: memory.content,
      metadata: memory.metadata,
      createdAt: memory.createdAt,
      entityId: memory.entityId,
      roomId: memory.roomId,
      agentId: memory.agentId,
      worldId: memory.worldId
    });
  };

  try {
    const matchingDocument = locate(await knowledgeService.getMemories({
      tableName: "documents",
      count: 10000
    }));
    if (matchingDocument) {
      respondWith(matchingDocument, "document");
      return;
    }

    // The fragment table is only loaded once the id turned out not to be a document.
    const matchingFragment = locate(await knowledgeService.getMemories({
      tableName: "knowledge",
      count: 50000
    }));
    if (matchingFragment) {
      respondWith(matchingFragment, "fragment");
      return;
    }

    import_core8.logger.error(`Node ${nodeId} not found`);
    sendError(res, 404, "NOT_FOUND", `Node with ID ${nodeId} not found`);
  } catch (error) {
    import_core8.logger.error({ error }, `Error fetching node details for ${nodeId}`);
    sendError(res, 500, "GRAPH_ERROR", "Failed to fetch node details", error instanceof Error ? error.message : String(error));
  }
}
|
|
3603
|
+
/**
 * Route handler: lists the fragment nodes that hang off one document.
 *
 * The document id comes from `req.params.documentId`; ids shorter than 36
 * characters are rejected with 400. All "knowledge" memories of the agent's
 * room (`req.query.agentId`, defaulting to `runtime.agentId`) are loaded and
 * those that reference the document and qualify as fragments are returned as
 * nodes, each with a document -> fragment link.
 *
 * @param req - Request; `req.params` and `req.query` are read.
 * @param res - Response handle forwarded to sendSuccess / sendError.
 * @param runtime - Agent runtime; used to resolve the KnowledgeService.
 */
async function expandDocumentGraphHandler(req, res, runtime) {
  const knowledgeService = runtime.getService(KnowledgeService.serviceType);
  if (!knowledgeService) {
    return sendError(res, 500, "SERVICE_NOT_FOUND", "KnowledgeService not found");
  }
  const documentId = req.params?.documentId;
  const agentId = req.query?.agentId || runtime.agentId;
  if (!documentId || documentId.length < 36) {
    return sendError(res, 400, "INVALID_ID", "Invalid document ID format");
  }
  try {
    const roomFragments = await knowledgeService.getMemories({
      tableName: "knowledge",
      roomId: agentId,
      count: 50000
    });

    const nodes = [];
    const links = [];
    for (const candidate of roomFragments) {
      const meta = candidate.metadata;
      if (meta?.documentId !== documentId) {
        continue;
      }
      // Fragment test: explicit type (string compare is case-insensitive), or no
      // type at all while still pointing at a document.
      const kind = typeof meta?.type === "string" ? meta.type : null;
      const qualifies =
        (kind && kind.toLowerCase() === "fragment") ||
        meta?.type === import_core8.MemoryType.FRAGMENT ||
        (!meta?.type && meta?.documentId);
      if (!qualifies || candidate.id === undefined) {
        continue;
      }
      nodes.push({ id: candidate.id, type: "fragment" });
      links.push({ source: documentId, target: candidate.id });
    }

    sendSuccess(res, {
      documentId,
      nodes,
      links,
      fragmentCount: nodes.length
    });
  } catch (error) {
    import_core8.logger.error({ error }, `Error expanding document ${documentId}`);
    sendError(res, 500, "GRAPH_ERROR", "Failed to expand document", error instanceof Error ? error.message : String(error));
  }
}
|
|
3644
|
+
/**
 * Route handler for document uploads: runs the multipart upload middleware
 * over the "files" field and, when that succeeds, hands the request over to
 * uploadKnowledgeHandler.
 *
 * The maximum number of files comes from the `KNOWLEDGE_MAX_FILES` setting and
 * defaults to 10. A setting that does not parse to a positive integer also
 * falls back to 10, so a misconfigured value is never passed on as NaN or 0.
 *
 * @param req - Incoming request carrying the multipart body.
 * @param res - Response handle forwarded to the middleware and the handlers.
 * @param runtime - Agent runtime; `getSetting` is read for the file limit.
 */
async function uploadKnowledgeWithMulter(req, res, runtime) {
  const DEFAULT_MAX_FILES = 10;
  const upload = createUploadMiddleware(runtime);
  const parsedMaxFiles = Number.parseInt(String(runtime.getSetting("KNOWLEDGE_MAX_FILES") || DEFAULT_MAX_FILES), 10);
  const maxFiles = Number.isNaN(parsedMaxFiles) || parsedMaxFiles < 1 ? DEFAULT_MAX_FILES : parsedMaxFiles;
  const uploadArray = upload.array("files", maxFiles);
  uploadArray(req, res, (err) => {
    if (err) {
      import_core8.logger.error({ error: err }, "File upload error");
      return sendError(res, 400, "UPLOAD_ERROR", err.message);
    }
    // The handler's result is not awaited by the middleware callback, so attach a
    // rejection handler explicitly instead of leaving the promise floating.
    const processing = (async () => uploadKnowledgeHandler(req, res, runtime))();
    processing.catch((error) => {
      import_core8.logger.error({ error }, "Unhandled error while processing uploaded files");
      // Only answer when nothing has been written yet, to avoid a double response.
      if (!res.headersSent) {
        sendError(res, 500, "UPLOAD_ERROR", "Failed to process uploaded files", error instanceof Error ? error.message : String(error));
      }
    });
  });
}
|
|
3655
|
+
/**
 * Identity adapter used when registering routes: hands back the very same
 * function it was given, without wrapping or modifying it.
 *
 * @param handler - Route handler function.
 * @returns The `handler` argument, unchanged.
 */
function asRouteHandler(handler) {
  const routeHandler = handler;
  return routeHandler;
}
|
|
3658
|
+
// Route table of the knowledge plugin. The UI panel entry is the only one that
// carries a display name and the `public` flag; every other entry is a plain
// { type, path, handler } triple, so those are generated from a compact table.
var knowledgeRoutes = [
  {
    type: "GET",
    name: "Knowledge",
    path: "/display",
    handler: asRouteHandler(knowledgePanelHandler),
    public: true
  },
  ...[
    ["GET", "/assets/*", frontendAssetHandler],
    ["POST", "/documents", uploadKnowledgeWithMulter],
    ["GET", "/documents", getKnowledgeDocumentsHandler],
    ["GET", "/documents/:knowledgeId", getKnowledgeByIdHandler],
    ["DELETE", "/documents/:knowledgeId", deleteKnowledgeDocumentHandler],
    ["GET", "/knowledges", getKnowledgeChunksHandler],
    ["GET", "/search", searchKnowledgeHandler],
    ["GET", "/graph/nodes", getGraphNodesHandler],
    ["GET", "/graph/node/:nodeId", getGraphNodeDetailsHandler],
    ["GET", "/graph/expand/:documentId", expandDocumentGraphHandler]
  ].map(([type, path, routeHandler]) => ({
    type,
    path,
    handler: asRouteHandler(routeHandler)
  }))
];
|
|
3717
|
+
|
|
3718
|
+
// index.ts
|
|
3719
|
+
/**
 * Builds a knowledge plugin definition.
 *
 * The service and both providers are always included. Routes are attached when
 * either `enableUI` or `enableRoutes` is on; actions are attached when
 * `enableActions` is on. All flags default to true. `enableTests` is accepted
 * in the config but has no effect in this build.
 *
 * @param config - Optional flags: enableUI, enableRoutes, enableActions, enableTests.
 * @returns The plugin object (name, description, services, providers and,
 *          depending on the flags, routes and actions).
 */
function createKnowledgePlugin(config = {}) {
  const { enableUI = true, enableRoutes = true, enableActions = true } = config;
  const withRoutes = enableUI || enableRoutes;
  return {
    name: "knowledge",
    description: "Plugin for Retrieval Augmented Generation, including knowledge management and embedding.",
    services: [KnowledgeService],
    providers: [knowledgeProvider, documentsProvider],
    // Optional parts are spread in so that a disabled part leaves no key behind.
    ...(withRoutes ? { routes: knowledgeRoutes } : {}),
    ...(enableActions ? { actions: knowledgeActions } : {})
  };
}
|
|
3736
|
+
// Preset with every optional part switched off: service and providers only.
var knowledgePluginCore = createKnowledgePlugin({ enableUI: false, enableRoutes: false, enableActions: false, enableTests: false });

// Preset without UI or routes, but with the actions enabled.
var knowledgePluginHeadless = createKnowledgePlugin({ enableUI: false, enableRoutes: false, enableActions: true, enableTests: false });

// Preset with every flag switched on.
var knowledgePlugin = createKnowledgePlugin({ enableUI: true, enableRoutes: true, enableActions: true, enableTests: true });
|
|
3754
|
+
|
|
3755
|
+
//# debugId=83F51FED4EE57D6B64756E2164756E21
|
|
3756
|
+
//# sourceMappingURL=index.node.cjs.map
|