@elizaos/plugin-knowledge 2.0.0-alpha.8 → 2.0.0-alpha.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,2875 +1,54 @@
1
- // actions.ts
2
- import * as fs2 from "node:fs";
3
- import * as path2 from "node:path";
4
- import { logger as logger5, stringToUuid } from "@elizaos/core";
5
-
6
- // service.ts
7
- import {
8
- createUniqueUuid,
9
- logger as logger4,
10
- MemoryType as MemoryType2,
11
- ModelType as ModelType2,
12
- Semaphore,
13
- Service,
14
- splitChunks as splitChunks2
15
- } from "@elizaos/core";
16
-
17
- // config.ts
18
- import z2 from "zod";
19
-
20
- // types.ts
21
- import z from "zod";
22
- var ModelConfigSchema = z.object({
23
- EMBEDDING_PROVIDER: z.enum(["openai", "google"]).optional(),
24
- TEXT_PROVIDER: z.enum(["openai", "anthropic", "openrouter", "google"]).optional(),
25
- OPENAI_API_KEY: z.string().optional(),
26
- ANTHROPIC_API_KEY: z.string().optional(),
27
- OPENROUTER_API_KEY: z.string().optional(),
28
- GOOGLE_API_KEY: z.string().optional(),
29
- OPENAI_BASE_URL: z.string().optional(),
30
- ANTHROPIC_BASE_URL: z.string().optional(),
31
- OPENROUTER_BASE_URL: z.string().optional(),
32
- GOOGLE_BASE_URL: z.string().optional(),
33
- TEXT_EMBEDDING_MODEL: z.string(),
34
- TEXT_MODEL: z.string().optional(),
35
- MAX_INPUT_TOKENS: z.string().or(z.number()).transform((val) => typeof val === "string" ? parseInt(val, 10) : val),
36
- MAX_OUTPUT_TOKENS: z.string().or(z.number()).optional().transform((val) => val ? typeof val === "string" ? parseInt(val, 10) : val : 4096),
37
- EMBEDDING_DIMENSION: z.string().or(z.number()).optional().transform((val) => val ? typeof val === "string" ? parseInt(val, 10) : val : 1536),
38
- LOAD_DOCS_ON_STARTUP: z.boolean().default(false),
39
- CTX_KNOWLEDGE_ENABLED: z.boolean().default(false),
40
- RATE_LIMIT_ENABLED: z.boolean().default(true),
41
- MAX_CONCURRENT_REQUESTS: z.string().or(z.number()).optional().transform((val) => val ? typeof val === "string" ? parseInt(val, 10) : val : 150),
42
- REQUESTS_PER_MINUTE: z.string().or(z.number()).optional().transform((val) => val ? typeof val === "string" ? parseInt(val, 10) : val : 300),
43
- TOKENS_PER_MINUTE: z.string().or(z.number()).optional().transform((val) => val ? typeof val === "string" ? parseInt(val, 10) : val : 750000),
44
- BATCH_DELAY_MS: z.string().or(z.number()).optional().transform((val) => val ? typeof val === "string" ? parseInt(val, 10) : val : 100)
45
- });
46
- var KnowledgeServiceType = {
47
- KNOWLEDGE: "knowledge"
48
- };
49
-
50
- // config.ts
51
- var parseBooleanEnv = (value) => {
52
- if (typeof value === "boolean")
53
- return value;
54
- if (typeof value === "number")
55
- return value !== 0;
56
- if (typeof value === "string")
57
- return value.toLowerCase() === "true";
58
- return false;
59
- };
60
- function validateModelConfig(runtime) {
61
- try {
62
- const getSetting = (key, defaultValue) => {
63
- if (runtime) {
64
- return runtime.getSetting(key) || process.env[key] || defaultValue;
65
- }
66
- return process.env[key] || defaultValue;
67
- };
68
- const ctxKnowledgeEnabled = parseBooleanEnv(getSetting("CTX_KNOWLEDGE_ENABLED", "false"));
69
- const embeddingProvider = getSetting("EMBEDDING_PROVIDER");
70
- const assumePluginOpenAI = !embeddingProvider;
71
- const textEmbeddingModel = getSetting("TEXT_EMBEDDING_MODEL") || getSetting("OPENAI_EMBEDDING_MODEL") || "text-embedding-3-small";
72
- const embeddingDimension = getSetting("EMBEDDING_DIMENSION") || getSetting("OPENAI_EMBEDDING_DIMENSIONS") || "1536";
73
- const openaiApiKey = getSetting("OPENAI_API_KEY");
74
- const config = ModelConfigSchema.parse({
75
- EMBEDDING_PROVIDER: embeddingProvider,
76
- TEXT_PROVIDER: getSetting("TEXT_PROVIDER"),
77
- OPENAI_API_KEY: openaiApiKey,
78
- ANTHROPIC_API_KEY: getSetting("ANTHROPIC_API_KEY"),
79
- OPENROUTER_API_KEY: getSetting("OPENROUTER_API_KEY"),
80
- GOOGLE_API_KEY: getSetting("GOOGLE_API_KEY"),
81
- OPENAI_BASE_URL: getSetting("OPENAI_BASE_URL"),
82
- ANTHROPIC_BASE_URL: getSetting("ANTHROPIC_BASE_URL"),
83
- OPENROUTER_BASE_URL: getSetting("OPENROUTER_BASE_URL"),
84
- GOOGLE_BASE_URL: getSetting("GOOGLE_BASE_URL"),
85
- TEXT_EMBEDDING_MODEL: textEmbeddingModel,
86
- TEXT_MODEL: getSetting("TEXT_MODEL"),
87
- MAX_INPUT_TOKENS: getSetting("MAX_INPUT_TOKENS", "4000"),
88
- MAX_OUTPUT_TOKENS: getSetting("MAX_OUTPUT_TOKENS", "4096"),
89
- EMBEDDING_DIMENSION: embeddingDimension,
90
- LOAD_DOCS_ON_STARTUP: parseBooleanEnv(getSetting("LOAD_DOCS_ON_STARTUP")),
91
- CTX_KNOWLEDGE_ENABLED: ctxKnowledgeEnabled,
92
- RATE_LIMIT_ENABLED: parseBooleanEnv(getSetting("RATE_LIMIT_ENABLED", "true")),
93
- MAX_CONCURRENT_REQUESTS: getSetting("MAX_CONCURRENT_REQUESTS", "100"),
94
- REQUESTS_PER_MINUTE: getSetting("REQUESTS_PER_MINUTE", "500"),
95
- TOKENS_PER_MINUTE: getSetting("TOKENS_PER_MINUTE", "1000000"),
96
- BATCH_DELAY_MS: getSetting("BATCH_DELAY_MS", "100")
97
- });
98
- validateConfigRequirements(config, assumePluginOpenAI);
99
- return config;
100
- } catch (error) {
101
- if (error instanceof z2.ZodError) {
102
- const issues = error.issues.map((issue) => `${issue.path.join(".")}: ${issue.message}`).join(", ");
103
- throw new Error(`Model configuration validation failed: ${issues}`);
104
- }
105
- throw error;
106
- }
107
- }
108
- function validateConfigRequirements(config, assumePluginOpenAI) {
109
- const embeddingProvider = config.EMBEDDING_PROVIDER;
110
- if (embeddingProvider === "openai" && !config.OPENAI_API_KEY) {
111
- throw new Error('OPENAI_API_KEY is required when EMBEDDING_PROVIDER is set to "openai"');
112
- }
113
- if (embeddingProvider === "google" && !config.GOOGLE_API_KEY) {
114
- throw new Error('GOOGLE_API_KEY is required when EMBEDDING_PROVIDER is set to "google"');
115
- }
116
- if (assumePluginOpenAI && config.OPENAI_API_KEY && !config.TEXT_EMBEDDING_MODEL) {
117
- throw new Error("OPENAI_EMBEDDING_MODEL is required when using plugin-openai configuration");
118
- }
119
- if (config.CTX_KNOWLEDGE_ENABLED) {
120
- if (config.TEXT_PROVIDER === "openai" && !config.OPENAI_API_KEY) {
121
- throw new Error('OPENAI_API_KEY is required when TEXT_PROVIDER is set to "openai"');
122
- }
123
- if (config.TEXT_PROVIDER === "anthropic" && !config.ANTHROPIC_API_KEY) {
124
- throw new Error('ANTHROPIC_API_KEY is required when TEXT_PROVIDER is set to "anthropic"');
125
- }
126
- if (config.TEXT_PROVIDER === "openrouter" && !config.OPENROUTER_API_KEY) {
127
- throw new Error('OPENROUTER_API_KEY is required when TEXT_PROVIDER is set to "openrouter"');
128
- }
129
- if (config.TEXT_PROVIDER === "google" && !config.GOOGLE_API_KEY) {
130
- throw new Error('GOOGLE_API_KEY is required when TEXT_PROVIDER is set to "google"');
131
- }
132
- }
133
- }
134
- async function getProviderRateLimits(runtime) {
135
- const config = validateModelConfig(runtime);
136
- const rateLimitEnabled = config.RATE_LIMIT_ENABLED;
137
- const maxConcurrentRequests = config.MAX_CONCURRENT_REQUESTS;
138
- const requestsPerMinute = config.REQUESTS_PER_MINUTE;
139
- const tokensPerMinute = config.TOKENS_PER_MINUTE;
140
- const batchDelayMs = config.BATCH_DELAY_MS;
141
- const primaryProvider = config.TEXT_PROVIDER || config.EMBEDDING_PROVIDER;
142
- if (!rateLimitEnabled) {
143
- return {
144
- maxConcurrentRequests,
145
- requestsPerMinute: Number.MAX_SAFE_INTEGER,
146
- tokensPerMinute: Number.MAX_SAFE_INTEGER,
147
- provider: primaryProvider || "unlimited",
148
- rateLimitEnabled: false,
149
- batchDelayMs
150
- };
151
- }
152
- return {
153
- maxConcurrentRequests,
154
- requestsPerMinute,
155
- tokensPerMinute,
156
- provider: primaryProvider || "unlimited",
157
- rateLimitEnabled: true,
158
- batchDelayMs
159
- };
160
- }
161
-
162
- // docs-loader.ts
163
- import * as fs from "node:fs";
164
- import * as path from "node:path";
165
- import { logger } from "@elizaos/core";
166
-
167
- // utils.ts
168
- import { Buffer as Buffer2 } from "node:buffer";
169
- import { createHash as createHash2 } from "node:crypto";
170
- import * as mammoth from "mammoth";
171
- import { extractText } from "unpdf";
172
- // ../../../node_modules/.bun/uuid@13.0.0/node_modules/uuid/dist-node/native.js
173
- import { randomUUID } from "node:crypto";
174
- var native_default = { randomUUID };
175
-
176
- // ../../../node_modules/.bun/uuid@13.0.0/node_modules/uuid/dist-node/rng.js
177
- import { randomFillSync } from "node:crypto";
178
- var rnds8Pool = new Uint8Array(256);
179
- var poolPtr = rnds8Pool.length;
180
- function rng() {
181
- if (poolPtr > rnds8Pool.length - 16) {
182
- randomFillSync(rnds8Pool);
183
- poolPtr = 0;
184
- }
185
- return rnds8Pool.slice(poolPtr, poolPtr += 16);
186
- }
187
-
188
- // ../../../node_modules/.bun/uuid@13.0.0/node_modules/uuid/dist-node/regex.js
189
- var regex_default = /^(?:[0-9a-f]{8}-[0-9a-f]{4}-[1-8][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}|00000000-0000-0000-0000-000000000000|ffffffff-ffff-ffff-ffff-ffffffffffff)$/i;
190
-
191
- // ../../../node_modules/.bun/uuid@13.0.0/node_modules/uuid/dist-node/validate.js
192
- function validate(uuid) {
193
- return typeof uuid === "string" && regex_default.test(uuid);
194
- }
195
- var validate_default = validate;
196
-
197
- // ../../../node_modules/.bun/uuid@13.0.0/node_modules/uuid/dist-node/stringify.js
198
- var byteToHex = [];
199
- for (let i = 0;i < 256; ++i) {
200
- byteToHex.push((i + 256).toString(16).slice(1));
201
- }
202
- function unsafeStringify(arr, offset = 0) {
203
- return (byteToHex[arr[offset + 0]] + byteToHex[arr[offset + 1]] + byteToHex[arr[offset + 2]] + byteToHex[arr[offset + 3]] + "-" + byteToHex[arr[offset + 4]] + byteToHex[arr[offset + 5]] + "-" + byteToHex[arr[offset + 6]] + byteToHex[arr[offset + 7]] + "-" + byteToHex[arr[offset + 8]] + byteToHex[arr[offset + 9]] + "-" + byteToHex[arr[offset + 10]] + byteToHex[arr[offset + 11]] + byteToHex[arr[offset + 12]] + byteToHex[arr[offset + 13]] + byteToHex[arr[offset + 14]] + byteToHex[arr[offset + 15]]).toLowerCase();
204
- }
205
-
206
- // ../../../node_modules/.bun/uuid@13.0.0/node_modules/uuid/dist-node/v4.js
207
- function _v4(options, buf, offset) {
208
- options = options || {};
209
- const rnds = options.random ?? options.rng?.() ?? rng();
210
- if (rnds.length < 16) {
211
- throw new Error("Random bytes length must be >= 16");
212
- }
213
- rnds[6] = rnds[6] & 15 | 64;
214
- rnds[8] = rnds[8] & 63 | 128;
215
- if (buf) {
216
- offset = offset || 0;
217
- if (offset < 0 || offset + 16 > buf.length) {
218
- throw new RangeError(`UUID byte range ${offset}:${offset + 15} is out of buffer bounds`);
219
- }
220
- for (let i = 0;i < 16; ++i) {
221
- buf[offset + i] = rnds[i];
222
- }
223
- return buf;
224
- }
225
- return unsafeStringify(rnds);
226
- }
227
- function v4(options, buf, offset) {
228
- if (native_default.randomUUID && !buf && !options) {
229
- return native_default.randomUUID();
230
- }
231
- return _v4(options, buf, offset);
232
- }
233
- var v4_default = v4;
234
- // ../../../node_modules/.bun/uuid@13.0.0/node_modules/uuid/dist-node/sha1.js
235
- import { createHash } from "node:crypto";
236
- function sha1(bytes) {
237
- if (Array.isArray(bytes)) {
238
- bytes = Buffer.from(bytes);
239
- } else if (typeof bytes === "string") {
240
- bytes = Buffer.from(bytes, "utf8");
241
- }
242
- return createHash("sha1").update(bytes).digest();
243
- }
244
- var sha1_default = sha1;
245
-
246
- // ../../../node_modules/.bun/uuid@13.0.0/node_modules/uuid/dist-node/parse.js
247
- function parse(uuid) {
248
- if (!validate_default(uuid)) {
249
- throw TypeError("Invalid UUID");
250
- }
251
- let v;
252
- return Uint8Array.of((v = parseInt(uuid.slice(0, 8), 16)) >>> 24, v >>> 16 & 255, v >>> 8 & 255, v & 255, (v = parseInt(uuid.slice(9, 13), 16)) >>> 8, v & 255, (v = parseInt(uuid.slice(14, 18), 16)) >>> 8, v & 255, (v = parseInt(uuid.slice(19, 23), 16)) >>> 8, v & 255, (v = parseInt(uuid.slice(24, 36), 16)) / 1099511627776 & 255, v / 4294967296 & 255, v >>> 24 & 255, v >>> 16 & 255, v >>> 8 & 255, v & 255);
253
- }
254
- var parse_default = parse;
255
-
256
- // ../../../node_modules/.bun/uuid@13.0.0/node_modules/uuid/dist-node/v35.js
257
- function stringToBytes(str) {
258
- str = unescape(encodeURIComponent(str));
259
- const bytes = new Uint8Array(str.length);
260
- for (let i = 0;i < str.length; ++i) {
261
- bytes[i] = str.charCodeAt(i);
262
- }
263
- return bytes;
264
- }
265
- var DNS = "6ba7b810-9dad-11d1-80b4-00c04fd430c8";
266
- var URL2 = "6ba7b811-9dad-11d1-80b4-00c04fd430c8";
267
- function v35(version, hash, value, namespace, buf, offset) {
268
- const valueBytes = typeof value === "string" ? stringToBytes(value) : value;
269
- const namespaceBytes = typeof namespace === "string" ? parse_default(namespace) : namespace;
270
- if (typeof namespace === "string") {
271
- namespace = parse_default(namespace);
272
- }
273
- if (namespace?.length !== 16) {
274
- throw TypeError("Namespace must be array-like (16 iterable integer values, 0-255)");
275
- }
276
- let bytes = new Uint8Array(16 + valueBytes.length);
277
- bytes.set(namespaceBytes);
278
- bytes.set(valueBytes, namespaceBytes.length);
279
- bytes = hash(bytes);
280
- bytes[6] = bytes[6] & 15 | version;
281
- bytes[8] = bytes[8] & 63 | 128;
282
- if (buf) {
283
- offset = offset || 0;
284
- for (let i = 0;i < 16; ++i) {
285
- buf[offset + i] = bytes[i];
286
- }
287
- return buf;
288
- }
289
- return unsafeStringify(bytes);
290
- }
291
-
292
- // ../../../node_modules/.bun/uuid@13.0.0/node_modules/uuid/dist-node/v5.js
293
- function v5(value, namespace, buf, offset) {
294
- return v35(80, sha1_default, value, namespace, buf, offset);
295
- }
296
- v5.DNS = DNS;
297
- v5.URL = URL2;
298
- var v5_default = v5;
299
- // utils.ts
300
- var PLAIN_TEXT_CONTENT_TYPES = [
301
- "application/typescript",
302
- "text/typescript",
303
- "text/x-python",
304
- "application/x-python-code",
305
- "application/yaml",
306
- "text/yaml",
307
- "application/x-yaml",
308
- "application/json",
309
- "text/markdown",
310
- "text/csv"
311
- ];
312
- var MAX_FALLBACK_SIZE_BYTES = 5 * 1024 * 1024;
313
- var BINARY_CHECK_BYTES = 1024;
314
- async function extractTextFromFileBuffer(fileBuffer, contentType, originalFilename) {
315
- const lowerContentType = contentType.toLowerCase();
316
- if (lowerContentType === "application/vnd.openxmlformats-officedocument.wordprocessingml.document") {
317
- try {
318
- const result = await mammoth.extractRawText({ buffer: fileBuffer });
319
- return result.value;
320
- } catch (docxError) {
321
- const errorMessage = docxError instanceof Error ? docxError.message : String(docxError);
322
- throw new Error(`Failed to parse DOCX file ${originalFilename}: ${errorMessage}`);
323
- }
324
- } else if (lowerContentType === "application/msword" || originalFilename.toLowerCase().endsWith(".doc")) {
325
- return `[Microsoft Word Document: ${originalFilename}]
326
-
327
- This document was indexed for search but cannot be displayed directly in the browser. The original document content is preserved for retrieval purposes.`;
328
- } else if (lowerContentType.startsWith("text/") || PLAIN_TEXT_CONTENT_TYPES.includes(lowerContentType)) {
329
- return fileBuffer.toString("utf-8");
330
- } else {
331
- if (fileBuffer.length > MAX_FALLBACK_SIZE_BYTES) {
332
- throw new Error(`File ${originalFilename} exceeds maximum size for fallback (${MAX_FALLBACK_SIZE_BYTES} bytes)`);
333
- }
334
- const initialBytes = fileBuffer.subarray(0, Math.min(fileBuffer.length, BINARY_CHECK_BYTES));
335
- if (initialBytes.includes(0)) {
336
- throw new Error(`File ${originalFilename} appears to be binary based on initial byte check`);
337
- }
338
- try {
339
- const textContent = fileBuffer.toString("utf-8");
340
- if (textContent.includes("�")) {
341
- throw new Error(`File ${originalFilename} seems to be binary or has encoding issues (detected �)`);
342
- }
343
- return textContent;
344
- } catch (_fallbackError) {
345
- throw new Error(`Unsupported content type: ${contentType} for ${originalFilename}. Fallback to plain text failed`);
346
- }
347
- }
348
- }
349
- async function convertPdfToTextFromBuffer(pdfBuffer, _filename) {
350
- try {
351
- const uint8Array = new Uint8Array(pdfBuffer.buffer.slice(pdfBuffer.byteOffset, pdfBuffer.byteOffset + pdfBuffer.byteLength));
352
- const result = await extractText(uint8Array, {
353
- mergePages: true
354
- });
355
- if (!result.text || result.text.trim().length === 0) {
356
- return "";
357
- }
358
- const cleanedText = result.text.split(`
359
- `).map((line) => line.trim()).filter((line) => line.length > 0).join(`
360
- `).replace(/\n{3,}/g, `
361
-
362
- `);
363
- return cleanedText;
364
- } catch (error) {
365
- const errorMessage = error instanceof Error ? error.message : String(error);
366
- throw new Error(`Failed to convert PDF to text: ${errorMessage}`);
367
- }
368
- }
369
- function isBinaryContentType(contentType, filename) {
370
- const textContentTypes = [
371
- "text/",
372
- "application/json",
373
- "application/xml",
374
- "application/javascript",
375
- "application/typescript",
376
- "application/x-yaml",
377
- "application/x-sh"
378
- ];
379
- const isTextMimeType = textContentTypes.some((type) => contentType.includes(type));
380
- if (isTextMimeType) {
381
- return false;
382
- }
383
- const binaryContentTypes = [
384
- "application/pdf",
385
- "application/msword",
386
- "application/vnd.openxmlformats-officedocument",
387
- "application/vnd.ms-excel",
388
- "application/vnd.ms-powerpoint",
389
- "application/zip",
390
- "application/x-zip-compressed",
391
- "application/octet-stream",
392
- "image/",
393
- "audio/",
394
- "video/"
395
- ];
396
- const isBinaryMimeType = binaryContentTypes.some((type) => contentType.includes(type));
397
- if (isBinaryMimeType) {
398
- return true;
399
- }
400
- const fileExt = filename.split(".").pop()?.toLowerCase() || "";
401
- const textExtensions = [
402
- "txt",
403
- "md",
404
- "markdown",
405
- "json",
406
- "xml",
407
- "html",
408
- "htm",
409
- "css",
410
- "js",
411
- "ts",
412
- "jsx",
413
- "tsx",
414
- "yaml",
415
- "yml",
416
- "toml",
417
- "ini",
418
- "cfg",
419
- "conf",
420
- "sh",
421
- "bash",
422
- "zsh",
423
- "fish",
424
- "py",
425
- "rb",
426
- "go",
427
- "rs",
428
- "java",
429
- "c",
430
- "cpp",
431
- "h",
432
- "hpp",
433
- "cs",
434
- "php",
435
- "sql",
436
- "r",
437
- "swift",
438
- "kt",
439
- "scala",
440
- "clj",
441
- "ex",
442
- "exs",
443
- "vim",
444
- "env",
445
- "gitignore",
446
- "dockerignore",
447
- "editorconfig",
448
- "log",
449
- "csv",
450
- "tsv",
451
- "properties",
452
- "gradle",
453
- "sbt",
454
- "makefile",
455
- "dockerfile",
456
- "vagrantfile",
457
- "gemfile",
458
- "rakefile",
459
- "podfile",
460
- "csproj",
461
- "vbproj",
462
- "fsproj",
463
- "sln",
464
- "pom"
465
- ];
466
- if (textExtensions.includes(fileExt)) {
467
- return false;
468
- }
469
- const binaryExtensions = [
470
- "pdf",
471
- "docx",
472
- "doc",
473
- "xls",
474
- "xlsx",
475
- "ppt",
476
- "pptx",
477
- "zip",
478
- "rar",
479
- "7z",
480
- "tar",
481
- "gz",
482
- "bz2",
483
- "xz",
484
- "jpg",
485
- "jpeg",
486
- "png",
487
- "gif",
488
- "bmp",
489
- "svg",
490
- "ico",
491
- "webp",
492
- "mp3",
493
- "mp4",
494
- "avi",
495
- "mov",
496
- "wmv",
497
- "flv",
498
- "wav",
499
- "flac",
500
- "ogg",
501
- "exe",
502
- "dll",
503
- "so",
504
- "dylib",
505
- "bin",
506
- "dat",
507
- "db",
508
- "sqlite"
509
- ];
510
- return binaryExtensions.includes(fileExt);
511
- }
512
- function normalizeS3Url(url) {
513
- try {
514
- const urlObj = new URL(url);
515
- return `${urlObj.origin}${urlObj.pathname}`;
516
- } catch {
517
- return url;
518
- }
519
- }
520
- async function fetchUrlContent(url) {
521
- try {
522
- const controller = new AbortController;
523
- const timeoutId = setTimeout(() => controller.abort(), 30000);
524
- const response = await fetch(url, {
525
- signal: controller.signal,
526
- headers: {
527
- "User-Agent": "Eliza-Knowledge-Plugin/1.0"
528
- }
529
- });
530
- clearTimeout(timeoutId);
531
- if (!response.ok) {
532
- throw new Error(`Failed to fetch URL: ${response.status} ${response.statusText}`);
533
- }
534
- const contentType = response.headers.get("content-type") || "application/octet-stream";
535
- const arrayBuffer = await response.arrayBuffer();
536
- const buffer = Buffer2.from(arrayBuffer);
537
- const base64Content = buffer.toString("base64");
538
- return {
539
- content: base64Content,
540
- contentType
541
- };
542
- } catch (error) {
543
- const errorMessage = error instanceof Error ? error.message : String(error);
544
- throw new Error(`Failed to fetch content from URL: ${errorMessage}`);
545
- }
546
- }
547
- function looksLikeBase64(content) {
548
- if (!content || content.length === 0)
549
- return false;
550
- const cleanContent = content.replace(/\s/g, "");
551
- if (cleanContent.length < 16)
552
- return false;
553
- if (cleanContent.length % 4 !== 0)
554
- return false;
555
- const base64Regex = /^[A-Za-z0-9+/]*={0,2}$/;
556
- if (!base64Regex.test(cleanContent))
557
- return false;
558
- const hasNumbers = /\d/.test(cleanContent);
559
- const hasUpperCase = /[A-Z]/.test(cleanContent);
560
- const hasLowerCase = /[a-z]/.test(cleanContent);
561
- return (hasNumbers || hasUpperCase) && hasLowerCase;
562
- }
563
- function generateContentBasedId(content, agentId, options) {
564
- const { maxChars = 2000, includeFilename, contentType } = options || {};
565
- let contentForHashing;
566
- if (looksLikeBase64(content)) {
567
- try {
568
- const decoded = Buffer2.from(content, "base64").toString("utf8");
569
- if (!decoded.includes("�") || contentType?.includes("pdf")) {
570
- contentForHashing = content.slice(0, maxChars);
571
- } else {
572
- contentForHashing = decoded.slice(0, maxChars);
573
- }
574
- } catch {
575
- contentForHashing = content.slice(0, maxChars);
576
- }
577
- } else {
578
- contentForHashing = content.slice(0, maxChars);
579
- }
580
- contentForHashing = contentForHashing.replace(/\r\n/g, `
581
- `).replace(/\r/g, `
582
- `).trim();
583
- const componentsToHash = [agentId, contentForHashing, includeFilename || ""].filter(Boolean).join("::");
584
- const hash = createHash2("sha256").update(componentsToHash).digest("hex");
585
- const DOCUMENT_NAMESPACE = "6ba7b810-9dad-11d1-80b4-00c04fd430c8";
586
- return v5_default(hash, DOCUMENT_NAMESPACE);
587
- }
588
-
589
- // docs-loader.ts
590
- function getKnowledgePath(runtimePath) {
591
- const knowledgePath = runtimePath || process.env.KNOWLEDGE_PATH || path.join(process.cwd(), "docs");
592
- const resolvedPath = path.resolve(knowledgePath);
593
- if (!fs.existsSync(resolvedPath)) {
594
- logger.warn(`Knowledge path does not exist: ${resolvedPath}`);
595
- if (runtimePath) {
596
- logger.warn("Please create the directory or update KNOWLEDGE_PATH in agent settings");
597
- } else if (process.env.KNOWLEDGE_PATH) {
598
- logger.warn("Please create the directory or update KNOWLEDGE_PATH environment variable");
599
- } else {
600
- logger.info("To use the knowledge plugin, either:");
601
- logger.info('1. Create a "docs" folder in your project root');
602
- logger.info("2. Set KNOWLEDGE_PATH in agent settings or environment variable");
603
- }
604
- }
605
- return resolvedPath;
606
- }
607
- async function loadDocsFromPath(service, agentId, worldId, knowledgePath) {
608
- const docsPath = getKnowledgePath(knowledgePath);
609
- if (!fs.existsSync(docsPath)) {
610
- logger.warn(`Knowledge path does not exist: ${docsPath}`);
611
- return { total: 0, successful: 0, failed: 0 };
612
- }
613
- logger.info(`Loading documents from: ${docsPath}`);
614
- const files = getAllFiles(docsPath);
615
- if (files.length === 0) {
616
- logger.info("No files found in knowledge path");
617
- return { total: 0, successful: 0, failed: 0 };
618
- }
619
- logger.info(`Found ${files.length} files to process`);
620
- let successful = 0;
621
- let failed = 0;
622
- for (const filePath of files) {
623
- try {
624
- const fileName = path.basename(filePath);
625
- const fileExt = path.extname(filePath).toLowerCase();
626
- if (fileName.startsWith(".")) {
627
- continue;
628
- }
629
- const contentType = getContentType(fileExt);
630
- if (!contentType) {
631
- logger.debug(`Skipping unsupported file type: ${filePath}`);
632
- continue;
633
- }
634
- const fileBuffer = fs.readFileSync(filePath);
635
- const isBinary = isBinaryContentType(contentType, fileName);
636
- const content = isBinary ? fileBuffer.toString("base64") : fileBuffer.toString("utf-8");
637
- const knowledgeOptions = {
638
- clientDocumentId: "",
639
- contentType,
640
- originalFilename: fileName,
641
- worldId: worldId || agentId,
642
- content,
643
- roomId: agentId,
644
- entityId: agentId
645
- };
646
- logger.debug(`Processing document: ${fileName}`);
647
- const result = await service.addKnowledge(knowledgeOptions);
648
- logger.info(`✅ "${fileName}": ${result.fragmentCount} fragments created`);
649
- successful++;
650
- } catch (error) {
651
- logger.error({ error }, `Failed to process file ${filePath}`);
652
- failed++;
653
- }
654
- }
655
- logger.info(`Document loading complete: ${successful} successful, ${failed} failed out of ${files.length} total`);
656
- return {
657
- total: files.length,
658
- successful,
659
- failed
660
- };
661
- }
662
- function getAllFiles(dirPath, files = []) {
663
- try {
664
- const entries = fs.readdirSync(dirPath, { withFileTypes: true });
665
- for (const entry of entries) {
666
- const fullPath = path.join(dirPath, entry.name);
667
- if (entry.isDirectory()) {
668
- if (!["node_modules", ".git", ".vscode", "dist", "build"].includes(entry.name)) {
669
- getAllFiles(fullPath, files);
670
- }
671
- } else if (entry.isFile()) {
672
- files.push(fullPath);
673
- }
674
- }
675
- } catch (error) {
676
- logger.error({ error }, `Error reading directory ${dirPath}`);
677
- }
678
- return files;
679
- }
680
- function getContentType(extension) {
681
- const contentTypes = {
682
- ".txt": "text/plain",
683
- ".md": "text/markdown",
684
- ".markdown": "text/markdown",
685
- ".tson": "text/plain",
686
- ".xml": "application/xml",
687
- ".csv": "text/csv",
688
- ".tsv": "text/tab-separated-values",
689
- ".log": "text/plain",
690
- ".html": "text/html",
691
- ".htm": "text/html",
692
- ".css": "text/css",
693
- ".scss": "text/x-scss",
694
- ".sass": "text/x-sass",
695
- ".less": "text/x-less",
696
- ".js": "text/javascript",
697
- ".jsx": "text/javascript",
698
- ".ts": "text/typescript",
699
- ".tsx": "text/typescript",
700
- ".mjs": "text/javascript",
701
- ".cjs": "text/javascript",
702
- ".vue": "text/x-vue",
703
- ".svelte": "text/x-svelte",
704
- ".astro": "text/x-astro",
705
- ".py": "text/x-python",
706
- ".pyw": "text/x-python",
707
- ".pyi": "text/x-python",
708
- ".java": "text/x-java",
709
- ".kt": "text/x-kotlin",
710
- ".kts": "text/x-kotlin",
711
- ".scala": "text/x-scala",
712
- ".c": "text/x-c",
713
- ".cpp": "text/x-c++",
714
- ".cc": "text/x-c++",
715
- ".cxx": "text/x-c++",
716
- ".h": "text/x-c",
717
- ".hpp": "text/x-c++",
718
- ".cs": "text/x-csharp",
719
- ".php": "text/x-php",
720
- ".rb": "text/x-ruby",
721
- ".go": "text/x-go",
722
- ".rs": "text/x-rust",
723
- ".swift": "text/x-swift",
724
- ".r": "text/x-r",
725
- ".R": "text/x-r",
726
- ".m": "text/x-objectivec",
727
- ".mm": "text/x-objectivec",
728
- ".clj": "text/x-clojure",
729
- ".cljs": "text/x-clojure",
730
- ".ex": "text/x-elixir",
731
- ".exs": "text/x-elixir",
732
- ".lua": "text/x-lua",
733
- ".pl": "text/x-perl",
734
- ".pm": "text/x-perl",
735
- ".dart": "text/x-dart",
736
- ".hs": "text/x-haskell",
737
- ".elm": "text/x-elm",
738
- ".ml": "text/x-ocaml",
739
- ".fs": "text/x-fsharp",
740
- ".fsx": "text/x-fsharp",
741
- ".vb": "text/x-vb",
742
- ".pas": "text/x-pascal",
743
- ".d": "text/x-d",
744
- ".nim": "text/x-nim",
745
- ".zig": "text/x-zig",
746
- ".jl": "text/x-julia",
747
- ".tcl": "text/x-tcl",
748
- ".awk": "text/x-awk",
749
- ".sed": "text/x-sed",
750
- ".sh": "text/x-sh",
751
- ".bash": "text/x-sh",
752
- ".zsh": "text/x-sh",
753
- ".fish": "text/x-fish",
754
- ".ps1": "text/x-powershell",
755
- ".bat": "text/x-batch",
756
- ".cmd": "text/x-batch",
757
- ".json": "application/json",
758
- ".yaml": "text/x-yaml",
759
- ".yml": "text/x-yaml",
760
- ".toml": "text/x-toml",
761
- ".ini": "text/x-ini",
762
- ".cfg": "text/x-ini",
763
- ".conf": "text/x-ini",
764
- ".env": "text/plain",
765
- ".gitignore": "text/plain",
766
- ".dockerignore": "text/plain",
767
- ".editorconfig": "text/plain",
768
- ".properties": "text/x-properties",
769
- ".sql": "text/x-sql",
770
- ".pdf": "application/pdf",
771
- ".doc": "application/msword",
772
- ".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
773
- };
774
- return contentTypes[extension] || null;
775
- }
776
-
777
- // document-processor.ts
778
- import {
779
- logger as logger3,
780
- MemoryType,
781
- ModelType,
782
- splitChunks
783
- } from "@elizaos/core";
784
-
785
- // ctx-embeddings.ts
786
- var DEFAULT_CHUNK_TOKEN_SIZE = 500;
787
- var DEFAULT_CHUNK_OVERLAP_TOKENS = 100;
788
- var DEFAULT_CHARS_PER_TOKEN = 3.5;
789
- var CONTEXT_TARGETS = {
790
- DEFAULT: {
791
- MIN_TOKENS: 60,
792
- MAX_TOKENS: 120
793
- },
794
- PDF: {
795
- MIN_TOKENS: 80,
796
- MAX_TOKENS: 150
797
- },
798
- MATH_PDF: {
799
- MIN_TOKENS: 100,
800
- MAX_TOKENS: 180
801
- },
802
- CODE: {
803
- MIN_TOKENS: 100,
804
- MAX_TOKENS: 200
805
- },
806
- TECHNICAL: {
807
- MIN_TOKENS: 80,
808
- MAX_TOKENS: 160
809
- }
810
- };
811
- var SYSTEM_PROMPTS = {
812
- DEFAULT: "You are a precision text augmentation tool. Your task is to expand a given text chunk with its direct context from a larger document. You must: 1) Keep the original chunk intact; 2) Add critical context from surrounding text; 3) Never summarize or rephrase the original chunk; 4) Create contextually rich output for improved semantic retrieval.",
813
- CODE: "You are a precision code augmentation tool. Your task is to expand a given code chunk with necessary context from the larger codebase. You must: 1) Keep the original code chunk intact with exact syntax and indentation; 2) Add relevant imports, function signatures, or class definitions; 3) Include critical surrounding code context; 4) Create contextually rich output that maintains correct syntax.",
814
- PDF: "You are a precision document augmentation tool. Your task is to expand a given PDF text chunk with its direct context from the larger document. You must: 1) Keep the original chunk intact; 2) Add section headings, references, or figure captions; 3) Include text that immediately precedes and follows the chunk; 4) Create contextually rich output that maintains the document's original structure.",
815
- MATH_PDF: "You are a precision mathematical content augmentation tool. Your task is to expand a given mathematical text chunk with essential context. You must: 1) Keep original mathematical notations and expressions exactly as they appear; 2) Add relevant definitions, theorems, or equations from elsewhere in the document; 3) Preserve all LaTeX or mathematical formatting; 4) Create contextually rich output for improved mathematical comprehension.",
816
- TECHNICAL: "You are a precision technical documentation augmentation tool. Your task is to expand a technical document chunk with critical context. You must: 1) Keep the original chunk intact including all technical terminology; 2) Add relevant configuration examples, parameter definitions, or API references; 3) Include any prerequisite information; 4) Create contextually rich output that maintains technical accuracy."
817
- };
818
// Prompt templates for contextual chunk enrichment ("contextual retrieval").
// Placeholders {doc_content}, {chunk_content}, {min_tokens}, {max_tokens} are
// substituted by getContextualizationPrompt / getCachingContextualizationPrompt.
// The CACHED_* variants omit the inline <document> section: the document is
// delivered separately through provider prompt caching.

// Generic enrichment prompt with the full document inlined.
var CONTEXTUAL_CHUNK_ENRICHMENT_PROMPT_TEMPLATE = `
<document>
{doc_content}
</document>

Here is the chunk we want to situate within the whole document:
<chunk>
{chunk_content}
</chunk>

Create an enriched version of this chunk by adding critical surrounding context. Follow these guidelines:

1. Identify the document's main topic and key information relevant to understanding this chunk
2. Include 2-3 sentences before the chunk that provide essential context
3. Include 2-3 sentences after the chunk that complete thoughts or provide resolution
4. For technical documents, include any definitions or explanations of terms used in the chunk
5. For narrative content, include character or setting information needed to understand the chunk
6. Keep the original chunk text COMPLETELY INTACT and UNCHANGED in your response
7. Do not use phrases like "this chunk discusses" - directly present the context
8. The total length should be between {min_tokens} and {max_tokens} tokens
9. Format the response as a single coherent paragraph

Provide ONLY the enriched chunk text in your response:`;

// Cached variant of the generic prompt (no inline document).
var CACHED_CHUNK_PROMPT_TEMPLATE = `
Here is the chunk we want to situate within the whole document:
<chunk>
{chunk_content}
</chunk>

Create an enriched version of this chunk by adding critical surrounding context. Follow these guidelines:

1. Identify the document's main topic and key information relevant to understanding this chunk
2. Include 2-3 sentences before the chunk that provide essential context
3. Include 2-3 sentences after the chunk that complete thoughts or provide resolution
4. For technical documents, include any definitions or explanations of terms used in the chunk
5. For narrative content, include character or setting information needed to understand the chunk
6. Keep the original chunk text COMPLETELY INTACT and UNCHANGED in your response
7. Do not use phrases like "this chunk discusses" - directly present the context
8. The total length should be between {min_tokens} and {max_tokens} tokens
9. Format the response as a single coherent paragraph

Provide ONLY the enriched chunk text in your response:`;

// Cached variant for source-code chunks.
var CACHED_CODE_CHUNK_PROMPT_TEMPLATE = `
Here is the chunk of code we want to situate within the whole document:
<chunk>
{chunk_content}
</chunk>

Create an enriched version of this code chunk by adding critical surrounding context. Follow these guidelines:

1. Preserve ALL code syntax, indentation, and comments exactly as they appear
2. Include any import statements, function definitions, or class declarations that this code depends on
3. Add necessary type definitions or interfaces that are referenced in this chunk
4. Include any crucial comments from elsewhere in the document that explain this code
5. If there are key variable declarations or initializations earlier in the document, include those
6. Keep the original chunk COMPLETELY INTACT and UNCHANGED in your response
7. The total length should be between {min_tokens} and {max_tokens} tokens
8. Do NOT include implementation details for functions that are only called but not defined in this chunk

Provide ONLY the enriched code chunk in your response:`;

// Cached variant for PDFs with mathematical content.
var CACHED_MATH_PDF_PROMPT_TEMPLATE = `
Here is the chunk we want to situate within the whole document:
<chunk>
{chunk_content}
</chunk>

Create an enriched version of this chunk by adding critical surrounding context. This document contains mathematical content that requires special handling. Follow these guidelines:

1. Preserve ALL mathematical notation exactly as it appears in the chunk
2. Include any defining equations, variables, or parameters mentioned earlier in the document that relate to this chunk
3. Add section/subsection names or figure references if they help situate the chunk
4. If variables or symbols are defined elsewhere in the document, include these definitions
5. If mathematical expressions appear corrupted, try to infer their meaning from context
6. Keep the original chunk text COMPLETELY INTACT and UNCHANGED in your response
7. The total length should be between {min_tokens} and {max_tokens} tokens
8. Format the response as a coherent mathematical explanation

Provide ONLY the enriched chunk text in your response:`;

// Cached variant for technical documentation.
var CACHED_TECHNICAL_PROMPT_TEMPLATE = `
Here is the chunk we want to situate within the whole document:
<chunk>
{chunk_content}
</chunk>

Create an enriched version of this chunk by adding critical surrounding context. This appears to be technical documentation that requires special handling. Follow these guidelines:

1. Preserve ALL technical terminology, product names, and version numbers exactly as they appear
2. Include any prerequisite information or requirements mentioned earlier in the document
3. Add section/subsection headings or navigation path to situate this chunk within the document structure
4. Include any definitions of technical terms, acronyms, or jargon used in this chunk
5. If this chunk references specific configurations, include relevant parameter explanations
6. Keep the original chunk text COMPLETELY INTACT and UNCHANGED in your response
7. The total length should be between {min_tokens} and {max_tokens} tokens
8. Format the response maintaining any hierarchical structure present in the original

Provide ONLY the enriched chunk text in your response:`;

// Inline-document variant for PDFs with mathematical content.
var MATH_PDF_PROMPT_TEMPLATE = `
<document>
{doc_content}
</document>

Here is the chunk we want to situate within the whole document:
<chunk>
{chunk_content}
</chunk>

Create an enriched version of this chunk by adding critical surrounding context. This document contains mathematical content that requires special handling. Follow these guidelines:

1. Preserve ALL mathematical notation exactly as it appears in the chunk
2. Include any defining equations, variables, or parameters mentioned earlier in the document that relate to this chunk
3. Add section/subsection names or figure references if they help situate the chunk
4. If variables or symbols are defined elsewhere in the document, include these definitions
5. If mathematical expressions appear corrupted, try to infer their meaning from context
6. Keep the original chunk text COMPLETELY INTACT and UNCHANGED in your response
7. The total length should be between {min_tokens} and {max_tokens} tokens
8. Format the response as a coherent mathematical explanation

Provide ONLY the enriched chunk text in your response:`;

// Inline-document variant for source code.
var CODE_PROMPT_TEMPLATE = `
<document>
{doc_content}
</document>

Here is the chunk of code we want to situate within the whole document:
<chunk>
{chunk_content}
</chunk>

Create an enriched version of this code chunk by adding critical surrounding context. Follow these guidelines:

1. Preserve ALL code syntax, indentation, and comments exactly as they appear
2. Include any import statements, function definitions, or class declarations that this code depends on
3. Add necessary type definitions or interfaces that are referenced in this chunk
4. Include any crucial comments from elsewhere in the document that explain this code
5. If there are key variable declarations or initializations earlier in the document, include those
6. Keep the original chunk COMPLETELY INTACT and UNCHANGED in your response
7. The total length should be between {min_tokens} and {max_tokens} tokens
8. Do NOT include implementation details for functions that are only called but not defined in this chunk

Provide ONLY the enriched code chunk in your response:`;

// Inline-document variant for technical documentation.
var TECHNICAL_PROMPT_TEMPLATE = `
<document>
{doc_content}
</document>

Here is the chunk we want to situate within the whole document:
<chunk>
{chunk_content}
</chunk>

Create an enriched version of this chunk by adding critical surrounding context. This appears to be technical documentation that requires special handling. Follow these guidelines:

1. Preserve ALL technical terminology, product names, and version numbers exactly as they appear
2. Include any prerequisite information or requirements mentioned earlier in the document
3. Add section/subsection headings or navigation path to situate this chunk within the document structure
4. Include any definitions of technical terms, acronyms, or jargon used in this chunk
5. If this chunk references specific configurations, include relevant parameter explanations
6. Keep the original chunk text COMPLETELY INTACT and UNCHANGED in your response
7. The total length should be between {min_tokens} and {max_tokens} tokens
8. Format the response maintaining any hierarchical structure present in the original

Provide ONLY the enriched chunk text in your response:`;
- function getContextualizationPrompt(docContent, chunkContent, minTokens = CONTEXT_TARGETS.DEFAULT.MIN_TOKENS, maxTokens = CONTEXT_TARGETS.DEFAULT.MAX_TOKENS, promptTemplate = CONTEXTUAL_CHUNK_ENRICHMENT_PROMPT_TEMPLATE) {
981
- if (!docContent || !chunkContent) {
982
- return "Error: Document or chunk content missing.";
983
- }
984
- const chunkTokens = Math.ceil(chunkContent.length / DEFAULT_CHARS_PER_TOKEN);
985
- if (chunkTokens > maxTokens * 0.7) {
986
- maxTokens = Math.ceil(chunkTokens * 1.3);
987
- minTokens = chunkTokens;
988
- }
989
- return promptTemplate.replace("{doc_content}", docContent).replace("{chunk_content}", chunkContent).replace("{min_tokens}", minTokens.toString()).replace("{max_tokens}", maxTokens.toString());
990
- }
991
- function getCachingContextualizationPrompt(chunkContent, contentType, minTokens = CONTEXT_TARGETS.DEFAULT.MIN_TOKENS, maxTokens = CONTEXT_TARGETS.DEFAULT.MAX_TOKENS) {
992
- if (!chunkContent) {
993
- return {
994
- prompt: "Error: Chunk content missing.",
995
- systemPrompt: SYSTEM_PROMPTS.DEFAULT
996
- };
997
- }
998
- const chunkTokens = Math.ceil(chunkContent.length / DEFAULT_CHARS_PER_TOKEN);
999
- if (chunkTokens > maxTokens * 0.7) {
1000
- maxTokens = Math.ceil(chunkTokens * 1.3);
1001
- minTokens = chunkTokens;
1002
- }
1003
- let promptTemplate = CACHED_CHUNK_PROMPT_TEMPLATE;
1004
- let systemPrompt = SYSTEM_PROMPTS.DEFAULT;
1005
- if (contentType) {
1006
- if (contentType.includes("javascript") || contentType.includes("typescript") || contentType.includes("python") || contentType.includes("java") || contentType.includes("c++") || contentType.includes("code")) {
1007
- promptTemplate = CACHED_CODE_CHUNK_PROMPT_TEMPLATE;
1008
- systemPrompt = SYSTEM_PROMPTS.CODE;
1009
- } else if (contentType.includes("pdf")) {
1010
- if (containsMathematicalContent(chunkContent)) {
1011
- promptTemplate = CACHED_MATH_PDF_PROMPT_TEMPLATE;
1012
- systemPrompt = SYSTEM_PROMPTS.MATH_PDF;
1013
- } else {
1014
- systemPrompt = SYSTEM_PROMPTS.PDF;
1015
- }
1016
- } else if (contentType.includes("markdown") || contentType.includes("text/html") || isTechnicalDocumentation(chunkContent)) {
1017
- promptTemplate = CACHED_TECHNICAL_PROMPT_TEMPLATE;
1018
- systemPrompt = SYSTEM_PROMPTS.TECHNICAL;
1019
- }
1020
- }
1021
- const formattedPrompt = promptTemplate.replace("{chunk_content}", chunkContent).replace("{min_tokens}", minTokens.toString()).replace("{max_tokens}", maxTokens.toString());
1022
- return {
1023
- prompt: formattedPrompt,
1024
- systemPrompt
1025
- };
1026
- }
1027
// Chooses the token budget and template for a MIME type, then delegates to
// getContextualizationPrompt. PDFs with detected math and code-like MIME
// types get specialized templates; markdown/HTML or technical-looking text
// gets the technical template; everything else uses the defaults.
function getPromptForMimeType(mimeType, docContent, chunkContent) {
  let targets = CONTEXT_TARGETS.DEFAULT;
  let promptTemplate = CONTEXTUAL_CHUNK_ENRICHMENT_PROMPT_TEMPLATE;
  const codeLike = mimeType.includes("javascript") || mimeType.includes("typescript") || mimeType.includes("python") || mimeType.includes("java") || mimeType.includes("c++") || mimeType.includes("code");
  if (mimeType.includes("pdf")) {
    if (containsMathematicalContent(docContent)) {
      targets = CONTEXT_TARGETS.MATH_PDF;
      promptTemplate = MATH_PDF_PROMPT_TEMPLATE;
    } else {
      targets = CONTEXT_TARGETS.PDF;
    }
  } else if (codeLike) {
    targets = CONTEXT_TARGETS.CODE;
    promptTemplate = CODE_PROMPT_TEMPLATE;
  } else if (isTechnicalDocumentation(docContent) || mimeType.includes("markdown") || mimeType.includes("text/html")) {
    targets = CONTEXT_TARGETS.TECHNICAL;
    promptTemplate = TECHNICAL_PROMPT_TEMPLATE;
  }
  return getContextualizationPrompt(docContent, chunkContent, targets.MIN_TOKENS, targets.MAX_TOKENS, promptTemplate);
}
// Caching-path counterpart of getPromptForMimeType: only the token budget
// varies by MIME type here; template/system-prompt selection happens inside
// getCachingContextualizationPrompt (which receives the MIME type).
function getCachingPromptForMimeType(mimeType, chunkContent) {
  let targets = CONTEXT_TARGETS.DEFAULT;
  const codeLike = mimeType.includes("javascript") || mimeType.includes("typescript") || mimeType.includes("python") || mimeType.includes("java") || mimeType.includes("c++") || mimeType.includes("code");
  if (mimeType.includes("pdf")) {
    targets = containsMathematicalContent(chunkContent) ? CONTEXT_TARGETS.MATH_PDF : CONTEXT_TARGETS.PDF;
  } else if (codeLike) {
    targets = CONTEXT_TARGETS.CODE;
  } else if (isTechnicalDocumentation(chunkContent) || mimeType.includes("markdown") || mimeType.includes("text/html")) {
    targets = CONTEXT_TARGETS.TECHNICAL;
  }
  return getCachingContextualizationPrompt(chunkContent, mimeType, targets.MIN_TOKENS, targets.MAX_TOKENS);
}
// Heuristic: does `content` look mathematical? Checks, in order:
// LaTeX markup, unicode math symbols / numeric shapes, and finally a
// math-vocabulary count (two or more keywords present).
function containsMathematicalContent(content) {
  // LaTeX delimiters, environments, and common commands.
  const latexMarkup = [
    /\$\$.+?\$\$/s,
    /\$.+?\$/g,
    /\\begin\{equation\}/,
    /\\begin\{align\}/,
    /\\sum_/,
    /\\int/,
    /\\frac\{/,
    /\\sqrt\{/,
    /\\alpha|\\beta|\\gamma|\\delta|\\theta|\\lambda|\\sigma/,
    /\\nabla|\\partial/
  ];
  // Unicode math symbols plus shapes like exponents, coordinates, intervals.
  const symbolsAndShapes = [
    /[≠≤≥±∞∫∂∑∏√∈∉⊆⊇⊂⊃∪∩]/,
    /\b[a-zA-Z]\^[0-9]/,
    /\(\s*-?\d+(\.\d+)?\s*,\s*-?\d+(\.\d+)?\s*\)/,
    /\b[xyz]\s*=\s*-?\d+(\.\d+)?/,
    /\[\s*-?\d+(\.\d+)?\s*,\s*-?\d+(\.\d+)?\s*\]/,
    /\b\d+\s*×\s*\d+/
  ];
  if (latexMarkup.some((re) => re.test(content)) || symbolsAndShapes.some((re) => re.test(content))) {
    return true;
  }
  // Fall back to vocabulary: require at least two distinct math keywords.
  const vocabulary = [
    "theorem",
    "lemma",
    "proof",
    "equation",
    "function",
    "derivative",
    "integral",
    "matrix",
    "vector",
    "algorithm",
    "constraint",
    "coefficient"
  ];
  const lowered = content.toLowerCase();
  return vocabulary.filter((word) => lowered.includes(word)).length >= 2;
}
// Heuristic: does `content` look like technical documentation? Any single
// signal suffices: version strings, dev vocabulary (api/sdk/cli, config,
// parameter), URLs, HTTP verbs, markup tags, README/CHANGELOG mentions,
// well-known doc headings, or numbered/bulleted list structure.
function isTechnicalDocumentation(content) {
  const signalPatterns = [
    /\b(version|v)\s*\d+\.\d+(\.\d+)?/i,
    /\b(api|sdk|cli)\b/i,
    /\b(http|https|ftp):\/\//i,
    /\b(GET|POST|PUT|DELETE)\b/,
    /<\/?[a-z][\s\S]*>/i,
    /\bREADME\b|\bCHANGELOG\b/i,
    /\b(config|configuration)\b/i,
    /\b(parameter|param|argument|arg)\b/i,
    /\b(Introduction|Overview|Getting Started|Installation|Usage|API Reference|Troubleshooting)\b/i,
    // Multi-line numbered or bulleted lists.
    /\d+\.\s.+\n\d+\.\s.+/,
    /•\s.+\n•\s.+/,
    /\*\s.+\n\*\s.+/,
    /-\s.+\n-\s.+/
  ];
  return signalPatterns.some((re) => re.test(content));
}
// Returns the LLM-enriched chunk, falling back to the raw chunk when the
// generated context is absent or whitespace-only.
function getChunkWithContext(chunkContent, generatedContext) {
  const enriched = generatedContext?.trim();
  return enriched ? enriched : chunkContent;
}
-
1154
- // llm.ts
1155
- import { createAnthropic } from "@ai-sdk/anthropic";
1156
- import { google } from "@ai-sdk/google";
1157
- import { createOpenAI } from "@ai-sdk/openai";
1158
- import { logger as logger2 } from "@elizaos/core";
1159
- import { createOpenRouter } from "@openrouter/ai-sdk-provider";
1160
- import { generateText as aiGenerateText, embed } from "ai";
1161
// Dispatches a text-generation request to the configured provider
// (anthropic / openai / openrouter / google), preferring per-call overrides
// over the validated runtime config. Errors are logged with provider/model
// context and rethrown.
async function generateText(runtime, prompt, system, overrideConfig) {
  const config = validateModelConfig(runtime);
  const provider = overrideConfig?.provider || config.TEXT_PROVIDER;
  const modelName = overrideConfig?.modelName || config.TEXT_MODEL;
  const maxTokens = overrideConfig?.maxTokens || config.MAX_OUTPUT_TOKENS;
  // Auto-caching of <document> sections is on unless explicitly disabled.
  const autoCacheContextualRetrieval = overrideConfig?.autoCacheContextualRetrieval !== false;
  if (!modelName) {
    throw new Error(`No model name configured for provider: ${provider}`);
  }
  try {
    if (provider === "anthropic") {
      return await generateAnthropicText(config, prompt, system, modelName, maxTokens);
    }
    if (provider === "openai") {
      return await generateOpenAIText(config, prompt, system, modelName, maxTokens);
    }
    if (provider === "openrouter") {
      return await generateOpenRouterText(config, prompt, system, modelName, maxTokens, overrideConfig?.cacheDocument, overrideConfig?.cacheOptions, autoCacheContextualRetrieval);
    }
    if (provider === "google") {
      return await generateGoogleText(prompt, system, modelName, maxTokens, config);
    }
    throw new Error(`Unsupported text provider: ${provider}`);
  } catch (error) {
    logger2.error({ error }, `${provider} ${modelName} error`);
    throw error;
  }
}
// Generates text via Anthropic with up to 3 attempts; rate-limit errors
// (HTTP 429 or message mentioning "rate limit"/"429") trigger exponential
// backoff (2s, 4s); any other error — or the final attempt — is rethrown.
async function generateAnthropicText(config, prompt, system, modelName, maxTokens) {
  const anthropic = createAnthropic({
    apiKey: config.ANTHROPIC_API_KEY,
    baseURL: config.ANTHROPIC_BASE_URL
  });
  const model = anthropic(modelName);
  const maxRetries = 3;
  let attempt = 0;
  while (attempt < maxRetries) {
    try {
      return await aiGenerateText({
        model,
        prompt,
        system,
        temperature: 0.3,
        maxOutputTokens: maxTokens
      });
    } catch (error) {
      const errorObj = error;
      const rateLimited = errorObj?.status === 429 || errorObj?.message?.includes("rate limit") || errorObj?.message?.includes("429");
      const retriesLeft = attempt < maxRetries - 1;
      if (!(rateLimited && retriesLeft)) {
        throw error;
      }
      // Exponential backoff: 2s after the first failure, 4s after the second.
      const backoffMs = 2 ** (attempt + 1) * 1000;
      await new Promise((done) => setTimeout(done, backoffMs));
    }
    attempt++;
  }
  throw new Error("Max retries exceeded for Anthropic text generation");
}
// Single OpenAI chat-completion call via the ai-sdk (no retry wrapper here).
async function generateOpenAIText(config, prompt, system, modelName, maxTokens) {
  const client = createOpenAI({
    apiKey: config.OPENAI_API_KEY,
    baseURL: config.OPENAI_BASE_URL
  });
  return await aiGenerateText({
    model: client.chat(modelName),
    prompt,
    system,
    temperature: 0.3,
    maxOutputTokens: maxTokens
  });
}
// Single Google (Gemini) generation call. The @ai-sdk/google provider reads
// its key from the environment, so a configured key is propagated into
// GOOGLE_GENERATIVE_AI_API_KEY before the model is instantiated.
async function generateGoogleText(prompt, system, modelName, maxTokens, config) {
  if (config.GOOGLE_API_KEY) {
    process.env.GOOGLE_GENERATIVE_AI_API_KEY = config.GOOGLE_API_KEY;
  }
  const model = google(modelName);
  return await aiGenerateText({
    model,
    prompt,
    system,
    temperature: 0.3,
    maxOutputTokens: maxTokens
  });
}
// OpenRouter generation with optional prompt caching for Claude and Gemini
// models. A cacheable document is either passed explicitly or auto-extracted
// from an inline <document>…</document> section of the prompt; when present,
// the document is stripped from the prompt and re-sent through the
// model-specific caching path. Everything else uses the standard call.
async function generateOpenRouterText(config, prompt, system, modelName, maxTokens, cacheDocument, _cacheOptions, autoCacheContextualRetrieval = true) {
  const openrouter = createOpenRouter({
    apiKey: config.OPENROUTER_API_KEY,
    baseURL: config.OPENROUTER_BASE_URL
  });
  const model = openrouter.chat(modelName);
  const lowerName = modelName.toLowerCase();
  const isClaudeModel = lowerName.includes("claude");
  const isGeminiModel = lowerName.includes("gemini");
  const isGemini25Model = lowerName.includes("gemini-2.5");
  const supportsCaching = isClaudeModel || isGeminiModel;
  let documentForCaching = cacheDocument;
  if (!documentForCaching && autoCacheContextualRetrieval && supportsCaching) {
    const docMatch = prompt.match(/<document>([\s\S]*?)<\/document>/);
    if (docMatch?.[1]) {
      documentForCaching = docMatch[1].trim();
    }
  }
  if (documentForCaching && supportsCaching) {
    // The document travels through the caching path; remove the inline copy.
    const promptText = prompt.includes("<document>") ? prompt.replace(/<document>[\s\S]*?<\/document>/, "").trim() : prompt;
    if (isClaudeModel) {
      return await generateClaudeWithCaching(promptText, system, model, modelName, maxTokens, documentForCaching);
    }
    return await generateGeminiWithCaching(promptText, system, model, modelName, maxTokens, documentForCaching, isGemini25Model);
  }
  return await generateStandardOpenRouterText(prompt, system, model, modelName, maxTokens);
}
// Claude-via-OpenRouter call with Anthropic prompt caching: the large
// document part is marked `cache_control: ephemeral` so repeated calls reuse
// the cached prefix. With a system prompt, the document rides in the system
// message and the prompt goes in its own user turn; without one, everything
// is a single user turn.
async function generateClaudeWithCaching(promptText, system, modelInstance, modelName, maxTokens, documentForCaching) {
  const cachedDocumentPart = {
    type: "text",
    text: documentForCaching,
    cache_control: {
      type: "ephemeral"
    }
  };
  const messages = [];
  if (system) {
    messages.push({
      role: "system",
      content: [{ type: "text", text: system }, cachedDocumentPart]
    });
    messages.push({
      role: "user",
      content: [{ type: "text", text: promptText }]
    });
  } else {
    messages.push({
      role: "user",
      content: [
        { type: "text", text: "Document for context:" },
        cachedDocumentPart,
        { type: "text", text: promptText }
      ]
    });
  }
  const result = await aiGenerateText({
    model: modelInstance,
    messages,
    temperature: 0.3,
    maxOutputTokens: maxTokens,
    providerOptions: {
      openrouter: {
        usage: {
          include: true
        }
      }
    }
  });
  logCacheMetrics(result);
  const inputTokens = result.usage.inputTokens || 0;
  const outputTokens = result.usage.outputTokens || 0;
  logger2.debug(`OpenRouter ${modelName}: ${inputTokens + outputTokens} tokens (${inputTokens}→${outputTokens})`);
  return result;
}
// Gemini-via-OpenRouter call: Gemini has no per-part cache-control, so the
// system prompt (if any), the document, and the user prompt are concatenated
// into one prompt separated by blank lines; OpenRouter handles implicit
// caching of the shared prefix.
async function generateGeminiWithCaching(promptText, system, modelInstance, modelName, maxTokens, documentForCaching, _isGemini25Model) {
  const sections = system ? [system, documentForCaching, promptText] : [documentForCaching, promptText];
  const geminiPrompt = sections.join("\n\n");
  const result = await aiGenerateText({
    model: modelInstance,
    prompt: geminiPrompt,
    temperature: 0.3,
    maxOutputTokens: maxTokens,
    providerOptions: {
      openrouter: {
        usage: {
          include: true
        }
      }
    }
  });
  logCacheMetrics(result);
  const inputTokens = result.usage.inputTokens || 0;
  const outputTokens = result.usage.outputTokens || 0;
  logger2.debug(`OpenRouter ${modelName}: ${inputTokens + outputTokens} tokens (${inputTokens}→${outputTokens})`);
  return result;
}
// Plain (non-cached) OpenRouter call; usage accounting is requested so token
// counts can be logged at debug level.
async function generateStandardOpenRouterText(prompt, system, modelInstance, modelName, maxTokens) {
  const result = await aiGenerateText({
    model: modelInstance,
    prompt,
    system,
    temperature: 0.3,
    maxOutputTokens: maxTokens,
    providerOptions: {
      openrouter: {
        usage: {
          include: true
        }
      }
    }
  });
  const inputTokens = result.usage.inputTokens || 0;
  const outputTokens = result.usage.outputTokens || 0;
  logger2.debug(`OpenRouter ${modelName}: ${inputTokens + outputTokens} tokens (${inputTokens}→${outputTokens})`);
  return result;
}
- function logCacheMetrics(_result) {}
1387
-
1388
- // document-processor.ts
1389
// Cheap token estimate: roughly 4 characters per token, rounded up.
function estimateTokens(text) {
  const charsPerToken = 4;
  return Math.ceil(text.length / charsPerToken);
}
- function getCtxKnowledgeEnabled(runtime) {
1393
- let result;
1394
- let _source;
1395
- let rawValue;
1396
- if (runtime) {
1397
- const settingValue = runtime.getSetting("CTX_KNOWLEDGE_ENABLED");
1398
- rawValue = typeof settingValue === "string" ? settingValue : settingValue?.toString();
1399
- const cleanValue = rawValue?.trim().toLowerCase();
1400
- result = cleanValue === "true";
1401
- } else {
1402
- rawValue = process.env.CTX_KNOWLEDGE_ENABLED;
1403
- const cleanValue = rawValue?.toString().trim().toLowerCase();
1404
- result = cleanValue === "true";
1405
- }
1406
- return result;
1407
- }
1408
// True when the environment selects a custom text LLM: TEXT_PROVIDER and
// TEXT_MODEL must both be set AND the matching provider API key must exist.
// Unknown providers yield false.
function shouldUseCustomLLM() {
  const provider = process.env.TEXT_PROVIDER;
  const model = process.env.TEXT_MODEL;
  if (!provider || !model) {
    return false;
  }
  const apiKeyByProvider = {
    openrouter: process.env.OPENROUTER_API_KEY,
    openai: process.env.OPENAI_API_KEY,
    anthropic: process.env.ANTHROPIC_API_KEY,
    google: process.env.GOOGLE_API_KEY
  };
  return Boolean(apiKeyByProvider[provider.toLowerCase()]);
}
// Evaluated once at module load.
var useCustomLLM = shouldUseCustomLLM();
// Splits a document's full text into chunks, contextualizes + embeds them in
// provider-rate-limited batches, and persists each fragment memory.
// Returns the number of fragments successfully saved (0 when there is no
// text or no chunks to process). Failures are logged, not thrown.
async function processFragmentsSynchronously({
  runtime,
  documentId,
  fullDocumentText,
  agentId,
  contentType,
  roomId,
  entityId,
  worldId,
  documentTitle
}) {
  if (!fullDocumentText || fullDocumentText.trim() === "") {
    logger3.warn(`No text content available for document ${documentId}`);
    return 0;
  }
  const chunks = await splitDocumentIntoChunks(fullDocumentText);
  if (chunks.length === 0) {
    logger3.warn(`No chunks generated for document ${documentId}`);
    return 0;
  }
  logger3.info(`Split into ${chunks.length} chunks`);
  // Provider-specific throttling: a concurrency cap plus a request/token
  // rate limiter shared across the batches.
  const providerLimits = await getProviderRateLimits(runtime);
  const CONCURRENCY_LIMIT = providerLimits.maxConcurrentRequests || 30;
  const rateLimiter = createRateLimiter(providerLimits.requestsPerMinute || 60, providerLimits.tokensPerMinute, providerLimits.rateLimitEnabled);
  // Missing scoping ids fall back to agentId so fragments always land in a
  // queryable room/entity/world.
  const { savedCount, failedCount } = await processAndSaveFragments({
    runtime,
    documentId,
    chunks,
    fullDocumentText,
    contentType,
    agentId,
    roomId: roomId || agentId,
    entityId: entityId || agentId,
    worldId: worldId || agentId,
    concurrencyLimit: CONCURRENCY_LIMIT,
    rateLimiter,
    documentTitle,
    batchDelayMs: providerLimits.batchDelayMs
  });
  if (failedCount > 0) {
    logger3.warn(`${failedCount}/${chunks.length} chunks failed processing`);
  }
  return savedCount;
}
// Extracts plain text from an uploaded file buffer. PDFs go through the PDF
// converter; textual MIME types (text/*, JSON, XML) are decoded as UTF-8
// (falling through to the generic extractor on failure); everything else
// uses the generic extractor. Throws on an empty buffer or extraction error.
async function extractTextFromDocument(fileBuffer, contentType, originalFilename) {
  if (!fileBuffer || fileBuffer.length === 0) {
    throw new Error(`Empty file buffer provided for ${originalFilename}`);
  }
  try {
    if (contentType === "application/pdf") {
      logger3.debug(`Extracting text from PDF: ${originalFilename}`);
      return await convertPdfToTextFromBuffer(fileBuffer, originalFilename);
    }
    const looksTextual = contentType.includes("text/") || contentType.includes("application/json") || contentType.includes("application/xml");
    if (looksTextual) {
      try {
        return fileBuffer.toString("utf8");
      } catch (_textError) {
        // Fall through to the generic extractor below.
        logger3.warn(`Failed to decode ${originalFilename} as UTF-8`);
      }
    }
    return await extractTextFromFileBuffer(fileBuffer, contentType, originalFilename);
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : String(error);
    logger3.error(`Error extracting text from ${originalFilename}: ${errorMessage}`);
    throw new Error(`Failed to extract text from ${originalFilename}: ${errorMessage}`);
  }
}
- function createDocumentMemory({
1497
- text,
1498
- agentId,
1499
- clientDocumentId,
1500
- originalFilename,
1501
- contentType,
1502
- worldId,
1503
- fileSize,
1504
- documentId,
1505
- customMetadata
1506
- }) {
1507
- const fileExt = originalFilename.split(".").pop()?.toLowerCase() || "";
1508
- const title = originalFilename.replace(`.${fileExt}`, "");
1509
- const docId = documentId || v4_default();
1510
- return {
1511
- id: docId,
1512
- agentId,
1513
- roomId: agentId,
1514
- worldId,
1515
- entityId: agentId,
1516
- content: { text },
1517
- metadata: {
1518
- type: MemoryType.CUSTOM,
1519
- documentId: clientDocumentId,
1520
- originalFilename,
1521
- contentType,
1522
- title,
1523
- fileExt,
1524
- fileSize,
1525
- source: "rag-service-main-upload",
1526
- timestamp: Date.now(),
1527
- ...customMetadata || {}
1528
- }
1529
- };
1530
- }
1531
// Splits document text into overlapping chunks using the plugin's default
// token sizing, delegating to the core splitChunks helper.
async function splitDocumentIntoChunks(documentText) {
  return await splitChunks(documentText, DEFAULT_CHUNK_TOKEN_SIZE, DEFAULT_CHUNK_OVERLAP_TOKENS);
}
// Contextualizes, embeds, and persists a document's chunks in batches of
// `concurrencyLimit`, pausing `batchDelayMs` between batches to respect
// provider rate limits. Individual chunk failures are counted and logged,
// never thrown. Returns { savedCount, failedCount, failedChunks } where
// failedChunks holds the original indices of the failures.
async function processAndSaveFragments({
  runtime,
  documentId,
  chunks,
  fullDocumentText,
  contentType,
  agentId,
  roomId,
  entityId,
  worldId,
  concurrencyLimit,
  rateLimiter,
  documentTitle,
  batchDelayMs = 500
}) {
  let savedCount = 0;
  let failedCount = 0;
  const failedChunks = [];
  for (let i = 0; i < chunks.length; i += concurrencyLimit) {
    const batchChunks = chunks.slice(i, i + concurrencyLimit);
    // Original chunk positions, preserved so fragment metadata keeps document order.
    const batchOriginalIndices = Array.from({ length: batchChunks.length }, (_, k) => i + k);
    const contextualizedChunks = await getContextualizedChunks(runtime, fullDocumentText, batchChunks, contentType, batchOriginalIndices, documentTitle);
    const embeddingResults = await generateEmbeddingsForChunks(runtime, contextualizedChunks, rateLimiter);
    for (const result of embeddingResults) {
      const originalChunkIndex = result.index;
      if (!result.success) {
        failedCount++;
        failedChunks.push(originalChunkIndex);
        logger3.warn(`Failed to process chunk ${originalChunkIndex} for document ${documentId}`);
        continue;
      }
      const contextualizedChunkText = result.text;
      const embedding = result.embedding;
      // A missing/empty embedding cannot be searched; count it as a failure.
      if (!embedding || embedding.length === 0) {
        failedCount++;
        failedChunks.push(originalChunkIndex);
        continue;
      }
      try {
        const fragmentMemory = {
          id: v4_default(),
          agentId,
          roomId: roomId || agentId,
          worldId: worldId || agentId,
          entityId: entityId || agentId,
          embedding,
          content: { text: contextualizedChunkText },
          metadata: {
            type: MemoryType.FRAGMENT,
            documentId,
            position: originalChunkIndex,
            timestamp: Date.now(),
            source: "rag-service-fragment-sync"
          }
        };
        await runtime.createMemory(fragmentMemory, "knowledge");
        savedCount++;
      } catch (saveError) {
        const errorMessage = saveError instanceof Error ? saveError.message : String(saveError);
        logger3.error(`Error saving chunk ${originalChunkIndex} to database: ${errorMessage}`);
        failedCount++;
        failedChunks.push(originalChunkIndex);
      }
    }
    // Throttle between batches (skipped after the final batch).
    if (i + concurrencyLimit < chunks.length && batchDelayMs > 0) {
      await new Promise((resolve2) => setTimeout(resolve2, batchDelayMs));
    }
  }
  return { savedCount, failedCount, failedChunks };
}
- var EMBEDDING_BATCH_SIZE = 100;
1607
- async function generateEmbeddingsForChunks(runtime, contextualizedChunks, rateLimiter) {
1608
- const validChunks = contextualizedChunks.filter((chunk) => chunk.success);
1609
- const failedChunks = contextualizedChunks.filter((chunk) => !chunk.success);
1610
- const results = [];
1611
- for (const chunk of failedChunks) {
1612
- results.push({
1613
- success: false,
1614
- index: chunk.index,
1615
- error: new Error("Chunk processing failed"),
1616
- text: chunk.contextualizedText
1617
- });
1618
- }
1619
- if (validChunks.length === 0) {
1620
- return results;
1621
- }
1622
- const useBatchEmbeddings = shouldUseBatchEmbeddings(runtime);
1623
- if (useBatchEmbeddings) {
1624
- return await generateEmbeddingsBatch(runtime, validChunks, rateLimiter, results);
1625
- } else {
1626
- return await generateEmbeddingsIndividual(runtime, validChunks, rateLimiter, results);
1627
- }
1628
- }
1629
- function shouldUseBatchEmbeddings(runtime) {
1630
- const setting = runtime.getSetting("BATCH_EMBEDDINGS") ?? process.env.BATCH_EMBEDDINGS;
1631
- return setting === "true" || setting === true;
1632
- }
1633
- async function generateEmbeddingsBatch(runtime, validChunks, rateLimiter, results) {
1634
- for (let batchStart = 0;batchStart < validChunks.length; batchStart += EMBEDDING_BATCH_SIZE) {
1635
- const batchEnd = Math.min(batchStart + EMBEDDING_BATCH_SIZE, validChunks.length);
1636
- const batch = validChunks.slice(batchStart, batchEnd);
1637
- const batchTexts = batch.map((c) => c.contextualizedText);
1638
- const totalTokens = batchTexts.reduce((sum, text) => sum + estimateTokens(text), 0);
1639
- await rateLimiter(totalTokens);
1640
- try {
1641
- const embeddings = await generateBatchEmbeddingsViaRuntime(runtime, batchTexts);
1642
- for (let i = 0;i < batch.length; i++) {
1643
- const chunk = batch[i];
1644
- const embedding = embeddings[i];
1645
- if (embedding && embedding.length > 0 && embedding[0] !== 0) {
1646
- results.push({
1647
- embedding,
1648
- success: true,
1649
- index: chunk.index,
1650
- text: chunk.contextualizedText
1651
- });
1652
- } else {
1653
- results.push({
1654
- success: false,
1655
- index: chunk.index,
1656
- error: new Error("Empty or invalid embedding returned"),
1657
- text: chunk.contextualizedText
1658
- });
1659
- }
1660
- }
1661
- } catch (error) {
1662
- const errorMessage = error instanceof Error ? error.message : String(error);
1663
- logger3.error(`Batch embedding error: ${errorMessage}`);
1664
- for (const chunk of batch) {
1665
- try {
1666
- const result = await generateEmbeddingWithValidation(runtime, chunk.contextualizedText);
1667
- if (result.success && result.embedding) {
1668
- results.push({
1669
- embedding: result.embedding,
1670
- success: true,
1671
- index: chunk.index,
1672
- text: chunk.contextualizedText
1673
- });
1674
- } else {
1675
- results.push({
1676
- success: false,
1677
- index: chunk.index,
1678
- error: result.error instanceof Error ? result.error : new Error("Embedding failed"),
1679
- text: chunk.contextualizedText
1680
- });
1681
- }
1682
- } catch (fallbackError) {
1683
- results.push({
1684
- success: false,
1685
- index: chunk.index,
1686
- error: fallbackError instanceof Error ? fallbackError : new Error(String(fallbackError)),
1687
- text: chunk.contextualizedText
1688
- });
1689
- }
1690
- }
1691
- }
1692
- }
1693
- return results;
1694
- }
1695
- async function generateBatchEmbeddingsViaRuntime(runtime, texts) {
1696
- const batchResult = await runtime.useModel(ModelType.TEXT_EMBEDDING, { texts });
1697
- const isEmbeddingBatch = (val) => Array.isArray(val) && val.length > 0 && Array.isArray(val[0]) && typeof val[0][0] === "number";
1698
- const isEmbeddingVector = (val) => Array.isArray(val) && val.length > 0 && typeof val[0] === "number";
1699
- if (isEmbeddingBatch(batchResult)) {
1700
- return batchResult;
1701
- }
1702
- if (isEmbeddingVector(batchResult)) {
1703
- const embeddings = await Promise.all(texts.map(async (text) => {
1704
- const result = await runtime.useModel(ModelType.TEXT_EMBEDDING, { text });
1705
- if (isEmbeddingVector(result)) {
1706
- return result;
1707
- }
1708
- const embeddingResult = result;
1709
- return embeddingResult?.embedding ?? [];
1710
- }));
1711
- return embeddings;
1712
- }
1713
- throw new Error("Unexpected batch embedding result format");
1714
- }
1715
- async function generateEmbeddingsIndividual(runtime, validChunks, rateLimiter, results) {
1716
- for (const chunk of validChunks) {
1717
- const embeddingTokens = estimateTokens(chunk.contextualizedText);
1718
- await rateLimiter(embeddingTokens);
1719
- try {
1720
- const generateEmbeddingOperation = async () => {
1721
- return await generateEmbeddingWithValidation(runtime, chunk.contextualizedText);
1722
- };
1723
- const { embedding, success, error } = await withRateLimitRetry(generateEmbeddingOperation, `embedding generation for chunk ${chunk.index}`);
1724
- if (!success) {
1725
- results.push({
1726
- success: false,
1727
- index: chunk.index,
1728
- error,
1729
- text: chunk.contextualizedText
1730
- });
1731
- } else {
1732
- results.push({
1733
- embedding: embedding ?? undefined,
1734
- success: true,
1735
- index: chunk.index,
1736
- text: chunk.contextualizedText
1737
- });
1738
- }
1739
- } catch (error) {
1740
- const errorMessage = error instanceof Error ? error.message : String(error);
1741
- logger3.error(`Error generating embedding for chunk ${chunk.index}: ${errorMessage}`);
1742
- results.push({
1743
- success: false,
1744
- index: chunk.index,
1745
- error: error instanceof Error ? error : new Error(String(error)),
1746
- text: chunk.contextualizedText
1747
- });
1748
- }
1749
- }
1750
- return results;
1751
- }
1752
- async function getContextualizedChunks(runtime, fullDocumentText, chunks, contentType, batchOriginalIndices, documentTitle) {
1753
- const ctxEnabled = getCtxKnowledgeEnabled(runtime);
1754
- if (ctxEnabled && fullDocumentText) {
1755
- return await generateContextsInBatch(runtime, fullDocumentText, chunks, contentType, batchOriginalIndices, documentTitle);
1756
- }
1757
- return chunks.map((chunkText, idx) => ({
1758
- contextualizedText: chunkText,
1759
- index: batchOriginalIndices[idx],
1760
- success: true
1761
- }));
1762
- }
1763
- async function generateContextsInBatch(runtime, fullDocumentText, chunks, contentType, batchIndices, _documentTitle) {
1764
- if (!chunks || chunks.length === 0) {
1765
- return [];
1766
- }
1767
- const providerLimits = await getProviderRateLimits(runtime);
1768
- const rateLimiter = createRateLimiter(providerLimits.requestsPerMinute || 60, providerLimits.tokensPerMinute, providerLimits.rateLimitEnabled);
1769
- const config = validateModelConfig(runtime);
1770
- const isUsingOpenRouter = config.TEXT_PROVIDER === "openrouter";
1771
- const isUsingCacheCapableModel = isUsingOpenRouter && (config.TEXT_MODEL?.toLowerCase().includes("claude") || config.TEXT_MODEL?.toLowerCase().includes("gemini"));
1772
- logger3.debug(`Contextualizing ${chunks.length} chunks with ${config.TEXT_PROVIDER}/${config.TEXT_MODEL} (cache: ${isUsingCacheCapableModel})`);
1773
- const promptConfigs = prepareContextPrompts(chunks, fullDocumentText, contentType, batchIndices, isUsingCacheCapableModel);
1774
- const contextualizedChunks = await Promise.all(promptConfigs.map(async (item) => {
1775
- if (!item.valid) {
1776
- return {
1777
- contextualizedText: item.chunkText,
1778
- success: false,
1779
- index: item.originalIndex
1780
- };
1781
- }
1782
- const llmTokens = estimateTokens(item.chunkText + (item.prompt || ""));
1783
- await rateLimiter(llmTokens);
1784
- try {
1785
- const generateTextOperation = async () => {
1786
- if (useCustomLLM) {
1787
- if (item.usesCaching && item.promptText) {
1788
- return await generateText(runtime, item.promptText, item.systemPrompt, {
1789
- cacheDocument: item.fullDocumentTextForContext,
1790
- cacheOptions: { type: "ephemeral" },
1791
- autoCacheContextualRetrieval: true
1792
- });
1793
- } else if (item.prompt) {
1794
- return await generateText(runtime, item.prompt);
1795
- }
1796
- throw new Error("Missing prompt for text generation");
1797
- } else {
1798
- if (item.usesCaching && item.promptText) {
1799
- const combinedPrompt = item.systemPrompt ? `${item.systemPrompt}
1800
-
1801
- ${item.promptText}` : item.promptText;
1802
- return await runtime.useModel(ModelType.TEXT_LARGE, {
1803
- prompt: combinedPrompt
1804
- });
1805
- } else if (item.prompt) {
1806
- return await runtime.useModel(ModelType.TEXT_LARGE, {
1807
- prompt: item.prompt
1808
- });
1809
- }
1810
- throw new Error("Missing prompt for text generation");
1811
- }
1812
- };
1813
- const llmResponse = await withRateLimitRetry(generateTextOperation, `context generation for chunk ${item.originalIndex}`);
1814
- const generatedContext = typeof llmResponse === "string" ? llmResponse : llmResponse.text;
1815
- const contextualizedText = getChunkWithContext(item.chunkText, generatedContext);
1816
- return {
1817
- contextualizedText,
1818
- success: true,
1819
- index: item.originalIndex
1820
- };
1821
- } catch (error) {
1822
- const errorMessage = error instanceof Error ? error.message : String(error);
1823
- logger3.error(`Error generating context for chunk ${item.originalIndex}: ${errorMessage}`);
1824
- return {
1825
- contextualizedText: item.chunkText,
1826
- success: false,
1827
- index: item.originalIndex
1828
- };
1829
- }
1830
- }));
1831
- return contextualizedChunks;
1832
- }
1833
- function prepareContextPrompts(chunks, fullDocumentText, contentType, batchIndices, isUsingCacheCapableModel = false) {
1834
- return chunks.map((chunkText, idx) => {
1835
- const originalIndex = batchIndices ? batchIndices[idx] : idx;
1836
- try {
1837
- if (isUsingCacheCapableModel) {
1838
- const cachingPromptInfo = contentType ? getCachingPromptForMimeType(contentType, chunkText) : getCachingContextualizationPrompt(chunkText);
1839
- if (cachingPromptInfo.prompt.startsWith("Error:")) {
1840
- return {
1841
- originalIndex,
1842
- chunkText,
1843
- valid: false,
1844
- usesCaching: false
1845
- };
1846
- }
1847
- return {
1848
- valid: true,
1849
- originalIndex,
1850
- chunkText,
1851
- usesCaching: true,
1852
- systemPrompt: cachingPromptInfo.systemPrompt,
1853
- promptText: cachingPromptInfo.prompt,
1854
- fullDocumentTextForContext: fullDocumentText
1855
- };
1856
- } else {
1857
- const prompt = contentType ? getPromptForMimeType(contentType, fullDocumentText, chunkText) : getContextualizationPrompt(fullDocumentText, chunkText);
1858
- if (prompt.startsWith("Error:")) {
1859
- return {
1860
- prompt: null,
1861
- originalIndex,
1862
- chunkText,
1863
- valid: false,
1864
- usesCaching: false
1865
- };
1866
- }
1867
- return {
1868
- prompt,
1869
- originalIndex,
1870
- chunkText,
1871
- valid: true,
1872
- usesCaching: false
1873
- };
1874
- }
1875
- } catch (error) {
1876
- const errorMessage = error instanceof Error ? error.message : String(error);
1877
- logger3.error(`Error preparing prompt for chunk ${originalIndex}: ${errorMessage}`);
1878
- return {
1879
- prompt: null,
1880
- originalIndex,
1881
- chunkText,
1882
- valid: false,
1883
- usesCaching: false
1884
- };
1885
- }
1886
- });
1887
- }
1888
- async function generateEmbeddingWithValidation(runtime, text) {
1889
- try {
1890
- const embeddingResult = await runtime.useModel(ModelType.TEXT_EMBEDDING, { text });
1891
- const embedding = Array.isArray(embeddingResult) ? embeddingResult : embeddingResult?.embedding;
1892
- if (!embedding || embedding.length === 0) {
1893
- return { embedding: null, success: false, error: new Error("Zero vector detected") };
1894
- }
1895
- return { embedding, success: true };
1896
- } catch (error) {
1897
- return {
1898
- embedding: null,
1899
- success: false,
1900
- error: error instanceof Error ? error : new Error(String(error))
1901
- };
1902
- }
1903
- }
1904
- async function withRateLimitRetry(operation, errorContext, retryDelay) {
1905
- try {
1906
- return await operation();
1907
- } catch (error) {
1908
- const errorWithStatus = error;
1909
- if (errorWithStatus.status === 429) {
1910
- const delay = retryDelay || errorWithStatus.headers?.["retry-after"] || 5;
1911
- await new Promise((resolve2) => setTimeout(resolve2, delay * 1000));
1912
- try {
1913
- return await operation();
1914
- } catch (retryError) {
1915
- const retryErrorMessage = retryError instanceof Error ? retryError.message : String(retryError);
1916
- logger3.error(`Failed after retry for ${errorContext}: ${retryErrorMessage}`);
1917
- throw retryError;
1918
- }
1919
- }
1920
- throw error;
1921
- }
1922
- }
1923
- function createRateLimiter(requestsPerMinute, tokensPerMinute, rateLimitEnabled = true) {
1924
- const requestTimes = [];
1925
- const tokenUsage = [];
1926
- const intervalMs = 60 * 1000;
1927
- return async function rateLimiter(estimatedTokens = 1000) {
1928
- if (!rateLimitEnabled)
1929
- return;
1930
- const now = Date.now();
1931
- while (requestTimes.length > 0 && now - requestTimes[0] > intervalMs) {
1932
- requestTimes.shift();
1933
- }
1934
- while (tokenUsage.length > 0 && now - tokenUsage[0].timestamp > intervalMs) {
1935
- tokenUsage.shift();
1936
- }
1937
- const currentTokens = tokenUsage.reduce((sum, usage) => sum + usage.tokens, 0);
1938
- const requestLimitExceeded = requestTimes.length >= requestsPerMinute;
1939
- const tokenLimitExceeded = tokensPerMinute && currentTokens + estimatedTokens > tokensPerMinute;
1940
- if (requestLimitExceeded || tokenLimitExceeded) {
1941
- let timeToWait = 0;
1942
- if (requestLimitExceeded) {
1943
- timeToWait = Math.max(timeToWait, requestTimes[0] + intervalMs - now);
1944
- }
1945
- if (tokenLimitExceeded && tokenUsage.length > 0) {
1946
- timeToWait = Math.max(timeToWait, tokenUsage[0].timestamp + intervalMs - now);
1947
- }
1948
- if (timeToWait > 0) {
1949
- await new Promise((resolve2) => setTimeout(resolve2, timeToWait));
1950
- }
1951
- }
1952
- requestTimes.push(now);
1953
- if (tokensPerMinute) {
1954
- tokenUsage.push({ timestamp: now, tokens: estimatedTokens });
1955
- }
1956
- };
1957
- }
1958
-
1959
- // service.ts
1960
- class KnowledgeService extends Service {
1961
- static serviceType = "knowledge";
1962
- config = {};
1963
- capabilityDescription = "Provides Retrieval Augmented Generation capabilities, including knowledge upload and querying.";
1964
- knowledgeProcessingSemaphore;
1965
- constructor(runtime, _config) {
1966
- super(runtime);
1967
- this.knowledgeProcessingSemaphore = new Semaphore(10);
1968
- }
1969
- async loadInitialDocuments() {
1970
- logger4.info(`Loading documents on startup for agent ${this.runtime.agentId}`);
1971
- try {
1972
- await new Promise((resolve2) => setTimeout(resolve2, 1000));
1973
- const knowledgePathSetting = this.runtime.getSetting("KNOWLEDGE_PATH");
1974
- const knowledgePath = typeof knowledgePathSetting === "string" ? knowledgePathSetting : undefined;
1975
- const result = await loadDocsFromPath(this, this.runtime.agentId, undefined, knowledgePath);
1976
- if (result.successful > 0) {
1977
- logger4.info(`Loaded ${result.successful} documents on startup`);
1978
- }
1979
- } catch (error) {
1980
- logger4.error({ error }, "Error loading documents on startup");
1981
- }
1982
- }
1983
- static async start(runtime) {
1984
- logger4.info(`Starting Knowledge service for agent: ${runtime.agentId}`);
1985
- const validatedConfig = validateModelConfig(runtime);
1986
- const ctxEnabled = validatedConfig.CTX_KNOWLEDGE_ENABLED;
1987
- if (ctxEnabled) {
1988
- logger4.info(`Contextual Knowledge enabled: ${validatedConfig.EMBEDDING_PROVIDER || "auto"} embeddings, ${validatedConfig.TEXT_PROVIDER} text generation`);
1989
- logger4.info(`Text model: ${validatedConfig.TEXT_MODEL}`);
1990
- } else {
1991
- const usingPluginOpenAI = !process.env.EMBEDDING_PROVIDER;
1992
- logger4.info("Basic Embedding mode - documents will not be enriched with context");
1993
- logger4.info("To enable contextual enrichment: Set CTX_KNOWLEDGE_ENABLED=true and configure TEXT_PROVIDER/TEXT_MODEL");
1994
- if (usingPluginOpenAI) {
1995
- logger4.info("Using plugin-openai configuration for embeddings");
1996
- } else {
1997
- logger4.info(`Using ${validatedConfig.EMBEDDING_PROVIDER} for embeddings with ${validatedConfig.TEXT_EMBEDDING_MODEL}`);
1998
- }
1999
- }
2000
- const service = new KnowledgeService(runtime);
2001
- service.config = validatedConfig;
2002
- if (service.config.LOAD_DOCS_ON_STARTUP) {
2003
- service.loadInitialDocuments().catch((error) => {
2004
- logger4.error({ error }, "Error loading initial documents");
2005
- });
2006
- }
2007
- if (service.runtime.character?.knowledge && service.runtime.character.knowledge.length > 0) {
2008
- const stringKnowledge = service.runtime.character.knowledge.map((item) => {
2009
- const itemAny = item;
2010
- if (itemAny?.item?.case === "path" && typeof itemAny.item.value === "string") {
2011
- return itemAny.item.value;
2012
- }
2013
- if (typeof itemAny?.path === "string") {
2014
- return itemAny.path;
2015
- }
2016
- if (typeof item === "string") {
2017
- return item;
2018
- }
2019
- return null;
2020
- }).filter((item) => item !== null);
2021
- await service.processCharacterKnowledge(stringKnowledge).catch((err) => {
2022
- logger4.error({ error: err }, "Error processing character knowledge");
2023
- });
2024
- }
2025
- return service;
2026
- }
2027
- static async stop(runtime) {
2028
- logger4.info(`Stopping Knowledge service for agent: ${runtime.agentId}`);
2029
- const service = runtime.getService(KnowledgeService.serviceType);
2030
- if (!service) {
2031
- logger4.warn(`KnowledgeService not found for agent ${runtime.agentId} during stop.`);
2032
- }
2033
- if (service instanceof KnowledgeService) {
2034
- await service.stop();
2035
- }
2036
- }
2037
- async stop() {
2038
- logger4.info(`Knowledge service stopping for agent: ${this.runtime.character?.name}`);
2039
- }
2040
- async addKnowledge(options) {
2041
- const agentId = options.agentId || this.runtime.agentId;
2042
- const contentBasedId = generateContentBasedId(options.content, agentId, {
2043
- includeFilename: options.originalFilename,
2044
- contentType: options.contentType,
2045
- maxChars: 2000
2046
- });
2047
- logger4.info(`Processing "${options.originalFilename}" (${options.contentType})`);
2048
- try {
2049
- const existingDocument = await this.runtime.getMemoryById(contentBasedId);
2050
- if (existingDocument && existingDocument.metadata?.type === MemoryType2.DOCUMENT) {
2051
- logger4.info(`"${options.originalFilename}" already exists - skipping`);
2052
- const fragments = await this.runtime.getMemories({
2053
- tableName: "knowledge"
2054
- });
2055
- const relatedFragments = fragments.filter((f) => f.metadata?.type === MemoryType2.FRAGMENT && f.metadata.documentId === contentBasedId);
2056
- return {
2057
- clientDocumentId: contentBasedId,
2058
- storedDocumentMemoryId: existingDocument.id,
2059
- fragmentCount: relatedFragments.length
2060
- };
2061
- }
2062
- } catch (error) {
2063
- logger4.debug(`Document ${contentBasedId} not found or error checking existence, proceeding with processing: ${error instanceof Error ? error.message : String(error)}`);
2064
- }
2065
- return this.processDocument({
2066
- ...options,
2067
- clientDocumentId: contentBasedId
2068
- });
2069
- }
2070
- async processDocument({
2071
- agentId: passedAgentId,
2072
- clientDocumentId,
2073
- contentType,
2074
- originalFilename,
2075
- worldId,
2076
- content,
2077
- roomId,
2078
- entityId,
2079
- metadata
2080
- }) {
2081
- const agentId = passedAgentId || this.runtime.agentId;
2082
- try {
2083
- logger4.debug(`Processing document ${originalFilename} (type: ${contentType}) for agent: ${agentId}`);
2084
- let fileBuffer = null;
2085
- let extractedText;
2086
- let documentContentToStore;
2087
- const isPdfFile = contentType === "application/pdf" || originalFilename.toLowerCase().endsWith(".pdf");
2088
- if (isPdfFile) {
2089
- try {
2090
- fileBuffer = Buffer.from(content, "base64");
2091
- } catch (e) {
2092
- logger4.error({ error: e }, `Failed to convert base64 to buffer for ${originalFilename}`);
2093
- throw new Error(`Invalid base64 content for PDF file ${originalFilename}`);
2094
- }
2095
- extractedText = await extractTextFromDocument(fileBuffer, contentType, originalFilename);
2096
- documentContentToStore = content;
2097
- } else if (isBinaryContentType(contentType, originalFilename)) {
2098
- try {
2099
- fileBuffer = Buffer.from(content, "base64");
2100
- } catch (e) {
2101
- logger4.error({ error: e }, `Failed to convert base64 to buffer for ${originalFilename}`);
2102
- throw new Error(`Invalid base64 content for binary file ${originalFilename}`);
2103
- }
2104
- extractedText = await extractTextFromDocument(fileBuffer, contentType, originalFilename);
2105
- documentContentToStore = extractedText;
2106
- } else {
2107
- if (looksLikeBase64(content)) {
2108
- try {
2109
- const decodedBuffer = Buffer.from(content, "base64");
2110
- const decodedText = decodedBuffer.toString("utf8");
2111
- const invalidCharCount = (decodedText.match(/\ufffd/g) || []).length;
2112
- const textLength = decodedText.length;
2113
- if (invalidCharCount > 0 && invalidCharCount / textLength > 0.1) {
2114
- throw new Error("Decoded content contains too many invalid characters");
2115
- }
2116
- logger4.debug(`Successfully decoded base64 content for text file: ${originalFilename}`);
2117
- extractedText = decodedText;
2118
- documentContentToStore = decodedText;
2119
- } catch (e) {
2120
- logger4.error({ error: e instanceof Error ? e : new Error(String(e)) }, `Failed to decode base64 for ${originalFilename}`);
2121
- throw new Error(`File ${originalFilename} appears to be corrupted or incorrectly encoded`);
2122
- }
2123
- } else {
2124
- logger4.debug(`Treating content as plain text for file: ${originalFilename}`);
2125
- extractedText = content;
2126
- documentContentToStore = content;
2127
- }
2128
- }
2129
- if (!extractedText || extractedText.trim() === "") {
2130
- throw new Error(`No text content extracted from ${originalFilename} (type: ${contentType})`);
2131
- }
2132
- const documentMemory = createDocumentMemory({
2133
- text: documentContentToStore,
2134
- agentId,
2135
- clientDocumentId,
2136
- originalFilename,
2137
- contentType,
2138
- worldId,
2139
- fileSize: fileBuffer ? fileBuffer.length : extractedText.length,
2140
- documentId: clientDocumentId,
2141
- customMetadata: metadata
2142
- });
2143
- const memoryWithScope = {
2144
- ...documentMemory,
2145
- id: clientDocumentId,
2146
- agentId,
2147
- roomId: roomId || agentId,
2148
- entityId: entityId || agentId
2149
- };
2150
- await this.runtime.createMemory(memoryWithScope, "documents");
2151
- const fragmentCount = await processFragmentsSynchronously({
2152
- runtime: this.runtime,
2153
- documentId: clientDocumentId,
2154
- fullDocumentText: extractedText,
2155
- agentId,
2156
- contentType,
2157
- roomId: roomId || agentId,
2158
- entityId: entityId || agentId,
2159
- worldId: worldId || agentId,
2160
- documentTitle: originalFilename
2161
- });
2162
- logger4.debug(`"${originalFilename}" stored with ${fragmentCount} fragments`);
2163
- return {
2164
- clientDocumentId,
2165
- storedDocumentMemoryId: memoryWithScope.id,
2166
- fragmentCount
2167
- };
2168
- } catch (error) {
2169
- logger4.error({ error }, `Error processing document ${originalFilename}`);
2170
- throw error;
2171
- }
2172
- }
2173
- async checkExistingKnowledge(knowledgeId) {
2174
- const existingDocument = await this.runtime.getMemoryById(knowledgeId);
2175
- return !!existingDocument;
2176
- }
2177
- async getKnowledge(message, scope) {
2178
- if (!message?.content?.text || message?.content?.text.trim().length === 0) {
2179
- logger4.warn("Invalid or empty message content for knowledge query");
2180
- return [];
2181
- }
2182
- const embedding = await this.runtime.useModel(ModelType2.TEXT_EMBEDDING, {
2183
- text: message.content.text
2184
- });
2185
- const filterScope = {};
2186
- if (scope?.roomId)
2187
- filterScope.roomId = scope.roomId;
2188
- if (scope?.worldId)
2189
- filterScope.worldId = scope.worldId;
2190
- if (scope?.entityId)
2191
- filterScope.entityId = scope.entityId;
2192
- const fragments = await this.runtime.searchMemories({
2193
- tableName: "knowledge",
2194
- embedding,
2195
- query: message.content.text,
2196
- ...filterScope,
2197
- count: 20,
2198
- match_threshold: 0.1
2199
- });
2200
- return fragments.filter((fragment) => fragment.id !== undefined).map((fragment) => ({
2201
- id: fragment.id,
2202
- content: fragment.content,
2203
- similarity: fragment.similarity,
2204
- metadata: fragment.metadata,
2205
- worldId: fragment.worldId
2206
- }));
2207
- }
2208
- async enrichConversationMemoryWithRAG(memoryId, ragMetadata) {
2209
- try {
2210
- const existingMemory = await this.runtime.getMemoryById(memoryId);
2211
- if (!existingMemory) {
2212
- logger4.warn(`Cannot enrich memory ${memoryId} - memory not found`);
2213
- return;
2214
- }
2215
- const ragUsageData = {
2216
- retrievedFragments: ragMetadata.retrievedFragments,
2217
- queryText: ragMetadata.queryText,
2218
- totalFragments: ragMetadata.totalFragments,
2219
- retrievalTimestamp: ragMetadata.retrievalTimestamp,
2220
- usedInResponse: true
2221
- };
2222
- const updatedMetadata = {
2223
- ...existingMemory.metadata,
2224
- knowledgeUsed: true,
2225
- ragUsage: JSON.stringify(ragUsageData),
2226
- timestamp: existingMemory.metadata?.timestamp ?? Date.now(),
2227
- type: MemoryType2.CUSTOM
2228
- };
2229
- await this.runtime.updateMemory({
2230
- id: memoryId,
2231
- metadata: updatedMetadata
2232
- });
2233
- } catch (error) {
2234
- const errorMessage = error instanceof Error ? error.message : String(error);
2235
- logger4.warn(`Failed to enrich conversation memory ${memoryId} with RAG data: ${errorMessage}`);
2236
- }
2237
- }
2238
- pendingRAGEnrichment = [];
2239
- setPendingRAGMetadata(ragMetadata) {
2240
- const now = Date.now();
2241
- this.pendingRAGEnrichment = this.pendingRAGEnrichment.filter((entry) => now - entry.timestamp < 30000);
2242
- this.pendingRAGEnrichment.push({
2243
- ragMetadata,
2244
- timestamp: now
2245
- });
2246
- }
2247
- async enrichRecentMemoriesWithPendingRAG() {
2248
- if (this.pendingRAGEnrichment.length === 0) {
2249
- return;
2250
- }
2251
- try {
2252
- const recentMemories = await this.runtime.getMemories({
2253
- tableName: "messages",
2254
- count: 10
2255
- });
2256
- const now = Date.now();
2257
- const recentConversationMemories = recentMemories.filter((memory) => memory.metadata?.type === "message" && now - (memory.createdAt || 0) < 1e4 && !(memory.metadata && ("ragUsage" in memory.metadata) && memory.metadata.ragUsage)).sort((a, b) => (b.createdAt || 0) - (a.createdAt || 0));
2258
- for (const pendingEntry of this.pendingRAGEnrichment) {
2259
- const matchingMemory = recentConversationMemories.find((memory) => (memory.createdAt || 0) > pendingEntry.timestamp);
2260
- if (matchingMemory?.id) {
2261
- await this.enrichConversationMemoryWithRAG(matchingMemory.id, pendingEntry.ragMetadata);
2262
- const index = this.pendingRAGEnrichment.indexOf(pendingEntry);
2263
- if (index > -1) {
2264
- this.pendingRAGEnrichment.splice(index, 1);
2265
- }
2266
- }
2267
- }
2268
- } catch (error) {
2269
- const errorMessage = error instanceof Error ? error.message : String(error);
2270
- logger4.warn(`Error enriching recent memories with RAG data: ${errorMessage}`);
2271
- }
2272
- }
2273
- async processCharacterKnowledge(items) {
2274
- await new Promise((resolve2) => setTimeout(resolve2, 1000));
2275
- logger4.info(`Processing ${items.length} character knowledge items`);
2276
- const processingPromises = items.map(async (item) => {
2277
- await this.knowledgeProcessingSemaphore.acquire();
2278
- try {
2279
- const knowledgeId = generateContentBasedId(item, this.runtime.agentId, {
2280
- maxChars: 2000,
2281
- includeFilename: "character-knowledge"
2282
- });
2283
- if (await this.checkExistingKnowledge(knowledgeId)) {
2284
- return;
2285
- }
2286
- let metadata = {
2287
- type: MemoryType2.CUSTOM,
2288
- timestamp: Date.now(),
2289
- source: "character"
2290
- };
2291
- const pathMatch = item.match(/^Path: (.+?)(?:\n|\r\n)/);
2292
- if (pathMatch) {
2293
- const filePath = pathMatch[1].trim();
2294
- const extension = filePath.split(".").pop() || "";
2295
- const filename = filePath.split("/").pop() || "";
2296
- const title = filename.replace(`.${extension}`, "");
2297
- metadata = {
2298
- ...metadata,
2299
- path: filePath,
2300
- filename,
2301
- fileExt: extension,
2302
- title,
2303
- fileType: `text/${extension || "plain"}`,
2304
- fileSize: item.length
2305
- };
2306
- }
2307
- await this._internalAddKnowledge({
2308
- id: knowledgeId,
2309
- content: {
2310
- text: item
2311
- },
2312
- metadata
2313
- }, undefined, {
2314
- roomId: this.runtime.agentId,
2315
- entityId: this.runtime.agentId,
2316
- worldId: this.runtime.agentId
2317
- });
2318
- } catch (error) {
2319
- logger4.error({ error }, "Error processing character knowledge");
2320
- } finally {
2321
- this.knowledgeProcessingSemaphore.release();
2322
- }
2323
- });
2324
- await Promise.all(processingPromises);
2325
- }
2326
- async _internalAddKnowledge(item, options = {
2327
- targetTokens: 1500,
2328
- overlap: 200,
2329
- modelContextSize: 4096
2330
- }, scope = {
2331
- roomId: this.runtime.agentId,
2332
- entityId: this.runtime.agentId,
2333
- worldId: this.runtime.agentId
2334
- }) {
2335
- const finalScope = {
2336
- roomId: scope?.roomId ?? this.runtime.agentId,
2337
- worldId: scope?.worldId ?? this.runtime.agentId,
2338
- entityId: scope?.entityId ?? this.runtime.agentId
2339
- };
2340
- const documentMetadata = {
2341
- ...item.metadata ?? {},
2342
- type: MemoryType2.CUSTOM,
2343
- documentId: item.id
2344
- };
2345
- const documentMemory = {
2346
- id: item.id,
2347
- agentId: this.runtime.agentId,
2348
- roomId: finalScope.roomId,
2349
- worldId: finalScope.worldId,
2350
- entityId: finalScope.entityId,
2351
- content: item.content,
2352
- metadata: documentMetadata,
2353
- createdAt: Date.now()
2354
- };
2355
- const existingDocument = await this.runtime.getMemoryById(item.id);
2356
- if (existingDocument) {
2357
- await this.runtime.updateMemory({
2358
- ...documentMemory,
2359
- id: item.id
2360
- });
2361
- } else {
2362
- await this.runtime.createMemory(documentMemory, "documents");
2363
- }
2364
- const fragments = await this.splitAndCreateFragments(item, options.targetTokens, options.overlap, finalScope);
2365
- for (const fragment of fragments) {
2366
- try {
2367
- await this.processDocumentFragment(fragment);
2368
- } catch (error) {
2369
- logger4.error({ error }, `KnowledgeService: Error processing fragment ${fragment.id} for document ${item.id}`);
2370
- }
2371
- }
2372
- }
2373
- async processDocumentFragment(fragment) {
2374
- try {
2375
- await this.runtime.addEmbeddingToMemory(fragment);
2376
- await this.runtime.createMemory(fragment, "knowledge");
2377
- } catch (error) {
2378
- logger4.error({ error }, `Error processing fragment ${fragment.id}`);
2379
- throw error;
2380
- }
2381
- }
2382
- async splitAndCreateFragments(document, targetTokens, overlap, scope) {
2383
- if (!document.content.text) {
2384
- return [];
2385
- }
2386
- const text = document.content.text;
2387
- const chunks = await splitChunks2(text, targetTokens, overlap);
2388
- return chunks.map((chunk, index) => {
2389
- const fragmentIdContent = `${document.id}-fragment-${index}-${Date.now()}`;
2390
- const fragmentId = createUniqueUuid(this.runtime, fragmentIdContent);
2391
- return {
2392
- id: fragmentId,
2393
- entityId: scope.entityId,
2394
- agentId: this.runtime.agentId,
2395
- roomId: scope.roomId,
2396
- worldId: scope.worldId,
2397
- content: {
2398
- text: chunk
2399
- },
2400
- metadata: {
2401
- ...document.metadata || {},
2402
- type: MemoryType2.FRAGMENT,
2403
- documentId: document.id,
2404
- position: index,
2405
- timestamp: Date.now()
2406
- },
2407
- createdAt: Date.now()
2408
- };
2409
- });
2410
- }
2411
- async getMemories(params) {
2412
- return this.runtime.getMemories({
2413
- ...params,
2414
- agentId: this.runtime.agentId
2415
- });
2416
- }
2417
- async countMemories(params) {
2418
- const roomId = params.roomId || this.runtime.agentId;
2419
- const unique = params.unique ?? false;
2420
- const tableName = params.tableName;
2421
- return this.runtime.countMemories(roomId, unique, tableName);
2422
- }
2423
- async deleteMemory(memoryId) {
2424
- await this.runtime.deleteMemory(memoryId);
2425
- }
2426
- }
2427
-
2428
- // actions.ts
2429
- var processKnowledgeAction = {
2430
- name: "PROCESS_KNOWLEDGE",
2431
- description: "Process and store knowledge from a file path or text content into the knowledge base",
2432
- similes: [],
2433
- examples: [
2434
- [
2435
- {
2436
- name: "user",
2437
- content: {
2438
- text: "Process the document at /path/to/document.pdf"
2439
- }
2440
- },
2441
- {
2442
- name: "assistant",
2443
- content: {
2444
- text: "I'll process the document at /path/to/document.pdf and add it to my knowledge base.",
2445
- actions: ["PROCESS_KNOWLEDGE"]
2446
- }
2447
- }
2448
- ],
2449
- [
2450
- {
2451
- name: "user",
2452
- content: {
2453
- text: "Add this to your knowledge: The capital of France is Paris."
2454
- }
2455
- },
2456
- {
2457
- name: "assistant",
2458
- content: {
2459
- text: "I'll add that information to my knowledge base.",
2460
- actions: ["PROCESS_KNOWLEDGE"]
2461
- }
2462
- }
2463
- ]
2464
- ],
2465
- validate: async (runtime, message, state, options) => {
2466
- const __avTextRaw = typeof message?.content?.text === "string" ? message.content.text : "";
2467
- const __avText = __avTextRaw.toLowerCase();
2468
- const __avKeywords = ["process", "knowledge"];
2469
- const __avKeywordOk = __avKeywords.length > 0 && __avKeywords.some((kw) => kw.length > 0 && __avText.includes(kw));
2470
- const __avRegexOk = /\b(?:process|knowledge)\b/i.test(__avText);
2471
- const __avSource = String(message?.content?.source ?? message?.source ?? "");
2472
- const __avExpectedSource = "";
2473
- const __avSourceOk = __avExpectedSource ? __avSource === __avExpectedSource : Boolean(__avSource || state || runtime?.agentId || runtime?.getService);
2474
- const __avOptions = options && typeof options === "object" ? options : {};
2475
- const __avInputOk = __avText.trim().length > 0 || Object.keys(__avOptions).length > 0 || Boolean(message?.content && typeof message.content === "object");
2476
- if (!(__avKeywordOk && __avRegexOk && __avSourceOk && __avInputOk)) {
2477
- return false;
2478
- }
2479
- const __avLegacyValidate = async (runtime2, message2, _state) => {
2480
- const text = message2.content.text?.toLowerCase() || "";
2481
- const knowledgeKeywords = [
2482
- "process",
2483
- "add",
2484
- "upload",
2485
- "document",
2486
- "knowledge",
2487
- "learn",
2488
- "remember",
2489
- "store",
2490
- "ingest",
2491
- "file"
2492
- ];
2493
- const hasKeyword = knowledgeKeywords.some((keyword) => text.includes(keyword));
2494
- const pathPattern = /(?:\/[\w.-]+)+|(?:[a-zA-Z]:[\\/][\w\s.-]+(?:[\\/][\w\s.-]+)*)/;
2495
- const hasPath = pathPattern.test(text);
2496
- const service = runtime2.getService(KnowledgeService.serviceType);
2497
- if (!service) {
2498
- logger5.warn("Knowledge service not available for PROCESS_KNOWLEDGE action");
2499
- return false;
2500
- }
2501
- return hasKeyword || hasPath;
2502
- };
2503
- try {
2504
- return Boolean(await __avLegacyValidate(runtime, message, state, options));
2505
- } catch {
2506
- return false;
2507
- }
2508
- },
2509
- handler: async (runtime, message, _state, _options, callback) => {
2510
- try {
2511
- const service = runtime.getService(KnowledgeService.serviceType);
2512
- if (!service) {
2513
- throw new Error("Knowledge service not available");
2514
- }
2515
- const text = message.content.text || "";
2516
- const pathPattern = /(?:\/[\w.-]+)+|(?:[a-zA-Z]:[\\/][\w\s.-]+(?:[\\/][\w\s.-]+)*)/;
2517
- const pathMatch = text.match(pathPattern);
2518
- let response;
2519
- if (pathMatch) {
2520
- const filePath = pathMatch[0];
2521
- if (!fs2.existsSync(filePath)) {
2522
- response = {
2523
- text: `I couldn't find the file at ${filePath}. Please check the path and try again.`
2524
- };
2525
- if (callback) {
2526
- await callback(response);
2527
- }
2528
- return;
2529
- }
2530
- const fileBuffer = fs2.readFileSync(filePath);
2531
- const fileName = path2.basename(filePath);
2532
- const fileExt = path2.extname(filePath).toLowerCase();
2533
- let contentType = "text/plain";
2534
- if (fileExt === ".pdf")
2535
- contentType = "application/pdf";
2536
- else if (fileExt === ".docx")
2537
- contentType = "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
2538
- else if (fileExt === ".doc")
2539
- contentType = "application/msword";
2540
- else if ([".txt", ".md", ".tson", ".xml", ".csv"].includes(fileExt))
2541
- contentType = "text/plain";
2542
- const knowledgeOptions = {
2543
- clientDocumentId: stringToUuid(runtime.agentId + fileName + Date.now()),
2544
- contentType,
2545
- originalFilename: fileName,
2546
- worldId: runtime.agentId,
2547
- content: fileBuffer.toString("base64"),
2548
- roomId: message.roomId,
2549
- entityId: message.entityId
2550
- };
2551
- const result = await service.addKnowledge(knowledgeOptions);
2552
- response = {
2553
- text: `I've successfully processed the document "${fileName}". It has been split into ${result?.fragmentCount || 0} searchable fragments and added to my knowledge base.`
2554
- };
2555
- } else {
2556
- const knowledgeContent = text.replace(/^(add|store|remember|process|learn)\s+(this|that|the following)?:?\s*/i, "").trim();
2557
- if (!knowledgeContent) {
2558
- response = {
2559
- text: "I need some content to add to my knowledge base. Please provide text or a file path."
2560
- };
2561
- if (callback) {
2562
- await callback(response);
2563
- }
2564
- return;
2565
- }
2566
- const knowledgeOptions = {
2567
- clientDocumentId: stringToUuid(`${runtime.agentId}text${Date.now()}user-knowledge`),
2568
- contentType: "text/plain",
2569
- originalFilename: "user-knowledge.txt",
2570
- worldId: runtime.agentId,
2571
- content: knowledgeContent,
2572
- roomId: message.roomId,
2573
- entityId: message.entityId
2574
- };
2575
- await service.addKnowledge(knowledgeOptions);
2576
- response = {
2577
- text: `I've added that information to my knowledge base. It has been stored and indexed for future reference.`
2578
- };
2579
- }
2580
- if (callback) {
2581
- await callback(response);
2582
- }
2583
- return { success: true, text: response.text };
2584
- } catch (error) {
2585
- logger5.error({ error }, "Error in PROCESS_KNOWLEDGE action");
2586
- const errorResponse = {
2587
- text: `I encountered an error while processing the knowledge: ${error instanceof Error ? error.message : String(error)}`
2588
- };
2589
- if (callback) {
2590
- await callback(errorResponse);
2591
- }
2592
- return { success: false, error: error instanceof Error ? error.message : String(error) };
2593
- }
2594
- }
2595
- };
2596
- var searchKnowledgeAction = {
2597
- name: "SEARCH_KNOWLEDGE",
2598
- description: "Search the knowledge base for specific information",
2599
- similes: [
2600
- "search knowledge",
2601
- "find information",
2602
- "look up",
2603
- "query knowledge base",
2604
- "search documents",
2605
- "find in knowledge"
2606
- ],
2607
- examples: [
2608
- [
2609
- {
2610
- name: "user",
2611
- content: {
2612
- text: "Search your knowledge for information about quantum computing"
2613
- }
2614
- },
2615
- {
2616
- name: "assistant",
2617
- content: {
2618
- text: "I'll search my knowledge base for information about quantum computing.",
2619
- actions: ["SEARCH_KNOWLEDGE"]
2620
- }
2621
- }
2622
- ]
2623
- ],
2624
- validate: async (runtime, message, state, options) => {
2625
- const __avTextRaw = typeof message?.content?.text === "string" ? message.content.text : "";
2626
- const __avText = __avTextRaw.toLowerCase();
2627
- const __avKeywords = ["search", "knowledge"];
2628
- const __avKeywordOk = __avKeywords.length > 0 && __avKeywords.some((kw) => kw.length > 0 && __avText.includes(kw));
2629
- const __avRegexOk = /\b(?:search|knowledge)\b/i.test(__avText);
2630
- const __avSource = String(message?.content?.source ?? message?.source ?? "");
2631
- const __avExpectedSource = "";
2632
- const __avSourceOk = __avExpectedSource ? __avSource === __avExpectedSource : Boolean(__avSource || state || runtime?.agentId || runtime?.getService);
2633
- const __avOptions = options && typeof options === "object" ? options : {};
2634
- const __avInputOk = __avText.trim().length > 0 || Object.keys(__avOptions).length > 0 || Boolean(message?.content && typeof message.content === "object");
2635
- if (!(__avKeywordOk && __avRegexOk && __avSourceOk && __avInputOk)) {
2636
- return false;
2637
- }
2638
- const __avLegacyValidate = async (runtime2, message2, _state) => {
2639
- const text = message2.content.text?.toLowerCase() || "";
2640
- const searchKeywords = ["search", "find", "look up", "query", "what do you know about"];
2641
- const knowledgeKeywords = ["knowledge", "information", "document", "database"];
2642
- const hasSearchKeyword = searchKeywords.some((keyword) => text.includes(keyword));
2643
- const hasKnowledgeKeyword = knowledgeKeywords.some((keyword) => text.includes(keyword));
2644
- const service = runtime2.getService(KnowledgeService.serviceType);
2645
- if (!service) {
2646
- return false;
2647
- }
2648
- return hasSearchKeyword && hasKnowledgeKeyword;
2649
- };
2650
- try {
2651
- return Boolean(await __avLegacyValidate(runtime, message, state, options));
2652
- } catch {
2653
- return false;
2654
- }
2655
- },
2656
- handler: async (runtime, message, _state, _options, callback) => {
2657
- try {
2658
- const service = runtime.getService(KnowledgeService.serviceType);
2659
- if (!service) {
2660
- throw new Error("Knowledge service not available");
2661
- }
2662
- const text = message.content.text || "";
2663
- const query = text.replace(/^(search|find|look up|query)\s+(your\s+)?knowledge\s+(base\s+)?(for\s+)?/i, "").trim();
2664
- if (!query) {
2665
- const response2 = {
2666
- text: "What would you like me to search for in my knowledge base?"
2667
- };
2668
- if (callback) {
2669
- await callback(response2);
2670
- }
2671
- return;
2672
- }
2673
- const searchMessage = {
2674
- ...message,
2675
- content: {
2676
- text: query
2677
- }
2678
- };
2679
- const results = await service.getKnowledge(searchMessage);
2680
- let response;
2681
- if (results.length === 0) {
2682
- response = {
2683
- text: `I couldn't find any information about "${query}" in my knowledge base.`
2684
- };
2685
- } else {
2686
- const formattedResults = results.slice(0, 3).map((item, index) => `${index + 1}. ${item.content.text}`).join(`
2687
-
2688
- `);
2689
- response = {
2690
- text: `Here's what I found about "${query}":
2691
-
2692
- ${formattedResults}`
2693
- };
2694
- }
2695
- if (callback) {
2696
- await callback(response);
2697
- }
2698
- return { success: true, text: response.text };
2699
- } catch (error) {
2700
- logger5.error({ error }, "Error in SEARCH_KNOWLEDGE action");
2701
- const errorResponse = {
2702
- text: `I encountered an error while searching the knowledge base: ${error instanceof Error ? error.message : String(error)}`
2703
- };
2704
- if (callback) {
2705
- await callback(errorResponse);
2706
- }
2707
- return { success: false, error: error instanceof Error ? error.message : String(error) };
2708
- }
2709
- }
2710
- };
2711
- var knowledgeActions = [processKnowledgeAction, searchKnowledgeAction];
2712
-
2713
- // documents-provider.ts
2714
- import { addHeader, logger as logger6, MemoryType as MemoryType3 } from "@elizaos/core";
2715
- var documentsProvider = {
2716
- name: "AVAILABLE_DOCUMENTS",
2717
- description: "List of documents available in the knowledge base. Shows which documents the agent can reference and retrieve information from.",
2718
- dynamic: true,
2719
- get: async (runtime, _message, _state) => {
2720
- try {
2721
- const knowledgeService = runtime.getService("knowledge");
2722
- if (!knowledgeService) {
2723
- logger6.warn("Knowledge service not available for documents provider");
2724
- return {
2725
- data: { documents: [] },
2726
- values: {
2727
- documentsCount: 0,
2728
- documents: "",
2729
- availableDocuments: ""
2730
- },
2731
- text: ""
2732
- };
2733
- }
2734
- const allMemories = await knowledgeService.getMemories({
2735
- tableName: "documents",
2736
- roomId: runtime.agentId,
2737
- count: 100
2738
- });
2739
- const documents = allMemories.filter((memory) => memory.metadata?.type === MemoryType3.DOCUMENT);
2740
- if (!documents || documents.length === 0) {
2741
- return {
2742
- data: { documents: [] },
2743
- values: {
2744
- documentsCount: 0,
2745
- documents: "",
2746
- availableDocuments: ""
2747
- },
2748
- text: ""
2749
- };
2750
- }
2751
- const documentsList = documents.map((doc, index) => {
2752
- const metadata = doc.metadata;
2753
- const filename = metadata?.filename || metadata?.title || `Document ${index + 1}`;
2754
- const fileType = metadata?.fileExt || metadata?.fileType || "";
2755
- const source = metadata?.source || "upload";
2756
- const fileSize = metadata?.fileSize;
2757
- const parts = [filename];
2758
- if (fileType) {
2759
- parts.push(fileType);
2760
- }
2761
- if (fileSize) {
2762
- const sizeKB = Math.round(fileSize / 1024);
2763
- if (sizeKB > 1024) {
2764
- parts.push(`${Math.round(sizeKB / 1024)}MB`);
2765
- } else {
2766
- parts.push(`${sizeKB}KB`);
2767
- }
2768
- }
2769
- if (source && source !== "upload") {
2770
- parts.push(`from ${source}`);
2771
- }
2772
- return parts.join(" - ");
2773
- }).join(`
2774
- `);
2775
- const documentsText = addHeader("# Available Documents", `${documents.length} document(s) in knowledge base:
2776
- ${documentsList}`);
2777
- return {
2778
- data: {
2779
- documents: documents.map((doc) => ({
2780
- id: doc.id,
2781
- filename: doc.metadata?.filename || doc.metadata?.title,
2782
- fileType: doc.metadata?.fileType || doc.metadata?.fileExt,
2783
- source: doc.metadata?.source
2784
- })),
2785
- count: documents.length
2786
- },
2787
- values: {
2788
- documentsCount: documents.length,
2789
- documents: documentsList,
2790
- availableDocuments: documentsText
2791
- },
2792
- text: documentsText
2793
- };
2794
- } catch (error) {
2795
- logger6.error("Error in documents provider:", error instanceof Error ? error.message : String(error));
2796
- return {
2797
- data: { documents: [], error: error instanceof Error ? error.message : String(error) },
2798
- values: {
2799
- documentsCount: 0,
2800
- documents: "",
2801
- availableDocuments: ""
2802
- },
2803
- text: ""
2804
- };
2805
- }
2806
- }
2807
- };
2808
-
2809
- // provider.ts
2810
- import { addHeader as addHeader2 } from "@elizaos/core";
2811
- var knowledgeProvider = {
2812
- name: "KNOWLEDGE",
2813
- description: "Knowledge from the knowledge base that the agent knows, retrieved whenever the agent needs to answer a question about their expertise.",
2814
- dynamic: true,
2815
- get: async (runtime, message) => {
2816
- const knowledgeService = runtime.getService("knowledge");
2817
- const knowledgeData = await knowledgeService?.getKnowledge(message);
2818
- if (!knowledgeData || knowledgeData.length === 0) {
2819
- return {
2820
- text: "",
2821
- values: { knowledge: "", knowledgeUsed: false },
2822
- data: { knowledge: "", ragMetadata: null, knowledgeUsed: false }
2823
- };
2824
- }
2825
- const firstFiveKnowledgeItems = knowledgeData.slice(0, 5);
2826
- let knowledge = addHeader2("# Knowledge", firstFiveKnowledgeItems.map((item) => `- ${item.content.text}`).join(`
2827
- `));
2828
- const tokenLength = 3.5;
2829
- const maxChars = 4000 * tokenLength;
2830
- if (knowledge.length > maxChars) {
2831
- knowledge = knowledge.slice(0, maxChars);
1
+ // index.ts
2
+ import { createKnowledgePlugin as createNativeKnowledgePlugin } from "@elizaos/core";
3
+
4
+ // routes.ts
5
+ import fs from "node:fs";
6
+ import path from "node:path";
7
+ import { createUniqueUuid, KnowledgeService, logger, MemoryType, ModelType } from "@elizaos/core";
8
+ import multer from "multer";
9
+
10
+ // utils.ts
11
+ import { Buffer } from "node:buffer";
12
+ import * as mammoth from "mammoth";
13
+ import { extractText } from "unpdf";
14
+ var MAX_FALLBACK_SIZE_BYTES = 5 * 1024 * 1024;
15
+ function normalizeS3Url(url) {
16
+ try {
17
+ const urlObj = new URL(url);
18
+ return `${urlObj.origin}${urlObj.pathname}`;
19
+ } catch {
20
+ return url;
21
+ }
22
+ }
23
+ async function fetchUrlContent(url) {
24
+ try {
25
+ const controller = new AbortController;
26
+ const timeoutId = setTimeout(() => controller.abort(), 30000);
27
+ const response = await fetch(url, {
28
+ signal: controller.signal,
29
+ headers: {
30
+ "User-Agent": "Eliza-Knowledge-Plugin/1.0"
31
+ }
32
+ });
33
+ clearTimeout(timeoutId);
34
+ if (!response.ok) {
35
+ throw new Error(`Failed to fetch URL: ${response.status} ${response.statusText}`);
2832
36
  }
2833
- const ragMetadata = {
2834
- retrievedFragments: knowledgeData.map((fragment) => {
2835
- const fragmentMetadata = fragment.metadata;
2836
- return {
2837
- fragmentId: fragment.id,
2838
- documentTitle: fragmentMetadata?.filename || fragmentMetadata?.title || "",
2839
- similarityScore: fragment.similarity,
2840
- contentPreview: `${(fragment.content?.text || "").substring(0, 100)}...`
2841
- };
2842
- }),
2843
- queryText: message.content?.text || "",
2844
- totalFragments: knowledgeData.length,
2845
- retrievalTimestamp: Date.now()
2846
- };
2847
- knowledgeService.setPendingRAGMetadata(ragMetadata);
2848
- setTimeout(async () => {
2849
- await knowledgeService.enrichRecentMemoriesWithPendingRAG();
2850
- }, 2000);
37
+ const contentType = response.headers.get("content-type") || "application/octet-stream";
38
+ const arrayBuffer = await response.arrayBuffer();
39
+ const buffer = Buffer.from(arrayBuffer);
40
+ const base64Content = buffer.toString("base64");
2851
41
  return {
2852
- data: {
2853
- knowledge,
2854
- ragMetadata,
2855
- knowledgeUsed: true
2856
- },
2857
- values: {
2858
- knowledge,
2859
- knowledgeUsed: true
2860
- },
2861
- text: knowledge,
2862
- ragMetadata,
2863
- knowledgeUsed: true
42
+ content: base64Content,
43
+ contentType
2864
44
  };
45
+ } catch (error) {
46
+ const errorMessage = error instanceof Error ? error.message : String(error);
47
+ throw new Error(`Failed to fetch content from URL: ${errorMessage}`);
2865
48
  }
2866
- };
49
+ }
2867
50
 
2868
51
  // routes.ts
2869
- import fs3 from "node:fs";
2870
- import path3 from "node:path";
2871
- import { createUniqueUuid as createUniqueUuid2, logger as logger7, MemoryType as MemoryType4, ModelType as ModelType3 } from "@elizaos/core";
2872
- import multer from "multer";
2873
52
  function asWritableStream(res) {
2874
53
  return res;
2875
54
  }
@@ -2912,11 +91,11 @@ function sendError(res, status, code, message, details) {
2912
91
  res.end(JSON.stringify({ success: false, error: { code, message, details } }));
2913
92
  }
2914
93
  var cleanupFile = (filePath) => {
2915
- if (filePath && fs3.existsSync(filePath)) {
94
+ if (filePath && fs.existsSync(filePath)) {
2916
95
  try {
2917
- fs3.unlinkSync(filePath);
96
+ fs.unlinkSync(filePath);
2918
97
  } catch (error) {
2919
- logger7.error({ error }, `Error cleaning up file ${filePath}`);
98
+ logger.error({ error }, `Error cleaning up file ${filePath}`);
2920
99
  }
2921
100
  }
2922
101
  };
@@ -2945,15 +124,15 @@ async function uploadKnowledgeHandler(req, res, runtime) {
2945
124
  }
2946
125
  const invalidFiles = files.filter((file) => {
2947
126
  if (file.size === 0) {
2948
- logger7.warn(`File ${file.originalname} is empty`);
127
+ logger.warn(`File ${file.originalname} is empty`);
2949
128
  return true;
2950
129
  }
2951
130
  if (!file.originalname || file.originalname.trim() === "") {
2952
- logger7.warn(`File has no name`);
131
+ logger.warn(`File has no name`);
2953
132
  return true;
2954
133
  }
2955
134
  if (!file.path) {
2956
- logger7.warn(`File ${file.originalname} has no path`);
135
+ logger.warn(`File ${file.originalname} has no path`);
2957
136
  return true;
2958
137
  }
2959
138
  return false;
@@ -2965,16 +144,16 @@ async function uploadKnowledgeHandler(req, res, runtime) {
2965
144
  }
2966
145
  const agentId = req.body?.agentId || req.query?.agentId;
2967
146
  if (!agentId) {
2968
- logger7.error("No agent ID provided in upload request");
147
+ logger.error("No agent ID provided in upload request");
2969
148
  return sendError(res, 400, "MISSING_AGENT_ID", "Agent ID is required for uploading knowledge");
2970
149
  }
2971
150
  const worldId = req.body?.worldId || agentId;
2972
- logger7.info(`Processing file upload for agent: ${agentId}`);
151
+ logger.info(`Processing file upload for agent: ${agentId}`);
2973
152
  const processingPromises = files.map(async (file) => {
2974
153
  const originalFilename = file.originalname;
2975
154
  const filePath = file.path;
2976
155
  try {
2977
- const fileBuffer = await fs3.promises.readFile(filePath);
156
+ const fileBuffer = await fs.promises.readFile(filePath);
2978
157
  const base64Content = fileBuffer.toString("base64");
2979
158
  const addKnowledgeOpts = {
2980
159
  agentId,
@@ -2997,7 +176,7 @@ async function uploadKnowledgeHandler(req, res, runtime) {
2997
176
  status: "success"
2998
177
  };
2999
178
  } catch (fileError) {
3000
- logger7.error(`Error processing file ${file.originalname}: ${fileError instanceof Error ? fileError.message : String(fileError)}`);
179
+ logger.error(`Error processing file ${file.originalname}: ${fileError instanceof Error ? fileError.message : String(fileError)}`);
3001
180
  cleanupFile(filePath);
3002
181
  return {
3003
182
  id: "",
@@ -3016,7 +195,7 @@ async function uploadKnowledgeHandler(req, res, runtime) {
3016
195
  }
3017
196
  const agentId = req.body?.agentId || req.query?.agentId;
3018
197
  if (!agentId) {
3019
- logger7.error("No agent ID provided in URL request");
198
+ logger.error("No agent ID provided in URL request");
3020
199
  return sendError(res, 400, "MISSING_AGENT_ID", "Agent ID is required for uploading knowledge from URLs");
3021
200
  }
3022
201
  const processingPromises = fileUrls.map(async (fileUrl) => {
@@ -3026,7 +205,7 @@ async function uploadKnowledgeHandler(req, res, runtime) {
3026
205
  const pathSegments = urlObject.pathname.split("/");
3027
206
  const encodedFilename = pathSegments[pathSegments.length - 1] || "document.pdf";
3028
207
  const originalFilename = decodeURIComponent(encodedFilename);
3029
- logger7.debug(`Fetching content from URL: ${fileUrl}`);
208
+ logger.debug(`Fetching content from URL: ${fileUrl}`);
3030
209
  const { content, contentType: fetchedContentType } = await fetchUrlContent(fileUrl);
3031
210
  let contentType = fetchedContentType;
3032
211
  if (contentType === "application/octet-stream") {
@@ -3073,7 +252,7 @@ async function uploadKnowledgeHandler(req, res, runtime) {
3073
252
  status: "success"
3074
253
  };
3075
254
  } catch (urlError) {
3076
- logger7.error(`Error processing URL ${fileUrl}: ${urlError instanceof Error ? urlError.message : String(urlError)}`);
255
+ logger.error(`Error processing URL ${fileUrl}: ${urlError instanceof Error ? urlError.message : String(urlError)}`);
3077
256
  return {
3078
257
  fileUrl,
3079
258
  status: "error_processing",
@@ -3085,7 +264,7 @@ async function uploadKnowledgeHandler(req, res, runtime) {
3085
264
  sendSuccess(res, results);
3086
265
  }
3087
266
  } catch (error) {
3088
- logger7.error({ error }, "Error processing knowledge");
267
+ logger.error({ error }, "Error processing knowledge");
3089
268
  if (hasUploadedFiles) {
3090
269
  cleanupFiles(req.files);
3091
270
  }
@@ -3110,7 +289,7 @@ async function getKnowledgeDocumentsHandler(req, res, runtime) {
3110
289
  let filteredMemories = memories;
3111
290
  if (fileUrls && fileUrls.length > 0) {
3112
291
  const normalizedRequestUrls = fileUrls.map((url) => normalizeS3Url(String(url)));
3113
- const urlBasedIds = normalizedRequestUrls.map((url) => createUniqueUuid2(runtime, url));
292
+ const urlBasedIds = normalizedRequestUrls.map((url) => createUniqueUuid(runtime, url));
3114
293
  filteredMemories = memories.filter((memory) => urlBasedIds.includes(memory.id) || memory.metadata && ("url" in memory.metadata) && typeof memory.metadata.url === "string" && normalizedRequestUrls.includes(normalizeS3Url(memory.metadata.url)));
3115
294
  }
3116
295
  const cleanMemories = includeEmbedding ? filteredMemories : filteredMemories.map((memory) => ({
@@ -3124,7 +303,7 @@ async function getKnowledgeDocumentsHandler(req, res, runtime) {
3124
303
  totalRequested: fileUrls ? fileUrls.length : 0
3125
304
  });
3126
305
  } catch (error) {
3127
- logger7.error({ error }, "Error retrieving documents");
306
+ logger.error({ error }, "Error retrieving documents");
3128
307
  sendError(res, 500, "RETRIEVAL_ERROR", "Failed to retrieve documents", error instanceof Error ? error.message : String(error));
3129
308
  }
3130
309
  }
@@ -3135,16 +314,16 @@ async function deleteKnowledgeDocumentHandler(req, res, runtime) {
3135
314
  }
3136
315
  const knowledgeId = req.params?.knowledgeId;
3137
316
  if (!knowledgeId || knowledgeId.length < 36) {
3138
- logger7.error(`Invalid knowledge ID format: ${knowledgeId}`);
317
+ logger.error(`Invalid knowledge ID format: ${knowledgeId}`);
3139
318
  return sendError(res, 400, "INVALID_ID", "Invalid Knowledge ID format");
3140
319
  }
3141
320
  try {
3142
321
  const typedKnowledgeId = knowledgeId;
3143
- logger7.debug(`Deleting document: ${typedKnowledgeId}`);
322
+ logger.debug(`Deleting document: ${typedKnowledgeId}`);
3144
323
  await service.deleteMemory(typedKnowledgeId);
3145
324
  sendSuccess(res, null, 204);
3146
325
  } catch (error) {
3147
- logger7.error({ error }, `Error deleting document ${knowledgeId}`);
326
+ logger.error({ error }, `Error deleting document ${knowledgeId}`);
3148
327
  sendError(res, 500, "DELETE_ERROR", "Failed to delete document", error instanceof Error ? error.message : String(error));
3149
328
  }
3150
329
  }
@@ -3155,11 +334,11 @@ async function getKnowledgeByIdHandler(req, res, runtime) {
3155
334
  }
3156
335
  const knowledgeId = req.params?.knowledgeId;
3157
336
  if (!knowledgeId || knowledgeId.length < 36) {
3158
- logger7.error(`Invalid knowledge ID format: ${knowledgeId}`);
337
+ logger.error(`Invalid knowledge ID format: ${knowledgeId}`);
3159
338
  return sendError(res, 400, "INVALID_ID", "Invalid Knowledge ID format");
3160
339
  }
3161
340
  try {
3162
- logger7.debug(`Retrieving document: ${knowledgeId}`);
341
+ logger.debug(`Retrieving document: ${knowledgeId}`);
3163
342
  const memories = await service.getMemories({
3164
343
  tableName: "documents",
3165
344
  count: 1e4
@@ -3175,7 +354,7 @@ async function getKnowledgeByIdHandler(req, res, runtime) {
3175
354
  };
3176
355
  sendSuccess(res, { document: cleanDocument });
3177
356
  } catch (error) {
3178
- logger7.error({ error }, `Error retrieving document ${knowledgeId}`);
357
+ logger.error({ error }, `Error retrieving document ${knowledgeId}`);
3179
358
  sendError(res, 500, "RETRIEVAL_ERROR", "Failed to retrieve document", error instanceof Error ? error.message : String(error));
3180
359
  }
3181
360
  }
@@ -3184,10 +363,10 @@ async function knowledgePanelHandler(req, res, runtime) {
3184
363
  const requestPath = req.originalUrl || req.url || req.path || "";
3185
364
  const pluginBasePath = requestPath.replace(/\/display.*$/, "");
3186
365
  try {
3187
- const currentDir = path3.dirname(new URL(import.meta.url).pathname);
3188
- const frontendPath = path3.join(currentDir, "../dist/index.html");
3189
- if (fs3.existsSync(frontendPath)) {
3190
- const html = await fs3.promises.readFile(frontendPath, "utf8");
366
+ const currentDir = path.dirname(new URL(import.meta.url).pathname);
367
+ const frontendPath = path.join(currentDir, "../dist/index.html");
368
+ if (fs.existsSync(frontendPath)) {
369
+ const html = await fs.promises.readFile(frontendPath, "utf8");
3191
370
  let injectedHtml = html.replace("<head>", `<head>
3192
371
  <script>
3193
372
  window.ELIZA_CONFIG = {
@@ -3202,10 +381,10 @@ async function knowledgePanelHandler(req, res, runtime) {
3202
381
  } else {
3203
382
  let cssFile = "index.css";
3204
383
  let jsFile = "index.js";
3205
- const manifestPath = path3.join(currentDir, "../dist/manifest.json");
3206
- if (fs3.existsSync(manifestPath)) {
384
+ const manifestPath = path.join(currentDir, "../dist/manifest.json");
385
+ if (fs.existsSync(manifestPath)) {
3207
386
  try {
3208
- const manifestContent = await fs3.promises.readFile(manifestPath, "utf8");
387
+ const manifestContent = await fs.promises.readFile(manifestPath, "utf8");
3209
388
  const manifest = JSON.parse(manifestContent);
3210
389
  for (const [key, value] of Object.entries(manifest)) {
3211
390
  if (typeof value === "object" && value !== null) {
@@ -3253,14 +432,14 @@ async function knowledgePanelHandler(req, res, runtime) {
3253
432
  res.end(html);
3254
433
  }
3255
434
  } catch (error) {
3256
- logger7.error({ error }, "Error serving frontend");
435
+ logger.error({ error }, "Error serving frontend");
3257
436
  sendError(res, 500, "FRONTEND_ERROR", "Failed to load knowledge panel", error instanceof Error ? error.message : String(error));
3258
437
  }
3259
438
  }
3260
439
  async function frontendAssetHandler(req, res, _runtime) {
3261
440
  try {
3262
441
  const fullPath = req.originalUrl || req.url || req.path || "";
3263
- const currentDir = path3.dirname(new URL(import.meta.url).pathname);
442
+ const currentDir = path.dirname(new URL(import.meta.url).pathname);
3264
443
  const assetsMarker = "/assets/";
3265
444
  const assetsStartIndex = fullPath.lastIndexOf(assetsMarker);
3266
445
  let assetName = null;
@@ -3274,9 +453,9 @@ async function frontendAssetHandler(req, res, _runtime) {
3274
453
  if (!assetName || assetName.includes("..")) {
3275
454
  return sendError(res, 400, "BAD_REQUEST", `Invalid asset name: '${assetName}' from path ${fullPath}`);
3276
455
  }
3277
- const assetPath = path3.join(currentDir, "../dist/assets", assetName);
3278
- if (fs3.existsSync(assetPath)) {
3279
- const fileStream = fs3.createReadStream(assetPath);
456
+ const assetPath = path.join(currentDir, "../dist/assets", assetName);
457
+ if (fs.existsSync(assetPath)) {
458
+ const fileStream = fs.createReadStream(assetPath);
3280
459
  let contentType = "application/octet-stream";
3281
460
  if (assetPath.endsWith(".js")) {
3282
461
  contentType = "application/javascript";
@@ -3289,7 +468,7 @@ async function frontendAssetHandler(req, res, _runtime) {
3289
468
  sendError(res, 404, "NOT_FOUND", `Asset not found: ${req.url}`);
3290
469
  }
3291
470
  } catch (error) {
3292
- logger7.error({ error }, `Error serving asset ${req.url}`);
471
+ logger.error({ error }, `Error serving asset ${req.url}`);
3293
472
  sendError(res, 500, "ASSET_ERROR", `Failed to load asset ${req.url}`, error instanceof Error ? error.message : String(error));
3294
473
  }
3295
474
  }
@@ -3348,7 +527,7 @@ async function getKnowledgeChunksHandler(req, res, runtime) {
3348
527
  }
3349
528
  });
3350
529
  } catch (error) {
3351
- logger7.error({ error }, "Error retrieving chunks");
530
+ logger.error({ error }, "Error retrieving chunks");
3352
531
  sendError(res, 500, "RETRIEVAL_ERROR", "Failed to retrieve knowledge chunks", error instanceof Error ? error.message : String(error));
3353
532
  }
3354
533
  }
@@ -3369,7 +548,7 @@ async function searchKnowledgeHandler(req, res, runtime) {
3369
548
  if (!searchText || searchText.trim().length === 0) {
3370
549
  return sendError(res, 400, "INVALID_QUERY", "Search query cannot be empty");
3371
550
  }
3372
- const embedding = await runtime.useModel(ModelType3.TEXT_EMBEDDING, {
551
+ const embedding = await runtime.useModel(ModelType.TEXT_EMBEDDING, {
3373
552
  text: searchText
3374
553
  });
3375
554
  const results = await runtime.searchMemories({
@@ -3412,7 +591,7 @@ async function searchKnowledgeHandler(req, res, runtime) {
3412
591
  count: enhancedResults.length
3413
592
  });
3414
593
  } catch (error) {
3415
- logger7.error({ error }, "Error searching knowledge");
594
+ logger.error({ error }, "Error searching knowledge");
3416
595
  sendError(res, 500, "SEARCH_ERROR", "Failed to search knowledge", error instanceof Error ? error.message : String(error));
3417
596
  }
3418
597
  }
@@ -3446,7 +625,7 @@ async function getGraphNodesHandler(req, res, runtime) {
3446
625
  const links = [];
3447
626
  paginatedDocuments.forEach((doc) => {
3448
627
  if (!doc.id) {
3449
- logger7.warn("Skipping document without ID");
628
+ logger.warn("Skipping document without ID");
3450
629
  return;
3451
630
  }
3452
631
  nodes.push({ id: doc.id, type: "document" });
@@ -3464,7 +643,7 @@ async function getGraphNodesHandler(req, res, runtime) {
3464
643
  const docFragments = allFragments.filter((fragment) => {
3465
644
  const metadata = fragment.metadata;
3466
645
  const typeString = typeof metadata?.type === "string" ? metadata.type : null;
3467
- const isFragment = typeString && typeString.toLowerCase() === "fragment" || metadata?.type === MemoryType4.FRAGMENT || !metadata?.type && metadata?.documentId;
646
+ const isFragment = typeString && typeString.toLowerCase() === "fragment" || metadata?.type === MemoryType.FRAGMENT || !metadata?.type && metadata?.documentId;
3468
647
  return metadata?.documentId === doc.id && isFragment;
3469
648
  });
3470
649
  docFragments.forEach((frag) => {
@@ -3488,7 +667,7 @@ async function getGraphNodesHandler(req, res, runtime) {
3488
667
  }
3489
668
  });
3490
669
  } catch (error) {
3491
- logger7.error({ error }, "Error fetching graph nodes");
670
+ logger.error({ error }, "Error fetching graph nodes");
3492
671
  sendError(res, 500, "GRAPH_ERROR", "Failed to fetch graph nodes", error instanceof Error ? error.message : String(error));
3493
672
  }
3494
673
  }
@@ -3547,10 +726,10 @@ async function getGraphNodeDetailsHandler(req, res, runtime) {
3547
726
  });
3548
727
  return;
3549
728
  }
3550
- logger7.error(`Node ${nodeId} not found`);
729
+ logger.error(`Node ${nodeId} not found`);
3551
730
  sendError(res, 404, "NOT_FOUND", `Node with ID ${nodeId} not found`);
3552
731
  } catch (error) {
3553
- logger7.error({ error }, `Error fetching node details for ${nodeId}`);
732
+ logger.error({ error }, `Error fetching node details for ${nodeId}`);
3554
733
  sendError(res, 500, "GRAPH_ERROR", "Failed to fetch node details", error instanceof Error ? error.message : String(error));
3555
734
  }
3556
735
  }
@@ -3573,7 +752,7 @@ async function expandDocumentGraphHandler(req, res, runtime) {
3573
752
  const documentFragments = allFragments.filter((fragment) => {
3574
753
  const metadata = fragment.metadata;
3575
754
  const typeString = typeof metadata?.type === "string" ? metadata.type : null;
3576
- const isFragment = typeString && typeString.toLowerCase() === "fragment" || metadata?.type === MemoryType4.FRAGMENT || !metadata?.type && metadata?.documentId;
755
+ const isFragment = typeString && typeString.toLowerCase() === "fragment" || metadata?.type === MemoryType.FRAGMENT || !metadata?.type && metadata?.documentId;
3577
756
  return metadata?.documentId === documentId && isFragment;
3578
757
  });
3579
758
  const nodes = documentFragments.filter((frag) => frag.id !== undefined).map((frag) => ({
@@ -3591,7 +770,7 @@ async function expandDocumentGraphHandler(req, res, runtime) {
3591
770
  fragmentCount: nodes.length
3592
771
  });
3593
772
  } catch (error) {
3594
- logger7.error({ error }, `Error expanding document ${documentId}`);
773
+ logger.error({ error }, `Error expanding document ${documentId}`);
3595
774
  sendError(res, 500, "GRAPH_ERROR", "Failed to expand document", error instanceof Error ? error.message : String(error));
3596
775
  }
3597
776
  }
@@ -3600,7 +779,7 @@ async function uploadKnowledgeWithMulter(req, res, runtime) {
3600
779
  const uploadArray = upload.array("files", parseInt(String(runtime.getSetting("KNOWLEDGE_MAX_FILES") || "10"), 10));
3601
780
  uploadArray(req, res, (err) => {
3602
781
  if (err) {
3603
- logger7.error({ error: err }, "File upload error");
782
+ logger.error({ error: err }, "File upload error");
3604
783
  return sendError(res, 400, "UPLOAD_ERROR", err.message);
3605
784
  }
3606
785
  uploadKnowledgeHandler(req, res, runtime);
@@ -3669,22 +848,205 @@ var knowledgeRoutes = [
3669
848
  }
3670
849
  ];
3671
850
 
851
+ // index.ts
852
+ import { KnowledgeService as KnowledgeService2 } from "@elizaos/core";
853
+
854
+ // documents-provider.ts
855
+ import { addHeader, logger as logger2, MemoryType as MemoryType2 } from "@elizaos/core";
856
+ var documentsProvider = {
857
+ name: "AVAILABLE_DOCUMENTS",
858
+ description: "List of documents available in the knowledge base. Shows which documents the agent can reference and retrieve information from.",
859
+ dynamic: true,
860
+ get: async (runtime, _message, _state) => {
861
+ try {
862
+ const knowledgeService = runtime.getService("knowledge");
863
+ if (!knowledgeService) {
864
+ logger2.warn("Knowledge service not available for documents provider");
865
+ return {
866
+ data: { documents: [] },
867
+ values: {
868
+ documentsCount: 0,
869
+ documents: "",
870
+ availableDocuments: ""
871
+ },
872
+ text: ""
873
+ };
874
+ }
875
+ const allMemories = await knowledgeService.getMemories({
876
+ tableName: "documents",
877
+ roomId: runtime.agentId,
878
+ count: 100
879
+ });
880
+ const documents = allMemories.filter((memory) => memory.metadata?.type === MemoryType2.DOCUMENT);
881
+ if (!documents || documents.length === 0) {
882
+ return {
883
+ data: { documents: [] },
884
+ values: {
885
+ documentsCount: 0,
886
+ documents: "",
887
+ availableDocuments: ""
888
+ },
889
+ text: ""
890
+ };
891
+ }
892
+ const documentsList = documents.map((doc, index) => {
893
+ const metadata = doc.metadata;
894
+ const filename = metadata?.filename || metadata?.title || `Document ${index + 1}`;
895
+ const fileType = metadata?.fileExt || metadata?.fileType || "";
896
+ const source = metadata?.source || "upload";
897
+ const fileSize = metadata?.fileSize;
898
+ const parts = [filename];
899
+ if (fileType) {
900
+ parts.push(fileType);
901
+ }
902
+ if (fileSize) {
903
+ const sizeKB = Math.round(fileSize / 1024);
904
+ if (sizeKB > 1024) {
905
+ parts.push(`${Math.round(sizeKB / 1024)}MB`);
906
+ } else {
907
+ parts.push(`${sizeKB}KB`);
908
+ }
909
+ }
910
+ if (source && source !== "upload") {
911
+ parts.push(`from ${source}`);
912
+ }
913
+ return parts.join(" - ");
914
+ }).join(`
915
+ `);
916
+ const documentsText = addHeader("# Available Documents", `${documents.length} document(s) in knowledge base:
917
+ ${documentsList}`);
918
+ return {
919
+ data: {
920
+ documents: documents.map((doc) => ({
921
+ id: doc.id,
922
+ filename: doc.metadata?.filename || doc.metadata?.title,
923
+ fileType: doc.metadata?.fileType || doc.metadata?.fileExt,
924
+ source: doc.metadata?.source
925
+ })),
926
+ count: documents.length
927
+ },
928
+ values: {
929
+ documentsCount: documents.length,
930
+ documents: documentsList,
931
+ availableDocuments: documentsText
932
+ },
933
+ text: documentsText
934
+ };
935
+ } catch (error) {
936
+ logger2.error("Error in documents provider:", error instanceof Error ? error.message : String(error));
937
+ return {
938
+ data: { documents: [], error: error instanceof Error ? error.message : String(error) },
939
+ values: {
940
+ documentsCount: 0,
941
+ documents: "",
942
+ availableDocuments: ""
943
+ },
944
+ text: ""
945
+ };
946
+ }
947
+ }
948
+ };
949
+ // provider.ts
950
+ import { addHeader as addHeader2 } from "@elizaos/core";
951
+ var knowledgeProvider = {
952
+ name: "KNOWLEDGE",
953
+ description: "Knowledge from the knowledge base that the agent knows, retrieved whenever the agent needs to answer a question about their expertise.",
954
+ dynamic: true,
955
+ get: async (runtime, message) => {
956
+ const knowledgeService = runtime.getService("knowledge");
957
+ const knowledgeData = await knowledgeService?.getKnowledge(message);
958
+ if (!knowledgeData || knowledgeData.length === 0) {
959
+ return {
960
+ text: "",
961
+ values: { knowledge: "", knowledgeUsed: false },
962
+ data: { knowledge: "", ragMetadata: null, knowledgeUsed: false }
963
+ };
964
+ }
965
+ const firstFiveKnowledgeItems = knowledgeData.slice(0, 5);
966
+ let knowledge = addHeader2("# Knowledge", firstFiveKnowledgeItems.map((item) => `- ${item.content.text}`).join(`
967
+ `));
968
+ const tokenLength = 3.5;
969
+ const maxChars = 4000 * tokenLength;
970
+ if (knowledge.length > maxChars) {
971
+ knowledge = knowledge.slice(0, maxChars);
972
+ }
973
+ const ragMetadata = {
974
+ retrievedFragments: knowledgeData.map((fragment) => {
975
+ const fragmentMetadata = fragment.metadata;
976
+ return {
977
+ fragmentId: fragment.id,
978
+ documentTitle: fragmentMetadata?.filename || fragmentMetadata?.title || "",
979
+ similarityScore: fragment.similarity,
980
+ contentPreview: `${(fragment.content?.text || "").substring(0, 100)}...`
981
+ };
982
+ }),
983
+ queryText: message.content?.text || "",
984
+ totalFragments: knowledgeData.length,
985
+ retrievalTimestamp: Date.now()
986
+ };
987
+ knowledgeService.setPendingRAGMetadata(ragMetadata);
988
+ setTimeout(async () => {
989
+ await knowledgeService.enrichRecentMemoriesWithPendingRAG();
990
+ }, 2000);
991
+ return {
992
+ data: {
993
+ knowledge,
994
+ ragMetadata,
995
+ knowledgeUsed: true
996
+ },
997
+ values: {
998
+ knowledge,
999
+ knowledgeUsed: true
1000
+ },
1001
+ text: knowledge,
1002
+ ragMetadata,
1003
+ knowledgeUsed: true
1004
+ };
1005
+ }
1006
+ };
1007
+ // types.ts
1008
+ import z from "zod";
1009
+ var ModelConfigSchema = z.object({
1010
+ EMBEDDING_PROVIDER: z.enum(["local", "openai", "google"]).optional(),
1011
+ TEXT_PROVIDER: z.enum(["openai", "anthropic", "openrouter", "google"]).optional(),
1012
+ OPENAI_API_KEY: z.string().optional(),
1013
+ ANTHROPIC_API_KEY: z.string().optional(),
1014
+ OPENROUTER_API_KEY: z.string().optional(),
1015
+ GOOGLE_API_KEY: z.string().optional(),
1016
+ OPENAI_BASE_URL: z.string().optional(),
1017
+ ANTHROPIC_BASE_URL: z.string().optional(),
1018
+ OPENROUTER_BASE_URL: z.string().optional(),
1019
+ GOOGLE_BASE_URL: z.string().optional(),
1020
+ TEXT_EMBEDDING_MODEL: z.string(),
1021
+ TEXT_MODEL: z.string().optional(),
1022
+ MAX_INPUT_TOKENS: z.string().or(z.number()).transform((val) => typeof val === "string" ? parseInt(val, 10) : val),
1023
+ MAX_OUTPUT_TOKENS: z.string().or(z.number()).optional().transform((val) => val ? typeof val === "string" ? parseInt(val, 10) : val : 4096),
1024
+ EMBEDDING_DIMENSION: z.string().or(z.number()).optional().transform((val) => val ? typeof val === "string" ? parseInt(val, 10) : val : 1536),
1025
+ LOAD_DOCS_ON_STARTUP: z.boolean().default(false),
1026
+ CTX_KNOWLEDGE_ENABLED: z.boolean().default(false),
1027
+ RATE_LIMIT_ENABLED: z.boolean().default(true),
1028
+ MAX_CONCURRENT_REQUESTS: z.string().or(z.number()).optional().transform((val) => val ? typeof val === "string" ? parseInt(val, 10) : val : 150),
1029
+ REQUESTS_PER_MINUTE: z.string().or(z.number()).optional().transform((val) => val ? typeof val === "string" ? parseInt(val, 10) : val : 300),
1030
+ TOKENS_PER_MINUTE: z.string().or(z.number()).optional().transform((val) => val ? typeof val === "string" ? parseInt(val, 10) : val : 750000),
1031
+ BATCH_DELAY_MS: z.string().or(z.number()).optional().transform((val) => val ? typeof val === "string" ? parseInt(val, 10) : val : 100)
1032
+ });
1033
+ var KnowledgeServiceType = {
1034
+ KNOWLEDGE: "knowledge"
1035
+ };
1036
+
3672
1037
  // index.ts
3673
1038
  function createKnowledgePlugin(config = {}) {
3674
- const { enableUI = true, enableRoutes = true, enableActions = true, enableTests = true } = config;
3675
- const plugin = {
3676
- name: "knowledge",
3677
- description: "Plugin for Retrieval Augmented Generation, including knowledge management and embedding.",
3678
- services: [KnowledgeService],
3679
- providers: [knowledgeProvider, documentsProvider]
3680
- };
1039
+ const { enableUI = true, enableRoutes = true, enableActions = true } = config;
1040
+ const plugin = createNativeKnowledgePlugin({
1041
+ enableActions,
1042
+ enableProviders: true
1043
+ });
1044
+ if (!enableActions) {
1045
+ plugin.actions = undefined;
1046
+ }
3681
1047
  if (enableUI || enableRoutes) {
3682
1048
  plugin.routes = knowledgeRoutes;
3683
1049
  }
3684
- if (enableActions) {
3685
- plugin.actions = knowledgeActions;
3686
- }
3687
- if (enableTests) {}
3688
1050
  return plugin;
3689
1051
  }
3690
1052
  var knowledgePluginCore = createKnowledgePlugin({
@@ -3703,9 +1065,10 @@ var knowledgePlugin = createKnowledgePlugin({
3703
1065
  enableUI: true,
3704
1066
  enableRoutes: true,
3705
1067
  enableActions: true,
3706
- enableTests: true
1068
+ enableTests: false
3707
1069
  });
3708
1070
  export {
1071
+ knowledgeRoutes,
3709
1072
  knowledgeProvider,
3710
1073
  knowledgePluginHeadless,
3711
1074
  knowledgePluginCore,
@@ -3714,8 +1077,8 @@ export {
3714
1077
  createKnowledgePlugin,
3715
1078
  ModelConfigSchema,
3716
1079
  KnowledgeServiceType,
3717
- KnowledgeService
1080
+ KnowledgeService2 as KnowledgeService
3718
1081
  };
3719
1082
 
3720
- //# debugId=40399672E222E6EF64756E2164756E21
1083
+ //# debugId=A4E0C70529C57E3764756E2164756E21
3721
1084
  //# sourceMappingURL=index.node.js.map