@elizaos/plugin-knowledge 1.0.0-beta.70

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js ADDED
@@ -0,0 +1,3122 @@
1
+ // src/index.ts
2
+ import { logger as logger6 } from "@elizaos/core";
3
+
4
+ // src/types.ts
5
+ import z from "zod";
6
+ var ModelConfigSchema = z.object({
7
+ // Provider configuration
8
+ // NOTE: If EMBEDDING_PROVIDER is not specified, the plugin automatically assumes
9
+ // plugin-openai is being used and will use OPENAI_EMBEDDING_MODEL and
10
+ // OPENAI_EMBEDDING_DIMENSIONS for configuration
11
+ EMBEDDING_PROVIDER: z.enum(["openai", "google"]),
12
+ TEXT_PROVIDER: z.enum(["openai", "anthropic", "openrouter", "google"]).optional(),
13
+ // API keys
14
+ OPENAI_API_KEY: z.string().optional(),
15
+ ANTHROPIC_API_KEY: z.string().optional(),
16
+ OPENROUTER_API_KEY: z.string().optional(),
17
+ GOOGLE_API_KEY: z.string().optional(),
18
+ // Base URLs (optional for most providers)
19
+ OPENAI_BASE_URL: z.string().optional(),
20
+ ANTHROPIC_BASE_URL: z.string().optional(),
21
+ OPENROUTER_BASE_URL: z.string().optional(),
22
+ GOOGLE_BASE_URL: z.string().optional(),
23
+ // Model names
24
+ TEXT_EMBEDDING_MODEL: z.string(),
25
+ TEXT_MODEL: z.string().optional(),
26
+ // Token limits
27
+ MAX_INPUT_TOKENS: z.string().or(z.number()).transform((val) => typeof val === "string" ? parseInt(val, 10) : val),
28
+ MAX_OUTPUT_TOKENS: z.string().or(z.number()).optional().transform(
29
+ (val) => val ? typeof val === "string" ? parseInt(val, 10) : val : 4096
30
+ ),
31
+ // Embedding dimension
32
+ // For OpenAI: Only applies to text-embedding-3-small and text-embedding-3-large models
33
+ // Default: 1536 dimensions
34
+ EMBEDDING_DIMENSION: z.string().or(z.number()).optional().transform(
35
+ (val) => val ? typeof val === "string" ? parseInt(val, 10) : val : 1536
36
+ ),
37
+ // Contextual Knowledge settings
38
+ CTX_KNOWLEDGE_ENABLED: z.boolean().default(false)
39
+ });
40
+ var KnowledgeServiceType = {
41
+ KNOWLEDGE: "knowledge"
42
+ };
43
+
44
+ // src/config.ts
45
+ import z2 from "zod";
46
+ import { logger } from "@elizaos/core";
47
// Read the model configuration from process.env, validate it against
// ModelConfigSchema, run cross-field requirement checks, and return the
// parsed config. Throws Error with a flattened message on schema failure.
function validateModelConfig() {
  try {
    const env = process.env;
    const ctxEnabled = env.CTX_KNOWLEDGE_ENABLED === "true";
    logger.debug(`Configuration: CTX_KNOWLEDGE_ENABLED=${ctxEnabled}`);
    // No explicit EMBEDDING_PROVIDER means we fall back to plugin-openai's
    // OPENAI_* variables for the embedding configuration.
    const assumePluginOpenAI = !env.EMBEDDING_PROVIDER;
    if (assumePluginOpenAI) {
      if (env.OPENAI_API_KEY && env.OPENAI_EMBEDDING_MODEL) {
        logger.info(
          "EMBEDDING_PROVIDER not specified, using configuration from plugin-openai"
        );
      } else {
        logger.warn(
          "EMBEDDING_PROVIDER not specified, but plugin-openai configuration incomplete. Check OPENAI_API_KEY and OPENAI_EMBEDDING_MODEL."
        );
      }
    }
    const config = ModelConfigSchema.parse({
      EMBEDDING_PROVIDER: env.EMBEDDING_PROVIDER || "openai",
      TEXT_PROVIDER: env.TEXT_PROVIDER,
      OPENAI_API_KEY: env.OPENAI_API_KEY,
      ANTHROPIC_API_KEY: env.ANTHROPIC_API_KEY,
      OPENROUTER_API_KEY: env.OPENROUTER_API_KEY,
      GOOGLE_API_KEY: env.GOOGLE_API_KEY,
      OPENAI_BASE_URL: env.OPENAI_BASE_URL,
      ANTHROPIC_BASE_URL: env.ANTHROPIC_BASE_URL,
      OPENROUTER_BASE_URL: env.OPENROUTER_BASE_URL,
      GOOGLE_BASE_URL: env.GOOGLE_BASE_URL,
      // Prefer the plugin's own variable, then plugin-openai's, then a default.
      TEXT_EMBEDDING_MODEL: env.TEXT_EMBEDDING_MODEL || env.OPENAI_EMBEDDING_MODEL || "text-embedding-3-small",
      TEXT_MODEL: env.TEXT_MODEL,
      MAX_INPUT_TOKENS: env.MAX_INPUT_TOKENS || 4e3,
      MAX_OUTPUT_TOKENS: env.MAX_OUTPUT_TOKENS || 4096,
      EMBEDDING_DIMENSION: env.EMBEDDING_DIMENSION || env.OPENAI_EMBEDDING_DIMENSIONS || 1536,
      CTX_KNOWLEDGE_ENABLED: ctxEnabled
    });
    validateConfigRequirements(config, assumePluginOpenAI);
    return config;
  } catch (error) {
    // Flatten zod issues into a single readable error message.
    if (error instanceof z2.ZodError) {
      const issues = error.issues.map((issue) => `${issue.path.join(".")}: ${issue.message}`).join(", ");
      throw new Error(`Model configuration validation failed: ${issues}`);
    }
    throw error;
  }
}
95
// Cross-field requirement checks that the zod schema alone cannot express:
// provider-specific API keys and the extra settings needed when Contextual
// Knowledge is enabled. Throws Error on the first unmet requirement.
function validateConfigRequirements(config, assumePluginOpenAI) {
  // Throw `message` unless `value` is truthy.
  const requireValue = (value, message) => {
    if (!value) {
      throw new Error(message);
    }
  };
  if (assumePluginOpenAI) {
    requireValue(
      config.OPENAI_API_KEY,
      "OPENAI_API_KEY is required when using plugin-openai configuration"
    );
    requireValue(
      config.TEXT_EMBEDDING_MODEL,
      "OPENAI_EMBEDDING_MODEL is required when using plugin-openai configuration"
    );
  } else {
    if (config.EMBEDDING_PROVIDER === "openai") {
      requireValue(
        config.OPENAI_API_KEY,
        'OPENAI_API_KEY is required when EMBEDDING_PROVIDER is set to "openai"'
      );
    }
    if (config.EMBEDDING_PROVIDER === "google") {
      requireValue(
        config.GOOGLE_API_KEY,
        'GOOGLE_API_KEY is required when EMBEDDING_PROVIDER is set to "google"'
      );
    }
  }
  if (config.CTX_KNOWLEDGE_ENABLED) {
    logger.info(
      "Contextual Knowledge is enabled. Validating text generation settings..."
    );
    requireValue(
      config.TEXT_PROVIDER,
      "TEXT_PROVIDER is required when CTX_KNOWLEDGE_ENABLED is true"
    );
    requireValue(
      config.TEXT_MODEL,
      "TEXT_MODEL is required when CTX_KNOWLEDGE_ENABLED is true"
    );
    // Each text provider needs its own API key.
    const keyRequirements = {
      openai: ["OPENAI_API_KEY", 'OPENAI_API_KEY is required when TEXT_PROVIDER is set to "openai"'],
      anthropic: ["ANTHROPIC_API_KEY", 'ANTHROPIC_API_KEY is required when TEXT_PROVIDER is set to "anthropic"'],
      openrouter: ["OPENROUTER_API_KEY", 'OPENROUTER_API_KEY is required when TEXT_PROVIDER is set to "openrouter"'],
      google: ["GOOGLE_API_KEY", 'GOOGLE_API_KEY is required when TEXT_PROVIDER is set to "google"']
    };
    const requirement = keyRequirements[config.TEXT_PROVIDER];
    if (requirement) {
      requireValue(config[requirement[0]], requirement[1]);
    }
    if (config.TEXT_PROVIDER === "openrouter") {
      const modelName = config.TEXT_MODEL?.toLowerCase() || "";
      // Claude/Gemini via OpenRouter can use document caching.
      if (modelName.includes("claude") || modelName.includes("gemini")) {
        logger.info(
          `Using ${modelName} with OpenRouter. This configuration supports document caching for improved performance.`
        );
      }
    }
  } else if (assumePluginOpenAI) {
    logger.info(
      "Contextual Knowledge is disabled. Using embedding configuration from plugin-openai."
    );
  } else {
    logger.info(
      "Contextual Knowledge is disabled. Using basic embedding-only configuration."
    );
  }
}
173
// Resolve effective rate limits for the configured embedding provider.
// Env overrides (MAX_CONCURRENT_REQUESTS, REQUESTS_PER_MINUTE,
// TOKENS_PER_MINUTE) are clamped to known per-provider ceilings.
async function getProviderRateLimits() {
  const config = validateModelConfig();
  const maxConcurrentRequests = getEnvInt("MAX_CONCURRENT_REQUESTS", 30);
  const requestsPerMinute = getEnvInt("REQUESTS_PER_MINUTE", 60);
  const tokensPerMinute = getEnvInt("TOKENS_PER_MINUTE", 15e4);
  // Known provider ceilings; providers not listed here get the raw values.
  const providerCaps = {
    openai: { rpm: 3e3, tpm: 15e4 },
    google: { rpm: 60, tpm: 1e5 }
  };
  const caps = providerCaps[config.EMBEDDING_PROVIDER];
  if (!caps) {
    return {
      maxConcurrentRequests,
      requestsPerMinute,
      tokensPerMinute,
      provider: config.EMBEDDING_PROVIDER
    };
  }
  return {
    maxConcurrentRequests,
    requestsPerMinute: Math.min(requestsPerMinute, caps.rpm),
    tokensPerMinute: Math.min(tokensPerMinute, caps.tpm),
    provider: config.EMBEDDING_PROVIDER
  };
}
202
// Read an integer from the named environment variable, returning
// `defaultValue` when the variable is unset, empty, or not a valid number.
// BUG FIX: previously a malformed value (e.g. "abc") produced NaN, which
// silently poisoned downstream Math.min comparisons; now it falls back
// to the default instead.
function getEnvInt(envVar, defaultValue) {
  const raw = process.env[envVar];
  if (!raw) {
    return defaultValue;
  }
  const parsed = parseInt(raw, 10);
  return Number.isNaN(parsed) ? defaultValue : parsed;
}
205
+
206
+ // src/service.ts
207
+ import {
208
+ createUniqueUuid,
209
+ logger as logger4,
210
+ MemoryType as MemoryType2,
211
+ ModelType as ModelType2,
212
+ Semaphore,
213
+ Service,
214
+ splitChunks as splitChunks2
215
+ } from "@elizaos/core";
216
+
217
+ // src/document-processor.ts
218
+ import {
219
+ MemoryType,
220
+ ModelType,
221
+ logger as logger3,
222
+ splitChunks
223
+ } from "@elizaos/core";
224
+
225
// node_modules/uuid/dist/esm/stringify.js
// Precomputed two-character hex strings for every byte value (00..ff).
var byteToHex = [];
for (let i = 0; i < 256; ++i) {
  byteToHex.push((i + 256).toString(16).slice(1));
}
// Format 16 bytes starting at `offset` as a canonical 8-4-4-4-12 UUID
// string. "unsafe" because the input bytes are not validated.
function unsafeStringify(arr, offset = 0) {
  const hex = (i) => byteToHex[arr[offset + i]];
  const groups = [
    hex(0) + hex(1) + hex(2) + hex(3),
    hex(4) + hex(5),
    hex(6) + hex(7),
    hex(8) + hex(9),
    hex(10) + hex(11) + hex(12) + hex(13) + hex(14) + hex(15)
  ];
  return groups.join("-").toLowerCase();
}
233
+
234
// node_modules/uuid/dist/esm/rng.js
import { randomFillSync } from "crypto";
// Pool of random bytes, refilled 256 bytes at a time to amortize the
// cost of randomFillSync calls.
var rnds8Pool = new Uint8Array(256);
var poolPtr = rnds8Pool.length;
// Return 16 fresh random bytes, refilling the pool when it runs low.
function rng() {
  if (poolPtr > rnds8Pool.length - 16) {
    randomFillSync(rnds8Pool);
    poolPtr = 0;
  }
  const bytes = rnds8Pool.slice(poolPtr, poolPtr + 16);
  poolPtr += 16;
  return bytes;
}
245
+
246
// node_modules/uuid/dist/esm/native.js
import { randomUUID } from "crypto";
var native_default = { randomUUID };

// node_modules/uuid/dist/esm/v4.js
// Generate an RFC 4122 version-4 UUID. With no options and no output
// buffer, defers to the platform's native generator; otherwise draws
// random bytes, stamps version/variant bits, and either fills `buf`
// at `offset` or returns the canonical string form.
function v4(options, buf, offset) {
  if (native_default.randomUUID && !buf && !options) {
    return native_default.randomUUID();
  }
  const opts = options || {};
  const rnds = opts.random || (opts.rng || rng)();
  // Stamp RFC 4122 version (4) and variant (10xx) bits.
  rnds[6] = rnds[6] & 15 | 64;
  rnds[8] = rnds[8] & 63 | 128;
  if (!buf) {
    return unsafeStringify(rnds);
  }
  const start = offset || 0;
  for (let i = 0; i < 16; ++i) {
    buf[start + i] = rnds[i];
  }
  return buf;
}
var v4_default = v4;
269
+
270
// src/ctx-embeddings.ts
// Chunking defaults: target chunk size and overlap in tokens, and the
// chars-per-token ratio used to estimate token counts from string length.
var DEFAULT_CHUNK_TOKEN_SIZE = 500;
var DEFAULT_CHUNK_OVERLAP_TOKENS = 100;
var DEFAULT_CHARS_PER_TOKEN = 3.5;
// Per-content-type token budgets for the enriched (contextualized) chunk.
var CONTEXT_TARGETS = {
  DEFAULT: { MIN_TOKENS: 60, MAX_TOKENS: 120 },
  PDF: { MIN_TOKENS: 80, MAX_TOKENS: 150 },
  MATH_PDF: { MIN_TOKENS: 100, MAX_TOKENS: 180 },
  CODE: { MIN_TOKENS: 100, MAX_TOKENS: 200 },
  TECHNICAL: { MIN_TOKENS: 80, MAX_TOKENS: 160 }
};
296
+ var SYSTEM_PROMPTS = {
297
+ DEFAULT: "You are a precision text augmentation tool. Your task is to expand a given text chunk with its direct context from a larger document. You must: 1) Keep the original chunk intact; 2) Add critical context from surrounding text; 3) Never summarize or rephrase the original chunk; 4) Create contextually rich output for improved semantic retrieval.",
298
+ CODE: "You are a precision code augmentation tool. Your task is to expand a given code chunk with necessary context from the larger codebase. You must: 1) Keep the original code chunk intact with exact syntax and indentation; 2) Add relevant imports, function signatures, or class definitions; 3) Include critical surrounding code context; 4) Create contextually rich output that maintains correct syntax.",
299
+ PDF: "You are a precision document augmentation tool. Your task is to expand a given PDF text chunk with its direct context from the larger document. You must: 1) Keep the original chunk intact; 2) Add section headings, references, or figure captions; 3) Include text that immediately precedes and follows the chunk; 4) Create contextually rich output that maintains the document's original structure.",
300
+ MATH_PDF: "You are a precision mathematical content augmentation tool. Your task is to expand a given mathematical text chunk with essential context. You must: 1) Keep original mathematical notations and expressions exactly as they appear; 2) Add relevant definitions, theorems, or equations from elsewhere in the document; 3) Preserve all LaTeX or mathematical formatting; 4) Create contextually rich output for improved mathematical comprehension.",
301
+ TECHNICAL: "You are a precision technical documentation augmentation tool. Your task is to expand a technical document chunk with critical context. You must: 1) Keep the original chunk intact including all technical terminology; 2) Add relevant configuration examples, parameter definitions, or API references; 3) Include any prerequisite information; 4) Create contextually rich output that maintains technical accuracy."
302
+ };
303
+ var CONTEXTUAL_CHUNK_ENRICHMENT_PROMPT_TEMPLATE = `
304
+ <document>
305
+ {doc_content}
306
+ </document>
307
+
308
+ Here is the chunk we want to situate within the whole document:
309
+ <chunk>
310
+ {chunk_content}
311
+ </chunk>
312
+
313
+ Create an enriched version of this chunk by adding critical surrounding context. Follow these guidelines:
314
+
315
+ 1. Identify the document's main topic and key information relevant to understanding this chunk
316
+ 2. Include 2-3 sentences before the chunk that provide essential context
317
+ 3. Include 2-3 sentences after the chunk that complete thoughts or provide resolution
318
+ 4. For technical documents, include any definitions or explanations of terms used in the chunk
319
+ 5. For narrative content, include character or setting information needed to understand the chunk
320
+ 6. Keep the original chunk text COMPLETELY INTACT and UNCHANGED in your response
321
+ 7. Do not use phrases like "this chunk discusses" - directly present the context
322
+ 8. The total length should be between {min_tokens} and {max_tokens} tokens
323
+ 9. Format the response as a single coherent paragraph
324
+
325
+ Provide ONLY the enriched chunk text in your response:`;
326
+ var CACHED_CHUNK_PROMPT_TEMPLATE = `
327
+ Here is the chunk we want to situate within the whole document:
328
+ <chunk>
329
+ {chunk_content}
330
+ </chunk>
331
+
332
+ Create an enriched version of this chunk by adding critical surrounding context. Follow these guidelines:
333
+
334
+ 1. Identify the document's main topic and key information relevant to understanding this chunk
335
+ 2. Include 2-3 sentences before the chunk that provide essential context
336
+ 3. Include 2-3 sentences after the chunk that complete thoughts or provide resolution
337
+ 4. For technical documents, include any definitions or explanations of terms used in the chunk
338
+ 5. For narrative content, include character or setting information needed to understand the chunk
339
+ 6. Keep the original chunk text COMPLETELY INTACT and UNCHANGED in your response
340
+ 7. Do not use phrases like "this chunk discusses" - directly present the context
341
+ 8. The total length should be between {min_tokens} and {max_tokens} tokens
342
+ 9. Format the response as a single coherent paragraph
343
+
344
+ Provide ONLY the enriched chunk text in your response:`;
345
+ var CACHED_CODE_CHUNK_PROMPT_TEMPLATE = `
346
+ Here is the chunk of code we want to situate within the whole document:
347
+ <chunk>
348
+ {chunk_content}
349
+ </chunk>
350
+
351
+ Create an enriched version of this code chunk by adding critical surrounding context. Follow these guidelines:
352
+
353
+ 1. Preserve ALL code syntax, indentation, and comments exactly as they appear
354
+ 2. Include any import statements, function definitions, or class declarations that this code depends on
355
+ 3. Add necessary type definitions or interfaces that are referenced in this chunk
356
+ 4. Include any crucial comments from elsewhere in the document that explain this code
357
+ 5. If there are key variable declarations or initializations earlier in the document, include those
358
+ 6. Keep the original chunk COMPLETELY INTACT and UNCHANGED in your response
359
+ 7. The total length should be between {min_tokens} and {max_tokens} tokens
360
+ 8. Do NOT include implementation details for functions that are only called but not defined in this chunk
361
+
362
+ Provide ONLY the enriched code chunk in your response:`;
363
+ var CACHED_MATH_PDF_PROMPT_TEMPLATE = `
364
+ Here is the chunk we want to situate within the whole document:
365
+ <chunk>
366
+ {chunk_content}
367
+ </chunk>
368
+
369
+ Create an enriched version of this chunk by adding critical surrounding context. This document contains mathematical content that requires special handling. Follow these guidelines:
370
+
371
+ 1. Preserve ALL mathematical notation exactly as it appears in the chunk
372
+ 2. Include any defining equations, variables, or parameters mentioned earlier in the document that relate to this chunk
373
+ 3. Add section/subsection names or figure references if they help situate the chunk
374
+ 4. If variables or symbols are defined elsewhere in the document, include these definitions
375
+ 5. If mathematical expressions appear corrupted, try to infer their meaning from context
376
+ 6. Keep the original chunk text COMPLETELY INTACT and UNCHANGED in your response
377
+ 7. The total length should be between {min_tokens} and {max_tokens} tokens
378
+ 8. Format the response as a coherent mathematical explanation
379
+
380
+ Provide ONLY the enriched chunk text in your response:`;
381
+ var CACHED_TECHNICAL_PROMPT_TEMPLATE = `
382
+ Here is the chunk we want to situate within the whole document:
383
+ <chunk>
384
+ {chunk_content}
385
+ </chunk>
386
+
387
+ Create an enriched version of this chunk by adding critical surrounding context. This appears to be technical documentation that requires special handling. Follow these guidelines:
388
+
389
+ 1. Preserve ALL technical terminology, product names, and version numbers exactly as they appear
390
+ 2. Include any prerequisite information or requirements mentioned earlier in the document
391
+ 3. Add section/subsection headings or navigation path to situate this chunk within the document structure
392
+ 4. Include any definitions of technical terms, acronyms, or jargon used in this chunk
393
+ 5. If this chunk references specific configurations, include relevant parameter explanations
394
+ 6. Keep the original chunk text COMPLETELY INTACT and UNCHANGED in your response
395
+ 7. The total length should be between {min_tokens} and {max_tokens} tokens
396
+ 8. Format the response maintaining any hierarchical structure present in the original
397
+
398
+ Provide ONLY the enriched chunk text in your response:`;
399
+ var MATH_PDF_PROMPT_TEMPLATE = `
400
+ <document>
401
+ {doc_content}
402
+ </document>
403
+
404
+ Here is the chunk we want to situate within the whole document:
405
+ <chunk>
406
+ {chunk_content}
407
+ </chunk>
408
+
409
+ Create an enriched version of this chunk by adding critical surrounding context. This document contains mathematical content that requires special handling. Follow these guidelines:
410
+
411
+ 1. Preserve ALL mathematical notation exactly as it appears in the chunk
412
+ 2. Include any defining equations, variables, or parameters mentioned earlier in the document that relate to this chunk
413
+ 3. Add section/subsection names or figure references if they help situate the chunk
414
+ 4. If variables or symbols are defined elsewhere in the document, include these definitions
415
+ 5. If mathematical expressions appear corrupted, try to infer their meaning from context
416
+ 6. Keep the original chunk text COMPLETELY INTACT and UNCHANGED in your response
417
+ 7. The total length should be between {min_tokens} and {max_tokens} tokens
418
+ 8. Format the response as a coherent mathematical explanation
419
+
420
+ Provide ONLY the enriched chunk text in your response:`;
421
+ var CODE_PROMPT_TEMPLATE = `
422
+ <document>
423
+ {doc_content}
424
+ </document>
425
+
426
+ Here is the chunk of code we want to situate within the whole document:
427
+ <chunk>
428
+ {chunk_content}
429
+ </chunk>
430
+
431
+ Create an enriched version of this code chunk by adding critical surrounding context. Follow these guidelines:
432
+
433
+ 1. Preserve ALL code syntax, indentation, and comments exactly as they appear
434
+ 2. Include any import statements, function definitions, or class declarations that this code depends on
435
+ 3. Add necessary type definitions or interfaces that are referenced in this chunk
436
+ 4. Include any crucial comments from elsewhere in the document that explain this code
437
+ 5. If there are key variable declarations or initializations earlier in the document, include those
438
+ 6. Keep the original chunk COMPLETELY INTACT and UNCHANGED in your response
439
+ 7. The total length should be between {min_tokens} and {max_tokens} tokens
440
+ 8. Do NOT include implementation details for functions that are only called but not defined in this chunk
441
+
442
+ Provide ONLY the enriched code chunk in your response:`;
443
+ var TECHNICAL_PROMPT_TEMPLATE = `
444
+ <document>
445
+ {doc_content}
446
+ </document>
447
+
448
+ Here is the chunk we want to situate within the whole document:
449
+ <chunk>
450
+ {chunk_content}
451
+ </chunk>
452
+
453
+ Create an enriched version of this chunk by adding critical surrounding context. This appears to be technical documentation that requires special handling. Follow these guidelines:
454
+
455
+ 1. Preserve ALL technical terminology, product names, and version numbers exactly as they appear
456
+ 2. Include any prerequisite information or requirements mentioned earlier in the document
457
+ 3. Add section/subsection headings or navigation path to situate this chunk within the document structure
458
+ 4. Include any definitions of technical terms, acronyms, or jargon used in this chunk
459
+ 5. If this chunk references specific configurations, include relevant parameter explanations
460
+ 6. Keep the original chunk text COMPLETELY INTACT and UNCHANGED in your response
461
+ 7. The total length should be between {min_tokens} and {max_tokens} tokens
462
+ 8. Format the response maintaining any hierarchical structure present in the original
463
+
464
+ Provide ONLY the enriched chunk text in your response:`;
465
// Fill a contextualization prompt template with the full document and the
// chunk to situate. If the chunk alone would consume most of the token
// budget (> 70% of maxTokens, estimated via DEFAULT_CHARS_PER_TOKEN), the
// budget is widened so the enriched output can still contain the chunk.
// Returns an error string (not a throw) when either input is missing.
function getContextualizationPrompt(docContent, chunkContent, minTokens = CONTEXT_TARGETS.DEFAULT.MIN_TOKENS, maxTokens = CONTEXT_TARGETS.DEFAULT.MAX_TOKENS, promptTemplate = CONTEXTUAL_CHUNK_ENRICHMENT_PROMPT_TEMPLATE) {
  if (!docContent || !chunkContent) {
    console.warn(
      "Document content or chunk content is missing for contextualization."
    );
    return "Error: Document or chunk content missing.";
  }
  const chunkTokens = Math.ceil(chunkContent.length / DEFAULT_CHARS_PER_TOKEN);
  if (chunkTokens > maxTokens * 0.7) {
    maxTokens = Math.ceil(chunkTokens * 1.3);
    minTokens = chunkTokens;
  }
  // BUG FIX: use replacer functions for document/chunk content so that
  // "$"-sequences occurring in user content ("$&", "$'", "$$", …) are
  // inserted literally instead of being interpreted as special
  // replacement patterns by String.prototype.replace.
  return promptTemplate.replace("{doc_content}", () => docContent).replace("{chunk_content}", () => chunkContent).replace("{min_tokens}", minTokens.toString()).replace("{max_tokens}", maxTokens.toString());
}
479
// Build the chunk-only (cached-document) contextualization prompt plus a
// matching system prompt, choosing templates by content type: code mimes,
// PDF (with a math-specific variant), or technical documentation. Widens
// the token budget when the chunk alone would consume most of it.
function getCachingContextualizationPrompt(chunkContent, contentType, minTokens = CONTEXT_TARGETS.DEFAULT.MIN_TOKENS, maxTokens = CONTEXT_TARGETS.DEFAULT.MAX_TOKENS) {
  if (!chunkContent) {
    console.warn("Chunk content is missing for contextualization.");
    return {
      prompt: "Error: Chunk content missing.",
      systemPrompt: SYSTEM_PROMPTS.DEFAULT
    };
  }
  const chunkTokens = Math.ceil(chunkContent.length / DEFAULT_CHARS_PER_TOKEN);
  if (chunkTokens > maxTokens * 0.7) {
    maxTokens = Math.ceil(chunkTokens * 1.3);
    minTokens = chunkTokens;
  }
  let promptTemplate = CACHED_CHUNK_PROMPT_TEMPLATE;
  let systemPrompt = SYSTEM_PROMPTS.DEFAULT;
  if (contentType) {
    const isCodeType = ["javascript", "typescript", "python", "java", "c++", "code"].some(
      (marker) => contentType.includes(marker)
    );
    if (isCodeType) {
      promptTemplate = CACHED_CODE_CHUNK_PROMPT_TEMPLATE;
      systemPrompt = SYSTEM_PROMPTS.CODE;
    } else if (contentType.includes("pdf")) {
      if (containsMathematicalContent(chunkContent)) {
        promptTemplate = CACHED_MATH_PDF_PROMPT_TEMPLATE;
        systemPrompt = SYSTEM_PROMPTS.MATH_PDF;
      } else {
        systemPrompt = SYSTEM_PROMPTS.PDF;
      }
    } else if (contentType.includes("markdown") || contentType.includes("text/html") || isTechnicalDocumentation(chunkContent)) {
      promptTemplate = CACHED_TECHNICAL_PROMPT_TEMPLATE;
      systemPrompt = SYSTEM_PROMPTS.TECHNICAL;
    }
  }
  // BUG FIX: use a replacer function for the chunk content so that
  // "$"-sequences in user content ("$&", "$'", "$$", …) are inserted
  // literally instead of being interpreted as special replacement
  // patterns by String.prototype.replace.
  const formattedPrompt = promptTemplate.replace("{chunk_content}", () => chunkContent).replace("{min_tokens}", minTokens.toString()).replace("{max_tokens}", maxTokens.toString());
  return {
    prompt: formattedPrompt,
    systemPrompt
  };
}
516
// Select token targets and a prompt template for the given MIME type
// (PDF / math PDF / code / technical docs / default) and delegate to
// getContextualizationPrompt to fill it in.
function getPromptForMimeType(mimeType, docContent, chunkContent) {
  const codeMimeMarkers = ["javascript", "typescript", "python", "java", "c++", "code"];
  let targets = CONTEXT_TARGETS.DEFAULT;
  let promptTemplate = CONTEXTUAL_CHUNK_ENRICHMENT_PROMPT_TEMPLATE;
  if (mimeType.includes("pdf")) {
    if (containsMathematicalContent(docContent)) {
      targets = CONTEXT_TARGETS.MATH_PDF;
      promptTemplate = MATH_PDF_PROMPT_TEMPLATE;
      console.debug("Using mathematical PDF prompt template");
    } else {
      targets = CONTEXT_TARGETS.PDF;
      console.debug("Using standard PDF settings");
    }
  } else if (codeMimeMarkers.some((marker) => mimeType.includes(marker))) {
    targets = CONTEXT_TARGETS.CODE;
    promptTemplate = CODE_PROMPT_TEMPLATE;
    console.debug("Using code prompt template");
  } else if (isTechnicalDocumentation(docContent) || mimeType.includes("markdown") || mimeType.includes("text/html")) {
    targets = CONTEXT_TARGETS.TECHNICAL;
    promptTemplate = TECHNICAL_PROMPT_TEMPLATE;
    console.debug("Using technical documentation prompt template");
  }
  return getContextualizationPrompt(
    docContent,
    chunkContent,
    targets.MIN_TOKENS,
    targets.MAX_TOKENS,
    promptTemplate
  );
}
550
// Chunk-only (cached-document) variant of getPromptForMimeType: pick token
// targets by MIME type, then delegate template selection and formatting
// to getCachingContextualizationPrompt.
function getCachingPromptForMimeType(mimeType, chunkContent) {
  const codeMimeMarkers = ["javascript", "typescript", "python", "java", "c++", "code"];
  let targets = CONTEXT_TARGETS.DEFAULT;
  if (mimeType.includes("pdf")) {
    targets = containsMathematicalContent(chunkContent) ? CONTEXT_TARGETS.MATH_PDF : CONTEXT_TARGETS.PDF;
  } else if (codeMimeMarkers.some((marker) => mimeType.includes(marker))) {
    targets = CONTEXT_TARGETS.CODE;
  } else if (isTechnicalDocumentation(chunkContent) || mimeType.includes("markdown") || mimeType.includes("text/html")) {
    targets = CONTEXT_TARGETS.TECHNICAL;
  }
  return getCachingContextualizationPrompt(
    chunkContent,
    mimeType,
    targets.MIN_TOKENS,
    targets.MAX_TOKENS
  );
}
575
// Heuristic: does `content` look like mathematical text? True if any
// LaTeX construct or math-symbol pattern matches, or if at least two
// distinct math keywords appear (case-insensitive).
function containsMathematicalContent(content) {
  const latexPatterns = [
    /\$\$.+?\$\$/s,
    // Display math: $$ ... $$
    /\$.+?\$/g,
    // Inline math: $ ... $
    /\\begin\{equation\}/,
    // LaTeX equation environment
    /\\begin\{align\}/,
    // LaTeX align environment
    /\\sum_/,
    // Summation
    /\\int/,
    // Integral
    /\\frac\{/,
    // Fraction
    /\\sqrt\{/,
    // Square root
    /\\alpha|\\beta|\\gamma|\\delta|\\theta|\\lambda|\\sigma/,
    // Greek letters
    /\\nabla|\\partial/
    // Differential operators
  ];
  const generalPatterns = [
    /[≠≤≥±∞∫∂∑∏√∈∉⊆⊇⊂⊃∪∩]/,
    // Mathematical symbols
    /\b[a-zA-Z]\^[0-9]/,
    // Simple exponents (e.g., x^2)
    /\(\s*-?\d+(\.\d+)?\s*,\s*-?\d+(\.\d+)?\s*\)/,
    // Coordinates
    /\b[xyz]\s*=\s*-?\d+(\.\d+)?/,
    // Simple equations
    /\[\s*-?\d+(\.\d+)?\s*,\s*-?\d+(\.\d+)?\s*\]/,
    // Vectors/matrices
    /\b\d+\s*×\s*\d+/
    // Dimensions with × symbol
  ];
  if ([...latexPatterns, ...generalPatterns].some((pattern) => pattern.test(content))) {
    return true;
  }
  // Fallback: count math-related keywords; two or more is treated as math.
  const mathKeywords = [
    "theorem",
    "lemma",
    "proof",
    "equation",
    "function",
    "derivative",
    "integral",
    "matrix",
    "vector",
    "algorithm",
    "constraint",
    "coefficient"
  ];
  const lowered = content.toLowerCase();
  const keywordHits = mathKeywords.reduce(
    (count, keyword) => lowered.includes(keyword) ? count + 1 : count,
    0
  );
  return keywordHits >= 2;
}
642
// Heuristic: does `content` look like technical documentation? True when
// any technical signal matches: version strings, tech acronyms, URLs,
// HTTP methods, markup tags, doc-file names, config/parameter wording,
// common documentation headings, or multi-item list formatting.
function isTechnicalDocumentation(content) {
  const signals = [
    /\b(version|v)\s*\d+\.\d+(\.\d+)?/i,
    // Version numbers
    /\b(api|sdk|cli)\b/i,
    // Technical acronyms
    /\b(http|https|ftp):\/\//i,
    // URLs
    /\b(GET|POST|PUT|DELETE)\b/,
    // HTTP methods
    /<\/?[a-z][\s\S]*>/i,
    // HTML/XML tags
    /\bREADME\b|\bCHANGELOG\b/i,
    // Common doc file names
    /\b(config|configuration)\b/i,
    // Configuration references
    /\b(parameter|param|argument|arg)\b/i,
    // Parameter references
    /\b(Introduction|Overview|Getting Started|Installation|Usage|API Reference|Troubleshooting)\b/i,
    // Documentation headings
    /\d+\.\s.+\n\d+\.\s.+/,
    // Numbered lists
    /•\s.+\n•\s.+/,
    // Bullet points with •
    /\*\s.+\n\*\s.+/,
    // Bullet points with *
    /-\s.+\n-\s.+/
    // Bullet points with -
  ];
  return signals.some((pattern) => pattern.test(content));
}
686
// Combine an LLM-generated enriched context with the original chunk,
// guaranteeing the original chunk text is never lost: empty context falls
// back to the chunk, and a context that dropped the chunk gets the chunk
// appended after a blank line.
function getChunkWithContext(chunkContent, generatedContext) {
  if (!generatedContext || generatedContext.trim() === "") {
    console.warn(
      "Generated context is empty. Falling back to original chunk content."
    );
    return chunkContent;
  }
  const trimmedContext = generatedContext.trim();
  if (generatedContext.includes(chunkContent)) {
    return trimmedContext;
  }
  console.warn(
    "Generated context does not contain the original chunk. Appending original to ensure data integrity."
  );
  return `${trimmedContext}\n\n${chunkContent}`;
}
703
+
704
+ // src/utils.ts
705
+ import * as mammoth from "mammoth";
706
+ import { logger as logger2 } from "@elizaos/core";
707
+ import { getDocument } from "pdfjs-dist/legacy/build/pdf.mjs";
708
// MIME types that are not "text/*" but whose payloads are plain text and
// can be decoded directly as UTF-8.
var PLAIN_TEXT_CONTENT_TYPES = [
  "application/typescript",
  "text/typescript",
  "text/x-python",
  "application/x-python-code",
  "application/yaml",
  "text/yaml",
  "application/x-yaml",
  "application/json",
  "text/markdown",
  "text/csv"
];
// Largest file (5 MiB) we will attempt to decode as text for unknown types.
var MAX_FALLBACK_SIZE_BYTES = 5 * 1024 * 1024;
// Number of leading bytes inspected for NUL bytes by the binary heuristic.
var BINARY_CHECK_BYTES = 1024;
722
// Extract plain text from a file buffer based on its MIME type.
// DOCX is parsed via mammoth; legacy .doc files get a placeholder string;
// text-like types are decoded as UTF-8; any other type falls back to a
// guarded plain-text decode. Throws when the buffer is too large for the
// fallback, looks binary, or fails to parse.
async function extractTextFromFileBuffer(fileBuffer, contentType, originalFilename) {
  const lowerContentType = contentType.toLowerCase();
  logger2.debug(
    `[TextUtil] Attempting to extract text from ${originalFilename} (type: ${contentType})`
  );
  const DOCX_MIME = "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
  if (lowerContentType === DOCX_MIME) {
    logger2.debug(
      `[TextUtil] Extracting text from DOCX ${originalFilename} via mammoth.`
    );
    try {
      const result = await mammoth.extractRawText({ buffer: fileBuffer });
      logger2.debug(
        `[TextUtil] DOCX text extraction complete for ${originalFilename}. Text length: ${result.value.length}`
      );
      return result.value;
    } catch (docxError) {
      const errorMsg = `[TextUtil] Failed to parse DOCX file ${originalFilename}: ${docxError.message}`;
      logger2.error(errorMsg, docxError.stack);
      throw new Error(errorMsg);
    }
  }
  if (lowerContentType === "application/msword" || originalFilename.toLowerCase().endsWith(".doc")) {
    // Legacy .doc cannot be parsed here; return a display placeholder.
    logger2.debug(
      `[TextUtil] Handling Microsoft Word .doc file: ${originalFilename}`
    );
    return `[Microsoft Word Document: ${originalFilename}]\n\nThis document was indexed for search but cannot be displayed directly in the browser. The original document content is preserved for retrieval purposes.`;
  }
  if (lowerContentType.startsWith("text/") || PLAIN_TEXT_CONTENT_TYPES.includes(lowerContentType)) {
    logger2.debug(
      `[TextUtil] Extracting text from plain text compatible file ${originalFilename} (type: ${contentType})`
    );
    return fileBuffer.toString("utf-8");
  }
  // Unknown type: attempt a guarded fallback to plain text.
  logger2.warn(
    `[TextUtil] Unsupported content type: "${contentType}" for ${originalFilename}. Attempting fallback to plain text.`
  );
  if (fileBuffer.length > MAX_FALLBACK_SIZE_BYTES) {
    const sizeErrorMsg = `[TextUtil] File ${originalFilename} (type: ${contentType}) exceeds maximum size for fallback (${MAX_FALLBACK_SIZE_BYTES} bytes). Cannot process as plain text.`;
    logger2.error(sizeErrorMsg);
    throw new Error(sizeErrorMsg);
  }
  // NUL byte in the leading bytes → almost certainly binary.
  const initialBytes = fileBuffer.subarray(
    0,
    Math.min(fileBuffer.length, BINARY_CHECK_BYTES)
  );
  if (initialBytes.includes(0)) {
    const binaryHeuristicMsg = `[TextUtil] File ${originalFilename} (type: ${contentType}) appears to be binary based on initial byte check. Cannot process as plain text.`;
    logger2.error(binaryHeuristicMsg);
    throw new Error(binaryHeuristicMsg);
  }
  try {
    const textContent = fileBuffer.toString("utf-8");
    // U+FFFD replacement chars mean the decode mangled non-UTF-8 data.
    if (textContent.includes("\uFFFD")) {
      const binaryErrorMsg = `[TextUtil] File ${originalFilename} (type: ${contentType}) seems to be binary or has encoding issues after fallback to plain text (detected \uFFFD).`;
      logger2.error(binaryErrorMsg);
      throw new Error(binaryErrorMsg);
    }
    logger2.debug(
      `[TextUtil] Successfully processed unknown type ${contentType} as plain text after fallback for ${originalFilename}.`
    );
    return textContent;
  } catch (fallbackError) {
    const finalErrorMsg = `[TextUtil] Unsupported content type: ${contentType} for ${originalFilename}. Fallback to plain text also failed or indicated binary content.`;
    logger2.error(
      finalErrorMsg,
      fallbackError.message ? fallbackError.stack : void 0
    );
    throw new Error(finalErrorMsg);
  }
}
793
// Convert a PDF buffer to plain text with pdfjs.
// Reading order is reconstructed per page by grouping text items into lines
// keyed on their rounded y-position (sorted top-to-bottom; PDF y grows
// upward) and ordering items within a line by x-position. All whitespace in
// the final text is collapsed to single spaces.
async function convertPdfToTextFromBuffer(pdfBuffer, filename) {
  const docName = filename || "unnamed-document";
  logger2.debug(`[PdfService] Starting conversion for ${docName}`);
  try {
    const pdf = await getDocument({ data: new Uint8Array(pdfBuffer) }).promise;
    const numPages = pdf.numPages;
    const pageTexts = [];
    for (let pageNum = 1; pageNum <= numPages; pageNum++) {
      logger2.debug(`[PdfService] Processing page ${pageNum}/${numPages}`);
      const page = await pdf.getPage(pageNum);
      const textContent = await page.getTextContent();
      // Bucket text items by rounded y-coordinate → one bucket per line.
      const lineMap = new Map();
      for (const item of textContent.items.filter(isTextItem)) {
        const yPos = Math.round(item.transform[5]);
        const bucket = lineMap.get(yPos);
        if (bucket) {
          bucket.push(item);
        } else {
          lineMap.set(yPos, [item]);
        }
      }
      const lines = [...lineMap.entries()]
        .sort(([yA], [yB]) => yB - yA)
        .map(([, lineItems]) =>
          lineItems
            .sort((a, b) => a.transform[4] - b.transform[4])
            .map((textItem) => textItem.str)
            .join(" ")
        );
      pageTexts.push(lines.join("\n"));
    }
    const fullText = pageTexts.join("\n\n").replace(/\s+/g, " ").trim();
    logger2.debug(
      `[PdfService] Conversion complete for ${docName}, length: ${fullText.length}`
    );
    return fullText;
  } catch (error) {
    logger2.error(
      `[PdfService] Error converting PDF ${docName}:`,
      error.message
    );
    throw new Error(`Failed to convert PDF to text: ${error.message}`);
  }
}
831
// Type guard: a pdfjs text-content item counts as text iff it carries a
// `str` property (marked-content items do not).
function isTextItem(item) {
  const hasTextPayload = "str" in item;
  return hasTextPayload;
}
834
+
835
+ // src/document-processor.ts
836
// Contextual-knowledge toggle, read once at module load.
// Only the exact strings "true" / "True" enable it.
var ctxKnowledgeEnabled = ["true", "True"].includes(process.env.CTX_KNOWLEDGE_ENABLED);
if (ctxKnowledgeEnabled) {
  logger3.info(`Document processor starting with Contextual Knowledge ENABLED`);
} else {
  logger3.info(`Document processor starting with Contextual Knowledge DISABLED`);
}
842
// Split a document into chunks, contextualize/embed them, and persist each
// chunk as a "knowledge" fragment memory scoped to the given room/entity/
// world (all default to the agent id). Returns the number of fragments
// saved — 0 when there is no text or chunking produced nothing.
async function processFragmentsSynchronously({
  runtime,
  documentId,
  fullDocumentText,
  agentId,
  contentType,
  roomId,
  entityId,
  worldId
}) {
  if (!fullDocumentText || fullDocumentText.trim() === "") {
    logger3.warn(
      `No text content available to chunk for document ${documentId}.`
    );
    return 0;
  }
  const chunks = await splitDocumentIntoChunks(fullDocumentText);
  if (chunks.length === 0) {
    logger3.warn(
      `No chunks generated from text for ${documentId}. No fragments to save.`
    );
    return 0;
  }
  logger3.info(
    `Split content into ${chunks.length} chunks for document ${documentId}`
  );
  // Respect provider limits: concurrency is capped at 30 and requests are
  // throttled to the provider's per-minute budget.
  const limits = await getProviderRateLimits();
  const concurrencyLimit = Math.min(30, limits.maxConcurrentRequests || 30);
  const rateLimiter = createRateLimiter(limits.requestsPerMinute || 60);
  const { savedCount, failedCount } = await processAndSaveFragments({
    runtime,
    documentId,
    chunks,
    fullDocumentText,
    contentType,
    agentId,
    roomId: roomId || agentId,
    entityId: entityId || agentId,
    worldId: worldId || agentId,
    concurrencyLimit,
    rateLimiter
  });
  if (failedCount > 0) {
    logger3.warn(
      `Failed to process ${failedCount} chunks out of ${chunks.length} for document ${documentId}`
    );
  }
  logger3.info(
    `Finished saving ${savedCount} fragments for document ${documentId}.`
  );
  return savedCount;
}
897
// Extract text from a document buffer. PDFs go through the pdf converter;
// text-like MIME types are decoded as UTF-8; everything else is delegated
// to the generic buffer extractor. Throws on empty input or when every
// extraction path fails.
async function extractTextFromDocument(fileBuffer, contentType, originalFilename) {
  if (!fileBuffer || fileBuffer.length === 0) {
    throw new Error(
      `Empty file buffer provided for ${originalFilename}. Cannot extract text.`
    );
  }
  try {
    if (contentType === "application/pdf") {
      logger3.debug(`Extracting text from PDF: ${originalFilename}`);
      return await convertPdfToTextFromBuffer(fileBuffer, originalFilename);
    }
    logger3.debug(
      `Extracting text from non-PDF: ${originalFilename} (Type: ${contentType})`
    );
    const looksTextual =
      contentType.includes("text/") ||
      contentType.includes("application/json") ||
      contentType.includes("application/xml");
    if (looksTextual) {
      try {
        return fileBuffer.toString("utf8");
      } catch (textError) {
        logger3.warn(
          `Failed to decode ${originalFilename} as UTF-8, falling back to binary extraction`
        );
      }
    }
    return await extractTextFromFileBuffer(
      fileBuffer,
      contentType,
      originalFilename
    );
  } catch (error) {
    logger3.error(
      `Error extracting text from ${originalFilename}: ${error.message}`
    );
    throw new Error(
      `Failed to extract text from ${originalFilename}: ${error.message}`
    );
  }
}
935
// Build a DOCUMENT-type memory record for an uploaded document.
// The memory id is `documentId` when provided, otherwise a fresh UUID;
// `clientDocumentId` goes into metadata so fragments can link back to it.
// Room/entity scope defaults to the agent itself.
function createDocumentMemory({
  text,
  agentId,
  clientDocumentId,
  originalFilename,
  contentType,
  worldId,
  fileSize,
  documentId
}) {
  const fileExt = originalFilename.split(".").pop()?.toLowerCase() || "";
  // Strip only the TRAILING extension, case-insensitively. The previous
  // `replace(`.${fileExt}`, "")` removed the first occurrence of the
  // extension, mangling names like "notes.v1.notes", and missed
  // upper-case extensions such as "Report.PDF" entirely.
  const title =
    fileExt && originalFilename.toLowerCase().endsWith(`.${fileExt}`)
      ? originalFilename.slice(0, -(fileExt.length + 1))
      : originalFilename;
  const docId = documentId || v4_default();
  return {
    id: docId,
    agentId,
    roomId: agentId,
    worldId,
    entityId: agentId,
    content: { text },
    metadata: {
      type: MemoryType.DOCUMENT,
      documentId: clientDocumentId,
      originalFilename,
      contentType,
      title,
      fileExt,
      fileSize,
      source: "rag-service-main-upload",
      timestamp: Date.now()
    }
  };
}
968
// Chunk a document through the core `splitChunks` helper using the module's
// default token sizes. The char-equivalent sizes are computed purely for
// the debug log; `splitChunks` itself operates on token counts.
async function splitDocumentIntoChunks(documentText) {
  const tokenChunkSize = DEFAULT_CHUNK_TOKEN_SIZE;
  const tokenChunkOverlap = DEFAULT_CHUNK_OVERLAP_TOKENS;
  const charChunkSize = Math.round(tokenChunkSize * DEFAULT_CHARS_PER_TOKEN);
  const charChunkOverlap = Math.round(tokenChunkOverlap * DEFAULT_CHARS_PER_TOKEN);
  logger3.debug(
    `Using core splitChunks with settings: tokenChunkSize=${tokenChunkSize}, tokenChunkOverlap=${tokenChunkOverlap}, charChunkSize=${charChunkSize}, charChunkOverlap=${charChunkOverlap}`
  );
  return await splitChunks(documentText, tokenChunkSize, tokenChunkOverlap);
}
982
// Contextualize, embed, and persist chunks in concurrency-limited batches.
// Failed chunks (embedding failure, zero vector, or DB error) are counted
// and their original indices collected. A 500ms pause between batches eases
// provider load. Returns { savedCount, failedCount, failedChunks }.
async function processAndSaveFragments({
  runtime,
  documentId,
  chunks,
  fullDocumentText,
  contentType,
  agentId,
  roomId,
  entityId,
  worldId,
  concurrencyLimit,
  rateLimiter
}) {
  let savedCount = 0;
  let failedCount = 0;
  const failedChunks = [];
  for (let batchStart = 0; batchStart < chunks.length; batchStart += concurrencyLimit) {
    const batchChunks = chunks.slice(batchStart, batchStart + concurrencyLimit);
    // Map batch positions back to positions in the full chunk list.
    const batchOriginalIndices = batchChunks.map((_, offset) => batchStart + offset);
    logger3.debug(
      `Processing batch of ${batchChunks.length} chunks for document ${documentId}. Starting original index: ${batchOriginalIndices[0]}, batch ${Math.floor(batchStart / concurrencyLimit) + 1}/${Math.ceil(chunks.length / concurrencyLimit)}`
    );
    const contextualizedChunks = await getContextualizedChunks(
      runtime,
      fullDocumentText,
      batchChunks,
      contentType,
      batchOriginalIndices
    );
    const embeddingResults = await generateEmbeddingsForChunks(
      runtime,
      contextualizedChunks,
      rateLimiter
    );
    for (const result of embeddingResults) {
      const chunkIndex = result.index;
      if (!result.success) {
        failedCount++;
        failedChunks.push(chunkIndex);
        logger3.warn(
          `Failed to process chunk ${chunkIndex} for document ${documentId}`
        );
        continue;
      }
      const embedding = result.embedding;
      const contextualizedChunkText = result.text;
      if (!embedding || embedding.length === 0) {
        logger3.warn(
          `Zero vector detected for chunk ${chunkIndex} (document ${documentId}). Embedding: ${JSON.stringify(result.embedding)}`
        );
        failedCount++;
        failedChunks.push(chunkIndex);
        continue;
      }
      try {
        const fragmentMemory = {
          id: v4_default(),
          agentId,
          roomId: roomId || agentId,
          worldId: worldId || agentId,
          entityId: entityId || agentId,
          embedding,
          content: { text: contextualizedChunkText },
          metadata: {
            type: MemoryType.FRAGMENT,
            documentId,
            position: chunkIndex,
            timestamp: Date.now(),
            source: "rag-service-fragment-sync"
          }
        };
        await runtime.createMemory(fragmentMemory, "knowledge");
        logger3.debug(
          `Saved fragment ${chunkIndex + 1} for document ${documentId} (Fragment ID: ${fragmentMemory.id})`
        );
        savedCount++;
      } catch (saveError) {
        logger3.error(
          `Error saving chunk ${chunkIndex} to database: ${saveError.message}`,
          saveError.stack
        );
        failedCount++;
        failedChunks.push(chunkIndex);
      }
    }
    // Brief pause between batches; skipped after the final batch.
    if (batchStart + concurrencyLimit < chunks.length) {
      await new Promise((resolve) => setTimeout(resolve, 500));
    }
  }
  return { savedCount, failedCount, failedChunks };
}
1076
// Generate embeddings for contextualized chunks in parallel, throttled by
// `rateLimiter` and retried once on HTTP 429. Each result reports success,
// the original chunk index, the contextualized text, and (on success) the
// embedding vector; this function itself never rejects for a single chunk.
async function generateEmbeddingsForChunks(runtime, contextualizedChunks, rateLimiter) {
  const embedOneChunk = async (chunk) => {
    await rateLimiter();
    try {
      const { embedding, success, error } = await withRateLimitRetry(
        () => generateEmbeddingWithValidation(runtime, chunk.contextualizedText),
        `embedding generation for chunk ${chunk.index}`
      );
      if (!success) {
        return {
          success: false,
          index: chunk.index,
          error,
          text: chunk.contextualizedText
        };
      }
      return {
        embedding,
        success: true,
        index: chunk.index,
        text: chunk.contextualizedText
      };
    } catch (error) {
      logger3.error(
        `Error generating embedding for chunk ${chunk.index}: ${error.message}`
      );
      return {
        success: false,
        index: chunk.index,
        error,
        text: chunk.contextualizedText
      };
    }
  };
  return await Promise.all(contextualizedChunks.map(embedOneChunk));
}
1119
// Return contextualized chunk records for a batch. When contextual
// knowledge is disabled (or no full document text is available) the chunks
// pass through unchanged, keeping their original indices; otherwise each
// chunk gets an LLM-generated context prepended.
async function getContextualizedChunks(runtime, fullDocumentText, chunks, contentType, batchOriginalIndices) {
  if (!ctxKnowledgeEnabled || !fullDocumentText) {
    return chunks.map((chunkText, idx) => ({
      contextualizedText: chunkText,
      index: batchOriginalIndices[idx],
      success: true
    }));
  }
  logger3.debug(`Generating contexts for ${chunks.length} chunks`);
  return await generateContextsInBatch(
    runtime,
    fullDocumentText,
    chunks,
    contentType,
    batchOriginalIndices
  );
}
1137
// Generate an LLM context for each chunk (in parallel, rate-limited) and
// merge it with the chunk text via getChunkWithContext. Chunks whose
// prompts could not be prepared, or whose LLM call fails, fall back to
// their original text with success=false.
async function generateContextsInBatch(runtime, fullDocumentText, chunks, contentType, batchIndices) {
  if (!chunks || chunks.length === 0) {
    return [];
  }
  const providerLimits = await getProviderRateLimits();
  const rateLimiter = createRateLimiter(providerLimits.requestsPerMinute || 60);
  const config = validateModelConfig();
  const isUsingOpenRouter = config.TEXT_PROVIDER === "openrouter";
  // Claude/Gemini served through OpenRouter can cache the full document
  // between calls, so they get the split system/prompt form.
  const modelName = config.TEXT_MODEL?.toLowerCase();
  const isUsingCacheCapableModel = isUsingOpenRouter && (modelName?.includes("claude") || modelName?.includes("gemini"));
  logger3.info(
    `Using provider: ${config.TEXT_PROVIDER}, model: ${config.TEXT_MODEL}, caching capability: ${isUsingCacheCapableModel}`
  );
  const promptConfigs = prepareContextPrompts(
    chunks,
    fullDocumentText,
    contentType,
    batchIndices,
    isUsingCacheCapableModel
  );
  const contextualizeOne = async (item) => {
    if (!item.valid) {
      return {
        contextualizedText: item.chunkText,
        success: false,
        index: item.originalIndex
      };
    }
    await rateLimiter();
    try {
      const callModel = async () => {
        if (item.usesCaching) {
          // cacheDocument / cacheOptions are not directly supported by
          // useModel, so only prompt + system are forwarded.
          return await runtime.useModel(ModelType.TEXT_LARGE, {
            prompt: item.promptText,
            system: item.systemPrompt
          });
        }
        return await runtime.useModel(ModelType.TEXT_LARGE, {
          prompt: item.prompt
        });
      };
      const llmResponse = await withRateLimitRetry(
        callModel,
        `context generation for chunk ${item.originalIndex}`
      );
      const contextualizedText = getChunkWithContext(
        item.chunkText,
        llmResponse.text
      );
      logger3.debug(
        `Context added for chunk ${item.originalIndex}. New length: ${contextualizedText.length}`
      );
      return {
        contextualizedText,
        success: true,
        index: item.originalIndex
      };
    } catch (error) {
      logger3.error(
        `Error generating context for chunk ${item.originalIndex}: ${error.message}`,
        error.stack
      );
      return {
        contextualizedText: item.chunkText,
        success: false,
        index: item.originalIndex
      };
    }
  };
  return await Promise.all(promptConfigs.map(contextualizeOne));
}
1214
// Build one prompt config per chunk for contextualization.
// Cache-capable models get a system prompt + chunk prompt (the document is
// supplied separately for provider-side caching); other models get a single
// combined prompt containing the document. Chunks whose prompt helpers
// report an "Error:" string — or throw — are marked valid=false so callers
// skip them.
function prepareContextPrompts(chunks, fullDocumentText, contentType, batchIndices, isUsingCacheCapableModel = false) {
  return chunks.map((chunkText, idx) => {
    const originalIndex = batchIndices ? batchIndices[idx] : idx;
    try {
      if (!isUsingCacheCapableModel) {
        const prompt = contentType
          ? getPromptForMimeType(contentType, fullDocumentText, chunkText)
          : getContextualizationPrompt(fullDocumentText, chunkText);
        if (prompt.startsWith("Error:")) {
          logger3.warn(
            `Skipping contextualization for chunk ${originalIndex} due to: ${prompt}`
          );
          return {
            prompt: null,
            originalIndex,
            chunkText,
            valid: false,
            usesCaching: false
          };
        }
        return {
          prompt,
          originalIndex,
          chunkText,
          valid: true,
          usesCaching: false
        };
      }
      const cachingPromptInfo = contentType
        ? getCachingPromptForMimeType(contentType, chunkText)
        : getCachingContextualizationPrompt(chunkText);
      if (cachingPromptInfo.prompt.startsWith("Error:")) {
        logger3.warn(
          `Skipping contextualization for chunk ${originalIndex} due to: ${cachingPromptInfo.prompt}`
        );
        return {
          originalIndex,
          chunkText,
          valid: false,
          usesCaching: false
        };
      }
      return {
        valid: true,
        originalIndex,
        chunkText,
        usesCaching: true,
        systemPrompt: cachingPromptInfo.systemPrompt,
        promptText: cachingPromptInfo.prompt,
        fullDocumentTextForContext: fullDocumentText
      };
    } catch (error) {
      logger3.error(
        `Error preparing prompt for chunk ${originalIndex}: ${error.message}`,
        error.stack
      );
      return {
        prompt: null,
        originalIndex,
        chunkText,
        valid: false,
        usesCaching: false
      };
    }
  });
}
1277
// Request an embedding for `text` and validate it. The model may return a
// raw array or an { embedding } wrapper; an empty/missing vector counts as
// failure. Never rejects — failures are reported via { success, error }.
async function generateEmbeddingWithValidation(runtime, text) {
  try {
    const rawResult = await runtime.useModel(ModelType.TEXT_EMBEDDING, {
      text
    });
    const embedding = Array.isArray(rawResult) ? rawResult : rawResult?.embedding;
    if (embedding && embedding.length > 0) {
      return { embedding, success: true };
    }
    logger3.warn(
      `Zero vector detected. Embedding result: ${JSON.stringify(rawResult)}`
    );
    return {
      embedding: null,
      success: false,
      error: new Error("Zero vector detected")
    };
  } catch (error) {
    return { embedding: null, success: false, error };
  }
}
1298
// Run `operation`, retrying exactly once when it fails with HTTP 429.
// The wait before the retry is `retryDelay` seconds when provided (an
// explicit 0 is honored), otherwise the response's `retry-after` header,
// otherwise 5 seconds. Non-429 errors, and a failure of the retry itself,
// propagate to the caller.
async function withRateLimitRetry(operation, errorContext, retryDelay) {
  try {
    return await operation();
  } catch (error) {
    if (error.status !== 429) {
      throw error;
    }
    // `??` instead of `||`: a caller-supplied retryDelay of 0 previously
    // fell through to the header/5s default instead of retrying at once.
    const delay = retryDelay ?? error.headers?.["retry-after"] ?? 5;
    logger3.warn(
      `Rate limit hit for ${errorContext}. Retrying after ${delay}s`
    );
    await new Promise((resolve) => setTimeout(resolve, delay * 1e3));
    try {
      return await operation();
    } catch (retryError) {
      logger3.error(
        `Failed after retry for ${errorContext}: ${retryError.message}`
      );
      throw retryError;
    }
  }
}
1320
// Create a sliding-window rate limiter. The returned async function
// resolves immediately while fewer than `requestsPerMinute` calls happened
// within the last 60 seconds, and otherwise sleeps until the oldest
// recorded call ages out of the window.
function createRateLimiter(requestsPerMinute) {
  const WINDOW_MS = 60 * 1e3;
  const timestamps = [];
  return async function rateLimiter() {
    const now = Date.now();
    // Evict timestamps that have aged out of the window.
    while (timestamps.length > 0 && now - timestamps[0] > WINDOW_MS) {
      timestamps.shift();
    }
    if (timestamps.length >= requestsPerMinute) {
      const waitMs = Math.max(0, timestamps[0] + WINDOW_MS - now);
      if (waitMs > 0) {
        logger3.debug(
          `Rate limiting applied, waiting ${waitMs}ms before next request`
        );
        await new Promise((resolve) => setTimeout(resolve, waitMs));
      }
    }
    timestamps.push(Date.now());
  };
}
1341
+
1342
+ // src/service.ts
1343
+ var KnowledgeService = class _KnowledgeService extends Service {
1344
+ /**
1345
+ * Create a new Knowledge service
1346
+ * @param runtime Agent runtime
1347
+ */
1348
+ constructor(runtime) {
1349
+ super(runtime);
1350
+ this.runtime = runtime;
1351
+ this.knowledgeProcessingSemaphore = new Semaphore(10);
1352
+ logger4.info(`KnowledgeService initialized for agent: ${runtime.agentId}`);
1353
+ }
1354
+ static serviceType = KnowledgeServiceType.KNOWLEDGE;
1355
+ capabilityDescription = "Provides Retrieval Augmented Generation capabilities, including knowledge upload and querying.";
1356
+ knowledgeProcessingSemaphore;
1357
+ /**
1358
+ * Start the Knowledge service
1359
+ * @param runtime Agent runtime
1360
+ * @returns Initialized Knowledge service
1361
+ */
1362
+ static async start(runtime) {
1363
+ logger4.info(`Starting Knowledge service for agent: ${runtime.agentId}`);
1364
+ const service = new _KnowledgeService(runtime);
1365
+ if (service.runtime.character?.knowledge && service.runtime.character.knowledge.length > 0) {
1366
+ logger4.info(
1367
+ `KnowledgeService: Processing ${service.runtime.character.knowledge.length} character knowledge items.`
1368
+ );
1369
+ const stringKnowledge = service.runtime.character.knowledge.filter(
1370
+ (item) => typeof item === "string"
1371
+ );
1372
+ service.processCharacterKnowledge(stringKnowledge).catch((err) => {
1373
+ logger4.error(
1374
+ `KnowledgeService: Error processing character knowledge during startup: ${err.message}`,
1375
+ err
1376
+ );
1377
+ });
1378
+ } else {
1379
+ logger4.info(
1380
+ `KnowledgeService: No character knowledge to process for agent ${runtime.agentId}.`
1381
+ );
1382
+ }
1383
+ return service;
1384
+ }
1385
+ /**
1386
+ * Stop the Knowledge service
1387
+ * @param runtime Agent runtime
1388
+ */
1389
+ static async stop(runtime) {
1390
+ logger4.info(`Stopping Knowledge service for agent: ${runtime.agentId}`);
1391
+ const service = runtime.getService(_KnowledgeService.serviceType);
1392
+ if (!service) {
1393
+ logger4.warn(
1394
+ `KnowledgeService not found for agent ${runtime.agentId} during stop.`
1395
+ );
1396
+ }
1397
+ }
1398
+ /**
1399
+ * Stop the service
1400
+ */
1401
+ async stop() {
1402
+ logger4.info(
1403
+ `Knowledge service stopping for agent: ${this.runtime.agentId}`
1404
+ );
1405
+ }
1406
+ /**
1407
+ * Add knowledge to the system
1408
+ * @param options Knowledge options
1409
+ * @returns Promise with document processing result
1410
+ */
1411
+ async addKnowledge(options) {
1412
+ const agentId = this.runtime.agentId;
1413
+ logger4.info(
1414
+ `KnowledgeService (agent: ${agentId}) processing document for public addKnowledge: ${options.originalFilename}, type: ${options.contentType}`
1415
+ );
1416
+ try {
1417
+ const existingDocument = await this.runtime.getMemoryById(
1418
+ options.clientDocumentId
1419
+ );
1420
+ if (existingDocument && existingDocument.metadata?.type === MemoryType2.DOCUMENT) {
1421
+ logger4.info(
1422
+ `Document ${options.originalFilename} with ID ${options.clientDocumentId} already exists. Skipping processing.`
1423
+ );
1424
+ const fragments = await this.runtime.getMemories({
1425
+ tableName: "knowledge"
1426
+ // Assuming fragments store original documentId in metadata.documentId
1427
+ // This query might need adjustment based on actual fragment metadata structure.
1428
+ // A more robust way would be to query where metadata.documentId === options.clientDocumentId
1429
+ });
1430
+ const relatedFragments = fragments.filter(
1431
+ (f) => f.metadata?.type === MemoryType2.FRAGMENT && f.metadata.documentId === options.clientDocumentId
1432
+ );
1433
+ return {
1434
+ clientDocumentId: options.clientDocumentId,
1435
+ storedDocumentMemoryId: existingDocument.id,
1436
+ fragmentCount: relatedFragments.length
1437
+ };
1438
+ }
1439
+ } catch (error) {
1440
+ logger4.debug(
1441
+ `Document ${options.clientDocumentId} not found or error checking existence, proceeding with processing: ${error instanceof Error ? error.message : String(error)}`
1442
+ );
1443
+ }
1444
+ return this.processDocument(options);
1445
+ }
1446
+ /**
1447
+ * Process a document regardless of type - Called by public addKnowledge
1448
+ * @param options Document options
1449
+ * @returns Promise with document processing result
1450
+ */
1451
+ async processDocument({
1452
+ clientDocumentId,
1453
+ contentType,
1454
+ originalFilename,
1455
+ worldId,
1456
+ content,
1457
+ roomId,
1458
+ entityId
1459
+ }) {
1460
+ const agentId = this.runtime.agentId;
1461
+ try {
1462
+ logger4.debug(
1463
+ `KnowledgeService: Processing document ${originalFilename} (type: ${contentType}) via processDocument`
1464
+ );
1465
+ let fileBuffer = null;
1466
+ let extractedText;
1467
+ const isPdfFile = contentType === "application/pdf" || originalFilename.toLowerCase().endsWith(".pdf");
1468
+ const isBinaryFile = this.isBinaryContentType(
1469
+ contentType,
1470
+ originalFilename
1471
+ );
1472
+ if (isBinaryFile) {
1473
+ try {
1474
+ fileBuffer = Buffer.from(content, "base64");
1475
+ } catch (e) {
1476
+ logger4.error(
1477
+ `KnowledgeService: Failed to convert base64 to buffer for ${originalFilename}: ${e.message}`
1478
+ );
1479
+ throw new Error(
1480
+ `Invalid base64 content for binary file ${originalFilename}`
1481
+ );
1482
+ }
1483
+ extractedText = await extractTextFromDocument(
1484
+ fileBuffer,
1485
+ contentType,
1486
+ originalFilename
1487
+ );
1488
+ } else {
1489
+ extractedText = content;
1490
+ }
1491
+ if (!extractedText || extractedText.trim() === "") {
1492
+ const noTextError = new Error(
1493
+ `KnowledgeService: No text content extracted from ${originalFilename} (type: ${contentType}).`
1494
+ );
1495
+ logger4.warn(noTextError.message);
1496
+ throw noTextError;
1497
+ }
1498
+ const documentMemory = createDocumentMemory({
1499
+ text: isPdfFile ? content : extractedText,
1500
+ // Store base64 for PDF, text for others
1501
+ agentId,
1502
+ clientDocumentId,
1503
+ // This becomes the memory.id
1504
+ originalFilename,
1505
+ contentType,
1506
+ worldId,
1507
+ fileSize: fileBuffer ? fileBuffer.length : extractedText.length,
1508
+ documentId: clientDocumentId
1509
+ // Explicitly set documentId in metadata as well
1510
+ });
1511
+ const memoryWithScope = {
1512
+ ...documentMemory,
1513
+ id: clientDocumentId,
1514
+ // Ensure the ID of the memory is the clientDocumentId
1515
+ roomId: roomId || agentId,
1516
+ entityId: entityId || agentId
1517
+ };
1518
+ await this.runtime.createMemory(memoryWithScope, "documents");
1519
+ logger4.debug(
1520
+ `KnowledgeService: Stored document ${originalFilename} (Memory ID: ${memoryWithScope.id})`
1521
+ );
1522
+ const fragmentCount = await processFragmentsSynchronously({
1523
+ runtime: this.runtime,
1524
+ documentId: clientDocumentId,
1525
+ // Pass clientDocumentId to link fragments
1526
+ fullDocumentText: extractedText,
1527
+ agentId,
1528
+ contentType,
1529
+ roomId: roomId || agentId,
1530
+ entityId: entityId || agentId,
1531
+ worldId: worldId || agentId
1532
+ });
1533
+ logger4.info(
1534
+ `KnowledgeService: Document ${originalFilename} processed with ${fragmentCount} fragments for agent ${agentId}`
1535
+ );
1536
+ return {
1537
+ clientDocumentId,
1538
+ storedDocumentMemoryId: memoryWithScope.id,
1539
+ fragmentCount
1540
+ };
1541
+ } catch (error) {
1542
+ logger4.error(
1543
+ `KnowledgeService: Error processing document ${originalFilename}: ${error.message}`,
1544
+ error.stack
1545
+ );
1546
+ throw error;
1547
+ }
1548
+ }
1549
+ /**
1550
+ * Determines if a file should be treated as binary based on its content type and filename
1551
+ * @param contentType MIME type of the file
1552
+ * @param filename Original filename
1553
+ * @returns True if the file should be treated as binary (base64 encoded)
1554
+ */
1555
+ isBinaryContentType(contentType, filename) {
1556
+ const binaryContentTypes = [
1557
+ "application/pdf",
1558
+ "application/msword",
1559
+ "application/vnd.openxmlformats-officedocument",
1560
+ "application/vnd.ms-excel",
1561
+ "application/vnd.ms-powerpoint",
1562
+ "application/zip",
1563
+ "application/x-zip-compressed",
1564
+ "application/octet-stream",
1565
+ "image/",
1566
+ "audio/",
1567
+ "video/"
1568
+ ];
1569
+ const isBinaryMimeType = binaryContentTypes.some(
1570
+ (type) => contentType.includes(type)
1571
+ );
1572
+ if (isBinaryMimeType) {
1573
+ return true;
1574
+ }
1575
+ const fileExt = filename.split(".").pop()?.toLowerCase() || "";
1576
+ const binaryExtensions = [
1577
+ "pdf",
1578
+ "docx",
1579
+ "doc",
1580
+ "xls",
1581
+ "xlsx",
1582
+ "ppt",
1583
+ "pptx",
1584
+ "zip",
1585
+ "jpg",
1586
+ "jpeg",
1587
+ "png",
1588
+ "gif",
1589
+ "mp3",
1590
+ "mp4",
1591
+ "wav"
1592
+ ];
1593
+ return binaryExtensions.includes(fileExt);
1594
+ }
1595
+ // --- Knowledge methods moved from AgentRuntime ---
1596
+ async handleProcessingError(error, context) {
1597
+ logger4.error(
1598
+ `KnowledgeService: Error ${context}:`,
1599
+ error?.message || error || "Unknown error"
1600
+ );
1601
+ throw error;
1602
+ }
1603
+ async checkExistingKnowledge(knowledgeId) {
1604
+ const existingDocument = await this.runtime.getMemoryById(knowledgeId);
1605
+ return !!existingDocument;
1606
+ }
1607
+ async getKnowledge(message, scope) {
1608
+ logger4.debug(
1609
+ "KnowledgeService: getKnowledge called for message id: " + message.id
1610
+ );
1611
+ if (!message?.content?.text || message?.content?.text.trim().length === 0) {
1612
+ logger4.warn(
1613
+ "KnowledgeService: Invalid or empty message content for knowledge query."
1614
+ );
1615
+ return [];
1616
+ }
1617
+ const embedding = await this.runtime.useModel(ModelType2.TEXT_EMBEDDING, {
1618
+ text: message.content.text
1619
+ });
1620
+ const filterScope = {};
1621
+ if (scope?.roomId) filterScope.roomId = scope.roomId;
1622
+ if (scope?.worldId) filterScope.worldId = scope.worldId;
1623
+ if (scope?.entityId) filterScope.entityId = scope.entityId;
1624
+ const fragments = await this.runtime.searchMemories({
1625
+ tableName: "knowledge",
1626
+ embedding,
1627
+ query: message.content.text,
1628
+ ...filterScope,
1629
+ count: 20,
1630
+ match_threshold: 0.1
1631
+ // TODO: Make configurable
1632
+ });
1633
+ return fragments.filter((fragment) => fragment.id !== void 0).map((fragment) => ({
1634
+ id: fragment.id,
1635
+ // Cast as UUID after filtering
1636
+ content: fragment.content,
1637
+ // Cast if necessary, ensure Content type matches
1638
+ similarity: fragment.similarity,
1639
+ metadata: fragment.metadata,
1640
+ worldId: fragment.worldId
1641
+ }));
1642
+ }
1643
+ async processCharacterKnowledge(items) {
1644
+ await new Promise((resolve) => setTimeout(resolve, 1e3));
1645
+ logger4.info(
1646
+ `KnowledgeService: Processing ${items.length} character knowledge items for agent ${this.runtime.agentId}`
1647
+ );
1648
+ const processingPromises = items.map(async (item) => {
1649
+ await this.knowledgeProcessingSemaphore.acquire();
1650
+ try {
1651
+ const knowledgeId = createUniqueUuid(this.runtime.agentId + item, item);
1652
+ if (await this.checkExistingKnowledge(knowledgeId)) {
1653
+ logger4.debug(
1654
+ `KnowledgeService: Character knowledge item with ID ${knowledgeId} already exists. Skipping.`
1655
+ );
1656
+ return;
1657
+ }
1658
+ logger4.debug(
1659
+ `KnowledgeService: Processing character knowledge for ${this.runtime.character?.name} - ${item.slice(0, 100)}`
1660
+ );
1661
+ let metadata = {
1662
+ type: MemoryType2.DOCUMENT,
1663
+ // Character knowledge often represents a doc/fact.
1664
+ timestamp: Date.now(),
1665
+ source: "character"
1666
+ // Indicate the source
1667
+ };
1668
+ const pathMatch = item.match(/^Path: (.+?)(?:\n|\r\n)/);
1669
+ if (pathMatch) {
1670
+ const filePath = pathMatch[1].trim();
1671
+ const extension = filePath.split(".").pop() || "";
1672
+ const filename = filePath.split("/").pop() || "";
1673
+ const title = filename.replace(`.${extension}`, "");
1674
+ metadata = {
1675
+ ...metadata,
1676
+ path: filePath,
1677
+ filename,
1678
+ fileExt: extension,
1679
+ title,
1680
+ fileType: `text/${extension || "plain"}`,
1681
+ // Assume text if not specified
1682
+ fileSize: item.length
1683
+ };
1684
+ }
1685
+ await this._internalAddKnowledge(
1686
+ {
1687
+ id: knowledgeId,
1688
+ // Use the content-derived ID
1689
+ content: {
1690
+ text: item
1691
+ },
1692
+ metadata
1693
+ },
1694
+ void 0,
1695
+ {
1696
+ // Scope to the agent itself for character knowledge
1697
+ roomId: this.runtime.agentId,
1698
+ entityId: this.runtime.agentId,
1699
+ worldId: this.runtime.agentId
1700
+ }
1701
+ );
1702
+ } catch (error) {
1703
+ await this.handleProcessingError(
1704
+ error,
1705
+ "processing character knowledge"
1706
+ );
1707
+ } finally {
1708
+ this.knowledgeProcessingSemaphore.release();
1709
+ }
1710
+ });
1711
+ await Promise.all(processingPromises);
1712
+ logger4.info(
1713
+ `KnowledgeService: Finished processing character knowledge for agent ${this.runtime.agentId}.`
1714
+ );
1715
+ }
1716
+ // Renamed from AgentRuntime's addKnowledge
1717
+ // This is the core logic for adding text-based knowledge items and creating fragments.
1718
+ async _internalAddKnowledge(item, options = {
1719
+ targetTokens: 1500,
1720
+ // TODO: Make these configurable, perhaps from plugin config
1721
+ overlap: 200,
1722
+ modelContextSize: 4096
1723
+ }, scope = {
1724
+ // Default scope for internal additions (like character knowledge)
1725
+ roomId: this.runtime.agentId,
1726
+ entityId: this.runtime.agentId,
1727
+ worldId: this.runtime.agentId
1728
+ }) {
1729
+ const finalScope = {
1730
+ roomId: scope?.roomId ?? this.runtime.agentId,
1731
+ worldId: scope?.worldId ?? this.runtime.agentId,
1732
+ entityId: scope?.entityId ?? this.runtime.agentId
1733
+ };
1734
+ logger4.debug(
1735
+ `KnowledgeService: _internalAddKnowledge called for item ID ${item.id}`
1736
+ );
1737
+ const documentMemory = {
1738
+ id: item.id,
1739
+ // This ID should be the unique ID for the document being added.
1740
+ agentId: this.runtime.agentId,
1741
+ roomId: finalScope.roomId,
1742
+ worldId: finalScope.worldId,
1743
+ entityId: finalScope.entityId,
1744
+ content: item.content,
1745
+ metadata: {
1746
+ ...item.metadata || {},
1747
+ // Spread existing metadata
1748
+ type: MemoryType2.DOCUMENT,
1749
+ // Ensure it's marked as a document
1750
+ documentId: item.id,
1751
+ // Ensure metadata.documentId is set to the item's ID
1752
+ timestamp: item.metadata?.timestamp || Date.now()
1753
+ },
1754
+ createdAt: Date.now()
1755
+ };
1756
+ const existingDocument = await this.runtime.getMemoryById(item.id);
1757
+ if (existingDocument) {
1758
+ logger4.debug(
1759
+ `KnowledgeService: Document ${item.id} already exists in _internalAddKnowledge, updating...`
1760
+ );
1761
+ await this.runtime.updateMemory({
1762
+ ...documentMemory,
1763
+ id: item.id
1764
+ // Ensure ID is passed for update
1765
+ });
1766
+ } else {
1767
+ await this.runtime.createMemory(documentMemory, "documents");
1768
+ }
1769
+ const fragments = await this.splitAndCreateFragments(
1770
+ item,
1771
+ // item.id is the documentId
1772
+ options.targetTokens,
1773
+ options.overlap,
1774
+ finalScope
1775
+ );
1776
+ let fragmentsProcessed = 0;
1777
+ for (const fragment of fragments) {
1778
+ try {
1779
+ await this.processDocumentFragment(fragment);
1780
+ fragmentsProcessed++;
1781
+ } catch (error) {
1782
+ logger4.error(
1783
+ `KnowledgeService: Error processing fragment ${fragment.id} for document ${item.id}:`,
1784
+ error
1785
+ );
1786
+ }
1787
+ }
1788
+ logger4.debug(
1789
+ `KnowledgeService: Processed ${fragmentsProcessed}/${fragments.length} fragments for document ${item.id}.`
1790
+ );
1791
+ }
1792
+ async splitAndCreateFragments(document, targetTokens, overlap, scope) {
1793
+ if (!document.content.text) {
1794
+ return [];
1795
+ }
1796
+ const text = document.content.text;
1797
+ const chunks = await splitChunks2(text, targetTokens, overlap);
1798
+ return chunks.map((chunk, index) => {
1799
+ const fragmentIdContent = `${document.id}-fragment-${index}-${Date.now()}`;
1800
+ const fragmentId = createUniqueUuid(
1801
+ this.runtime.agentId + fragmentIdContent,
1802
+ fragmentIdContent
1803
+ );
1804
+ return {
1805
+ id: fragmentId,
1806
+ entityId: scope.entityId,
1807
+ agentId: this.runtime.agentId,
1808
+ roomId: scope.roomId,
1809
+ worldId: scope.worldId,
1810
+ content: {
1811
+ text: chunk
1812
+ },
1813
+ metadata: {
1814
+ ...document.metadata || {},
1815
+ // Spread metadata from parent document
1816
+ type: MemoryType2.FRAGMENT,
1817
+ documentId: document.id,
1818
+ // Link fragment to parent document
1819
+ position: index,
1820
+ timestamp: Date.now()
1821
+ // Fragment's own creation timestamp
1822
+ // Ensure we don't overwrite essential fragment metadata with document's
1823
+ // For example, source might be different or more specific for the fragment.
1824
+ // Here, we primarily inherit and then set fragment-specifics.
1825
+ },
1826
+ createdAt: Date.now()
1827
+ };
1828
+ });
1829
+ }
1830
+ async processDocumentFragment(fragment) {
1831
+ try {
1832
+ await this.runtime.addEmbeddingToMemory(fragment);
1833
+ await this.runtime.createMemory(fragment, "knowledge");
1834
+ } catch (error) {
1835
+ logger4.error(
1836
+ `KnowledgeService: Error processing fragment ${fragment.id}:`,
1837
+ error instanceof Error ? error.message : String(error)
1838
+ );
1839
+ throw error;
1840
+ }
1841
+ }
1842
+ // --- End of moved knowledge methods ---
1843
+ };
1844
+
1845
+ // src/provider.ts
1846
+ import { addHeader } from "@elizaos/core";
1847
+ var knowledgeProvider = {
1848
+ name: "KNOWLEDGE",
1849
+ description: "Knowledge from the knowledge base that the agent knows, retrieved whenever the agent needs to answer a question about their expertise.",
1850
+ dynamic: true,
1851
+ get: async (runtime, message) => {
1852
+ const knowledgeData = await runtime.getService("knowledge")?.getKnowledge(message);
1853
+ const firstFiveKnowledgeItems = knowledgeData?.slice(0, 5);
1854
+ let knowledge = (firstFiveKnowledgeItems && firstFiveKnowledgeItems.length > 0 ? addHeader(
1855
+ "# Knowledge",
1856
+ firstFiveKnowledgeItems.map((knowledge2) => `- ${knowledge2.content.text}`).join("\n")
1857
+ ) : "") + "\n";
1858
+ const tokenLength = 3.5;
1859
+ if (knowledge.length > 4e3 * tokenLength) {
1860
+ knowledge = knowledge.slice(0, 4e3 * tokenLength);
1861
+ }
1862
+ return {
1863
+ data: {
1864
+ knowledge
1865
+ },
1866
+ values: {
1867
+ knowledge
1868
+ },
1869
+ text: knowledge
1870
+ };
1871
+ }
1872
+ };
1873
+
1874
+ // src/tests.ts
1875
+ import { MemoryType as MemoryType3, ModelType as ModelType3 } from "@elizaos/core";
1876
+ import { Buffer as Buffer2 } from "buffer";
1877
+ import * as fs from "fs";
1878
+ import * as path from "path";
1879
+ var mockLogger = {
1880
+ info: (() => {
1881
+ const fn = (...args) => {
1882
+ fn.calls.push(args);
1883
+ };
1884
+ fn.calls = [];
1885
+ return fn;
1886
+ })(),
1887
+ warn: (() => {
1888
+ const fn = (...args) => {
1889
+ fn.calls.push(args);
1890
+ };
1891
+ fn.calls = [];
1892
+ return fn;
1893
+ })(),
1894
+ error: (() => {
1895
+ const fn = (...args) => {
1896
+ fn.calls.push(args);
1897
+ };
1898
+ fn.calls = [];
1899
+ return fn;
1900
+ })(),
1901
+ debug: (() => {
1902
+ const fn = (...args) => {
1903
+ fn.calls.push(args);
1904
+ };
1905
+ fn.calls = [];
1906
+ return fn;
1907
+ })(),
1908
+ success: (() => {
1909
+ const fn = (...args) => {
1910
+ fn.calls.push(args);
1911
+ };
1912
+ fn.calls = [];
1913
+ return fn;
1914
+ })(),
1915
+ clearCalls: () => {
1916
+ mockLogger.info.calls = [];
1917
+ mockLogger.warn.calls = [];
1918
+ mockLogger.error.calls = [];
1919
+ mockLogger.debug.calls = [];
1920
+ mockLogger.success.calls = [];
1921
+ }
1922
+ };
1923
+ global.logger = mockLogger;
1924
+ function createMockRuntime(overrides) {
1925
+ const memories = /* @__PURE__ */ new Map();
1926
+ const services = /* @__PURE__ */ new Map();
1927
+ return {
1928
+ agentId: v4_default(),
1929
+ character: {
1930
+ name: "Test Agent",
1931
+ bio: ["Test bio"],
1932
+ knowledge: []
1933
+ },
1934
+ providers: [],
1935
+ actions: [],
1936
+ evaluators: [],
1937
+ plugins: [],
1938
+ services,
1939
+ events: /* @__PURE__ */ new Map(),
1940
+ // Database methods
1941
+ async init() {
1942
+ },
1943
+ async close() {
1944
+ },
1945
+ async getConnection() {
1946
+ return null;
1947
+ },
1948
+ async getAgent(agentId) {
1949
+ return null;
1950
+ },
1951
+ async getAgents() {
1952
+ return [];
1953
+ },
1954
+ async createAgent(agent) {
1955
+ return true;
1956
+ },
1957
+ async updateAgent(agentId, agent) {
1958
+ return true;
1959
+ },
1960
+ async deleteAgent(agentId) {
1961
+ return true;
1962
+ },
1963
+ async ensureAgentExists(agent) {
1964
+ return agent;
1965
+ },
1966
+ async ensureEmbeddingDimension(dimension) {
1967
+ },
1968
+ async getEntityById(entityId) {
1969
+ return null;
1970
+ },
1971
+ async getEntitiesForRoom(roomId) {
1972
+ return [];
1973
+ },
1974
+ async createEntity(entity) {
1975
+ return true;
1976
+ },
1977
+ async updateEntity(entity) {
1978
+ },
1979
+ async getComponent(entityId, type) {
1980
+ return null;
1981
+ },
1982
+ async getComponents(entityId) {
1983
+ return [];
1984
+ },
1985
+ async createComponent(component) {
1986
+ return true;
1987
+ },
1988
+ async updateComponent(component) {
1989
+ },
1990
+ async deleteComponent(componentId) {
1991
+ },
1992
+ // Memory methods with mock implementation
1993
+ async getMemoryById(id) {
1994
+ return memories.get(id) || null;
1995
+ },
1996
+ async getMemories(params) {
1997
+ const results = Array.from(memories.values()).filter((m) => {
1998
+ if (params.roomId && m.roomId !== params.roomId) return false;
1999
+ if (params.entityId && m.entityId !== params.entityId) return false;
2000
+ if (params.tableName === "knowledge" && m.metadata?.type !== MemoryType3.FRAGMENT)
2001
+ return false;
2002
+ if (params.tableName === "documents" && m.metadata?.type !== MemoryType3.DOCUMENT)
2003
+ return false;
2004
+ return true;
2005
+ });
2006
+ return params.count ? results.slice(0, params.count) : results;
2007
+ },
2008
+ async getMemoriesByIds(ids) {
2009
+ return ids.map((id) => memories.get(id)).filter(Boolean);
2010
+ },
2011
+ async getMemoriesByRoomIds(params) {
2012
+ return Array.from(memories.values()).filter(
2013
+ (m) => params.roomIds.includes(m.roomId)
2014
+ );
2015
+ },
2016
+ async searchMemories(params) {
2017
+ const fragments = Array.from(memories.values()).filter(
2018
+ (m) => m.metadata?.type === MemoryType3.FRAGMENT
2019
+ );
2020
+ return fragments.map((f) => ({
2021
+ ...f,
2022
+ similarity: 0.8 + Math.random() * 0.2
2023
+ // Mock similarity between 0.8 and 1.0
2024
+ })).slice(0, params.count || 10);
2025
+ },
2026
+ async createMemory(memory, tableName) {
2027
+ const id = memory.id || v4_default();
2028
+ const memoryWithId = { ...memory, id };
2029
+ memories.set(id, memoryWithId);
2030
+ return id;
2031
+ },
2032
+ async updateMemory(memory) {
2033
+ if (memory.id && memories.has(memory.id)) {
2034
+ memories.set(memory.id, { ...memories.get(memory.id), ...memory });
2035
+ return true;
2036
+ }
2037
+ return false;
2038
+ },
2039
+ async deleteMemory(memoryId) {
2040
+ memories.delete(memoryId);
2041
+ },
2042
+ async deleteAllMemories(roomId, tableName) {
2043
+ for (const [id, memory] of memories.entries()) {
2044
+ if (memory.roomId === roomId) {
2045
+ memories.delete(id);
2046
+ }
2047
+ }
2048
+ },
2049
+ async countMemories(roomId) {
2050
+ return Array.from(memories.values()).filter((m) => m.roomId === roomId).length;
2051
+ },
2052
+ // Other required methods with minimal implementation
2053
+ async getCachedEmbeddings(params) {
2054
+ return [];
2055
+ },
2056
+ async log(params) {
2057
+ },
2058
+ async getLogs(params) {
2059
+ return [];
2060
+ },
2061
+ async deleteLog(logId) {
2062
+ },
2063
+ async createWorld(world) {
2064
+ return v4_default();
2065
+ },
2066
+ async getWorld(id) {
2067
+ return null;
2068
+ },
2069
+ async removeWorld(id) {
2070
+ },
2071
+ async getAllWorlds() {
2072
+ return [];
2073
+ },
2074
+ async updateWorld(world) {
2075
+ },
2076
+ async getRoom(roomId) {
2077
+ return null;
2078
+ },
2079
+ async createRoom(room) {
2080
+ return v4_default();
2081
+ },
2082
+ async deleteRoom(roomId) {
2083
+ },
2084
+ async deleteRoomsByWorldId(worldId) {
2085
+ },
2086
+ async updateRoom(room) {
2087
+ },
2088
+ async getRoomsForParticipant(entityId) {
2089
+ return [];
2090
+ },
2091
+ async getRoomsForParticipants(userIds) {
2092
+ return [];
2093
+ },
2094
+ async getRooms(worldId) {
2095
+ return [];
2096
+ },
2097
+ async addParticipant(entityId, roomId) {
2098
+ return true;
2099
+ },
2100
+ async removeParticipant(entityId, roomId) {
2101
+ return true;
2102
+ },
2103
+ async getParticipantsForEntity(entityId) {
2104
+ return [];
2105
+ },
2106
+ async getParticipantsForRoom(roomId) {
2107
+ return [];
2108
+ },
2109
+ async getParticipantUserState(roomId, entityId) {
2110
+ return null;
2111
+ },
2112
+ async setParticipantUserState(roomId, entityId, state) {
2113
+ },
2114
+ async createRelationship(params) {
2115
+ return true;
2116
+ },
2117
+ async updateRelationship(relationship) {
2118
+ },
2119
+ async getRelationship(params) {
2120
+ return null;
2121
+ },
2122
+ async getRelationships(params) {
2123
+ return [];
2124
+ },
2125
+ async getCache(key) {
2126
+ return void 0;
2127
+ },
2128
+ async setCache(key, value) {
2129
+ return true;
2130
+ },
2131
+ async deleteCache(key) {
2132
+ return true;
2133
+ },
2134
+ async createTask(task) {
2135
+ return v4_default();
2136
+ },
2137
+ async getTasks(params) {
2138
+ return [];
2139
+ },
2140
+ async getTask(id) {
2141
+ return null;
2142
+ },
2143
+ async getTasksByName(name) {
2144
+ return [];
2145
+ },
2146
+ async updateTask(id, task) {
2147
+ },
2148
+ async deleteTask(id) {
2149
+ },
2150
+ async getMemoriesByWorldId(params) {
2151
+ return [];
2152
+ },
2153
+ // Plugin/service methods
2154
+ async registerPlugin(plugin) {
2155
+ },
2156
+ async initialize() {
2157
+ },
2158
+ getService(name) {
2159
+ return services.get(name) || null;
2160
+ },
2161
+ getAllServices() {
2162
+ return services;
2163
+ },
2164
+ async registerService(ServiceClass) {
2165
+ const service = await ServiceClass.start(this);
2166
+ services.set(ServiceClass.serviceType, service);
2167
+ },
2168
+ registerDatabaseAdapter(adapter) {
2169
+ },
2170
+ setSetting(key, value) {
2171
+ },
2172
+ getSetting(key) {
2173
+ return null;
2174
+ },
2175
+ getConversationLength() {
2176
+ return 0;
2177
+ },
2178
+ async processActions(message, responses) {
2179
+ },
2180
+ async evaluate(message) {
2181
+ return null;
2182
+ },
2183
+ registerProvider(provider) {
2184
+ this.providers.push(provider);
2185
+ },
2186
+ registerAction(action) {
2187
+ },
2188
+ registerEvaluator(evaluator) {
2189
+ },
2190
+ async ensureConnection(params) {
2191
+ },
2192
+ async ensureParticipantInRoom(entityId, roomId) {
2193
+ },
2194
+ async ensureWorldExists(world) {
2195
+ },
2196
+ async ensureRoomExists(room) {
2197
+ },
2198
+ async composeState(message) {
2199
+ return {
2200
+ values: {},
2201
+ data: {},
2202
+ text: ""
2203
+ };
2204
+ },
2205
+ // Model methods with mocks
2206
+ async useModel(modelType, params) {
2207
+ if (modelType === ModelType3.TEXT_EMBEDDING) {
2208
+ return new Array(1536).fill(0).map(() => Math.random());
2209
+ }
2210
+ if (modelType === ModelType3.TEXT_LARGE || modelType === ModelType3.TEXT_SMALL) {
2211
+ return `Mock response for: ${params.prompt}`;
2212
+ }
2213
+ return null;
2214
+ },
2215
+ registerModel(modelType, handler, provider) {
2216
+ },
2217
+ getModel(modelType) {
2218
+ return void 0;
2219
+ },
2220
+ registerEvent(event, handler) {
2221
+ },
2222
+ getEvent(event) {
2223
+ return void 0;
2224
+ },
2225
+ async emitEvent(event, params) {
2226
+ },
2227
+ registerTaskWorker(taskHandler) {
2228
+ },
2229
+ getTaskWorker(name) {
2230
+ return void 0;
2231
+ },
2232
+ async stop() {
2233
+ },
2234
+ async addEmbeddingToMemory(memory) {
2235
+ memory.embedding = await this.useModel(ModelType3.TEXT_EMBEDDING, {
2236
+ text: memory.content.text
2237
+ });
2238
+ return memory;
2239
+ },
2240
+ registerSendHandler(source, handler) {
2241
+ },
2242
+ async sendMessageToTarget(target, content) {
2243
+ },
2244
+ ...overrides
2245
+ };
2246
+ }
2247
+ function createTestFileBuffer(content, type = "text") {
2248
+ if (type === "pdf") {
2249
+ const pdfContent = `%PDF-1.4
2250
+ 1 0 obj
2251
+ << /Type /Catalog /Pages 2 0 R >>
2252
+ endobj
2253
+ 2 0 obj
2254
+ << /Type /Pages /Kids [3 0 R] /Count 1 >>
2255
+ endobj
2256
+ 3 0 obj
2257
+ << /Type /Page /Parent 2 0 R /Resources << /Font << /F1 << /Type /Font /Subtype /Type1 /BaseFont /Helvetica >> >> >> /MediaBox [0 0 612 792] /Contents 4 0 R >>
2258
+ endobj
2259
+ 4 0 obj
2260
+ << /Length ${content.length + 10} >>
2261
+ stream
2262
+ BT /F1 12 Tf 100 700 Td (${content}) Tj ET
2263
+ endstream
2264
+ endobj
2265
+ xref
2266
+ 0 5
2267
+ 0000000000 65535 f
2268
+ 0000000009 00000 n
2269
+ 0000000058 00000 n
2270
+ 0000000115 00000 n
2271
+ 0000000362 00000 n
2272
+ trailer
2273
+ << /Size 5 /Root 1 0 R >>
2274
+ startxref
2275
+ ${465 + content.length}
2276
+ %%EOF`;
2277
+ return Buffer2.from(pdfContent);
2278
+ }
2279
+ return Buffer2.from(content, "utf-8");
2280
+ }
2281
+ var KnowledgeTestSuite = class {
2282
+ name = "knowledge";
2283
+ description = "Tests for the Knowledge plugin including document processing, retrieval, and integration";
2284
+ tests = [
2285
+ // Configuration Tests
2286
+ {
2287
+ name: "Should handle default docs folder configuration",
2288
+ fn: async (runtime) => {
2289
+ const originalEnv = { ...process.env };
2290
+ delete process.env.KNOWLEDGE_PATH;
2291
+ try {
2292
+ const docsPath = path.join(process.cwd(), "docs");
2293
+ const docsExists = fs.existsSync(docsPath);
2294
+ if (!docsExists) {
2295
+ fs.mkdirSync(docsPath, { recursive: true });
2296
+ }
2297
+ await index_default.init({}, runtime);
2298
+ const errorCalls = mockLogger.error.calls;
2299
+ if (errorCalls.length > 0) {
2300
+ throw new Error(`Unexpected error during init: ${errorCalls[0]}`);
2301
+ }
2302
+ if (!docsExists) {
2303
+ fs.rmSync(docsPath, { recursive: true, force: true });
2304
+ }
2305
+ } finally {
2306
+ process.env = originalEnv;
2307
+ }
2308
+ }
2309
+ },
2310
+ {
2311
+ name: "Should throw error when no docs folder and no path configured",
2312
+ fn: async (runtime) => {
2313
+ const originalEnv = { ...process.env };
2314
+ delete process.env.KNOWLEDGE_PATH;
2315
+ try {
2316
+ const docsPath = path.join(process.cwd(), "docs");
2317
+ if (fs.existsSync(docsPath)) {
2318
+ fs.renameSync(docsPath, docsPath + ".backup");
2319
+ }
2320
+ await index_default.init({}, runtime);
2321
+ if (fs.existsSync(docsPath + ".backup")) {
2322
+ fs.renameSync(docsPath + ".backup", docsPath);
2323
+ }
2324
+ } finally {
2325
+ process.env = originalEnv;
2326
+ }
2327
+ }
2328
+ },
2329
+ // Service Lifecycle Tests
2330
+ {
2331
+ name: "Should initialize KnowledgeService correctly",
2332
+ fn: async (runtime) => {
2333
+ const service = await KnowledgeService.start(runtime);
2334
+ if (!service) {
2335
+ throw new Error("Service initialization failed");
2336
+ }
2337
+ if (service.capabilityDescription !== "Provides Retrieval Augmented Generation capabilities, including knowledge upload and querying.") {
2338
+ throw new Error("Incorrect service capability description");
2339
+ }
2340
+ runtime.services.set(KnowledgeService.serviceType, service);
2341
+ const retrievedService = runtime.getService(
2342
+ KnowledgeService.serviceType
2343
+ );
2344
+ if (retrievedService !== service) {
2345
+ throw new Error("Service not properly registered with runtime");
2346
+ }
2347
+ await service.stop();
2348
+ }
2349
+ },
2350
+ // Document Processing Tests
2351
+ {
2352
+ name: "Should extract text from text files",
2353
+ fn: async (runtime) => {
2354
+ const testContent = "This is a test document with some content.";
2355
+ const buffer = createTestFileBuffer(testContent);
2356
+ const extractedText = await extractTextFromDocument(
2357
+ buffer,
2358
+ "text/plain",
2359
+ "test.txt"
2360
+ );
2361
+ if (extractedText !== testContent) {
2362
+ throw new Error(`Expected "${testContent}", got "${extractedText}"`);
2363
+ }
2364
+ }
2365
+ },
2366
+ {
2367
+ name: "Should handle empty file buffer",
2368
+ fn: async (runtime) => {
2369
+ const emptyBuffer = Buffer2.alloc(0);
2370
+ try {
2371
+ await extractTextFromDocument(emptyBuffer, "text/plain", "empty.txt");
2372
+ throw new Error("Should have thrown error for empty buffer");
2373
+ } catch (error) {
2374
+ if (!error.message.includes("Empty file buffer")) {
2375
+ throw new Error(`Unexpected error: ${error.message}`);
2376
+ }
2377
+ }
2378
+ }
2379
+ },
2380
+ {
2381
+ name: "Should create document memory correctly",
2382
+ fn: async (runtime) => {
2383
+ const params = {
2384
+ text: "Test document content",
2385
+ agentId: runtime.agentId,
2386
+ clientDocumentId: v4_default(),
2387
+ originalFilename: "test-doc.txt",
2388
+ contentType: "text/plain",
2389
+ worldId: v4_default(),
2390
+ fileSize: 1024
2391
+ };
2392
+ const memory = createDocumentMemory(params);
2393
+ if (!memory.id) {
2394
+ throw new Error("Document memory should have an ID");
2395
+ }
2396
+ if (memory.metadata?.type !== MemoryType3.DOCUMENT) {
2397
+ throw new Error("Document memory should have DOCUMENT type");
2398
+ }
2399
+ if (memory.content.text !== params.text) {
2400
+ throw new Error("Document memory content mismatch");
2401
+ }
2402
+ if (memory.metadata.originalFilename !== params.originalFilename) {
2403
+ throw new Error("Document memory metadata mismatch");
2404
+ }
2405
+ }
2406
+ },
2407
+ // Knowledge Addition Tests
2408
+ {
2409
+ name: "Should add knowledge successfully",
2410
+ fn: async (runtime) => {
2411
+ const service = await KnowledgeService.start(runtime);
2412
+ runtime.services.set(KnowledgeService.serviceType, service);
2413
+ const testDocument = {
2414
+ clientDocumentId: v4_default(),
2415
+ contentType: "text/plain",
2416
+ originalFilename: "knowledge-test.txt",
2417
+ worldId: runtime.agentId,
2418
+ content: "This is test knowledge that should be stored and retrievable."
2419
+ };
2420
+ const result = await service.addKnowledge(testDocument);
2421
+ if (result.clientDocumentId !== testDocument.clientDocumentId) {
2422
+ throw new Error("Client document ID mismatch");
2423
+ }
2424
+ if (!result.storedDocumentMemoryId) {
2425
+ throw new Error("No stored document memory ID returned");
2426
+ }
2427
+ if (result.fragmentCount === 0) {
2428
+ throw new Error("No fragments created");
2429
+ }
2430
+ const storedDoc = await runtime.getMemoryById(
2431
+ result.storedDocumentMemoryId
2432
+ );
2433
+ if (!storedDoc) {
2434
+ throw new Error("Document not found in storage");
2435
+ }
2436
+ await service.stop();
2437
+ }
2438
+ },
2439
+ {
2440
+ name: "Should handle duplicate document uploads",
2441
+ fn: async (runtime) => {
2442
+ const service = await KnowledgeService.start(runtime);
2443
+ runtime.services.set(KnowledgeService.serviceType, service);
2444
+ const testDocument = {
2445
+ clientDocumentId: v4_default(),
2446
+ contentType: "text/plain",
2447
+ originalFilename: "duplicate-test.txt",
2448
+ worldId: runtime.agentId,
2449
+ content: "This document will be uploaded twice."
2450
+ };
2451
+ const result1 = await service.addKnowledge(testDocument);
2452
+ const result2 = await service.addKnowledge(testDocument);
2453
+ if (result1.storedDocumentMemoryId !== result2.storedDocumentMemoryId) {
2454
+ throw new Error("Duplicate upload created new document");
2455
+ }
2456
+ if (result1.fragmentCount !== result2.fragmentCount) {
2457
+ throw new Error("Fragment count mismatch on duplicate upload");
2458
+ }
2459
+ await service.stop();
2460
+ }
2461
+ },
2462
+ // Knowledge Retrieval Tests
2463
+ {
2464
+ name: "Should retrieve knowledge based on query",
2465
+ fn: async (runtime) => {
2466
+ const service = await KnowledgeService.start(runtime);
2467
+ runtime.services.set(KnowledgeService.serviceType, service);
2468
+ const testDocument = {
2469
+ clientDocumentId: v4_default(),
2470
+ contentType: "text/plain",
2471
+ originalFilename: "retrieval-test.txt",
2472
+ worldId: runtime.agentId,
2473
+ content: "The capital of France is Paris. Paris is known for the Eiffel Tower."
2474
+ };
2475
+ await service.addKnowledge(testDocument);
2476
+ const queryMessage = {
2477
+ id: v4_default(),
2478
+ entityId: runtime.agentId,
2479
+ agentId: runtime.agentId,
2480
+ roomId: runtime.agentId,
2481
+ content: {
2482
+ text: "What is the capital of France?"
2483
+ }
2484
+ };
2485
+ const results = await service.getKnowledge(queryMessage);
2486
+ if (results.length === 0) {
2487
+ throw new Error("No knowledge retrieved");
2488
+ }
2489
+ const hasRelevantContent = results.some(
2490
+ (item) => item.content.text?.toLowerCase().includes("paris") || item.content.text?.toLowerCase().includes("france")
2491
+ );
2492
+ if (!hasRelevantContent) {
2493
+ throw new Error("Retrieved knowledge not relevant to query");
2494
+ }
2495
+ await service.stop();
2496
+ }
2497
+ },
2498
+ // Provider Tests
2499
+ {
2500
+ name: "Should format knowledge in provider output",
2501
+ fn: async (runtime) => {
2502
+ const service = await KnowledgeService.start(runtime);
2503
+ runtime.services.set("knowledge", service);
2504
+ const testDocument = {
2505
+ clientDocumentId: v4_default(),
2506
+ contentType: "text/plain",
2507
+ originalFilename: "provider-test.txt",
2508
+ worldId: runtime.agentId,
2509
+ content: "Important fact 1. Important fact 2. Important fact 3."
2510
+ };
2511
+ await service.addKnowledge(testDocument);
2512
+ const message = {
2513
+ id: v4_default(),
2514
+ entityId: runtime.agentId,
2515
+ agentId: runtime.agentId,
2516
+ roomId: runtime.agentId,
2517
+ content: {
2518
+ text: "Tell me about important facts"
2519
+ }
2520
+ };
2521
+ const originalGetKnowledge = service.getKnowledge.bind(service);
2522
+ service.getKnowledge = async (msg) => {
2523
+ return [
2524
+ {
2525
+ id: v4_default(),
2526
+ content: { text: "Important fact 1." },
2527
+ metadata: void 0
2528
+ },
2529
+ {
2530
+ id: v4_default(),
2531
+ content: { text: "Important fact 2." },
2532
+ metadata: void 0
2533
+ }
2534
+ ];
2535
+ };
2536
+ const state = {
2537
+ values: {},
2538
+ data: {},
2539
+ text: ""
2540
+ };
2541
+ const result = await knowledgeProvider.get(runtime, message, state);
2542
+ if (!result.text) {
2543
+ throw new Error("Provider returned no text");
2544
+ }
2545
+ if (!result.text.includes("# Knowledge")) {
2546
+ throw new Error("Provider output missing knowledge header");
2547
+ }
2548
+ if (!result.text.includes("Important fact")) {
2549
+ throw new Error("Provider output missing knowledge content");
2550
+ }
2551
+ service.getKnowledge = originalGetKnowledge;
2552
+ await service.stop();
2553
+ }
2554
+ },
2555
+ // Character Knowledge Tests
2556
+ {
2557
+ name: "Should process character knowledge on startup",
2558
+ fn: async (runtime) => {
2559
+ const knowledgeRuntime = createMockRuntime({
2560
+ character: {
2561
+ name: "Knowledge Agent",
2562
+ bio: ["Agent with knowledge"],
2563
+ knowledge: [
2564
+ "The sky is blue.",
2565
+ "Water boils at 100 degrees Celsius.",
2566
+ "Path: docs/test.md\nThis is markdown content."
2567
+ ]
2568
+ }
2569
+ });
2570
+ const service = await KnowledgeService.start(knowledgeRuntime);
2571
+ await new Promise((resolve) => setTimeout(resolve, 2e3));
2572
+ const memories = await knowledgeRuntime.getMemories({
2573
+ tableName: "documents",
2574
+ entityId: knowledgeRuntime.agentId
2575
+ });
2576
+ if (memories.length < 3) {
2577
+ throw new Error(
2578
+ `Expected at least 3 character knowledge items, got ${memories.length}`
2579
+ );
2580
+ }
2581
+ const pathKnowledge = memories.find(
2582
+ (m) => m.content.text?.includes("markdown content")
2583
+ );
2584
+ if (!pathKnowledge) {
2585
+ throw new Error("Path-based knowledge not found");
2586
+ }
2587
+ const metadata = pathKnowledge.metadata;
2588
+ if (!metadata.path || !metadata.filename) {
2589
+ throw new Error("Path-based knowledge missing file metadata");
2590
+ }
2591
+ await service.stop();
2592
+ }
2593
+ },
2594
// Error Handling Tests
{
  name: "Should handle and log errors appropriately",
  fn: async (runtime) => {
    const service = await KnowledgeService.start(runtime);
    runtime.services.set(KnowledgeService.serviceType, service);
    mockLogger.clearCalls();
    // Case 1: empty string content should be rejected by addKnowledge.
    try {
      await service.addKnowledge({
        clientDocumentId: v4_default(),
        contentType: "text/plain",
        originalFilename: "empty.txt",
        worldId: runtime.agentId,
        content: ""
        // Empty content should cause an error
      });
      throw new Error("Expected error for empty content");
    } catch (error) {
      // NOTE(review): this branch body is empty in the published bundle — the
      // original source presumably failed the test on an unexpected error
      // message here; confirm against the plugin's source repository.
      if (!error.message.includes("Empty file buffer") && !error.message.includes("Expected error for empty content")) {
      }
    }
    // Case 2: null content — only checks the error does not escape the test.
    try {
      await service.addKnowledge({
        clientDocumentId: v4_default(),
        contentType: "text/plain",
        originalFilename: "null-content.txt",
        worldId: runtime.agentId,
        content: null
        // This should definitely cause an error
      });
    } catch (error) {
      // Swallowed deliberately: any thrown error is the expected outcome here.
    }
    await service.stop();
  }
},
2629
// Integration Tests
{
  name: "End-to-end knowledge workflow test",
  fn: async (runtime) => {
    // Run real plugin init so provider/service wiring matches production.
    await index_default.init(
      {
        EMBEDDING_PROVIDER: "openai",
        OPENAI_API_KEY: "test-key",
        TEXT_EMBEDDING_MODEL: "text-embedding-3-small"
      },
      runtime
    );
    const service = await KnowledgeService.start(runtime);
    runtime.services.set(KnowledgeService.serviceType, service);
    runtime.services.set("knowledge", service);
    runtime.registerProvider(knowledgeProvider);
    // Ingest a small factual document to query back below.
    const document = {
      clientDocumentId: v4_default(),
      contentType: "text/plain",
      originalFilename: "integration-test.txt",
      worldId: runtime.agentId,
      content: `
Quantum computing uses quantum bits or qubits.
Unlike classical bits, qubits can exist in superposition.
This allows quantum computers to process many calculations simultaneously.
Major companies like IBM, Google, and Microsoft are developing quantum computers.
`
    };
    const addResult = await service.addKnowledge(document);
    if (addResult.fragmentCount === 0) {
      throw new Error("No fragments created in integration test");
    }
    // Query path: retrieve knowledge directly from the service...
    const queryMessage = {
      id: v4_default(),
      entityId: runtime.agentId,
      agentId: runtime.agentId,
      roomId: runtime.agentId,
      content: {
        text: "What are qubits?"
      }
    };
    const knowledge = await service.getKnowledge(queryMessage);
    if (knowledge.length === 0) {
      throw new Error("No knowledge retrieved in integration test");
    }
    // ...then through the provider, which should surface the same content
    // in its text as well as in values.knowledge / data.knowledge.
    const state = {
      values: {},
      data: {},
      text: ""
    };
    const providerResult = await knowledgeProvider.get(
      runtime,
      queryMessage,
      state
    );
    if (!providerResult.text || !providerResult.text.includes("qubit")) {
      throw new Error("Provider did not return relevant knowledge");
    }
    if (!providerResult.values || !providerResult.values.knowledge || !providerResult.data || !providerResult.data.knowledge) {
      throw new Error("Provider result missing knowledge in values/data");
    }
    await service.stop();
  }
},
2693
// Performance and Limits Tests
{
  name: "Should handle large documents with chunking",
  fn: async (runtime) => {
    const service = await KnowledgeService.start(runtime);
    runtime.services.set(KnowledgeService.serviceType, service);
    // 100 repeated paragraphs (~12KB) — large enough to force multiple chunks.
    const largeContent = Array(100).fill(
      "This is a paragraph of text that will be repeated many times to create a large document for testing chunking functionality. "
    ).join("\n\n");
    const document = {
      clientDocumentId: v4_default(),
      contentType: "text/plain",
      originalFilename: "large-document.txt",
      worldId: runtime.agentId,
      content: largeContent
    };
    const result = await service.addKnowledge(document);
    if (result.fragmentCount < 2) {
      throw new Error(
        "Large document should be split into multiple fragments"
      );
    }
    // Cross-check: the "knowledge" table must contain exactly as many
    // fragments for this document as addKnowledge reported.
    const fragments = await runtime.getMemories({
      tableName: "knowledge",
      roomId: runtime.agentId
    });
    const documentFragments = fragments.filter(
      (f) => f.metadata?.documentId === document.clientDocumentId
    );
    if (documentFragments.length !== result.fragmentCount) {
      throw new Error("Fragment count mismatch");
    }
    await service.stop();
  }
},
2728
+ // Binary File Handling Tests
2729
+ {
2730
+ name: "Should detect binary content types correctly",
2731
+ fn: async (runtime) => {
2732
+ const service = await KnowledgeService.start(runtime);
2733
+ const isBinary = service.isBinaryContentType.bind(service);
2734
+ const binaryTypes = [
2735
+ { type: "application/pdf", filename: "test.pdf", expected: true },
2736
+ { type: "image/png", filename: "test.png", expected: true },
2737
+ {
2738
+ type: "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
2739
+ filename: "test.docx",
2740
+ expected: true
2741
+ },
2742
+ { type: "text/plain", filename: "test.txt", expected: false },
2743
+ { type: "application/json", filename: "test.tson", expected: false },
2744
+ {
2745
+ type: "application/octet-stream",
2746
+ filename: "unknown.bin",
2747
+ expected: true
2748
+ }
2749
+ ];
2750
+ for (const test of binaryTypes) {
2751
+ const result = isBinary(test.type, test.filename);
2752
+ if (result !== test.expected) {
2753
+ throw new Error(
2754
+ `Binary detection failed for ${test.type}/${test.filename}. Expected ${test.expected}, got ${result}`
2755
+ );
2756
+ }
2757
+ }
2758
+ await service.stop();
2759
+ }
2760
+ }
2761
+ ];
2762
+ };
2763
// Singleton test-suite instance, exported via the plugin's `tests` array below.
var tests_default = new KnowledgeTestSuite();
2764
+
2765
+ // src/actions.ts
2766
+ import { logger as logger5 } from "@elizaos/core";
2767
+ import * as fs2 from "fs";
2768
+ import * as path2 from "path";
2769
// Action: PROCESS_KNOWLEDGE — ingest a document (by filesystem path found in
// the message) or the message's inline text into the agent's knowledge base.
var processKnowledgeAction = {
  name: "PROCESS_KNOWLEDGE",
  description: "Process and store knowledge from a file path or text content into the knowledge base",
  similes: [
    "add knowledge",
    "upload document",
    "store information",
    "add to knowledge base",
    "learn from document",
    "ingest file",
    "process document",
    "remember this"
  ],
  examples: [
    [
      {
        name: "user",
        content: {
          text: "Process the document at /path/to/document.pdf"
        }
      },
      {
        name: "assistant",
        content: {
          text: "I'll process the document at /path/to/document.pdf and add it to my knowledge base.",
          actions: ["PROCESS_KNOWLEDGE"]
        }
      }
    ],
    [
      {
        name: "user",
        content: {
          text: "Add this to your knowledge: The capital of France is Paris."
        }
      },
      {
        name: "assistant",
        content: {
          text: "I'll add that information to my knowledge base.",
          actions: ["PROCESS_KNOWLEDGE"]
        }
      }
    ]
  ],
  // Valid when the knowledge service is registered AND the message contains
  // either a knowledge-related keyword or something that looks like a path.
  validate: async (runtime, message, state) => {
    const text = message.content.text?.toLowerCase() || "";
    const knowledgeKeywords = [
      "process",
      "add",
      "upload",
      "document",
      "knowledge",
      "learn",
      "remember",
      "store",
      "ingest",
      "file"
    ];
    const hasKeyword = knowledgeKeywords.some(
      (keyword) => text.includes(keyword)
    );
    // Matches POSIX paths (/a/b.txt) and Windows drive paths (C:\a\b.txt).
    const pathPattern = /(?:\/[\w.-]+)+|(?:[a-zA-Z]:[\\/][\w\s.-]+(?:[\\/][\w\s.-]+)*)/;
    const hasPath = pathPattern.test(text);
    const service = runtime.getService(KnowledgeService.serviceType);
    if (!service) {
      logger5.warn(
        "Knowledge service not available for PROCESS_KNOWLEDGE action"
      );
      return false;
    }
    return hasKeyword || hasPath;
  },
  // Reads a file from disk (path branch) or strips the command phrase from the
  // message (text branch), submits the content to the knowledge service, and
  // reports the outcome through `callback` when one is provided.
  handler: async (runtime, message, state, options, callback) => {
    try {
      const service = runtime.getService(
        KnowledgeService.serviceType
      );
      if (!service) {
        throw new Error("Knowledge service not available");
      }
      const text = message.content.text || "";
      const pathPattern = /(?:\/[\w.-]+)+|(?:[a-zA-Z]:[\\/][\w\s.-]+(?:[\\/][\w\s.-]+)*)/;
      const pathMatch = text.match(pathPattern);
      let response;
      if (pathMatch) {
        // File-path branch: read the file synchronously and submit it base64-encoded.
        const filePath = pathMatch[0];
        if (!fs2.existsSync(filePath)) {
          response = {
            text: `I couldn't find the file at ${filePath}. Please check the path and try again.`
          };
          if (callback) {
            await callback(response);
          }
          return;
        }
        const fileBuffer = fs2.readFileSync(filePath);
        const fileName = path2.basename(filePath);
        const fileExt = path2.extname(filePath).toLowerCase();
        // Map well-known extensions to MIME types; anything else stays text/plain.
        let contentType = "text/plain";
        if (fileExt === ".pdf") contentType = "application/pdf";
        else if (fileExt === ".docx")
          contentType = "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
        else if (fileExt === ".doc") contentType = "application/msword";
        // FIX: list previously contained ".tson", a typo for ".json".
        else if ([".txt", ".md", ".json", ".xml", ".csv"].includes(fileExt))
          contentType = "text/plain";
        const knowledgeOptions = {
          clientDocumentId: `${runtime.agentId}-${Date.now()}-${fileName}`,
          contentType,
          originalFilename: fileName,
          worldId: runtime.agentId,
          content: fileBuffer.toString("base64"),
          roomId: message.roomId,
          entityId: message.entityId
        };
        const result = await service.addKnowledge(knowledgeOptions);
        response = {
          text: `I've successfully processed the document "${fileName}". It has been split into ${result.fragmentCount} searchable fragments and added to my knowledge base.`
        };
      } else {
        // Inline-text branch: drop the leading command phrase, keep the payload.
        const knowledgeContent = text.replace(
          /^(add|store|remember|process|learn)\s+(this|that|the following)?:?\s*/i,
          ""
        ).trim();
        if (!knowledgeContent) {
          response = {
            text: "I need some content to add to my knowledge base. Please provide text or a file path."
          };
          if (callback) {
            await callback(response);
          }
          return;
        }
        const knowledgeOptions = {
          clientDocumentId: `${runtime.agentId}-${Date.now()}-text`,
          contentType: "text/plain",
          originalFilename: "user-knowledge.txt",
          worldId: runtime.agentId,
          content: knowledgeContent,
          roomId: message.roomId,
          entityId: message.entityId
        };
        const result = await service.addKnowledge(knowledgeOptions);
        response = {
          text: `I've added that information to my knowledge base. It has been stored and indexed for future reference.`
        };
      }
      if (callback) {
        await callback(response);
      }
    } catch (error) {
      logger5.error("Error in PROCESS_KNOWLEDGE action:", error);
      const errorResponse = {
        text: `I encountered an error while processing the knowledge: ${error instanceof Error ? error.message : "Unknown error"}`
      };
      if (callback) {
        await callback(errorResponse);
      }
    }
  }
};
2930
// Action: SEARCH_KNOWLEDGE — query the knowledge base and reply with up to
// three matching snippets.
var searchKnowledgeAction = {
  name: "SEARCH_KNOWLEDGE",
  description: "Search the knowledge base for specific information",
  similes: [
    "search knowledge",
    "find information",
    "look up",
    "query knowledge base",
    "search documents",
    "find in knowledge"
  ],
  examples: [
    [
      {
        name: "user",
        content: {
          text: "Search your knowledge for information about quantum computing"
        }
      },
      {
        name: "assistant",
        content: {
          text: "I'll search my knowledge base for information about quantum computing.",
          actions: ["SEARCH_KNOWLEDGE"]
        }
      }
    ]
  ],
  // Valid only when the knowledge service exists and the message pairs a
  // search verb with a knowledge-ish noun.
  validate: async (runtime, message, state) => {
    if (!runtime.getService(KnowledgeService.serviceType)) {
      return false;
    }
    const lowered = message.content.text?.toLowerCase() || "";
    const mentionsSearch = [
      "search",
      "find",
      "look up",
      "query",
      "what do you know about"
    ].some((kw) => lowered.includes(kw));
    const mentionsKnowledge = [
      "knowledge",
      "information",
      "document",
      "database"
    ].some((kw) => lowered.includes(kw));
    return mentionsSearch && mentionsKnowledge;
  },
  // Strips the command phrase to isolate the query, fetches matches, and
  // replies via `callback` (no-op when no callback is supplied).
  handler: async (runtime, message, state, options, callback) => {
    const deliver = async (content) => {
      if (callback) {
        await callback(content);
      }
    };
    try {
      const service = runtime.getService(KnowledgeService.serviceType);
      if (!service) {
        throw new Error("Knowledge service not available");
      }
      const rawText = message.content.text || "";
      const query = rawText.replace(
        /^(search|find|look up|query)\s+(your\s+)?knowledge\s+(base\s+)?(for\s+)?/i,
        ""
      ).trim();
      if (!query) {
        await deliver({
          text: "What would you like me to search for in my knowledge base?"
        });
        return;
      }
      // Reuse the incoming message envelope with the cleaned-up query text.
      const results = await service.getKnowledge({
        ...message,
        content: {
          text: query
        }
      });
      if (results.length === 0) {
        await deliver({
          text: `I couldn't find any information about "${query}" in my knowledge base.`
        });
        return;
      }
      // Present at most the top three matches as a numbered list.
      const topMatches = results.slice(0, 3).map((item, index) => `${index + 1}. ${item.content.text}`).join("\n\n");
      await deliver({
        text: `Here's what I found about "${query}":\n\n${topMatches}`
      });
    } catch (error) {
      logger5.error("Error in SEARCH_KNOWLEDGE action:", error);
      await deliver({
        text: `I encountered an error while searching the knowledge base: ${error instanceof Error ? error.message : "Unknown error"}`
      });
    }
  }
};
3041
// All actions this plugin registers, in declaration order.
var knowledgeActions = [processKnowledgeAction, searchKnowledgeAction];
3042
+
3043
+ // src/index.ts
3044
// Plugin definition: wires the knowledge service, provider, actions and test
// suite into the ElizaOS runtime.
var knowledgePlugin = {
  name: "knowledge",
  description: "Plugin for Retrieval Augmented Generation, including knowledge management and embedding.",
  config: {
    // Token limits
    MAX_INPUT_TOKENS: process.env.MAX_INPUT_TOKENS,
    MAX_OUTPUT_TOKENS: process.env.MAX_OUTPUT_TOKENS,
    // Contextual Knowledge settings
    CTX_KNOWLEDGE_ENABLED: process.env.CTX_KNOWLEDGE_ENABLED || "false"
  },
  // Validates the model configuration, logs which mode the plugin runs in,
  // and (unless disabled via LOAD_DOCS_ON_STARTUP="false") schedules a
  // one-shot import of documents from the docs folder. Rethrows on invalid
  // configuration after logging.
  async init(config, runtime) {
    logger6.info("Initializing Knowledge Plugin...");
    try {
      logger6.info("Validating model configuration for Knowledge plugin...");
      const validatedConfig = validateModelConfig();
      if (validatedConfig.CTX_KNOWLEDGE_ENABLED) {
        logger6.info(
          "Running in Contextual Knowledge mode with text generation capabilities."
        );
        logger6.info(
          `Using ${validatedConfig.EMBEDDING_PROVIDER} for embeddings and ${validatedConfig.TEXT_PROVIDER} for text generation.`
        );
      } else {
        // No explicit EMBEDDING_PROVIDER env var means the configuration was
        // auto-detected from plugin-openai (see ModelConfigSchema note at the
        // top of this file).
        const usingPluginOpenAI = !process.env.EMBEDDING_PROVIDER;
        if (usingPluginOpenAI) {
          logger6.info(
            "Running in Basic Embedding mode with auto-detected configuration from plugin-openai."
          );
        } else {
          logger6.info(
            "Running in Basic Embedding mode (CTX_KNOWLEDGE_ENABLED=false). TEXT_PROVIDER and TEXT_MODEL not required."
          );
        }
        logger6.info(
          `Using ${validatedConfig.EMBEDDING_PROVIDER} for embeddings with ${validatedConfig.TEXT_EMBEDDING_MODEL}.`
        );
      }
      logger6.info("Model configuration validated successfully.");
      if (runtime) {
        logger6.info(
          `Knowledge Plugin initialized for agent: ${runtime.agentId}`
        );
        // Docs auto-load is opt-out: either the plugin config or the env var
        // set to the literal string "false" disables it.
        const loadDocsOnStartup = config.LOAD_DOCS_ON_STARTUP !== "false" && process.env.LOAD_DOCS_ON_STARTUP !== "false";
        if (loadDocsOnStartup) {
          // Deferred by 5s, presumably so the KnowledgeService has finished
          // registering before documents are imported — NOTE(review): this is
          // timing-based; confirm the service is guaranteed up by then.
          setTimeout(async () => {
            try {
              const service = runtime.getService(KnowledgeService.serviceType);
              if (service instanceof KnowledgeService) {
                // Bundler-generated chunk name; loaded lazily so the docs
                // loader is only paid for when startup loading is enabled.
                const { loadDocsFromPath } = await import("./docs-loader-3LDO3WCY.js");
                const result = await loadDocsFromPath(service, runtime.agentId);
                if (result.successful > 0) {
                  logger6.info(
                    `Loaded ${result.successful} documents from docs folder on startup`
                  );
                }
              }
            } catch (error) {
              logger6.error("Error loading documents on startup:", error);
            }
          }, 5e3);
        }
      }
      logger6.info("Knowledge Plugin initialized.");
    } catch (error) {
      logger6.error("Failed to initialize Knowledge plugin:", error);
      throw error;
    }
  },
  services: [KnowledgeService],
  providers: [knowledgeProvider],
  actions: knowledgeActions,
  tests: [tests_default]
};
3117
// Default export mirrors the named export so both import styles work.
var index_default = knowledgePlugin;
export {
  index_default as default,
  knowledgePlugin
};
3122
+ //# sourceMappingURL=index.js.map