@elizaos/plugin-knowledge 1.0.10 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,17 +1,147 @@
+ // src/service.ts
  import {
- convertPdfToTextFromBuffer,
- extractTextFromFileBuffer,
- fetchUrlContent,
- generateContentBasedId,
- isBinaryContentType,
- loadDocsFromPath,
- looksLikeBase64,
- normalizeS3Url,
- v4_default
- } from "./chunk-UOE4LEMH.js";
+ createUniqueUuid,
+ logger as logger6,
+ MemoryType as MemoryType2,
+ ModelType as ModelType2,
+ Semaphore,
+ Service,
+ splitChunks as splitChunks2
+ } from "@elizaos/core";

- // src/index.ts
- import { logger as logger7 } from "@elizaos/core";
+ // src/document-processor.ts
+ import {
+ MemoryType,
+ ModelType,
+ logger as logger4,
+ splitChunks
+ } from "@elizaos/core";
+
+ // node_modules/uuid/dist/esm/regex.js
+ var regex_default = /^(?:[0-9a-f]{8}-[0-9a-f]{4}-[1-8][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}|00000000-0000-0000-0000-000000000000|ffffffff-ffff-ffff-ffff-ffffffffffff)$/i;
+
+ // node_modules/uuid/dist/esm/validate.js
+ function validate(uuid) {
+ return typeof uuid === "string" && regex_default.test(uuid);
+ }
+ var validate_default = validate;
+
+ // node_modules/uuid/dist/esm/parse.js
+ function parse(uuid) {
+ if (!validate_default(uuid)) {
+ throw TypeError("Invalid UUID");
+ }
+ let v;
+ return Uint8Array.of((v = parseInt(uuid.slice(0, 8), 16)) >>> 24, v >>> 16 & 255, v >>> 8 & 255, v & 255, (v = parseInt(uuid.slice(9, 13), 16)) >>> 8, v & 255, (v = parseInt(uuid.slice(14, 18), 16)) >>> 8, v & 255, (v = parseInt(uuid.slice(19, 23), 16)) >>> 8, v & 255, (v = parseInt(uuid.slice(24, 36), 16)) / 1099511627776 & 255, v / 4294967296 & 255, v >>> 24 & 255, v >>> 16 & 255, v >>> 8 & 255, v & 255);
+ }
+ var parse_default = parse;
+
+ // node_modules/uuid/dist/esm/stringify.js
+ var byteToHex = [];
+ for (let i = 0; i < 256; ++i) {
+ byteToHex.push((i + 256).toString(16).slice(1));
+ }
+ function unsafeStringify(arr, offset = 0) {
+ return (byteToHex[arr[offset + 0]] + byteToHex[arr[offset + 1]] + byteToHex[arr[offset + 2]] + byteToHex[arr[offset + 3]] + "-" + byteToHex[arr[offset + 4]] + byteToHex[arr[offset + 5]] + "-" + byteToHex[arr[offset + 6]] + byteToHex[arr[offset + 7]] + "-" + byteToHex[arr[offset + 8]] + byteToHex[arr[offset + 9]] + "-" + byteToHex[arr[offset + 10]] + byteToHex[arr[offset + 11]] + byteToHex[arr[offset + 12]] + byteToHex[arr[offset + 13]] + byteToHex[arr[offset + 14]] + byteToHex[arr[offset + 15]]).toLowerCase();
+ }
+
+ // node_modules/uuid/dist/esm/rng.js
+ import { randomFillSync } from "crypto";
+ var rnds8Pool = new Uint8Array(256);
+ var poolPtr = rnds8Pool.length;
+ function rng() {
+ if (poolPtr > rnds8Pool.length - 16) {
+ randomFillSync(rnds8Pool);
+ poolPtr = 0;
+ }
+ return rnds8Pool.slice(poolPtr, poolPtr += 16);
+ }
+
+ // node_modules/uuid/dist/esm/v35.js
+ function stringToBytes(str) {
+ str = unescape(encodeURIComponent(str));
+ const bytes = new Uint8Array(str.length);
+ for (let i = 0; i < str.length; ++i) {
+ bytes[i] = str.charCodeAt(i);
+ }
+ return bytes;
+ }
+ var DNS = "6ba7b810-9dad-11d1-80b4-00c04fd430c8";
+ var URL2 = "6ba7b811-9dad-11d1-80b4-00c04fd430c8";
+ function v35(version, hash, value, namespace, buf, offset) {
+ const valueBytes = typeof value === "string" ? stringToBytes(value) : value;
+ const namespaceBytes = typeof namespace === "string" ? parse_default(namespace) : namespace;
+ if (typeof namespace === "string") {
+ namespace = parse_default(namespace);
+ }
+ if (namespace?.length !== 16) {
+ throw TypeError("Namespace must be array-like (16 iterable integer values, 0-255)");
+ }
+ let bytes = new Uint8Array(16 + valueBytes.length);
+ bytes.set(namespaceBytes);
+ bytes.set(valueBytes, namespaceBytes.length);
+ bytes = hash(bytes);
+ bytes[6] = bytes[6] & 15 | version;
+ bytes[8] = bytes[8] & 63 | 128;
+ if (buf) {
+ offset = offset || 0;
+ for (let i = 0; i < 16; ++i) {
+ buf[offset + i] = bytes[i];
+ }
+ return buf;
+ }
+ return unsafeStringify(bytes);
+ }
+
+ // node_modules/uuid/dist/esm/native.js
+ import { randomUUID } from "crypto";
+ var native_default = { randomUUID };
+
+ // node_modules/uuid/dist/esm/v4.js
+ function v4(options, buf, offset) {
+ if (native_default.randomUUID && !buf && !options) {
+ return native_default.randomUUID();
+ }
+ options = options || {};
+ const rnds = options.random ?? options.rng?.() ?? rng();
+ if (rnds.length < 16) {
+ throw new Error("Random bytes length must be >= 16");
+ }
+ rnds[6] = rnds[6] & 15 | 64;
+ rnds[8] = rnds[8] & 63 | 128;
+ if (buf) {
+ offset = offset || 0;
+ if (offset < 0 || offset + 16 > buf.length) {
+ throw new RangeError(`UUID byte range ${offset}:${offset + 15} is out of buffer bounds`);
+ }
+ for (let i = 0; i < 16; ++i) {
+ buf[offset + i] = rnds[i];
+ }
+ return buf;
+ }
+ return unsafeStringify(rnds);
+ }
+ var v4_default = v4;
+
+ // node_modules/uuid/dist/esm/sha1.js
+ import { createHash } from "crypto";
+ function sha1(bytes) {
+ if (Array.isArray(bytes)) {
+ bytes = Buffer.from(bytes);
+ } else if (typeof bytes === "string") {
+ bytes = Buffer.from(bytes, "utf8");
+ }
+ return createHash("sha1").update(bytes).digest();
+ }
+ var sha1_default = sha1;
+
+ // node_modules/uuid/dist/esm/v5.js
+ function v5(value, namespace, buf, offset) {
+ return v35(80, sha1_default, value, namespace, buf, offset);
+ }
+ v5.DNS = DNS;
+ v5.URL = URL2;
+ var v5_default = v5;

  // src/types.ts
  import z from "zod";
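
Note: the hunk above vendors the `uuid` package directly into the bundle (previously pulled from the shared chunk). For orientation, a minimal sketch of how the two inlined generators differ, using the names defined above; the input strings are illustrative:

    // v4_default(): random - a different UUID on every call.
    console.assert(v4_default() !== v4_default());

    // v5_default(value, namespace): SHA-1 based and deterministic -
    // the same value and namespace always map to the same UUID.
    const ns = "6ba7b810-9dad-11d1-80b4-00c04fd430c8"; // the DNS namespace vendored above
    console.assert(v5_default("doc-1", ns) === v5_default("doc-1", ns));
    console.assert(v5_default("doc-1", ns) !== v5_default("doc-2", ns));

That determinism is what the new `generateContentBasedId` in src/utils.ts (later in this diff) relies on for document deduplication.
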
@@ -42,6 +172,8 @@ var ModelConfigSchema = z.object({
  // For OpenAI: Only applies to text-embedding-3-small and text-embedding-3-large models
  // Default: 1536 dimensions
  EMBEDDING_DIMENSION: z.string().or(z.number()).optional().transform((val) => val ? typeof val === "string" ? parseInt(val, 10) : val : 1536),
+ // config setting
+ LOAD_DOCS_ON_STARTUP: z.boolean().default(false),
  // Contextual Knowledge settings
  CTX_KNOWLEDGE_ENABLED: z.boolean().default(false)
  });
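
Note: the new `LOAD_DOCS_ON_STARTUP` flag follows the same zod pattern as `CTX_KNOWLEDGE_ENABLED`: omit it and the parsed config gets `false`. A standalone sketch of that default-on-omit behavior (a hypothetical reduced schema, not the full `ModelConfigSchema`):

    import z from "zod";

    const Flags = z.object({
      LOAD_DOCS_ON_STARTUP: z.boolean().default(false),
      CTX_KNOWLEDGE_ENABLED: z.boolean().default(false)
    });

    console.assert(Flags.parse({}).LOAD_DOCS_ON_STARTUP === false); // default applied
    console.assert(Flags.parse({ LOAD_DOCS_ON_STARTUP: true }).LOAD_DOCS_ON_STARTUP === true);
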
@@ -52,26 +184,35 @@ var KnowledgeServiceType = {
  // src/config.ts
  import z2 from "zod";
  import { logger } from "@elizaos/core";
+ var parseBooleanEnv = (value) => {
+ if (typeof value === "boolean") return value;
+ if (typeof value === "string") return value.toLowerCase() === "true";
+ return false;
+ };
  function validateModelConfig(runtime) {
  try {
  const getSetting = (key, defaultValue) => {
  if (runtime) {
- return runtime.getSetting(key) || defaultValue;
+ return runtime.getSetting(key) || process.env[key] || defaultValue;
  }
  return process.env[key] || defaultValue;
  };
- const ctxKnowledgeEnabled2 = getSetting("CTX_KNOWLEDGE_ENABLED") === "true";
- logger.debug(`Configuration: CTX_KNOWLEDGE_ENABLED=${ctxKnowledgeEnabled2}`);
+ const ctxKnowledgeEnabled = parseBooleanEnv(getSetting("CTX_KNOWLEDGE_ENABLED", "false"));
+ logger.debug(
+ `[Document Processor] CTX_KNOWLEDGE_ENABLED: '${ctxKnowledgeEnabled} (runtime: ${!!runtime})`
+ );
  const embeddingProvider = getSetting("EMBEDDING_PROVIDER");
  const assumePluginOpenAI = !embeddingProvider;
  if (assumePluginOpenAI) {
  const openaiApiKey2 = getSetting("OPENAI_API_KEY");
  const openaiEmbeddingModel = getSetting("OPENAI_EMBEDDING_MODEL");
  if (openaiApiKey2 && openaiEmbeddingModel) {
- logger.debug("EMBEDDING_PROVIDER not specified, using configuration from plugin-openai");
+ logger.debug(
+ "[Document Processor] EMBEDDING_PROVIDER not specified, using configuration from plugin-openai"
+ );
  } else {
  logger.debug(
- "EMBEDDING_PROVIDER not specified. Assuming embeddings are provided by another plugin (e.g., plugin-google-genai)."
+ "[Document Processor] EMBEDDING_PROVIDER not specified. Assuming embeddings are provided by another plugin (e.g., plugin-google-genai)."
  );
  }
  }
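
Note: `parseBooleanEnv` accepts booleans as well as strings, which matters because `runtime.getSetting()` can return either. Expected results, per the implementation above:

    parseBooleanEnv(true);      // true
    parseBooleanEnv("TRUE");    // true  (string match is case-insensitive)
    parseBooleanEnv("false");   // false
    parseBooleanEnv("1");       // false (only the literal "true" counts)
    parseBooleanEnv(undefined); // false
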
@@ -95,7 +236,8 @@ function validateModelConfig(runtime) {
  MAX_INPUT_TOKENS: getSetting("MAX_INPUT_TOKENS", "4000"),
  MAX_OUTPUT_TOKENS: getSetting("MAX_OUTPUT_TOKENS", "4096"),
  EMBEDDING_DIMENSION: embeddingDimension,
- CTX_KNOWLEDGE_ENABLED: ctxKnowledgeEnabled2
+ LOAD_DOCS_ON_STARTUP: parseBooleanEnv(getSetting("LOAD_DOCS_ON_STARTUP")),
+ CTX_KNOWLEDGE_ENABLED: ctxKnowledgeEnabled
  });
  validateConfigRequirements(config, assumePluginOpenAI);
  return config;
@@ -116,13 +258,15 @@ function validateConfigRequirements(config, assumePluginOpenAI) {
  throw new Error('GOOGLE_API_KEY is required when EMBEDDING_PROVIDER is set to "google"');
  }
  if (!embeddingProvider) {
- logger.debug("No EMBEDDING_PROVIDER specified. Embeddings will be handled by the runtime.");
+ logger.debug(
+ "[Document Processor] No EMBEDDING_PROVIDER specified. Embeddings will be handled by the runtime."
+ );
  }
  if (assumePluginOpenAI && config.OPENAI_API_KEY && !config.TEXT_EMBEDDING_MODEL) {
  throw new Error("OPENAI_EMBEDDING_MODEL is required when using plugin-openai configuration");
  }
  if (config.CTX_KNOWLEDGE_ENABLED) {
- logger.debug("Contextual Knowledge is enabled. Validating text generation settings...");
+ logger.debug("[Document Processor] CTX validation: Checking text generation settings...");
  if (config.TEXT_PROVIDER === "openai" && !config.OPENAI_API_KEY) {
  throw new Error('OPENAI_API_KEY is required when TEXT_PROVIDER is set to "openai"');
  }
@@ -139,17 +283,21 @@ function validateConfigRequirements(config, assumePluginOpenAI) {
  const modelName = config.TEXT_MODEL?.toLowerCase() || "";
  if (modelName.includes("claude") || modelName.includes("gemini")) {
  logger.debug(
- `Using ${modelName} with OpenRouter. This configuration supports document caching for improved performance.`
+ `[Document Processor] Using ${modelName} with OpenRouter. This configuration supports document caching for improved performance.`
  );
  }
  }
  } else {
+ logger.info("[Document Processor] Contextual Knowledge is DISABLED!");
+ logger.info("[Document Processor] This means documents will NOT be enriched with context.");
  if (assumePluginOpenAI) {
- logger.debug(
- "Contextual Knowledge is disabled. Embeddings will be handled by the runtime (e.g., plugin-openai, plugin-google-genai)."
+ logger.info(
+ "[Document Processor] Embeddings will be handled by the runtime (e.g., plugin-openai, plugin-google-genai)."
  );
  } else {
- logger.debug("Contextual Knowledge is disabled. Using configured embedding provider.");
+ logger.info(
+ "[Document Processor] Using configured embedding provider for basic embeddings only."
+ );
  }
  }
  }
@@ -164,7 +312,18 @@ async function getProviderRateLimits(runtime) {
  const maxConcurrentRequests = parseInt(getSetting("MAX_CONCURRENT_REQUESTS", "30"), 10);
  const requestsPerMinute = parseInt(getSetting("REQUESTS_PER_MINUTE", "60"), 10);
  const tokensPerMinute = parseInt(getSetting("TOKENS_PER_MINUTE", "150000"), 10);
- switch (config.EMBEDDING_PROVIDER) {
+ const primaryProvider = config.TEXT_PROVIDER || config.EMBEDDING_PROVIDER;
+ logger.debug(
+ `[Document Processor] Rate limiting for ${primaryProvider}: ${requestsPerMinute} RPM, ${tokensPerMinute} TPM, ${maxConcurrentRequests} concurrent`
+ );
+ switch (primaryProvider) {
+ case "anthropic":
+ return {
+ maxConcurrentRequests,
+ requestsPerMinute,
+ tokensPerMinute,
+ provider: "anthropic"
+ };
  case "openai":
  return {
  maxConcurrentRequests,
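
Note: rate limits are now keyed on `TEXT_PROVIDER` first, falling back to `EMBEDDING_PROVIDER`, so a setup that generates contextual text with Anthropic gets Anthropic limits even when embeddings come from another provider. A sketch of the precedence with hypothetical config values:

    const a = { TEXT_PROVIDER: "anthropic", EMBEDDING_PROVIDER: "openai" };
    const b = { TEXT_PROVIDER: undefined, EMBEDDING_PROVIDER: "openai" };
    console.assert((a.TEXT_PROVIDER || a.EMBEDDING_PROVIDER) === "anthropic");
    console.assert((b.TEXT_PROVIDER || b.EMBEDDING_PROVIDER) === "openai");
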
@@ -184,30 +343,11 @@
  maxConcurrentRequests,
  requestsPerMinute,
  tokensPerMinute,
- provider: config.EMBEDDING_PROVIDER
+ provider: primaryProvider || "unknown"
  };
  }
  }

- // src/service.ts
- import {
- createUniqueUuid,
- logger as logger4,
- MemoryType as MemoryType2,
- ModelType as ModelType2,
- Semaphore,
- Service,
- splitChunks as splitChunks2
- } from "@elizaos/core";
-
- // src/document-processor.ts
- import {
- MemoryType,
- ModelType,
- logger as logger3,
- splitChunks
- } from "@elizaos/core";
-
  // src/ctx-embeddings.ts
  var DEFAULT_CHUNK_TOKEN_SIZE = 500;
  var DEFAULT_CHUNK_OVERLAP_TOKENS = 100;
@@ -405,9 +545,7 @@ Create an enriched version of this chunk by adding critical surrounding context.
  Provide ONLY the enriched chunk text in your response:`;
  function getContextualizationPrompt(docContent, chunkContent, minTokens = CONTEXT_TARGETS.DEFAULT.MIN_TOKENS, maxTokens = CONTEXT_TARGETS.DEFAULT.MAX_TOKENS, promptTemplate = CONTEXTUAL_CHUNK_ENRICHMENT_PROMPT_TEMPLATE) {
  if (!docContent || !chunkContent) {
- console.warn(
- "Document content or chunk content is missing for contextualization."
- );
+ console.warn("Document content or chunk content is missing for contextualization.");
  return "Error: Document or chunk content missing.";
  }
  const chunkTokens = Math.ceil(chunkContent.length / DEFAULT_CHARS_PER_TOKEN);
@@ -478,15 +616,8 @@ function getPromptForMimeType(mimeType, docContent, chunkContent) {
  minTokens = CONTEXT_TARGETS.TECHNICAL.MIN_TOKENS;
  maxTokens = CONTEXT_TARGETS.TECHNICAL.MAX_TOKENS;
  promptTemplate = TECHNICAL_PROMPT_TEMPLATE;
- console.debug("Using technical documentation prompt template");
  }
- return getContextualizationPrompt(
- docContent,
- chunkContent,
- minTokens,
- maxTokens,
- promptTemplate
- );
+ return getContextualizationPrompt(docContent, chunkContent, minTokens, maxTokens, promptTemplate);
  }
  function getCachingPromptForMimeType(mimeType, chunkContent) {
  let minTokens = CONTEXT_TARGETS.DEFAULT.MIN_TOKENS;
@@ -506,12 +637,7 @@ function getCachingPromptForMimeType(mimeType, chunkContent) {
  minTokens = CONTEXT_TARGETS.TECHNICAL.MIN_TOKENS;
  maxTokens = CONTEXT_TARGETS.TECHNICAL.MAX_TOKENS;
  }
- return getCachingContextualizationPrompt(
- chunkContent,
- mimeType,
- minTokens,
- maxTokens
- );
+ return getCachingContextualizationPrompt(chunkContent, mimeType, minTokens, maxTokens);
  }
  function containsMathematicalContent(content) {
  const latexMathPatterns = [
@@ -575,9 +701,7 @@ function containsMathematicalContent(content) {
  "coefficient"
  ];
  const contentLower = content.toLowerCase();
- const mathKeywordCount = mathKeywords.filter(
- (keyword) => contentLower.includes(keyword)
- ).length;
+ const mathKeywordCount = mathKeywords.filter((keyword) => contentLower.includes(keyword)).length;
  return mathKeywordCount >= 2;
  }
  function isTechnicalDocumentation(content) {
@@ -626,9 +750,7 @@ function isTechnicalDocumentation(content) {
  }
  function getChunkWithContext(chunkContent, generatedContext) {
  if (!generatedContext || generatedContext.trim() === "") {
- console.warn(
- "Generated context is empty. Falling back to original chunk content."
- );
+ console.warn("Generated context is empty. Falling back to original chunk content.");
  return chunkContent;
  }
  return generatedContext.trim();
@@ -641,8 +763,8 @@ import { createAnthropic } from "@ai-sdk/anthropic";
  import { createOpenRouter } from "@openrouter/ai-sdk-provider";
  import { google } from "@ai-sdk/google";
  import { logger as logger2 } from "@elizaos/core";
- async function generateText(prompt, system, overrideConfig) {
- const config = validateModelConfig();
+ async function generateText(runtime, prompt, system, overrideConfig) {
+ const config = validateModelConfig(runtime);
  const provider = overrideConfig?.provider || config.TEXT_PROVIDER;
  const modelName = overrideConfig?.modelName || config.TEXT_MODEL;
  const maxTokens = overrideConfig?.maxTokens || config.MAX_OUTPUT_TOKENS;
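
Note: `generateText` now takes the runtime as its first argument and passes it to `validateModelConfig`, so settings resolve through `runtime.getSetting()` before falling back to `process.env`. A hedged usage sketch (the prompt strings are illustrative):

    const result = await generateText(
      runtime,                                   // new first argument in 1.2.0
      "Summarize the attached document chunk.",  // prompt
      "You are a concise technical assistant."   // optional system prompt
    );
    console.log(result.text);
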
@@ -650,11 +772,12 @@ async function generateText(prompt, system, overrideConfig) {
  try {
  switch (provider) {
  case "anthropic":
- return await generateAnthropicText(prompt, system, modelName, maxTokens);
+ return await generateAnthropicText(config, prompt, system, modelName, maxTokens);
  case "openai":
- return await generateOpenAIText(prompt, system, modelName, maxTokens);
+ return await generateOpenAIText(config, prompt, system, modelName, maxTokens);
  case "openrouter":
  return await generateOpenRouterText(
+ config,
  prompt,
  system,
  modelName,
@@ -669,31 +792,47 @@ async function generateText(prompt, system, overrideConfig) {
  throw new Error(`Unsupported text provider: ${provider}`);
  }
  } catch (error) {
- logger2.error(`[LLM Service - ${provider}] Error generating text with ${modelName}:`, error);
+ logger2.error(`[Document Processor] ${provider} ${modelName} error:`, error);
  throw error;
  }
  }
- async function generateAnthropicText(prompt, system, modelName, maxTokens) {
- const config = validateModelConfig();
+ async function generateAnthropicText(config, prompt, system, modelName, maxTokens) {
  const anthropic = createAnthropic({
  apiKey: config.ANTHROPIC_API_KEY,
  baseURL: config.ANTHROPIC_BASE_URL
  });
  const modelInstance = anthropic(modelName);
- const result = await aiGenerateText({
- model: modelInstance,
- prompt,
- system,
- temperature: 0.3,
- maxTokens
- });
- logger2.debug(
- `[LLM Service - Anthropic] Text generated with ${modelName}. Usage: ${result.usage.promptTokens} prompt tokens, ${result.usage.completionTokens} completion tokens.`
- );
- return result;
+ const maxRetries = 3;
+ for (let attempt = 0; attempt < maxRetries; attempt++) {
+ try {
+ const result = await aiGenerateText({
+ model: modelInstance,
+ prompt,
+ system,
+ temperature: 0.3,
+ maxTokens
+ });
+ const totalTokens = result.usage.promptTokens + result.usage.completionTokens;
+ logger2.debug(
+ `[Document Processor] ${modelName}: ${totalTokens} tokens (${result.usage.promptTokens}\u2192${result.usage.completionTokens})`
+ );
+ return result;
+ } catch (error) {
+ const isRateLimit = error?.status === 429 || error?.message?.includes("rate limit") || error?.message?.includes("429");
+ if (isRateLimit && attempt < maxRetries - 1) {
+ const delay = Math.pow(2, attempt + 1) * 1e3;
+ logger2.warn(
+ `[Document Processor] Rate limit hit (${modelName}): attempt ${attempt + 1}/${maxRetries}, retrying in ${Math.round(delay / 1e3)}s`
+ );
+ await new Promise((resolve2) => setTimeout(resolve2, delay));
+ continue;
+ }
+ throw error;
+ }
+ }
+ throw new Error("Max retries exceeded for Anthropic text generation");
  }
- async function generateOpenAIText(prompt, system, modelName, maxTokens) {
- const config = validateModelConfig();
+ async function generateOpenAIText(config, prompt, system, modelName, maxTokens) {
  const openai = createOpenAI({
  apiKey: config.OPENAI_API_KEY,
  baseURL: config.OPENAI_BASE_URL
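
Note: the Anthropic path above now retries rate-limited calls with exponential backoff: with `maxRetries = 3`, delays of 2s and 4s separate the attempts, and the third failure is thrown. A self-contained sketch of the same pattern; `withBackoff` is illustrative, not something the plugin exports:

    async function withBackoff(operation, maxRetries = 3) {
      for (let attempt = 0; attempt < maxRetries; attempt++) {
        try {
          return await operation();
        } catch (error) {
          const isRateLimit = error?.status === 429 || error?.message?.includes("rate limit");
          if (!isRateLimit || attempt === maxRetries - 1) throw error;
          const delay = Math.pow(2, attempt + 1) * 1000; // 2s, then 4s
          await new Promise((resolve) => setTimeout(resolve, delay));
        }
      }
      throw new Error("Max retries exceeded");
    }
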
@@ -706,8 +845,9 @@ async function generateOpenAIText(prompt, system, modelName, maxTokens) {
  temperature: 0.3,
  maxTokens
  });
+ const totalTokens = result.usage.promptTokens + result.usage.completionTokens;
  logger2.debug(
- `[LLM Service - OpenAI] Text generated with ${modelName}. Usage: ${result.usage.promptTokens} prompt tokens, ${result.usage.completionTokens} completion tokens.`
+ `[Document Processor] OpenAI ${modelName}: ${totalTokens} tokens (${result.usage.promptTokens}\u2192${result.usage.completionTokens})`
  );
  return result;
  }
@@ -724,13 +864,13 @@ async function generateGoogleText(prompt, system, modelName, maxTokens, config)
  temperature: 0.3,
  maxTokens
  });
+ const totalTokens = result.usage.promptTokens + result.usage.completionTokens;
  logger2.debug(
- `[LLM Service - Google] Text generated with ${modelName}. Usage: ${result.usage.promptTokens} prompt tokens, ${result.usage.completionTokens} completion tokens.`
+ `[Document Processor] Google ${modelName}: ${totalTokens} tokens (${result.usage.promptTokens}\u2192${result.usage.completionTokens})`
  );
  return result;
  }
- async function generateOpenRouterText(prompt, system, modelName, maxTokens, cacheDocument, cacheOptions, autoCacheContextualRetrieval = true) {
- const config = validateModelConfig();
+ async function generateOpenRouterText(config, prompt, system, modelName, maxTokens, cacheDocument, cacheOptions, autoCacheContextualRetrieval = true) {
  const openrouter = createOpenRouter({
  apiKey: config.OPENROUTER_API_KEY,
  baseURL: config.OPENROUTER_BASE_URL
@@ -746,7 +886,7 @@ async function generateOpenRouterText(prompt, system, modelName, maxTokens, cach
  if (docMatch && docMatch[1]) {
  documentForCaching = docMatch[1].trim();
  logger2.debug(
- `[LLM Service - OpenRouter] Auto-detected document for caching (${documentForCaching.length} chars)`
+ `[Document Processor] Auto-detected document for caching (${documentForCaching.length} chars)`
  );
  }
  }
@@ -777,13 +917,11 @@
  );
  }
  }
- logger2.debug("[LLM Service - OpenRouter] Using standard request without caching");
+ logger2.debug("[Document Processor] Using standard request without caching");
  return await generateStandardOpenRouterText(prompt, system, modelInstance, modelName, maxTokens);
  }
  async function generateClaudeWithCaching(promptText, system, modelInstance, modelName, maxTokens, documentForCaching) {
- logger2.debug(
- `[LLM Service - OpenRouter] Using explicit prompt caching with Claude model ${modelName}`
- );
+ logger2.debug(`[Document Processor] Using explicit prompt caching with Claude ${modelName}`);
  const messages = [
  // System message with cached document (if system is provided)
  system ? {
@@ -835,7 +973,7 @@ async function generateClaudeWithCaching(promptText, system, modelInstance, mode
  ]
  } : null
  ].filter(Boolean);
- logger2.debug("[LLM Service - OpenRouter] Using Claude-specific caching structure");
+ logger2.debug("[Document Processor] Using Claude-specific caching structure");
  const result = await aiGenerateText({
  model: modelInstance,
  messages,
@@ -850,8 +988,9 @@
  }
  });
  logCacheMetrics(result);
+ const totalTokens = result.usage.promptTokens + result.usage.completionTokens;
  logger2.debug(
- `[LLM Service - OpenRouter] Text generated with ${modelName} using Claude caching. Usage: ${result.usage.promptTokens} prompt tokens, ${result.usage.completionTokens} completion tokens.`
+ `[Document Processor] OpenRouter ${modelName}: ${totalTokens} tokens (${result.usage.promptTokens}\u2192${result.usage.completionTokens})`
  );
  return result;
  }
@@ -861,27 +1000,23 @@ async function generateGeminiWithCaching(promptText, system, modelInstance, mode
  const minTokensForImplicitCache = modelName.toLowerCase().includes("flash") ? 1028 : 2048;
  const likelyTriggersCaching = estimatedDocTokens >= minTokensForImplicitCache;
  if (usingImplicitCaching) {
+ logger2.debug(`[Document Processor] Using Gemini 2.5 implicit caching with ${modelName}`);
  logger2.debug(
- `[LLM Service - OpenRouter] Using Gemini 2.5 implicit caching with model ${modelName}`
- );
- logger2.debug(
- `[LLM Service - OpenRouter] Gemini 2.5 models automatically cache large prompts (no cache_control needed)`
+ `[Document Processor] Gemini 2.5 models automatically cache large prompts (no cache_control needed)`
  );
  if (likelyTriggersCaching) {
  logger2.debug(
- `[LLM Service - OpenRouter] Document size ~${estimatedDocTokens} tokens exceeds minimum ${minTokensForImplicitCache} tokens for implicit caching`
+ `[Document Processor] Document ~${estimatedDocTokens} tokens exceeds ${minTokensForImplicitCache} token threshold for caching`
  );
  } else {
  logger2.debug(
- `[LLM Service - OpenRouter] Warning: Document size ~${estimatedDocTokens} tokens may not meet minimum ${minTokensForImplicitCache} token threshold for implicit caching`
+ `[Document Processor] Document ~${estimatedDocTokens} tokens may not meet ${minTokensForImplicitCache} token threshold for caching`
  );
  }
  } else {
+ logger2.debug(`[Document Processor] Using standard prompt format with Gemini ${modelName}`);
  logger2.debug(
- `[LLM Service - OpenRouter] Using standard prompt format with Gemini model ${modelName}`
- );
- logger2.debug(
- `[LLM Service - OpenRouter] Note: Only Gemini 2.5 models support automatic implicit caching`
+ `[Document Processor] Note: Only Gemini 2.5 models support automatic implicit caching`
  );
  }
  const geminiSystemPrefix = system ? `${system}
@@ -905,8 +1040,10 @@ ${promptText}`;
  }
  });
  logCacheMetrics(result);
+ const totalTokens = result.usage.promptTokens + result.usage.completionTokens;
+ const cachingType = usingImplicitCaching ? "implicit" : "standard";
  logger2.debug(
- `[LLM Service - OpenRouter] Text generated with ${modelName} using ${usingImplicitCaching ? "implicit" : "standard"} caching. Usage: ${result.usage.promptTokens} prompt tokens, ${result.usage.completionTokens} completion tokens.`
+ `[Document Processor] OpenRouter ${modelName} (${cachingType} caching): ${totalTokens} tokens (${result.usage.promptTokens}\u2192${result.usage.completionTokens})`
  );
  return result;
  }
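
Note: for the Gemini path, the implicit-cache minimum is model-dependent: 1028 tokens for "flash" variants, 2048 otherwise. How `estimatedDocTokens` is derived lies outside this hunk; assuming the ~4 characters-per-token estimate used by `estimateTokens` later in this diff, the check works out roughly as:

    // Assumption: document tokens estimated at ~4 chars/token, as in estimateTokens below.
    const estimatedDocTokens = Math.ceil(documentForCaching.length / 4);
    const minTokensForImplicitCache = modelName.toLowerCase().includes("flash") ? 1028 : 2048;
    // e.g. a 10,000-character document => ~2,500 tokens, clearing either threshold.
    const likelyTriggersCaching = estimatedDocTokens >= minTokensForImplicitCache;
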
@@ -926,21 +1063,397 @@ async function generateStandardOpenRouterText(prompt, system, modelInstance, mod
  }
  }
  });
+ const totalTokens = result.usage.promptTokens + result.usage.completionTokens;
  logger2.debug(
- `[LLM Service - OpenRouter] Text generated with ${modelName}. Usage: ${result.usage.promptTokens} prompt tokens, ${result.usage.completionTokens} completion tokens.`
+ `[Document Processor] OpenRouter ${modelName}: ${totalTokens} tokens (${result.usage.promptTokens}\u2192${result.usage.completionTokens})`
  );
  return result;
  }
  function logCacheMetrics(result) {
  if (result.usage && result.usage.cacheTokens) {
  logger2.debug(
- `[LLM Service - OpenRouter] Cache metrics - Cached tokens: ${result.usage.cacheTokens}, Cache discount: ${result.usage.cacheDiscount}`
+ `[Document Processor] Cache metrics - tokens: ${result.usage.cacheTokens}, discount: ${result.usage.cacheDiscount}`
+ );
+ }
+ }
+
+ // src/utils.ts
+ import { Buffer as Buffer2 } from "buffer";
+ import * as mammoth from "mammoth";
+ import { logger as logger3 } from "@elizaos/core";
+ import { getDocument } from "pdfjs-dist/legacy/build/pdf.mjs";
+ import { createHash as createHash2 } from "crypto";
+ var PLAIN_TEXT_CONTENT_TYPES = [
+ "application/typescript",
+ "text/typescript",
+ "text/x-python",
+ "application/x-python-code",
+ "application/yaml",
+ "text/yaml",
+ "application/x-yaml",
+ "application/json",
+ "text/markdown",
+ "text/csv"
+ ];
+ var MAX_FALLBACK_SIZE_BYTES = 5 * 1024 * 1024;
+ var BINARY_CHECK_BYTES = 1024;
+ async function extractTextFromFileBuffer(fileBuffer, contentType, originalFilename) {
+ const lowerContentType = contentType.toLowerCase();
+ logger3.debug(
+ `[TextUtil] Attempting to extract text from ${originalFilename} (type: ${contentType})`
+ );
+ if (lowerContentType === "application/vnd.openxmlformats-officedocument.wordprocessingml.document") {
+ logger3.debug(`[TextUtil] Extracting text from DOCX ${originalFilename} via mammoth.`);
+ try {
+ const result = await mammoth.extractRawText({ buffer: fileBuffer });
+ logger3.debug(
+ `[TextUtil] DOCX text extraction complete for ${originalFilename}. Text length: ${result.value.length}`
+ );
+ return result.value;
+ } catch (docxError) {
+ const errorMsg = `[TextUtil] Failed to parse DOCX file ${originalFilename}: ${docxError.message}`;
+ logger3.error(errorMsg, docxError.stack);
+ throw new Error(errorMsg);
+ }
+ } else if (lowerContentType === "application/msword" || originalFilename.toLowerCase().endsWith(".doc")) {
+ logger3.debug(`[TextUtil] Handling Microsoft Word .doc file: ${originalFilename}`);
+ return `[Microsoft Word Document: ${originalFilename}]
+
+ This document was indexed for search but cannot be displayed directly in the browser. The original document content is preserved for retrieval purposes.`;
+ } else if (lowerContentType.startsWith("text/") || PLAIN_TEXT_CONTENT_TYPES.includes(lowerContentType)) {
+ logger3.debug(
+ `[TextUtil] Extracting text from plain text compatible file ${originalFilename} (type: ${contentType})`
+ );
+ return fileBuffer.toString("utf-8");
+ } else {
+ logger3.warn(
+ `[TextUtil] Unsupported content type: "${contentType}" for ${originalFilename}. Attempting fallback to plain text.`
+ );
+ if (fileBuffer.length > MAX_FALLBACK_SIZE_BYTES) {
+ const sizeErrorMsg = `[TextUtil] File ${originalFilename} (type: ${contentType}) exceeds maximum size for fallback (${MAX_FALLBACK_SIZE_BYTES} bytes). Cannot process as plain text.`;
+ logger3.error(sizeErrorMsg);
+ throw new Error(sizeErrorMsg);
+ }
+ const initialBytes = fileBuffer.subarray(0, Math.min(fileBuffer.length, BINARY_CHECK_BYTES));
+ if (initialBytes.includes(0)) {
+ const binaryHeuristicMsg = `[TextUtil] File ${originalFilename} (type: ${contentType}) appears to be binary based on initial byte check. Cannot process as plain text.`;
+ logger3.error(binaryHeuristicMsg);
+ throw new Error(binaryHeuristicMsg);
+ }
+ try {
+ const textContent = fileBuffer.toString("utf-8");
+ if (textContent.includes("\uFFFD")) {
+ const binaryErrorMsg = `[TextUtil] File ${originalFilename} (type: ${contentType}) seems to be binary or has encoding issues after fallback to plain text (detected \uFFFD).`;
+ logger3.error(binaryErrorMsg);
+ throw new Error(binaryErrorMsg);
+ }
+ logger3.debug(
+ `[TextUtil] Successfully processed unknown type ${contentType} as plain text after fallback for ${originalFilename}.`
+ );
+ return textContent;
+ } catch (fallbackError) {
+ const finalErrorMsg = `[TextUtil] Unsupported content type: ${contentType} for ${originalFilename}. Fallback to plain text also failed or indicated binary content.`;
+ logger3.error(finalErrorMsg, fallbackError.message ? fallbackError.stack : void 0);
+ throw new Error(finalErrorMsg);
+ }
+ }
+ }
+ async function convertPdfToTextFromBuffer(pdfBuffer, filename) {
+ const docName = filename || "unnamed-document";
+ logger3.debug(`[PdfService] Starting conversion for ${docName}`);
+ try {
+ const uint8Array = new Uint8Array(pdfBuffer);
+ const pdf = await getDocument({ data: uint8Array }).promise;
+ const numPages = pdf.numPages;
+ const textPages = [];
+ for (let pageNum = 1; pageNum <= numPages; pageNum++) {
+ logger3.debug(`[PdfService] Processing page ${pageNum}/${numPages}`);
+ const page = await pdf.getPage(pageNum);
+ const textContent = await page.getTextContent();
+ const lineMap = /* @__PURE__ */ new Map();
+ textContent.items.filter(isTextItem).forEach((item) => {
+ const yPos = Math.round(item.transform[5]);
+ if (!lineMap.has(yPos)) {
+ lineMap.set(yPos, []);
+ }
+ lineMap.get(yPos).push(item);
+ });
+ const sortedLines = Array.from(lineMap.entries()).sort((a, b) => b[0] - a[0]).map(
+ ([_, items]) => items.sort((a, b) => a.transform[4] - b.transform[4]).map((item) => item.str).join(" ")
+ );
+ textPages.push(sortedLines.join("\n"));
+ }
+ const fullText = textPages.join("\n\n").replace(/\s+/g, " ").trim();
+ logger3.debug(`[PdfService] Conversion complete for ${docName}, length: ${fullText.length}`);
+ return fullText;
+ } catch (error) {
+ logger3.error(`[PdfService] Error converting PDF ${docName}:`, error.message);
+ throw new Error(`Failed to convert PDF to text: ${error.message}`);
+ }
+ }
+ function isBinaryContentType(contentType, filename) {
+ const textContentTypes = [
+ "text/",
+ "application/json",
+ "application/xml",
+ "application/javascript",
+ "application/typescript",
+ "application/x-yaml",
+ "application/x-sh"
+ ];
+ const isTextMimeType = textContentTypes.some((type) => contentType.includes(type));
+ if (isTextMimeType) {
+ return false;
+ }
+ const binaryContentTypes = [
+ "application/pdf",
+ "application/msword",
+ "application/vnd.openxmlformats-officedocument",
+ "application/vnd.ms-excel",
+ "application/vnd.ms-powerpoint",
+ "application/zip",
+ "application/x-zip-compressed",
+ "application/octet-stream",
+ "image/",
+ "audio/",
+ "video/"
+ ];
+ const isBinaryMimeType = binaryContentTypes.some((type) => contentType.includes(type));
+ if (isBinaryMimeType) {
+ return true;
+ }
+ const fileExt = filename.split(".").pop()?.toLowerCase() || "";
+ const textExtensions = [
+ "txt",
+ "md",
+ "markdown",
+ "json",
+ "xml",
+ "html",
+ "htm",
+ "css",
+ "js",
+ "ts",
+ "jsx",
+ "tsx",
+ "yaml",
+ "yml",
+ "toml",
+ "ini",
+ "cfg",
+ "conf",
+ "sh",
+ "bash",
+ "zsh",
+ "fish",
+ "py",
+ "rb",
+ "go",
+ "rs",
+ "java",
+ "c",
+ "cpp",
+ "h",
+ "hpp",
+ "cs",
+ "php",
+ "sql",
+ "r",
+ "swift",
+ "kt",
+ "scala",
+ "clj",
+ "ex",
+ "exs",
+ "vim",
+ "env",
+ "gitignore",
+ "dockerignore",
+ "editorconfig",
+ "log",
+ "csv",
+ "tsv",
+ "properties",
+ "gradle",
+ "sbt",
+ "makefile",
+ "dockerfile",
+ "vagrantfile",
+ "gemfile",
+ "rakefile",
+ "podfile",
+ "csproj",
+ "vbproj",
+ "fsproj",
+ "sln",
+ "pom"
+ ];
+ if (textExtensions.includes(fileExt)) {
+ return false;
+ }
+ const binaryExtensions = [
+ "pdf",
+ "docx",
+ "doc",
+ "xls",
+ "xlsx",
+ "ppt",
+ "pptx",
+ "zip",
+ "rar",
+ "7z",
+ "tar",
+ "gz",
+ "bz2",
+ "xz",
+ "jpg",
+ "jpeg",
+ "png",
+ "gif",
+ "bmp",
+ "svg",
+ "ico",
+ "webp",
+ "mp3",
+ "mp4",
+ "avi",
+ "mov",
+ "wmv",
+ "flv",
+ "wav",
+ "flac",
+ "ogg",
+ "exe",
+ "dll",
+ "so",
+ "dylib",
+ "bin",
+ "dat",
+ "db",
+ "sqlite"
+ ];
+ return binaryExtensions.includes(fileExt);
+ }
+ function isTextItem(item) {
+ return "str" in item;
+ }
+ function normalizeS3Url(url) {
+ try {
+ const urlObj = new URL(url);
+ return `${urlObj.origin}${urlObj.pathname}`;
+ } catch (error) {
+ logger3.warn(`[URL NORMALIZER] Failed to parse URL: ${url}. Returning original.`);
+ return url;
+ }
+ }
+ async function fetchUrlContent(url) {
+ logger3.debug(`[URL FETCHER] Fetching content from URL: ${url}`);
+ try {
+ const controller = new AbortController();
+ const timeoutId = setTimeout(() => controller.abort(), 3e4);
+ const response = await fetch(url, {
+ signal: controller.signal,
+ headers: {
+ "User-Agent": "Eliza-Knowledge-Plugin/1.0"
+ }
+ });
+ clearTimeout(timeoutId);
+ if (!response.ok) {
+ throw new Error(`Failed to fetch URL: ${response.status} ${response.statusText}`);
+ }
+ const contentType = response.headers.get("content-type") || "application/octet-stream";
+ logger3.debug(`[URL FETCHER] Content type from server: ${contentType} for URL: ${url}`);
+ const arrayBuffer = await response.arrayBuffer();
+ const buffer = Buffer2.from(arrayBuffer);
+ const base64Content = buffer.toString("base64");
+ logger3.debug(
+ `[URL FETCHER] Successfully fetched content from URL: ${url} (${buffer.length} bytes)`
  );
+ return {
+ content: base64Content,
+ contentType
+ };
+ } catch (error) {
+ logger3.error(`[URL FETCHER] Error fetching content from URL ${url}: ${error.message}`);
+ throw new Error(`Failed to fetch content from URL: ${error.message}`);
+ }
+ }
+ function looksLikeBase64(content) {
+ if (!content || content.length === 0) return false;
+ const cleanContent = content.replace(/\s/g, "");
+ if (cleanContent.length < 16) return false;
+ if (cleanContent.length % 4 !== 0) return false;
+ const base64Regex = /^[A-Za-z0-9+/]*={0,2}$/;
+ if (!base64Regex.test(cleanContent)) return false;
+ const hasNumbers = /\d/.test(cleanContent);
+ const hasUpperCase = /[A-Z]/.test(cleanContent);
+ const hasLowerCase = /[a-z]/.test(cleanContent);
+ return (hasNumbers || hasUpperCase) && hasLowerCase;
+ }
+ function generateContentBasedId(content, agentId, options) {
+ const {
+ maxChars = 2e3,
+ // Use first 2000 chars by default
+ includeFilename,
+ contentType
+ } = options || {};
+ let contentForHashing;
+ if (looksLikeBase64(content)) {
+ try {
+ const decoded = Buffer2.from(content, "base64").toString("utf8");
+ if (!decoded.includes("\uFFFD") || contentType?.includes("pdf")) {
+ contentForHashing = content.slice(0, maxChars);
+ } else {
+ contentForHashing = decoded.slice(0, maxChars);
+ }
+ } catch {
+ contentForHashing = content.slice(0, maxChars);
+ }
+ } else {
+ contentForHashing = content.slice(0, maxChars);
  }
+ contentForHashing = contentForHashing.replace(/\r\n/g, "\n").replace(/\r/g, "\n").trim();
+ const componentsToHash = [
+ agentId,
+ // Namespace by agent
+ contentForHashing,
+ // The actual content
+ includeFilename || ""
+ // Optional filename for additional uniqueness
+ ].filter(Boolean).join("::");
+ const hash = createHash2("sha256").update(componentsToHash).digest("hex");
+ const DOCUMENT_NAMESPACE = "6ba7b810-9dad-11d1-80b4-00c04fd430c8";
+ const uuid = v5_default(hash, DOCUMENT_NAMESPACE);
+ logger3.debug(
+ `[generateContentBasedId] Generated UUID ${uuid} for document with content hash ${hash.slice(0, 8)}...`
+ );
+ return uuid;
  }

  // src/document-processor.ts
- var ctxKnowledgeEnabled = process.env.CTX_KNOWLEDGE_ENABLED === "true" || process.env.CTX_KNOWLEDGE_ENABLED === "True";
+ function estimateTokens(text) {
+ return Math.ceil(text.length / 4);
+ }
+ function getCtxKnowledgeEnabled(runtime) {
+ let result;
+ let source;
+ let rawValue;
+ if (runtime) {
+ rawValue = runtime.getSetting("CTX_KNOWLEDGE_ENABLED");
+ const cleanValue = rawValue?.toString().trim().toLowerCase();
+ result = cleanValue === "true";
+ source = "runtime.getSetting()";
+ } else {
+ rawValue = process.env.CTX_KNOWLEDGE_ENABLED;
+ const cleanValue = rawValue?.toString().trim().toLowerCase();
+ result = cleanValue === "true";
+ source = "process.env";
+ }
+ if (process.env.NODE_ENV === "development" && rawValue && !result) {
+ logger4.debug(`[Document Processor] CTX config mismatch - ${source}: '${rawValue}' \u2192 ${result}`);
+ }
+ return result;
+ }
  function shouldUseCustomLLM() {
  const textProvider = process.env.TEXT_PROVIDER;
  const textModel = process.env.TEXT_MODEL;
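
Note: `generateContentBasedId` (added in src/utils.ts above) hashes agent ID, normalized content (the first 2000 characters), and an optional filename with SHA-256, then maps the hash into a UUID via the vendored v5. Because v5 is deterministic, re-ingesting identical content produces the same document ID, which is what enables deduplication. A usage sketch with illustrative values:

    const agentId = "00000000-0000-0000-0000-000000000001"; // hypothetical agent ID
    const text = "ElizaOS plugin documentation...";

    const id1 = generateContentBasedId(text, agentId, { includeFilename: "docs.md" });
    const id2 = generateContentBasedId(text, agentId, { includeFilename: "docs.md" });
    console.assert(id1 === id2);  // same content, same ID

    const id3 = generateContentBasedId(text + " (edited)", agentId, { includeFilename: "docs.md" });
    console.assert(id1 !== id3);  // any change within the hashed prefix changes the ID
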
@@ -961,18 +1474,6 @@ function shouldUseCustomLLM() {
  }
  }
  var useCustomLLM = shouldUseCustomLLM();
- if (ctxKnowledgeEnabled) {
- logger3.info(`Document processor starting with Contextual Knowledge ENABLED`);
- if (useCustomLLM) {
- logger3.info(
- `Using Custom LLM with provider: ${process.env.TEXT_PROVIDER}, model: ${process.env.TEXT_MODEL}`
- );
- } else {
- logger3.info(`Using ElizaOS Runtime LLM (default behavior)`);
- }
- } else {
- logger3.info(`Document processor starting with Contextual Knowledge DISABLED`);
- }
  async function processFragmentsSynchronously({
  runtime,
  documentId,
@@ -981,21 +1482,29 @@
  contentType,
  roomId,
  entityId,
- worldId
+ worldId,
+ documentTitle
  }) {
  if (!fullDocumentText || fullDocumentText.trim() === "") {
- logger3.warn(`No text content available to chunk for document ${documentId}.`);
+ logger4.warn(`No text content available to chunk for document ${documentId}.`);
  return 0;
  }
  const chunks = await splitDocumentIntoChunks(fullDocumentText);
  if (chunks.length === 0) {
- logger3.warn(`No chunks generated from text for ${documentId}. No fragments to save.`);
+ logger4.warn(`No chunks generated from text for ${documentId}. No fragments to save.`);
  return 0;
  }
- logger3.info(`Split content into ${chunks.length} chunks for document ${documentId}`);
+ const docName = documentTitle || documentId.substring(0, 8);
+ logger4.info(`[Document Processor] "${docName}": Split into ${chunks.length} chunks`);
  const providerLimits = await getProviderRateLimits();
  const CONCURRENCY_LIMIT = Math.min(30, providerLimits.maxConcurrentRequests || 30);
- const rateLimiter = createRateLimiter(providerLimits.requestsPerMinute || 60);
+ const rateLimiter = createRateLimiter(
+ providerLimits.requestsPerMinute || 60,
+ providerLimits.tokensPerMinute
+ );
+ logger4.debug(
+ `[Document Processor] Rate limits: ${providerLimits.requestsPerMinute} RPM, ${providerLimits.tokensPerMinute} TPM (${providerLimits.provider}, concurrency: ${CONCURRENCY_LIMIT})`
+ );
  const { savedCount, failedCount } = await processAndSaveFragments({
  runtime,
  documentId,
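
Note: `createRateLimiter` is now called with a tokens-per-minute budget alongside requests-per-minute, and callers pass an estimated token count on each wait (see `await rateLimiter(embeddingTokens)` later in this diff). The limiter's implementation is not part of this diff; a minimal sketch of a limiter with that call shape, assuming a fixed one-minute window:

    // Hypothetical sketch - the plugin's actual createRateLimiter is not shown in this diff.
    function createRateLimiter(requestsPerMinute, tokensPerMinute) {
      let windowStart = Date.now();
      let requests = 0;
      let tokens = 0;
      return async function rateLimiter(estimatedTokens = 0) {
        const now = Date.now();
        if (now - windowStart >= 60000) { // fresh one-minute window
          windowStart = now; requests = 0; tokens = 0;
        }
        const overRpm = requests + 1 > requestsPerMinute;
        const overTpm = tokensPerMinute && tokens + estimatedTokens > tokensPerMinute;
        if (overRpm || overTpm) {
          await new Promise((resolve) => setTimeout(resolve, 60000 - (now - windowStart)));
          windowStart = Date.now(); requests = 0; tokens = 0;
        }
        requests += 1;
        tokens += estimatedTokens;
      };
    }
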
@@ -1007,14 +1516,27 @@
  entityId: entityId || agentId,
  worldId: worldId || agentId,
  concurrencyLimit: CONCURRENCY_LIMIT,
- rateLimiter
+ rateLimiter,
+ documentTitle
  });
+ const successRate = (savedCount / chunks.length * 100).toFixed(1);
  if (failedCount > 0) {
- logger3.warn(
- `Failed to process ${failedCount} chunks out of ${chunks.length} for document ${documentId}`
+ logger4.warn(
+ `[Document Processor] "${docName}": ${failedCount}/${chunks.length} chunks failed processing`
  );
  }
- logger3.info(`Finished saving ${savedCount} fragments for document ${documentId}.`);
+ logger4.info(
+ `[Document Processor] "${docName}" complete: ${savedCount}/${chunks.length} fragments saved (${successRate}% success)`
+ );
+ logKnowledgeGenerationSummary({
+ documentId,
+ totalChunks: chunks.length,
+ savedCount,
+ failedCount,
+ successRate: parseFloat(successRate),
+ ctxEnabled: getCtxKnowledgeEnabled(runtime),
+ providerLimits
+ });
  return savedCount;
  }
  async function extractTextFromDocument(fileBuffer, contentType, originalFilename) {
@@ -1023,15 +1545,15 @@ async function extractTextFromDocument(fileBuffer, contentType, originalFilename
  }
  try {
  if (contentType === "application/pdf") {
- logger3.debug(`Extracting text from PDF: ${originalFilename}`);
+ logger4.debug(`Extracting text from PDF: ${originalFilename}`);
  return await convertPdfToTextFromBuffer(fileBuffer, originalFilename);
  } else {
- logger3.debug(`Extracting text from non-PDF: ${originalFilename} (Type: ${contentType})`);
+ logger4.debug(`Extracting text from non-PDF: ${originalFilename} (Type: ${contentType})`);
  if (contentType.includes("text/") || contentType.includes("application/json") || contentType.includes("application/xml")) {
  try {
  return fileBuffer.toString("utf8");
  } catch (textError) {
- logger3.warn(
+ logger4.warn(
  `Failed to decode ${originalFilename} as UTF-8, falling back to binary extraction`
  );
  }
@@ -1039,7 +1561,7 @@ async function extractTextFromDocument(fileBuffer, contentType, originalFilename
  return await extractTextFromFileBuffer(fileBuffer, contentType, originalFilename);
  }
  } catch (error) {
- logger3.error(`Error extracting text from ${originalFilename}: ${error.message}`);
+ logger4.error(`Error extracting text from ${originalFilename}: ${error.message}`);
  throw new Error(`Failed to extract text from ${originalFilename}: ${error.message}`);
  }
  }
@@ -1084,7 +1606,7 @@ async function splitDocumentIntoChunks(documentText) {
  const tokenChunkOverlap = DEFAULT_CHUNK_OVERLAP_TOKENS;
  const targetCharChunkSize = Math.round(tokenChunkSize * DEFAULT_CHARS_PER_TOKEN);
  const targetCharChunkOverlap = Math.round(tokenChunkOverlap * DEFAULT_CHARS_PER_TOKEN);
- logger3.debug(
+ logger4.debug(
  `Using core splitChunks with settings: tokenChunkSize=${tokenChunkSize}, tokenChunkOverlap=${tokenChunkOverlap}, charChunkSize=${targetCharChunkSize}, charChunkOverlap=${targetCharChunkOverlap}`
  );
  return await splitChunks(documentText, tokenChunkSize, tokenChunkOverlap);
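
Note: with the defaults declared earlier in this file (DEFAULT_CHUNK_TOKEN_SIZE = 500, DEFAULT_CHUNK_OVERLAP_TOKENS = 100) and assuming DEFAULT_CHARS_PER_TOKEN is 4 (its value is outside this diff, but `estimateTokens` above divides by 4), the character targets in the log line work out to:

    const targetCharChunkSize = Math.round(500 * 4);    // 2000 chars per chunk
    const targetCharChunkOverlap = Math.round(100 * 4); // 400 chars of overlap
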
@@ -1100,7 +1622,8 @@ async function processAndSaveFragments({
1100
1622
  entityId,
1101
1623
  worldId,
1102
1624
  concurrencyLimit,
1103
- rateLimiter
1625
+ rateLimiter,
1626
+ documentTitle
1104
1627
  }) {
1105
1628
  let savedCount = 0;
1106
1629
  let failedCount = 0;
@@ -1108,15 +1631,16 @@ async function processAndSaveFragments({
1108
1631
  for (let i = 0; i < chunks.length; i += concurrencyLimit) {
1109
1632
  const batchChunks = chunks.slice(i, i + concurrencyLimit);
1110
1633
  const batchOriginalIndices = Array.from({ length: batchChunks.length }, (_, k) => i + k);
1111
- logger3.debug(
1112
- `Processing batch of ${batchChunks.length} chunks for document ${documentId}. Starting original index: ${batchOriginalIndices[0]}, batch ${Math.floor(i / concurrencyLimit) + 1}/${Math.ceil(chunks.length / concurrencyLimit)}`
1634
+ logger4.debug(
1635
+ `[Document Processor] Batch ${Math.floor(i / concurrencyLimit) + 1}/${Math.ceil(chunks.length / concurrencyLimit)}: processing ${batchChunks.length} chunks (${batchOriginalIndices[0]}-${batchOriginalIndices[batchOriginalIndices.length - 1]})`
1113
1636
  );
1114
1637
  const contextualizedChunks = await getContextualizedChunks(
1115
1638
  runtime,
1116
1639
  fullDocumentText,
1117
1640
  batchChunks,
1118
1641
  contentType,
1119
- batchOriginalIndices
1642
+ batchOriginalIndices,
1643
+ documentTitle
1120
1644
  );
1121
1645
  const embeddingResults = await generateEmbeddingsForChunks(
1122
1646
  runtime,
@@ -1128,13 +1652,13 @@ async function processAndSaveFragments({
1128
1652
  if (!result.success) {
1129
1653
  failedCount++;
1130
1654
  failedChunks.push(originalChunkIndex);
1131
- logger3.warn(`Failed to process chunk ${originalChunkIndex} for document ${documentId}`);
1655
+ logger4.warn(`Failed to process chunk ${originalChunkIndex} for document ${documentId}`);
1132
1656
  continue;
1133
1657
  }
1134
1658
  const contextualizedChunkText = result.text;
1135
1659
  const embedding = result.embedding;
1136
1660
  if (!embedding || embedding.length === 0) {
1137
- logger3.warn(
1661
+ logger4.warn(
1138
1662
  `Zero vector detected for chunk ${originalChunkIndex} (document ${documentId}). Embedding: ${JSON.stringify(result.embedding)}`
1139
1663
  );
1140
1664
  failedCount++;
@@ -1159,12 +1683,15 @@ async function processAndSaveFragments({
1159
1683
  }
1160
1684
  };
1161
1685
  await runtime.createMemory(fragmentMemory, "knowledge");
1162
- logger3.debug(
1163
- `Saved fragment ${originalChunkIndex + 1} for document ${documentId} (Fragment ID: ${fragmentMemory.id})`
1164
- );
1686
+ if (originalChunkIndex === chunks.length - 1) {
1687
+ const docName = documentTitle || documentId.substring(0, 8);
1688
+ logger4.info(
1689
+ `[Document Processor] "${docName}": All ${chunks.length} chunks processed successfully`
1690
+ );
1691
+ }
1165
1692
  savedCount++;
1166
1693
  } catch (saveError) {
1167
- logger3.error(
1694
+ logger4.error(
1168
1695
  `Error saving chunk ${originalChunkIndex} to database: ${saveError.message}`,
1169
1696
  saveError.stack
1170
1697
  );
@@ -1173,7 +1700,7 @@ async function processAndSaveFragments({
1173
1700
  }
1174
1701
  }
1175
1702
  if (i + concurrencyLimit < chunks.length) {
1176
- await new Promise((resolve) => setTimeout(resolve, 500));
1703
+ await new Promise((resolve2) => setTimeout(resolve2, 500));
1177
1704
  }
1178
1705
  }
1179
1706
  return { savedCount, failedCount, failedChunks };
@@ -1199,7 +1726,8 @@ async function generateEmbeddingsForChunks(runtime, contextualizedChunks, rateLi
1199
1726
  text: contextualizedChunk.contextualizedText
1200
1727
  };
1201
1728
  }
1202
- await rateLimiter();
1729
+ const embeddingTokens = estimateTokens(contextualizedChunk.contextualizedText);
1730
+ await rateLimiter(embeddingTokens);
1203
1731
  try {
1204
1732
  const generateEmbeddingOperation = async () => {
1205
1733
  return await generateEmbeddingWithValidation(
@@ -1226,7 +1754,7 @@ async function generateEmbeddingsForChunks(runtime, contextualizedChunks, rateLi
1226
1754
  text: contextualizedChunk.contextualizedText
1227
1755
  };
1228
1756
  } catch (error) {
1229
- logger3.error(
1757
+ logger4.error(
1230
1758
  `Error generating embedding for chunk ${contextualizedChunk.index}: ${error.message}`
1231
1759
  );
1232
1760
  return {
@@ -1239,37 +1767,50 @@ async function generateEmbeddingsForChunks(runtime, contextualizedChunks, rateLi
1239
1767
  })
1240
1768
  );
1241
1769
  }
1242
- async function getContextualizedChunks(runtime, fullDocumentText, chunks, contentType, batchOriginalIndices) {
1243
- if (ctxKnowledgeEnabled && fullDocumentText) {
1244
- logger3.debug(`Generating contexts for ${chunks.length} chunks`);
1770
+ async function getContextualizedChunks(runtime, fullDocumentText, chunks, contentType, batchOriginalIndices, documentTitle) {
+ const ctxEnabled = getCtxKnowledgeEnabled(runtime);
+ if (batchOriginalIndices[0] === 0) {
+ const docName = documentTitle || "Document";
+ const provider = runtime?.getSetting("TEXT_PROVIDER") || process.env.TEXT_PROVIDER;
+ const model = runtime?.getSetting("TEXT_MODEL") || process.env.TEXT_MODEL;
+ logger4.info(
+ `[Document Processor] "${docName}": CTX enrichment ${ctxEnabled ? "ENABLED" : "DISABLED"}${ctxEnabled ? ` (${provider}/${model})` : ""}`
+ );
+ }
+ if (ctxEnabled && fullDocumentText) {
  return await generateContextsInBatch(
  runtime,
  fullDocumentText,
  chunks,
  contentType,
- batchOriginalIndices
+ batchOriginalIndices,
+ documentTitle
+ );
+ } else if (!ctxEnabled && batchOriginalIndices[0] === 0) {
+ logger4.debug(
+ `[Document Processor] To enable CTX: Set CTX_KNOWLEDGE_ENABLED=true and configure TEXT_PROVIDER/TEXT_MODEL`
  );
- } else {
- return chunks.map((chunkText, idx) => ({
- contextualizedText: chunkText,
- index: batchOriginalIndices[idx],
- success: true
- }));
  }
+ return chunks.map((chunkText, idx) => ({
+ contextualizedText: chunkText,
+ index: batchOriginalIndices[idx],
+ success: true
+ }));
  }
- async function generateContextsInBatch(runtime, fullDocumentText, chunks, contentType, batchIndices) {
- console.log("####### generateContextsInBatch FULLL DOCUMENT", fullDocumentText);
- console.log("####### generateContextsInBatch CHUNKS", chunks);
+ async function generateContextsInBatch(runtime, fullDocumentText, chunks, contentType, batchIndices, documentTitle) {
  if (!chunks || chunks.length === 0) {
  return [];
  }
  const providerLimits = await getProviderRateLimits();
- const rateLimiter = createRateLimiter(providerLimits.requestsPerMinute || 60);
- const config = validateModelConfig();
+ const rateLimiter = createRateLimiter(
+ providerLimits.requestsPerMinute || 60,
+ providerLimits.tokensPerMinute
+ );
+ const config = validateModelConfig(runtime);
  const isUsingOpenRouter = config.TEXT_PROVIDER === "openrouter";
  const isUsingCacheCapableModel = isUsingOpenRouter && (config.TEXT_MODEL?.toLowerCase().includes("claude") || config.TEXT_MODEL?.toLowerCase().includes("gemini"));
- logger3.info(
- `Using provider: ${config.TEXT_PROVIDER}, model: ${config.TEXT_MODEL}, caching capability: ${isUsingCacheCapableModel}`
+ logger4.debug(
+ `[Document Processor] Contextualizing ${chunks.length} chunks with ${config.TEXT_PROVIDER}/${config.TEXT_MODEL} (cache: ${isUsingCacheCapableModel})`
  );
  const promptConfigs = prepareContextPrompts(
  chunks,
@@ -1287,19 +1828,20 @@ async function generateContextsInBatch(runtime, fullDocumentText, chunks, conten
  index: item.originalIndex
  };
  }
- await rateLimiter();
+ const llmTokens = estimateTokens(item.chunkText + (item.prompt || ""));
+ await rateLimiter(llmTokens);
  try {
  let llmResponse;
  const generateTextOperation = async () => {
  if (useCustomLLM) {
  if (item.usesCaching) {
- return await generateText(item.promptText, item.systemPrompt, {
+ return await generateText(runtime, item.promptText, item.systemPrompt, {
  cacheDocument: item.fullDocumentTextForContext,
  cacheOptions: { type: "ephemeral" },
  autoCacheContextualRetrieval: true
  });
  } else {
- return await generateText(item.prompt);
+ return await generateText(runtime, item.prompt);
  }
  } else {
  if (item.usesCaching) {
@@ -1320,16 +1862,19 @@ async function generateContextsInBatch(runtime, fullDocumentText, chunks, conten
  );
  const generatedContext = typeof llmResponse === "string" ? llmResponse : llmResponse.text;
  const contextualizedText = getChunkWithContext(item.chunkText, generatedContext);
- logger3.debug(
- `Context added for chunk ${item.originalIndex}. New length: ${contextualizedText.length}`
- );
+ if ((item.originalIndex + 1) % Math.max(1, Math.floor(chunks.length / 3)) === 0 || item.originalIndex === chunks.length - 1) {
+ const docName = documentTitle || "Document";
+ logger4.debug(
+ `[Document Processor] "${docName}": Context added for ${item.originalIndex + 1}/${chunks.length} chunks`
+ );
+ }
  return {
  contextualizedText,
  success: true,
  index: item.originalIndex
  };
  } catch (error) {
- logger3.error(
+ logger4.error(
  `Error generating context for chunk ${item.originalIndex}: ${error.message}`,
  error.stack
  );
@@ -1350,7 +1895,7 @@ function prepareContextPrompts(chunks, fullDocumentText, contentType, batchIndic
  if (isUsingCacheCapableModel) {
  const cachingPromptInfo = contentType ? getCachingPromptForMimeType(contentType, chunkText) : getCachingContextualizationPrompt(chunkText);
  if (cachingPromptInfo.prompt.startsWith("Error:")) {
- logger3.warn(
+ logger4.warn(
  `Skipping contextualization for chunk ${originalIndex} due to: ${cachingPromptInfo.prompt}`
  );
  return {
@@ -1372,7 +1917,7 @@ function prepareContextPrompts(chunks, fullDocumentText, contentType, batchIndic
  } else {
  const prompt = contentType ? getPromptForMimeType(contentType, fullDocumentText, chunkText) : getContextualizationPrompt(fullDocumentText, chunkText);
  if (prompt.startsWith("Error:")) {
- logger3.warn(`Skipping contextualization for chunk ${originalIndex} due to: ${prompt}`);
+ logger4.warn(`Skipping contextualization for chunk ${originalIndex} due to: ${prompt}`);
  return {
  prompt: null,
  originalIndex,
@@ -1390,7 +1935,7 @@ function prepareContextPrompts(chunks, fullDocumentText, contentType, batchIndic
  };
  }
  } catch (error) {
- logger3.error(
+ logger4.error(
  `Error preparing prompt for chunk ${originalIndex}: ${error.message}`,
  error.stack
  );
@@ -1411,7 +1956,7 @@ async function generateEmbeddingWithValidation(runtime, text) {
  });
  const embedding = Array.isArray(embeddingResult) ? embeddingResult : embeddingResult?.embedding;
  if (!embedding || embedding.length === 0) {
- logger3.warn(`Zero vector detected. Embedding result: ${JSON.stringify(embedding)}`);
+ logger4.warn(`Zero vector detected. Embedding result: ${JSON.stringify(embedding)}`);
  return {
  embedding: null,
  success: false,
@@ -1429,43 +1974,298 @@ async function withRateLimitRetry(operation, errorContext, retryDelay) {
  } catch (error) {
  if (error.status === 429) {
  const delay = retryDelay || error.headers?.["retry-after"] || 5;
- logger3.warn(`Rate limit hit for ${errorContext}. Retrying after ${delay}s`);
- await new Promise((resolve) => setTimeout(resolve, delay * 1e3));
+ logger4.warn(`Rate limit hit for ${errorContext}. Retrying after ${delay}s`);
+ await new Promise((resolve2) => setTimeout(resolve2, delay * 1e3));
  try {
  return await operation();
  } catch (retryError) {
- logger3.error(`Failed after retry for ${errorContext}: ${retryError.message}`);
+ logger4.error(`Failed after retry for ${errorContext}: ${retryError.message}`);
  throw retryError;
  }
  }
  throw error;
  }
  }
- function createRateLimiter(requestsPerMinute) {
+ function createRateLimiter(requestsPerMinute, tokensPerMinute) {
  const requestTimes = [];
+ const tokenUsage = [];
  const intervalMs = 60 * 1e3;
- return async function rateLimiter() {
+ return async function rateLimiter(estimatedTokens = 1e3) {
  const now = Date.now();
  while (requestTimes.length > 0 && now - requestTimes[0] > intervalMs) {
  requestTimes.shift();
  }
- if (requestTimes.length >= requestsPerMinute) {
- const oldestRequest = requestTimes[0];
- const timeToWait = Math.max(0, oldestRequest + intervalMs - now);
+ while (tokenUsage.length > 0 && now - tokenUsage[0].timestamp > intervalMs) {
+ tokenUsage.shift();
+ }
+ const currentTokens = tokenUsage.reduce((sum, usage) => sum + usage.tokens, 0);
+ const requestLimitExceeded = requestTimes.length >= requestsPerMinute;
+ const tokenLimitExceeded = tokensPerMinute && currentTokens + estimatedTokens > tokensPerMinute;
+ if (requestLimitExceeded || tokenLimitExceeded) {
+ let timeToWait = 0;
+ if (requestLimitExceeded) {
+ const oldestRequest = requestTimes[0];
+ timeToWait = Math.max(timeToWait, oldestRequest + intervalMs - now);
+ }
+ if (tokenLimitExceeded && tokenUsage.length > 0) {
+ const oldestTokenUsage = tokenUsage[0];
+ timeToWait = Math.max(timeToWait, oldestTokenUsage.timestamp + intervalMs - now);
+ }
  if (timeToWait > 0) {
- logger3.debug(`Rate limiting applied, waiting ${timeToWait}ms before next request`);
- await new Promise((resolve) => setTimeout(resolve, timeToWait));
+ const reason = requestLimitExceeded ? "request" : "token";
+ if (timeToWait > 5e3) {
+ logger4.info(
+ `[Document Processor] Rate limiting: waiting ${Math.round(timeToWait / 1e3)}s due to ${reason} limit`
+ );
+ } else {
+ logger4.debug(
+ `[Document Processor] Rate limiting: ${timeToWait}ms wait (${reason} limit)`
+ );
+ }
+ await new Promise((resolve2) => setTimeout(resolve2, timeToWait));
+ }
+ }
+ requestTimes.push(now);
+ if (tokensPerMinute) {
+ tokenUsage.push({ timestamp: now, tokens: estimatedTokens });
+ }
+ };
+ }
+ function logKnowledgeGenerationSummary({
+ totalChunks,
+ savedCount,
+ failedCount,
+ ctxEnabled,
+ providerLimits
+ }) {
+ if (failedCount > 0 || process.env.NODE_ENV === "development") {
+ const status = failedCount > 0 ? "PARTIAL" : "SUCCESS";
+ logger4.info(
+ `[Document Processor] ${status}: ${savedCount}/${totalChunks} chunks, CTX: ${ctxEnabled ? "ON" : "OFF"}, Provider: ${providerLimits.provider}`
+ );
+ }
+ if (failedCount > 0) {
+ logger4.warn(`[Document Processor] ${failedCount} chunks failed processing`);
+ }
+ }
+
+ // src/docs-loader.ts
+ import { logger as logger5 } from "@elizaos/core";
+ import * as fs from "fs";
+ import * as path from "path";
+ function getKnowledgePath() {
+ const envPath = process.env.KNOWLEDGE_PATH;
+ if (envPath) {
+ const resolvedPath = path.resolve(envPath);
+ if (!fs.existsSync(resolvedPath)) {
+ logger5.warn(`Knowledge path from environment variable does not exist: ${resolvedPath}`);
+ logger5.warn("Please create the directory or update KNOWLEDGE_PATH environment variable");
+ }
+ return resolvedPath;
+ }
+ const defaultPath = path.join(process.cwd(), "docs");
+ if (!fs.existsSync(defaultPath)) {
+ logger5.info(`Default docs folder does not exist at: ${defaultPath}`);
+ logger5.info("To use the knowledge plugin, either:");
+ logger5.info('1. Create a "docs" folder in your project root');
+ logger5.info("2. Set KNOWLEDGE_PATH environment variable to your documents folder");
+ }
+ return defaultPath;
+ }
+ async function loadDocsFromPath(service, agentId, worldId) {
+ const docsPath = getKnowledgePath();
+ if (!fs.existsSync(docsPath)) {
+ logger5.warn(`Knowledge path does not exist: ${docsPath}`);
+ return { total: 0, successful: 0, failed: 0 };
+ }
+ logger5.info(`Loading documents from: ${docsPath}`);
+ const files = getAllFiles(docsPath);
+ if (files.length === 0) {
+ logger5.info("No files found in knowledge path");
+ return { total: 0, successful: 0, failed: 0 };
+ }
+ logger5.info(`Found ${files.length} files to process`);
+ let successful = 0;
+ let failed = 0;
+ for (const filePath of files) {
+ try {
+ const fileName = path.basename(filePath);
+ const fileExt = path.extname(filePath).toLowerCase();
+ if (fileName.startsWith(".")) {
+ continue;
+ }
+ const contentType = getContentType(fileExt);
+ if (!contentType) {
+ logger5.debug(`Skipping unsupported file type: ${filePath}`);
+ continue;
+ }
+ const fileBuffer = fs.readFileSync(filePath);
+ const isBinary = isBinaryContentType(contentType, fileName);
+ const content = isBinary ? fileBuffer.toString("base64") : fileBuffer.toString("utf-8");
+ const knowledgeOptions = {
+ clientDocumentId: "",
+ // Will be generated by the service based on content
+ contentType,
+ originalFilename: fileName,
+ worldId: worldId || agentId,
+ content,
+ roomId: agentId,
+ entityId: agentId
+ };
+ logger5.debug(`Processing document: ${fileName}`);
+ const result = await service.addKnowledge(knowledgeOptions);
+ logger5.info(`\u2705 "${fileName}": ${result.fragmentCount} fragments created`);
+ successful++;
+ } catch (error) {
+ logger5.error(`Failed to process file ${filePath}:`, error);
+ failed++;
+ }
+ }
+ logger5.info(
+ `Document loading complete: ${successful} successful, ${failed} failed out of ${files.length} total`
+ );
+ return {
+ total: files.length,
+ successful,
+ failed
+ };
+ }
+ function getAllFiles(dirPath, files = []) {
+ try {
+ const entries = fs.readdirSync(dirPath, { withFileTypes: true });
+ for (const entry of entries) {
+ const fullPath = path.join(dirPath, entry.name);
+ if (entry.isDirectory()) {
+ if (!["node_modules", ".git", ".vscode", "dist", "build"].includes(entry.name)) {
+ getAllFiles(fullPath, files);
+ }
+ } else if (entry.isFile()) {
+ files.push(fullPath);
  }
  }
- requestTimes.push(Date.now());
+ } catch (error) {
+ logger5.error(`Error reading directory ${dirPath}:`, error);
+ }
+ return files;
+ }
+ function getContentType(extension) {
+ const contentTypes = {
+ // Text documents
+ ".txt": "text/plain",
+ ".md": "text/markdown",
+ ".markdown": "text/markdown",
+ ".tson": "text/plain",
+ ".xml": "application/xml",
+ ".csv": "text/csv",
+ ".tsv": "text/tab-separated-values",
+ ".log": "text/plain",
+ // Web files
+ ".html": "text/html",
+ ".htm": "text/html",
+ ".css": "text/css",
+ ".scss": "text/x-scss",
+ ".sass": "text/x-sass",
+ ".less": "text/x-less",
+ // JavaScript/TypeScript
+ ".js": "text/javascript",
+ ".jsx": "text/javascript",
+ ".ts": "text/typescript",
+ ".tsx": "text/typescript",
+ ".mjs": "text/javascript",
+ ".cjs": "text/javascript",
+ ".vue": "text/x-vue",
+ ".svelte": "text/x-svelte",
+ ".astro": "text/x-astro",
+ // Python
+ ".py": "text/x-python",
+ ".pyw": "text/x-python",
+ ".pyi": "text/x-python",
+ // Java/Kotlin/Scala
+ ".java": "text/x-java",
+ ".kt": "text/x-kotlin",
+ ".kts": "text/x-kotlin",
+ ".scala": "text/x-scala",
+ // C/C++/C#
+ ".c": "text/x-c",
+ ".cpp": "text/x-c++",
+ ".cc": "text/x-c++",
+ ".cxx": "text/x-c++",
+ ".h": "text/x-c",
+ ".hpp": "text/x-c++",
+ ".cs": "text/x-csharp",
+ // Other languages
+ ".php": "text/x-php",
+ ".rb": "text/x-ruby",
+ ".go": "text/x-go",
+ ".rs": "text/x-rust",
+ ".swift": "text/x-swift",
+ ".r": "text/x-r",
+ ".R": "text/x-r",
+ ".m": "text/x-objectivec",
+ ".mm": "text/x-objectivec",
+ ".clj": "text/x-clojure",
+ ".cljs": "text/x-clojure",
+ ".ex": "text/x-elixir",
+ ".exs": "text/x-elixir",
+ ".lua": "text/x-lua",
+ ".pl": "text/x-perl",
+ ".pm": "text/x-perl",
+ ".dart": "text/x-dart",
+ ".hs": "text/x-haskell",
+ ".elm": "text/x-elm",
+ ".ml": "text/x-ocaml",
+ ".fs": "text/x-fsharp",
+ ".fsx": "text/x-fsharp",
+ ".vb": "text/x-vb",
+ ".pas": "text/x-pascal",
+ ".d": "text/x-d",
+ ".nim": "text/x-nim",
+ ".zig": "text/x-zig",
+ ".jl": "text/x-julia",
+ ".tcl": "text/x-tcl",
+ ".awk": "text/x-awk",
+ ".sed": "text/x-sed",
+ // Shell scripts
+ ".sh": "text/x-sh",
+ ".bash": "text/x-sh",
+ ".zsh": "text/x-sh",
+ ".fish": "text/x-fish",
+ ".ps1": "text/x-powershell",
+ ".bat": "text/x-batch",
+ ".cmd": "text/x-batch",
+ // Config files
+ ".json": "application/json",
+ ".yaml": "text/x-yaml",
+ ".yml": "text/x-yaml",
+ ".toml": "text/x-toml",
+ ".ini": "text/x-ini",
+ ".cfg": "text/x-ini",
+ ".conf": "text/x-ini",
+ ".env": "text/plain",
+ ".gitignore": "text/plain",
+ ".dockerignore": "text/plain",
+ ".editorconfig": "text/plain",
+ ".properties": "text/x-properties",
+ // Database
+ ".sql": "text/x-sql",
+ // Binary documents
+ ".pdf": "application/pdf",
+ ".doc": "application/msword",
+ ".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
  };
+ return contentTypes[extension] || null;
  }
 
  // src/service.ts
+ var parseBooleanEnv2 = (value) => {
+ if (typeof value === "boolean") return value;
+ if (typeof value === "string") return value.toLowerCase() === "true";
+ return false;
+ };
  var KnowledgeService = class _KnowledgeService extends Service {
  static serviceType = "knowledge";
- config;
- knowledgeConfig;
+ config = {};
+ knowledgeConfig = {};
  capabilityDescription = "Provides Retrieval Augmented Generation capabilities, including knowledge upload and querying.";
  knowledgeProcessingSemaphore;
  /**
@@ -1475,53 +2275,25 @@ var KnowledgeService = class _KnowledgeService extends Service {
  constructor(runtime, config) {
  super(runtime);
  this.knowledgeProcessingSemaphore = new Semaphore(10);
- const parseBooleanEnv = (value) => {
- if (typeof value === "boolean") return value;
- if (typeof value === "string") return value.toLowerCase() === "true";
- return false;
- };
- const loadDocsOnStartup = parseBooleanEnv(config?.LOAD_DOCS_ON_STARTUP) || process.env.LOAD_DOCS_ON_STARTUP === "true";
- this.knowledgeConfig = {
- CTX_KNOWLEDGE_ENABLED: parseBooleanEnv(config?.CTX_KNOWLEDGE_ENABLED),
- LOAD_DOCS_ON_STARTUP: loadDocsOnStartup,
- MAX_INPUT_TOKENS: config?.MAX_INPUT_TOKENS,
- MAX_OUTPUT_TOKENS: config?.MAX_OUTPUT_TOKENS,
- EMBEDDING_PROVIDER: config?.EMBEDDING_PROVIDER,
- TEXT_PROVIDER: config?.TEXT_PROVIDER,
- TEXT_EMBEDDING_MODEL: config?.TEXT_EMBEDDING_MODEL
- };
- this.config = { ...this.knowledgeConfig };
- logger4.info(
- `KnowledgeService initialized for agent ${this.runtime.agentId} with config:`,
- this.knowledgeConfig
- );
- if (this.knowledgeConfig.LOAD_DOCS_ON_STARTUP) {
- logger4.info("LOAD_DOCS_ON_STARTUP is enabled. Loading documents from docs folder...");
- this.loadInitialDocuments().catch((error) => {
- logger4.error("Error during initial document loading in KnowledgeService:", error);
- });
- } else {
- logger4.info("LOAD_DOCS_ON_STARTUP is disabled. Skipping automatic document loading.");
- }
  }
  async loadInitialDocuments() {
- logger4.info(
+ logger6.info(
  `KnowledgeService: Checking for documents to load on startup for agent ${this.runtime.agentId}`
  );
  try {
- await new Promise((resolve) => setTimeout(resolve, 1e3));
+ await new Promise((resolve2) => setTimeout(resolve2, 1e3));
  const result = await loadDocsFromPath(this, this.runtime.agentId);
  if (result.successful > 0) {
- logger4.info(
+ logger6.info(
  `KnowledgeService: Loaded ${result.successful} documents from docs folder on startup for agent ${this.runtime.agentId}`
  );
  } else {
- logger4.info(
+ logger6.info(
  `KnowledgeService: No new documents found to load on startup for agent ${this.runtime.agentId}`
  );
  }
  } catch (error) {
- logger4.error(
+ logger6.error(
  `KnowledgeService: Error loading documents on startup for agent ${this.runtime.agentId}:`,
  error
  );
@@ -1533,23 +2305,90 @@ var KnowledgeService = class _KnowledgeService extends Service {
  * @returns Initialized Knowledge service
  */
  static async start(runtime) {
- logger4.info(`Starting Knowledge service for agent: ${runtime.agentId}`);
+ logger6.info(`Starting Knowledge service for agent: ${runtime.agentId}`);
+ logger6.info("Initializing Knowledge Plugin...");
+ let validatedConfig = {};
+ try {
+ logger6.info("Validating model configuration for Knowledge plugin...");
+ logger6.debug(`[Knowledge Plugin] INIT DEBUG:`);
+ logger6.debug(
+ `[Knowledge Plugin] - process.env.CTX_KNOWLEDGE_ENABLED: '${process.env.CTX_KNOWLEDGE_ENABLED}'`
+ );
+ const config = {
+ CTX_KNOWLEDGE_ENABLED: parseBooleanEnv2(runtime.getSetting("CTX_KNOWLEDGE_ENABLED"))
+ };
+ logger6.debug(
+ `[Knowledge Plugin] - config.CTX_KNOWLEDGE_ENABLED: '${config.CTX_KNOWLEDGE_ENABLED}'`
+ );
+ logger6.debug(
+ `[Knowledge Plugin] - runtime.getSetting('CTX_KNOWLEDGE_ENABLED'): '${runtime.getSetting("CTX_KNOWLEDGE_ENABLED")}'`
+ );
+ validatedConfig = validateModelConfig(runtime);
+ const ctxEnabledFromEnv = parseBooleanEnv2(process.env.CTX_KNOWLEDGE_ENABLED);
+ const ctxEnabledFromRuntime = parseBooleanEnv2(runtime.getSetting("CTX_KNOWLEDGE_ENABLED"));
+ const ctxEnabledFromValidated = validatedConfig.CTX_KNOWLEDGE_ENABLED;
+ const finalCtxEnabled = ctxEnabledFromValidated;
+ logger6.debug(`[Knowledge Plugin] CTX_KNOWLEDGE_ENABLED sources:`);
+ logger6.debug(`[Knowledge Plugin] - From env: ${ctxEnabledFromEnv}`);
+ logger6.debug(`[Knowledge Plugin] - From runtime: ${ctxEnabledFromRuntime}`);
+ logger6.debug(`[Knowledge Plugin] - FINAL RESULT: ${finalCtxEnabled}`);
+ if (finalCtxEnabled) {
+ logger6.info("Running in Contextual Knowledge mode with text generation capabilities.");
+ logger6.info(
+ `Using ${validatedConfig.EMBEDDING_PROVIDER || "auto-detected"} for embeddings and ${validatedConfig.TEXT_PROVIDER} for text generation.`
+ );
+ logger6.info(`Text model: ${validatedConfig.TEXT_MODEL}`);
+ } else {
+ const usingPluginOpenAI = !process.env.EMBEDDING_PROVIDER;
+ logger6.warn(
+ "Running in Basic Embedding mode - documents will NOT be enriched with context!"
+ );
+ logger6.info("To enable contextual enrichment:");
+ logger6.info(" - Set CTX_KNOWLEDGE_ENABLED=true");
+ logger6.info(" - Configure TEXT_PROVIDER (anthropic/openai/openrouter/google)");
+ logger6.info(" - Configure TEXT_MODEL and API key");
+ if (usingPluginOpenAI) {
+ logger6.info("Using auto-detected configuration from plugin-openai for embeddings.");
+ } else {
+ logger6.info(
+ `Using ${validatedConfig.EMBEDDING_PROVIDER} for embeddings with ${validatedConfig.TEXT_EMBEDDING_MODEL}.`
+ );
+ }
+ }
+ logger6.success("Model configuration validated successfully.");
+ logger6.success(`Knowledge Plugin initialized for agent: ${runtime.character.name}`);
+ logger6.info(
+ "Knowledge Plugin initialized. Frontend panel should be discoverable via its public route."
+ );
+ } catch (error) {
+ logger6.error("Failed to initialize Knowledge plugin:", error);
+ throw error;
+ }
  const service = new _KnowledgeService(runtime);
+ service.config = validatedConfig;
+ if (service.config.LOAD_DOCS_ON_STARTUP) {
+ logger6.info("LOAD_DOCS_ON_STARTUP is enabled. Loading documents from docs folder...");
+ service.loadInitialDocuments().catch((error) => {
+ logger6.error("Error during initial document loading in KnowledgeService:", error);
+ });
+ } else {
+ logger6.info("LOAD_DOCS_ON_STARTUP is disabled. Skipping automatic document loading.");
+ }
  if (service.runtime.character?.knowledge && service.runtime.character.knowledge.length > 0) {
- logger4.info(
+ logger6.info(
  `KnowledgeService: Processing ${service.runtime.character.knowledge.length} character knowledge items.`
  );
  const stringKnowledge = service.runtime.character.knowledge.filter(
  (item) => typeof item === "string"
  );
  await service.processCharacterKnowledge(stringKnowledge).catch((err) => {
- logger4.error(
+ logger6.error(
  `KnowledgeService: Error processing character knowledge during startup: ${err.message}`,
  err
  );
  });
  } else {
- logger4.info(
+ logger6.info(
  `KnowledgeService: No character knowledge to process for agent ${runtime.agentId}.`
  );
  }
@@ -1560,10 +2399,10 @@ var KnowledgeService = class _KnowledgeService extends Service {
  * @param runtime Agent runtime
  */
  static async stop(runtime) {
- logger4.info(`Stopping Knowledge service for agent: ${runtime.agentId}`);
+ logger6.info(`Stopping Knowledge service for agent: ${runtime.agentId}`);
  const service = runtime.getService(_KnowledgeService.serviceType);
  if (!service) {
- logger4.warn(`KnowledgeService not found for agent ${runtime.agentId} during stop.`);
+ logger6.warn(`KnowledgeService not found for agent ${runtime.agentId} during stop.`);
  }
  if (service instanceof _KnowledgeService) {
  await service.stop();
@@ -1573,7 +2412,7 @@ var KnowledgeService = class _KnowledgeService extends Service {
  * Stop the service
  */
  async stop() {
- logger4.info(`Knowledge service stopping for agent: ${this.runtime.agentId}`);
+ logger6.info(`Knowledge service stopping for agent: ${this.runtime.character?.name}`);
  }
  /**
  * Add knowledge to the system
@@ -1588,15 +2427,11 @@ var KnowledgeService = class _KnowledgeService extends Service {
  maxChars: 2e3
  // Use first 2KB of content for ID generation
  });
- logger4.info(
- `KnowledgeService processing document for agent: ${agentId}, file: ${options.originalFilename}, type: ${options.contentType}, generated ID: ${contentBasedId}`
- );
+ logger6.info(`Processing "${options.originalFilename}" (${options.contentType})`);
  try {
  const existingDocument = await this.runtime.getMemoryById(contentBasedId);
  if (existingDocument && existingDocument.metadata?.type === MemoryType2.DOCUMENT) {
- logger4.info(
- `Document ${options.originalFilename} with ID ${contentBasedId} already exists. Skipping processing.`
- );
+ logger6.info(`"${options.originalFilename}" already exists - skipping`);
  const fragments = await this.runtime.getMemories({
  tableName: "knowledge"
  });
@@ -1610,7 +2445,7 @@ var KnowledgeService = class _KnowledgeService extends Service {
  };
  }
  } catch (error) {
- logger4.debug(
+ logger6.debug(
  `Document ${contentBasedId} not found or error checking existence, proceeding with processing: ${error instanceof Error ? error.message : String(error)}`
  );
  }
@@ -1637,7 +2472,7 @@ var KnowledgeService = class _KnowledgeService extends Service {
  }) {
  const agentId = passedAgentId || this.runtime.agentId;
  try {
- logger4.debug(
+ logger6.debug(
  `KnowledgeService: Processing document ${originalFilename} (type: ${contentType}) via processDocument for agent: ${agentId}`
  );
  let fileBuffer = null;
@@ -1648,7 +2483,7 @@ var KnowledgeService = class _KnowledgeService extends Service {
  try {
  fileBuffer = Buffer.from(content, "base64");
  } catch (e) {
- logger4.error(
+ logger6.error(
  `KnowledgeService: Failed to convert base64 to buffer for ${originalFilename}: ${e.message}`
  );
  throw new Error(`Invalid base64 content for PDF file ${originalFilename}`);
@@ -1659,7 +2494,7 @@ var KnowledgeService = class _KnowledgeService extends Service {
  try {
  fileBuffer = Buffer.from(content, "base64");
  } catch (e) {
- logger4.error(
+ logger6.error(
  `KnowledgeService: Failed to convert base64 to buffer for ${originalFilename}: ${e.message}`
  );
  throw new Error(`Invalid base64 content for binary file ${originalFilename}`);
@@ -1676,11 +2511,11 @@ var KnowledgeService = class _KnowledgeService extends Service {
  if (invalidCharCount > 0 && invalidCharCount / textLength > 0.1) {
  throw new Error("Decoded content contains too many invalid characters");
  }
- logger4.debug(`Successfully decoded base64 content for text file: ${originalFilename}`);
+ logger6.debug(`Successfully decoded base64 content for text file: ${originalFilename}`);
  extractedText = decodedText;
  documentContentToStore = decodedText;
  } catch (e) {
- logger4.error(
+ logger6.error(
  `Failed to decode base64 for ${originalFilename}: ${e instanceof Error ? e.message : String(e)}`
  );
  throw new Error(
@@ -1688,7 +2523,7 @@ var KnowledgeService = class _KnowledgeService extends Service {
  );
  }
  } else {
- logger4.debug(`Treating content as plain text for file: ${originalFilename}`);
+ logger6.debug(`Treating content as plain text for file: ${originalFilename}`);
  extractedText = content;
  documentContentToStore = content;
  }
@@ -1697,7 +2532,7 @@ var KnowledgeService = class _KnowledgeService extends Service {
  const noTextError = new Error(
  `KnowledgeService: No text content extracted from ${originalFilename} (type: ${contentType}).`
  );
- logger4.warn(noTextError.message);
+ logger6.warn(noTextError.message);
  throw noTextError;
  }
  const documentMemory = createDocumentMemory({
@@ -1723,14 +2558,14 @@ var KnowledgeService = class _KnowledgeService extends Service {
  roomId: roomId || agentId,
  entityId: entityId || agentId
  };
- logger4.debug(
+ logger6.debug(
  `KnowledgeService: Creating memory with agentId=${agentId}, entityId=${entityId}, roomId=${roomId}, this.runtime.agentId=${this.runtime.agentId}`
  );
- logger4.debug(
+ logger6.debug(
  `KnowledgeService: memoryWithScope agentId=${memoryWithScope.agentId}, entityId=${memoryWithScope.entityId}`
  );
  await this.runtime.createMemory(memoryWithScope, "documents");
- logger4.debug(
+ logger6.debug(
  `KnowledgeService: Stored document ${originalFilename} (Memory ID: ${memoryWithScope.id})`
  );
  const fragmentCount = await processFragmentsSynchronously({
@@ -1742,18 +2577,17 @@ var KnowledgeService = class _KnowledgeService extends Service {
  contentType,
  roomId: roomId || agentId,
  entityId: entityId || agentId,
- worldId: worldId || agentId
+ worldId: worldId || agentId,
+ documentTitle: originalFilename
  });
- logger4.info(
- `KnowledgeService: Document ${originalFilename} processed with ${fragmentCount} fragments for agent ${agentId}`
- );
+ logger6.debug(`"${originalFilename}" stored with ${fragmentCount} fragments`);
  return {
  clientDocumentId,
  storedDocumentMemoryId: memoryWithScope.id,
  fragmentCount
  };
  } catch (error) {
- logger4.error(
+ logger6.error(
  `KnowledgeService: Error processing document ${originalFilename}: ${error.message}`,
  error.stack
  );
@@ -1762,7 +2596,7 @@ var KnowledgeService = class _KnowledgeService extends Service {
  }
  // --- Knowledge methods moved from AgentRuntime ---
  async handleProcessingError(error, context) {
- logger4.error(`KnowledgeService: Error ${context}:`, error?.message || error || "Unknown error");
+ logger6.error(`KnowledgeService: Error ${context}:`, error?.message || error || "Unknown error");
  throw error;
  }
  async checkExistingKnowledge(knowledgeId) {
@@ -1770,9 +2604,9 @@ var KnowledgeService = class _KnowledgeService extends Service {
  return !!existingDocument;
  }
  async getKnowledge(message, scope) {
- logger4.debug("KnowledgeService: getKnowledge called for message id: " + message.id);
+ logger6.debug("KnowledgeService: getKnowledge called for message id: " + message.id);
  if (!message?.content?.text || message?.content?.text.trim().length === 0) {
- logger4.warn("KnowledgeService: Invalid or empty message content for knowledge query.");
+ logger6.warn("KnowledgeService: Invalid or empty message content for knowledge query.");
  return [];
  }
  const embedding = await this.runtime.useModel(ModelType2.TEXT_EMBEDDING, {
@@ -1801,9 +2635,104 @@ var KnowledgeService = class _KnowledgeService extends Service {
  worldId: fragment.worldId
  }));
  }
+ /**
+ * Enrich a conversation memory with RAG metadata
+ * This can be called after response generation to add RAG tracking data
+ * @param memoryId The ID of the conversation memory to enrich
+ * @param ragMetadata The RAG metadata to add
+ */
+ async enrichConversationMemoryWithRAG(memoryId, ragMetadata) {
+ try {
+ const existingMemory = await this.runtime.getMemoryById(memoryId);
+ if (!existingMemory) {
+ logger6.warn(`Cannot enrich memory ${memoryId} - memory not found`);
+ return;
+ }
+ const updatedMetadata = {
+ ...existingMemory.metadata,
+ knowledgeUsed: true,
+ // Simple flag for UI to detect RAG usage
+ ragUsage: {
+ retrievedFragments: ragMetadata.retrievedFragments,
+ queryText: ragMetadata.queryText,
+ totalFragments: ragMetadata.totalFragments,
+ retrievalTimestamp: ragMetadata.retrievalTimestamp,
+ usedInResponse: true
+ },
+ timestamp: existingMemory.metadata?.timestamp || Date.now(),
+ type: existingMemory.metadata?.type || "message"
+ };
+ await this.runtime.updateMemory({
+ id: memoryId,
+ metadata: updatedMetadata
+ });
+ logger6.debug(
+ `Enriched conversation memory ${memoryId} with RAG data: ${ragMetadata.totalFragments} fragments`
+ );
+ } catch (error) {
+ logger6.warn(
+ `Failed to enrich conversation memory ${memoryId} with RAG data: ${error.message}`
+ );
+ }
+ }
+ /**
+ * Set the current response memory ID for RAG tracking
+ * This is called by the knowledge provider to track which response memory to enrich
+ */
+ pendingRAGEnrichment = [];
+ /**
+ * Store RAG metadata for the next conversation memory that gets created
+ * @param ragMetadata The RAG metadata to associate with the next memory
+ */
+ setPendingRAGMetadata(ragMetadata) {
+ const now = Date.now();
+ this.pendingRAGEnrichment = this.pendingRAGEnrichment.filter(
+ (entry) => now - entry.timestamp < 3e4
+ );
+ this.pendingRAGEnrichment.push({
+ ragMetadata,
+ timestamp: now
+ });
+ logger6.debug(`Stored pending RAG metadata for next conversation memory`);
+ }
+ /**
+ * Try to enrich recent conversation memories with pending RAG metadata
+ * This is called periodically to catch memories that were created after RAG retrieval
+ */
+ async enrichRecentMemoriesWithPendingRAG() {
+ if (this.pendingRAGEnrichment.length === 0) {
+ return;
+ }
+ try {
+ const recentMemories = await this.runtime.getMemories({
+ tableName: "messages",
+ count: 10
+ });
+ const now = Date.now();
+ const recentConversationMemories = recentMemories.filter(
+ (memory) => memory.metadata?.type === "message" && now - (memory.createdAt || 0) < 1e4 && // Created in last 10 seconds
+ !memory.metadata?.ragUsage
+ // Doesn't already have RAG data
+ ).sort((a, b) => (b.createdAt || 0) - (a.createdAt || 0));
+ for (const pendingEntry of this.pendingRAGEnrichment) {
+ const matchingMemory = recentConversationMemories.find(
+ (memory) => (memory.createdAt || 0) > pendingEntry.timestamp
+ );
+ if (matchingMemory && matchingMemory.id) {
+ await this.enrichConversationMemoryWithRAG(matchingMemory.id, pendingEntry.ragMetadata);
+ const index = this.pendingRAGEnrichment.indexOf(pendingEntry);
+ if (index > -1) {
+ this.pendingRAGEnrichment.splice(index, 1);
+ }
+ }
+ }
+ } catch (error) {
+ logger6.warn(`Error enriching recent memories with RAG data: ${error.message}`);
+ }
+ }
  async processCharacterKnowledge(items) {
- await new Promise((resolve) => setTimeout(resolve, 1e3));
- logger4.info(
+ await new Promise((resolve2) => setTimeout(resolve2, 1e3));
+ logger6.info(
  `KnowledgeService: Processing ${items.length} character knowledge items for agent ${this.runtime.agentId}`
  );
  const processingPromises = items.map(async (item) => {
@@ -1816,12 +2745,12 @@ var KnowledgeService = class _KnowledgeService extends Service {
  // A constant identifier for character knowledge
  });
  if (await this.checkExistingKnowledge(knowledgeId)) {
- logger4.debug(
+ logger6.debug(
  `KnowledgeService: Character knowledge item with ID ${knowledgeId} already exists. Skipping.`
  );
  return;
  }
- logger4.debug(
+ logger6.debug(
  `KnowledgeService: Processing character knowledge for ${this.runtime.character?.name} - ${item.slice(0, 100)}`
  );
  let metadata = {
@@ -1872,7 +2801,7 @@ var KnowledgeService = class _KnowledgeService extends Service {
  }
  });
  await Promise.all(processingPromises);
- logger4.info(
+ logger6.info(
  `KnowledgeService: Finished processing character knowledge for agent ${this.runtime.agentId}.`
  );
  }
@@ -1892,7 +2821,7 @@ var KnowledgeService = class _KnowledgeService extends Service {
  worldId: scope?.worldId ?? this.runtime.agentId,
  entityId: scope?.entityId ?? this.runtime.agentId
  };
- logger4.debug(`KnowledgeService: _internalAddKnowledge called for item ID ${item.id}`);
+ logger6.debug(`KnowledgeService: _internalAddKnowledge called for item ID ${item.id}`);
  const documentMemory = {
  id: item.id,
  // This ID should be the unique ID for the document being added.
@@ -1914,7 +2843,7 @@ var KnowledgeService = class _KnowledgeService extends Service {
  };
  const existingDocument = await this.runtime.getMemoryById(item.id);
  if (existingDocument) {
- logger4.debug(
+ logger6.debug(
  `KnowledgeService: Document ${item.id} already exists in _internalAddKnowledge, updating...`
  );
  await this.runtime.updateMemory({
@@ -1938,13 +2867,13 @@ var KnowledgeService = class _KnowledgeService extends Service {
  await this.processDocumentFragment(fragment);
  fragmentsProcessed++;
  } catch (error) {
- logger4.error(
+ logger6.error(
  `KnowledgeService: Error processing fragment ${fragment.id} for document ${item.id}:`,
  error
  );
  }
  }
- logger4.debug(
+ logger6.debug(
  `KnowledgeService: Processed ${fragmentsProcessed}/${fragments.length} fragments for document ${item.id}.`
  );
  }
@@ -1953,7 +2882,7 @@ var KnowledgeService = class _KnowledgeService extends Service {
  await this.runtime.addEmbeddingToMemory(fragment);
  await this.runtime.createMemory(fragment, "knowledge");
  } catch (error) {
- logger4.error(
+ logger6.error(
  `KnowledgeService: Error processing fragment ${fragment.id}:`,
  error instanceof Error ? error.message : String(error)
  );
@@ -2018,7 +2947,7 @@ var KnowledgeService = class _KnowledgeService extends Service {
  */
  async deleteMemory(memoryId) {
  await this.runtime.deleteMemory(memoryId);
- logger4.info(
+ logger6.info(
  `KnowledgeService: Deleted memory ${memoryId} for agent ${this.runtime.agentId}. Assumed it was a document or related fragment.`
  );
  }
@@ -2026,13 +2955,14 @@ var KnowledgeService = class _KnowledgeService extends Service {
  };
 
  // src/provider.ts
- import { addHeader } from "@elizaos/core";
+ import { addHeader, logger as logger7 } from "@elizaos/core";
  var knowledgeProvider = {
  name: "KNOWLEDGE",
  description: "Knowledge from the knowledge base that the agent knows, retrieved whenever the agent needs to answer a question about their expertise.",
  dynamic: true,
  get: async (runtime, message) => {
- const knowledgeData = await runtime.getService("knowledge")?.getKnowledge(message);
+ const knowledgeService = runtime.getService("knowledge");
+ const knowledgeData = await knowledgeService?.getKnowledge(message);
  const firstFiveKnowledgeItems = knowledgeData?.slice(0, 5);
  let knowledge = (firstFiveKnowledgeItems && firstFiveKnowledgeItems.length > 0 ? addHeader(
  "# Knowledge",
@@ -2042,23 +2972,61 @@ var knowledgeProvider = {
  if (knowledge.length > 4e3 * tokenLength) {
  knowledge = knowledge.slice(0, 4e3 * tokenLength);
  }
+ let ragMetadata = null;
+ if (knowledgeData && knowledgeData.length > 0) {
+ ragMetadata = {
+ retrievedFragments: knowledgeData.map((fragment) => ({
+ fragmentId: fragment.id,
+ documentTitle: fragment.metadata?.filename || fragment.metadata?.title || "Unknown Document",
+ similarityScore: fragment.similarity,
+ contentPreview: (fragment.content?.text || "No content").substring(0, 100) + "..."
+ })),
+ queryText: message.content?.text || "Unknown query",
+ totalFragments: knowledgeData.length,
+ retrievalTimestamp: Date.now()
+ };
+ }
+ if (knowledgeData && knowledgeData.length > 0 && knowledgeService && ragMetadata) {
+ try {
+ knowledgeService.setPendingRAGMetadata(ragMetadata);
+ setTimeout(async () => {
+ try {
+ await knowledgeService.enrichRecentMemoriesWithPendingRAG();
+ } catch (error) {
+ logger7.warn("RAG memory enrichment failed:", error.message);
+ }
+ }, 2e3);
+ } catch (error) {
+ logger7.warn("RAG memory enrichment failed:", error.message);
+ }
+ }
  return {
  data: {
- knowledge
+ knowledge,
+ ragMetadata,
+ // 🎯 Include RAG metadata for memory tracking
+ knowledgeUsed: knowledgeData && knowledgeData.length > 0
+ // Simple flag for easy detection
  },
  values: {
- knowledge
+ knowledge,
+ knowledgeUsed: knowledgeData && knowledgeData.length > 0
+ // Simple flag for easy detection
  },
- text: knowledge
+ text: knowledge,
+ ragMetadata,
+ // 🎯 Also include at top level for easy access
+ knowledgeUsed: knowledgeData && knowledgeData.length > 0
+ // 🎯 Simple flag at top level too
  };
  }
  };
 
  // src/tests.ts
  import { MemoryType as MemoryType3, ModelType as ModelType3 } from "@elizaos/core";
- import { Buffer as Buffer2 } from "buffer";
- import * as fs from "fs";
- import * as path from "path";
+ import { Buffer as Buffer3 } from "buffer";
+ import * as fs2 from "fs";
+ import * as path2 from "path";
  var mockLogger = {
  info: (() => {
  const fn = (...args) => {
@@ -2192,9 +3160,7 @@ function createMockRuntime(overrides) {
  return ids.map((id) => memories.get(id)).filter(Boolean);
  },
  async getMemoriesByRoomIds(params) {
- return Array.from(memories.values()).filter(
- (m) => params.roomIds.includes(m.roomId)
- );
+ return Array.from(memories.values()).filter((m) => params.roomIds.includes(m.roomId));
  },
  async searchMemories(params) {
  const fragments = Array.from(memories.values()).filter(
@@ -2457,9 +3423,9 @@ trailer
  startxref
  ${465 + content.length}
  %%EOF`;
- return Buffer2.from(pdfContent);
+ return Buffer3.from(pdfContent);
  }
- return Buffer2.from(content, "utf-8");
+ return Buffer3.from(content, "utf-8");
  }
  var KnowledgeTestSuite = class {
  name = "knowledge";
@@ -2472,10 +3438,10 @@ var KnowledgeTestSuite = class {
  const originalEnv = { ...process.env };
  delete process.env.KNOWLEDGE_PATH;
  try {
- const docsPath = path.join(process.cwd(), "docs");
- const docsExists = fs.existsSync(docsPath);
+ const docsPath = path2.join(process.cwd(), "docs");
+ const docsExists = fs2.existsSync(docsPath);
  if (!docsExists) {
- fs.mkdirSync(docsPath, { recursive: true });
+ fs2.mkdirSync(docsPath, { recursive: true });
  }
  await index_default.init({}, runtime);
  const errorCalls = mockLogger.error.calls;
@@ -2483,7 +3449,7 @@ var KnowledgeTestSuite = class {
  throw new Error(`Unexpected error during init: ${errorCalls[0]}`);
  }
  if (!docsExists) {
- fs.rmSync(docsPath, { recursive: true, force: true });
+ fs2.rmSync(docsPath, { recursive: true, force: true });
  }
  } finally {
  process.env = originalEnv;
@@ -2496,13 +3462,13 @@ var KnowledgeTestSuite = class {
  const originalEnv = { ...process.env };
  delete process.env.KNOWLEDGE_PATH;
  try {
- const docsPath = path.join(process.cwd(), "docs");
- if (fs.existsSync(docsPath)) {
- fs.renameSync(docsPath, docsPath + ".backup");
+ const docsPath = path2.join(process.cwd(), "docs");
+ if (fs2.existsSync(docsPath)) {
+ fs2.renameSync(docsPath, docsPath + ".backup");
  }
  await index_default.init({}, runtime);
- if (fs.existsSync(docsPath + ".backup")) {
- fs.renameSync(docsPath + ".backup", docsPath);
+ if (fs2.existsSync(docsPath + ".backup")) {
+ fs2.renameSync(docsPath + ".backup", docsPath);
  }
  } finally {
  process.env = originalEnv;
@@ -2521,9 +3487,7 @@ var KnowledgeTestSuite = class {
  throw new Error("Incorrect service capability description");
  }
  runtime.services.set(KnowledgeService.serviceType, service);
- const retrievedService = runtime.getService(
- KnowledgeService.serviceType
- );
+ const retrievedService = runtime.getService(KnowledgeService.serviceType);
  if (retrievedService !== service) {
  throw new Error("Service not properly registered with runtime");
  }
@@ -2536,11 +3500,7 @@ var KnowledgeTestSuite = class {
  fn: async (runtime) => {
  const testContent = "This is a test document with some content.";
  const buffer = createTestFileBuffer(testContent);
- const extractedText = await extractTextFromDocument(
- buffer,
- "text/plain",
- "test.txt"
- );
+ const extractedText = await extractTextFromDocument(buffer, "text/plain", "test.txt");
  if (extractedText !== testContent) {
  throw new Error(`Expected "${testContent}", got "${extractedText}"`);
  }
@@ -2549,7 +3509,7 @@ var KnowledgeTestSuite = class {
  {
  name: "Should handle empty file buffer",
  fn: async (runtime) => {
- const emptyBuffer = Buffer2.alloc(0);
+ const emptyBuffer = Buffer3.alloc(0);
  try {
  await extractTextFromDocument(emptyBuffer, "text/plain", "empty.txt");
  throw new Error("Should have thrown error for empty buffer");
@@ -2612,9 +3572,7 @@ var KnowledgeTestSuite = class {
  if (result.fragmentCount === 0) {
  throw new Error("No fragments created");
  }
- const storedDoc = await runtime.getMemoryById(
- result.storedDocumentMemoryId
- );
+ const storedDoc = await runtime.getMemoryById(result.storedDocumentMemoryId);
  if (!storedDoc) {
  throw new Error("Document not found in storage");
  }
@@ -2759,19 +3717,15 @@ var KnowledgeTestSuite = class {
  }
  });
  const service = await KnowledgeService.start(knowledgeRuntime);
- await new Promise((resolve) => setTimeout(resolve, 2e3));
+ await new Promise((resolve2) => setTimeout(resolve2, 2e3));
  const memories = await knowledgeRuntime.getMemories({
  tableName: "documents",
  entityId: knowledgeRuntime.agentId
  });
  if (memories.length < 3) {
- throw new Error(
- `Expected at least 3 character knowledge items, got ${memories.length}`
- );
+ throw new Error(`Expected at least 3 character knowledge items, got ${memories.length}`);
  }
- const pathKnowledge = memories.find(
- (m) => m.content.text?.includes("markdown content")
- );
+ const pathKnowledge = memories.find((m) => m.content.text?.includes("markdown content"));
  if (!pathKnowledge) {
  throw new Error("Path-based knowledge not found");
  }
@@ -2873,11 +3827,7 @@ var KnowledgeTestSuite = class {
  data: {},
  text: ""
  };
- const providerResult = await knowledgeProvider.get(
- runtime,
- queryMessage,
- state
- );
+ const providerResult = await knowledgeProvider.get(runtime, queryMessage, state);
  if (!providerResult.text || !providerResult.text.includes("qubit")) {
  throw new Error("Provider did not return relevant knowledge");
  }
@@ -2907,9 +3857,7 @@ var KnowledgeTestSuite = class {
  };
  const result = await service.addKnowledge(document);
  if (result.fragmentCount < 2) {
- throw new Error(
- "Large document should be split into multiple fragments"
- );
+ throw new Error("Large document should be split into multiple fragments");
  }
  const fragments = await runtime.getMemories({
  tableName: "knowledge",
@@ -2961,9 +3909,9 @@ var KnowledgeTestSuite = class {
  var tests_default = new KnowledgeTestSuite();
 
  // src/actions.ts
- import { logger as logger5, stringToUuid } from "@elizaos/core";
- import * as fs2 from "fs";
- import * as path2 from "path";
+ import { logger as logger8, stringToUuid } from "@elizaos/core";
+ import * as fs3 from "fs";
+ import * as path3 from "path";
  var processKnowledgeAction = {
  name: "PROCESS_KNOWLEDGE",
  description: "Process and store knowledge from a file path or text content into the knowledge base",
@@ -3014,25 +3962,19 @@ var processKnowledgeAction = {
  "ingest",
  "file"
  ];
- const hasKeyword = knowledgeKeywords.some(
- (keyword) => text.includes(keyword)
- );
+ const hasKeyword = knowledgeKeywords.some((keyword) => text.includes(keyword));
  const pathPattern = /(?:\/[\w.-]+)+|(?:[a-zA-Z]:[\\/][\w\s.-]+(?:[\\/][\w\s.-]+)*)/;
  const hasPath = pathPattern.test(text);
  const service = runtime.getService(KnowledgeService.serviceType);
  if (!service) {
- logger5.warn(
- "Knowledge service not available for PROCESS_KNOWLEDGE action"
- );
+ logger8.warn("Knowledge service not available for PROCESS_KNOWLEDGE action");
  return false;
  }
  return hasKeyword || hasPath;
  },
  handler: async (runtime, message, state, options, callback) => {
  try {
- const service = runtime.getService(
- KnowledgeService.serviceType
- );
+ const service = runtime.getService(KnowledgeService.serviceType);
  if (!service) {
  throw new Error("Knowledge service not available");
  }
@@ -3042,7 +3984,7 @@ var processKnowledgeAction = {
  let response;
  if (pathMatch) {
  const filePath = pathMatch[0];
- if (!fs2.existsSync(filePath)) {
+ if (!fs3.existsSync(filePath)) {
  response = {
  text: `I couldn't find the file at ${filePath}. Please check the path and try again.`
  };
@@ -3051,9 +3993,9 @@ var processKnowledgeAction = {
  }
  return;
  }
- const fileBuffer = fs2.readFileSync(filePath);
- const fileName = path2.basename(filePath);
- const fileExt = path2.extname(filePath).toLowerCase();
+ const fileBuffer = fs3.readFileSync(filePath);
+ const fileName = path3.basename(filePath);
+ const fileExt = path3.extname(filePath).toLowerCase();
  let contentType = "text/plain";
  if (fileExt === ".pdf") contentType = "application/pdf";
  else if (fileExt === ".docx")
@@ -3075,10 +4017,7 @@ var processKnowledgeAction = {
  text: `I've successfully processed the document "${fileName}". It has been split into ${result.fragmentCount} searchable fragments and added to my knowledge base.`
  };
  } else {
- const knowledgeContent = text.replace(
- /^(add|store|remember|process|learn)\s+(this|that|the following)?:?\s*/i,
- ""
- ).trim();
+ const knowledgeContent = text.replace(/^(add|store|remember|process|learn)\s+(this|that|the following)?:?\s*/i, "").trim();
  if (!knowledgeContent) {
  response = {
  text: "I need some content to add to my knowledge base. Please provide text or a file path."
@@ -3106,7 +4045,7 @@ var processKnowledgeAction = {
  await callback(response);
  }
  } catch (error) {
- logger5.error("Error in PROCESS_KNOWLEDGE action:", error);
+ logger8.error("Error in PROCESS_KNOWLEDGE action:", error);
  const errorResponse = {
  text: `I encountered an error while processing the knowledge: ${error instanceof Error ? error.message : "Unknown error"}`
  };
@@ -3146,25 +4085,10 @@ var searchKnowledgeAction = {
  ],
  validate: async (runtime, message, state) => {
  const text = message.content.text?.toLowerCase() || "";
- const searchKeywords = [
- "search",
- "find",
- "look up",
- "query",
- "what do you know about"
- ];
- const knowledgeKeywords = [
- "knowledge",
- "information",
- "document",
- "database"
- ];
- const hasSearchKeyword = searchKeywords.some(
- (keyword) => text.includes(keyword)
- );
- const hasKnowledgeKeyword = knowledgeKeywords.some(
- (keyword) => text.includes(keyword)
- );
+ const searchKeywords = ["search", "find", "look up", "query", "what do you know about"];
+ const knowledgeKeywords = ["knowledge", "information", "document", "database"];
+ const hasSearchKeyword = searchKeywords.some((keyword) => text.includes(keyword));
+ const hasKnowledgeKeyword = knowledgeKeywords.some((keyword) => text.includes(keyword));
  const service = runtime.getService(KnowledgeService.serviceType);
  if (!service) {
  return false;
@@ -3173,17 +4097,12 @@ var searchKnowledgeAction = {
  },
  handler: async (runtime, message, state, options, callback) => {
  try {
- const service = runtime.getService(
- KnowledgeService.serviceType
- );
+ const service = runtime.getService(KnowledgeService.serviceType);
  if (!service) {
  throw new Error("Knowledge service not available");
  }
  const text = message.content.text || "";
- const query = text.replace(
- /^(search|find|look up|query)\s+(your\s+)?knowledge\s+(base\s+)?(for\s+)?/i,
- ""
- ).trim();
+ const query = text.replace(/^(search|find|look up|query)\s+(your\s+)?knowledge\s+(base\s+)?(for\s+)?/i, "").trim();
  if (!query) {
  const response2 = {
  text: "What would you like me to search for in my knowledge base?"
@@ -3217,7 +4136,7 @@ ${formattedResults}`
3217
4136
  await callback(response);
3218
4137
  }
3219
4138
  } catch (error) {
3220
- logger5.error("Error in SEARCH_KNOWLEDGE action:", error);
4139
+ logger8.error("Error in SEARCH_KNOWLEDGE action:", error);
3221
4140
  const errorResponse = {
3222
4141
  text: `I encountered an error while searching the knowledge base: ${error instanceof Error ? error.message : "Unknown error"}`
3223
4142
  };
@@ -3230,9 +4149,9 @@ ${formattedResults}`
3230
4149
  var knowledgeActions = [processKnowledgeAction, searchKnowledgeAction];
3231
4150
 
3232
4151
  // src/routes.ts
3233
- import { createUniqueUuid as createUniqueUuid2, logger as logger6, ModelType as ModelType4 } from "@elizaos/core";
3234
- import fs3 from "fs";
3235
- import path3 from "path";
4152
+ import { createUniqueUuid as createUniqueUuid2, logger as logger9, ModelType as ModelType4 } from "@elizaos/core";
4153
+ import fs4 from "fs";
4154
+ import path4 from "path";
3236
4155
  import multer from "multer";
3237
4156
  var createUploadMiddleware = (runtime) => {
3238
4157
  const uploadDir = runtime.getSetting("KNOWLEDGE_UPLOAD_DIR") || "/tmp/uploads/";
@@ -3277,11 +4196,11 @@ function sendError(res, status, code, message, details) {
  res.end(JSON.stringify({ success: false, error: { code, message, details } }));
  }
  var cleanupFile = (filePath) => {
- if (filePath && fs3.existsSync(filePath)) {
+ if (filePath && fs4.existsSync(filePath)) {
  try {
- fs3.unlinkSync(filePath);
+ fs4.unlinkSync(filePath);
  } catch (error) {
- logger6.error(`Error cleaning up file ${filePath}:`, error);
+ logger9.error(`Error cleaning up file ${filePath}:`, error);
  }
  }
  };
@@ -3308,15 +4227,15 @@ async function uploadKnowledgeHandler(req, res, runtime) {
  }
  const invalidFiles = files.filter((file) => {
  if (file.size === 0) {
- logger6.warn(`File ${file.originalname} is empty`);
+ logger9.warn(`File ${file.originalname} is empty`);
  return true;
  }
  if (!file.originalname || file.originalname.trim() === "") {
- logger6.warn(`File has no name`);
+ logger9.warn(`File has no name`);
  return true;
  }
  if (!file.path) {
- logger6.warn(`File ${file.originalname} has no path`);
+ logger9.warn(`File ${file.originalname} has no path`);
  return true;
  }
  return false;
@@ -3333,7 +4252,7 @@ async function uploadKnowledgeHandler(req, res, runtime) {
  }
  const agentId = req.body.agentId || req.query.agentId;
  if (!agentId) {
- logger6.error("[KNOWLEDGE UPLOAD HANDLER] No agent ID provided in request");
+ logger9.error("[Document Processor] \u274C No agent ID provided in upload request");
  return sendError(
  res,
  400,
@@ -3342,15 +4261,15 @@ async function uploadKnowledgeHandler(req, res, runtime) {
  );
  }
  const worldId = req.body.worldId || agentId;
- logger6.info(`[KNOWLEDGE UPLOAD HANDLER] Processing upload for agent: ${agentId}`);
+ logger9.info(`[Document Processor] \u{1F4E4} Processing file upload for agent: ${agentId}`);
  const processingPromises = files.map(async (file, index) => {
  const originalFilename = file.originalname;
  const filePath = file.path;
- logger6.debug(
- `[KNOWLEDGE UPLOAD HANDLER] File: ${originalFilename}, Agent ID: ${agentId}, World ID: ${worldId}`
+ logger9.debug(
+ `[Document Processor] \u{1F4C4} Processing file: ${originalFilename} (agent: ${agentId})`
  );
  try {
- const fileBuffer = await fs3.promises.readFile(filePath);
+ const fileBuffer = await fs4.promises.readFile(filePath);
  const base64Content = fileBuffer.toString("base64");
  const addKnowledgeOpts = {
  agentId,
@@ -3381,8 +4300,9 @@ async function uploadKnowledgeHandler(req, res, runtime) {
  status: "success"
  };
  } catch (fileError) {
- logger6.error(
- `[KNOWLEDGE UPLOAD HANDLER] Error processing file ${file.originalname}: ${fileError}`
+ logger9.error(
+ `[Document Processor] \u274C Error processing file ${file.originalname}:`,
+ fileError
  );
  cleanupFile(filePath);
  return {
@@ -3403,7 +4323,7 @@ async function uploadKnowledgeHandler(req, res, runtime) {
  }
  const agentId = req.body.agentId || req.query.agentId;
  if (!agentId) {
- logger6.error("[KNOWLEDGE URL HANDLER] No agent ID provided in request");
+ logger9.error("[Document Processor] \u274C No agent ID provided in URL request");
  return sendError(
  res,
  400,
@@ -3411,7 +4331,7 @@ async function uploadKnowledgeHandler(req, res, runtime) {
  "Agent ID is required for uploading knowledge from URLs"
  );
  }
- logger6.info(`[KNOWLEDGE URL HANDLER] Processing URL upload for agent: ${agentId}`);
+ logger9.info(`[Document Processor] \u{1F4E4} Processing URL upload for agent: ${agentId}`);
  const processingPromises = fileUrls.map(async (fileUrl) => {
  try {
  const normalizedUrl = normalizeS3Url(fileUrl);
@@ -3419,7 +4339,7 @@ async function uploadKnowledgeHandler(req, res, runtime) {
  const pathSegments = urlObject.pathname.split("/");
  const encodedFilename = pathSegments[pathSegments.length - 1] || "document.pdf";
  const originalFilename = decodeURIComponent(encodedFilename);
- logger6.info(`[KNOWLEDGE URL HANDLER] Fetching content from URL: ${fileUrl}`);
+ logger9.debug(`[Document Processor] \u{1F310} Fetching content from URL: ${fileUrl}`);
  const { content, contentType: fetchedContentType } = await fetchUrlContent(fileUrl);
  let contentType = fetchedContentType;
  if (contentType === "application/octet-stream") {
@@ -3459,8 +4379,8 @@ async function uploadKnowledgeHandler(req, res, runtime) {
  url: normalizedUrl
  }
  };
- logger6.debug(
- `[KNOWLEDGE URL HANDLER] Processing knowledge from URL: ${fileUrl} (type: ${contentType})`
+ logger9.debug(
+ `[Document Processor] \u{1F4C4} Processing knowledge from URL: ${originalFilename} (type: ${contentType})`
  );
  const result = await service.addKnowledge(addKnowledgeOpts);
  return {
@@ -3474,7 +4394,7 @@ async function uploadKnowledgeHandler(req, res, runtime) {
  status: "success"
  };
  } catch (urlError) {
- logger6.error(`[KNOWLEDGE URL HANDLER] Error processing URL ${fileUrl}: ${urlError}`);
+ logger9.error(`[Document Processor] \u274C Error processing URL ${fileUrl}:`, urlError);
  return {
  fileUrl,
  status: "error_processing",
@@ -3486,7 +4406,7 @@ async function uploadKnowledgeHandler(req, res, runtime) {
  sendSuccess(res, results);
  }
  } catch (error) {
- logger6.error("[KNOWLEDGE HANDLER] Error processing knowledge:", error);
+ logger9.error("[Document Processor] \u274C Error processing knowledge:", error);
  if (hasUploadedFiles) {
  cleanupFiles(req.files);
  }
@@ -3504,7 +4424,7 @@ async function getKnowledgeDocumentsHandler(req, res, runtime) {
  );
  }
  try {
- const limit = req.query.limit ? Number.parseInt(req.query.limit, 10) : 20;
+ const limit = req.query.limit ? Number.parseInt(req.query.limit, 10) : 1e4;
  const before = req.query.before ? Number.parseInt(req.query.before, 10) : Date.now();
  const includeEmbedding = req.query.includeEmbedding === "true";
  const agentId = req.query.agentId;
@@ -3525,8 +4445,8 @@ async function getKnowledgeDocumentsHandler(req, res, runtime) {
  // Or if the URL is stored in the metadata (check if it exists)
  memory.metadata && "url" in memory.metadata && typeof memory.metadata.url === "string" && normalizedRequestUrls.includes(normalizeS3Url(memory.metadata.url))
  );
- logger6.debug(
- `[KNOWLEDGE GET HANDLER] Filtered documents by URLs: ${fileUrls.length} URLs, found ${filteredMemories.length} matching documents`
+ logger9.debug(
+ `[Document Processor] \u{1F50D} Filtered documents by URLs: ${fileUrls.length} URLs, found ${filteredMemories.length} matching documents`
  );
  }
  const cleanMemories = includeEmbedding ? filteredMemories : filteredMemories.map((memory) => ({
@@ -3540,15 +4460,12 @@ async function getKnowledgeDocumentsHandler(req, res, runtime) {
  totalRequested: fileUrls ? fileUrls.length : 0
  });
  } catch (error) {
- logger6.error("[KNOWLEDGE GET HANDLER] Error retrieving documents:", error);
+ logger9.error("[Document Processor] \u274C Error retrieving documents:", error);
  sendError(res, 500, "RETRIEVAL_ERROR", "Failed to retrieve documents", error.message);
  }
  }
  async function deleteKnowledgeDocumentHandler(req, res, runtime) {
- logger6.debug(`[KNOWLEDGE DELETE HANDLER] Received DELETE request:
- - path: ${req.path}
- - params: ${JSON.stringify(req.params)}
- `);
+ logger9.debug(`[Document Processor] \u{1F5D1}\uFE0F DELETE request for document: ${req.params.knowledgeId}`);
  const service = runtime.getService(KnowledgeService.serviceType);
  if (!service) {
  return sendError(
@@ -3560,29 +4477,22 @@ async function deleteKnowledgeDocumentHandler(req, res, runtime) {
  }
  const knowledgeId = req.params.knowledgeId;
  if (!knowledgeId || knowledgeId.length < 36) {
- logger6.error(`[KNOWLEDGE DELETE HANDLER] Invalid knowledge ID format: ${knowledgeId}`);
+ logger9.error(`[Document Processor] \u274C Invalid knowledge ID format: ${knowledgeId}`);
  return sendError(res, 400, "INVALID_ID", "Invalid Knowledge ID format");
  }
  try {
  const typedKnowledgeId = knowledgeId;
- logger6.debug(
- `[KNOWLEDGE DELETE HANDLER] Attempting to delete document with ID: ${typedKnowledgeId}`
- );
+ logger9.debug(`[Document Processor] \u{1F5D1}\uFE0F Deleting document: ${typedKnowledgeId}`);
  await service.deleteMemory(typedKnowledgeId);
- logger6.info(
- `[KNOWLEDGE DELETE HANDLER] Successfully deleted document with ID: ${typedKnowledgeId}`
- );
+ logger9.info(`[Document Processor] \u2705 Successfully deleted document: ${typedKnowledgeId}`);
  sendSuccess(res, null, 204);
  } catch (error) {
- logger6.error(`[KNOWLEDGE DELETE HANDLER] Error deleting document ${knowledgeId}:`, error);
+ logger9.error(`[Document Processor] \u274C Error deleting document ${knowledgeId}:`, error);
  sendError(res, 500, "DELETE_ERROR", "Failed to delete document", error.message);
  }
  }
  async function getKnowledgeByIdHandler(req, res, runtime) {
- logger6.debug(`[KNOWLEDGE GET BY ID HANDLER] Received GET request:
- - path: ${req.path}
- - params: ${JSON.stringify(req.params)}
- `);
+ logger9.debug(`[Document Processor] \u{1F50D} GET request for document: ${req.params.knowledgeId}`);
  const service = runtime.getService(KnowledgeService.serviceType);
  if (!service) {
  return sendError(
@@ -3594,15 +4504,15 @@ async function getKnowledgeByIdHandler(req, res, runtime) {
  }
  const knowledgeId = req.params.knowledgeId;
  if (!knowledgeId || knowledgeId.length < 36) {
- logger6.error(`[KNOWLEDGE GET BY ID HANDLER] Invalid knowledge ID format: ${knowledgeId}`);
+ logger9.error(`[Document Processor] \u274C Invalid knowledge ID format: ${knowledgeId}`);
  return sendError(res, 400, "INVALID_ID", "Invalid Knowledge ID format");
  }
  try {
- logger6.debug(`[KNOWLEDGE GET BY ID HANDLER] Retrieving document with ID: ${knowledgeId}`);
+ logger9.debug(`[Document Processor] \u{1F50D} Retrieving document: ${knowledgeId}`);
  const agentId = req.query.agentId;
  const memories = await service.getMemories({
  tableName: "documents",
- count: 1e3
+ count: 1e4
  });
  const typedKnowledgeId = knowledgeId;
  const document = memories.find((memory) => memory.id === typedKnowledgeId);
@@ -3615,19 +4525,19 @@ async function getKnowledgeByIdHandler(req, res, runtime) {
  };
  sendSuccess(res, { document: cleanDocument });
  } catch (error) {
- logger6.error(`[KNOWLEDGE GET BY ID HANDLER] Error retrieving document ${knowledgeId}:`, error);
+ logger9.error(`[Document Processor] \u274C Error retrieving document ${knowledgeId}:`, error);
  sendError(res, 500, "RETRIEVAL_ERROR", "Failed to retrieve document", error.message);
  }
  }
  async function knowledgePanelHandler(req, res, runtime) {
  const agentId = runtime.agentId;
- logger6.debug(`[KNOWLEDGE PANEL] Serving panel for agent ${agentId}, request path: ${req.path}`);
+ logger9.debug(`[Document Processor] \u{1F310} Serving knowledge panel for agent ${agentId}`);
  try {
- const currentDir = path3.dirname(new URL(import.meta.url).pathname);
- const frontendPath = path3.join(currentDir, "../dist/index.html");
- logger6.debug(`[KNOWLEDGE PANEL] Looking for frontend at: ${frontendPath}`);
- if (fs3.existsSync(frontendPath)) {
- const html = await fs3.promises.readFile(frontendPath, "utf8");
+ const currentDir = path4.dirname(new URL(import.meta.url).pathname);
+ const frontendPath = path4.join(currentDir, "../dist/index.html");
+ logger9.debug(`[Document Processor] \u{1F310} Looking for frontend at: ${frontendPath}`);
+ if (fs4.existsSync(frontendPath)) {
+ const html = await fs4.promises.readFile(frontendPath, "utf8");
  const injectedHtml = html.replace(
  "<head>",
  `<head>
@@ -3643,10 +4553,10 @@ async function knowledgePanelHandler(req, res, runtime) {
  } else {
  let cssFile = "index.css";
  let jsFile = "index.js";
- const manifestPath = path3.join(currentDir, "../dist/manifest.json");
- if (fs3.existsSync(manifestPath)) {
+ const manifestPath = path4.join(currentDir, "../dist/manifest.json");
+ if (fs4.existsSync(manifestPath)) {
  try {
- const manifestContent = await fs3.promises.readFile(manifestPath, "utf8");
+ const manifestContent = await fs4.promises.readFile(manifestPath, "utf8");
  const manifest = JSON.parse(manifestContent);
  for (const [key, value] of Object.entries(manifest)) {
  if (typeof value === "object" && value !== null) {
@@ -3659,10 +4569,10 @@ async function knowledgePanelHandler(req, res, runtime) {
  }
  }
  } catch (manifestError) {
- logger6.error("[KNOWLEDGE PANEL] Error reading manifest:", manifestError);
+ logger9.error("[Document Processor] \u274C Error reading manifest:", manifestError);
  }
  }
- logger6.debug(`[KNOWLEDGE PANEL] Using fallback with CSS: ${cssFile}, JS: ${jsFile}`);
+ logger9.debug(`[Document Processor] \u{1F310} Using fallback with CSS: ${cssFile}, JS: ${jsFile}`);
  const html = `
  <!DOCTYPE html>
  <html lang="en">
@@ -3696,16 +4606,14 @@ async function knowledgePanelHandler(req, res, runtime) {
  res.end(html);
  }
  } catch (error) {
- logger6.error("[KNOWLEDGE PANEL] Error serving frontend:", error);
+ logger9.error("[Document Processor] \u274C Error serving frontend:", error);
  sendError(res, 500, "FRONTEND_ERROR", "Failed to load knowledge panel", error.message);
  }
  }
  async function frontendAssetHandler(req, res, runtime) {
  try {
- logger6.debug(
- `[KNOWLEDGE ASSET HANDLER] Called with req.path: ${req.path}, req.originalUrl: ${req.originalUrl}, req.params: ${JSON.stringify(req.params)}`
- );
- const currentDir = path3.dirname(new URL(import.meta.url).pathname);
+ logger9.debug(`[Document Processor] \u{1F310} Asset request: ${req.path}`);
+ const currentDir = path4.dirname(new URL(import.meta.url).pathname);
  const assetRequestPath = req.path;
  const assetsMarker = "/assets/";
  const assetsStartIndex = assetRequestPath.indexOf(assetsMarker);
@@ -3721,10 +4629,10 @@ async function frontendAssetHandler(req, res, runtime) {
  `Invalid asset name: '${assetName}' from path ${assetRequestPath}`
  );
  }
- const assetPath = path3.join(currentDir, "../dist/assets", assetName);
- logger6.debug(`[KNOWLEDGE ASSET HANDLER] Attempting to serve asset: ${assetPath}`);
- if (fs3.existsSync(assetPath)) {
- const fileStream = fs3.createReadStream(assetPath);
+ const assetPath = path4.join(currentDir, "../dist/assets", assetName);
+ logger9.debug(`[Document Processor] \u{1F310} Serving asset: ${assetPath}`);
+ if (fs4.existsSync(assetPath)) {
+ const fileStream = fs4.createReadStream(assetPath);
  let contentType = "application/octet-stream";
  if (assetPath.endsWith(".js")) {
  contentType = "application/javascript";
@@ -3737,7 +4645,7 @@ async function frontendAssetHandler(req, res, runtime) {
  sendError(res, 404, "NOT_FOUND", `Asset not found: ${req.url}`);
  }
  } catch (error) {
- logger6.error(`[KNOWLEDGE ASSET HANDLER] Error serving asset ${req.url}:`, error);
+ logger9.error(`[Document Processor] \u274C Error serving asset ${req.url}:`, error);
  sendError(res, 500, "ASSET_ERROR", `Failed to load asset ${req.url}`, error.message);
  }
  }
@@ -3751,8 +4659,8 @@ async function getKnowledgeChunksHandler(req, res, runtime) {
  const documentsOnly = req.query.documentsOnly === "true";
  const documents = await service.getMemories({
  tableName: "documents",
- count: 1e3,
- // Reasonable limit for documents
+ count: 1e4,
+ // High limit to get all documents
  end: Date.now()
  });
  if (documentsOnly) {
@@ -3798,7 +4706,7 @@ async function getKnowledgeChunksHandler(req, res, runtime) {
  }
  });
  } catch (error) {
- logger6.error("[KNOWLEDGE CHUNKS GET HANDLER] Error retrieving chunks:", error);
+ logger9.error("[Document Processor] \u274C Error retrieving chunks:", error);
  sendError(res, 500, "RETRIEVAL_ERROR", "Failed to retrieve knowledge chunks", error.message);
  }
  }
@@ -3820,15 +4728,15 @@ async function searchKnowledgeHandler(req, res, runtime) {
  return sendError(res, 400, "INVALID_QUERY", "Search query cannot be empty");
  }
  if (req.query.threshold && (parsedThreshold < 0 || parsedThreshold > 1)) {
- logger6.debug(
- `[KNOWLEDGE SEARCH] Threshold value ${parsedThreshold} was clamped to ${matchThreshold}`
+ logger9.debug(
+ `[Document Processor] \u{1F50D} Threshold value ${parsedThreshold} was clamped to ${matchThreshold}`
  );
  }
  if (req.query.limit && (parsedLimit < 1 || parsedLimit > 100)) {
- logger6.debug(`[KNOWLEDGE SEARCH] Limit value ${parsedLimit} was clamped to ${limit}`);
+ logger9.debug(`[Document Processor] \u{1F50D} Limit value ${parsedLimit} was clamped to ${limit}`);
  }
- logger6.debug(
- `[KNOWLEDGE SEARCH] Searching for: "${searchText}" with threshold: ${matchThreshold}, limit: ${limit}`
+ logger9.debug(
+ `[Document Processor] \u{1F50D} Searching: "${searchText}" (threshold: ${matchThreshold}, limit: ${limit})`
  );
  const embedding = await runtime.useModel(ModelType4.TEXT_EMBEDDING, {
  text: searchText
@@ -3854,7 +4762,7 @@ async function searchKnowledgeHandler(req, res, runtime) {
  documentFilename = document.metadata.filename || documentFilename;
  }
  } catch (e) {
- logger6.debug(`Could not fetch document ${documentId} for fragment`);
+ logger9.debug(`Could not fetch document ${documentId} for fragment`);
  }
  }
  return {
@@ -3869,8 +4777,8 @@ async function searchKnowledgeHandler(req, res, runtime) {
  };
  })
  );
- logger6.info(
- `[KNOWLEDGE SEARCH] Found ${enhancedResults.length} results for query: "${searchText}"`
+ logger9.info(
+ `[Document Processor] \u{1F50D} Found ${enhancedResults.length} results for: "${searchText}"`
  );
  sendSuccess(res, {
  query: searchText,
@@ -3879,7 +4787,7 @@ async function searchKnowledgeHandler(req, res, runtime) {
  count: enhancedResults.length
  });
  } catch (error) {
- logger6.error("[KNOWLEDGE SEARCH] Error searching knowledge:", error);
+ logger9.error("[Document Processor] \u274C Error searching knowledge:", error);
  sendError(res, 500, "SEARCH_ERROR", "Failed to search knowledge", error.message);
  }
  }
@@ -3891,7 +4799,7 @@ async function uploadKnowledgeWithMulter(req, res, runtime) {
  );
  uploadArray(req, res, (err) => {
  if (err) {
- logger6.error("[KNOWLEDGE UPLOAD] Multer error:", err);
+ logger9.error("[Document Processor] \u274C File upload error:", err);
  return sendError(res, 400, "UPLOAD_ERROR", err.message);
  }
  uploadKnowledgeHandler(req, res, runtime);
@@ -3946,70 +4854,6 @@ var knowledgeRoutes = [
  var knowledgePlugin = {
  name: "knowledge",
  description: "Plugin for Retrieval Augmented Generation, including knowledge management and embedding.",
- config: {
- // Token limits - these will be read from runtime settings during init
- MAX_INPUT_TOKENS: "4000",
- MAX_OUTPUT_TOKENS: "4096",
- // Contextual Knowledge settings
- CTX_KNOWLEDGE_ENABLED: "false"
- },
- async init(config, runtime) {
- logger7.info("Initializing Knowledge Plugin...");
- try {
- logger7.info("Validating model configuration for Knowledge plugin...");
- const validatedConfig = validateModelConfig(runtime);
- if (validatedConfig.CTX_KNOWLEDGE_ENABLED) {
- logger7.info("Running in Contextual Knowledge mode with text generation capabilities.");
- logger7.info(
- `Using ${validatedConfig.EMBEDDING_PROVIDER} for embeddings and ${validatedConfig.TEXT_PROVIDER} for text generation.`
- );
- } else {
- const usingPluginOpenAI = !process.env.EMBEDDING_PROVIDER;
- if (usingPluginOpenAI) {
- logger7.info(
- "Running in Basic Embedding mode with auto-detected configuration from plugin-openai."
- );
- } else {
- logger7.info(
- "Running in Basic Embedding mode (CTX_KNOWLEDGE_ENABLED=false). TEXT_PROVIDER and TEXT_MODEL not required."
- );
- }
- logger7.info(
- `Using ${validatedConfig.EMBEDDING_PROVIDER} for embeddings with ${validatedConfig.TEXT_EMBEDDING_MODEL}.`
- );
- }
- logger7.info("Model configuration validated successfully.");
- if (runtime) {
- logger7.info(`Knowledge Plugin initialized for agent: ${runtime.agentId}`);
- const loadDocsOnStartup = config.LOAD_DOCS_ON_STARTUP === "true" || process.env.LOAD_DOCS_ON_STARTUP === "true";
- if (loadDocsOnStartup) {
- logger7.info("LOAD_DOCS_ON_STARTUP is enabled. Scheduling document loading...");
- setTimeout(async () => {
- try {
- const service = runtime.getService(KnowledgeService.serviceType);
- if (service instanceof KnowledgeService) {
- const { loadDocsFromPath: loadDocsFromPath2 } = await import("./docs-loader-PF5X4UMB.js");
- const result = await loadDocsFromPath2(service, runtime.agentId);
- if (result.successful > 0) {
- logger7.info(`Loaded ${result.successful} documents from docs folder on startup`);
- }
- }
- } catch (error) {
- logger7.error("Error loading documents on startup:", error);
- }
- }, 5e3);
- } else {
- logger7.info("LOAD_DOCS_ON_STARTUP is not enabled. Skipping automatic document loading.");
- }
- }
- logger7.info(
- "Knowledge Plugin initialized. Frontend panel should be discoverable via its public route."
- );
- } catch (error) {
- logger7.error("Failed to initialize Knowledge plugin:", error);
- throw error;
- }
- },
  services: [KnowledgeService],
  providers: [knowledgeProvider],
  routes: knowledgeRoutes,