@elizaos/plugin-knowledge 1.0.9 → 1.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +29 -47
- package/dist/.vite/manifest.json +2 -2
- package/dist/assets/index-CzI8hR5q.css +1 -0
- package/dist/assets/index-DimDNB3w.js +160 -0
- package/dist/{chunk-QH7GBNKB.js → chunk-RFXW7QQK.js} +9 -3
- package/dist/chunk-RFXW7QQK.js.map +1 -0
- package/dist/{docs-loader-5INCF4VJ.js → docs-loader-5H4HRYEE.js} +2 -2
- package/dist/index.d.ts +0 -3
- package/dist/index.html +2 -2
- package/dist/index.js +576 -386
- package/dist/index.js.map +1 -1
- package/package.json +8 -6
- package/dist/assets/index-BIGrGyiB.css +0 -1
- package/dist/assets/index-BdW2hLiy.js +0 -165
- package/dist/chunk-QH7GBNKB.js.map +0 -1
- /package/dist/{docs-loader-5INCF4VJ.js.map → docs-loader-5H4HRYEE.js.map} +0 -0
package/dist/index.js
CHANGED
|
@@ -8,10 +8,10 @@ import {
|
|
|
8
8
|
looksLikeBase64,
|
|
9
9
|
normalizeS3Url,
|
|
10
10
|
v4_default
|
|
11
|
-
} from "./chunk-
|
|
11
|
+
} from "./chunk-RFXW7QQK.js";
|
|
12
12
|
|
|
13
13
|
// src/index.ts
|
|
14
|
-
import { logger as
|
|
14
|
+
import { logger as logger8 } from "@elizaos/core";
|
|
15
15
|
|
|
16
16
|
// src/types.ts
|
|
17
17
|
import z from "zod";
|
|
@@ -60,18 +60,24 @@ function validateModelConfig(runtime) {
|
|
|
60
60
|
}
|
|
61
61
|
return process.env[key] || defaultValue;
|
|
62
62
|
};
|
|
63
|
-
const
|
|
64
|
-
|
|
63
|
+
const ctxKnowledgeEnabledSetting = getSetting("CTX_KNOWLEDGE_ENABLED");
|
|
64
|
+
const cleanSetting = ctxKnowledgeEnabledSetting?.toString().trim().toLowerCase();
|
|
65
|
+
const ctxKnowledgeEnabled = cleanSetting === "true";
|
|
66
|
+
logger.debug(
|
|
67
|
+
`[Document Processor] CTX_KNOWLEDGE_ENABLED: '${ctxKnowledgeEnabledSetting}' \u2192 ${ctxKnowledgeEnabled} (runtime: ${!!runtime})`
|
|
68
|
+
);
|
|
65
69
|
const embeddingProvider = getSetting("EMBEDDING_PROVIDER");
|
|
66
70
|
const assumePluginOpenAI = !embeddingProvider;
|
|
67
71
|
if (assumePluginOpenAI) {
|
|
68
72
|
const openaiApiKey2 = getSetting("OPENAI_API_KEY");
|
|
69
73
|
const openaiEmbeddingModel = getSetting("OPENAI_EMBEDDING_MODEL");
|
|
70
74
|
if (openaiApiKey2 && openaiEmbeddingModel) {
|
|
71
|
-
logger.debug(
|
|
75
|
+
logger.debug(
|
|
76
|
+
"[Document Processor] EMBEDDING_PROVIDER not specified, using configuration from plugin-openai"
|
|
77
|
+
);
|
|
72
78
|
} else {
|
|
73
79
|
logger.debug(
|
|
74
|
-
"EMBEDDING_PROVIDER not specified. Assuming embeddings are provided by another plugin (e.g., plugin-google-genai)."
|
|
80
|
+
"[Document Processor] EMBEDDING_PROVIDER not specified. Assuming embeddings are provided by another plugin (e.g., plugin-google-genai)."
|
|
75
81
|
);
|
|
76
82
|
}
|
|
77
83
|
}
|
|
@@ -95,7 +101,7 @@ function validateModelConfig(runtime) {
|
|
|
95
101
|
MAX_INPUT_TOKENS: getSetting("MAX_INPUT_TOKENS", "4000"),
|
|
96
102
|
MAX_OUTPUT_TOKENS: getSetting("MAX_OUTPUT_TOKENS", "4096"),
|
|
97
103
|
EMBEDDING_DIMENSION: embeddingDimension,
|
|
98
|
-
CTX_KNOWLEDGE_ENABLED:
|
|
104
|
+
CTX_KNOWLEDGE_ENABLED: ctxKnowledgeEnabled
|
|
99
105
|
});
|
|
100
106
|
validateConfigRequirements(config, assumePluginOpenAI);
|
|
101
107
|
return config;
|
|
@@ -116,13 +122,15 @@ function validateConfigRequirements(config, assumePluginOpenAI) {
|
|
|
116
122
|
throw new Error('GOOGLE_API_KEY is required when EMBEDDING_PROVIDER is set to "google"');
|
|
117
123
|
}
|
|
118
124
|
if (!embeddingProvider) {
|
|
119
|
-
logger.debug(
|
|
125
|
+
logger.debug(
|
|
126
|
+
"[Document Processor] No EMBEDDING_PROVIDER specified. Embeddings will be handled by the runtime."
|
|
127
|
+
);
|
|
120
128
|
}
|
|
121
129
|
if (assumePluginOpenAI && config.OPENAI_API_KEY && !config.TEXT_EMBEDDING_MODEL) {
|
|
122
130
|
throw new Error("OPENAI_EMBEDDING_MODEL is required when using plugin-openai configuration");
|
|
123
131
|
}
|
|
124
132
|
if (config.CTX_KNOWLEDGE_ENABLED) {
|
|
125
|
-
logger.debug("
|
|
133
|
+
logger.debug("[Document Processor] CTX validation: Checking text generation settings...");
|
|
126
134
|
if (config.TEXT_PROVIDER === "openai" && !config.OPENAI_API_KEY) {
|
|
127
135
|
throw new Error('OPENAI_API_KEY is required when TEXT_PROVIDER is set to "openai"');
|
|
128
136
|
}
|
|
@@ -139,17 +147,21 @@ function validateConfigRequirements(config, assumePluginOpenAI) {
|
|
|
139
147
|
const modelName = config.TEXT_MODEL?.toLowerCase() || "";
|
|
140
148
|
if (modelName.includes("claude") || modelName.includes("gemini")) {
|
|
141
149
|
logger.debug(
|
|
142
|
-
`Using ${modelName} with OpenRouter. This configuration supports document caching for improved performance.`
|
|
150
|
+
`[Document Processor] Using ${modelName} with OpenRouter. This configuration supports document caching for improved performance.`
|
|
143
151
|
);
|
|
144
152
|
}
|
|
145
153
|
}
|
|
146
154
|
} else {
|
|
155
|
+
logger.info("[Document Processor] Contextual Knowledge is DISABLED!");
|
|
156
|
+
logger.info("[Document Processor] This means documents will NOT be enriched with context.");
|
|
147
157
|
if (assumePluginOpenAI) {
|
|
148
|
-
logger.
|
|
149
|
-
"
|
|
158
|
+
logger.info(
|
|
159
|
+
"[Document Processor] Embeddings will be handled by the runtime (e.g., plugin-openai, plugin-google-genai)."
|
|
150
160
|
);
|
|
151
161
|
} else {
|
|
152
|
-
logger.
|
|
162
|
+
logger.info(
|
|
163
|
+
"[Document Processor] Using configured embedding provider for basic embeddings only."
|
|
164
|
+
);
|
|
153
165
|
}
|
|
154
166
|
}
|
|
155
167
|
}
|
|
@@ -164,7 +176,18 @@ async function getProviderRateLimits(runtime) {
|
|
|
164
176
|
const maxConcurrentRequests = parseInt(getSetting("MAX_CONCURRENT_REQUESTS", "30"), 10);
|
|
165
177
|
const requestsPerMinute = parseInt(getSetting("REQUESTS_PER_MINUTE", "60"), 10);
|
|
166
178
|
const tokensPerMinute = parseInt(getSetting("TOKENS_PER_MINUTE", "150000"), 10);
|
|
167
|
-
|
|
179
|
+
const primaryProvider = config.TEXT_PROVIDER || config.EMBEDDING_PROVIDER;
|
|
180
|
+
logger.debug(
|
|
181
|
+
`[Document Processor] Rate limiting for ${primaryProvider}: ${requestsPerMinute} RPM, ${tokensPerMinute} TPM, ${maxConcurrentRequests} concurrent`
|
|
182
|
+
);
|
|
183
|
+
switch (primaryProvider) {
|
|
184
|
+
case "anthropic":
|
|
185
|
+
return {
|
|
186
|
+
maxConcurrentRequests,
|
|
187
|
+
requestsPerMinute,
|
|
188
|
+
tokensPerMinute,
|
|
189
|
+
provider: "anthropic"
|
|
190
|
+
};
|
|
168
191
|
case "openai":
|
|
169
192
|
return {
|
|
170
193
|
maxConcurrentRequests,
|
|
@@ -184,7 +207,7 @@ async function getProviderRateLimits(runtime) {
|
|
|
184
207
|
maxConcurrentRequests,
|
|
185
208
|
requestsPerMinute,
|
|
186
209
|
tokensPerMinute,
|
|
187
|
-
provider:
|
|
210
|
+
provider: primaryProvider || "unknown"
|
|
188
211
|
};
|
|
189
212
|
}
|
|
190
213
|
}
|
|
@@ -405,9 +428,7 @@ Create an enriched version of this chunk by adding critical surrounding context.
|
|
|
405
428
|
Provide ONLY the enriched chunk text in your response:`;
|
|
406
429
|
function getContextualizationPrompt(docContent, chunkContent, minTokens = CONTEXT_TARGETS.DEFAULT.MIN_TOKENS, maxTokens = CONTEXT_TARGETS.DEFAULT.MAX_TOKENS, promptTemplate = CONTEXTUAL_CHUNK_ENRICHMENT_PROMPT_TEMPLATE) {
|
|
407
430
|
if (!docContent || !chunkContent) {
|
|
408
|
-
console.warn(
|
|
409
|
-
"Document content or chunk content is missing for contextualization."
|
|
410
|
-
);
|
|
431
|
+
console.warn("Document content or chunk content is missing for contextualization.");
|
|
411
432
|
return "Error: Document or chunk content missing.";
|
|
412
433
|
}
|
|
413
434
|
const chunkTokens = Math.ceil(chunkContent.length / DEFAULT_CHARS_PER_TOKEN);
|
|
@@ -478,15 +499,8 @@ function getPromptForMimeType(mimeType, docContent, chunkContent) {
|
|
|
478
499
|
minTokens = CONTEXT_TARGETS.TECHNICAL.MIN_TOKENS;
|
|
479
500
|
maxTokens = CONTEXT_TARGETS.TECHNICAL.MAX_TOKENS;
|
|
480
501
|
promptTemplate = TECHNICAL_PROMPT_TEMPLATE;
|
|
481
|
-
console.debug("Using technical documentation prompt template");
|
|
482
502
|
}
|
|
483
|
-
return getContextualizationPrompt(
|
|
484
|
-
docContent,
|
|
485
|
-
chunkContent,
|
|
486
|
-
minTokens,
|
|
487
|
-
maxTokens,
|
|
488
|
-
promptTemplate
|
|
489
|
-
);
|
|
503
|
+
return getContextualizationPrompt(docContent, chunkContent, minTokens, maxTokens, promptTemplate);
|
|
490
504
|
}
|
|
491
505
|
function getCachingPromptForMimeType(mimeType, chunkContent) {
|
|
492
506
|
let minTokens = CONTEXT_TARGETS.DEFAULT.MIN_TOKENS;
|
|
@@ -506,12 +520,7 @@ function getCachingPromptForMimeType(mimeType, chunkContent) {
|
|
|
506
520
|
minTokens = CONTEXT_TARGETS.TECHNICAL.MIN_TOKENS;
|
|
507
521
|
maxTokens = CONTEXT_TARGETS.TECHNICAL.MAX_TOKENS;
|
|
508
522
|
}
|
|
509
|
-
return getCachingContextualizationPrompt(
|
|
510
|
-
chunkContent,
|
|
511
|
-
mimeType,
|
|
512
|
-
minTokens,
|
|
513
|
-
maxTokens
|
|
514
|
-
);
|
|
523
|
+
return getCachingContextualizationPrompt(chunkContent, mimeType, minTokens, maxTokens);
|
|
515
524
|
}
|
|
516
525
|
function containsMathematicalContent(content) {
|
|
517
526
|
const latexMathPatterns = [
|
|
@@ -575,9 +584,7 @@ function containsMathematicalContent(content) {
|
|
|
575
584
|
"coefficient"
|
|
576
585
|
];
|
|
577
586
|
const contentLower = content.toLowerCase();
|
|
578
|
-
const mathKeywordCount = mathKeywords.filter(
|
|
579
|
-
(keyword) => contentLower.includes(keyword)
|
|
580
|
-
).length;
|
|
587
|
+
const mathKeywordCount = mathKeywords.filter((keyword) => contentLower.includes(keyword)).length;
|
|
581
588
|
return mathKeywordCount >= 2;
|
|
582
589
|
}
|
|
583
590
|
function isTechnicalDocumentation(content) {
|
|
@@ -626,9 +633,7 @@ function isTechnicalDocumentation(content) {
|
|
|
626
633
|
}
|
|
627
634
|
function getChunkWithContext(chunkContent, generatedContext) {
|
|
628
635
|
if (!generatedContext || generatedContext.trim() === "") {
|
|
629
|
-
console.warn(
|
|
630
|
-
"Generated context is empty. Falling back to original chunk content."
|
|
631
|
-
);
|
|
636
|
+
console.warn("Generated context is empty. Falling back to original chunk content.");
|
|
632
637
|
return chunkContent;
|
|
633
638
|
}
|
|
634
639
|
return generatedContext.trim();
|
|
@@ -669,7 +674,7 @@ async function generateText(prompt, system, overrideConfig) {
|
|
|
669
674
|
throw new Error(`Unsupported text provider: ${provider}`);
|
|
670
675
|
}
|
|
671
676
|
} catch (error) {
|
|
672
|
-
logger2.error(`[
|
|
677
|
+
logger2.error(`[Document Processor] ${provider} ${modelName} error:`, error);
|
|
673
678
|
throw error;
|
|
674
679
|
}
|
|
675
680
|
}
|
|
@@ -680,17 +685,35 @@ async function generateAnthropicText(prompt, system, modelName, maxTokens) {
|
|
|
680
685
|
baseURL: config.ANTHROPIC_BASE_URL
|
|
681
686
|
});
|
|
682
687
|
const modelInstance = anthropic(modelName);
|
|
683
|
-
const
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
688
|
+
const maxRetries = 3;
|
|
689
|
+
for (let attempt = 0; attempt < maxRetries; attempt++) {
|
|
690
|
+
try {
|
|
691
|
+
const result = await aiGenerateText({
|
|
692
|
+
model: modelInstance,
|
|
693
|
+
prompt,
|
|
694
|
+
system,
|
|
695
|
+
temperature: 0.3,
|
|
696
|
+
maxTokens
|
|
697
|
+
});
|
|
698
|
+
const totalTokens = result.usage.promptTokens + result.usage.completionTokens;
|
|
699
|
+
logger2.debug(
|
|
700
|
+
`[Document Processor] ${modelName}: ${totalTokens} tokens (${result.usage.promptTokens}\u2192${result.usage.completionTokens})`
|
|
701
|
+
);
|
|
702
|
+
return result;
|
|
703
|
+
} catch (error) {
|
|
704
|
+
const isRateLimit = error?.status === 429 || error?.message?.includes("rate limit") || error?.message?.includes("429");
|
|
705
|
+
if (isRateLimit && attempt < maxRetries - 1) {
|
|
706
|
+
const delay = Math.pow(2, attempt + 1) * 1e3;
|
|
707
|
+
logger2.warn(
|
|
708
|
+
`[Document Processor] Rate limit hit (${modelName}): attempt ${attempt + 1}/${maxRetries}, retrying in ${Math.round(delay / 1e3)}s`
|
|
709
|
+
);
|
|
710
|
+
await new Promise((resolve) => setTimeout(resolve, delay));
|
|
711
|
+
continue;
|
|
712
|
+
}
|
|
713
|
+
throw error;
|
|
714
|
+
}
|
|
715
|
+
}
|
|
716
|
+
throw new Error("Max retries exceeded for Anthropic text generation");
|
|
694
717
|
}
|
|
695
718
|
async function generateOpenAIText(prompt, system, modelName, maxTokens) {
|
|
696
719
|
const config = validateModelConfig();
|
|
@@ -706,8 +729,9 @@ async function generateOpenAIText(prompt, system, modelName, maxTokens) {
|
|
|
706
729
|
temperature: 0.3,
|
|
707
730
|
maxTokens
|
|
708
731
|
});
|
|
732
|
+
const totalTokens = result.usage.promptTokens + result.usage.completionTokens;
|
|
709
733
|
logger2.debug(
|
|
710
|
-
`[
|
|
734
|
+
`[Document Processor] OpenAI ${modelName}: ${totalTokens} tokens (${result.usage.promptTokens}\u2192${result.usage.completionTokens})`
|
|
711
735
|
);
|
|
712
736
|
return result;
|
|
713
737
|
}
|
|
@@ -724,8 +748,9 @@ async function generateGoogleText(prompt, system, modelName, maxTokens, config)
|
|
|
724
748
|
temperature: 0.3,
|
|
725
749
|
maxTokens
|
|
726
750
|
});
|
|
751
|
+
const totalTokens = result.usage.promptTokens + result.usage.completionTokens;
|
|
727
752
|
logger2.debug(
|
|
728
|
-
`[
|
|
753
|
+
`[Document Processor] Google ${modelName}: ${totalTokens} tokens (${result.usage.promptTokens}\u2192${result.usage.completionTokens})`
|
|
729
754
|
);
|
|
730
755
|
return result;
|
|
731
756
|
}
|
|
@@ -746,7 +771,7 @@ async function generateOpenRouterText(prompt, system, modelName, maxTokens, cach
|
|
|
746
771
|
if (docMatch && docMatch[1]) {
|
|
747
772
|
documentForCaching = docMatch[1].trim();
|
|
748
773
|
logger2.debug(
|
|
749
|
-
`[
|
|
774
|
+
`[Document Processor] Auto-detected document for caching (${documentForCaching.length} chars)`
|
|
750
775
|
);
|
|
751
776
|
}
|
|
752
777
|
}
|
|
@@ -777,13 +802,11 @@ async function generateOpenRouterText(prompt, system, modelName, maxTokens, cach
|
|
|
777
802
|
);
|
|
778
803
|
}
|
|
779
804
|
}
|
|
780
|
-
logger2.debug("[
|
|
805
|
+
logger2.debug("[Document Processor] Using standard request without caching");
|
|
781
806
|
return await generateStandardOpenRouterText(prompt, system, modelInstance, modelName, maxTokens);
|
|
782
807
|
}
|
|
783
808
|
async function generateClaudeWithCaching(promptText, system, modelInstance, modelName, maxTokens, documentForCaching) {
|
|
784
|
-
logger2.debug(
|
|
785
|
-
`[LLM Service - OpenRouter] Using explicit prompt caching with Claude model ${modelName}`
|
|
786
|
-
);
|
|
809
|
+
logger2.debug(`[Document Processor] Using explicit prompt caching with Claude ${modelName}`);
|
|
787
810
|
const messages = [
|
|
788
811
|
// System message with cached document (if system is provided)
|
|
789
812
|
system ? {
|
|
@@ -835,7 +858,7 @@ async function generateClaudeWithCaching(promptText, system, modelInstance, mode
|
|
|
835
858
|
]
|
|
836
859
|
} : null
|
|
837
860
|
].filter(Boolean);
|
|
838
|
-
logger2.debug("[
|
|
861
|
+
logger2.debug("[Document Processor] Using Claude-specific caching structure");
|
|
839
862
|
const result = await aiGenerateText({
|
|
840
863
|
model: modelInstance,
|
|
841
864
|
messages,
|
|
@@ -850,8 +873,9 @@ async function generateClaudeWithCaching(promptText, system, modelInstance, mode
|
|
|
850
873
|
}
|
|
851
874
|
});
|
|
852
875
|
logCacheMetrics(result);
|
|
876
|
+
const totalTokens = result.usage.promptTokens + result.usage.completionTokens;
|
|
853
877
|
logger2.debug(
|
|
854
|
-
`[
|
|
878
|
+
`[Document Processor] OpenRouter ${modelName}: ${totalTokens} tokens (${result.usage.promptTokens}\u2192${result.usage.completionTokens})`
|
|
855
879
|
);
|
|
856
880
|
return result;
|
|
857
881
|
}
|
|
@@ -861,27 +885,23 @@ async function generateGeminiWithCaching(promptText, system, modelInstance, mode
|
|
|
861
885
|
const minTokensForImplicitCache = modelName.toLowerCase().includes("flash") ? 1028 : 2048;
|
|
862
886
|
const likelyTriggersCaching = estimatedDocTokens >= minTokensForImplicitCache;
|
|
863
887
|
if (usingImplicitCaching) {
|
|
888
|
+
logger2.debug(`[Document Processor] Using Gemini 2.5 implicit caching with ${modelName}`);
|
|
864
889
|
logger2.debug(
|
|
865
|
-
`[
|
|
866
|
-
);
|
|
867
|
-
logger2.debug(
|
|
868
|
-
`[LLM Service - OpenRouter] Gemini 2.5 models automatically cache large prompts (no cache_control needed)`
|
|
890
|
+
`[Document Processor] Gemini 2.5 models automatically cache large prompts (no cache_control needed)`
|
|
869
891
|
);
|
|
870
892
|
if (likelyTriggersCaching) {
|
|
871
893
|
logger2.debug(
|
|
872
|
-
`[
|
|
894
|
+
`[Document Processor] Document ~${estimatedDocTokens} tokens exceeds ${minTokensForImplicitCache} token threshold for caching`
|
|
873
895
|
);
|
|
874
896
|
} else {
|
|
875
897
|
logger2.debug(
|
|
876
|
-
`[
|
|
898
|
+
`[Document Processor] Document ~${estimatedDocTokens} tokens may not meet ${minTokensForImplicitCache} token threshold for caching`
|
|
877
899
|
);
|
|
878
900
|
}
|
|
879
901
|
} else {
|
|
902
|
+
logger2.debug(`[Document Processor] Using standard prompt format with Gemini ${modelName}`);
|
|
880
903
|
logger2.debug(
|
|
881
|
-
`[
|
|
882
|
-
);
|
|
883
|
-
logger2.debug(
|
|
884
|
-
`[LLM Service - OpenRouter] Note: Only Gemini 2.5 models support automatic implicit caching`
|
|
904
|
+
`[Document Processor] Note: Only Gemini 2.5 models support automatic implicit caching`
|
|
885
905
|
);
|
|
886
906
|
}
|
|
887
907
|
const geminiSystemPrefix = system ? `${system}
|
|
@@ -905,8 +925,10 @@ ${promptText}`;
|
|
|
905
925
|
}
|
|
906
926
|
});
|
|
907
927
|
logCacheMetrics(result);
|
|
928
|
+
const totalTokens = result.usage.promptTokens + result.usage.completionTokens;
|
|
929
|
+
const cachingType = usingImplicitCaching ? "implicit" : "standard";
|
|
908
930
|
logger2.debug(
|
|
909
|
-
`[
|
|
931
|
+
`[Document Processor] OpenRouter ${modelName} (${cachingType} caching): ${totalTokens} tokens (${result.usage.promptTokens}\u2192${result.usage.completionTokens})`
|
|
910
932
|
);
|
|
911
933
|
return result;
|
|
912
934
|
}
|
|
@@ -926,21 +948,44 @@ async function generateStandardOpenRouterText(prompt, system, modelInstance, mod
|
|
|
926
948
|
}
|
|
927
949
|
}
|
|
928
950
|
});
|
|
951
|
+
const totalTokens = result.usage.promptTokens + result.usage.completionTokens;
|
|
929
952
|
logger2.debug(
|
|
930
|
-
`[
|
|
953
|
+
`[Document Processor] OpenRouter ${modelName}: ${totalTokens} tokens (${result.usage.promptTokens}\u2192${result.usage.completionTokens})`
|
|
931
954
|
);
|
|
932
955
|
return result;
|
|
933
956
|
}
|
|
934
957
|
function logCacheMetrics(result) {
|
|
935
958
|
if (result.usage && result.usage.cacheTokens) {
|
|
936
959
|
logger2.debug(
|
|
937
|
-
`[
|
|
960
|
+
`[Document Processor] Cache metrics - tokens: ${result.usage.cacheTokens}, discount: ${result.usage.cacheDiscount}`
|
|
938
961
|
);
|
|
939
962
|
}
|
|
940
963
|
}
|
|
941
964
|
|
|
942
965
|
// src/document-processor.ts
|
|
943
|
-
|
|
966
|
+
function estimateTokens(text) {
|
|
967
|
+
return Math.ceil(text.length / 4);
|
|
968
|
+
}
|
|
969
|
+
function getCtxKnowledgeEnabled(runtime) {
|
|
970
|
+
let result;
|
|
971
|
+
let source;
|
|
972
|
+
let rawValue;
|
|
973
|
+
if (runtime) {
|
|
974
|
+
rawValue = runtime.getSetting("CTX_KNOWLEDGE_ENABLED");
|
|
975
|
+
const cleanValue = rawValue?.toString().trim().toLowerCase();
|
|
976
|
+
result = cleanValue === "true";
|
|
977
|
+
source = "runtime.getSetting()";
|
|
978
|
+
} else {
|
|
979
|
+
rawValue = process.env.CTX_KNOWLEDGE_ENABLED;
|
|
980
|
+
const cleanValue = rawValue?.toString().trim().toLowerCase();
|
|
981
|
+
result = cleanValue === "true";
|
|
982
|
+
source = "process.env";
|
|
983
|
+
}
|
|
984
|
+
if (process.env.NODE_ENV === "development" && rawValue && !result) {
|
|
985
|
+
logger3.debug(`[Document Processor] CTX config mismatch - ${source}: '${rawValue}' \u2192 ${result}`);
|
|
986
|
+
}
|
|
987
|
+
return result;
|
|
988
|
+
}
|
|
944
989
|
function shouldUseCustomLLM() {
|
|
945
990
|
const textProvider = process.env.TEXT_PROVIDER;
|
|
946
991
|
const textModel = process.env.TEXT_MODEL;
|
|
@@ -961,18 +1006,6 @@ function shouldUseCustomLLM() {
|
|
|
961
1006
|
}
|
|
962
1007
|
}
|
|
963
1008
|
var useCustomLLM = shouldUseCustomLLM();
|
|
964
|
-
if (ctxKnowledgeEnabled) {
|
|
965
|
-
logger3.info(`Document processor starting with Contextual Knowledge ENABLED`);
|
|
966
|
-
if (useCustomLLM) {
|
|
967
|
-
logger3.info(
|
|
968
|
-
`Using Custom LLM with provider: ${process.env.TEXT_PROVIDER}, model: ${process.env.TEXT_MODEL}`
|
|
969
|
-
);
|
|
970
|
-
} else {
|
|
971
|
-
logger3.info(`Using ElizaOS Runtime LLM (default behavior)`);
|
|
972
|
-
}
|
|
973
|
-
} else {
|
|
974
|
-
logger3.info(`Document processor starting with Contextual Knowledge DISABLED`);
|
|
975
|
-
}
|
|
976
1009
|
async function processFragmentsSynchronously({
|
|
977
1010
|
runtime,
|
|
978
1011
|
documentId,
|
|
@@ -981,7 +1014,8 @@ async function processFragmentsSynchronously({
|
|
|
981
1014
|
contentType,
|
|
982
1015
|
roomId,
|
|
983
1016
|
entityId,
|
|
984
|
-
worldId
|
|
1017
|
+
worldId,
|
|
1018
|
+
documentTitle
|
|
985
1019
|
}) {
|
|
986
1020
|
if (!fullDocumentText || fullDocumentText.trim() === "") {
|
|
987
1021
|
logger3.warn(`No text content available to chunk for document ${documentId}.`);
|
|
@@ -992,10 +1026,17 @@ async function processFragmentsSynchronously({
|
|
|
992
1026
|
logger3.warn(`No chunks generated from text for ${documentId}. No fragments to save.`);
|
|
993
1027
|
return 0;
|
|
994
1028
|
}
|
|
995
|
-
|
|
1029
|
+
const docName = documentTitle || documentId.substring(0, 8);
|
|
1030
|
+
logger3.info(`[Document Processor] "${docName}": Split into ${chunks.length} chunks`);
|
|
996
1031
|
const providerLimits = await getProviderRateLimits();
|
|
997
1032
|
const CONCURRENCY_LIMIT = Math.min(30, providerLimits.maxConcurrentRequests || 30);
|
|
998
|
-
const rateLimiter = createRateLimiter(
|
|
1033
|
+
const rateLimiter = createRateLimiter(
|
|
1034
|
+
providerLimits.requestsPerMinute || 60,
|
|
1035
|
+
providerLimits.tokensPerMinute
|
|
1036
|
+
);
|
|
1037
|
+
logger3.debug(
|
|
1038
|
+
`[Document Processor] Rate limits: ${providerLimits.requestsPerMinute} RPM, ${providerLimits.tokensPerMinute} TPM (${providerLimits.provider}, concurrency: ${CONCURRENCY_LIMIT})`
|
|
1039
|
+
);
|
|
999
1040
|
const { savedCount, failedCount } = await processAndSaveFragments({
|
|
1000
1041
|
runtime,
|
|
1001
1042
|
documentId,
|
|
@@ -1007,14 +1048,27 @@ async function processFragmentsSynchronously({
|
|
|
1007
1048
|
entityId: entityId || agentId,
|
|
1008
1049
|
worldId: worldId || agentId,
|
|
1009
1050
|
concurrencyLimit: CONCURRENCY_LIMIT,
|
|
1010
|
-
rateLimiter
|
|
1051
|
+
rateLimiter,
|
|
1052
|
+
documentTitle
|
|
1011
1053
|
});
|
|
1054
|
+
const successRate = (savedCount / chunks.length * 100).toFixed(1);
|
|
1012
1055
|
if (failedCount > 0) {
|
|
1013
1056
|
logger3.warn(
|
|
1014
|
-
`
|
|
1057
|
+
`[Document Processor] "${docName}": ${failedCount}/${chunks.length} chunks failed processing`
|
|
1015
1058
|
);
|
|
1016
1059
|
}
|
|
1017
|
-
logger3.info(
|
|
1060
|
+
logger3.info(
|
|
1061
|
+
`[Document Processor] "${docName}" complete: ${savedCount}/${chunks.length} fragments saved (${successRate}% success)`
|
|
1062
|
+
);
|
|
1063
|
+
logKnowledgeGenerationSummary({
|
|
1064
|
+
documentId,
|
|
1065
|
+
totalChunks: chunks.length,
|
|
1066
|
+
savedCount,
|
|
1067
|
+
failedCount,
|
|
1068
|
+
successRate: parseFloat(successRate),
|
|
1069
|
+
ctxEnabled: getCtxKnowledgeEnabled(runtime),
|
|
1070
|
+
providerLimits
|
|
1071
|
+
});
|
|
1018
1072
|
return savedCount;
|
|
1019
1073
|
}
|
|
1020
1074
|
async function extractTextFromDocument(fileBuffer, contentType, originalFilename) {
|
|
@@ -1100,7 +1154,8 @@ async function processAndSaveFragments({
|
|
|
1100
1154
|
entityId,
|
|
1101
1155
|
worldId,
|
|
1102
1156
|
concurrencyLimit,
|
|
1103
|
-
rateLimiter
|
|
1157
|
+
rateLimiter,
|
|
1158
|
+
documentTitle
|
|
1104
1159
|
}) {
|
|
1105
1160
|
let savedCount = 0;
|
|
1106
1161
|
let failedCount = 0;
|
|
@@ -1109,14 +1164,15 @@ async function processAndSaveFragments({
|
|
|
1109
1164
|
const batchChunks = chunks.slice(i, i + concurrencyLimit);
|
|
1110
1165
|
const batchOriginalIndices = Array.from({ length: batchChunks.length }, (_, k) => i + k);
|
|
1111
1166
|
logger3.debug(
|
|
1112
|
-
`
|
|
1167
|
+
`[Document Processor] Batch ${Math.floor(i / concurrencyLimit) + 1}/${Math.ceil(chunks.length / concurrencyLimit)}: processing ${batchChunks.length} chunks (${batchOriginalIndices[0]}-${batchOriginalIndices[batchOriginalIndices.length - 1]})`
|
|
1113
1168
|
);
|
|
1114
1169
|
const contextualizedChunks = await getContextualizedChunks(
|
|
1115
1170
|
runtime,
|
|
1116
1171
|
fullDocumentText,
|
|
1117
1172
|
batchChunks,
|
|
1118
1173
|
contentType,
|
|
1119
|
-
batchOriginalIndices
|
|
1174
|
+
batchOriginalIndices,
|
|
1175
|
+
documentTitle
|
|
1120
1176
|
);
|
|
1121
1177
|
const embeddingResults = await generateEmbeddingsForChunks(
|
|
1122
1178
|
runtime,
|
|
@@ -1159,9 +1215,12 @@ async function processAndSaveFragments({
|
|
|
1159
1215
|
}
|
|
1160
1216
|
};
|
|
1161
1217
|
await runtime.createMemory(fragmentMemory, "knowledge");
|
|
1162
|
-
|
|
1163
|
-
|
|
1164
|
-
|
|
1218
|
+
if (originalChunkIndex === chunks.length - 1) {
|
|
1219
|
+
const docName = documentTitle || documentId.substring(0, 8);
|
|
1220
|
+
logger3.info(
|
|
1221
|
+
`[Document Processor] "${docName}": All ${chunks.length} chunks processed successfully`
|
|
1222
|
+
);
|
|
1223
|
+
}
|
|
1165
1224
|
savedCount++;
|
|
1166
1225
|
} catch (saveError) {
|
|
1167
1226
|
logger3.error(
|
|
@@ -1199,7 +1258,8 @@ async function generateEmbeddingsForChunks(runtime, contextualizedChunks, rateLi
|
|
|
1199
1258
|
text: contextualizedChunk.contextualizedText
|
|
1200
1259
|
};
|
|
1201
1260
|
}
|
|
1202
|
-
|
|
1261
|
+
const embeddingTokens = estimateTokens(contextualizedChunk.contextualizedText);
|
|
1262
|
+
await rateLimiter(embeddingTokens);
|
|
1203
1263
|
try {
|
|
1204
1264
|
const generateEmbeddingOperation = async () => {
|
|
1205
1265
|
return await generateEmbeddingWithValidation(
|
|
@@ -1239,37 +1299,50 @@ async function generateEmbeddingsForChunks(runtime, contextualizedChunks, rateLi
|
|
|
1239
1299
|
})
|
|
1240
1300
|
);
|
|
1241
1301
|
}
|
|
1242
|
-
async function getContextualizedChunks(runtime, fullDocumentText, chunks, contentType, batchOriginalIndices) {
|
|
1243
|
-
|
|
1244
|
-
|
|
1302
|
+
async function getContextualizedChunks(runtime, fullDocumentText, chunks, contentType, batchOriginalIndices, documentTitle) {
|
|
1303
|
+
const ctxEnabled = getCtxKnowledgeEnabled(runtime);
|
|
1304
|
+
if (batchOriginalIndices[0] === 0) {
|
|
1305
|
+
const docName = documentTitle || "Document";
|
|
1306
|
+
const provider = runtime?.getSetting("TEXT_PROVIDER") || process.env.TEXT_PROVIDER;
|
|
1307
|
+
const model = runtime?.getSetting("TEXT_MODEL") || process.env.TEXT_MODEL;
|
|
1308
|
+
logger3.info(
|
|
1309
|
+
`[Document Processor] "${docName}": CTX enrichment ${ctxEnabled ? "ENABLED" : "DISABLED"}${ctxEnabled ? ` (${provider}/${model})` : ""}`
|
|
1310
|
+
);
|
|
1311
|
+
}
|
|
1312
|
+
if (ctxEnabled && fullDocumentText) {
|
|
1245
1313
|
return await generateContextsInBatch(
|
|
1246
1314
|
runtime,
|
|
1247
1315
|
fullDocumentText,
|
|
1248
1316
|
chunks,
|
|
1249
1317
|
contentType,
|
|
1250
|
-
batchOriginalIndices
|
|
1318
|
+
batchOriginalIndices,
|
|
1319
|
+
documentTitle
|
|
1320
|
+
);
|
|
1321
|
+
} else if (!ctxEnabled && batchOriginalIndices[0] === 0) {
|
|
1322
|
+
logger3.debug(
|
|
1323
|
+
`[Document Processor] To enable CTX: Set CTX_KNOWLEDGE_ENABLED=true and configure TEXT_PROVIDER/TEXT_MODEL`
|
|
1251
1324
|
);
|
|
1252
|
-
} else {
|
|
1253
|
-
return chunks.map((chunkText, idx) => ({
|
|
1254
|
-
contextualizedText: chunkText,
|
|
1255
|
-
index: batchOriginalIndices[idx],
|
|
1256
|
-
success: true
|
|
1257
|
-
}));
|
|
1258
1325
|
}
|
|
1326
|
+
return chunks.map((chunkText, idx) => ({
|
|
1327
|
+
contextualizedText: chunkText,
|
|
1328
|
+
index: batchOriginalIndices[idx],
|
|
1329
|
+
success: true
|
|
1330
|
+
}));
|
|
1259
1331
|
}
|
|
1260
|
-
async function generateContextsInBatch(runtime, fullDocumentText, chunks, contentType, batchIndices) {
|
|
1261
|
-
console.log("####### generateContextsInBatch FULLL DOCUMENT", fullDocumentText);
|
|
1262
|
-
console.log("####### generateContextsInBatch CHUNKS", chunks);
|
|
1332
|
+
async function generateContextsInBatch(runtime, fullDocumentText, chunks, contentType, batchIndices, documentTitle) {
|
|
1263
1333
|
if (!chunks || chunks.length === 0) {
|
|
1264
1334
|
return [];
|
|
1265
1335
|
}
|
|
1266
1336
|
const providerLimits = await getProviderRateLimits();
|
|
1267
|
-
const rateLimiter = createRateLimiter(
|
|
1337
|
+
const rateLimiter = createRateLimiter(
|
|
1338
|
+
providerLimits.requestsPerMinute || 60,
|
|
1339
|
+
providerLimits.tokensPerMinute
|
|
1340
|
+
);
|
|
1268
1341
|
const config = validateModelConfig();
|
|
1269
1342
|
const isUsingOpenRouter = config.TEXT_PROVIDER === "openrouter";
|
|
1270
1343
|
const isUsingCacheCapableModel = isUsingOpenRouter && (config.TEXT_MODEL?.toLowerCase().includes("claude") || config.TEXT_MODEL?.toLowerCase().includes("gemini"));
|
|
1271
|
-
logger3.
|
|
1272
|
-
`
|
|
1344
|
+
logger3.debug(
|
|
1345
|
+
`[Document Processor] Contextualizing ${chunks.length} chunks with ${config.TEXT_PROVIDER}/${config.TEXT_MODEL} (cache: ${isUsingCacheCapableModel})`
|
|
1273
1346
|
);
|
|
1274
1347
|
const promptConfigs = prepareContextPrompts(
|
|
1275
1348
|
chunks,
|
|
@@ -1287,7 +1360,8 @@ async function generateContextsInBatch(runtime, fullDocumentText, chunks, conten
|
|
|
1287
1360
|
index: item.originalIndex
|
|
1288
1361
|
};
|
|
1289
1362
|
}
|
|
1290
|
-
|
|
1363
|
+
const llmTokens = estimateTokens(item.chunkText + (item.prompt || ""));
|
|
1364
|
+
await rateLimiter(llmTokens);
|
|
1291
1365
|
try {
|
|
1292
1366
|
let llmResponse;
|
|
1293
1367
|
const generateTextOperation = async () => {
|
|
@@ -1320,9 +1394,12 @@ async function generateContextsInBatch(runtime, fullDocumentText, chunks, conten
|
|
|
1320
1394
|
);
|
|
1321
1395
|
const generatedContext = typeof llmResponse === "string" ? llmResponse : llmResponse.text;
|
|
1322
1396
|
const contextualizedText = getChunkWithContext(item.chunkText, generatedContext);
|
|
1323
|
-
|
|
1324
|
-
|
|
1325
|
-
|
|
1397
|
+
if ((item.originalIndex + 1) % Math.max(1, Math.floor(chunks.length / 3)) === 0 || item.originalIndex === chunks.length - 1) {
|
|
1398
|
+
const docName = documentTitle || "Document";
|
|
1399
|
+
logger3.debug(
|
|
1400
|
+
`[Document Processor] "${docName}": Context added for ${item.originalIndex + 1}/${chunks.length} chunks`
|
|
1401
|
+
);
|
|
1402
|
+
}
|
|
1326
1403
|
return {
|
|
1327
1404
|
contextualizedText,
|
|
1328
1405
|
success: true,
|
|
@@ -1441,25 +1518,68 @@ async function withRateLimitRetry(operation, errorContext, retryDelay) {
|
|
|
1441
1518
|
throw error;
|
|
1442
1519
|
}
|
|
1443
1520
|
}
|
|
1444
|
-
function createRateLimiter(requestsPerMinute) {
|
|
1521
|
+
function createRateLimiter(requestsPerMinute, tokensPerMinute) {
|
|
1445
1522
|
const requestTimes = [];
|
|
1523
|
+
const tokenUsage = [];
|
|
1446
1524
|
const intervalMs = 60 * 1e3;
|
|
1447
|
-
return async function rateLimiter() {
|
|
1525
|
+
return async function rateLimiter(estimatedTokens = 1e3) {
|
|
1448
1526
|
const now = Date.now();
|
|
1449
1527
|
while (requestTimes.length > 0 && now - requestTimes[0] > intervalMs) {
|
|
1450
1528
|
requestTimes.shift();
|
|
1451
1529
|
}
|
|
1452
|
-
|
|
1453
|
-
|
|
1454
|
-
|
|
1530
|
+
while (tokenUsage.length > 0 && now - tokenUsage[0].timestamp > intervalMs) {
|
|
1531
|
+
tokenUsage.shift();
|
|
1532
|
+
}
|
|
1533
|
+
const currentTokens = tokenUsage.reduce((sum, usage) => sum + usage.tokens, 0);
|
|
1534
|
+
const requestLimitExceeded = requestTimes.length >= requestsPerMinute;
|
|
1535
|
+
const tokenLimitExceeded = tokensPerMinute && currentTokens + estimatedTokens > tokensPerMinute;
|
|
1536
|
+
if (requestLimitExceeded || tokenLimitExceeded) {
|
|
1537
|
+
let timeToWait = 0;
|
|
1538
|
+
if (requestLimitExceeded) {
|
|
1539
|
+
const oldestRequest = requestTimes[0];
|
|
1540
|
+
timeToWait = Math.max(timeToWait, oldestRequest + intervalMs - now);
|
|
1541
|
+
}
|
|
1542
|
+
if (tokenLimitExceeded && tokenUsage.length > 0) {
|
|
1543
|
+
const oldestTokenUsage = tokenUsage[0];
|
|
1544
|
+
timeToWait = Math.max(timeToWait, oldestTokenUsage.timestamp + intervalMs - now);
|
|
1545
|
+
}
|
|
1455
1546
|
if (timeToWait > 0) {
|
|
1456
|
-
|
|
1547
|
+
const reason = requestLimitExceeded ? "request" : "token";
|
|
1548
|
+
if (timeToWait > 5e3) {
|
|
1549
|
+
logger3.info(
|
|
1550
|
+
`[Document Processor] Rate limiting: waiting ${Math.round(timeToWait / 1e3)}s due to ${reason} limit`
|
|
1551
|
+
);
|
|
1552
|
+
} else {
|
|
1553
|
+
logger3.debug(
|
|
1554
|
+
`[Document Processor] Rate limiting: ${timeToWait}ms wait (${reason} limit)`
|
|
1555
|
+
);
|
|
1556
|
+
}
|
|
1457
1557
|
await new Promise((resolve) => setTimeout(resolve, timeToWait));
|
|
1458
1558
|
}
|
|
1459
1559
|
}
|
|
1460
|
-
requestTimes.push(
|
|
1560
|
+
requestTimes.push(now);
|
|
1561
|
+
if (tokensPerMinute) {
|
|
1562
|
+
tokenUsage.push({ timestamp: now, tokens: estimatedTokens });
|
|
1563
|
+
}
|
|
1461
1564
|
};
|
|
1462
1565
|
}
|
|
1566
|
+
function logKnowledgeGenerationSummary({
|
|
1567
|
+
totalChunks,
|
|
1568
|
+
savedCount,
|
|
1569
|
+
failedCount,
|
|
1570
|
+
ctxEnabled,
|
|
1571
|
+
providerLimits
|
|
1572
|
+
}) {
|
|
1573
|
+
if (failedCount > 0 || process.env.NODE_ENV === "development") {
|
|
1574
|
+
const status = failedCount > 0 ? "PARTIAL" : "SUCCESS";
|
|
1575
|
+
logger3.info(
|
|
1576
|
+
`[Document Processor] ${status}: ${savedCount}/${totalChunks} chunks, CTX: ${ctxEnabled ? "ON" : "OFF"}, Provider: ${providerLimits.provider}`
|
|
1577
|
+
);
|
|
1578
|
+
}
|
|
1579
|
+
if (failedCount > 0) {
|
|
1580
|
+
logger3.warn(`[Document Processor] ${failedCount} chunks failed processing`);
|
|
1581
|
+
}
|
|
1582
|
+
}
|
|
1463
1583
|
|
|
1464
1584
|
// src/service.ts
|
|
1465
1585
|
var KnowledgeService = class _KnowledgeService extends Service {
|
|
@@ -1588,15 +1708,11 @@ var KnowledgeService = class _KnowledgeService extends Service {
|
|
|
1588
1708
|
maxChars: 2e3
|
|
1589
1709
|
// Use first 2KB of content for ID generation
|
|
1590
1710
|
});
|
|
1591
|
-
logger4.info(
|
|
1592
|
-
`KnowledgeService processing document for agent: ${agentId}, file: ${options.originalFilename}, type: ${options.contentType}, generated ID: ${contentBasedId}`
|
|
1593
|
-
);
|
|
1711
|
+
logger4.info(`Processing "${options.originalFilename}" (${options.contentType})`);
|
|
1594
1712
|
try {
|
|
1595
1713
|
const existingDocument = await this.runtime.getMemoryById(contentBasedId);
|
|
1596
1714
|
if (existingDocument && existingDocument.metadata?.type === MemoryType2.DOCUMENT) {
|
|
1597
|
-
logger4.info(
|
|
1598
|
-
`Document ${options.originalFilename} with ID ${contentBasedId} already exists. Skipping processing.`
|
|
1599
|
-
);
|
|
1715
|
+
logger4.info(`"${options.originalFilename}" already exists - skipping`);
|
|
1600
1716
|
const fragments = await this.runtime.getMemories({
|
|
1601
1717
|
tableName: "knowledge"
|
|
1602
1718
|
});
|
|
@@ -1742,11 +1858,10 @@ var KnowledgeService = class _KnowledgeService extends Service {
|
|
|
1742
1858
|
contentType,
|
|
1743
1859
|
roomId: roomId || agentId,
|
|
1744
1860
|
entityId: entityId || agentId,
|
|
1745
|
-
worldId: worldId || agentId
|
|
1861
|
+
worldId: worldId || agentId,
|
|
1862
|
+
documentTitle: originalFilename
|
|
1746
1863
|
});
|
|
1747
|
-
logger4.
|
|
1748
|
-
`KnowledgeService: Document ${originalFilename} processed with ${fragmentCount} fragments for agent ${agentId}`
|
|
1749
|
-
);
|
|
1864
|
+
logger4.debug(`"${originalFilename}" stored with ${fragmentCount} fragments`);
|
|
1750
1865
|
return {
|
|
1751
1866
|
clientDocumentId,
|
|
1752
1867
|
storedDocumentMemoryId: memoryWithScope.id,
|
|
@@ -1801,6 +1916,101 @@ var KnowledgeService = class _KnowledgeService extends Service {
|
|
|
1801
1916
|
worldId: fragment.worldId
|
|
1802
1917
|
}));
|
|
1803
1918
|
}
|
|
1919
|
+
/**
|
|
1920
|
+
* Enrich a conversation memory with RAG metadata
|
|
1921
|
+
* This can be called after response generation to add RAG tracking data
|
|
1922
|
+
* @param memoryId The ID of the conversation memory to enrich
|
|
1923
|
+
* @param ragMetadata The RAG metadata to add
|
|
1924
|
+
*/
|
|
1925
|
+
async enrichConversationMemoryWithRAG(memoryId, ragMetadata) {
|
|
1926
|
+
try {
|
|
1927
|
+
const existingMemory = await this.runtime.getMemoryById(memoryId);
|
|
1928
|
+
if (!existingMemory) {
|
|
1929
|
+
logger4.warn(`Cannot enrich memory ${memoryId} - memory not found`);
|
|
1930
|
+
return;
|
|
1931
|
+
}
|
|
1932
|
+
const updatedMetadata = {
|
|
1933
|
+
...existingMemory.metadata,
|
|
1934
|
+
knowledgeUsed: true,
|
|
1935
|
+
// Simple flag for UI to detect RAG usage
|
|
1936
|
+
ragUsage: {
|
|
1937
|
+
retrievedFragments: ragMetadata.retrievedFragments,
|
|
1938
|
+
queryText: ragMetadata.queryText,
|
|
1939
|
+
totalFragments: ragMetadata.totalFragments,
|
|
1940
|
+
retrievalTimestamp: ragMetadata.retrievalTimestamp,
|
|
1941
|
+
usedInResponse: true
|
|
1942
|
+
},
|
|
1943
|
+
timestamp: existingMemory.metadata?.timestamp || Date.now(),
|
|
1944
|
+
type: existingMemory.metadata?.type || "message"
|
|
1945
|
+
};
|
|
1946
|
+
await this.runtime.updateMemory({
|
|
1947
|
+
id: memoryId,
|
|
1948
|
+
metadata: updatedMetadata
|
|
1949
|
+
});
|
|
1950
|
+
logger4.debug(
|
|
1951
|
+
`Enriched conversation memory ${memoryId} with RAG data: ${ragMetadata.totalFragments} fragments`
|
|
1952
|
+
);
|
|
1953
|
+
} catch (error) {
|
|
1954
|
+
logger4.warn(
|
|
1955
|
+
`Failed to enrich conversation memory ${memoryId} with RAG data: ${error.message}`
|
|
1956
|
+
);
|
|
1957
|
+
}
|
|
1958
|
+
}
|
|
1959
|
+
/**
|
|
1960
|
+
* Set the current response memory ID for RAG tracking
|
|
1961
|
+
* This is called by the knowledge provider to track which response memory to enrich
|
|
1962
|
+
*/
|
|
1963
|
+
pendingRAGEnrichment = [];
|
|
1964
|
+
/**
|
|
1965
|
+
* Store RAG metadata for the next conversation memory that gets created
|
|
1966
|
+
* @param ragMetadata The RAG metadata to associate with the next memory
|
|
1967
|
+
*/
|
|
1968
|
+
setPendingRAGMetadata(ragMetadata) {
|
|
1969
|
+
const now = Date.now();
|
|
1970
|
+
this.pendingRAGEnrichment = this.pendingRAGEnrichment.filter(
|
|
1971
|
+
(entry) => now - entry.timestamp < 3e4
|
|
1972
|
+
);
|
|
1973
|
+
this.pendingRAGEnrichment.push({
|
|
1974
|
+
ragMetadata,
|
|
1975
|
+
timestamp: now
|
|
1976
|
+
});
|
|
1977
|
+
logger4.debug(`Stored pending RAG metadata for next conversation memory`);
|
|
1978
|
+
}
|
|
1979
|
+
/**
|
|
1980
|
+
* Try to enrich recent conversation memories with pending RAG metadata
|
|
1981
|
+
* This is called periodically to catch memories that were created after RAG retrieval
|
|
1982
|
+
*/
|
|
1983
|
+
async enrichRecentMemoriesWithPendingRAG() {
|
|
1984
|
+
if (this.pendingRAGEnrichment.length === 0) {
|
|
1985
|
+
return;
|
|
1986
|
+
}
|
|
1987
|
+
try {
|
|
1988
|
+
const recentMemories = await this.runtime.getMemories({
|
|
1989
|
+
tableName: "messages",
|
|
1990
|
+
count: 10
|
|
1991
|
+
});
|
|
1992
|
+
const now = Date.now();
|
|
1993
|
+
const recentConversationMemories = recentMemories.filter(
|
|
1994
|
+
(memory) => memory.metadata?.type === "message" && now - (memory.createdAt || 0) < 1e4 && // Created in last 10 seconds
|
|
1995
|
+
!memory.metadata?.ragUsage
|
|
1996
|
+
// Doesn't already have RAG data
|
|
1997
|
+
).sort((a, b) => (b.createdAt || 0) - (a.createdAt || 0));
|
|
1998
|
+
for (const pendingEntry of this.pendingRAGEnrichment) {
|
|
1999
|
+
const matchingMemory = recentConversationMemories.find(
|
|
2000
|
+
(memory) => (memory.createdAt || 0) > pendingEntry.timestamp
|
|
2001
|
+
);
|
|
2002
|
+
if (matchingMemory && matchingMemory.id) {
|
|
2003
|
+
await this.enrichConversationMemoryWithRAG(matchingMemory.id, pendingEntry.ragMetadata);
|
|
2004
|
+
const index = this.pendingRAGEnrichment.indexOf(pendingEntry);
|
|
2005
|
+
if (index > -1) {
|
|
2006
|
+
this.pendingRAGEnrichment.splice(index, 1);
|
|
2007
|
+
}
|
|
2008
|
+
}
|
|
2009
|
+
}
|
|
2010
|
+
} catch (error) {
|
|
2011
|
+
logger4.warn(`Error enriching recent memories with RAG data: ${error.message}`);
|
|
2012
|
+
}
|
|
2013
|
+
}
|
|
1804
2014
|
async processCharacterKnowledge(items) {
|
|
1805
2015
|
await new Promise((resolve) => setTimeout(resolve, 1e3));
|
|
1806
2016
|
logger4.info(
|
|
@@ -2026,13 +2236,14 @@ var KnowledgeService = class _KnowledgeService extends Service {
|
|
|
2026
2236
|
};
|
|
2027
2237
|
|
|
2028
2238
|
// src/provider.ts
|
|
2029
|
-
import { addHeader } from "@elizaos/core";
|
|
2239
|
+
import { addHeader, logger as logger5 } from "@elizaos/core";
|
|
2030
2240
|
var knowledgeProvider = {
|
|
2031
2241
|
name: "KNOWLEDGE",
|
|
2032
2242
|
description: "Knowledge from the knowledge base that the agent knows, retrieved whenever the agent needs to answer a question about their expertise.",
|
|
2033
2243
|
dynamic: true,
|
|
2034
2244
|
get: async (runtime, message) => {
|
|
2035
|
-
const
|
|
2245
|
+
const knowledgeService = runtime.getService("knowledge");
|
|
2246
|
+
const knowledgeData = await knowledgeService?.getKnowledge(message);
|
|
2036
2247
|
const firstFiveKnowledgeItems = knowledgeData?.slice(0, 5);
|
|
2037
2248
|
let knowledge = (firstFiveKnowledgeItems && firstFiveKnowledgeItems.length > 0 ? addHeader(
|
|
2038
2249
|
"# Knowledge",
|
|
@@ -2042,14 +2253,52 @@ var knowledgeProvider = {
|
|
|
2042
2253
|
if (knowledge.length > 4e3 * tokenLength) {
|
|
2043
2254
|
knowledge = knowledge.slice(0, 4e3 * tokenLength);
|
|
2044
2255
|
}
|
|
2256
|
+
let ragMetadata = null;
|
|
2257
|
+
if (knowledgeData && knowledgeData.length > 0) {
|
|
2258
|
+
ragMetadata = {
|
|
2259
|
+
retrievedFragments: knowledgeData.map((fragment) => ({
|
|
2260
|
+
fragmentId: fragment.id,
|
|
2261
|
+
documentTitle: fragment.metadata?.filename || fragment.metadata?.title || "Unknown Document",
|
|
2262
|
+
similarityScore: fragment.similarity,
|
|
2263
|
+
contentPreview: (fragment.content?.text || "No content").substring(0, 100) + "..."
|
|
2264
|
+
})),
|
|
2265
|
+
queryText: message.content?.text || "Unknown query",
|
|
2266
|
+
totalFragments: knowledgeData.length,
|
|
2267
|
+
retrievalTimestamp: Date.now()
|
|
2268
|
+
};
|
|
2269
|
+
}
|
|
2270
|
+
if (knowledgeData && knowledgeData.length > 0 && knowledgeService && ragMetadata) {
|
|
2271
|
+
try {
|
|
2272
|
+
knowledgeService.setPendingRAGMetadata(ragMetadata);
|
|
2273
|
+
setTimeout(async () => {
|
|
2274
|
+
try {
|
|
2275
|
+
await knowledgeService.enrichRecentMemoriesWithPendingRAG();
|
|
2276
|
+
} catch (error) {
|
|
2277
|
+
logger5.warn("RAG memory enrichment failed:", error.message);
|
|
2278
|
+
}
|
|
2279
|
+
}, 2e3);
|
|
2280
|
+
} catch (error) {
|
|
2281
|
+
logger5.warn("RAG memory enrichment failed:", error.message);
|
|
2282
|
+
}
|
|
2283
|
+
}
|
|
2045
2284
|
return {
|
|
2046
2285
|
data: {
|
|
2047
|
-
knowledge
|
|
2286
|
+
knowledge,
|
|
2287
|
+
ragMetadata,
|
|
2288
|
+
// 🎯 Include RAG metadata for memory tracking
|
|
2289
|
+
knowledgeUsed: knowledgeData && knowledgeData.length > 0
|
|
2290
|
+
// Simple flag for easy detection
|
|
2048
2291
|
},
|
|
2049
2292
|
values: {
|
|
2050
|
-
knowledge
|
|
2293
|
+
knowledge,
|
|
2294
|
+
knowledgeUsed: knowledgeData && knowledgeData.length > 0
|
|
2295
|
+
// Simple flag for easy detection
|
|
2051
2296
|
},
|
|
2052
|
-
text: knowledge
|
|
2297
|
+
text: knowledge,
|
|
2298
|
+
ragMetadata,
|
|
2299
|
+
// 🎯 Also include at top level for easy access
|
|
2300
|
+
knowledgeUsed: knowledgeData && knowledgeData.length > 0
|
|
2301
|
+
// 🎯 Simple flag at top level too
|
|
2053
2302
|
};
|
|
2054
2303
|
}
|
|
2055
2304
|
};
|
|
@@ -2192,9 +2441,7 @@ function createMockRuntime(overrides) {
|
|
|
2192
2441
|
return ids.map((id) => memories.get(id)).filter(Boolean);
|
|
2193
2442
|
},
|
|
2194
2443
|
async getMemoriesByRoomIds(params) {
|
|
2195
|
-
return Array.from(memories.values()).filter(
|
|
2196
|
-
(m) => params.roomIds.includes(m.roomId)
|
|
2197
|
-
);
|
|
2444
|
+
return Array.from(memories.values()).filter((m) => params.roomIds.includes(m.roomId));
|
|
2198
2445
|
},
|
|
2199
2446
|
async searchMemories(params) {
|
|
2200
2447
|
const fragments = Array.from(memories.values()).filter(
|
|
@@ -2521,9 +2768,7 @@ var KnowledgeTestSuite = class {
|
|
|
2521
2768
|
throw new Error("Incorrect service capability description");
|
|
2522
2769
|
}
|
|
2523
2770
|
runtime.services.set(KnowledgeService.serviceType, service);
|
|
2524
|
-
const retrievedService = runtime.getService(
|
|
2525
|
-
KnowledgeService.serviceType
|
|
2526
|
-
);
|
|
2771
|
+
const retrievedService = runtime.getService(KnowledgeService.serviceType);
|
|
2527
2772
|
if (retrievedService !== service) {
|
|
2528
2773
|
throw new Error("Service not properly registered with runtime");
|
|
2529
2774
|
}
|
|
@@ -2536,11 +2781,7 @@ var KnowledgeTestSuite = class {
|
|
|
2536
2781
|
fn: async (runtime) => {
|
|
2537
2782
|
const testContent = "This is a test document with some content.";
|
|
2538
2783
|
const buffer = createTestFileBuffer(testContent);
|
|
2539
|
-
const extractedText = await extractTextFromDocument(
|
|
2540
|
-
buffer,
|
|
2541
|
-
"text/plain",
|
|
2542
|
-
"test.txt"
|
|
2543
|
-
);
|
|
2784
|
+
const extractedText = await extractTextFromDocument(buffer, "text/plain", "test.txt");
|
|
2544
2785
|
if (extractedText !== testContent) {
|
|
2545
2786
|
throw new Error(`Expected "${testContent}", got "${extractedText}"`);
|
|
2546
2787
|
}
|
|
@@ -2612,9 +2853,7 @@ var KnowledgeTestSuite = class {
|
|
|
2612
2853
|
if (result.fragmentCount === 0) {
|
|
2613
2854
|
throw new Error("No fragments created");
|
|
2614
2855
|
}
|
|
2615
|
-
const storedDoc = await runtime.getMemoryById(
|
|
2616
|
-
result.storedDocumentMemoryId
|
|
2617
|
-
);
|
|
2856
|
+
const storedDoc = await runtime.getMemoryById(result.storedDocumentMemoryId);
|
|
2618
2857
|
if (!storedDoc) {
|
|
2619
2858
|
throw new Error("Document not found in storage");
|
|
2620
2859
|
}
|
|
@@ -2765,13 +3004,9 @@ var KnowledgeTestSuite = class {
|
|
|
2765
3004
|
entityId: knowledgeRuntime.agentId
|
|
2766
3005
|
});
|
|
2767
3006
|
if (memories.length < 3) {
|
|
2768
|
-
throw new Error(
|
|
2769
|
-
`Expected at least 3 character knowledge items, got ${memories.length}`
|
|
2770
|
-
);
|
|
3007
|
+
throw new Error(`Expected at least 3 character knowledge items, got ${memories.length}`);
|
|
2771
3008
|
}
|
|
2772
|
-
const pathKnowledge = memories.find(
|
|
2773
|
-
(m) => m.content.text?.includes("markdown content")
|
|
2774
|
-
);
|
|
3009
|
+
const pathKnowledge = memories.find((m) => m.content.text?.includes("markdown content"));
|
|
2775
3010
|
if (!pathKnowledge) {
|
|
2776
3011
|
throw new Error("Path-based knowledge not found");
|
|
2777
3012
|
}
|
|
@@ -2873,11 +3108,7 @@ var KnowledgeTestSuite = class {
|
|
|
2873
3108
|
data: {},
|
|
2874
3109
|
text: ""
|
|
2875
3110
|
};
|
|
2876
|
-
const providerResult = await knowledgeProvider.get(
|
|
2877
|
-
runtime,
|
|
2878
|
-
queryMessage,
|
|
2879
|
-
state
|
|
2880
|
-
);
|
|
3111
|
+
const providerResult = await knowledgeProvider.get(runtime, queryMessage, state);
|
|
2881
3112
|
if (!providerResult.text || !providerResult.text.includes("qubit")) {
|
|
2882
3113
|
throw new Error("Provider did not return relevant knowledge");
|
|
2883
3114
|
}
|
|
@@ -2907,9 +3138,7 @@ var KnowledgeTestSuite = class {
|
|
|
2907
3138
|
};
|
|
2908
3139
|
const result = await service.addKnowledge(document);
|
|
2909
3140
|
if (result.fragmentCount < 2) {
|
|
2910
|
-
throw new Error(
|
|
2911
|
-
"Large document should be split into multiple fragments"
|
|
2912
|
-
);
|
|
3141
|
+
throw new Error("Large document should be split into multiple fragments");
|
|
2913
3142
|
}
|
|
2914
3143
|
const fragments = await runtime.getMemories({
|
|
2915
3144
|
tableName: "knowledge",
|
|
@@ -2961,7 +3190,7 @@ var KnowledgeTestSuite = class {
|
|
|
2961
3190
|
var tests_default = new KnowledgeTestSuite();
|
|
2962
3191
|
|
|
2963
3192
|
// src/actions.ts
|
|
2964
|
-
import { logger as
|
|
3193
|
+
import { logger as logger6, stringToUuid } from "@elizaos/core";
|
|
2965
3194
|
import * as fs2 from "fs";
|
|
2966
3195
|
import * as path2 from "path";
|
|
2967
3196
|
var processKnowledgeAction = {
|
|
@@ -3014,25 +3243,19 @@ var processKnowledgeAction = {
|
|
|
3014
3243
|
"ingest",
|
|
3015
3244
|
"file"
|
|
3016
3245
|
];
|
|
3017
|
-
const hasKeyword = knowledgeKeywords.some(
|
|
3018
|
-
(keyword) => text.includes(keyword)
|
|
3019
|
-
);
|
|
3246
|
+
const hasKeyword = knowledgeKeywords.some((keyword) => text.includes(keyword));
|
|
3020
3247
|
const pathPattern = /(?:\/[\w.-]+)+|(?:[a-zA-Z]:[\\/][\w\s.-]+(?:[\\/][\w\s.-]+)*)/;
|
|
3021
3248
|
const hasPath = pathPattern.test(text);
|
|
3022
3249
|
const service = runtime.getService(KnowledgeService.serviceType);
|
|
3023
3250
|
if (!service) {
|
|
3024
|
-
|
|
3025
|
-
"Knowledge service not available for PROCESS_KNOWLEDGE action"
|
|
3026
|
-
);
|
|
3251
|
+
logger6.warn("Knowledge service not available for PROCESS_KNOWLEDGE action");
|
|
3027
3252
|
return false;
|
|
3028
3253
|
}
|
|
3029
3254
|
return hasKeyword || hasPath;
|
|
3030
3255
|
},
|
|
3031
3256
|
handler: async (runtime, message, state, options, callback) => {
|
|
3032
3257
|
try {
|
|
3033
|
-
const service = runtime.getService(
|
|
3034
|
-
KnowledgeService.serviceType
|
|
3035
|
-
);
|
|
3258
|
+
const service = runtime.getService(KnowledgeService.serviceType);
|
|
3036
3259
|
if (!service) {
|
|
3037
3260
|
throw new Error("Knowledge service not available");
|
|
3038
3261
|
}
|
|
@@ -3075,10 +3298,7 @@ var processKnowledgeAction = {
|
|
|
3075
3298
|
text: `I've successfully processed the document "${fileName}". It has been split into ${result.fragmentCount} searchable fragments and added to my knowledge base.`
|
|
3076
3299
|
};
|
|
3077
3300
|
} else {
|
|
3078
|
-
const knowledgeContent = text.replace(
|
|
3079
|
-
/^(add|store|remember|process|learn)\s+(this|that|the following)?:?\s*/i,
|
|
3080
|
-
""
|
|
3081
|
-
).trim();
|
|
3301
|
+
const knowledgeContent = text.replace(/^(add|store|remember|process|learn)\s+(this|that|the following)?:?\s*/i, "").trim();
|
|
3082
3302
|
if (!knowledgeContent) {
|
|
3083
3303
|
response = {
|
|
3084
3304
|
text: "I need some content to add to my knowledge base. Please provide text or a file path."
|
|
@@ -3106,7 +3326,7 @@ var processKnowledgeAction = {
|
|
|
3106
3326
|
await callback(response);
|
|
3107
3327
|
}
|
|
3108
3328
|
} catch (error) {
|
|
3109
|
-
|
|
3329
|
+
logger6.error("Error in PROCESS_KNOWLEDGE action:", error);
|
|
3110
3330
|
const errorResponse = {
|
|
3111
3331
|
text: `I encountered an error while processing the knowledge: ${error instanceof Error ? error.message : "Unknown error"}`
|
|
3112
3332
|
};
|
|
@@ -3146,25 +3366,10 @@ var searchKnowledgeAction = {
|
|
|
3146
3366
|
],
|
|
3147
3367
|
validate: async (runtime, message, state) => {
|
|
3148
3368
|
const text = message.content.text?.toLowerCase() || "";
|
|
3149
|
-
const searchKeywords = [
|
|
3150
|
-
|
|
3151
|
-
|
|
3152
|
-
|
|
3153
|
-
"query",
|
|
3154
|
-
"what do you know about"
|
|
3155
|
-
];
|
|
3156
|
-
const knowledgeKeywords = [
|
|
3157
|
-
"knowledge",
|
|
3158
|
-
"information",
|
|
3159
|
-
"document",
|
|
3160
|
-
"database"
|
|
3161
|
-
];
|
|
3162
|
-
const hasSearchKeyword = searchKeywords.some(
|
|
3163
|
-
(keyword) => text.includes(keyword)
|
|
3164
|
-
);
|
|
3165
|
-
const hasKnowledgeKeyword = knowledgeKeywords.some(
|
|
3166
|
-
(keyword) => text.includes(keyword)
|
|
3167
|
-
);
|
|
3369
|
+
const searchKeywords = ["search", "find", "look up", "query", "what do you know about"];
|
|
3370
|
+
const knowledgeKeywords = ["knowledge", "information", "document", "database"];
|
|
3371
|
+
const hasSearchKeyword = searchKeywords.some((keyword) => text.includes(keyword));
|
|
3372
|
+
const hasKnowledgeKeyword = knowledgeKeywords.some((keyword) => text.includes(keyword));
|
|
3168
3373
|
const service = runtime.getService(KnowledgeService.serviceType);
|
|
3169
3374
|
if (!service) {
|
|
3170
3375
|
return false;
|
|
@@ -3173,17 +3378,12 @@ var searchKnowledgeAction = {
|
|
|
3173
3378
|
},
|
|
3174
3379
|
handler: async (runtime, message, state, options, callback) => {
|
|
3175
3380
|
try {
|
|
3176
|
-
const service = runtime.getService(
|
|
3177
|
-
KnowledgeService.serviceType
|
|
3178
|
-
);
|
|
3381
|
+
const service = runtime.getService(KnowledgeService.serviceType);
|
|
3179
3382
|
if (!service) {
|
|
3180
3383
|
throw new Error("Knowledge service not available");
|
|
3181
3384
|
}
|
|
3182
3385
|
const text = message.content.text || "";
|
|
3183
|
-
const query = text.replace(
|
|
3184
|
-
/^(search|find|look up|query)\s+(your\s+)?knowledge\s+(base\s+)?(for\s+)?/i,
|
|
3185
|
-
""
|
|
3186
|
-
).trim();
|
|
3386
|
+
const query = text.replace(/^(search|find|look up|query)\s+(your\s+)?knowledge\s+(base\s+)?(for\s+)?/i, "").trim();
|
|
3187
3387
|
if (!query) {
|
|
3188
3388
|
const response2 = {
|
|
3189
3389
|
text: "What would you like me to search for in my knowledge base?"
|
|
@@ -3217,7 +3417,7 @@ ${formattedResults}`
|
|
|
3217
3417
|
await callback(response);
|
|
3218
3418
|
}
|
|
3219
3419
|
} catch (error) {
|
|
3220
|
-
|
|
3420
|
+
logger6.error("Error in SEARCH_KNOWLEDGE action:", error);
|
|
3221
3421
|
const errorResponse = {
|
|
3222
3422
|
text: `I encountered an error while searching the knowledge base: ${error instanceof Error ? error.message : "Unknown error"}`
|
|
3223
3423
|
};
|
|
@@ -3230,9 +3430,44 @@ ${formattedResults}`
|
|
|
3230
3430
|
var knowledgeActions = [processKnowledgeAction, searchKnowledgeAction];
|
|
3231
3431
|
|
|
3232
3432
|
// src/routes.ts
|
|
3233
|
-
import { createUniqueUuid as createUniqueUuid2, logger as
|
|
3433
|
+
import { createUniqueUuid as createUniqueUuid2, logger as logger7, ModelType as ModelType4 } from "@elizaos/core";
|
|
3234
3434
|
import fs3 from "fs";
|
|
3235
3435
|
import path3 from "path";
|
|
3436
|
+
import multer from "multer";
|
|
3437
|
+
var createUploadMiddleware = (runtime) => {
|
|
3438
|
+
const uploadDir = runtime.getSetting("KNOWLEDGE_UPLOAD_DIR") || "/tmp/uploads/";
|
|
3439
|
+
const maxFileSize = parseInt(runtime.getSetting("KNOWLEDGE_MAX_FILE_SIZE") || "52428800");
|
|
3440
|
+
const maxFiles = parseInt(runtime.getSetting("KNOWLEDGE_MAX_FILES") || "10");
|
|
3441
|
+
const allowedMimeTypes = runtime.getSetting("KNOWLEDGE_ALLOWED_MIME_TYPES")?.split(",") || [
|
|
3442
|
+
"text/plain",
|
|
3443
|
+
"text/markdown",
|
|
3444
|
+
"application/pdf",
|
|
3445
|
+
"application/msword",
|
|
3446
|
+
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
|
3447
|
+
"text/html",
|
|
3448
|
+
"application/json",
|
|
3449
|
+
"application/xml",
|
|
3450
|
+
"text/csv"
|
|
3451
|
+
];
|
|
3452
|
+
return multer({
|
|
3453
|
+
dest: uploadDir,
|
|
3454
|
+
limits: {
|
|
3455
|
+
fileSize: maxFileSize,
|
|
3456
|
+
files: maxFiles
|
|
3457
|
+
},
|
|
3458
|
+
fileFilter: (req, file, cb) => {
|
|
3459
|
+
if (allowedMimeTypes.includes(file.mimetype)) {
|
|
3460
|
+
cb(null, true);
|
|
3461
|
+
} else {
|
|
3462
|
+
cb(
|
|
3463
|
+
new Error(
|
|
3464
|
+
`File type ${file.mimetype} not allowed. Allowed types: ${allowedMimeTypes.join(", ")}`
|
|
3465
|
+
)
|
|
3466
|
+
);
|
|
3467
|
+
}
|
|
3468
|
+
}
|
|
3469
|
+
});
|
|
3470
|
+
};
|
|
3236
3471
|
function sendSuccess(res, data, status = 200) {
|
|
3237
3472
|
res.writeHead(status, { "Content-Type": "application/json" });
|
|
3238
3473
|
res.end(JSON.stringify({ success: true, data }));
|
|
@@ -3246,17 +3481,13 @@ var cleanupFile = (filePath) => {
|
|
|
3246
3481
|
try {
|
|
3247
3482
|
fs3.unlinkSync(filePath);
|
|
3248
3483
|
} catch (error) {
|
|
3249
|
-
|
|
3484
|
+
logger7.error(`Error cleaning up file ${filePath}:`, error);
|
|
3250
3485
|
}
|
|
3251
3486
|
}
|
|
3252
3487
|
};
|
|
3253
3488
|
var cleanupFiles = (files) => {
|
|
3254
3489
|
if (files) {
|
|
3255
|
-
files.forEach((file) =>
|
|
3256
|
-
if (file.tempFilePath) {
|
|
3257
|
-
cleanupFile(file.tempFilePath);
|
|
3258
|
-
}
|
|
3259
|
-
});
|
|
3490
|
+
files.forEach((file) => cleanupFile(file.path));
|
|
3260
3491
|
}
|
|
3261
3492
|
};
|
|
3262
3493
|
async function uploadKnowledgeHandler(req, res, runtime) {
|
|
@@ -3264,50 +3495,35 @@ async function uploadKnowledgeHandler(req, res, runtime) {
|
|
|
3264
3495
|
if (!service) {
|
|
3265
3496
|
return sendError(res, 500, "SERVICE_NOT_FOUND", "KnowledgeService not found");
|
|
3266
3497
|
}
|
|
3267
|
-
const hasUploadedFiles = req.files &&
|
|
3498
|
+
const hasUploadedFiles = req.files && req.files.length > 0;
|
|
3268
3499
|
const isJsonRequest = !hasUploadedFiles && req.body && (req.body.fileUrl || req.body.fileUrls);
|
|
3269
3500
|
if (!hasUploadedFiles && !isJsonRequest) {
|
|
3270
3501
|
return sendError(res, 400, "INVALID_REQUEST", "Request must contain either files or URLs");
|
|
3271
3502
|
}
|
|
3272
3503
|
try {
|
|
3273
3504
|
if (hasUploadedFiles) {
|
|
3274
|
-
|
|
3275
|
-
if (req.files.files) {
|
|
3276
|
-
if (Array.isArray(req.files.files)) {
|
|
3277
|
-
files = req.files.files;
|
|
3278
|
-
} else {
|
|
3279
|
-
files = [req.files.files];
|
|
3280
|
-
}
|
|
3281
|
-
} else if (req.files.file) {
|
|
3282
|
-
files = [req.files.file];
|
|
3283
|
-
} else {
|
|
3284
|
-
files = Object.values(req.files).flat();
|
|
3285
|
-
}
|
|
3505
|
+
const files = req.files;
|
|
3286
3506
|
if (!files || files.length === 0) {
|
|
3287
3507
|
return sendError(res, 400, "NO_FILES", "No files uploaded");
|
|
3288
3508
|
}
|
|
3289
3509
|
const invalidFiles = files.filter((file) => {
|
|
3290
|
-
if (file.truncated) {
|
|
3291
|
-
logger6.warn(`File ${file.name} was truncated during upload`);
|
|
3292
|
-
return true;
|
|
3293
|
-
}
|
|
3294
3510
|
if (file.size === 0) {
|
|
3295
|
-
|
|
3511
|
+
logger7.warn(`File ${file.originalname} is empty`);
|
|
3296
3512
|
return true;
|
|
3297
3513
|
}
|
|
3298
|
-
if (!file.
|
|
3299
|
-
|
|
3514
|
+
if (!file.originalname || file.originalname.trim() === "") {
|
|
3515
|
+
logger7.warn(`File has no name`);
|
|
3300
3516
|
return true;
|
|
3301
3517
|
}
|
|
3302
|
-
if (!file.
|
|
3303
|
-
|
|
3518
|
+
if (!file.path) {
|
|
3519
|
+
logger7.warn(`File ${file.originalname} has no path`);
|
|
3304
3520
|
return true;
|
|
3305
3521
|
}
|
|
3306
3522
|
return false;
|
|
3307
3523
|
});
|
|
3308
3524
|
if (invalidFiles.length > 0) {
|
|
3309
3525
|
cleanupFiles(files);
|
|
3310
|
-
const invalidFileNames = invalidFiles.map((f) => f.
|
|
3526
|
+
const invalidFileNames = invalidFiles.map((f) => f.originalname || "unnamed").join(", ");
|
|
3311
3527
|
return sendError(
|
|
3312
3528
|
res,
|
|
3313
3529
|
400,
|
|
@@ -3317,8 +3533,7 @@ async function uploadKnowledgeHandler(req, res, runtime) {
|
|
|
3317
3533
|
}
|
|
3318
3534
|
const agentId = req.body.agentId || req.query.agentId;
|
|
3319
3535
|
if (!agentId) {
|
|
3320
|
-
|
|
3321
|
-
cleanupFiles(files);
|
|
3536
|
+
logger7.error("[Document Processor] \u274C No agent ID provided in upload request");
|
|
3322
3537
|
return sendError(
|
|
3323
3538
|
res,
|
|
3324
3539
|
400,
|
|
@@ -3327,55 +3542,25 @@ async function uploadKnowledgeHandler(req, res, runtime) {
|
|
|
3327
3542
|
);
|
|
3328
3543
|
}
|
|
3329
3544
|
const worldId = req.body.worldId || agentId;
|
|
3330
|
-
|
|
3545
|
+
logger7.info(`[Document Processor] \u{1F4E4} Processing file upload for agent: ${agentId}`);
|
|
3331
3546
|
const processingPromises = files.map(async (file, index) => {
|
|
3332
|
-
const originalFilename = file.
|
|
3333
|
-
const filePath = file.
|
|
3334
|
-
|
|
3335
|
-
`[
|
|
3547
|
+
const originalFilename = file.originalname;
|
|
3548
|
+
const filePath = file.path;
|
|
3549
|
+
logger7.debug(
|
|
3550
|
+
`[Document Processor] \u{1F4C4} Processing file: ${originalFilename} (agent: ${agentId})`
|
|
3336
3551
|
);
|
|
3337
3552
|
try {
|
|
3338
|
-
|
|
3339
|
-
if (filePath && fs3.existsSync(filePath)) {
|
|
3340
|
-
try {
|
|
3341
|
-
const stats = await fs3.promises.stat(filePath);
|
|
3342
|
-
if (stats.size === 0) {
|
|
3343
|
-
throw new Error("Temporary file is empty");
|
|
3344
|
-
}
|
|
3345
|
-
fileBuffer = await fs3.promises.readFile(filePath);
|
|
3346
|
-
logger6.debug(
|
|
3347
|
-
`[KNOWLEDGE UPLOAD] Read ${fileBuffer.length} bytes from temp file: ${filePath}`
|
|
3348
|
-
);
|
|
3349
|
-
} catch (fsError) {
|
|
3350
|
-
throw new Error(`Failed to read temporary file: ${fsError.message}`);
|
|
3351
|
-
}
|
|
3352
|
-
} else if (file.data && Buffer.isBuffer(file.data)) {
|
|
3353
|
-
fileBuffer = file.data;
|
|
3354
|
-
logger6.debug(`[KNOWLEDGE UPLOAD] Using in-memory buffer of ${fileBuffer.length} bytes`);
|
|
3355
|
-
} else {
|
|
3356
|
-
throw new Error("No file data available - neither temp file nor buffer found");
|
|
3357
|
-
}
|
|
3358
|
-
if (!Buffer.isBuffer(fileBuffer) || fileBuffer.length === 0) {
|
|
3359
|
-
throw new Error("Invalid or empty file buffer");
|
|
3360
|
-
}
|
|
3361
|
-
if (fileBuffer.length !== file.size) {
|
|
3362
|
-
logger6.warn(
|
|
3363
|
-
`File size mismatch for ${originalFilename}: expected ${file.size}, got ${fileBuffer.length}`
|
|
3364
|
-
);
|
|
3365
|
-
}
|
|
3553
|
+
const fileBuffer = await fs3.promises.readFile(filePath);
|
|
3366
3554
|
const base64Content = fileBuffer.toString("base64");
|
|
3367
|
-
if (!base64Content || base64Content.length === 0) {
|
|
3368
|
-
throw new Error("Failed to convert file to base64");
|
|
3369
|
-
}
|
|
3370
3555
|
const addKnowledgeOpts = {
|
|
3371
3556
|
agentId,
|
|
3372
3557
|
// Pass the agent ID from frontend
|
|
3373
3558
|
clientDocumentId: "",
|
|
3374
3559
|
// This will be ignored by the service
|
|
3375
3560
|
contentType: file.mimetype,
|
|
3376
|
-
// Directly from
|
|
3561
|
+
// Directly from multer file object
|
|
3377
3562
|
originalFilename,
|
|
3378
|
-
// Directly from
|
|
3563
|
+
// Directly from multer file object
|
|
3379
3564
|
content: base64Content,
|
|
3380
3565
|
// The base64 string of the file
|
|
3381
3566
|
worldId,
|
|
@@ -3385,9 +3570,7 @@ async function uploadKnowledgeHandler(req, res, runtime) {
|
|
|
3385
3570
|
// Use the correct agent ID
|
|
3386
3571
|
};
|
|
3387
3572
|
const result = await service.addKnowledge(addKnowledgeOpts);
|
|
3388
|
-
|
|
3389
|
-
cleanupFile(filePath);
|
|
3390
|
-
}
|
|
3573
|
+
cleanupFile(filePath);
|
|
3391
3574
|
return {
|
|
3392
3575
|
id: result.clientDocumentId,
|
|
3393
3576
|
// Use the content-based ID returned by the service
|
|
@@ -3398,12 +3581,11 @@ async function uploadKnowledgeHandler(req, res, runtime) {
|
|
|
3398
3581
|
status: "success"
|
|
3399
3582
|
};
|
|
3400
3583
|
} catch (fileError) {
|
|
3401
|
-
|
|
3402
|
-
`[
|
|
3584
|
+
logger7.error(
|
|
3585
|
+
`[Document Processor] \u274C Error processing file ${file.originalname}:`,
|
|
3586
|
+
fileError
|
|
3403
3587
|
);
|
|
3404
|
-
|
|
3405
|
-
cleanupFile(filePath);
|
|
3406
|
-
}
|
|
3588
|
+
cleanupFile(filePath);
|
|
3407
3589
|
return {
|
|
3408
3590
|
id: "",
|
|
3409
3591
|
// No ID since processing failed
|
|
@@ -3422,7 +3604,7 @@ async function uploadKnowledgeHandler(req, res, runtime) {
|
|
|
3422
3604
|
}
|
|
3423
3605
|
const agentId = req.body.agentId || req.query.agentId;
|
|
3424
3606
|
if (!agentId) {
|
|
3425
|
-
|
|
3607
|
+
logger7.error("[Document Processor] \u274C No agent ID provided in URL request");
|
|
3426
3608
|
return sendError(
|
|
3427
3609
|
res,
|
|
3428
3610
|
400,
|
|
@@ -3430,7 +3612,7 @@ async function uploadKnowledgeHandler(req, res, runtime) {
|
|
|
3430
3612
|
"Agent ID is required for uploading knowledge from URLs"
|
|
3431
3613
|
);
|
|
3432
3614
|
}
|
|
3433
|
-
|
|
3615
|
+
logger7.info(`[Document Processor] \u{1F4E4} Processing URL upload for agent: ${agentId}`);
|
|
3434
3616
|
const processingPromises = fileUrls.map(async (fileUrl) => {
|
|
3435
3617
|
try {
|
|
3436
3618
|
const normalizedUrl = normalizeS3Url(fileUrl);
|
|
@@ -3438,7 +3620,7 @@ async function uploadKnowledgeHandler(req, res, runtime) {
|
|
|
3438
3620
|
const pathSegments = urlObject.pathname.split("/");
|
|
3439
3621
|
const encodedFilename = pathSegments[pathSegments.length - 1] || "document.pdf";
|
|
3440
3622
|
const originalFilename = decodeURIComponent(encodedFilename);
|
|
3441
|
-
|
|
3623
|
+
logger7.debug(`[Document Processor] \u{1F310} Fetching content from URL: ${fileUrl}`);
|
|
3442
3624
|
const { content, contentType: fetchedContentType } = await fetchUrlContent(fileUrl);
|
|
3443
3625
|
let contentType = fetchedContentType;
|
|
3444
3626
|
if (contentType === "application/octet-stream") {
|
|
@@ -3478,8 +3660,8 @@ async function uploadKnowledgeHandler(req, res, runtime) {
|
|
|
3478
3660
|
url: normalizedUrl
|
|
3479
3661
|
}
|
|
3480
3662
|
};
|
|
3481
|
-
|
|
3482
|
-
`[
|
|
3663
|
+
logger7.debug(
|
|
3664
|
+
`[Document Processor] \u{1F4C4} Processing knowledge from URL: ${originalFilename} (type: ${contentType})`
|
|
3483
3665
|
);
|
|
3484
3666
|
const result = await service.addKnowledge(addKnowledgeOpts);
|
|
3485
3667
|
return {
|
|
@@ -3493,7 +3675,7 @@ async function uploadKnowledgeHandler(req, res, runtime) {
|
|
|
3493
3675
|
status: "success"
|
|
3494
3676
|
};
|
|
3495
3677
|
} catch (urlError) {
|
|
3496
|
-
|
|
3678
|
+
logger7.error(`[Document Processor] \u274C Error processing URL ${fileUrl}:`, urlError);
|
|
3497
3679
|
return {
|
|
3498
3680
|
fileUrl,
|
|
3499
3681
|
status: "error_processing",
|
|
@@ -3505,10 +3687,9 @@ async function uploadKnowledgeHandler(req, res, runtime) {
|
|
|
3505
3687
|
sendSuccess(res, results);
|
|
3506
3688
|
}
|
|
3507
3689
|
} catch (error) {
|
|
3508
|
-
|
|
3509
|
-
if (hasUploadedFiles
|
|
3510
|
-
|
|
3511
|
-
cleanupFiles(allFiles);
|
|
3690
|
+
logger7.error("[Document Processor] \u274C Error processing knowledge:", error);
|
|
3691
|
+
if (hasUploadedFiles) {
|
|
3692
|
+
cleanupFiles(req.files);
|
|
3512
3693
|
}
|
|
3513
3694
|
sendError(res, 500, "PROCESSING_ERROR", "Failed to process knowledge", error.message);
|
|
3514
3695
|
}
|
|
@@ -3524,7 +3705,7 @@ async function getKnowledgeDocumentsHandler(req, res, runtime) {
|
|
|
3524
3705
|
);
|
|
3525
3706
|
}
|
|
3526
3707
|
try {
|
|
3527
|
-
const limit = req.query.limit ? Number.parseInt(req.query.limit, 10) :
|
|
3708
|
+
const limit = req.query.limit ? Number.parseInt(req.query.limit, 10) : 1e4;
|
|
3528
3709
|
const before = req.query.before ? Number.parseInt(req.query.before, 10) : Date.now();
|
|
3529
3710
|
const includeEmbedding = req.query.includeEmbedding === "true";
|
|
3530
3711
|
const agentId = req.query.agentId;
|
|
@@ -3545,8 +3726,8 @@ async function getKnowledgeDocumentsHandler(req, res, runtime) {
|
|
|
3545
3726
|
// Or if the URL is stored in the metadata (check if it exists)
|
|
3546
3727
|
memory.metadata && "url" in memory.metadata && typeof memory.metadata.url === "string" && normalizedRequestUrls.includes(normalizeS3Url(memory.metadata.url))
|
|
3547
3728
|
);
|
|
3548
|
-
|
|
3549
|
-
`[
|
|
3729
|
+
logger7.debug(
|
|
3730
|
+
`[Document Processor] \u{1F50D} Filtered documents by URLs: ${fileUrls.length} URLs, found ${filteredMemories.length} matching documents`
|
|
3550
3731
|
);
|
|
3551
3732
|
}
|
|
3552
3733
|
const cleanMemories = includeEmbedding ? filteredMemories : filteredMemories.map((memory) => ({
|
|
@@ -3560,15 +3741,12 @@ async function getKnowledgeDocumentsHandler(req, res, runtime) {
|
|
|
3560
3741
|
totalRequested: fileUrls ? fileUrls.length : 0
|
|
3561
3742
|
});
|
|
3562
3743
|
} catch (error) {
|
|
3563
|
-
|
|
3744
|
+
logger7.error("[Document Processor] \u274C Error retrieving documents:", error);
|
|
3564
3745
|
sendError(res, 500, "RETRIEVAL_ERROR", "Failed to retrieve documents", error.message);
|
|
3565
3746
|
}
|
|
3566
3747
|
}
|
|
3567
3748
|
async function deleteKnowledgeDocumentHandler(req, res, runtime) {
|
|
3568
|
-
|
|
3569
|
-
- path: ${req.path}
|
|
3570
|
-
- params: ${JSON.stringify(req.params)}
|
|
3571
|
-
`);
|
|
3749
|
+
logger7.debug(`[Document Processor] \u{1F5D1}\uFE0F DELETE request for document: ${req.params.knowledgeId}`);
|
|
3572
3750
|
const service = runtime.getService(KnowledgeService.serviceType);
|
|
3573
3751
|
if (!service) {
|
|
3574
3752
|
return sendError(
|
|
@@ -3580,29 +3758,22 @@ async function deleteKnowledgeDocumentHandler(req, res, runtime) {
|
|
|
3580
3758
|
}
|
|
3581
3759
|
const knowledgeId = req.params.knowledgeId;
|
|
3582
3760
|
if (!knowledgeId || knowledgeId.length < 36) {
|
|
3583
|
-
|
|
3761
|
+
logger7.error(`[Document Processor] \u274C Invalid knowledge ID format: ${knowledgeId}`);
|
|
3584
3762
|
return sendError(res, 400, "INVALID_ID", "Invalid Knowledge ID format");
|
|
3585
3763
|
}
|
|
3586
3764
|
try {
|
|
3587
3765
|
const typedKnowledgeId = knowledgeId;
|
|
3588
|
-
|
|
3589
|
-
`[KNOWLEDGE DELETE HANDLER] Attempting to delete document with ID: ${typedKnowledgeId}`
|
|
3590
|
-
);
|
|
3766
|
+
logger7.debug(`[Document Processor] \u{1F5D1}\uFE0F Deleting document: ${typedKnowledgeId}`);
|
|
3591
3767
|
await service.deleteMemory(typedKnowledgeId);
|
|
3592
|
-
|
|
3593
|
-
`[KNOWLEDGE DELETE HANDLER] Successfully deleted document with ID: ${typedKnowledgeId}`
|
|
3594
|
-
);
|
|
3768
|
+
logger7.info(`[Document Processor] \u2705 Successfully deleted document: ${typedKnowledgeId}`);
|
|
3595
3769
|
sendSuccess(res, null, 204);
|
|
3596
3770
|
} catch (error) {
|
|
3597
|
-
|
|
3771
|
+
logger7.error(`[Document Processor] \u274C Error deleting document ${knowledgeId}:`, error);
|
|
3598
3772
|
sendError(res, 500, "DELETE_ERROR", "Failed to delete document", error.message);
|
|
3599
3773
|
}
|
|
3600
3774
|
}
|
|
3601
3775
|
async function getKnowledgeByIdHandler(req, res, runtime) {
|
|
3602
|
-
|
|
3603
|
-
- path: ${req.path}
|
|
3604
|
-
- params: ${JSON.stringify(req.params)}
|
|
3605
|
-
`);
|
|
3776
|
+
logger7.debug(`[Document Processor] \u{1F50D} GET request for document: ${req.params.knowledgeId}`);
|
|
3606
3777
|
const service = runtime.getService(KnowledgeService.serviceType);
|
|
3607
3778
|
if (!service) {
|
|
3608
3779
|
return sendError(
|
|
@@ -3614,15 +3785,15 @@ async function getKnowledgeByIdHandler(req, res, runtime) {
|
|
|
3614
3785
|
}
|
|
3615
3786
|
const knowledgeId = req.params.knowledgeId;
|
|
3616
3787
|
if (!knowledgeId || knowledgeId.length < 36) {
|
|
3617
|
-
|
|
3788
|
+
logger7.error(`[Document Processor] \u274C Invalid knowledge ID format: ${knowledgeId}`);
|
|
3618
3789
|
return sendError(res, 400, "INVALID_ID", "Invalid Knowledge ID format");
|
|
3619
3790
|
}
|
|
3620
3791
|
try {
|
|
3621
|
-
|
|
3792
|
+
logger7.debug(`[Document Processor] \u{1F50D} Retrieving document: ${knowledgeId}`);
|
|
3622
3793
|
const agentId = req.query.agentId;
|
|
3623
3794
|
const memories = await service.getMemories({
|
|
3624
3795
|
tableName: "documents",
|
|
3625
|
-
count:
|
|
3796
|
+
count: 1e4
|
|
3626
3797
|
});
|
|
3627
3798
|
const typedKnowledgeId = knowledgeId;
|
|
3628
3799
|
const document = memories.find((memory) => memory.id === typedKnowledgeId);
|
|
@@ -3635,17 +3806,17 @@ async function getKnowledgeByIdHandler(req, res, runtime) {
|
|
|
3635
3806
|
};
|
|
3636
3807
|
sendSuccess(res, { document: cleanDocument });
|
|
3637
3808
|
} catch (error) {
|
|
3638
|
-
|
|
3809
|
+
logger7.error(`[Document Processor] \u274C Error retrieving document ${knowledgeId}:`, error);
|
|
3639
3810
|
sendError(res, 500, "RETRIEVAL_ERROR", "Failed to retrieve document", error.message);
|
|
3640
3811
|
}
|
|
3641
3812
|
}
|
|
3642
3813
|
async function knowledgePanelHandler(req, res, runtime) {
|
|
3643
3814
|
const agentId = runtime.agentId;
|
|
3644
|
-
|
|
3815
|
+
logger7.debug(`[Document Processor] \u{1F310} Serving knowledge panel for agent ${agentId}`);
|
|
3645
3816
|
try {
|
|
3646
3817
|
const currentDir = path3.dirname(new URL(import.meta.url).pathname);
|
|
3647
3818
|
const frontendPath = path3.join(currentDir, "../dist/index.html");
|
|
3648
|
-
|
|
3819
|
+
logger7.debug(`[Document Processor] \u{1F310} Looking for frontend at: ${frontendPath}`);
|
|
3649
3820
|
if (fs3.existsSync(frontendPath)) {
|
|
3650
3821
|
const html = await fs3.promises.readFile(frontendPath, "utf8");
|
|
3651
3822
|
const injectedHtml = html.replace(
|
|
@@ -3679,10 +3850,10 @@ async function knowledgePanelHandler(req, res, runtime) {
|
|
|
3679
3850
|
}
|
|
3680
3851
|
}
|
|
3681
3852
|
} catch (manifestError) {
|
|
3682
|
-
|
|
3853
|
+
logger7.error("[Document Processor] \u274C Error reading manifest:", manifestError);
|
|
3683
3854
|
}
|
|
3684
3855
|
}
|
|
3685
|
-
|
|
3856
|
+
logger7.debug(`[Document Processor] \u{1F310} Using fallback with CSS: ${cssFile}, JS: ${jsFile}`);
|
|
3686
3857
|
const html = `
|
|
3687
3858
|
<!DOCTYPE html>
|
|
3688
3859
|
<html lang="en">
|
|
@@ -3716,15 +3887,13 @@ async function knowledgePanelHandler(req, res, runtime) {
|
|
|
3716
3887
|
res.end(html);
|
|
3717
3888
|
}
|
|
3718
3889
|
} catch (error) {
|
|
3719
|
-
|
|
3890
|
+
logger7.error("[Document Processor] \u274C Error serving frontend:", error);
|
|
3720
3891
|
sendError(res, 500, "FRONTEND_ERROR", "Failed to load knowledge panel", error.message);
|
|
3721
3892
|
}
|
|
3722
3893
|
}
|
|
3723
3894
|
async function frontendAssetHandler(req, res, runtime) {
|
|
3724
3895
|
try {
|
|
3725
|
-
|
|
3726
|
-
`[KNOWLEDGE ASSET HANDLER] Called with req.path: ${req.path}, req.originalUrl: ${req.originalUrl}, req.params: ${JSON.stringify(req.params)}`
|
|
3727
|
-
);
|
|
3896
|
+
logger7.debug(`[Document Processor] \u{1F310} Asset request: ${req.path}`);
|
|
3728
3897
|
const currentDir = path3.dirname(new URL(import.meta.url).pathname);
|
|
3729
3898
|
const assetRequestPath = req.path;
|
|
3730
3899
|
const assetsMarker = "/assets/";
|
|
@@ -3742,7 +3911,7 @@ async function frontendAssetHandler(req, res, runtime) {
|
|
|
3742
3911
|
);
|
|
3743
3912
|
}
|
|
3744
3913
|
const assetPath = path3.join(currentDir, "../dist/assets", assetName);
|
|
3745
|
-
|
|
3914
|
+
logger7.debug(`[Document Processor] \u{1F310} Serving asset: ${assetPath}`);
|
|
3746
3915
|
if (fs3.existsSync(assetPath)) {
|
|
3747
3916
|
const fileStream = fs3.createReadStream(assetPath);
|
|
3748
3917
|
let contentType = "application/octet-stream";
|
|
@@ -3757,7 +3926,7 @@ async function frontendAssetHandler(req, res, runtime) {
|
|
|
3757
3926
|
sendError(res, 404, "NOT_FOUND", `Asset not found: ${req.url}`);
|
|
3758
3927
|
}
|
|
3759
3928
|
} catch (error) {
|
|
3760
|
-
|
|
3929
|
+
logger7.error(`[Document Processor] \u274C Error serving asset ${req.url}:`, error);
|
|
3761
3930
|
sendError(res, 500, "ASSET_ERROR", `Failed to load asset ${req.url}`, error.message);
|
|
3762
3931
|
}
|
|
3763
3932
|
}
|
|
@@ -3771,8 +3940,8 @@ async function getKnowledgeChunksHandler(req, res, runtime) {
|
|
|
3771
3940
|
const documentsOnly = req.query.documentsOnly === "true";
|
|
3772
3941
|
const documents = await service.getMemories({
|
|
3773
3942
|
tableName: "documents",
|
|
3774
|
-
count:
|
|
3775
|
-
//
|
|
3943
|
+
count: 1e4,
|
|
3944
|
+
// High limit to get all documents
|
|
3776
3945
|
end: Date.now()
|
|
3777
3946
|
});
|
|
3778
3947
|
if (documentsOnly) {
|
|
@@ -3818,7 +3987,7 @@ async function getKnowledgeChunksHandler(req, res, runtime) {
|
|
|
3818
3987
|
}
|
|
3819
3988
|
});
|
|
3820
3989
|
} catch (error) {
|
|
3821
|
-
|
|
3990
|
+
logger7.error("[Document Processor] \u274C Error retrieving chunks:", error);
|
|
3822
3991
|
sendError(res, 500, "RETRIEVAL_ERROR", "Failed to retrieve knowledge chunks", error.message);
|
|
3823
3992
|
}
|
|
3824
3993
|
}
|
|
@@ -3840,15 +4009,15 @@ async function searchKnowledgeHandler(req, res, runtime) {
|
|
|
3840
4009
|
return sendError(res, 400, "INVALID_QUERY", "Search query cannot be empty");
|
|
3841
4010
|
}
|
|
3842
4011
|
if (req.query.threshold && (parsedThreshold < 0 || parsedThreshold > 1)) {
|
|
3843
|
-
|
|
3844
|
-
`[
|
|
4012
|
+
logger7.debug(
|
|
4013
|
+
`[Document Processor] \u{1F50D} Threshold value ${parsedThreshold} was clamped to ${matchThreshold}`
|
|
3845
4014
|
);
|
|
3846
4015
|
}
|
|
3847
4016
|
if (req.query.limit && (parsedLimit < 1 || parsedLimit > 100)) {
|
|
3848
|
-
|
|
4017
|
+
logger7.debug(`[Document Processor] \u{1F50D} Limit value ${parsedLimit} was clamped to ${limit}`);
|
|
3849
4018
|
}
|
|
3850
|
-
|
|
3851
|
-
`[
|
|
4019
|
+
logger7.debug(
|
|
4020
|
+
`[Document Processor] \u{1F50D} Searching: "${searchText}" (threshold: ${matchThreshold}, limit: ${limit})`
|
|
3852
4021
|
);
|
|
3853
4022
|
const embedding = await runtime.useModel(ModelType4.TEXT_EMBEDDING, {
|
|
3854
4023
|
text: searchText
|
|
@@ -3874,7 +4043,7 @@ async function searchKnowledgeHandler(req, res, runtime) {
|
|
|
3874
4043
|
documentFilename = document.metadata.filename || documentFilename;
|
|
3875
4044
|
}
|
|
3876
4045
|
} catch (e) {
|
|
3877
|
-
|
|
4046
|
+
logger7.debug(`Could not fetch document ${documentId} for fragment`);
|
|
3878
4047
|
}
|
|
3879
4048
|
}
|
|
3880
4049
|
return {
|
|
@@ -3889,8 +4058,8 @@ async function searchKnowledgeHandler(req, res, runtime) {
|
|
|
3889
4058
|
};
|
|
3890
4059
|
})
|
|
3891
4060
|
);
|
|
3892
|
-
|
|
3893
|
-
`[
|
|
4061
|
+
logger7.info(
|
|
4062
|
+
`[Document Processor] \u{1F50D} Found ${enhancedResults.length} results for: "${searchText}"`
|
|
3894
4063
|
);
|
|
3895
4064
|
sendSuccess(res, {
|
|
3896
4065
|
query: searchText,
|
|
@@ -3899,29 +4068,23 @@ async function searchKnowledgeHandler(req, res, runtime) {
|
|
|
3899
4068
|
count: enhancedResults.length
|
|
3900
4069
|
});
|
|
3901
4070
|
} catch (error) {
|
|
3902
|
-
|
|
4071
|
+
logger7.error("[Document Processor] \u274C Error searching knowledge:", error);
|
|
3903
4072
|
sendError(res, 500, "SEARCH_ERROR", "Failed to search knowledge", error.message);
|
|
3904
4073
|
}
|
|
3905
4074
|
}
|
|
3906
|
-
async function
|
|
3907
|
-
|
|
3908
|
-
|
|
3909
|
-
|
|
3910
|
-
|
|
3911
|
-
|
|
3912
|
-
|
|
3913
|
-
|
|
3914
|
-
|
|
3915
|
-
|
|
3916
|
-
try {
|
|
3917
|
-
logger6.debug("[KNOWLEDGE UPLOAD] Using files parsed by global middleware");
|
|
3918
|
-
await uploadKnowledgeHandler(req, res, runtime);
|
|
3919
|
-
} catch (handlerError) {
|
|
3920
|
-
logger6.error("[KNOWLEDGE UPLOAD] Handler error:", handlerError);
|
|
3921
|
-
if (!res.headersSent) {
|
|
3922
|
-
sendError(res, 500, "HANDLER_ERROR", "Failed to process upload");
|
|
4075
|
+
async function uploadKnowledgeWithMulter(req, res, runtime) {
|
|
4076
|
+
const upload = createUploadMiddleware(runtime);
|
|
4077
|
+
const uploadArray = upload.array(
|
|
4078
|
+
"files",
|
|
4079
|
+
parseInt(runtime.getSetting("KNOWLEDGE_MAX_FILES") || "10")
|
|
4080
|
+
);
|
|
4081
|
+
uploadArray(req, res, (err) => {
|
|
4082
|
+
if (err) {
|
|
4083
|
+
logger7.error("[Document Processor] \u274C File upload error:", err);
|
|
4084
|
+
return sendError(res, 400, "UPLOAD_ERROR", err.message);
|
|
3923
4085
|
}
|
|
3924
|
-
|
|
4086
|
+
uploadKnowledgeHandler(req, res, runtime);
|
|
4087
|
+
});
|
|
3925
4088
|
}
|
|
3926
4089
|
var knowledgeRoutes = [
|
|
3927
4090
|
{
|
|
@@ -3939,7 +4102,7 @@ var knowledgeRoutes = [
|
|
|
3939
4102
|
{
|
|
3940
4103
|
type: "POST",
|
|
3941
4104
|
path: "/documents",
|
|
3942
|
-
handler:
|
|
4105
|
+
handler: uploadKnowledgeWithMulter
|
|
3943
4106
|
},
|
|
3944
4107
|
{
|
|
3945
4108
|
type: "GET",
|
|
@@ -3980,59 +4143,86 @@ var knowledgePlugin = {
|
|
|
3980
4143
|
CTX_KNOWLEDGE_ENABLED: "false"
|
|
3981
4144
|
},
|
|
3982
4145
|
async init(config, runtime) {
|
|
3983
|
-
|
|
4146
|
+
logger8.info("Initializing Knowledge Plugin...");
|
|
3984
4147
|
try {
|
|
3985
|
-
|
|
4148
|
+
logger8.info("Validating model configuration for Knowledge plugin...");
|
|
4149
|
+
logger8.info(`[Knowledge Plugin] INIT DEBUG:`);
|
|
4150
|
+
logger8.info(`[Knowledge Plugin] - Runtime available: ${!!runtime}`);
|
|
4151
|
+
logger8.info(
|
|
4152
|
+
`[Knowledge Plugin] - process.env.CTX_KNOWLEDGE_ENABLED: '${process.env.CTX_KNOWLEDGE_ENABLED}'`
|
|
4153
|
+
);
|
|
4154
|
+
logger8.info(
|
|
4155
|
+
`[Knowledge Plugin] - config.CTX_KNOWLEDGE_ENABLED: '${config.CTX_KNOWLEDGE_ENABLED}'`
|
|
4156
|
+
);
|
|
4157
|
+
if (runtime) {
|
|
4158
|
+
logger8.info(
|
|
4159
|
+
`[Knowledge Plugin] - runtime.getSetting('CTX_KNOWLEDGE_ENABLED'): '${runtime.getSetting("CTX_KNOWLEDGE_ENABLED")}'`
|
|
4160
|
+
);
|
|
4161
|
+
}
|
|
3986
4162
|
const validatedConfig = validateModelConfig(runtime);
|
|
3987
|
-
|
|
3988
|
-
|
|
3989
|
-
|
|
3990
|
-
|
|
4163
|
+
const ctxEnabledFromEnv = process.env.CTX_KNOWLEDGE_ENABLED === "true" || process.env.CTX_KNOWLEDGE_ENABLED === "True";
|
|
4164
|
+
const ctxEnabledFromConfig = config.CTX_KNOWLEDGE_ENABLED === "true" || config.CTX_KNOWLEDGE_ENABLED === "True";
|
|
4165
|
+
const ctxEnabledFromValidated = validatedConfig.CTX_KNOWLEDGE_ENABLED;
|
|
4166
|
+
const ctxEnabledFromRuntime = runtime ? runtime.getSetting("CTX_KNOWLEDGE_ENABLED") === "true" || runtime.getSetting("CTX_KNOWLEDGE_ENABLED") === "True" : false;
|
|
4167
|
+
const finalCtxEnabled = ctxEnabledFromEnv || ctxEnabledFromConfig || ctxEnabledFromValidated || ctxEnabledFromRuntime;
|
|
4168
|
+
logger8.info(`[Knowledge Plugin] CTX_KNOWLEDGE_ENABLED sources:`);
|
|
4169
|
+
logger8.info(`[Knowledge Plugin] - From env: ${ctxEnabledFromEnv}`);
|
|
4170
|
+
logger8.info(`[Knowledge Plugin] - From config: ${ctxEnabledFromConfig}`);
|
|
4171
|
+
logger8.info(`[Knowledge Plugin] - From validated: ${ctxEnabledFromValidated}`);
|
|
4172
|
+
logger8.info(`[Knowledge Plugin] - From runtime: ${ctxEnabledFromRuntime}`);
|
|
4173
|
+
logger8.info(`[Knowledge Plugin] - FINAL RESULT: ${finalCtxEnabled}`);
|
|
4174
|
+
if (finalCtxEnabled) {
|
|
4175
|
+
logger8.info("Running in Contextual Knowledge mode with text generation capabilities.");
|
|
4176
|
+
logger8.info(
|
|
4177
|
+
`Using ${validatedConfig.EMBEDDING_PROVIDER || "auto-detected"} for embeddings and ${validatedConfig.TEXT_PROVIDER || process.env.TEXT_PROVIDER} for text generation.`
|
|
3991
4178
|
);
|
|
4179
|
+
logger8.info(`Text model: ${validatedConfig.TEXT_MODEL || process.env.TEXT_MODEL}`);
|
|
3992
4180
|
} else {
|
|
3993
4181
|
const usingPluginOpenAI = !process.env.EMBEDDING_PROVIDER;
|
|
4182
|
+
logger8.warn(
|
|
4183
|
+
"Running in Basic Embedding mode - documents will NOT be enriched with context!"
|
|
4184
|
+
);
|
|
4185
|
+
logger8.info("To enable contextual enrichment:");
|
|
4186
|
+
logger8.info(" - Set CTX_KNOWLEDGE_ENABLED=true");
|
|
4187
|
+
logger8.info(" - Configure TEXT_PROVIDER (anthropic/openai/openrouter/google)");
|
|
4188
|
+
logger8.info(" - Configure TEXT_MODEL and API key");
|
|
3994
4189
|
if (usingPluginOpenAI) {
|
|
3995
|
-
|
|
3996
|
-
"Running in Basic Embedding mode with auto-detected configuration from plugin-openai."
|
|
3997
|
-
);
|
|
4190
|
+
logger8.info("Using auto-detected configuration from plugin-openai for embeddings.");
|
|
3998
4191
|
} else {
|
|
3999
|
-
|
|
4000
|
-
|
|
4192
|
+
logger8.info(
|
|
4193
|
+
`Using ${validatedConfig.EMBEDDING_PROVIDER} for embeddings with ${validatedConfig.TEXT_EMBEDDING_MODEL}.`
|
|
4001
4194
|
);
|
|
4002
4195
|
}
|
|
4003
|
-
logger7.info(
|
|
4004
|
-
`Using ${validatedConfig.EMBEDDING_PROVIDER} for embeddings with ${validatedConfig.TEXT_EMBEDDING_MODEL}.`
|
|
4005
|
-
);
|
|
4006
4196
|
}
|
|
4007
|
-
|
|
4197
|
+
logger8.info("Model configuration validated successfully.");
|
|
4008
4198
|
if (runtime) {
|
|
4009
|
-
|
|
4199
|
+
logger8.info(`Knowledge Plugin initialized for agent: ${runtime.agentId}`);
|
|
4010
4200
|
const loadDocsOnStartup = config.LOAD_DOCS_ON_STARTUP === "true" || process.env.LOAD_DOCS_ON_STARTUP === "true";
|
|
4011
4201
|
if (loadDocsOnStartup) {
|
|
4012
|
-
|
|
4202
|
+
logger8.info("LOAD_DOCS_ON_STARTUP is enabled. Scheduling document loading...");
|
|
4013
4203
|
setTimeout(async () => {
|
|
4014
4204
|
try {
|
|
4015
4205
|
const service = runtime.getService(KnowledgeService.serviceType);
|
|
4016
4206
|
if (service instanceof KnowledgeService) {
|
|
4017
|
-
const { loadDocsFromPath: loadDocsFromPath2 } = await import("./docs-loader-
|
|
4207
|
+
const { loadDocsFromPath: loadDocsFromPath2 } = await import("./docs-loader-5H4HRYEE.js");
|
|
4018
4208
|
const result = await loadDocsFromPath2(service, runtime.agentId);
|
|
4019
4209
|
if (result.successful > 0) {
|
|
4020
|
-
|
|
4210
|
+
logger8.info(`Loaded ${result.successful} documents from docs folder on startup`);
|
|
4021
4211
|
}
|
|
4022
4212
|
}
|
|
4023
4213
|
} catch (error) {
|
|
4024
|
-
|
|
4214
|
+
logger8.error("Error loading documents on startup:", error);
|
|
4025
4215
|
}
|
|
4026
4216
|
}, 5e3);
|
|
4027
4217
|
} else {
|
|
4028
|
-
|
|
4218
|
+
logger8.info("LOAD_DOCS_ON_STARTUP is not enabled. Skipping automatic document loading.");
|
|
4029
4219
|
}
|
|
4030
4220
|
}
|
|
4031
|
-
|
|
4221
|
+
logger8.info(
|
|
4032
4222
|
"Knowledge Plugin initialized. Frontend panel should be discoverable via its public route."
|
|
4033
4223
|
);
|
|
4034
4224
|
} catch (error) {
|
|
4035
|
-
|
|
4225
|
+
logger8.error("Failed to initialize Knowledge plugin:", error);
|
|
4036
4226
|
throw error;
|
|
4037
4227
|
}
|
|
4038
4228
|
},
|