illuma-agents 1.0.16 → 1.0.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/agents/AgentContext.cjs +3 -1
- package/dist/cjs/agents/AgentContext.cjs.map +1 -1
- package/dist/cjs/common/enum.cjs +18 -0
- package/dist/cjs/common/enum.cjs.map +1 -1
- package/dist/cjs/graphs/Graph.cjs +79 -32
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/cjs/llm/bedrock/index.cjs +5 -3
- package/dist/cjs/llm/bedrock/index.cjs.map +1 -1
- package/dist/cjs/llm/openai/index.cjs +1 -0
- package/dist/cjs/llm/openai/index.cjs.map +1 -1
- package/dist/cjs/llm/openrouter/index.cjs +10 -1
- package/dist/cjs/llm/openrouter/index.cjs.map +1 -1
- package/dist/cjs/llm/vertexai/index.cjs +7 -8
- package/dist/cjs/llm/vertexai/index.cjs.map +1 -1
- package/dist/cjs/main.cjs +15 -0
- package/dist/cjs/main.cjs.map +1 -1
- package/dist/cjs/messages/cache.cjs +11 -6
- package/dist/cjs/messages/cache.cjs.map +1 -1
- package/dist/cjs/messages/core.cjs +16 -8
- package/dist/cjs/messages/core.cjs.map +1 -1
- package/dist/cjs/messages/format.cjs +9 -2
- package/dist/cjs/messages/format.cjs.map +1 -1
- package/dist/cjs/messages/tools.cjs +17 -10
- package/dist/cjs/messages/tools.cjs.map +1 -1
- package/dist/cjs/stream.cjs +30 -16
- package/dist/cjs/stream.cjs.map +1 -1
- package/dist/cjs/tools/ProgrammaticToolCalling.cjs +209 -47
- package/dist/cjs/tools/ProgrammaticToolCalling.cjs.map +1 -1
- package/dist/cjs/tools/ToolNode.cjs +73 -3
- package/dist/cjs/tools/ToolNode.cjs.map +1 -1
- package/dist/cjs/tools/handlers.cjs +1 -0
- package/dist/cjs/tools/handlers.cjs.map +1 -1
- package/dist/cjs/tools/search/search.cjs.map +1 -1
- package/dist/cjs/tools/search/tool.cjs +3 -1
- package/dist/cjs/tools/search/tool.cjs.map +1 -1
- package/dist/cjs/utils/contextAnalytics.cjs +66 -0
- package/dist/cjs/utils/contextAnalytics.cjs.map +1 -0
- package/dist/cjs/utils/run.cjs.map +1 -1
- package/dist/cjs/utils/toonFormat.cjs +388 -0
- package/dist/cjs/utils/toonFormat.cjs.map +1 -0
- package/dist/esm/agents/AgentContext.mjs +3 -1
- package/dist/esm/agents/AgentContext.mjs.map +1 -1
- package/dist/esm/common/enum.mjs +19 -1
- package/dist/esm/common/enum.mjs.map +1 -1
- package/dist/esm/graphs/Graph.mjs +81 -34
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/dist/esm/llm/bedrock/index.mjs +5 -3
- package/dist/esm/llm/bedrock/index.mjs.map +1 -1
- package/dist/esm/llm/openai/index.mjs +1 -0
- package/dist/esm/llm/openai/index.mjs.map +1 -1
- package/dist/esm/llm/openrouter/index.mjs +10 -1
- package/dist/esm/llm/openrouter/index.mjs.map +1 -1
- package/dist/esm/llm/vertexai/index.mjs +7 -8
- package/dist/esm/llm/vertexai/index.mjs.map +1 -1
- package/dist/esm/main.mjs +4 -2
- package/dist/esm/main.mjs.map +1 -1
- package/dist/esm/messages/cache.mjs +11 -6
- package/dist/esm/messages/cache.mjs.map +1 -1
- package/dist/esm/messages/core.mjs +18 -10
- package/dist/esm/messages/core.mjs.map +1 -1
- package/dist/esm/messages/format.mjs +10 -3
- package/dist/esm/messages/format.mjs.map +1 -1
- package/dist/esm/messages/tools.mjs +19 -12
- package/dist/esm/messages/tools.mjs.map +1 -1
- package/dist/esm/stream.mjs +30 -16
- package/dist/esm/stream.mjs.map +1 -1
- package/dist/esm/tools/ProgrammaticToolCalling.mjs +208 -48
- package/dist/esm/tools/ProgrammaticToolCalling.mjs.map +1 -1
- package/dist/esm/tools/ToolNode.mjs +73 -3
- package/dist/esm/tools/ToolNode.mjs.map +1 -1
- package/dist/esm/tools/handlers.mjs +1 -0
- package/dist/esm/tools/handlers.mjs.map +1 -1
- package/dist/esm/tools/search/search.mjs.map +1 -1
- package/dist/esm/tools/search/tool.mjs +3 -1
- package/dist/esm/tools/search/tool.mjs.map +1 -1
- package/dist/esm/utils/contextAnalytics.mjs +64 -0
- package/dist/esm/utils/contextAnalytics.mjs.map +1 -0
- package/dist/esm/utils/run.mjs.map +1 -1
- package/dist/esm/utils/toonFormat.mjs +381 -0
- package/dist/esm/utils/toonFormat.mjs.map +1 -0
- package/dist/types/common/enum.d.ts +17 -0
- package/dist/types/graphs/Graph.d.ts +8 -0
- package/dist/types/tools/ProgrammaticToolCalling.d.ts +19 -0
- package/dist/types/types/tools.d.ts +3 -1
- package/dist/types/utils/contextAnalytics.d.ts +37 -0
- package/dist/types/utils/index.d.ts +2 -0
- package/dist/types/utils/toonFormat.d.ts +111 -0
- package/package.json +3 -2
- package/src/agents/AgentContext.ts +28 -20
- package/src/common/enum.ts +18 -0
- package/src/graphs/Graph.ts +152 -62
- package/src/llm/bedrock/__tests__/bedrock-caching.test.ts +495 -473
- package/src/llm/bedrock/index.ts +47 -35
- package/src/llm/openrouter/index.ts +11 -1
- package/src/llm/vertexai/index.ts +9 -10
- package/src/messages/cache.ts +104 -55
- package/src/messages/core.ts +29 -19
- package/src/messages/format.ts +14 -3
- package/src/messages/tools.ts +20 -13
- package/src/scripts/simple.ts +1 -1
- package/src/specs/emergency-prune.test.ts +407 -355
- package/src/stream.ts +28 -20
- package/src/tools/ProgrammaticToolCalling.ts +246 -52
- package/src/tools/ToolNode.ts +78 -5
- package/src/tools/__tests__/ProgrammaticToolCalling.test.ts +155 -0
- package/src/tools/search/jina-reranker.test.ts +32 -28
- package/src/tools/search/search.ts +3 -1
- package/src/tools/search/tool.ts +16 -7
- package/src/types/tools.ts +3 -1
- package/src/utils/contextAnalytics.ts +103 -0
- package/src/utils/index.ts +2 -0
- package/src/utils/llmConfig.ts +8 -1
- package/src/utils/run.ts +5 -4
- package/src/utils/toonFormat.ts +475 -0
package/src/graphs/Graph.ts
CHANGED
|
@@ -36,6 +36,7 @@ import {
|
|
|
36
36
|
GraphEvents,
|
|
37
37
|
Providers,
|
|
38
38
|
StepTypes,
|
|
39
|
+
MessageTypes,
|
|
39
40
|
} from '@/common';
|
|
40
41
|
import {
|
|
41
42
|
formatAnthropicArtifactContent,
|
|
@@ -56,6 +57,10 @@ import {
|
|
|
56
57
|
joinKeys,
|
|
57
58
|
sleep,
|
|
58
59
|
} from '@/utils';
|
|
60
|
+
import {
|
|
61
|
+
buildContextAnalytics,
|
|
62
|
+
type ContextAnalytics,
|
|
63
|
+
} from '@/utils/contextAnalytics';
|
|
59
64
|
import { getChatModelClass, manualToolStreamProviders } from '@/llm/providers';
|
|
60
65
|
import { ToolNode as CustomToolNode, toolsCondition } from '@/tools/ToolNode';
|
|
61
66
|
import { ChatOpenAI, AzureChatOpenAI } from '@/llm/openai';
|
|
@@ -212,7 +217,7 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
212
217
|
/**
|
|
213
218
|
* Estimates a human-friendly description of the conversation timeframe based on message count.
|
|
214
219
|
* Uses rough heuristics to provide context about how much history is available.
|
|
215
|
-
*
|
|
220
|
+
*
|
|
216
221
|
* @param messageCount - Number of messages in the remaining context
|
|
217
222
|
* @returns A friendly description like "the last few minutes", "the past hour", etc.
|
|
218
223
|
*/
|
|
@@ -222,7 +227,7 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
222
227
|
// - Normal chat: ~10-15 messages per hour
|
|
223
228
|
// - Slow/thoughtful chat: ~5-8 messages per hour
|
|
224
229
|
// We use a middle estimate of ~12 messages per hour
|
|
225
|
-
|
|
230
|
+
|
|
226
231
|
if (messageCount <= 5) {
|
|
227
232
|
return 'just the last few exchanges';
|
|
228
233
|
} else if (messageCount <= 15) {
|
|
@@ -445,6 +450,17 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
445
450
|
return primaryContext.getContextBreakdown();
|
|
446
451
|
}
|
|
447
452
|
|
|
453
|
+
/**
|
|
454
|
+
* Get the latest context analytics from the graph.
|
|
455
|
+
* Returns metrics like utilization %, TOON stats, message breakdown.
|
|
456
|
+
*/
|
|
457
|
+
getContextAnalytics(): ContextAnalytics | null {
|
|
458
|
+
return this.lastContextAnalytics ?? null;
|
|
459
|
+
}
|
|
460
|
+
|
|
461
|
+
/** Store the latest context analytics for retrieval after run */
|
|
462
|
+
private lastContextAnalytics: ContextAnalytics | null = null;
|
|
463
|
+
|
|
448
464
|
/* Graph */
|
|
449
465
|
|
|
450
466
|
createSystemRunnable({
|
|
@@ -699,7 +715,8 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
699
715
|
content: `[SESSION_CONTEXT]\n${agentContext.dynamicContext}`,
|
|
700
716
|
});
|
|
701
717
|
const ackMessage = new AIMessageChunk({
|
|
702
|
-
content:
|
|
718
|
+
content:
|
|
719
|
+
'Understood. I have noted the session context including the current date/time (CST) and will apply it appropriately.',
|
|
703
720
|
});
|
|
704
721
|
messages = [dynamicContextMessage, ackMessage, ...messages];
|
|
705
722
|
}
|
|
@@ -732,17 +749,7 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
732
749
|
this.config = config;
|
|
733
750
|
|
|
734
751
|
let messagesToUse = messages;
|
|
735
|
-
|
|
736
|
-
// Debug logging for pruneMessages creation conditions
|
|
737
|
-
const hasPruneMessages = !!agentContext.pruneMessages;
|
|
738
|
-
const hasTokenCounter = !!agentContext.tokenCounter;
|
|
739
|
-
const hasMaxContextTokens = agentContext.maxContextTokens != null;
|
|
740
|
-
const hasIndex0TokenCount = agentContext.indexTokenCountMap[0] != null;
|
|
741
|
-
|
|
742
|
-
if (!hasPruneMessages && hasTokenCounter && hasMaxContextTokens && !hasIndex0TokenCount) {
|
|
743
|
-
console.warn('[Graph] Cannot create pruneMessages - missing indexTokenCountMap[0]. Token map keys:', Object.keys(agentContext.indexTokenCountMap));
|
|
744
|
-
}
|
|
745
|
-
|
|
752
|
+
|
|
746
753
|
if (
|
|
747
754
|
!agentContext.pruneMessages &&
|
|
748
755
|
agentContext.tokenCounter &&
|
|
@@ -771,6 +778,7 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
771
778
|
indexTokenCountMap: agentContext.indexTokenCountMap,
|
|
772
779
|
});
|
|
773
780
|
}
|
|
781
|
+
|
|
774
782
|
if (agentContext.pruneMessages) {
|
|
775
783
|
const { context, indexTokenCountMap } = agentContext.pruneMessages({
|
|
776
784
|
messages,
|
|
@@ -798,13 +806,14 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
798
806
|
if (
|
|
799
807
|
agentContext.provider === Providers.BEDROCK &&
|
|
800
808
|
lastMessageX instanceof AIMessageChunk &&
|
|
801
|
-
lastMessageY
|
|
809
|
+
lastMessageY?.getType() === MessageTypes.TOOL &&
|
|
802
810
|
typeof lastMessageX.content === 'string'
|
|
803
811
|
) {
|
|
804
812
|
finalMessages[finalMessages.length - 2].content = '';
|
|
805
813
|
}
|
|
806
814
|
|
|
807
|
-
|
|
815
|
+
// Use getType() instead of instanceof to avoid module mismatch issues
|
|
816
|
+
const isLatestToolMessage = lastMessageY?.getType() === MessageTypes.TOOL;
|
|
808
817
|
|
|
809
818
|
if (
|
|
810
819
|
isLatestToolMessage &&
|
|
@@ -820,6 +829,33 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
820
829
|
formatArtifactPayload(finalMessages);
|
|
821
830
|
}
|
|
822
831
|
|
|
832
|
+
/**
|
|
833
|
+
* Handle edge case: when switching from a non-thinking agent to a thinking-enabled agent,
|
|
834
|
+
* convert AI messages with tool calls to HumanMessages to avoid thinking block requirements.
|
|
835
|
+
* This is required by Anthropic/Bedrock when thinking is enabled.
|
|
836
|
+
*
|
|
837
|
+
* IMPORTANT: This MUST happen BEFORE cache control is applied.
|
|
838
|
+
* If we add cachePoint to an AI message first, then convert that AI message to a HumanMessage,
|
|
839
|
+
* the cachePoint is lost. By converting first, we ensure cache control is applied to the
|
|
840
|
+
* final message structure that will be sent to the API.
|
|
841
|
+
*/
|
|
842
|
+
const isAnthropicWithThinking =
|
|
843
|
+
(agentContext.provider === Providers.ANTHROPIC &&
|
|
844
|
+
(agentContext.clientOptions as t.AnthropicClientOptions).thinking !=
|
|
845
|
+
null) ||
|
|
846
|
+
(agentContext.provider === Providers.BEDROCK &&
|
|
847
|
+
(agentContext.clientOptions as t.BedrockAnthropicInput)
|
|
848
|
+
.additionalModelRequestFields?.['thinking'] != null);
|
|
849
|
+
|
|
850
|
+
if (isAnthropicWithThinking) {
|
|
851
|
+
finalMessages = ensureThinkingBlockInMessages(
|
|
852
|
+
finalMessages,
|
|
853
|
+
agentContext.provider
|
|
854
|
+
);
|
|
855
|
+
}
|
|
856
|
+
|
|
857
|
+
// Apply cache control AFTER thinking block handling to ensure cachePoints aren't lost
|
|
858
|
+
// when AI messages are converted to HumanMessages
|
|
823
859
|
if (agentContext.provider === Providers.ANTHROPIC) {
|
|
824
860
|
const anthropicOptions = agentContext.clientOptions as
|
|
825
861
|
| t.AnthropicClientOptions
|
|
@@ -841,32 +877,15 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
841
877
|
// Both Claude and Nova models support cachePoint in system and messages
|
|
842
878
|
// (Llama, Titan, and other models do NOT support cachePoint)
|
|
843
879
|
const modelId = bedrockOptions?.model?.toLowerCase() ?? '';
|
|
844
|
-
const supportsCaching =
|
|
880
|
+
const supportsCaching =
|
|
881
|
+
modelId.includes('claude') ||
|
|
882
|
+
modelId.includes('anthropic') ||
|
|
883
|
+
modelId.includes('nova');
|
|
845
884
|
if (bedrockOptions?.promptCache === true && supportsCaching) {
|
|
846
885
|
finalMessages = addBedrockCacheControl<BaseMessage>(finalMessages);
|
|
847
886
|
}
|
|
848
887
|
}
|
|
849
888
|
|
|
850
|
-
/**
|
|
851
|
-
* Handle edge case: when switching from a non-thinking agent to a thinking-enabled agent,
|
|
852
|
-
* convert AI messages with tool calls to HumanMessages to avoid thinking block requirements.
|
|
853
|
-
* This is required by Anthropic/Bedrock when thinking is enabled.
|
|
854
|
-
*/
|
|
855
|
-
const isAnthropicWithThinking =
|
|
856
|
-
(agentContext.provider === Providers.ANTHROPIC &&
|
|
857
|
-
(agentContext.clientOptions as t.AnthropicClientOptions).thinking !=
|
|
858
|
-
null) ||
|
|
859
|
-
(agentContext.provider === Providers.BEDROCK &&
|
|
860
|
-
(agentContext.clientOptions as t.BedrockAnthropicInput)
|
|
861
|
-
.additionalModelRequestFields?.['thinking'] != null);
|
|
862
|
-
|
|
863
|
-
if (isAnthropicWithThinking) {
|
|
864
|
-
finalMessages = ensureThinkingBlockInMessages(
|
|
865
|
-
finalMessages,
|
|
866
|
-
agentContext.provider
|
|
867
|
-
);
|
|
868
|
-
}
|
|
869
|
-
|
|
870
889
|
if (
|
|
871
890
|
agentContext.lastStreamCall != null &&
|
|
872
891
|
agentContext.streamBuffer != null
|
|
@@ -896,6 +915,42 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
896
915
|
);
|
|
897
916
|
}
|
|
898
917
|
|
|
918
|
+
// Get model info for analytics
|
|
919
|
+
const bedrockOpts = agentContext.clientOptions as
|
|
920
|
+
| t.BedrockAnthropicClientOptions
|
|
921
|
+
| undefined;
|
|
922
|
+
const modelId =
|
|
923
|
+
bedrockOpts?.model ||
|
|
924
|
+
(agentContext.clientOptions as t.AnthropicClientOptions | undefined)
|
|
925
|
+
?.modelName;
|
|
926
|
+
const thinkingConfig =
|
|
927
|
+
bedrockOpts?.additionalModelRequestFields?.['thinking'] ||
|
|
928
|
+
(agentContext.clientOptions as t.AnthropicClientOptions | undefined)
|
|
929
|
+
?.thinking;
|
|
930
|
+
|
|
931
|
+
// Build and emit context analytics for traces
|
|
932
|
+
const contextAnalytics = buildContextAnalytics(finalMessages, {
|
|
933
|
+
tokenCounter: agentContext.tokenCounter,
|
|
934
|
+
maxContextTokens: agentContext.maxContextTokens,
|
|
935
|
+
instructionTokens: agentContext.instructionTokens,
|
|
936
|
+
indexTokenCountMap: agentContext.indexTokenCountMap,
|
|
937
|
+
});
|
|
938
|
+
|
|
939
|
+
// Store for retrieval via getContextAnalytics() after run completes
|
|
940
|
+
this.lastContextAnalytics = contextAnalytics;
|
|
941
|
+
|
|
942
|
+
await safeDispatchCustomEvent(
|
|
943
|
+
GraphEvents.ON_CONTEXT_ANALYTICS,
|
|
944
|
+
{
|
|
945
|
+
provider: agentContext.provider,
|
|
946
|
+
model: modelId,
|
|
947
|
+
thinkingEnabled: thinkingConfig != null,
|
|
948
|
+
cacheEnabled: bedrockOpts?.promptCache === true,
|
|
949
|
+
analytics: contextAnalytics,
|
|
950
|
+
},
|
|
951
|
+
config
|
|
952
|
+
);
|
|
953
|
+
|
|
899
954
|
try {
|
|
900
955
|
result = await this.attemptInvoke(
|
|
901
956
|
{
|
|
@@ -908,8 +963,9 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
908
963
|
);
|
|
909
964
|
} catch (primaryError) {
|
|
910
965
|
// Check if this is a "input too long" error from Bedrock/Anthropic
|
|
911
|
-
const errorMessage =
|
|
912
|
-
|
|
966
|
+
const errorMessage =
|
|
967
|
+
(primaryError as Error).message.toLowerCase() ?? '';
|
|
968
|
+
const isInputTooLongError =
|
|
913
969
|
errorMessage.includes('too long') ||
|
|
914
970
|
errorMessage.includes('input is too long') ||
|
|
915
971
|
errorMessage.includes('context length') ||
|
|
@@ -919,41 +975,50 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
919
975
|
|
|
920
976
|
// Log when we detect the error
|
|
921
977
|
if (isInputTooLongError) {
|
|
922
|
-
console.warn(
|
|
978
|
+
console.warn(
|
|
979
|
+
'[Graph] Detected input too long error:',
|
|
980
|
+
errorMessage.substring(0, 200)
|
|
981
|
+
);
|
|
923
982
|
console.warn('[Graph] Checking emergency pruning conditions:', {
|
|
924
983
|
hasPruneMessages: !!agentContext.pruneMessages,
|
|
925
984
|
hasTokenCounter: !!agentContext.tokenCounter,
|
|
926
985
|
maxContextTokens: agentContext.maxContextTokens,
|
|
927
|
-
indexTokenMapKeys: Object.keys(agentContext.indexTokenCountMap)
|
|
986
|
+
indexTokenMapKeys: Object.keys(agentContext.indexTokenCountMap)
|
|
987
|
+
.length,
|
|
928
988
|
});
|
|
929
989
|
}
|
|
930
990
|
|
|
931
991
|
// If input too long and we have pruning capability OR tokenCounter, retry with progressively more aggressive pruning
|
|
932
992
|
// Note: We can create emergency pruneMessages dynamically if we have tokenCounter and maxContextTokens
|
|
933
|
-
const canPrune =
|
|
993
|
+
const canPrune =
|
|
994
|
+
agentContext.tokenCounter && agentContext.maxContextTokens;
|
|
934
995
|
if (isInputTooLongError && canPrune) {
|
|
935
996
|
// Progressive reduction: 50% -> 25% -> 10% of original context
|
|
936
997
|
const reductionLevels = [0.5, 0.25, 0.1];
|
|
937
|
-
|
|
998
|
+
|
|
938
999
|
for (const reductionFactor of reductionLevels) {
|
|
939
1000
|
if (result) break; // Exit if we got a result
|
|
940
|
-
|
|
941
|
-
const reducedMaxTokens = Math.floor(
|
|
1001
|
+
|
|
1002
|
+
const reducedMaxTokens = Math.floor(
|
|
1003
|
+
agentContext.maxContextTokens! * reductionFactor
|
|
1004
|
+
);
|
|
942
1005
|
console.warn(
|
|
943
1006
|
`[Graph] Input too long. Retrying with ${reductionFactor * 100}% context (${reducedMaxTokens} tokens)...`
|
|
944
1007
|
);
|
|
945
|
-
|
|
1008
|
+
|
|
946
1009
|
// Build fresh indexTokenCountMap if missing/incomplete
|
|
947
1010
|
// This is needed when messages were dynamically added without updating the token map
|
|
948
1011
|
let tokenMapForPruning = agentContext.indexTokenCountMap;
|
|
949
1012
|
if (Object.keys(tokenMapForPruning).length < messages.length) {
|
|
950
|
-
console.warn(
|
|
1013
|
+
console.warn(
|
|
1014
|
+
'[Graph] Building fresh token count map for emergency pruning...'
|
|
1015
|
+
);
|
|
951
1016
|
tokenMapForPruning = {};
|
|
952
1017
|
for (let i = 0; i < messages.length; i++) {
|
|
953
1018
|
tokenMapForPruning[i] = agentContext.tokenCounter!(messages[i]);
|
|
954
1019
|
}
|
|
955
1020
|
}
|
|
956
|
-
|
|
1021
|
+
|
|
957
1022
|
const emergencyPrune = createPruneMessages({
|
|
958
1023
|
startIndex: this.startIndex,
|
|
959
1024
|
provider: agentContext.provider,
|
|
@@ -970,15 +1035,18 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
970
1035
|
|
|
971
1036
|
// Skip if we can't fit any messages
|
|
972
1037
|
if (reducedMessages.length === 0) {
|
|
973
|
-
console.warn(
|
|
1038
|
+
console.warn(
|
|
1039
|
+
`[Graph] Cannot fit any messages at ${reductionFactor * 100}% reduction, trying next level...`
|
|
1040
|
+
);
|
|
974
1041
|
continue;
|
|
975
1042
|
}
|
|
976
1043
|
|
|
977
1044
|
// Calculate how many messages were pruned and estimate context timeframe
|
|
978
1045
|
const prunedCount = finalMessages.length - reducedMessages.length;
|
|
979
1046
|
const remainingCount = reducedMessages.length;
|
|
980
|
-
const estimatedContextDescription =
|
|
981
|
-
|
|
1047
|
+
const estimatedContextDescription =
|
|
1048
|
+
this.getContextTimeframeDescription(remainingCount);
|
|
1049
|
+
|
|
982
1050
|
// Inject a personalized context message to inform the agent about pruning
|
|
983
1051
|
const pruneNoticeMessage = new HumanMessage({
|
|
984
1052
|
content: `[CONTEXT NOTICE]
|
|
@@ -986,11 +1054,11 @@ Our conversation has grown quite long, so I've focused on ${estimatedContextDesc
|
|
|
986
1054
|
|
|
987
1055
|
If I seem to be missing something we discussed earlier, just give me a quick reminder and I'll pick right back up! I'm still fully engaged and ready to help with whatever you need.`,
|
|
988
1056
|
});
|
|
989
|
-
|
|
1057
|
+
|
|
990
1058
|
// Insert the notice after the system message (if any) but before conversation
|
|
991
1059
|
const hasSystemMessage = reducedMessages[0]?.getType() === 'system';
|
|
992
1060
|
const insertIndex = hasSystemMessage ? 1 : 0;
|
|
993
|
-
|
|
1061
|
+
|
|
994
1062
|
// Create new array with the pruning notice
|
|
995
1063
|
const messagesWithNotice = [
|
|
996
1064
|
...reducedMessages.slice(0, insertIndex),
|
|
@@ -1002,15 +1070,29 @@ If I seem to be missing something we discussed earlier, just give me a quick rem
|
|
|
1002
1070
|
? formatContentStrings(messagesWithNotice)
|
|
1003
1071
|
: messagesWithNotice;
|
|
1004
1072
|
|
|
1005
|
-
// Apply
|
|
1073
|
+
// Apply thinking block handling first (before cache control)
|
|
1074
|
+
// This ensures AI+Tool sequences are converted to HumanMessages
|
|
1075
|
+
// before we add cache points that could be lost in the conversion
|
|
1076
|
+
if (isAnthropicWithThinking) {
|
|
1077
|
+
retryMessages = ensureThinkingBlockInMessages(
|
|
1078
|
+
retryMessages,
|
|
1079
|
+
agentContext.provider
|
|
1080
|
+
);
|
|
1081
|
+
}
|
|
1082
|
+
|
|
1083
|
+
// Apply Bedrock cache control if needed (after thinking block handling)
|
|
1006
1084
|
if (agentContext.provider === Providers.BEDROCK) {
|
|
1007
1085
|
const bedrockOptions = agentContext.clientOptions as
|
|
1008
1086
|
| t.BedrockAnthropicClientOptions
|
|
1009
1087
|
| undefined;
|
|
1010
1088
|
const modelId = bedrockOptions?.model?.toLowerCase() ?? '';
|
|
1011
|
-
const supportsCaching =
|
|
1089
|
+
const supportsCaching =
|
|
1090
|
+
modelId.includes('claude') ||
|
|
1091
|
+
modelId.includes('anthropic') ||
|
|
1092
|
+
modelId.includes('nova');
|
|
1012
1093
|
if (bedrockOptions?.promptCache === true && supportsCaching) {
|
|
1013
|
-
retryMessages =
|
|
1094
|
+
retryMessages =
|
|
1095
|
+
addBedrockCacheControl<BaseMessage>(retryMessages);
|
|
1014
1096
|
}
|
|
1015
1097
|
}
|
|
1016
1098
|
|
|
@@ -1025,18 +1107,26 @@ If I seem to be missing something we discussed earlier, just give me a quick rem
|
|
|
1025
1107
|
config
|
|
1026
1108
|
);
|
|
1027
1109
|
// Success with reduced context
|
|
1028
|
-
console.info(
|
|
1110
|
+
console.info(
|
|
1111
|
+
`[Graph] ✅ Retry successful at ${reductionFactor * 100}% with ${reducedMessages.length} messages (reduced from ${finalMessages.length})`
|
|
1112
|
+
);
|
|
1029
1113
|
} catch (retryError) {
|
|
1030
|
-
const retryErrorMsg =
|
|
1031
|
-
|
|
1114
|
+
const retryErrorMsg =
|
|
1115
|
+
(retryError as Error).message.toLowerCase() ?? '';
|
|
1116
|
+
const stillTooLong =
|
|
1032
1117
|
retryErrorMsg.includes('too long') ||
|
|
1033
1118
|
retryErrorMsg.includes('context length') ||
|
|
1034
1119
|
retryErrorMsg.includes('validationexception');
|
|
1035
|
-
|
|
1120
|
+
|
|
1036
1121
|
if (stillTooLong && reductionFactor > 0.1) {
|
|
1037
|
-
console.warn(
|
|
1122
|
+
console.warn(
|
|
1123
|
+
`[Graph] Still too long at ${reductionFactor * 100}%, trying more aggressive pruning...`
|
|
1124
|
+
);
|
|
1038
1125
|
} else {
|
|
1039
|
-
console.error(
|
|
1126
|
+
console.error(
|
|
1127
|
+
`[Graph] Retry at ${reductionFactor * 100}% failed:`,
|
|
1128
|
+
(retryError as Error).message
|
|
1129
|
+
);
|
|
1040
1130
|
}
|
|
1041
1131
|
}
|
|
1042
1132
|
}
|