illuma-agents 1.0.16 → 1.0.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114)
  1. package/dist/cjs/agents/AgentContext.cjs +3 -1
  2. package/dist/cjs/agents/AgentContext.cjs.map +1 -1
  3. package/dist/cjs/common/enum.cjs +18 -0
  4. package/dist/cjs/common/enum.cjs.map +1 -1
  5. package/dist/cjs/graphs/Graph.cjs +79 -32
  6. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  7. package/dist/cjs/llm/bedrock/index.cjs +5 -3
  8. package/dist/cjs/llm/bedrock/index.cjs.map +1 -1
  9. package/dist/cjs/llm/openai/index.cjs +1 -0
  10. package/dist/cjs/llm/openai/index.cjs.map +1 -1
  11. package/dist/cjs/llm/openrouter/index.cjs +10 -1
  12. package/dist/cjs/llm/openrouter/index.cjs.map +1 -1
  13. package/dist/cjs/llm/vertexai/index.cjs +7 -8
  14. package/dist/cjs/llm/vertexai/index.cjs.map +1 -1
  15. package/dist/cjs/main.cjs +15 -0
  16. package/dist/cjs/main.cjs.map +1 -1
  17. package/dist/cjs/messages/cache.cjs +11 -6
  18. package/dist/cjs/messages/cache.cjs.map +1 -1
  19. package/dist/cjs/messages/core.cjs +16 -8
  20. package/dist/cjs/messages/core.cjs.map +1 -1
  21. package/dist/cjs/messages/format.cjs +9 -2
  22. package/dist/cjs/messages/format.cjs.map +1 -1
  23. package/dist/cjs/messages/tools.cjs +17 -10
  24. package/dist/cjs/messages/tools.cjs.map +1 -1
  25. package/dist/cjs/stream.cjs +30 -16
  26. package/dist/cjs/stream.cjs.map +1 -1
  27. package/dist/cjs/tools/ProgrammaticToolCalling.cjs +209 -47
  28. package/dist/cjs/tools/ProgrammaticToolCalling.cjs.map +1 -1
  29. package/dist/cjs/tools/ToolNode.cjs +73 -3
  30. package/dist/cjs/tools/ToolNode.cjs.map +1 -1
  31. package/dist/cjs/tools/handlers.cjs +1 -0
  32. package/dist/cjs/tools/handlers.cjs.map +1 -1
  33. package/dist/cjs/tools/search/search.cjs.map +1 -1
  34. package/dist/cjs/tools/search/tool.cjs +3 -1
  35. package/dist/cjs/tools/search/tool.cjs.map +1 -1
  36. package/dist/cjs/utils/contextAnalytics.cjs +66 -0
  37. package/dist/cjs/utils/contextAnalytics.cjs.map +1 -0
  38. package/dist/cjs/utils/run.cjs.map +1 -1
  39. package/dist/cjs/utils/toonFormat.cjs +388 -0
  40. package/dist/cjs/utils/toonFormat.cjs.map +1 -0
  41. package/dist/esm/agents/AgentContext.mjs +3 -1
  42. package/dist/esm/agents/AgentContext.mjs.map +1 -1
  43. package/dist/esm/common/enum.mjs +19 -1
  44. package/dist/esm/common/enum.mjs.map +1 -1
  45. package/dist/esm/graphs/Graph.mjs +81 -34
  46. package/dist/esm/graphs/Graph.mjs.map +1 -1
  47. package/dist/esm/llm/bedrock/index.mjs +5 -3
  48. package/dist/esm/llm/bedrock/index.mjs.map +1 -1
  49. package/dist/esm/llm/openai/index.mjs +1 -0
  50. package/dist/esm/llm/openai/index.mjs.map +1 -1
  51. package/dist/esm/llm/openrouter/index.mjs +10 -1
  52. package/dist/esm/llm/openrouter/index.mjs.map +1 -1
  53. package/dist/esm/llm/vertexai/index.mjs +7 -8
  54. package/dist/esm/llm/vertexai/index.mjs.map +1 -1
  55. package/dist/esm/main.mjs +4 -2
  56. package/dist/esm/main.mjs.map +1 -1
  57. package/dist/esm/messages/cache.mjs +11 -6
  58. package/dist/esm/messages/cache.mjs.map +1 -1
  59. package/dist/esm/messages/core.mjs +18 -10
  60. package/dist/esm/messages/core.mjs.map +1 -1
  61. package/dist/esm/messages/format.mjs +10 -3
  62. package/dist/esm/messages/format.mjs.map +1 -1
  63. package/dist/esm/messages/tools.mjs +19 -12
  64. package/dist/esm/messages/tools.mjs.map +1 -1
  65. package/dist/esm/stream.mjs +30 -16
  66. package/dist/esm/stream.mjs.map +1 -1
  67. package/dist/esm/tools/ProgrammaticToolCalling.mjs +208 -48
  68. package/dist/esm/tools/ProgrammaticToolCalling.mjs.map +1 -1
  69. package/dist/esm/tools/ToolNode.mjs +73 -3
  70. package/dist/esm/tools/ToolNode.mjs.map +1 -1
  71. package/dist/esm/tools/handlers.mjs +1 -0
  72. package/dist/esm/tools/handlers.mjs.map +1 -1
  73. package/dist/esm/tools/search/search.mjs.map +1 -1
  74. package/dist/esm/tools/search/tool.mjs +3 -1
  75. package/dist/esm/tools/search/tool.mjs.map +1 -1
  76. package/dist/esm/utils/contextAnalytics.mjs +64 -0
  77. package/dist/esm/utils/contextAnalytics.mjs.map +1 -0
  78. package/dist/esm/utils/run.mjs.map +1 -1
  79. package/dist/esm/utils/toonFormat.mjs +381 -0
  80. package/dist/esm/utils/toonFormat.mjs.map +1 -0
  81. package/dist/types/common/enum.d.ts +17 -0
  82. package/dist/types/graphs/Graph.d.ts +8 -0
  83. package/dist/types/tools/ProgrammaticToolCalling.d.ts +19 -0
  84. package/dist/types/types/tools.d.ts +3 -1
  85. package/dist/types/utils/contextAnalytics.d.ts +37 -0
  86. package/dist/types/utils/index.d.ts +2 -0
  87. package/dist/types/utils/toonFormat.d.ts +111 -0
  88. package/package.json +3 -2
  89. package/src/agents/AgentContext.ts +28 -20
  90. package/src/common/enum.ts +18 -0
  91. package/src/graphs/Graph.ts +152 -62
  92. package/src/llm/bedrock/__tests__/bedrock-caching.test.ts +495 -473
  93. package/src/llm/bedrock/index.ts +47 -35
  94. package/src/llm/openrouter/index.ts +11 -1
  95. package/src/llm/vertexai/index.ts +9 -10
  96. package/src/messages/cache.ts +104 -55
  97. package/src/messages/core.ts +29 -19
  98. package/src/messages/format.ts +14 -3
  99. package/src/messages/tools.ts +20 -13
  100. package/src/scripts/simple.ts +1 -1
  101. package/src/specs/emergency-prune.test.ts +407 -355
  102. package/src/stream.ts +28 -20
  103. package/src/tools/ProgrammaticToolCalling.ts +246 -52
  104. package/src/tools/ToolNode.ts +78 -5
  105. package/src/tools/__tests__/ProgrammaticToolCalling.test.ts +155 -0
  106. package/src/tools/search/jina-reranker.test.ts +32 -28
  107. package/src/tools/search/search.ts +3 -1
  108. package/src/tools/search/tool.ts +16 -7
  109. package/src/types/tools.ts +3 -1
  110. package/src/utils/contextAnalytics.ts +103 -0
  111. package/src/utils/index.ts +2 -0
  112. package/src/utils/llmConfig.ts +8 -1
  113. package/src/utils/run.ts +5 -4
  114. package/src/utils/toonFormat.ts +475 -0
package/src/graphs/Graph.ts
@@ -36,6 +36,7 @@ import {
   GraphEvents,
   Providers,
   StepTypes,
+  MessageTypes,
 } from '@/common';
 import {
   formatAnthropicArtifactContent,
@@ -56,6 +57,10 @@ import {
   joinKeys,
   sleep,
 } from '@/utils';
+import {
+  buildContextAnalytics,
+  type ContextAnalytics,
+} from '@/utils/contextAnalytics';
 import { getChatModelClass, manualToolStreamProviders } from '@/llm/providers';
 import { ToolNode as CustomToolNode, toolsCondition } from '@/tools/ToolNode';
 import { ChatOpenAI, AzureChatOpenAI } from '@/llm/openai';
@@ -212,7 +217,7 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
   /**
    * Estimates a human-friendly description of the conversation timeframe based on message count.
    * Uses rough heuristics to provide context about how much history is available.
-   * 
+   *
    * @param messageCount - Number of messages in the remaining context
    * @returns A friendly description like "the last few minutes", "the past hour", etc.
    */
@@ -222,7 +227,7 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
     // - Normal chat: ~10-15 messages per hour
     // - Slow/thoughtful chat: ~5-8 messages per hour
     // We use a middle estimate of ~12 messages per hour
-    
+
     if (messageCount <= 5) {
       return 'just the last few exchanges';
     } else if (messageCount <= 15) {
@@ -445,6 +450,17 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
     return primaryContext.getContextBreakdown();
   }
 
+  /**
+   * Get the latest context analytics from the graph.
+   * Returns metrics like utilization %, TOON stats, message breakdown.
+   */
+  getContextAnalytics(): ContextAnalytics | null {
+    return this.lastContextAnalytics ?? null;
+  }
+
+  /** Store the latest context analytics for retrieval after run */
+  private lastContextAnalytics: ContextAnalytics | null = null;
+
   /* Graph */
 
   createSystemRunnable({
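
Note on the new getContextAnalytics() accessor: a minimal usage sketch, assuming an already-constructed StandardGraph instance (the `graph` variable and its setup are hypothetical); per the added code, the method simply returns whatever the last run stored, or null before any run:

    // Hypothetical caller; `graph` is a configured StandardGraph that has completed a run.
    const analytics = graph.getContextAnalytics();
    if (analytics != null) {
      // ContextAnalytics is defined in utils/contextAnalytics; logging the whole
      // object avoids assuming specific field names here.
      console.log('Last run context analytics:', analytics);
    }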
@@ -699,7 +715,8 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
         content: `[SESSION_CONTEXT]\n${agentContext.dynamicContext}`,
       });
       const ackMessage = new AIMessageChunk({
-        content: 'Understood. I have noted the session context including the current date/time (CST) and will apply it appropriately.',
+        content:
+          'Understood. I have noted the session context including the current date/time (CST) and will apply it appropriately.',
       });
       messages = [dynamicContextMessage, ackMessage, ...messages];
     }
@@ -732,17 +749,7 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
     this.config = config;
 
     let messagesToUse = messages;
-
-    // Debug logging for pruneMessages creation conditions
-    const hasPruneMessages = !!agentContext.pruneMessages;
-    const hasTokenCounter = !!agentContext.tokenCounter;
-    const hasMaxContextTokens = agentContext.maxContextTokens != null;
-    const hasIndex0TokenCount = agentContext.indexTokenCountMap[0] != null;
-
-    if (!hasPruneMessages && hasTokenCounter && hasMaxContextTokens && !hasIndex0TokenCount) {
-      console.warn('[Graph] Cannot create pruneMessages - missing indexTokenCountMap[0]. Token map keys:', Object.keys(agentContext.indexTokenCountMap));
-    }
-
+
     if (
       !agentContext.pruneMessages &&
       agentContext.tokenCounter &&
@@ -771,6 +778,7 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
         indexTokenCountMap: agentContext.indexTokenCountMap,
       });
     }
+
     if (agentContext.pruneMessages) {
       const { context, indexTokenCountMap } = agentContext.pruneMessages({
         messages,
@@ -798,13 +806,14 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
     if (
       agentContext.provider === Providers.BEDROCK &&
       lastMessageX instanceof AIMessageChunk &&
-      lastMessageY instanceof ToolMessage &&
+      lastMessageY?.getType() === MessageTypes.TOOL &&
       typeof lastMessageX.content === 'string'
     ) {
       finalMessages[finalMessages.length - 2].content = '';
     }
 
-    const isLatestToolMessage = lastMessageY instanceof ToolMessage;
+    // Use getType() instead of instanceof to avoid module mismatch issues
+    const isLatestToolMessage = lastMessageY?.getType() === MessageTypes.TOOL;
 
     if (
       isLatestToolMessage &&
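
Note on the instanceof-to-getType() change above: when two copies of @langchain/core coexist in node_modules, a ToolMessage built by one copy fails `instanceof` against the other copy's class, while the type string compares reliably. A self-contained sketch of the pattern, using the literal 'tool' that @langchain/core returns (the package's own MessageTypes.TOOL enum presumably resolves to the same string):

    import { ToolMessage, type BaseMessage } from '@langchain/core/messages';

    // String comparison survives duplicate @langchain/core installs, where
    // `message instanceof ToolMessage` can be false for a genuine ToolMessage.
    function isToolMessage(message?: BaseMessage): boolean {
      return message?.getType() === 'tool';
    }

    const msg = new ToolMessage({ content: 'ok', tool_call_id: 'call_1' });
    console.log(isToolMessage(msg)); // true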
@@ -820,6 +829,33 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
       formatArtifactPayload(finalMessages);
     }
 
+    /**
+     * Handle edge case: when switching from a non-thinking agent to a thinking-enabled agent,
+     * convert AI messages with tool calls to HumanMessages to avoid thinking block requirements.
+     * This is required by Anthropic/Bedrock when thinking is enabled.
+     *
+     * IMPORTANT: This MUST happen BEFORE cache control is applied.
+     * If we add cachePoint to an AI message first, then convert that AI message to a HumanMessage,
+     * the cachePoint is lost. By converting first, we ensure cache control is applied to the
+     * final message structure that will be sent to the API.
+     */
+    const isAnthropicWithThinking =
+      (agentContext.provider === Providers.ANTHROPIC &&
+        (agentContext.clientOptions as t.AnthropicClientOptions).thinking !=
+          null) ||
+      (agentContext.provider === Providers.BEDROCK &&
+        (agentContext.clientOptions as t.BedrockAnthropicInput)
+          .additionalModelRequestFields?.['thinking'] != null);
+
+    if (isAnthropicWithThinking) {
+      finalMessages = ensureThinkingBlockInMessages(
+        finalMessages,
+        agentContext.provider
+      );
+    }
+
+    // Apply cache control AFTER thinking block handling to ensure cachePoints aren't lost
+    // when AI messages are converted to HumanMessages
     if (agentContext.provider === Providers.ANTHROPIC) {
       const anthropicOptions = agentContext.clientOptions as
         | t.AnthropicClientOptions
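
Note on the ordering enforced above: the move encodes a general invariant. A transform that replaces message objects must run before a transform that annotates them in place, or the annotation is discarded along with the replaced object. A generic sketch with hypothetical convert/annotate stand-ins for ensureThinkingBlockInMessages and addBedrockCacheControl:

    type Msg = { role: string; content: string; cachePoint?: boolean };

    // Hypothetical stand-ins: conversion builds new objects, annotation copies and tags.
    const convert = (msgs: Msg[]): Msg[] =>
      msgs.map((m) => (m.role === 'ai' ? { role: 'human', content: m.content } : m));
    const annotate = (msgs: Msg[]): Msg[] =>
      msgs.map((m, i) => (i === msgs.length - 1 ? { ...m, cachePoint: true } : m));

    const msgs: Msg[] = [{ role: 'ai', content: 'tool call' }];
    // Wrong order: conversion rebuilds the message and silently drops the cachePoint.
    console.log(convert(annotate(msgs))); // [{ role: 'human', content: 'tool call' }]
    // Right order: annotate the final structure that will actually be sent.
    console.log(annotate(convert(msgs))); // [{ role: 'human', content: 'tool call', cachePoint: true }]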
@@ -841,32 +877,15 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
       // Both Claude and Nova models support cachePoint in system and messages
       // (Llama, Titan, and other models do NOT support cachePoint)
       const modelId = bedrockOptions?.model?.toLowerCase() ?? '';
-      const supportsCaching = modelId.includes('claude') || modelId.includes('anthropic') || modelId.includes('nova');
+      const supportsCaching =
+        modelId.includes('claude') ||
+        modelId.includes('anthropic') ||
+        modelId.includes('nova');
       if (bedrockOptions?.promptCache === true && supportsCaching) {
         finalMessages = addBedrockCacheControl<BaseMessage>(finalMessages);
       }
     }
 
-    /**
-     * Handle edge case: when switching from a non-thinking agent to a thinking-enabled agent,
-     * convert AI messages with tool calls to HumanMessages to avoid thinking block requirements.
-     * This is required by Anthropic/Bedrock when thinking is enabled.
-     */
-    const isAnthropicWithThinking =
-      (agentContext.provider === Providers.ANTHROPIC &&
-        (agentContext.clientOptions as t.AnthropicClientOptions).thinking !=
-          null) ||
-      (agentContext.provider === Providers.BEDROCK &&
-        (agentContext.clientOptions as t.BedrockAnthropicInput)
-          .additionalModelRequestFields?.['thinking'] != null);
-
-    if (isAnthropicWithThinking) {
-      finalMessages = ensureThinkingBlockInMessages(
-        finalMessages,
-        agentContext.provider
-      );
-    }
-
     if (
       agentContext.lastStreamCall != null &&
       agentContext.streamBuffer != null
@@ -896,6 +915,42 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
       );
     }
 
+    // Get model info for analytics
+    const bedrockOpts = agentContext.clientOptions as
+      | t.BedrockAnthropicClientOptions
+      | undefined;
+    const modelId =
+      bedrockOpts?.model ||
+      (agentContext.clientOptions as t.AnthropicClientOptions | undefined)
+        ?.modelName;
+    const thinkingConfig =
+      bedrockOpts?.additionalModelRequestFields?.['thinking'] ||
+      (agentContext.clientOptions as t.AnthropicClientOptions | undefined)
+        ?.thinking;
+
+    // Build and emit context analytics for traces
+    const contextAnalytics = buildContextAnalytics(finalMessages, {
+      tokenCounter: agentContext.tokenCounter,
+      maxContextTokens: agentContext.maxContextTokens,
+      instructionTokens: agentContext.instructionTokens,
+      indexTokenCountMap: agentContext.indexTokenCountMap,
+    });
+
+    // Store for retrieval via getContextAnalytics() after run completes
+    this.lastContextAnalytics = contextAnalytics;
+
+    await safeDispatchCustomEvent(
+      GraphEvents.ON_CONTEXT_ANALYTICS,
+      {
+        provider: agentContext.provider,
+        model: modelId,
+        thinkingEnabled: thinkingConfig != null,
+        cacheEnabled: bedrockOpts?.promptCache === true,
+        analytics: contextAnalytics,
+      },
+      config
+    );
+
     try {
       result = await this.attemptInvoke(
         {
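
Note on ON_CONTEXT_ANALYTICS: because the payload is dispatched as a LangChain custom event, a consumer can read it off the event stream instead of polling getContextAnalytics(). A hedged sketch, assuming the graph's runnable is driven through streamEvents v2 (where custom events surface as 'on_custom_event') and that GraphEvents is re-exported from the package root, which should be verified against the actual exports:

    import { GraphEvents } from 'illuma-agents'; // assumption: enum re-exported from root

    // `runnable` and `inputs` come from your own graph setup (hypothetical here).
    for await (const ev of runnable.streamEvents(inputs, { version: 'v2' })) {
      if (ev.event === 'on_custom_event' && ev.name === GraphEvents.ON_CONTEXT_ANALYTICS) {
        // ev.data carries the dispatched payload: provider, model,
        // thinkingEnabled, cacheEnabled, and the analytics object.
        console.log('context analytics:', ev.data);
      }
    }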
@@ -908,8 +963,9 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
       );
     } catch (primaryError) {
       // Check if this is a "input too long" error from Bedrock/Anthropic
-      const errorMessage = (primaryError as Error)?.message?.toLowerCase() ?? '';
-      const isInputTooLongError =
+      const errorMessage =
+        (primaryError as Error).message.toLowerCase() ?? '';
+      const isInputTooLongError =
         errorMessage.includes('too long') ||
         errorMessage.includes('input is too long') ||
         errorMessage.includes('context length') ||
@@ -919,41 +975,50 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
 
       // Log when we detect the error
       if (isInputTooLongError) {
-        console.warn('[Graph] Detected input too long error:', errorMessage.substring(0, 200));
+        console.warn(
+          '[Graph] Detected input too long error:',
+          errorMessage.substring(0, 200)
+        );
         console.warn('[Graph] Checking emergency pruning conditions:', {
           hasPruneMessages: !!agentContext.pruneMessages,
           hasTokenCounter: !!agentContext.tokenCounter,
           maxContextTokens: agentContext.maxContextTokens,
-          indexTokenMapKeys: Object.keys(agentContext.indexTokenCountMap).length
+          indexTokenMapKeys: Object.keys(agentContext.indexTokenCountMap)
+            .length,
         });
       }
 
      // If input too long and we have pruning capability OR tokenCounter, retry with progressively more aggressive pruning
      // Note: We can create emergency pruneMessages dynamically if we have tokenCounter and maxContextTokens
-      const canPrune = agentContext.tokenCounter && agentContext.maxContextTokens;
+      const canPrune =
+        agentContext.tokenCounter && agentContext.maxContextTokens;
       if (isInputTooLongError && canPrune) {
         // Progressive reduction: 50% -> 25% -> 10% of original context
         const reductionLevels = [0.5, 0.25, 0.1];
-        
+
         for (const reductionFactor of reductionLevels) {
           if (result) break; // Exit if we got a result
-          
-          const reducedMaxTokens = Math.floor(agentContext.maxContextTokens! * reductionFactor);
+
+          const reducedMaxTokens = Math.floor(
+            agentContext.maxContextTokens! * reductionFactor
+          );
           console.warn(
             `[Graph] Input too long. Retrying with ${reductionFactor * 100}% context (${reducedMaxTokens} tokens)...`
           );
-          
+
           // Build fresh indexTokenCountMap if missing/incomplete
           // This is needed when messages were dynamically added without updating the token map
           let tokenMapForPruning = agentContext.indexTokenCountMap;
           if (Object.keys(tokenMapForPruning).length < messages.length) {
-            console.warn('[Graph] Building fresh token count map for emergency pruning...');
+            console.warn(
+              '[Graph] Building fresh token count map for emergency pruning...'
+            );
             tokenMapForPruning = {};
             for (let i = 0; i < messages.length; i++) {
               tokenMapForPruning[i] = agentContext.tokenCounter!(messages[i]);
             }
           }
-          
+
           const emergencyPrune = createPruneMessages({
             startIndex: this.startIndex,
             provider: agentContext.provider,
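
For concreteness, the progressive-reduction arithmetic: with a hypothetical maxContextTokens of 200,000 (illustrative only, not a package default), the retry budgets come out to 100,000, then 50,000, then 20,000 tokens:

    const maxContextTokens = 200_000; // illustrative budget
    for (const reductionFactor of [0.5, 0.25, 0.1]) {
      const reducedMaxTokens = Math.floor(maxContextTokens * reductionFactor);
      console.log(`${reductionFactor * 100}% -> ${reducedMaxTokens} tokens`);
    }
    // 50% -> 100000 tokens, 25% -> 50000 tokens, 10% -> 20000 tokens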
@@ -970,15 +1035,18 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
 
           // Skip if we can't fit any messages
           if (reducedMessages.length === 0) {
-            console.warn(`[Graph] Cannot fit any messages at ${reductionFactor * 100}% reduction, trying next level...`);
+            console.warn(
+              `[Graph] Cannot fit any messages at ${reductionFactor * 100}% reduction, trying next level...`
+            );
             continue;
           }
 
           // Calculate how many messages were pruned and estimate context timeframe
           const prunedCount = finalMessages.length - reducedMessages.length;
           const remainingCount = reducedMessages.length;
-          const estimatedContextDescription = this.getContextTimeframeDescription(remainingCount);
-          
+          const estimatedContextDescription =
+            this.getContextTimeframeDescription(remainingCount);
+
           // Inject a personalized context message to inform the agent about pruning
           const pruneNoticeMessage = new HumanMessage({
             content: `[CONTEXT NOTICE]
@@ -986,11 +1054,11 @@ Our conversation has grown quite long, so I've focused on ${estimatedContextDesc
 
 If I seem to be missing something we discussed earlier, just give me a quick reminder and I'll pick right back up! I'm still fully engaged and ready to help with whatever you need.`,
           });
-          
+
           // Insert the notice after the system message (if any) but before conversation
           const hasSystemMessage = reducedMessages[0]?.getType() === 'system';
           const insertIndex = hasSystemMessage ? 1 : 0;
-          
+
           // Create new array with the pruning notice
           const messagesWithNotice = [
             ...reducedMessages.slice(0, insertIndex),
@@ -1002,15 +1070,29 @@ If I seem to be missing something we discussed earlier, just give me a quick rem
             ? formatContentStrings(messagesWithNotice)
             : messagesWithNotice;
 
-          // Apply Bedrock cache control if needed
+          // Apply thinking block handling first (before cache control)
+          // This ensures AI+Tool sequences are converted to HumanMessages
+          // before we add cache points that could be lost in the conversion
+          if (isAnthropicWithThinking) {
+            retryMessages = ensureThinkingBlockInMessages(
+              retryMessages,
+              agentContext.provider
+            );
+          }
+
+          // Apply Bedrock cache control if needed (after thinking block handling)
           if (agentContext.provider === Providers.BEDROCK) {
             const bedrockOptions = agentContext.clientOptions as
               | t.BedrockAnthropicClientOptions
               | undefined;
             const modelId = bedrockOptions?.model?.toLowerCase() ?? '';
-            const supportsCaching = modelId.includes('claude') || modelId.includes('anthropic') || modelId.includes('nova');
+            const supportsCaching =
+              modelId.includes('claude') ||
+              modelId.includes('anthropic') ||
+              modelId.includes('nova');
             if (bedrockOptions?.promptCache === true && supportsCaching) {
-              retryMessages = addBedrockCacheControl<BaseMessage>(retryMessages);
+              retryMessages =
+                addBedrockCacheControl<BaseMessage>(retryMessages);
             }
           }
 
@@ -1025,18 +1107,26 @@ If I seem to be missing something we discussed earlier, just give me a quick rem
             config
           );
           // Success with reduced context
-          console.info(`[Graph] ✅ Retry successful at ${reductionFactor * 100}% with ${reducedMessages.length} messages (reduced from ${finalMessages.length})`);
+          console.info(
+            `[Graph] ✅ Retry successful at ${reductionFactor * 100}% with ${reducedMessages.length} messages (reduced from ${finalMessages.length})`
+          );
         } catch (retryError) {
-          const retryErrorMsg = (retryError as Error)?.message?.toLowerCase() ?? '';
-          const stillTooLong =
+          const retryErrorMsg =
+            (retryError as Error).message.toLowerCase() ?? '';
+          const stillTooLong =
             retryErrorMsg.includes('too long') ||
             retryErrorMsg.includes('context length') ||
             retryErrorMsg.includes('validationexception');
-          
+
           if (stillTooLong && reductionFactor > 0.1) {
-            console.warn(`[Graph] Still too long at ${reductionFactor * 100}%, trying more aggressive pruning...`);
+            console.warn(
+              `[Graph] Still too long at ${reductionFactor * 100}%, trying more aggressive pruning...`
+            );
           } else {
-            console.error(`[Graph] Retry at ${reductionFactor * 100}% failed:`, (retryError as Error)?.message);
+            console.error(
+              `[Graph] Retry at ${reductionFactor * 100}% failed:`,
+              (retryError as Error).message
+            );
           }
         }
       }