npm - illuma-agents - Versions diffs - 1.0.16 → 1.0.18 - Mend

illuma-agents 1.0.16 → 1.0.18

This diff compares the contents of two publicly available versions of the package as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (114)

package/dist/cjs/agents/AgentContext.cjs +3 -1
package/dist/cjs/agents/AgentContext.cjs.map +1 -1
package/dist/cjs/common/enum.cjs +18 -0
package/dist/cjs/common/enum.cjs.map +1 -1
package/dist/cjs/graphs/Graph.cjs +79 -32
package/dist/cjs/graphs/Graph.cjs.map +1 -1
package/dist/cjs/llm/bedrock/index.cjs +5 -3
package/dist/cjs/llm/bedrock/index.cjs.map +1 -1
package/dist/cjs/llm/openai/index.cjs +1 -0
package/dist/cjs/llm/openai/index.cjs.map +1 -1
package/dist/cjs/llm/openrouter/index.cjs +10 -1
package/dist/cjs/llm/openrouter/index.cjs.map +1 -1
package/dist/cjs/llm/vertexai/index.cjs +7 -8
package/dist/cjs/llm/vertexai/index.cjs.map +1 -1
package/dist/cjs/main.cjs +15 -0
package/dist/cjs/main.cjs.map +1 -1
package/dist/cjs/messages/cache.cjs +11 -6
package/dist/cjs/messages/cache.cjs.map +1 -1
package/dist/cjs/messages/core.cjs +16 -8
package/dist/cjs/messages/core.cjs.map +1 -1
package/dist/cjs/messages/format.cjs +9 -2
package/dist/cjs/messages/format.cjs.map +1 -1
package/dist/cjs/messages/tools.cjs +17 -10
package/dist/cjs/messages/tools.cjs.map +1 -1
package/dist/cjs/stream.cjs +30 -16
package/dist/cjs/stream.cjs.map +1 -1
package/dist/cjs/tools/ProgrammaticToolCalling.cjs +209 -47
package/dist/cjs/tools/ProgrammaticToolCalling.cjs.map +1 -1
package/dist/cjs/tools/ToolNode.cjs +73 -3
package/dist/cjs/tools/ToolNode.cjs.map +1 -1
package/dist/cjs/tools/handlers.cjs +1 -0
package/dist/cjs/tools/handlers.cjs.map +1 -1
package/dist/cjs/tools/search/search.cjs.map +1 -1
package/dist/cjs/tools/search/tool.cjs +3 -1
package/dist/cjs/tools/search/tool.cjs.map +1 -1
package/dist/cjs/utils/contextAnalytics.cjs +66 -0
package/dist/cjs/utils/contextAnalytics.cjs.map +1 -0
package/dist/cjs/utils/run.cjs.map +1 -1
package/dist/cjs/utils/toonFormat.cjs +388 -0
package/dist/cjs/utils/toonFormat.cjs.map +1 -0
package/dist/esm/agents/AgentContext.mjs +3 -1
package/dist/esm/agents/AgentContext.mjs.map +1 -1
package/dist/esm/common/enum.mjs +19 -1
package/dist/esm/common/enum.mjs.map +1 -1
package/dist/esm/graphs/Graph.mjs +81 -34
package/dist/esm/graphs/Graph.mjs.map +1 -1
package/dist/esm/llm/bedrock/index.mjs +5 -3
package/dist/esm/llm/bedrock/index.mjs.map +1 -1
package/dist/esm/llm/openai/index.mjs +1 -0
package/dist/esm/llm/openai/index.mjs.map +1 -1
package/dist/esm/llm/openrouter/index.mjs +10 -1
package/dist/esm/llm/openrouter/index.mjs.map +1 -1
package/dist/esm/llm/vertexai/index.mjs +7 -8
package/dist/esm/llm/vertexai/index.mjs.map +1 -1
package/dist/esm/main.mjs +4 -2
package/dist/esm/main.mjs.map +1 -1
package/dist/esm/messages/cache.mjs +11 -6
package/dist/esm/messages/cache.mjs.map +1 -1
package/dist/esm/messages/core.mjs +18 -10
package/dist/esm/messages/core.mjs.map +1 -1
package/dist/esm/messages/format.mjs +10 -3
package/dist/esm/messages/format.mjs.map +1 -1
package/dist/esm/messages/tools.mjs +19 -12
package/dist/esm/messages/tools.mjs.map +1 -1
package/dist/esm/stream.mjs +30 -16
package/dist/esm/stream.mjs.map +1 -1
package/dist/esm/tools/ProgrammaticToolCalling.mjs +208 -48
package/dist/esm/tools/ProgrammaticToolCalling.mjs.map +1 -1
package/dist/esm/tools/ToolNode.mjs +73 -3
package/dist/esm/tools/ToolNode.mjs.map +1 -1
package/dist/esm/tools/handlers.mjs +1 -0
package/dist/esm/tools/handlers.mjs.map +1 -1
package/dist/esm/tools/search/search.mjs.map +1 -1
package/dist/esm/tools/search/tool.mjs +3 -1
package/dist/esm/tools/search/tool.mjs.map +1 -1
package/dist/esm/utils/contextAnalytics.mjs +64 -0
package/dist/esm/utils/contextAnalytics.mjs.map +1 -0
package/dist/esm/utils/run.mjs.map +1 -1
package/dist/esm/utils/toonFormat.mjs +381 -0
package/dist/esm/utils/toonFormat.mjs.map +1 -0
package/dist/types/common/enum.d.ts +17 -0
package/dist/types/graphs/Graph.d.ts +8 -0
package/dist/types/tools/ProgrammaticToolCalling.d.ts +19 -0
package/dist/types/types/tools.d.ts +3 -1
package/dist/types/utils/contextAnalytics.d.ts +37 -0
package/dist/types/utils/index.d.ts +2 -0
package/dist/types/utils/toonFormat.d.ts +111 -0
package/package.json +3 -2
package/src/agents/AgentContext.ts +28 -20
package/src/common/enum.ts +18 -0
package/src/graphs/Graph.ts +152 -62
package/src/llm/bedrock/__tests__/bedrock-caching.test.ts +495 -473
package/src/llm/bedrock/index.ts +47 -35
package/src/llm/openrouter/index.ts +11 -1
package/src/llm/vertexai/index.ts +9 -10
package/src/messages/cache.ts +104 -55
package/src/messages/core.ts +29 -19
package/src/messages/format.ts +14 -3
package/src/messages/tools.ts +20 -13
package/src/scripts/simple.ts +1 -1
package/src/specs/emergency-prune.test.ts +407 -355
package/src/stream.ts +28 -20
package/src/tools/ProgrammaticToolCalling.ts +246 -52
package/src/tools/ToolNode.ts +78 -5
package/src/tools/__tests__/ProgrammaticToolCalling.test.ts +155 -0
package/src/tools/search/jina-reranker.test.ts +32 -28
package/src/tools/search/search.ts +3 -1
package/src/tools/search/tool.ts +16 -7
package/src/types/tools.ts +3 -1
package/src/utils/contextAnalytics.ts +103 -0
package/src/utils/index.ts +2 -0
package/src/utils/llmConfig.ts +8 -1
package/src/utils/run.ts +5 -4
package/src/utils/toonFormat.ts +475 -0

package/src/graphs/Graph.ts CHANGED Viewed

@@ -36,6 +36,7 @@ import {
   GraphEvents,
   Providers,
   StepTypes,
+  MessageTypes,
 } from '@/common';
 import {
   formatAnthropicArtifactContent,
@@ -56,6 +57,10 @@ import {
   joinKeys,
   sleep,
 } from '@/utils';
+import {
+  buildContextAnalytics,
+  type ContextAnalytics,
+} from '@/utils/contextAnalytics';
 import { getChatModelClass, manualToolStreamProviders } from '@/llm/providers';
 import { ToolNode as CustomToolNode, toolsCondition } from '@/tools/ToolNode';
 import { ChatOpenAI, AzureChatOpenAI } from '@/llm/openai';
@@ -212,7 +217,7 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
   /**
    * Estimates a human-friendly description of the conversation timeframe based on message count.
    * Uses rough heuristics to provide context about how much history is available.
-   *
+   *
    * @param messageCount - Number of messages in the remaining context
    * @returns A friendly description like "the last few minutes", "the past hour", etc.
    */
@@ -222,7 +227,7 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
     // - Normal chat: ~10-15 messages per hour
     // - Slow/thoughtful chat: ~5-8 messages per hour
     // We use a middle estimate of ~12 messages per hour
     if (messageCount <= 5) {
       return 'just the last few exchanges';
     } else if (messageCount <= 15) {
@@ -445,6 +450,17 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
     return primaryContext.getContextBreakdown();
   }
+  /**
+   * Get the latest context analytics from the graph.
+   * Returns metrics like utilization %, TOON stats, message breakdown.
+   */
+  getContextAnalytics(): ContextAnalytics | null {
+    return this.lastContextAnalytics ?? null;
+  }
+  /** Store the latest context analytics for retrieval after run */
+  private lastContextAnalytics: ContextAnalytics | null = null;
   /* Graph */
   createSystemRunnable({
@@ -699,7 +715,8 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
           content: `[SESSION_CONTEXT]\n${agentContext.dynamicContext}`,
         });
         const ackMessage = new AIMessageChunk({
-          content: 'Understood. I have noted the session context including the current date/time (CST) and will apply it appropriately.',
+          content:
+            'Understood. I have noted the session context including the current date/time (CST) and will apply it appropriately.',
         });
         messages = [dynamicContextMessage, ackMessage, ...messages];
       }
@@ -732,17 +749,7 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
       this.config = config;
       let messagesToUse = messages;
-      // Debug logging for pruneMessages creation conditions
-      const hasPruneMessages = !!agentContext.pruneMessages;
-      const hasTokenCounter = !!agentContext.tokenCounter;
-      const hasMaxContextTokens = agentContext.maxContextTokens != null;
-      const hasIndex0TokenCount = agentContext.indexTokenCountMap[0] != null;
-      if (!hasPruneMessages && hasTokenCounter && hasMaxContextTokens && !hasIndex0TokenCount) {
-        console.warn('[Graph] Cannot create pruneMessages - missing indexTokenCountMap[0]. Token map keys:', Object.keys(agentContext.indexTokenCountMap));
-      }
       if (
         !agentContext.pruneMessages &&
         agentContext.tokenCounter &&
@@ -771,6 +778,7 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
           indexTokenCountMap: agentContext.indexTokenCountMap,
         });
       }
       if (agentContext.pruneMessages) {
         const { context, indexTokenCountMap } = agentContext.pruneMessages({
           messages,
@@ -798,13 +806,14 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
       if (
         agentContext.provider === Providers.BEDROCK &&
         lastMessageX instanceof AIMessageChunk &&
-        lastMessageY instanceof ToolMessage &&
+        lastMessageY?.getType() === MessageTypes.TOOL &&
         typeof lastMessageX.content === 'string'
       ) {
         finalMessages[finalMessages.length - 2].content = '';
       }
-      const isLatestToolMessage = lastMessageY instanceof ToolMessage;
+      // Use getType() instead of instanceof to avoid module mismatch issues
+      const isLatestToolMessage = lastMessageY?.getType() === MessageTypes.TOOL;
       if (
         isLatestToolMessage &&
@@ -820,6 +829,33 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
         formatArtifactPayload(finalMessages);
       }
+      /**
+       * Handle edge case: when switching from a non-thinking agent to a thinking-enabled agent,
+       * convert AI messages with tool calls to HumanMessages to avoid thinking block requirements.
+       * This is required by Anthropic/Bedrock when thinking is enabled.
+       *
+       * IMPORTANT: This MUST happen BEFORE cache control is applied.
+       * If we add cachePoint to an AI message first, then convert that AI message to a HumanMessage,
+       * the cachePoint is lost. By converting first, we ensure cache control is applied to the
+       * final message structure that will be sent to the API.
+       */
+      const isAnthropicWithThinking =
+        (agentContext.provider === Providers.ANTHROPIC &&
+          (agentContext.clientOptions as t.AnthropicClientOptions).thinking !=
+            null) ||
+        (agentContext.provider === Providers.BEDROCK &&
+          (agentContext.clientOptions as t.BedrockAnthropicInput)
+            .additionalModelRequestFields?.['thinking'] != null);
+      if (isAnthropicWithThinking) {
+        finalMessages = ensureThinkingBlockInMessages(
+          finalMessages,
+          agentContext.provider
+        );
+      }
+      // Apply cache control AFTER thinking block handling to ensure cachePoints aren't lost
+      // when AI messages are converted to HumanMessages
       if (agentContext.provider === Providers.ANTHROPIC) {
         const anthropicOptions = agentContext.clientOptions as
           | t.AnthropicClientOptions
@@ -841,32 +877,15 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
         // Both Claude and Nova models support cachePoint in system and messages
         // (Llama, Titan, and other models do NOT support cachePoint)
         const modelId = bedrockOptions?.model?.toLowerCase() ?? '';
-        const supportsCaching = modelId.includes('claude') || modelId.includes('anthropic') || modelId.includes('nova');
+        const supportsCaching =
+          modelId.includes('claude') ||
+          modelId.includes('anthropic') ||
+          modelId.includes('nova');
         if (bedrockOptions?.promptCache === true && supportsCaching) {
           finalMessages = addBedrockCacheControl<BaseMessage>(finalMessages);
         }
       }
-      /**
-       * Handle edge case: when switching from a non-thinking agent to a thinking-enabled agent,
-       * convert AI messages with tool calls to HumanMessages to avoid thinking block requirements.
-       * This is required by Anthropic/Bedrock when thinking is enabled.
-       */
-      const isAnthropicWithThinking =
-        (agentContext.provider === Providers.ANTHROPIC &&
-          (agentContext.clientOptions as t.AnthropicClientOptions).thinking !=
-            null) ||
-        (agentContext.provider === Providers.BEDROCK &&
-          (agentContext.clientOptions as t.BedrockAnthropicInput)
-            .additionalModelRequestFields?.['thinking'] != null);
-      if (isAnthropicWithThinking) {
-        finalMessages = ensureThinkingBlockInMessages(
-          finalMessages,
-          agentContext.provider
-        );
-      }
       if (
         agentContext.lastStreamCall != null &&
         agentContext.streamBuffer != null
@@ -896,6 +915,42 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
         );
       }
+      // Get model info for analytics
+      const bedrockOpts = agentContext.clientOptions as
+        | t.BedrockAnthropicClientOptions
+        | undefined;
+      const modelId =
+        bedrockOpts?.model ||
+        (agentContext.clientOptions as t.AnthropicClientOptions | undefined)
+          ?.modelName;
+      const thinkingConfig =
+        bedrockOpts?.additionalModelRequestFields?.['thinking'] ||
+        (agentContext.clientOptions as t.AnthropicClientOptions | undefined)
+          ?.thinking;
+      // Build and emit context analytics for traces
+      const contextAnalytics = buildContextAnalytics(finalMessages, {
+        tokenCounter: agentContext.tokenCounter,
+        maxContextTokens: agentContext.maxContextTokens,
+        instructionTokens: agentContext.instructionTokens,
+        indexTokenCountMap: agentContext.indexTokenCountMap,
+      });
+      // Store for retrieval via getContextAnalytics() after run completes
+      this.lastContextAnalytics = contextAnalytics;
+      await safeDispatchCustomEvent(
+        GraphEvents.ON_CONTEXT_ANALYTICS,
+        {
+          provider: agentContext.provider,
+          model: modelId,
+          thinkingEnabled: thinkingConfig != null,
+          cacheEnabled: bedrockOpts?.promptCache === true,
+          analytics: contextAnalytics,
+        },
+        config
+      );
       try {
         result = await this.attemptInvoke(
           {
@@ -908,8 +963,9 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
         );
       } catch (primaryError) {
         // Check if this is a "input too long" error from Bedrock/Anthropic
-        const errorMessage = (primaryError as Error)?.message?.toLowerCase() ?? '';
-        const isInputTooLongError =
+        const errorMessage =
+          (primaryError as Error).message.toLowerCase() ?? '';
+        const isInputTooLongError =
           errorMessage.includes('too long') ||
           errorMessage.includes('input is too long') ||
           errorMessage.includes('context length') ||
@@ -919,41 +975,50 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
         // Log when we detect the error
         if (isInputTooLongError) {
-          console.warn('[Graph] Detected input too long error:', errorMessage.substring(0, 200));
+          console.warn(
+            '[Graph] Detected input too long error:',
+            errorMessage.substring(0, 200)
+          );
           console.warn('[Graph] Checking emergency pruning conditions:', {
             hasPruneMessages: !!agentContext.pruneMessages,
             hasTokenCounter: !!agentContext.tokenCounter,
             maxContextTokens: agentContext.maxContextTokens,
-            indexTokenMapKeys: Object.keys(agentContext.indexTokenCountMap).length
+            indexTokenMapKeys: Object.keys(agentContext.indexTokenCountMap)
+              .length,
           });
         }
         // If input too long and we have pruning capability OR tokenCounter, retry with progressively more aggressive pruning
         // Note: We can create emergency pruneMessages dynamically if we have tokenCounter and maxContextTokens
-        const canPrune = agentContext.tokenCounter && agentContext.maxContextTokens;
+        const canPrune =
+          agentContext.tokenCounter && agentContext.maxContextTokens;
         if (isInputTooLongError && canPrune) {
           // Progressive reduction: 50% -> 25% -> 10% of original context
           const reductionLevels = [0.5, 0.25, 0.1];
           for (const reductionFactor of reductionLevels) {
             if (result) break; // Exit if we got a result
-            const reducedMaxTokens = Math.floor(agentContext.maxContextTokens! * reductionFactor);
+            const reducedMaxTokens = Math.floor(
+              agentContext.maxContextTokens! * reductionFactor
+            );
             console.warn(
               `[Graph] Input too long. Retrying with ${reductionFactor * 100}% context (${reducedMaxTokens} tokens)...`
             );
             // Build fresh indexTokenCountMap if missing/incomplete
             // This is needed when messages were dynamically added without updating the token map
             let tokenMapForPruning = agentContext.indexTokenCountMap;
             if (Object.keys(tokenMapForPruning).length < messages.length) {
-              console.warn('[Graph] Building fresh token count map for emergency pruning...');
+              console.warn(
+                '[Graph] Building fresh token count map for emergency pruning...'
+              );
               tokenMapForPruning = {};
               for (let i = 0; i < messages.length; i++) {
                 tokenMapForPruning[i] = agentContext.tokenCounter!(messages[i]);
               }
             }
             const emergencyPrune = createPruneMessages({
               startIndex: this.startIndex,
               provider: agentContext.provider,
@@ -970,15 +1035,18 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
             // Skip if we can't fit any messages
             if (reducedMessages.length === 0) {
-              console.warn(`[Graph] Cannot fit any messages at ${reductionFactor * 100}% reduction, trying next level...`);
+              console.warn(
+                `[Graph] Cannot fit any messages at ${reductionFactor * 100}% reduction, trying next level...`
+              );
               continue;
             }
             // Calculate how many messages were pruned and estimate context timeframe
             const prunedCount = finalMessages.length - reducedMessages.length;
             const remainingCount = reducedMessages.length;
-            const estimatedContextDescription = this.getContextTimeframeDescription(remainingCount);
+            const estimatedContextDescription =
+              this.getContextTimeframeDescription(remainingCount);
             // Inject a personalized context message to inform the agent about pruning
             const pruneNoticeMessage = new HumanMessage({
               content: `[CONTEXT NOTICE]
@@ -986,11 +1054,11 @@ Our conversation has grown quite long, so I've focused on ${estimatedContextDesc
 If I seem to be missing something we discussed earlier, just give me a quick reminder and I'll pick right back up! I'm still fully engaged and ready to help with whatever you need.`,
             });
             // Insert the notice after the system message (if any) but before conversation
             const hasSystemMessage = reducedMessages[0]?.getType() === 'system';
             const insertIndex = hasSystemMessage ? 1 : 0;
             // Create new array with the pruning notice
             const messagesWithNotice = [
               ...reducedMessages.slice(0, insertIndex),
@@ -1002,15 +1070,29 @@ If I seem to be missing something we discussed earlier, just give me a quick rem
               ? formatContentStrings(messagesWithNotice)
               : messagesWithNotice;
-            // Apply Bedrock cache control if needed
+            // Apply thinking block handling first (before cache control)
+            // This ensures AI+Tool sequences are converted to HumanMessages
+            // before we add cache points that could be lost in the conversion
+            if (isAnthropicWithThinking) {
+              retryMessages = ensureThinkingBlockInMessages(
+                retryMessages,
+                agentContext.provider
+              );
+            }
+            // Apply Bedrock cache control if needed (after thinking block handling)
             if (agentContext.provider === Providers.BEDROCK) {
               const bedrockOptions = agentContext.clientOptions as
                 | t.BedrockAnthropicClientOptions
                 | undefined;
               const modelId = bedrockOptions?.model?.toLowerCase() ?? '';
-              const supportsCaching = modelId.includes('claude') || modelId.includes('anthropic') || modelId.includes('nova');
+              const supportsCaching =
+                modelId.includes('claude') ||
+                modelId.includes('anthropic') ||
+                modelId.includes('nova');
               if (bedrockOptions?.promptCache === true && supportsCaching) {
-                retryMessages = addBedrockCacheControl<BaseMessage>(retryMessages);
+                retryMessages =
+                  addBedrockCacheControl<BaseMessage>(retryMessages);
               }
             }
@@ -1025,18 +1107,26 @@ If I seem to be missing something we discussed earlier, just give me a quick rem
                 config
               );
               // Success with reduced context
-              console.info(`[Graph] ✅ Retry successful at ${reductionFactor * 100}% with ${reducedMessages.length} messages (reduced from ${finalMessages.length})`);
+              console.info(
+                `[Graph] ✅ Retry successful at ${reductionFactor * 100}% with ${reducedMessages.length} messages (reduced from ${finalMessages.length})`
+              );
             } catch (retryError) {
-              const retryErrorMsg = (retryError as Error)?.message?.toLowerCase() ?? '';
-              const stillTooLong =
+              const retryErrorMsg =
+                (retryError as Error).message.toLowerCase() ?? '';
+              const stillTooLong =
                 retryErrorMsg.includes('too long') ||
                 retryErrorMsg.includes('context length') ||
                 retryErrorMsg.includes('validationexception');
               if (stillTooLong && reductionFactor > 0.1) {
-                console.warn(`[Graph] Still too long at ${reductionFactor * 100}%, trying more aggressive pruning...`);
+                console.warn(
+                  `[Graph] Still too long at ${reductionFactor * 100}%, trying more aggressive pruning...`
+                );
               } else {
-                console.error(`[Graph] Retry at ${reductionFactor * 100}% failed:`, (retryError as Error)?.message);
+                console.error(
+                  `[Graph] Retry at ${reductionFactor * 100}% failed:`,
+                  (retryError as Error).message
+                );
               }
             }
           }