illuma-agents 1.0.14 → 1.0.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/graphs/Graph.cjs +186 -24
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/esm/graphs/Graph.mjs +186 -24
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/dist/types/graphs/Graph.d.ts +8 -0
- package/package.json +1 -1
- package/src/graphs/Graph.ts +212 -28
- package/src/specs/emergency-prune.test.ts +355 -0
package/src/graphs/Graph.ts
CHANGED
@@ -209,6 +209,39 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
     }
   }

+  /**
+   * Estimates a human-friendly description of the conversation timeframe based on message count.
+   * Uses rough heuristics to provide context about how much history is available.
+   *
+   * @param messageCount - Number of messages in the remaining context
+   * @returns A friendly description like "the last few minutes", "the past hour", etc.
+   */
+  getContextTimeframeDescription(messageCount: number): string {
+    // Rough heuristics based on typical conversation patterns:
+    // - Very active chat: ~20-30 messages per hour
+    // - Normal chat: ~10-15 messages per hour
+    // - Slow/thoughtful chat: ~5-8 messages per hour
+    // We use a middle estimate of ~12 messages per hour
+
+    if (messageCount <= 5) {
+      return 'just the last few exchanges';
+    } else if (messageCount <= 15) {
+      return 'the last several minutes';
+    } else if (messageCount <= 30) {
+      return 'roughly the past hour';
+    } else if (messageCount <= 60) {
+      return 'the past couple of hours';
+    } else if (messageCount <= 150) {
+      return 'the past few hours';
+    } else if (messageCount <= 300) {
+      return 'roughly a day\'s worth';
+    } else if (messageCount <= 700) {
+      return 'the past few days';
+    } else {
+      return 'about a week or more';
+    }
+  }
+
   /* Run Step Processing */

   getRunStep(stepId: string): t.RunStep | undefined {
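
Read in isolation, the new helper is a pure mapping from a remaining-message count to one of the phrases used later in the pruning notice. A small sketch of the expected outputs, matching the assertions in the spec added at the end of this diff (the `graph` value is a structural stand-in, since constructing a StandardGraph is not shown here):

// Structural stand-in for a StandardGraph instance; construction details are not part of this diff.
declare const graph: { getContextTimeframeDescription(messageCount: number): string };

graph.getContextTimeframeDescription(3);    // 'just the last few exchanges'
graph.getContextTimeframeDescription(25);   // 'roughly the past hour'
graph.getContextTimeframeDescription(200);  // "roughly a day's worth"
graph.getContextTimeframeDescription(1000); // 'about a week or more'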
@@ -699,6 +732,17 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
     this.config = config;

     let messagesToUse = messages;
+
+    // Debug logging for pruneMessages creation conditions
+    const hasPruneMessages = !!agentContext.pruneMessages;
+    const hasTokenCounter = !!agentContext.tokenCounter;
+    const hasMaxContextTokens = agentContext.maxContextTokens != null;
+    const hasIndex0TokenCount = agentContext.indexTokenCountMap[0] != null;
+
+    if (!hasPruneMessages && hasTokenCounter && hasMaxContextTokens && !hasIndex0TokenCount) {
+      console.warn('[Graph] Cannot create pruneMessages - missing indexTokenCountMap[0]. Token map keys:', Object.keys(agentContext.indexTokenCountMap));
+    }
+
     if (
       !agentContext.pruneMessages &&
       agentContext.tokenCounter &&
@@ -863,37 +907,177 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
         config
       );
     } catch (primaryError) {
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+      // Check if this is a "input too long" error from Bedrock/Anthropic
+      const errorMessage = (primaryError as Error)?.message?.toLowerCase() ?? '';
+      const isInputTooLongError =
+        errorMessage.includes('too long') ||
+        errorMessage.includes('input is too long') ||
+        errorMessage.includes('context length') ||
+        errorMessage.includes('maximum context') ||
+        errorMessage.includes('validationexception') ||
+        errorMessage.includes('prompt is too long');
+
+      // Log when we detect the error
+      if (isInputTooLongError) {
+        console.warn('[Graph] Detected input too long error:', errorMessage.substring(0, 200));
+        console.warn('[Graph] Checking emergency pruning conditions:', {
+          hasPruneMessages: !!agentContext.pruneMessages,
+          hasTokenCounter: !!agentContext.tokenCounter,
+          maxContextTokens: agentContext.maxContextTokens,
+          indexTokenMapKeys: Object.keys(agentContext.indexTokenCountMap).length
+        });
+      }
+
+      // If input too long and we have pruning capability OR tokenCounter, retry with progressively more aggressive pruning
+      // Note: We can create emergency pruneMessages dynamically if we have tokenCounter and maxContextTokens
+      const canPrune = agentContext.tokenCounter && agentContext.maxContextTokens;
+      if (isInputTooLongError && canPrune) {
+        // Progressive reduction: 50% -> 25% -> 10% of original context
+        const reductionLevels = [0.5, 0.25, 0.1];
+
+        for (const reductionFactor of reductionLevels) {
+          if (result) break; // Exit if we got a result
+
+          const reducedMaxTokens = Math.floor(agentContext.maxContextTokens! * reductionFactor);
+          console.warn(
+            `[Graph] Input too long. Retrying with ${reductionFactor * 100}% context (${reducedMaxTokens} tokens)...`
           );
-
-
-
-
-
+
+          // Build fresh indexTokenCountMap if missing/incomplete
+          // This is needed when messages were dynamically added without updating the token map
+          let tokenMapForPruning = agentContext.indexTokenCountMap;
+          if (Object.keys(tokenMapForPruning).length < messages.length) {
+            console.warn('[Graph] Building fresh token count map for emergency pruning...');
+            tokenMapForPruning = {};
+            for (let i = 0; i < messages.length; i++) {
+              tokenMapForPruning[i] = agentContext.tokenCounter!(messages[i]);
+            }
+          }
+
+          const emergencyPrune = createPruneMessages({
+            startIndex: this.startIndex,
+            provider: agentContext.provider,
+            tokenCounter: agentContext.tokenCounter!,
+            maxTokens: reducedMaxTokens,
+            thinkingEnabled: false, // Disable thinking for emergency prune
+            indexTokenCountMap: tokenMapForPruning,
+          });
+
+          const { context: reducedMessages } = emergencyPrune({
+            messages,
+            usageMetadata: agentContext.currentUsage,
+          });
+
+          // Skip if we can't fit any messages
+          if (reducedMessages.length === 0) {
+            console.warn(`[Graph] Cannot fit any messages at ${reductionFactor * 100}% reduction, trying next level...`);
+            continue;
+          }
+
+          // Calculate how many messages were pruned and estimate context timeframe
+          const prunedCount = finalMessages.length - reducedMessages.length;
+          const remainingCount = reducedMessages.length;
+          const estimatedContextDescription = this.getContextTimeframeDescription(remainingCount);
+
+          // Inject a personalized context message to inform the agent about pruning
+          const pruneNoticeMessage = new HumanMessage({
+            content: `[CONTEXT NOTICE]
+Our conversation has grown quite long, so I've focused on ${estimatedContextDescription} of our chat (${remainingCount} recent messages). ${prunedCount} earlier messages are no longer in my immediate memory.
+
+If I seem to be missing something we discussed earlier, just give me a quick reminder and I'll pick right back up! I'm still fully engaged and ready to help with whatever you need.`,
+          });
+
+          // Insert the notice after the system message (if any) but before conversation
+          const hasSystemMessage = reducedMessages[0]?.getType() === 'system';
+          const insertIndex = hasSystemMessage ? 1 : 0;
+
+          // Create new array with the pruning notice
+          const messagesWithNotice = [
+            ...reducedMessages.slice(0, insertIndex),
+            pruneNoticeMessage,
+            ...reducedMessages.slice(insertIndex),
+          ];
+
+          let retryMessages = agentContext.useLegacyContent
+            ? formatContentStrings(messagesWithNotice)
+            : messagesWithNotice;
+
+          // Apply Bedrock cache control if needed
+          if (agentContext.provider === Providers.BEDROCK) {
+            const bedrockOptions = agentContext.clientOptions as
+              | t.BedrockAnthropicClientOptions
+              | undefined;
+            const modelId = bedrockOptions?.model?.toLowerCase() ?? '';
+            const supportsCaching = modelId.includes('claude') || modelId.includes('anthropic') || modelId.includes('nova');
+            if (bedrockOptions?.promptCache === true && supportsCaching) {
+              retryMessages = addBedrockCacheControl<BaseMessage>(retryMessages);
+            }
+          }
+
+          try {
+            result = await this.attemptInvoke(
+              {
+                currentModel: model,
+                finalMessages: retryMessages,
+                provider: agentContext.provider,
+                tools: agentContext.tools,
+              },
+              config
+            );
+            // Success with reduced context
+            console.info(`[Graph] ✅ Retry successful at ${reductionFactor * 100}% with ${reducedMessages.length} messages (reduced from ${finalMessages.length})`);
+          } catch (retryError) {
+            const retryErrorMsg = (retryError as Error)?.message?.toLowerCase() ?? '';
+            const stillTooLong =
+              retryErrorMsg.includes('too long') ||
+              retryErrorMsg.includes('context length') ||
+              retryErrorMsg.includes('validationexception');
+
+            if (stillTooLong && reductionFactor > 0.1) {
+              console.warn(`[Graph] Still too long at ${reductionFactor * 100}%, trying more aggressive pruning...`);
+            } else {
+              console.error(`[Graph] Retry at ${reductionFactor * 100}% failed:`, (retryError as Error)?.message);
+            }
+          }
         }
       }
-
-
+
+      // If we got a result from retry, skip fallbacks
+      if (result) {
+        // result already set from retry
+      } else {
+        let lastError: unknown = primaryError;
+        for (const fb of fallbacks) {
+          try {
+            let model = this.getNewModel({
+              provider: fb.provider,
+              clientOptions: fb.clientOptions,
+            });
+            const bindableTools = agentContext.tools;
+            model = (
+              !bindableTools || bindableTools.length === 0
+                ? model
+                : model.bindTools(bindableTools)
+            ) as t.ChatModelInstance;
+            result = await this.attemptInvoke(
+              {
+                currentModel: model,
+                finalMessages,
+                provider: fb.provider,
+                tools: agentContext.tools,
+              },
+              config
+            );
+            lastError = undefined;
+            break;
+          } catch (e) {
+            lastError = e;
+            continue;
+          }
+        }
+        if (lastError !== undefined) {
+          throw lastError;
+        }
+      }
     }
   }

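Reduced to its control structure, the retry path above does the following: detect an "input too long" failure, then re-prune and re-invoke at 50%, 25%, and 10% of the original context budget until one attempt succeeds, and only then fall through to the provider fallbacks. A stripped-down sketch of that ladder, using stand-in functions rather than the package's actual attemptInvoke/createPruneMessages signatures:

// Sketch only: `attempt` stands in for "prune to the given budget, then invoke the model".
type Attempt = (maxTokens: number) => Promise<string>;

async function retryWithProgressivePruning(
  attempt: Attempt,
  maxContextTokens: number
): Promise<string | undefined> {
  const reductionLevels = [0.5, 0.25, 0.1]; // same ladder as the Graph.ts change above
  for (const reductionFactor of reductionLevels) {
    const reducedMaxTokens = Math.floor(maxContextTokens * reductionFactor);
    try {
      // A successful attempt ends the ladder, as `if (result) break;` does above.
      return await attempt(reducedMaxTokens);
    } catch {
      // Whether the attempt is still too long or failed for another reason,
      // the loop moves on to the next, more aggressive level.
    }
  }
  return undefined; // the caller then falls back to alternate providers, as the diff does
}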
package/src/specs/emergency-prune.test.ts
ADDED
@@ -0,0 +1,355 @@
+// src/specs/emergency-prune.test.ts
+/**
+ * Tests for the emergency pruning feature that handles "input too long" errors
+ * by retrying with more aggressive message pruning and adding a context notice.
+ */
+import {
+  HumanMessage,
+  AIMessage,
+  SystemMessage,
+  BaseMessage,
+} from '@langchain/core/messages';
+import type * as t from '@/types';
+import { createPruneMessages } from '@/messages/prune';
+import { Providers } from '@/common';
+
+// Simple token counter for testing (1 character = 1 token)
+const createTestTokenCounter = (): t.TokenCounter => {
+  return (message: BaseMessage): number => {
+    const content = message.content as string | Array<t.MessageContentComplex | string> | undefined;
+    if (typeof content === 'string') {
+      return content.length;
+    }
+    if (Array.isArray(content)) {
+      return content.reduce((total, item) => {
+        if (typeof item === 'string') return total + item.length;
+        if (typeof item === 'object' && 'text' in item && typeof item.text === 'string') {
+          return total + item.text.length;
+        }
+        return total;
+      }, 0);
+    }
+    return 0;
+  };
+};
+
+// Helper to create test messages
+const createTestMessages = (count: number, tokensPer: number): BaseMessage[] => {
+  const messages: BaseMessage[] = [
+    new SystemMessage('You are a helpful assistant.'),
+  ];
+
+  for (let i = 0; i < count; i++) {
+    const content = 'x'.repeat(tokensPer);
+    if (i % 2 === 0) {
+      messages.push(new HumanMessage(content));
+    } else {
+      messages.push(new AIMessage(content));
+    }
+  }
+
+  return messages;
+};
+
+// Helper to build indexTokenCountMap
+const buildIndexTokenCountMap = (
+  messages: BaseMessage[],
+  tokenCounter: t.TokenCounter
+): Record<string, number> => {
+  const map: Record<string, number> = {};
+  messages.forEach((msg, index) => {
+    map[index] = tokenCounter(msg);
+  });
+  return map;
+};
+
+/**
+ * Estimates a human-friendly description of the conversation timeframe based on message count.
+ * This mirrors the implementation in Graph.ts
+ */
+const getContextTimeframeDescription = (messageCount: number): string => {
+  if (messageCount <= 5) {
+    return 'just the last few exchanges';
+  } else if (messageCount <= 15) {
+    return 'the last several minutes';
+  } else if (messageCount <= 30) {
+    return 'roughly the past hour';
+  } else if (messageCount <= 60) {
+    return 'the past couple of hours';
+  } else if (messageCount <= 150) {
+    return 'the past few hours';
+  } else if (messageCount <= 300) {
+    return "roughly a day's worth";
+  } else if (messageCount <= 700) {
+    return 'the past few days';
+  } else {
+    return 'about a week or more';
+  }
+};
+
+describe('Emergency Pruning Feature', () => {
+  const tokenCounter = createTestTokenCounter();
+
+  describe('Normal Pruning vs Emergency Pruning', () => {
+    it('should prune more aggressively with 50% reduced context', () => {
+      // Create 20 messages, each with 100 tokens = 2000 tokens total (excluding system)
+      const messages = createTestMessages(20, 100);
+      const indexTokenCountMap = buildIndexTokenCountMap(messages, tokenCounter);
+
+      // Normal prune with 1500 token limit
+      const normalMaxTokens = 1500;
+      const normalPrune = createPruneMessages({
+        startIndex: 0,
+        provider: Providers.BEDROCK,
+        tokenCounter,
+        maxTokens: normalMaxTokens,
+        thinkingEnabled: false,
+        indexTokenCountMap,
+      });
+
+      const { context: normalContext } = normalPrune({ messages });
+
+      // Emergency prune with 50% (750 tokens)
+      const emergencyMaxTokens = Math.floor(normalMaxTokens * 0.5);
+      const emergencyPrune = createPruneMessages({
+        startIndex: 0,
+        provider: Providers.BEDROCK,
+        tokenCounter,
+        maxTokens: emergencyMaxTokens,
+        thinkingEnabled: false,
+        indexTokenCountMap,
+      });
+
+      const { context: emergencyContext } = emergencyPrune({ messages });
+
+      // Emergency should have fewer messages
+      expect(emergencyContext.length).toBeLessThan(normalContext.length);
+      console.log(`Normal prune: ${normalContext.length} messages, Emergency prune: ${emergencyContext.length} messages`);
+    });
+
+    it('should preserve system message and latest user message after emergency prune', () => {
+      const messages = createTestMessages(10, 200);
+      const indexTokenCountMap = buildIndexTokenCountMap(messages, tokenCounter);
+
+      // Very aggressive prune - only 300 tokens
+      const emergencyPrune = createPruneMessages({
+        startIndex: 0,
+        provider: Providers.BEDROCK,
+        tokenCounter,
+        maxTokens: 300,
+        thinkingEnabled: false,
+        indexTokenCountMap,
+      });
+
+      const { context } = emergencyPrune({ messages });
+
+      // Should still have system message if it fits
+      if (context.length > 0) {
+        // Check that we have at least the most recent messages
+        const lastMessage = context[context.length - 1];
+        expect(lastMessage).toBeDefined();
+      }
+    });
+  });
+
+  describe('Pruning Notice Message Injection', () => {
+    it('should calculate correct number of pruned messages', () => {
+      const originalCount = 20;
+      const messages = createTestMessages(originalCount, 100);
+      const indexTokenCountMap = buildIndexTokenCountMap(messages, tokenCounter);
+
+      const emergencyPrune = createPruneMessages({
+        startIndex: 0,
+        provider: Providers.BEDROCK,
+        tokenCounter,
+        maxTokens: 500, // Very small to force aggressive pruning
+        thinkingEnabled: false,
+        indexTokenCountMap,
+      });
+
+      const { context: reducedMessages } = emergencyPrune({ messages });
+
+      // Calculate how many were pruned (this is what we inject in the notice)
+      const prunedCount = messages.length - reducedMessages.length;
+
+      expect(prunedCount).toBeGreaterThan(0);
+      console.log(`Original: ${messages.length}, After prune: ${reducedMessages.length}, Pruned: ${prunedCount}`);
+    });
+
+    it('should inject personalized notice message after system message', () => {
+      const messages = createTestMessages(10, 100);
+      const indexTokenCountMap = buildIndexTokenCountMap(messages, tokenCounter);
+
+      const emergencyPrune = createPruneMessages({
+        startIndex: 0,
+        provider: Providers.BEDROCK,
+        tokenCounter,
+        maxTokens: 800,
+        thinkingEnabled: false,
+        indexTokenCountMap,
+      });
+
+      const { context: reducedMessages } = emergencyPrune({ messages });
+
+      // Simulate the notice injection logic from Graph.ts
+      const prunedCount = messages.length - reducedMessages.length;
+      const remainingCount = reducedMessages.length;
+      const estimatedContextDescription = getContextTimeframeDescription(remainingCount);
+
+      const pruneNoticeMessage = new HumanMessage({
+        content: `[CONTEXT NOTICE]
+Our conversation has grown quite long, so I've focused on ${estimatedContextDescription} of our chat (${remainingCount} recent messages). ${prunedCount} earlier messages are no longer in my immediate memory.
+
+If I seem to be missing something we discussed earlier, just give me a quick reminder and I'll pick right back up! I'm still fully engaged and ready to help with whatever you need.`,
+      });
+
+      // Insert after system message
+      const hasSystemMessage = reducedMessages[0]?.getType() === 'system';
+      const insertIndex = hasSystemMessage ? 1 : 0;
+
+      const messagesWithNotice = [
+        ...reducedMessages.slice(0, insertIndex),
+        pruneNoticeMessage,
+        ...reducedMessages.slice(insertIndex),
+      ];
+
+      // Verify notice is in correct position
+      if (hasSystemMessage) {
+        expect(messagesWithNotice[0].getType()).toBe('system');
+        expect(messagesWithNotice[1].getType()).toBe('human');
+        expect((messagesWithNotice[1].content as string)).toContain('[CONTEXT NOTICE]');
+        expect((messagesWithNotice[1].content as string)).toContain('recent messages');
+        expect((messagesWithNotice[1].content as string)).toContain('quick reminder');
+      } else {
+        expect(messagesWithNotice[0].getType()).toBe('human');
+        expect((messagesWithNotice[0].content as string)).toContain('[CONTEXT NOTICE]');
+      }
+
+      // Total messages should be reduced + 1 notice
+      expect(messagesWithNotice.length).toBe(reducedMessages.length + 1);
+
+      console.log(`Notice preview:\n${(pruneNoticeMessage.content as string).substring(0, 200)}...`);
+    });
+  });
+
+  describe('Context Timeframe Description', () => {
+    it('should return appropriate descriptions for different message counts', () => {
+      expect(getContextTimeframeDescription(3)).toBe('just the last few exchanges');
+      expect(getContextTimeframeDescription(10)).toBe('the last several minutes');
+      expect(getContextTimeframeDescription(25)).toBe('roughly the past hour');
+      expect(getContextTimeframeDescription(45)).toBe('the past couple of hours');
+      expect(getContextTimeframeDescription(100)).toBe('the past few hours');
+      expect(getContextTimeframeDescription(200)).toBe("roughly a day's worth");
+      expect(getContextTimeframeDescription(500)).toBe('the past few days');
+      expect(getContextTimeframeDescription(1000)).toBe('about a week or more');
+    });
+  });
+
+  describe('Error Detection Patterns', () => {
+    const errorPatterns = [
+      'Input is too long for the model',
+      'context length exceeded',
+      'maximum context length',
+      'ValidationException: Input is too long',
+      'prompt is too long for this model',
+      'The input is too long',
+    ];
+
+    it('should detect various "input too long" error patterns', () => {
+      const isInputTooLongError = (errorMessage: string): boolean => {
+        const lowerMessage = errorMessage.toLowerCase();
+        return (
+          lowerMessage.includes('too long') ||
+          lowerMessage.includes('input is too long') ||
+          lowerMessage.includes('context length') ||
+          lowerMessage.includes('maximum context') ||
+          lowerMessage.includes('validationexception') ||
+          lowerMessage.includes('prompt is too long')
+        );
+      };
+
+      for (const pattern of errorPatterns) {
+        expect(isInputTooLongError(pattern)).toBe(true);
+        console.log(`✓ Detected: "${pattern}"`);
+      }
+
+      // Should not match unrelated errors
+      expect(isInputTooLongError('Network timeout')).toBe(false);
+      expect(isInputTooLongError('Invalid API key')).toBe(false);
+      expect(isInputTooLongError('Rate limit exceeded')).toBe(false);
+    });
+  });
+
+  describe('Edge Cases', () => {
+    it('should handle empty messages after pruning', () => {
+      // Single very long message that exceeds the limit
+      const messages: BaseMessage[] = [
+        new SystemMessage('System prompt'),
+        new HumanMessage('x'.repeat(10000)), // Way too long
+      ];
+      const indexTokenCountMap = buildIndexTokenCountMap(messages, tokenCounter);
+
+      const emergencyPrune = createPruneMessages({
+        startIndex: 0,
+        provider: Providers.BEDROCK,
+        tokenCounter,
+        maxTokens: 100, // Very small limit
+        thinkingEnabled: false,
+        indexTokenCountMap,
+      });
+
+      const { context } = emergencyPrune({ messages });
+
+      // Should have at least tried to keep something or be empty
+      // The key is it shouldn't throw
+      expect(Array.isArray(context)).toBe(true);
+    });
+
+    it('should work with only system message and one user message', () => {
+      const messages: BaseMessage[] = [
+        new SystemMessage('You are helpful.'),
+        new HumanMessage('Hello'),
+      ];
+      const indexTokenCountMap = buildIndexTokenCountMap(messages, tokenCounter);
+
+      const emergencyPrune = createPruneMessages({
+        startIndex: 0,
+        provider: Providers.BEDROCK,
+        tokenCounter,
+        maxTokens: 500,
+        thinkingEnabled: false,
+        indexTokenCountMap,
+      });
+
+      const { context } = emergencyPrune({ messages });
+
+      expect(context.length).toBe(2);
+      expect(context[0].getType()).toBe('system');
+      expect(context[1].getType()).toBe('human');
+    });
+
+    it('should handle conversation without system message', () => {
+      const messages: BaseMessage[] = [
+        new HumanMessage('Hello'),
+        new AIMessage('Hi there!'),
+        new HumanMessage('How are you?'),
+      ];
+      const indexTokenCountMap = buildIndexTokenCountMap(messages, tokenCounter);
+
+      const emergencyPrune = createPruneMessages({
+        startIndex: 0,
+        provider: Providers.BEDROCK,
+        tokenCounter,
+        maxTokens: 100,
+        thinkingEnabled: false,
+        indexTokenCountMap,
+      });
+
+      const { context } = emergencyPrune({ messages });
+
+      // Should keep the most recent messages that fit
+      expect(context.length).toBeGreaterThan(0);
+      expect(context[0].getType()).not.toBe('system');
+    });
+  });
+});