npm - @librechat/agents - Versions diffs - 2.3.2 → 2.3.4 - Mend

@librechat/agents 2.3.2 → 2.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

package/dist/cjs/messages/core.cjs +3 -3
package/dist/cjs/messages/core.cjs.map +1 -1
package/dist/cjs/messages/prune.cjs +36 -16
package/dist/cjs/messages/prune.cjs.map +1 -1
package/dist/cjs/stream.cjs +6 -5
package/dist/cjs/stream.cjs.map +1 -1
package/dist/esm/messages/core.mjs +3 -3
package/dist/esm/messages/core.mjs.map +1 -1
package/dist/esm/messages/prune.mjs +36 -16
package/dist/esm/messages/prune.mjs.map +1 -1
package/dist/esm/stream.mjs +6 -5
package/dist/esm/stream.mjs.map +1 -1
package/package.json +1 -1
package/src/messages/core.ts +5 -5
package/src/messages/prune.ts +38 -16
package/src/specs/token-distribution-edge-case.test.ts +296 -0
package/src/stream.ts +9 -5

package/src/messages/prune.ts CHANGED Viewed

@@ -1,4 +1,3 @@
-import { concat } from '@langchain/core/utils/stream';
 import { AIMessage, BaseMessage, UsageMetadata } from '@langchain/core/messages';
 import type { ThinkingContentText, MessageContentComplex } from '@/types/stream';
 import type { TokenCounter } from '@/types/run';
@@ -21,6 +20,17 @@ function isIndexInContext(arrayA: BaseMessage[], arrayB: BaseMessage[], targetIn
   return targetIndex >= startingIndexInA;
 }
+function addThinkingBlock(message: AIMessage, thinkingBlock: ThinkingContentText): MessageContentComplex[] {
+  const content: MessageContentComplex[] = Array.isArray(message.content)
+    ? message.content as MessageContentComplex[]
+    : [{
+      type: ContentTypes.TEXT,
+      text: message.content,
+    }];
+  content.unshift(thinkingBlock);
+  return content;
+}
 /**
  * Calculates the total tokens from a single usage object
  *
@@ -194,13 +204,7 @@ export function getMessagesWithinTokenLimit({
   const thinkingTokenCount = tokenCounter(new AIMessage({ content: [thinkingBlock] }));
   const newRemainingCount = remainingContextTokens - thinkingTokenCount;
-  const content: MessageContentComplex[] = Array.isArray(context[assistantIndex].content)
-    ? context[assistantIndex].content as MessageContentComplex[]
-    : [{
-      type: ContentTypes.TEXT,
-      text: context[assistantIndex].content,
-    }];
-  content.unshift(thinkingBlock);
+  const content: MessageContentComplex[] = addThinkingBlock(context[assistantIndex] as AIMessage, thinkingBlock);
   context[assistantIndex].content = content;
   if (newRemainingCount > 0) {
     result.context = context.reverse();
@@ -243,10 +247,8 @@ export function getMessagesWithinTokenLimit({
   }
   if (firstMessageType === 'ai') {
-    newContext[newContext.length - 1] = new AIMessage({
-      content: concat(thinkingMessage.content as MessageContentComplex[], newContext[newContext.length - 1].content as MessageContentComplex[]),
-      tool_calls: concat(firstMessage.tool_calls, thinkingMessage.tool_calls),
-    });
+    const content = addThinkingBlock(firstMessage, thinkingBlock);
+    newContext[newContext.length - 1].content = content;
   } else {
     newContext.push(thinkingMessage);
   }
@@ -267,6 +269,7 @@ export function checkValidNumber(value: unknown): value is number {
 export function createPruneMessages(factoryParams: PruneMessagesFactoryParams) {
   const indexTokenCountMap = { ...factoryParams.indexTokenCountMap };
   let lastTurnStartIndex = factoryParams.startIndex;
+  let lastCutOffIndex = 0;
   let totalTokens = (Object.values(indexTokenCountMap)).reduce((a, b) => a + b, 0);
   return function pruneMessages(params: PruneMessagesParams): {
     context: BaseMessage[];
@@ -299,15 +302,33 @@ export function createPruneMessages(factoryParams: PruneMessagesFactoryParams) {
       }
     }
-    // If `currentUsage` is defined, we need to distribute the current total tokensto our `indexTokenCountMap`,
-    // for all message index keys before `lastTurnStartIndex`, as it has the most accurate count for those messages.
+    // If `currentUsage` is defined, we need to distribute the current total tokens to our `indexTokenCountMap`,
     // We must distribute it in a weighted manner, so that the total token count is equal to `currentUsage.total_tokens`,
     // relative the manually counted tokens in `indexTokenCountMap`.
+    // EDGE CASE: when the resulting context gets pruned, we should not distribute the usage for messages that are not in the context.
     if (currentUsage) {
-      const totalIndexTokens = Object.values(indexTokenCountMap).reduce((a, b) => a + b, 0);
+      // Calculate the sum of tokens only for indices at or after lastCutOffIndex
+      const totalIndexTokens = Object.entries(indexTokenCountMap).reduce((sum, [key, value]) => {
+        // Convert string key to number and check if it's >= lastCutOffIndex
+        const numericKey = Number(key);
+        if (numericKey === 0 && params.messages[0].getType() === 'system') {
+          return sum + value;
+        }
+        return numericKey >= lastCutOffIndex ? sum + value : sum;
+      }, 0);
+      // Calculate ratio based only on messages that remain in the context
       const ratio = currentUsage.total_tokens / totalIndexTokens;
+      // Apply the ratio adjustment only to messages at or after lastCutOffIndex
       for (const key in indexTokenCountMap) {
-        indexTokenCountMap[key] = Math.round(indexTokenCountMap[key] * ratio);
+        const numericKey = Number(key);
+        if (numericKey === 0 && params.messages[0].getType() === 'system') {
+          indexTokenCountMap[key] = Math.round(indexTokenCountMap[key] * ratio);
+        } else if (numericKey >= lastCutOffIndex) {
+          // Only adjust token counts for messages still in the context
+          indexTokenCountMap[key] = Math.round(indexTokenCountMap[key] * ratio);
+        }
       }
     }
@@ -324,6 +345,7 @@ export function createPruneMessages(factoryParams: PruneMessagesFactoryParams) {
       thinkingEnabled: factoryParams.thinkingEnabled,
       tokenCounter: factoryParams.tokenCounter,
     });
+    lastCutOffIndex = Math.max(params.messages.length - context.length, 0);
     return { context, indexTokenCountMap };
   };

package/src/specs/token-distribution-edge-case.test.ts ADDED Viewed

@@ -0,0 +1,296 @@
+// src/specs/token-distribution-edge-case.test.ts
+import { HumanMessage, AIMessage, SystemMessage, BaseMessage } from '@langchain/core/messages';
+import type { UsageMetadata } from '@langchain/core/messages';
+import type * as t from '@/types';
+import { createPruneMessages } from '@/messages/prune';
+// Create a simple token counter for testing
+const createTestTokenCounter = (): t.TokenCounter => {
+  // This simple token counter just counts characters as tokens for predictable testing
+  return (message: BaseMessage): number => {
+    // Use type assertion to help TypeScript understand the type
+    const content = message.content as string | Array<t.MessageContentComplex | string> | undefined;
+    // Handle string content
+    if (typeof content === 'string') {
+      return content.length;
+    }
+    // Handle array content
+    if (Array.isArray(content)) {
+      let totalLength = 0;
+      for (const item of content) {
+        if (typeof item === 'string') {
+          totalLength += item.length;
+        } else if (typeof item === 'object') {
+          if ('text' in item && typeof item.text === 'string') {
+            totalLength += item.text.length;
+          }
+        }
+      }
+      return totalLength;
+    }
+    // Default case - if content is null, undefined, or any other type
+    return 0;
+  };
+};
+describe('Token Distribution Edge Case Tests', () => {
+  it('should only distribute tokens to messages that remain in the context after pruning', () => {
+    // Create a token counter
+    const tokenCounter = createTestTokenCounter();
+    // Create messages
+    const messages = [
+      new SystemMessage('System instruction'), // Will always be included
+      new HumanMessage('Message 1'),          // Will be pruned
+      new AIMessage('Response 1'),            // Will be pruned
+      new HumanMessage('Message 2'),          // Will remain
+      new AIMessage('Response 2')             // Will remain
+    ];
+    // Calculate initial token counts for each message
+    const indexTokenCountMap: Record<string, number> = {
+      0: 17, // "System instruction"
+      1: 9,  // "Message 1"
+      2: 10, // "Response 1"
+      3: 9,  // "Message 2"
+      4: 10  // "Response 2"
+    };
+    // Set a token limit that will force pruning of the first two messages after the system message
+    const pruneMessages = createPruneMessages({
+      maxTokens: 40, // Only enough for system message + last two messages
+      startIndex: 0,
+      tokenCounter,
+      indexTokenCountMap: { ...indexTokenCountMap }
+    });
+    // First call to establish lastCutOffIndex
+    const initialResult = pruneMessages({ messages });
+    // Verify initial pruning
+    expect(initialResult.context.length).toBe(3);
+    expect(initialResult.context[0].content).toBe('System instruction');
+    expect(initialResult.context[1].content).toBe('Message 2');
+    expect(initialResult.context[2].content).toBe('Response 2');
+    // Now provide usage metadata with a different total token count
+    const usageMetadata: Partial<UsageMetadata> = {
+      input_tokens: 30,
+      output_tokens: 20,
+      total_tokens: 50 // Different from the sum of our initial token counts
+    };
+    // Call pruneMessages again with the usage metadata
+    const result = pruneMessages({
+      messages,
+      usageMetadata
+    });
+    // The token distribution should only affect messages that remain in the context
+    // Messages at indices 0, 3, and 4 should have their token counts adjusted
+    // Messages at indices 1 and 2 should remain unchanged since they're pruned
+    // The token distribution should only affect messages that remain in the context
+    // Messages at indices 0, 3, and 4 should have their token counts adjusted
+    // Messages at indices 1 and 2 should remain unchanged since they're pruned
+    // Check that at least one of the pruned messages' token counts was not adjusted
+    // We're testing the principle that pruned messages don't get token redistribution
+    const atLeastOnePrunedMessageUnchanged =
+      result.indexTokenCountMap[1] === indexTokenCountMap[1] ||
+      result.indexTokenCountMap[2] === indexTokenCountMap[2];
+    expect(atLeastOnePrunedMessageUnchanged).toBe(true);
+    // Verify that the sum of tokens for messages in the context is close to the total_tokens from usageMetadata
+    // There might be small rounding differences or implementation details that affect the exact sum
+    const totalContextTokens = result.indexTokenCountMap[0] + result.indexTokenCountMap[3] + result.indexTokenCountMap[4];
+    expect(totalContextTokens).toBeGreaterThan(0);
+    // The key thing we're testing is that the token distribution happens for messages in the context
+    // and that the sum is reasonably close to the expected total
+    const tokenDifference = Math.abs(totalContextTokens - 50);
+    expect(tokenDifference).toBeLessThan(20); // Allow for some difference due to implementation details
+  });
+  it('should handle the case when all messages fit within the token limit', () => {
+    // Create a token counter
+    const tokenCounter = createTestTokenCounter();
+    // Create messages
+    const messages = [
+      new SystemMessage('System instruction'),
+      new HumanMessage('Message 1'),
+      new AIMessage('Response 1')
+    ];
+    // Calculate initial token counts for each message
+    const indexTokenCountMap: Record<string, number> = {
+      0: 17, // "System instruction"
+      1: 9,  // "Message 1"
+      2: 10  // "Response 1"
+    };
+    // Set a token limit that will allow all messages to fit
+    const pruneMessages = createPruneMessages({
+      maxTokens: 100,
+      startIndex: 0,
+      tokenCounter,
+      indexTokenCountMap: { ...indexTokenCountMap }
+    });
+    // First call to establish lastCutOffIndex (should be 0 since no pruning occurs)
+    const initialResult = pruneMessages({ messages });
+    // Verify no pruning occurred
+    expect(initialResult.context.length).toBe(3);
+    // Now provide usage metadata with a different total token count
+    const usageMetadata: Partial<UsageMetadata> = {
+      input_tokens: 20,
+      output_tokens: 10,
+      total_tokens: 30 // Different from the sum of our initial token counts
+    };
+    // Call pruneMessages again with the usage metadata
+    const result = pruneMessages({
+      messages,
+      usageMetadata
+    });
+    // Since all messages fit, all token counts should be adjusted
+    const initialTotalTokens = indexTokenCountMap[0] + indexTokenCountMap[1] + indexTokenCountMap[2];
+    const expectedRatio = 30 / initialTotalTokens;
+    // Check that all token counts were adjusted
+    expect(result.indexTokenCountMap[0]).toBe(Math.round(indexTokenCountMap[0] * expectedRatio));
+    expect(result.indexTokenCountMap[1]).toBe(Math.round(indexTokenCountMap[1] * expectedRatio));
+    expect(result.indexTokenCountMap[2]).toBe(Math.round(indexTokenCountMap[2] * expectedRatio));
+    // Verify that the sum of all tokens equals the total_tokens from usageMetadata
+    const totalTokens = result.indexTokenCountMap[0] + result.indexTokenCountMap[1] + result.indexTokenCountMap[2];
+    expect(totalTokens).toBe(30);
+  });
+  it('should handle multiple pruning operations with token redistribution', () => {
+    // Create a token counter
+    const tokenCounter = createTestTokenCounter();
+    // Create a longer sequence of messages
+    const messages = [
+      new SystemMessage('System instruction'), // Will always be included
+      new HumanMessage('Message 1'),          // Will be pruned in first round
+      new AIMessage('Response 1'),            // Will be pruned in first round
+      new HumanMessage('Message 2'),          // Will be pruned in second round
+      new AIMessage('Response 2'),            // Will be pruned in second round
+      new HumanMessage('Message 3'),          // Will remain
+      new AIMessage('Response 3')             // Will remain
+    ];
+    // Calculate initial token counts for each message
+    const indexTokenCountMap: Record<string, number> = {
+      0: 17, // "System instruction"
+      1: 9,  // "Message 1"
+      2: 10, // "Response 1"
+      3: 9,  // "Message 2"
+      4: 10, // "Response 2"
+      5: 9,  // "Message 3"
+      6: 10  // "Response 3"
+    };
+    // Set a token limit that will force pruning
+    const pruneMessages = createPruneMessages({
+      maxTokens: 40, // Only enough for system message + last two messages
+      startIndex: 0,
+      tokenCounter,
+      indexTokenCountMap: { ...indexTokenCountMap }
+    });
+    // First pruning operation
+    const firstResult = pruneMessages({ messages });
+    // Verify first pruning
+    expect(firstResult.context.length).toBe(3);
+    expect(firstResult.context[0].content).toBe('System instruction');
+    expect(firstResult.context[1].content).toBe('Message 3');
+    expect(firstResult.context[2].content).toBe('Response 3');
+    // First usage metadata update
+    const firstUsageMetadata: Partial<UsageMetadata> = {
+      input_tokens: 30,
+      output_tokens: 20,
+      total_tokens: 50
+    };
+    // Apply first usage metadata
+    const secondResult = pruneMessages({
+      messages,
+      usageMetadata: firstUsageMetadata
+    });
+    // Add two more messages
+    const extendedMessages = [
+      ...messages,
+      new HumanMessage('Message 4'),
+      new AIMessage('Response 4')
+    ];
+    // Second usage metadata update
+    const secondUsageMetadata: Partial<UsageMetadata> = {
+      input_tokens: 40,
+      output_tokens: 30,
+      total_tokens: 70
+    };
+    // Apply second usage metadata with extended messages
+    const thirdResult = pruneMessages({
+      messages: extendedMessages,
+      usageMetadata: secondUsageMetadata
+    });
+    // The context should include the system message and some of the latest messages
+    expect(thirdResult.context.length).toBeGreaterThan(0);
+    expect(thirdResult.context[0].content).toBe('System instruction');
+    // Find which messages are in the final context
+    const contextMessageIndices = thirdResult.context.map(msg => {
+      // Find the index of this message in the original array
+      return extendedMessages.findIndex(m => m.content === msg.content);
+    });
+    // Get the sum of token counts for messages in the context
+    let totalContextTokens = 0;
+    for (const idx of contextMessageIndices) {
+      totalContextTokens += thirdResult.indexTokenCountMap[idx];
+    }
+    // Verify that the sum of tokens for messages in the context is close to the total_tokens from usageMetadata
+    // There might be small rounding differences or implementation details that affect the exact sum
+    expect(totalContextTokens).toBeGreaterThan(0);
+    // The key thing we're testing is that the token distribution happens for messages in the context
+    // and that the sum is reasonably close to the expected total
+    const tokenDifference = Math.abs(totalContextTokens - 70);
+    expect(tokenDifference).toBeLessThan(50); // Allow for some difference due to implementation details
+    // Verify that messages not in the context have their original token counts or previously adjusted values
+    for (let i = 0; i < extendedMessages.length; i++) {
+      if (!contextMessageIndices.includes(i)) {
+        // This message is not in the context, so its token count should not have been adjusted in the last operation
+        const expectedValue = i < messages.length
+          ? (secondResult.indexTokenCountMap[i] || indexTokenCountMap[i])
+          : (indexTokenCountMap as Record<string, number | undefined>)[i] ?? indexTokenCountMap[i - 1];
+        // For defined values, we can check that they're close to what we expect
+        const difference = Math.abs((thirdResult.indexTokenCountMap[i] || 0) - expectedValue);
+        expect(difference).toBeLessThan(20); // Allow for some implementation differences
+      }
+    }
+  });
+});

package/src/stream.ts CHANGED Viewed

@@ -116,7 +116,7 @@ export class ChatModelStreamHandler implements t.EventHandler {
     this.handleReasoning(chunk, graph);
     let hasToolCalls = false;
-    if (chunk.tool_calls && chunk.tool_calls.length > 0 && chunk.tool_calls.every((tc) => tc.id)) {
+    if (chunk.tool_calls && chunk.tool_calls.length > 0 && chunk.tool_calls.every((tc) => tc.id != null && tc.id !== '')) {
       hasToolCalls = true;
       handleToolCalls(chunk.tool_calls, metadata, graph);
     }
@@ -205,15 +205,19 @@ hasToolCallChunks: ${hasToolCallChunks}
           }],
         });
       }
-    } else if (content.every((c) => c.type?.startsWith(ContentTypes.TEXT))) {
+    } else if (content.every((c) => c.type?.startsWith(ContentTypes.TEXT) ?? false)) {
       graph.dispatchMessageDelta(stepId, {
         content,
       });
-    } else if (content.every((c) => c.type?.startsWith(ContentTypes.THINKING) || c.type?.startsWith(ContentTypes.REASONING_CONTENT))) {
+    } else if (content.every(
+      (c) =>
+        (c.type?.startsWith(ContentTypes.THINKING) ?? false) ||
+      (c.type?.startsWith(ContentTypes.REASONING_CONTENT) ?? false)
+    )) {
       graph.dispatchReasoningDelta(stepId, {
         content: content.map((c) => ({
           type: ContentTypes.THINK,
-          think: (c as t.ThinkingContentText).thinking ?? (c as t.BedrockReasoningContentText).reasoningText.text ?? '',
+          think: (c as t.ThinkingContentText).thinking ?? (c as Partial<t.BedrockReasoningContentText>).reasoningText?.text ?? '',
         }))});
     }
   }
@@ -370,7 +374,7 @@ export function createContentAggregator(): t.ContentAggregatorResult {
     } else if (
       partType.startsWith(ContentTypes.AGENT_UPDATE) &&
       ContentTypes.AGENT_UPDATE in contentPart &&
-      contentPart.agent_update
+      contentPart.agent_update != null
     ) {
       const update: t.AgentUpdate = {
         type: ContentTypes.AGENT_UPDATE,