npm - @librechat/agents - Versions diffs - 3.1.80-dev.3 → 3.1.81 - Mend

@librechat/agents 3.1.80-dev.3 → 3.1.81

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17)

package/dist/cjs/llm/anthropic/utils/message_inputs.cjs +27 -7
package/dist/cjs/llm/anthropic/utils/message_inputs.cjs.map +1 -1
package/dist/cjs/llm/vertexai/index.cjs +67 -15
package/dist/cjs/llm/vertexai/index.cjs.map +1 -1
package/dist/esm/llm/anthropic/utils/message_inputs.mjs +27 -8
package/dist/esm/llm/anthropic/utils/message_inputs.mjs.map +1 -1
package/dist/esm/llm/vertexai/index.mjs +67 -17
package/dist/esm/llm/vertexai/index.mjs.map +1 -1
package/dist/types/llm/anthropic/utils/message_inputs.d.ts +15 -3
package/dist/types/llm/vertexai/index.d.ts +47 -1
package/package.json +10 -5
package/src/llm/anthropic/utils/message_inputs.ts +58 -7
package/src/llm/anthropic/utils/tool-id-normalization.test.ts +178 -0
package/src/llm/vertexai/fixThoughtSignatures.test.ts +154 -0
package/src/llm/vertexai/index.ts +85 -24
package/src/llm/vertexai/llm.spec.ts +18 -0
package/src/llm/vertexai/repairUsageMetadata.test.ts +54 -0

package/src/llm/anthropic/utils/message_inputs.ts CHANGED Viewed

@@ -3,6 +3,7 @@
 /**
  * This util file contains functions for converting LangChain messages to Anthropic messages.
  */
+import { createHash } from 'node:crypto';
 import {
   type BaseMessage,
   type SystemMessage,
@@ -92,6 +93,49 @@ function _formatImage(imageUrl: string) {
   );
 }
+const ANTHROPIC_TOOL_USE_ID_PATTERN = /^[a-zA-Z0-9_-]+$/;
+const ANTHROPIC_TOOL_USE_ID_MAX_LENGTH = 64;
+const ANTHROPIC_TOOL_USE_ID_HASH_LENGTH = 10;
+/**
+ * Normalize a tool-call ID to satisfy Anthropic's `^[a-zA-Z0-9_-]+$` and 64-char
+ * constraints. Pure and deterministic — same input always yields the same output,
+ * so paired `tool_use.id` and `tool_result.tool_use_id` stay matched without
+ * needing a session map. IDs that already comply pass through unchanged.
+ *
+ * For non-compliant inputs we sanitize then append a short SHA-256 prefix of
+ * the original ID to preserve uniqueness when truncation would otherwise
+ * collapse distinct IDs to the same value (e.g. two long Responses-style IDs
+ * sharing a 64-char prefix). The hash is computed against the raw input so
+ * inputs that differ only after the truncation cutoff still produce distinct
+ * outputs.
+ */
+export function normalizeAnthropicToolCallId(id: string): string;
+export function normalizeAnthropicToolCallId(
+  id: string | undefined
+): string | undefined;
+export function normalizeAnthropicToolCallId(
+  id: string | undefined
+): string | undefined {
+  if (id == null) {
+    return id;
+  }
+  if (
+    id.length <= ANTHROPIC_TOOL_USE_ID_MAX_LENGTH &&
+    ANTHROPIC_TOOL_USE_ID_PATTERN.test(id)
+  ) {
+    return id;
+  }
+  const sanitized = id.replace(/[^a-zA-Z0-9_-]/g, '_');
+  const hash = createHash('sha256')
+    .update(id)
+    .digest('hex')
+    .slice(0, ANTHROPIC_TOOL_USE_ID_HASH_LENGTH);
+  const prefixMaxLength =
+    ANTHROPIC_TOOL_USE_ID_MAX_LENGTH - ANTHROPIC_TOOL_USE_ID_HASH_LENGTH - 1;
+  return `${sanitized.slice(0, prefixMaxLength)}_${hash}`;
+}
 function _ensureMessageContents(
   messages: BaseMessage[]
 ): (SystemMessage | HumanMessage | AIMessage)[] {
@@ -111,7 +155,9 @@ function _ensureMessageContents(
           (previousMessage.content as MessageContentComplex[]).push({
             type: 'tool_result',
             content: message.content,
-            tool_use_id: (message as ToolMessage).tool_call_id,
+            tool_use_id: normalizeAnthropicToolCallId(
+              (message as ToolMessage).tool_call_id
+            ),
           });
         } else {
           // If not, we create a new human message with the tool result.
@@ -121,7 +167,9 @@ function _ensureMessageContents(
                 {
                   type: 'tool_result',
                   content: message.content,
-                  tool_use_id: (message as ToolMessage).tool_call_id,
+                  tool_use_id: normalizeAnthropicToolCallId(
+                    (message as ToolMessage).tool_call_id
+                  ),
                 },
               ],
             })
@@ -139,7 +187,9 @@ function _ensureMessageContents(
                 ...(toolMessageContent != null
                   ? { content: _formatContent(message) }
                   : {}),
-                tool_use_id: (message as ToolMessage).tool_call_id,
+                tool_use_id: normalizeAnthropicToolCallId(
+                  (message as ToolMessage).tool_call_id
+                ),
               },
             ],
           })
@@ -158,11 +208,12 @@ export function _convertLangChainToolCallToAnthropic(
   if (toolCall.id === undefined) {
     throw new Error('Anthropic requires all tool calls to have an "id".');
   }
+  const isServerTool = toolCall.id.startsWith(
+    Constants.ANTHROPIC_SERVER_TOOL_PREFIX
+  );
   return {
-    type: toolCall.id.startsWith(Constants.ANTHROPIC_SERVER_TOOL_PREFIX)
-      ? 'server_tool_use'
-      : 'tool_use',
-    id: toolCall.id,
+    type: isServerTool ? 'server_tool_use' : 'tool_use',
+    id: isServerTool ? toolCall.id : normalizeAnthropicToolCallId(toolCall.id),
     name: toolCall.name,
     input: toolCall.args,
   };

package/src/llm/anthropic/utils/tool-id-normalization.test.ts ADDED Viewed

@@ -0,0 +1,178 @@
+import { AIMessage, HumanMessage, ToolMessage } from '@langchain/core/messages';
+import {
+  _convertLangChainToolCallToAnthropic,
+  _convertMessagesToAnthropicPayload,
+  normalizeAnthropicToolCallId,
+} from './message_inputs';
+describe('normalizeAnthropicToolCallId', () => {
+  it('returns valid IDs unchanged', () => {
+    expect(normalizeAnthropicToolCallId('toolu_01ABcdEFgh')).toBe(
+      'toolu_01ABcdEFgh'
+    );
+    expect(normalizeAnthropicToolCallId('call_abc123XYZ')).toBe(
+      'call_abc123XYZ'
+    );
+    expect(normalizeAnthropicToolCallId('a-b_c-d')).toBe('a-b_c-d');
+  });
+  it('sanitizes invalid characters and appends a hash suffix', () => {
+    const out = normalizeAnthropicToolCallId(
+      'fc_67abc1234def567|call_abc123def456ghi789jkl0mnopqrs'
+    );
+    expect(/^[a-zA-Z0-9_-]+$/.test(out)).toBe(true);
+    expect(out.length).toBeLessThanOrEqual(64);
+    expect(
+      out.startsWith('fc_67abc1234def567_call_abc123def456ghi789jkl0mn')
+    ).toBe(true);
+    // Suffix is `_<10-hex-char hash>`
+    expect(out).toMatch(/_[0-9a-f]{10}$/);
+  });
+  it('produces compliant output for IDs of any length', () => {
+    const long = 'fc_' + 'a'.repeat(80);
+    const out = normalizeAnthropicToolCallId(long);
+    expect(out).toHaveLength(64);
+    expect(/^[a-zA-Z0-9_-]+$/.test(out)).toBe(true);
+  });
+  it('produces uniquely distinguishable outputs for IDs that share a 64-char prefix', () => {
+    const sharedPrefix = 'fc_' + 'a'.repeat(80);
+    const idA = sharedPrefix + '|call_unique_A';
+    const idB = sharedPrefix + '|call_unique_B';
+    const outA = normalizeAnthropicToolCallId(idA);
+    const outB = normalizeAnthropicToolCallId(idB);
+    expect(outA).not.toBe(outB);
+    expect(outA).toHaveLength(64);
+    expect(outB).toHaveLength(64);
+    expect(/^[a-zA-Z0-9_-]+$/.test(outA)).toBe(true);
+    expect(/^[a-zA-Z0-9_-]+$/.test(outB)).toBe(true);
+  });
+  it('disambiguates short IDs that sanitize to the same value', () => {
+    expect(normalizeAnthropicToolCallId('a|b')).not.toBe(
+      normalizeAnthropicToolCallId('a.b')
+    );
+  });
+  it('handles combined length and character violations', () => {
+    const id = 'fc_' + 'x|'.repeat(100);
+    const out = normalizeAnthropicToolCallId(id);
+    expect(out).toHaveLength(64);
+    expect(/^[a-zA-Z0-9_-]+$/.test(out)).toBe(true);
+  });
+  it('is deterministic — same input always yields same output', () => {
+    const id = 'fc_a|b|c';
+    expect(normalizeAnthropicToolCallId(id)).toBe(
+      normalizeAnthropicToolCallId(id)
+    );
+  });
+  it('passes through undefined for the optional overload', () => {
+    expect(normalizeAnthropicToolCallId(undefined)).toBeUndefined();
+  });
+  it('handles empty string by producing a deterministic compliant output', () => {
+    const out = normalizeAnthropicToolCallId('');
+    expect(/^[a-zA-Z0-9_-]+$/.test(out)).toBe(true);
+    expect(out.length).toBeLessThanOrEqual(64);
+    expect(out).toBe(normalizeAnthropicToolCallId(''));
+  });
+});
+describe('_convertMessagesToAnthropicPayload — cross-provider ID normalization', () => {
+  it('normalizes Responses-style IDs on tool_use AND matching tool_result', () => {
+    const responsesId = 'fc_67abc1234def567|call_abc123def456ghi789jkl0mnopqrs';
+    const payload = _convertMessagesToAnthropicPayload([
+      new HumanMessage('weather?'),
+      new AIMessage({
+        content: '',
+        tool_calls: [
+          {
+            id: responsesId,
+            name: 'get_weather',
+            args: { location: 'Tokyo' },
+            type: 'tool_call',
+          },
+        ],
+      }),
+      new ToolMessage({
+        tool_call_id: responsesId,
+        content: '{"temp": 21}',
+      }),
+    ]);
+    const assistantMsg = payload.messages.find((m) => m.role === 'assistant')!;
+    const userToolResultMsg = payload.messages.find(
+      (m) =>
+        m.role === 'user' &&
+        Array.isArray(m.content) &&
+        (m.content as Array<{ type: string }>)[0]?.type === 'tool_result'
+    )!;
+    const toolUseBlock = (
+      assistantMsg.content as Array<{ type: string; id?: string }>
+    ).find((b) => b.type === 'tool_use')!;
+    const toolResultBlock = (
+      userToolResultMsg.content as Array<{
+        type: string;
+        tool_use_id?: string;
+      }>
+    ).find((b) => b.type === 'tool_result')!;
+    const expected = normalizeAnthropicToolCallId(responsesId);
+    expect(toolUseBlock.id).toBe(expected);
+    expect(toolResultBlock.tool_use_id).toBe(expected);
+    expect(toolUseBlock.id).toBe(toolResultBlock.tool_use_id);
+    expect(/^[a-zA-Z0-9_-]+$/.test(toolUseBlock.id!)).toBe(true);
+    expect(toolUseBlock.id!.length).toBeLessThanOrEqual(64);
+  });
+  it('passes through Anthropic-native IDs unchanged', () => {
+    const nativeId = 'toolu_01ABcdEFgh23ijKL';
+    const payload = _convertMessagesToAnthropicPayload([
+      new HumanMessage('hi'),
+      new AIMessage({
+        content: '',
+        tool_calls: [
+          {
+            id: nativeId,
+            name: 'noop',
+            args: {},
+            type: 'tool_call',
+          },
+        ],
+      }),
+      new ToolMessage({
+        tool_call_id: nativeId,
+        content: 'ok',
+      }),
+    ]);
+    const assistantMsg = payload.messages.find((m) => m.role === 'assistant')!;
+    const toolUseBlock = (
+      assistantMsg.content as Array<{ type: string; id?: string }>
+    ).find((b) => b.type === 'tool_use')!;
+    expect(toolUseBlock.id).toBe(nativeId);
+  });
+  it('does not normalize server tool IDs (srvtoolu_ prefix)', () => {
+    const serverId = 'srvtoolu_01abcXYZ';
+    const block = _convertLangChainToolCallToAnthropic({
+      id: serverId,
+      name: 'web_search',
+      args: { query: 'x' },
+      type: 'tool_call',
+    });
+    expect(block.type).toBe('server_tool_use');
+    expect(block.id).toBe(serverId);
+  });
+});

package/src/llm/vertexai/fixThoughtSignatures.test.ts ADDED Viewed

@@ -0,0 +1,154 @@
+import { expect, test, describe } from '@jest/globals';
+import type { GeminiContent } from '@langchain/google-common';
+import { AIMessage, HumanMessage, ToolMessage } from '@langchain/core/messages';
+import { fixThoughtSignatures } from './index';
+const SIG_A = 'AY89a1/sigA==';
+const SIG_B = 'AY89a1/sigB==';
+const buildContents = (
+  blocks: Array<['user' | 'model' | 'function', GeminiContent['parts']]>
+): GeminiContent[] =>
+  blocks.map(([role, parts]) => ({ role, parts }) as GeminiContent);
+describe('fixThoughtSignatures', () => {
+  test('attaches signature to functionCall part when prior turn is a plain-text AI message (issue LibreChat#13006-followup)', () => {
+    // Reproduces the live failure from the issue: a Gemini 3 conversation
+    // where turn 1 was plain text ("Hello!") and turn 2 emitted a tool call
+    // with a thought signature. The plain-text AI message has no signatures,
+    // so the old position-by-filter code matched the toolcall AIMessage with
+    // the WRONG model content.
+    const helloAi = new AIMessage('Hello! How can I help you today?');
+    const toolcallAi = new AIMessage({
+      content: '',
+      tool_calls: [
+        { name: 'bash_tool', args: { command: 'echo hi' }, id: 'tc1' },
+      ],
+      additional_kwargs: { signatures: [SIG_A, ''] },
+    });
+    const input = [
+      new HumanMessage('hi there'),
+      helloAi,
+      new HumanMessage('run something'),
+      toolcallAi,
+      new ToolMessage({ content: 'ok', tool_call_id: 'tc1' }),
+    ];
+    const contents = buildContents([
+      ['user', [{ text: 'hi there' }]],
+      ['model', [{ text: 'Hello! How can I help you today?' }]],
+      ['user', [{ text: 'run something' }]],
+      [
+        'model',
+        [{ functionCall: { name: 'bash_tool', args: { command: 'echo hi' } } }],
+      ],
+      [
+        'user',
+        [
+          {
+            functionResponse: {
+              name: 'bash_tool',
+              response: { content: 'ok' },
+            },
+          },
+        ],
+      ],
+    ]);
+    fixThoughtSignatures(contents, input);
+    expect(contents[1].parts[0].thoughtSignature).toBeUndefined();
+    expect(contents[3].parts[0]).toMatchObject({
+      functionCall: { name: 'bash_tool' },
+      thoughtSignature: SIG_A,
+    });
+  });
+  test('attaches signatures across multiple tool-call turns by position', () => {
+    const turn1 = new AIMessage({
+      content: '',
+      tool_calls: [{ name: 'a', args: {}, id: 't1' }],
+      additional_kwargs: { signatures: [SIG_A, ''] },
+    });
+    const turn2 = new AIMessage({
+      content: '',
+      tool_calls: [{ name: 'b', args: {}, id: 't2' }],
+      additional_kwargs: { signatures: [SIG_B, ''] },
+    });
+    const input = [
+      new HumanMessage('q1'),
+      turn1,
+      new ToolMessage({ content: '1', tool_call_id: 't1' }),
+      new HumanMessage('q2'),
+      turn2,
+      new ToolMessage({ content: '2', tool_call_id: 't2' }),
+    ];
+    const contents = buildContents([
+      ['user', [{ text: 'q1' }]],
+      ['model', [{ functionCall: { name: 'a', args: {} } }]],
+      ['user', [{ functionResponse: { name: 'a', response: {} } }]],
+      ['user', [{ text: 'q2' }]],
+      ['model', [{ functionCall: { name: 'b', args: {} } }]],
+      ['user', [{ functionResponse: { name: 'b', response: {} } }]],
+    ]);
+    fixThoughtSignatures(contents, input);
+    expect(contents[1].parts[0].thoughtSignature).toBe(SIG_A);
+    expect(contents[4].parts[0].thoughtSignature).toBe(SIG_B);
+  });
+  test('does not overwrite signatures already attached by the library', () => {
+    const ai = new AIMessage({
+      content: '',
+      tool_calls: [{ name: 'a', args: {}, id: 't1' }],
+      additional_kwargs: { signatures: [SIG_A] },
+    });
+    const input = [new HumanMessage('q'), ai];
+    const contents = buildContents([
+      ['user', [{ text: 'q' }]],
+      [
+        'model',
+        [{ functionCall: { name: 'a', args: {} }, thoughtSignature: SIG_B }],
+      ],
+    ]);
+    fixThoughtSignatures(contents, input);
+    expect(contents[1].parts[0].thoughtSignature).toBe(SIG_B);
+  });
+  test('no-op when AI message has no signatures', () => {
+    const ai = new AIMessage({
+      content: '',
+      tool_calls: [{ name: 'a', args: {}, id: 't1' }],
+    });
+    const input = [new HumanMessage('q'), ai];
+    const contents = buildContents([
+      ['user', [{ text: 'q' }]],
+      ['model', [{ functionCall: { name: 'a', args: {} } }]],
+    ]);
+    fixThoughtSignatures(contents, input);
+    expect(contents[1].parts[0].thoughtSignature).toBeUndefined();
+  });
+  test('skips empty-string signatures', () => {
+    const ai = new AIMessage({
+      content: '',
+      tool_calls: [{ name: 'a', args: {}, id: 't1' }],
+      additional_kwargs: { signatures: ['', '', ''] },
+    });
+    const input = [new HumanMessage('q'), ai];
+    const contents = buildContents([
+      ['user', [{ text: 'q' }]],
+      ['model', [{ functionCall: { name: 'a', args: {} } }]],
+    ]);
+    fixThoughtSignatures(contents, input);
+    expect(contents[1].parts[0].thoughtSignature).toBeUndefined();
+  });
+});

package/src/llm/vertexai/index.ts CHANGED Viewed

@@ -6,10 +6,48 @@ import type {
   GoogleAIModelRequestParams,
   GoogleAbstractedClient,
 } from '@langchain/google-common';
-import type { BaseMessage } from '@langchain/core/messages';
-import { isAIMessage } from '@langchain/core/messages';
+import type { CallbackManagerForLLMRun } from '@langchain/core/callbacks/manager';
+import type { BaseMessage, UsageMetadata } from '@langchain/core/messages';
+import { AIMessageChunk, isAIMessage } from '@langchain/core/messages';
+import type { ChatGenerationChunk } from '@langchain/core/outputs';
 import type { GoogleThinkingConfig, VertexAIClientOptions } from '@/types';
+/**
+ * `@langchain/google-common`'s `_streamResponseChunks` emits usage on TWO
+ * different paths within the same stream:
+ *
+ *   - Streaming chunks set `chunk.generationInfo.usage_metadata` via
+ *     `responseToUsageMetadata`, which correctly sums
+ *     `candidatesTokenCount + thoughtsTokenCount` and includes
+ *     `output_token_details.reasoning`.
+ *   - The trailing fallback chunk (emitted after the API stream exhausts)
+ *     attaches its own `chunk.message.usage_metadata` built inline as
+ *     `output_tokens = candidatesTokenCount` only — dropping
+ *     `thoughtsTokenCount` and `output_token_details` entirely.
+ *
+ * After `AIMessageChunk.concat`, only `message.usage_metadata` survives —
+ * which is the buggy fallback value. This breaks the documented
+ * `total_tokens === input_tokens + output_tokens` invariant and silently
+ * undercharges thinking models for reasoning tokens.
+ *
+ * The repair: track the last `generationInfo.usage_metadata` we see, and
+ * when the fallback chunk arrives with its buggy `message.usage_metadata`,
+ * replace it with the tracked good value. `CustomChatGoogleGenerativeAI`
+ * solves the same problem for the Google API path differently — by
+ * overriding `_convertToUsageMetadata`.
+ */
+export function repairStreamUsageMetadata(
+  current: UsageMetadata | undefined,
+  generationInfoUsage: UsageMetadata | undefined
+): UsageMetadata | undefined {
+  if (!current) return current;
+  if (!generationInfoUsage) return current;
+  if (generationInfoUsage.total_tokens !== current.total_tokens) return current;
+  if (generationInfoUsage.output_tokens <= current.output_tokens)
+    return current;
+  return generationInfoUsage;
+}
 type AdditionalKwargs =
   | undefined
   | (BaseMessage['additional_kwargs'] & {
@@ -27,50 +65,44 @@ type AdditionalKwargs =
  * - The signature for a functionCall part is an empty string
  *
  * This function correlates each "model" content block in the formatted request
- * back to its originating AI message, then re-attaches non-empty signatures
- * that the library failed to apply.
+ * back to its originating AI message by *position*, then re-attaches non-empty
+ * signatures that the library failed to apply. AI messages without signatures
+ * still consume their slot — filtering them out shifted later messages onto
+ * the wrong content block and dropped real signatures on the floor.
  */
-function fixThoughtSignatures(
+export function fixThoughtSignatures(
   contents: GeminiContent[],
   input: BaseMessage[]
 ): void {
-  // Collect AI messages that have signatures, in order
-  const aiMessages = input.filter(
-    (msg) =>
-      isAIMessage(msg) &&
-      Array.isArray((msg.additional_kwargs as AdditionalKwargs)?.signatures) &&
-      (msg.additional_kwargs.signatures as string[]).length > 0
-  );
-  // Collect "model" content blocks from the formatted request, in order
+  // All AI messages, in order — non-signature ones still consume positional
+  // slots so later messages line up with their model content blocks.
+  const aiMessages = input.filter(isAIMessage);
   const modelContents = contents.filter((c) => c.role === 'model');
-  // They should correspond 1:1 in order (both derived from the same input sequence)
   const count = Math.min(aiMessages.length, modelContents.length);
   for (let i = 0; i < count; i++) {
-    const msg = aiMessages[i];
-    const content = modelContents[i];
-    const signatures = (msg.additional_kwargs as AdditionalKwargs)?.signatures;
+    const signatures = (aiMessages[i].additional_kwargs as AdditionalKwargs)
+      ?.signatures;
+    if (!Array.isArray(signatures) || signatures.length === 0) continue;
-    // Collect non-empty signatures that aren't already attached to any part
+    const content = modelContents[i];
     const attachedSignatures = new Set(
       content.parts
         .map((p) => p.thoughtSignature)
         .filter((s): s is string => s != null && s !== '')
     );
-    const availableSignatures = signatures?.filter(
-      (s) => s != null && s !== '' && !attachedSignatures.has(s)
+    const availableSignatures = signatures.filter(
+      (s): s is string => s != null && s !== '' && !attachedSignatures.has(s)
     );
-    // Assign available signatures to functionCall parts missing one, in order
     let sigIdx = 0;
     for (const part of content.parts) {
       if (
         'functionCall' in part &&
         (part.thoughtSignature == null || part.thoughtSignature === '') &&
-        sigIdx < (availableSignatures?.length ?? 0)
+        sigIdx < availableSignatures.length
       ) {
-        part.thoughtSignature = availableSignatures?.[sigIdx];
+        part.thoughtSignature = availableSignatures[sigIdx];
         sigIdx++;
       }
     }
@@ -446,6 +478,35 @@ export class ChatVertexAI extends ChatGoogle {
     }
     return params;
   }
+  async *_streamResponseChunks(
+    messages: BaseMessage[],
+    options: this['ParsedCallOptions'],
+    runManager?: CallbackManagerForLLMRun
+  ): AsyncGenerator<ChatGenerationChunk> {
+    let lastGoodUsage: UsageMetadata | undefined;
+    for await (const chunk of super._streamResponseChunks(
+      messages,
+      options,
+      runManager
+    )) {
+      const genUsage = (
+        chunk.generationInfo as { usage_metadata?: UsageMetadata } | undefined
+      )?.usage_metadata;
+      if (genUsage) {
+        lastGoodUsage = genUsage;
+      }
+      if (chunk.message instanceof AIMessageChunk) {
+        const repaired = repairStreamUsageMetadata(
+          chunk.message.usage_metadata,
+          lastGoodUsage
+        );
+        if (repaired !== chunk.message.usage_metadata) {
+          chunk.message.usage_metadata = repaired;
+        }
+      }
+      yield chunk;
+    }
+  }
   buildConnection(
     fields: VertexAIClientOptions | undefined,
     client: GoogleAbstractedClient

package/src/llm/vertexai/llm.spec.ts CHANGED Viewed

@@ -76,6 +76,24 @@ describe.each(gemini3Models)(
         (reasoningTokens as Record<string, number>)?.reasoning
       ).toBeGreaterThan(0);
     });
+    test('stream: usage_metadata includes reasoning in output_tokens (issue LibreChat#13006)', async () => {
+      let finalChunk: AIMessageChunk | undefined;
+      for await (const chunk of await model.stream(
+        'What is 2+2? Think step by step.'
+      )) {
+        finalChunk = finalChunk ? finalChunk.concat(chunk) : chunk;
+      }
+      const usage = finalChunk?.usage_metadata;
+      expect(usage).toBeDefined();
+      const reasoning = (
+        usage as { output_token_details?: { reasoning?: number } }
+      )?.output_token_details?.reasoning;
+      expect(reasoning).toBeGreaterThan(0);
+      expect(usage!.total_tokens).toBe(
+        usage!.input_tokens + usage!.output_tokens
+      );
+    });
   }
 );

package/src/llm/vertexai/repairUsageMetadata.test.ts ADDED Viewed

@@ -0,0 +1,54 @@
+import { expect, test, describe } from '@jest/globals';
+import type { UsageMetadata } from '@langchain/core/messages';
+import { repairStreamUsageMetadata } from './index';
+const goodUsage: UsageMetadata = {
+  input_tokens: 80657,
+  output_tokens: 2608,
+  total_tokens: 83265,
+  output_token_details: { reasoning: 1842 },
+};
+const buggyFallbackUsage: UsageMetadata = {
+  input_tokens: 80657,
+  output_tokens: 766,
+  total_tokens: 83265,
+};
+describe('repairStreamUsageMetadata', () => {
+  test('replaces buggy fallback usage with tracked good usage from generationInfo', () => {
+    const result = repairStreamUsageMetadata(buggyFallbackUsage, goodUsage);
+    expect(result).toBe(goodUsage);
+  });
+  test('returns current unchanged when no generationInfo usage was tracked', () => {
+    const result = repairStreamUsageMetadata(buggyFallbackUsage, undefined);
+    expect(result).toBe(buggyFallbackUsage);
+  });
+  test('returns undefined unchanged', () => {
+    const result = repairStreamUsageMetadata(undefined, goodUsage);
+    expect(result).toBeUndefined();
+  });
+  test('does not replace when total_tokens differ (different request)', () => {
+    const stale: UsageMetadata = { ...goodUsage, total_tokens: 100 };
+    const result = repairStreamUsageMetadata(buggyFallbackUsage, stale);
+    expect(result).toBe(buggyFallbackUsage);
+  });
+  test('does not replace when generationInfo output_tokens is not larger (already correct)', () => {
+    const equivalent: UsageMetadata = {
+      ...buggyFallbackUsage,
+      output_tokens: buggyFallbackUsage.output_tokens,
+    };
+    const result = repairStreamUsageMetadata(buggyFallbackUsage, equivalent);
+    expect(result).toBe(buggyFallbackUsage);
+  });
+  test('does not replace when generationInfo output_tokens is smaller', () => {
+    const smaller: UsageMetadata = { ...goodUsage, output_tokens: 100 };
+    const result = repairStreamUsageMetadata(buggyFallbackUsage, smaller);
+    expect(result).toBe(buggyFallbackUsage);
+  });
+});