npm - @librechat/agents - Versions diffs - 3.2.35 → 3.2.37 - Mend

@librechat/agents 3.2.35 → 3.2.37

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (98)

package/dist/cjs/agents/AgentContext.cjs +75 -2
package/dist/cjs/agents/AgentContext.cjs.map +1 -1
package/dist/cjs/agents/projection.cjs +25 -0
package/dist/cjs/agents/projection.cjs.map +1 -0
package/dist/cjs/graphs/Graph.cjs +10 -26
package/dist/cjs/graphs/Graph.cjs.map +1 -1
package/dist/cjs/langfuse.cjs +16 -5
package/dist/cjs/langfuse.cjs.map +1 -1
package/dist/cjs/langfuseToolOutputTracing.cjs +7 -0
package/dist/cjs/langfuseToolOutputTracing.cjs.map +1 -1
package/dist/cjs/llm/anthropic/utils/message_inputs.cjs +118 -7
package/dist/cjs/llm/anthropic/utils/message_inputs.cjs.map +1 -1
package/dist/cjs/llm/bedrock/utils/message_inputs.cjs +44 -4
package/dist/cjs/llm/bedrock/utils/message_inputs.cjs.map +1 -1
package/dist/cjs/main.cjs +7 -0
package/dist/cjs/messages/budget.cjs +23 -0
package/dist/cjs/messages/budget.cjs.map +1 -0
package/dist/cjs/messages/cache.cjs +184 -0
package/dist/cjs/messages/cache.cjs.map +1 -1
package/dist/cjs/messages/index.cjs +1 -0
package/dist/cjs/summarization/node.cjs +1 -1
package/dist/cjs/summarization/node.cjs.map +1 -1
package/dist/cjs/tools/search/format.cjs +91 -2
package/dist/cjs/tools/search/format.cjs.map +1 -1
package/dist/cjs/tools/search/tool.cjs +4 -3
package/dist/cjs/tools/search/tool.cjs.map +1 -1
package/dist/cjs/tools/toolOutputReferences.cjs +28 -14
package/dist/cjs/tools/toolOutputReferences.cjs.map +1 -1
package/dist/esm/agents/AgentContext.mjs +76 -3
package/dist/esm/agents/AgentContext.mjs.map +1 -1
package/dist/esm/agents/projection.mjs +25 -0
package/dist/esm/agents/projection.mjs.map +1 -0
package/dist/esm/graphs/Graph.mjs +9 -25
package/dist/esm/graphs/Graph.mjs.map +1 -1
package/dist/esm/langfuse.mjs +16 -5
package/dist/esm/langfuse.mjs.map +1 -1
package/dist/esm/langfuseToolOutputTracing.mjs +7 -0
package/dist/esm/langfuseToolOutputTracing.mjs.map +1 -1
package/dist/esm/llm/anthropic/utils/message_inputs.mjs +118 -7
package/dist/esm/llm/anthropic/utils/message_inputs.mjs.map +1 -1
package/dist/esm/llm/bedrock/utils/message_inputs.mjs +44 -4
package/dist/esm/llm/bedrock/utils/message_inputs.mjs.map +1 -1
package/dist/esm/main.mjs +4 -2
package/dist/esm/messages/budget.mjs +23 -0
package/dist/esm/messages/budget.mjs.map +1 -0
package/dist/esm/messages/cache.mjs +182 -1
package/dist/esm/messages/cache.mjs.map +1 -1
package/dist/esm/messages/index.mjs +1 -0
package/dist/esm/summarization/node.mjs +2 -2
package/dist/esm/summarization/node.mjs.map +1 -1
package/dist/esm/tools/search/format.mjs +91 -2
package/dist/esm/tools/search/format.mjs.map +1 -1
package/dist/esm/tools/search/tool.mjs +4 -3
package/dist/esm/tools/search/tool.mjs.map +1 -1
package/dist/esm/tools/toolOutputReferences.mjs +28 -14
package/dist/esm/tools/toolOutputReferences.mjs.map +1 -1
package/dist/types/agents/AgentContext.d.ts +30 -1
package/dist/types/agents/projection.d.ts +26 -0
package/dist/types/index.d.ts +1 -0
package/dist/types/messages/budget.d.ts +11 -0
package/dist/types/messages/cache.d.ts +47 -0
package/dist/types/messages/index.d.ts +1 -0
package/dist/types/tools/search/format.d.ts +4 -1
package/dist/types/tools/search/types.d.ts +7 -0
package/dist/types/types/graph.d.ts +2 -0
package/package.json +2 -1
package/src/agents/AgentContext.ts +105 -4
package/src/agents/__tests__/AgentContext.test.ts +232 -9
package/src/agents/__tests__/projection.test.ts +73 -0
package/src/agents/projection.ts +46 -0
package/src/graphs/Graph.ts +66 -65
package/src/index.ts +3 -0
package/src/langfuse.ts +38 -4
package/src/langfuseToolOutputTracing.ts +18 -0
package/src/llm/anthropic/utils/cross-provider-reasoning.test.ts +317 -0
package/src/llm/anthropic/utils/message_inputs.ts +209 -19
package/src/llm/anthropic/utils/stripPrefillCache.test.ts +111 -0
package/src/llm/bedrock/utils/cross-provider-reasoning.test.ts +131 -0
package/src/llm/bedrock/utils/message_inputs.test.ts +129 -0
package/src/llm/bedrock/utils/message_inputs.ts +81 -4
package/src/llm/bedrock/utils/toolResultCachePoint.test.ts +103 -0
package/src/messages/budget.ts +32 -0
package/src/messages/cache.tail.test.ts +340 -0
package/src/messages/cache.ts +267 -1
package/src/messages/index.ts +1 -0
package/src/messages/tailCacheConversion.test.ts +161 -0
package/src/scripts/bench-prompt-cache.ts +479 -0
package/src/specs/langfuse-config.test.ts +69 -2
package/src/specs/langfuse-metadata.test.ts +44 -0
package/src/specs/langfuse-tool-output-tracing.test.ts +6 -0
package/src/summarization/node.ts +2 -2
package/src/tools/__tests__/annotateMessagesForLLM.test.ts +50 -0
package/src/tools/search/format.test.ts +242 -0
package/src/tools/search/format.ts +122 -5
package/src/tools/search/tool.ts +5 -1
package/src/tools/search/types.ts +7 -0
package/src/tools/toolOutputReferences.ts +34 -20
package/src/types/graph.ts +2 -0

package/src/llm/anthropic/utils/cross-provider-reasoning.test.ts ADDED Viewed

@@ -0,0 +1,317 @@
+import { AIMessage, HumanMessage } from '@langchain/core/messages';
+import type { BaseMessage } from '@langchain/core/messages';
+import { _convertMessagesToAnthropicPayload } from './message_inputs';
+/**
+ * Regression for cross-provider agent handoffs (e.g. Bedrock → Anthropic): a
+ * Bedrock turn that used extended thinking leaves a `reasoning_content` content
+ * block ({ reasoningText: { text, signature } }) in the history. The official
+ * Anthropic converter has no branch for it and previously threw
+ * "Unsupported message content format", crashing the handoff. Only known
+ * foreign reasoning (Bedrock `reasoning_content`, Google `reasoning`, LibreChat
+ * `think`) is dropped; any other unknown block still throws rather than being
+ * silently omitted (real content — user media, Google code-execution — must be
+ * surfaced); and a tool call carried only on `tool_calls` survives dropping its
+ * reasoning sibling without being duplicated.
+ */
+/** Shape returned by the converter under test. */
+type AnthropicPayload = ReturnType<typeof _convertMessagesToAnthropicPayload>;
+/** Only the fields of a converted Anthropic content block that assertions read. */
+type TestBlock = { type?: string; text?: string };
+/** First assistant-role message in the payload, or `undefined` when absent. */
+const findAssistant = (payload: AnthropicPayload) =>
+  payload.messages.find(({ role }) => role === 'assistant');
+/** Content blocks of that assistant message; `[]` when content is not an array. */
+const assistantBlocks = (payload: AnthropicPayload): TestBlock[] => {
+  const blocks = findAssistant(payload)?.content;
+  return (Array.isArray(blocks) ? blocks : []) as TestBlock[];
+};
+describe('_convertMessagesToAnthropicPayload — cross-provider reasoning blocks', () => {
+  const bedrockHandoffHistory = (): BaseMessage[] => [ // factory: a fresh history per call
+    new HumanMessage('research Assort Health'),
+    new AIMessage({
+      content: [
+        {
+          type: 'reasoning_content',
+          index: 0,
+          reasoningText: {
+            text: 'Let me search Notion then hand off to the data agent.',
+            signature: 'bedrock-signature-not-valid-for-anthropic',
+          },
+        },
+        { type: 'text', text: 'Kicking off the searches now.' },
+        {
+          type: 'tool_use',
+          id: 'tooluse_abc',
+          name: 'notion-search',
+          input: { query: 'Assort Health' },
+        },
+      ],
+      tool_calls: [ // same call id as the tool_use content block above
+        {
+          id: 'tooluse_abc',
+          name: 'notion-search',
+          args: { query: 'Assort Health' },
+          type: 'tool_call',
+        },
+      ],
+    }),
+  ];
+  it('does not throw on a Bedrock reasoning_content block', () => {
+    expect(() =>
+      _convertMessagesToAnthropicPayload(bedrockHandoffHistory())
+    ).not.toThrow();
+  });
+  it('drops reasoning_content (incl. its foreign signature) but keeps text and tool_use', () => {
+    const payload = _convertMessagesToAnthropicPayload(bedrockHandoffHistory());
+    expect(findAssistant(payload)).toBeDefined();
+    const blocks = assistantBlocks(payload);
+    expect(blocks.find((b) => b.type === 'reasoning_content')).toBeUndefined();
+    expect(
+      blocks.find(
+        (b) => b.type === 'thinking' || b.type === 'redacted_thinking'
+      )
+    ).toBeUndefined();
+    expect(JSON.stringify(blocks)).not.toContain(
+      'bedrock-signature-not-valid-for-anthropic'
+    );
+    expect(
+      blocks.some(
+        (b) => b.type === 'text' && b.text === 'Kicking off the searches now.'
+      )
+    ).toBe(true);
+    expect(blocks.find((b) => b.type === 'tool_use')).toMatchObject({
+      type: 'tool_use',
+      id: 'tooluse_abc',
+      name: 'notion-search',
+      input: { query: 'Assort Health' },
+    });
+  });
+  it('drops a Google `reasoning` block without throwing', () => {
+    const history: BaseMessage[] = [
+      new HumanMessage('hi'),
+      new AIMessage({
+        content: [
+          { type: 'reasoning', reasoning: 'internal google chain of thought' },
+          { type: 'text', text: 'Hello!' },
+        ],
+      }),
+    ];
+    expect(() => _convertMessagesToAnthropicPayload(history)).not.toThrow();
+    const blocks = assistantBlocks(_convertMessagesToAnthropicPayload(history));
+    expect(blocks.find((b) => b.type === 'reasoning')).toBeUndefined();
+    expect(blocks.some((b) => b.type === 'text' && b.text === 'Hello!')).toBe(
+      true
+    );
+  });
+  it('drops a LibreChat `think` block without throwing', () => {
+    const history: BaseMessage[] = [
+      new HumanMessage('hi'),
+      new AIMessage({
+        content: [
+          { type: 'think', think: 'librechat serialized reasoning' },
+          { type: 'text', text: 'Done.' },
+        ],
+      }),
+    ];
+    expect(() => _convertMessagesToAnthropicPayload(history)).not.toThrow();
+    const blocks = assistantBlocks(_convertMessagesToAnthropicPayload(history));
+    expect(blocks.find((b) => b.type === 'think')).toBeUndefined();
+    expect(blocks.some((b) => b.type === 'text' && b.text === 'Done.')).toBe(
+      true
+    );
+  });
+  it('drops an unsigned `thinking` block (Google thinking-enabled output) on an assistant turn', () => {
+    const history: BaseMessage[] = [
+      new HumanMessage('hi'),
+      new AIMessage({
+        content: [
+          {
+            type: 'thinking',
+            thinking: 'google chain of thought, no signature',
+          },
+          { type: 'text', text: 'Answer.' },
+        ],
+      }),
+    ];
+    expect(() => _convertMessagesToAnthropicPayload(history)).not.toThrow();
+    const blocks = assistantBlocks(_convertMessagesToAnthropicPayload(history));
+    expect(blocks.find((b) => b.type === 'thinking')).toBeUndefined();
+    expect(blocks.some((b) => b.type === 'text' && b.text === 'Answer.')).toBe(
+      true
+    );
+  });
+  it('forwards a signed `thinking` block (Anthropic-native) unchanged', () => {
+    const history: BaseMessage[] = [
+      new HumanMessage('hi'),
+      new AIMessage({
+        content: [
+          {
+            type: 'thinking',
+            thinking: 'native reasoning',
+            signature: 'valid-sig',
+          },
+          { type: 'text', text: 'Answer.' },
+        ],
+      }),
+    ];
+    const blocks = assistantBlocks(_convertMessagesToAnthropicPayload(history));
+    expect(blocks.find((b) => b.type === 'thinking')).toMatchObject({
+      type: 'thinking',
+      thinking: 'native reasoning',
+      signature: 'valid-sig',
+    });
+  });
+  it('throws (not silently drops) on an unknown assistant block such as Google code execution', () => {
+    // executableCode/codeExecutionResult blocks carry real, user-visible content;
+    // dropping them silently on a Google → Anthropic handoff would lose evidence.
+    const history: BaseMessage[] = [
+      new HumanMessage('run some code'),
+      new AIMessage({
+        content: [
+          {
+            type: 'executableCode',
+            executableCode: { language: 'PYTHON', code: 'print(2+2)' },
+          },
+          { type: 'text', text: 'Here is the result.' },
+        ],
+      }),
+    ];
+    expect(() => _convertMessagesToAnthropicPayload(history)).toThrow(
+      'Unsupported message content format'
+    );
+  });
+  it('throws (not silently drops) on an unsupported user block such as media', () => {
+    const history: BaseMessage[] = [
+      new HumanMessage({
+        content: [
+          {
+            type: 'video_url',
+            video_url: { url: 'https://example.com/v.mp4' },
+          },
+          { type: 'text', text: 'what is in this video?' },
+        ],
+      }),
+    ];
+    expect(() => _convertMessagesToAnthropicPayload(history)).toThrow(
+      'Unsupported message content format'
+    );
+  });
+  it('does not drop a reasoning-typed block on a user turn (only assistant reasoning is dropped)', () => {
+    const history: BaseMessage[] = [
+      new HumanMessage({
+        content: [
+          { type: 'reasoning_content', reasoningText: { text: 'user text' } },
+          { type: 'text', text: 'hello' },
+        ],
+      }),
+    ];
+    expect(() => _convertMessagesToAnthropicPayload(history)).toThrow(
+      'Unsupported message content format'
+    );
+  });
+  it('preserves a tool call carried only on tool_calls when its reasoning sibling is dropped', () => {
+    // Mirrors a Bedrock extended-thinking turn: the tool call exists only on
+    // `tool_calls`, while `content` holds just the reasoning block (no tool_use).
+    const history: BaseMessage[] = [
+      new HumanMessage('research Assort Health'),
+      new AIMessage({
+        content: [
+          {
+            type: 'reasoning_content',
+            reasoningText: { text: 'I should hand off now.', signature: 'sig' },
+          },
+        ],
+        tool_calls: [ // no matching tool_use block in content
+          {
+            id: 'tooluse_transfer',
+            name: 'lc_transfer_to_data_agent',
+            args: { reason: 'need consumption data' },
+            type: 'tool_call',
+          },
+        ],
+      }),
+    ];
+    expect(() => _convertMessagesToAnthropicPayload(history)).not.toThrow();
+    const blocks = assistantBlocks(_convertMessagesToAnthropicPayload(history));
+    expect(blocks.find((b) => b.type === 'reasoning_content')).toBeUndefined();
+    expect(blocks.find((b) => b.type === 'tool_use')).toMatchObject({
+      type: 'tool_use',
+      id: 'tooluse_transfer',
+      name: 'lc_transfer_to_data_agent',
+      input: { reason: 'need consumption data' },
+    });
+    // A `_` placeholder text block must not remain beside the real tool_use.
+    expect(blocks.some((b) => b.type === 'text' && b.text === '_')).toBe(false);
+  });
+  it('does not duplicate a Google functionCall tool call already materialized by _formatContent', () => {
+    // _formatContent turns the `functionCall` part into a tool_use block; the
+    // tool_calls materialization must treat it as represented, not append again.
+    const history: BaseMessage[] = [
+      new HumanMessage('weather in SF?'),
+      new AIMessage({
+        content: [
+          {
+            type: 'functionCall',
+            functionCall: { name: 'get_weather', args: { city: 'SF' } },
+          },
+        ],
+        tool_calls: [ // supplies the id expected on the converted tool_use
+          {
+            id: 'call_weather_1',
+            name: 'get_weather',
+            args: { city: 'SF' },
+            type: 'tool_call',
+          },
+        ],
+      }),
+    ];
+    const blocks = assistantBlocks(_convertMessagesToAnthropicPayload(history));
+    const toolUses = blocks.filter((b) => b.type === 'tool_use');
+    expect(toolUses).toHaveLength(1);
+    expect(toolUses[0]).toMatchObject({
+      type: 'tool_use',
+      id: 'call_weather_1',
+      name: 'get_weather',
+    });
+  });
+  it('falls back to placeholder text when reasoning was the only content', () => {
+    const history: BaseMessage[] = [
+      new HumanMessage('hi'),
+      new AIMessage({
+        content: [
+          {
+            type: 'reasoning_content',
+            reasoningText: {
+              text: 'only thinking, no visible text',
+              signature: 'sig',
+            },
+          },
+        ],
+      }),
+    ];
+    expect(() => _convertMessagesToAnthropicPayload(history)).not.toThrow();
+    const blocks = assistantBlocks(_convertMessagesToAnthropicPayload(history));
+    expect(blocks.find((b) => b.type === 'reasoning_content')).toBeUndefined();
+    expect(blocks.length).toBeGreaterThan(0);
+    expect(blocks.every((b) => b.type === 'text')).toBe(true);
+  });
+});

package/src/llm/anthropic/utils/message_inputs.ts CHANGED Viewed

@@ -140,6 +140,35 @@ export function normalizeAnthropicToolCallId(
   return `${sanitized.slice(0, prefixMaxLength)}_${hash}`;
 }
+/**
+ * Move a `cache_control` marker from a tool result's inner blocks up to the
+ * enclosing `tool_result` block. Per the original author's note, Anthropic
+ * documents only the top-level `messages.content` block as cacheable (nested
+ * markers are honored today but undocumented), and this mirrors the Bedrock
+ * cachePoint hoist. Every inner marker is removed; the first non-nullish one
+ * is returned as `cacheControl` (`undefined` for strings or unmarked content).
+ */
+function hoistToolResultCacheControl(
+  content: string | MessageContentComplex[]
+): { content: string | MessageContentComplex[]; cacheControl: unknown } {
+  let hoisted: unknown;
+  if (!Array.isArray(content)) {
+    // Plain string results have no inner blocks to inspect.
+    return { content, cacheControl: hoisted };
+  }
+  // Always yields a fresh array, even when no block carried a marker.
+  const cleaned = content.map((block): MessageContentComplex => {
+    if (!('cache_control' in block)) {
+      return block;
+    }
+    // Rest-destructuring copies the block without its marker.
+    const { cache_control: marker, ...rest } = block as Record<string, unknown>;
+    hoisted ??= marker;
+    return rest as MessageContentComplex;
+  });
+  return { content: cleaned, cacheControl: hoisted };
+}
 function _ensureMessageContents(
   messages: BaseMessage[]
 ): (SystemMessage | HumanMessage | AIMessage)[] {
@@ -183,13 +212,20 @@ function _ensureMessageContents(
         const toolMessageContent = (
           message as { content?: BaseMessage['content'] | null }
         ).content;
+        // Hoist a tail cache_control off the inner content onto the
+        // tool_result block itself (the documented cacheable position).
+        const { content: hoistedContent, cacheControl } =
+          toolMessageContent != null
+            ? hoistToolResultCacheControl(_formatContent(message))
+            : { content: undefined, cacheControl: undefined };
         updatedMsgs.push(
           new HumanMessage({
             content: [
               {
                 type: 'tool_result',
-                ...(toolMessageContent != null
-                  ? { content: _formatContent(message) }
+                ...(hoistedContent != null ? { content: hoistedContent } : {}),
+                ...(cacheControl != null
+                  ? { cache_control: cacheControl as { type: 'ephemeral' } }
                   : {}),
                 tool_use_id: normalizeAnthropicToolCallId(
                   (message as ToolMessage).tool_call_id
@@ -429,6 +465,14 @@ function _formatContent(message: BaseMessage) {
     'web_search_result',
   ];
   const textTypes = ['text', 'text_delta'];
+  /**
+   * Reasoning blocks emitted by other providers — Bedrock's `reasoning_content`,
+   * Google's `reasoning`, and LibreChat's `think`. Their signatures are
+   * provider-specific and cannot be validated by Anthropic, so on a
+   * cross-provider handoff (e.g. Bedrock → Anthropic) we drop them rather than
+   * forwarding an unusable block. The receiving model produces its own thinking.
+   */
+  const foreignReasoningTypes = ['reasoning_content', 'reasoning', 'think'];
   const { content } = message;
   if (typeof content === 'string') {
@@ -568,6 +612,15 @@ function _formatContent(message: BaseMessage) {
         };
       } else if (contentPart.type === 'thinking') {
         const thinkingPart = contentPart as AnthropicThinkingBlockParam;
+        // Google thinking-enabled output reuses `type: 'thinking'` but carries
+        // no Anthropic signature. Anthropic rejects an unsigned thinking block,
+        // so on an assistant turn treat it as foreign reasoning and drop it
+        // rather than forward an unusable block. Signed (Anthropic-native)
+        // thinking is forwarded as before.
+        const signature = (thinkingPart as { signature?: string }).signature;
+        if (isAIMessage(message) && (signature == null || signature === '')) {
+          return null;
+        }
         const block: AnthropicThinkingBlockParam = {
           type: 'thinking' as const, // Explicitly setting the type as "thinking"
           thinking: thinkingPart.thinking,
@@ -651,7 +704,9 @@ function _formatContent(message: BaseMessage) {
           (contentPartCopy.input === '' || contentPartCopy.input == null)
         ) {
           const matchingToolCall = isAIMessage(message)
-            ? message.tool_calls?.find((toolCall) => toolCall.id === contentPartCopy.id)
+            ? message.tool_calls?.find(
+              (toolCall) => toolCall.id === contentPartCopy.id
+            )
             : undefined;
           if (matchingToolCall) {
             contentPartCopy.input = matchingToolCall.args;
@@ -666,7 +721,10 @@ function _formatContent(message: BaseMessage) {
                   typeof p.input === 'string'
                 );
               })
-              .reduce((acc, part) => acc + (part as Record<string, unknown>).input, '');
+              .reduce(
+                (acc, part) => acc + (part as Record<string, unknown>).input,
+                ''
+              );
             if (merged !== '') {
               contentPartCopy.input = merged;
             }
@@ -720,6 +778,18 @@ function _formatContent(message: BaseMessage) {
           name: correspondingToolCall.name,
           input: functionCallPart.functionCall.args,
         };
+      } else if (
+        isAIMessage(message) &&
+        foreignReasoningTypes.some((t) => t === contentPart.type)
+      ) {
+        // Foreign reasoning on an ASSISTANT turn (Bedrock `reasoning_content`,
+        // Google `reasoning`, LibreChat `think`) carries provider-specific
+        // signatures Anthropic cannot validate; drop it so a cross-provider
+        // handoff doesn't crash. The same types on a user/tool turn are real
+        // input and fall through to the throw below rather than being silently
+        // dropped — as does any other unknown block (user media, Google
+        // code-execution), which must be surfaced, not discarded.
+        return null;
       } else {
         console.error(
           'Unsupported content part:',
@@ -808,25 +878,53 @@ export function _convertMessagesToAnthropicPayload(
           };
         }
       } else {
-        const { content } = message;
-        const hasMismatchedToolCalls = !toolCalls.every(
-          (toolCall) =>
-            !!content.find(
-              (contentPart) =>
-                (contentPart.type === 'tool_use' ||
-                  contentPart.type === 'input_json_delta' ||
-                  contentPart.type === 'server_tool_use') &&
-                contentPart.id === toolCall.id
+        const formattedContent = _formatContent(message);
+        const formattedBlocks = Array.isArray(formattedContent)
+          ? formattedContent
+          : [];
+        // Tool calls already materialized as content blocks by `_formatContent`.
+        // Derived from the FORMATTED output (not the raw content by type) so
+        // that Google `functionCall` parts — which `_formatContent` converts
+        // into `tool_use` — count as represented and are not appended twice.
+        const representedToolIds = new Set(
+          formattedBlocks
+            .filter(
+              (block) =>
+                block != null &&
+                (block.type === 'tool_use' || block.type === 'server_tool_use')
             )
+            .map((block) => (block as { id?: string }).id)
         );
-        if (hasMismatchedToolCalls) {
-          console.warn(
-            'The "tool_calls" field on a message is only respected if content is a string.'
-          );
+        // Client tool calls present in `tool_calls` but absent from the
+        // formatted content — e.g. a Bedrock extended-thinking turn records the
+        // tool only on `tool_calls` and leaves `content` as just the reasoning
+        // block. Without materializing them, dropping that reasoning block
+        // silently loses the (handoff) tool call instead of forwarding it.
+        const unrepresentedToolCalls = toolCalls.filter(
+          (toolCall) =>
+            !(
+              toolCall.id?.startsWith(Constants.ANTHROPIC_SERVER_TOOL_PREFIX) ??
+              false
+            ) && !representedToolIds.has(toolCall.id)
+        );
+        if (unrepresentedToolCalls.length === 0) {
+          return { role, content: formattedContent };
         }
+        const existingBlocks = formattedBlocks.filter(
+          (block) =>
+            !(
+              block != null &&
+              block.type === 'text' &&
+              'text' in block &&
+              block.text === ANTHROPIC_EMPTY_TEXT_PLACEHOLDER
+            )
+        );
         return {
           role,
-          content: _formatContent(message),
+          content: [
+            ...existingBlocks,
+            ...unrepresentedToolCalls.map(_convertLangChainToolCallToAnthropic),
+          ],
         };
       }
     } else {
@@ -855,6 +953,86 @@ export function modelDisallowsAssistantPrefill(model?: string): boolean {
   return Number(match[1]) >= 6;
 }
+/** Whether any array-content message has a block with a `cache_control` key. */
+function messagesHaveCacheControl(
+  messages: AnthropicMessageCreateParams['messages']
+): boolean {
+  // String content cannot carry a marker, so only block arrays are scanned.
+  const isMarked = ({ content }: (typeof messages)[number]): boolean =>
+    Array.isArray(content) && content.some((block) => 'cache_control' in block);
+  return messages.some(isMarked);
+}
+/** Block types the tail re-anchor skips; Anthropic rejects cache_control on them. */
+const NON_CACHEABLE_PAYLOAD_BLOCK_TYPES = new Set([
+  'thinking',
+  'redacted_thinking',
+]);
+/**
+ * Re-establish the tail prompt-cache breakpoint on an already-converted
+ * Anthropic payload: mark the last cacheable block of the final message with
+ * an ephemeral `cache_control`. Meant for use after a trailing assistant
+ * prefill is stripped. Thinking blocks, untyped blocks and blank text are
+ * passed over. Returns the input array itself when no marker can be placed,
+ * otherwise a shallow copy whose tail message carries the marker.
+ */
+function reanchorTailCacheControl(
+  messages: AnthropicMessageCreateParams['messages']
+): AnthropicMessageCreateParams['messages'] {
+  const lastIndex = messages.length - 1;
+  if (lastIndex < 0) {
+    return messages;
+  }
+  const tail = messages[lastIndex];
+  // Shallow-copies the list, replacing only the tail message's content.
+  const withTailContent = (content: unknown): typeof messages => {
+    const next = [...messages];
+    next[lastIndex] = { ...tail, content } as (typeof messages)[number];
+    return next;
+  };
+  const ephemeral = (): { type: 'ephemeral' } => ({ type: 'ephemeral' });
+  const body = tail.content;
+  if (typeof body === 'string') {
+    // A blank string offers nothing to cache; otherwise promote it to a block.
+    return body.trim() === ''
+      ? messages
+      : withTailContent([
+        { type: 'text', text: body, cache_control: ephemeral() },
+      ]);
+  }
+  if (!Array.isArray(body)) {
+    return messages;
+  }
+  const isCacheable = (block: unknown): boolean => {
+    const type = (block as { type?: string }).type;
+    if (type == null || NON_CACHEABLE_PAYLOAD_BLOCK_TYPES.has(type)) {
+      return false;
+    }
+    return (
+      type !== 'text' ||
+      ((block as { text?: string }).text ?? '').trim() !== ''
+    );
+  };
+  // Visit every block; the highest qualifying index wins.
+  const anchor = body.reduce<number>(
+    (found, block, i) => (isCacheable(block) ? i : found),
+    -1
+  );
+  if (anchor < 0) {
+    return messages;
+  }
+  return withTailContent(
+    body.map((block, i) =>
+      i === anchor ? { ...block, cache_control: ephemeral() } : block
+    )
+  );
+}
 export function stripUnsupportedAssistantPrefill<
   T extends Pick<AnthropicMessageCreateParams, 'messages'> & { model?: string },
 >(request: T): T {
@@ -878,9 +1056,21 @@ export function stripUnsupportedAssistantPrefill<
     nextMessages.pop();
   }
+  /**
+   * If a single tail prompt-cache breakpoint rode the stripped assistant
+   * prefill, the survivors may now carry no `cache_control` at all, dropping
+   * message caching for this request. Re-anchor the breakpoint on the new tail
+   * (only when one was actually lost, so caching-off requests stay untouched).
+   */
+  const reanchored =
+    messagesHaveCacheControl(messages) &&
+    !messagesHaveCacheControl(nextMessages)
+      ? reanchorTailCacheControl(nextMessages)
+      : nextMessages;
   return {
     ...request,
-    messages: nextMessages,
+    messages: reanchored,
   };
 }