npm - @librechat/agents - Versions diffs - 3.1.97 → 3.1.99 - Mend

@librechat/agents 3.1.97 → 3.1.99

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (49)

package/dist/cjs/graphs/Graph.cjs +6 -0
package/dist/cjs/graphs/Graph.cjs.map +1 -1
package/dist/cjs/langfuseToolOutputTracing.cjs +16 -5
package/dist/cjs/langfuseToolOutputTracing.cjs.map +1 -1
package/dist/cjs/llm/bedrock/index.cjs +10 -0
package/dist/cjs/llm/bedrock/index.cjs.map +1 -1
package/dist/cjs/llm/bedrock/toolCache.cjs +125 -0
package/dist/cjs/llm/bedrock/toolCache.cjs.map +1 -0
package/dist/cjs/messages/cache.cjs +17 -9
package/dist/cjs/messages/cache.cjs.map +1 -1
package/dist/cjs/messages/prune.cjs +45 -8
package/dist/cjs/messages/prune.cjs.map +1 -1
package/dist/cjs/tools/ToolNode.cjs +6 -1
package/dist/cjs/tools/ToolNode.cjs.map +1 -1
package/dist/esm/graphs/Graph.mjs +6 -0
package/dist/esm/graphs/Graph.mjs.map +1 -1
package/dist/esm/langfuseToolOutputTracing.mjs +16 -5
package/dist/esm/langfuseToolOutputTracing.mjs.map +1 -1
package/dist/esm/llm/bedrock/index.mjs +10 -0
package/dist/esm/llm/bedrock/index.mjs.map +1 -1
package/dist/esm/llm/bedrock/toolCache.mjs +122 -0
package/dist/esm/llm/bedrock/toolCache.mjs.map +1 -0
package/dist/esm/messages/cache.mjs +17 -9
package/dist/esm/messages/cache.mjs.map +1 -1
package/dist/esm/messages/prune.mjs +45 -8
package/dist/esm/messages/prune.mjs.map +1 -1
package/dist/esm/tools/ToolNode.mjs +6 -1
package/dist/esm/tools/ToolNode.mjs.map +1 -1
package/dist/types/llm/bedrock/index.d.ts +16 -0
package/dist/types/llm/bedrock/toolCache.d.ts +4 -0
package/dist/types/messages/cache.d.ts +2 -2
package/dist/types/types/llm.d.ts +2 -2
package/package.json +1 -1
package/src/agents/__tests__/AgentContext.anthropic.live.test.ts +332 -0
package/src/agents/__tests__/AgentContext.bedrock.live.test.ts +504 -0
package/src/graphs/Graph.ts +14 -0
package/src/langfuseToolOutputTracing.ts +26 -7
package/src/llm/bedrock/index.ts +32 -1
package/src/llm/bedrock/llm.spec.ts +154 -1
package/src/llm/bedrock/toolCache.test.ts +131 -0
package/src/llm/bedrock/toolCache.ts +191 -0
package/src/messages/cache.test.ts +97 -38
package/src/messages/cache.ts +18 -10
package/src/messages/prune.ts +55 -17
package/src/specs/langfuse-tool-output-tracing.test.ts +28 -0
package/src/specs/prune.test.ts +193 -0
package/src/tools/ToolNode.ts +7 -1
package/src/tools/__tests__/ToolNode.langfuse.test.ts +6 -0
package/src/types/llm.ts +2 -2

package/src/agents/__tests__/AgentContext.bedrock.live.test.ts CHANGED Viewed

@@ -11,6 +11,18 @@ import { config as dotenvConfig } from 'dotenv';
 dotenvConfig();
 import { describe, expect, it } from '@jest/globals';
+import {
+  AIMessage,
+  BaseMessage,
+  HumanMessage,
+  SystemMessage,
+  ToolMessage,
+  type MessageContentComplex,
+} from '@langchain/core/messages';
+import {
+  BedrockRuntimeClient,
+  ConverseCommand,
+} from '@aws-sdk/client-bedrock-runtime';
 import type * as t from '@/types';
 import {
   runLiveTurn,
@@ -20,6 +32,9 @@ import {
   waitForCachePropagation,
 } from './promptCacheLiveHelpers';
 import { Providers } from '@/common';
+import { addBedrockCacheControl } from '@/messages/cache';
+import { toLangChainContent } from '@/messages/langchain';
+import { convertToConverseMessages } from '@/llm/bedrock/utils';
 const accessKeyId =
   process.env.BEDROCK_AWS_ACCESS_KEY_ID ?? process.env.AWS_ACCESS_KEY_ID;
@@ -77,6 +92,373 @@ function createClientOptions(): t.BedrockAnthropicClientOptions {
   };
 }
+/**
+ * Usage figures for one benchmark call, in the shape returned by
+ * `extractCacheUsage`: token counters plus the measured latency.
+ */
+type BedrockCacheUsage = {
+  inputTokens: number;
+  outputTokens: number;
+  totalTokens: number;
+  // Filled from `usage.cacheWriteInputTokens` by `extractCacheUsage`.
+  cacheCreation: number;
+  // Filled from `usage.cacheReadInputTokens` by `extractCacheUsage`.
+  cacheRead: number;
+  latencyMs: number;
+};
+/**
+ * The part of a Converse response that the benchmark helpers read: an
+ * optional `usage` object whose counters are all optional.
+ */
+type ConverseUsageResponse = {
+  usage?: {
+    inputTokens?: number;
+    outputTokens?: number;
+    totalTokens?: number;
+    cacheReadInputTokens?: number;
+    cacheWriteInputTokens?: number;
+  };
+};
+/**
+ * Tool configuration sent as `toolConfig` by `runConverseCacheBenchmarkTurn`.
+ * It declares a single tool, `lookup_cache_probe`, whose JSON input schema is
+ * an object with one required integer property, `step`.
+ */
+const benchmarkToolConfig = {
+  tools: [
+    {
+      toolSpec: {
+        name: 'lookup_cache_probe',
+        description: 'Returns prompt cache benchmark data.',
+        inputSchema: {
+          json: {
+            type: 'object',
+            properties: {
+              step: { type: 'integer' },
+            },
+            required: ['step'],
+          },
+        },
+      },
+    },
+  ],
+};
+/**
+ * Returns a fresh `{ cachePoint: { type: 'default' } }` content block.
+ * The object has no `type` key of its own, so it is cast to
+ * `MessageContentComplex` rather than declared as one.
+ */
+function cachePointBlock(): MessageContentComplex {
+  return { cachePoint: { type: 'default' } } as MessageContentComplex;
+}
+/**
+ * Returns a copy of `content` with cache markers removed.
+ *
+ * Blocks that have a `cachePoint` key and no `type` key are dropped. Every
+ * remaining block is shallow-cloned and has its `cache_control` property
+ * deleted, so the blocks passed in are not modified.
+ *
+ * @param content Content blocks to clean.
+ * @returns A new array of cloned blocks without cache markers.
+ */
+function stripCacheMarkers(
+  content: MessageContentComplex[]
+): MessageContentComplex[] {
+  return content
+    .filter((block) => !('cachePoint' in block && !('type' in block)))
+    .map((block) => {
+      const cloned = { ...block };
+      delete (cloned as Record<string, unknown>).cache_control;
+      return cloned as MessageContentComplex;
+    });
+}
+/**
+ * Builds a new message of the same kind as `message` that carries `content`
+ * instead of the original content.
+ *
+ * `additional_kwargs` and `response_metadata` are shallow-copied; `id` and
+ * `name` are carried over. AI messages keep their `tool_calls` and tool
+ * messages keep their `tool_call_id`.
+ *
+ * @param message Message to copy metadata from.
+ * @param content Replacement content, passed through `toLangChainContent`.
+ * @returns The rebuilt message. For any type other than `ai`, `human`,
+ *   `system` or `tool`, the original `message` is returned as-is and
+ *   `content` is not applied.
+ */
+function cloneLiveMessage(
+  message: BaseMessage,
+  content: MessageContentComplex[]
+): BaseMessage {
+  const baseParams = {
+    content: toLangChainContent(content),
+    additional_kwargs: { ...message.additional_kwargs },
+    response_metadata: { ...message.response_metadata },
+    id: message.id,
+    name: message.name,
+  };
+  const messageType = message.getType();
+  if (messageType === 'ai') {
+    return new AIMessage({
+      ...baseParams,
+      tool_calls: (message as AIMessage).tool_calls,
+    });
+  }
+  if (messageType === 'human') {
+    return new HumanMessage(baseParams);
+  }
+  if (messageType === 'system') {
+    return new SystemMessage(baseParams);
+  }
+  if (messageType === 'tool') {
+    return new ToolMessage({
+      ...baseParams,
+      tool_call_id: (message as ToolMessage).tool_call_id,
+    });
+  }
+  // Unrecognized message type: fall back to the untouched original.
+  return message;
+}
+/**
+ * Test-local comparison strategy: places cache points on up to two messages,
+ * scanning from the end of the conversation backwards.
+ *
+ * System and tool messages are skipped. A non-empty string content becomes a
+ * text block followed by a cache point. Array content first has its existing
+ * cache markers stripped, then receives a cache point right after its last
+ * qualifying text block. Array-content messages are always replaced by a
+ * stripped clone, even once the two-message budget is used up.
+ *
+ * @param messages Conversation to annotate; the array itself is not mutated.
+ * @returns A new array with rebuilt messages where changes were made.
+ */
+function addLegacyMovingTailBedrockCacheControl(
+  messages: BaseMessage[]
+): BaseMessage[] {
+  const updatedMessages = [...messages];
+  // Number of messages that have received a cache point so far (max 2).
+  let messagesModified = 0;
+  for (let i = updatedMessages.length - 1; i >= 0; i--) {
+    const message = updatedMessages[i];
+    const messageType = message.getType();
+    if (messageType === 'system' || messageType === 'tool') {
+      continue;
+    }
+    const content = message.content;
+    if (typeof content === 'string') {
+      if (content === '' || messagesModified >= 2) {
+        continue;
+      }
+      updatedMessages[i] = cloneLiveMessage(message, [
+        { type: 'text', text: content } as MessageContentComplex,
+        cachePointBlock(),
+      ]);
+      messagesModified++;
+      continue;
+    }
+    if (!Array.isArray(content)) {
+      continue;
+    }
+    const workingContent = stripCacheMarkers(
+      content as MessageContentComplex[]
+    );
+    const lastTextIndex = workingContent.findLastIndex((block) => {
+      const type = (block as { type?: string }).type;
+      const text = (block as { text?: string }).text;
+      // A block with no `text` property also passes: `undefined !== ''`.
+      return (type === 'text' || type === 'input_text') && text?.trim() !== '';
+    });
+    if (messagesModified < 2 && lastTextIndex >= 0) {
+      workingContent.splice(lastTextIndex + 1, 0, cachePointBlock());
+      messagesModified++;
+    }
+    updatedMessages[i] = cloneLiveMessage(message, workingContent);
+  }
+  return updatedMessages;
+}
+/**
+ * Test-local comparison strategy: places a single cache point, on the first
+ * eligible human message found when scanning from the end backwards.
+ *
+ * System messages are skipped. For every other message with array content,
+ * existing cache markers are stripped. A message is only rebuilt when
+ * something changed, either a marker was removed or the cache point was
+ * added. A human message with string content receives the cache point only
+ * when the string is not whitespace-only.
+ *
+ * @param messages Conversation to annotate; the array itself is not mutated.
+ * @returns A new array with rebuilt messages where changes were made.
+ */
+function addLatestUserOnlyBedrockCacheControl(
+  messages: BaseMessage[]
+): BaseMessage[] {
+  const updatedMessages = [...messages];
+  let addedCachePoint = false;
+  for (let i = updatedMessages.length - 1; i >= 0; i--) {
+    const message = updatedMessages[i];
+    const messageType = message.getType();
+    if (messageType === 'system') {
+      continue;
+    }
+    const content = message.content;
+    const hasArrayContent = Array.isArray(content);
+    const canAddCache =
+      !addedCachePoint &&
+      messageType === 'human' &&
+      (typeof content === 'string' || hasArrayContent);
+    // Nothing to add and nothing to strip: leave the message alone.
+    if (!canAddCache && !hasArrayContent) {
+      continue;
+    }
+    let workingContent: MessageContentComplex[];
+    let modified = false;
+    if (hasArrayContent) {
+      workingContent = stripCacheMarkers(content as MessageContentComplex[]);
+      // A shorter array means at least one cache point block was removed.
+      modified = workingContent.length !== content.length;
+      const lastTextIndex = workingContent.findLastIndex((block) => {
+        const type = (block as { type?: string }).type;
+        const text = (block as { text?: string }).text;
+        return (
+          (type === 'text' || type === 'input_text') && text?.trim() !== ''
+        );
+      });
+      if (canAddCache && lastTextIndex >= 0) {
+        workingContent.splice(lastTextIndex + 1, 0, cachePointBlock());
+        addedCachePoint = true;
+        modified = true;
+      }
+      if (!modified) {
+        continue;
+      }
+    } else if (typeof content === 'string' && content.trim() !== '' && canAddCache) {
+      workingContent = [
+        { type: 'text', text: content } as MessageContentComplex,
+        cachePointBlock(),
+      ];
+      addedCachePoint = true;
+    } else {
+      continue;
+    }
+    updatedMessages[i] = cloneLiveMessage(message, workingContent);
+  }
+  return updatedMessages;
+}
+/**
+ * Builds `count` lines of filler text joined with newlines. Each line starts
+ * with `label` and its zero-based index, followed by a fixed sentence.
+ *
+ * @param label Prefix placed at the start of every line.
+ * @param count Number of lines to generate.
+ * @returns The generated lines joined by `\n`.
+ */
+function repeated(label: string, count: number): string {
+  return Array.from(
+    { length: count },
+    (_, index) =>
+      `${label} reference ${index}: stable schema, metric definition, access policy, dashboard note, and query planning guidance.`
+  ).join('\n');
+}
+/**
+ * Builds a five-message conversation: one human turn followed by two
+ * AI tool-call / tool-result pairs for `lookup_cache_probe`.
+ *
+ * The human turn depends only on `nonce`, so it is identical for calls that
+ * share a nonce. Every later message embeds `marker`, both directly and via
+ * the filler payload, so those messages differ between markers.
+ *
+ * @param nonce Value baked into all generated text.
+ * @param marker Value baked into the AI and tool messages and their call ids.
+ * @returns The messages in conversation order.
+ */
+function buildToolLoopMessages({
+  nonce,
+  marker,
+}: {
+  nonce: string;
+  marker: string;
+}): BaseMessage[] {
+  const stableUserContext = [
+    `Bedrock prompt cache placement benchmark ${nonce}.`,
+    'The first user turn is intentionally stable across calls in the same benchmark case.',
+    repeated(`${nonce} user-context`, 190),
+    'Use the final tool result to answer with the requested marker.',
+  ].join('\n');
+  const volatileToolPayload = repeated(`${nonce} volatile-${marker}`, 70);
+  return [
+    new HumanMessage(stableUserContext),
+    new AIMessage({
+      content: `I will inspect cache probe step 1 for ${marker}.\n${volatileToolPayload}`,
+      tool_calls: [
+        {
+          id: `call_${marker}_1`,
+          name: 'lookup_cache_probe',
+          args: { step: 1 },
+        },
+      ],
+    }),
+    new ToolMessage({
+      content: `Tool result 1 for ${marker}.\n${volatileToolPayload}`,
+      tool_call_id: `call_${marker}_1`,
+    }),
+    new AIMessage({
+      content: `I will inspect cache probe step 2 for ${marker}.\n${volatileToolPayload}`,
+      tool_calls: [
+        {
+          id: `call_${marker}_2`,
+          name: 'lookup_cache_probe',
+          args: { step: 2 },
+        },
+      ],
+    }),
+    new ToolMessage({
+      content: [
+        `Final tool result marker: ${marker}.`,
+        'Reply with the marker and no extra explanation.',
+        volatileToolPayload,
+      ].join('\n'),
+      tool_call_id: `call_${marker}_2`,
+    }),
+  ];
+}
+/**
+ * Builds a seven-message conversation with two human turns: a first human
+ * turn, a fixed AI acknowledgement, a second human turn, then two
+ * AI tool-call / tool-result pairs for `lookup_cache_probe`.
+ *
+ * The first human turn depends only on `nonce`. The second human turn and
+ * all messages after it embed `marker`.
+ *
+ * @param nonce Value baked into all generated filler text.
+ * @param marker Value baked into the second human turn, the AI and tool
+ *   messages, and the tool call ids.
+ * @returns The messages in conversation order.
+ */
+function buildMultiTurnToolMessages({
+  nonce,
+  marker,
+}: {
+  nonce: string;
+  marker: string;
+}): BaseMessage[] {
+  const stableFirstUser = [
+    `Bedrock multi-turn prompt cache benchmark ${nonce}.`,
+    'This first user turn is intentionally stable across calls in the same benchmark case.',
+    repeated(`${nonce} stable-user-context`, 190),
+  ].join('\n');
+  const latestUser = [
+    `Current user request marker: ${marker}.`,
+    'Use the final tool result to answer with the marker only.',
+    repeated(`${nonce} latest-user-${marker}`, 18),
+  ].join('\n');
+  const volatileToolPayload = repeated(`${nonce} volatile-tool-${marker}`, 70);
+  return [
+    new HumanMessage(stableFirstUser),
+    new AIMessage('I will keep this stable context in mind.'),
+    new HumanMessage(latestUser),
+    new AIMessage({
+      content: `I will inspect cache probe step 1 for ${marker}.\n${volatileToolPayload}`,
+      tool_calls: [
+        {
+          id: `call_${marker}_1`,
+          name: 'lookup_cache_probe',
+          args: { step: 1 },
+        },
+      ],
+    }),
+    new ToolMessage({
+      content: `Tool result 1 for ${marker}.\n${volatileToolPayload}`,
+      tool_call_id: `call_${marker}_1`,
+    }),
+    new AIMessage({
+      content: `I will inspect cache probe step 2 for ${marker}.\n${volatileToolPayload}`,
+      tool_calls: [
+        {
+          id: `call_${marker}_2`,
+          name: 'lookup_cache_probe',
+          args: { step: 2 },
+        },
+      ],
+    }),
+    new ToolMessage({
+      content: [
+        `Final tool result marker: ${marker}.`,
+        'Reply with the marker and no extra explanation.',
+        volatileToolPayload,
+      ].join('\n'),
+      tool_call_id: `call_${marker}_2`,
+    }),
+  ];
+}
+/**
+ * Converts the `usage` section of a Converse response into a
+ * `BedrockCacheUsage` record.
+ *
+ * Missing counters default to 0. A missing `totalTokens` falls back to
+ * `inputTokens + outputTokens`.
+ *
+ * @param response Response object whose `usage` field is read.
+ * @param latencyMs Latency to store on the result unchanged.
+ * @returns The normalized usage record.
+ * @throws Error when `response.usage` is null or undefined.
+ */
+function extractCacheUsage(
+  response: ConverseUsageResponse,
+  latencyMs: number
+): BedrockCacheUsage {
+  if (response.usage == null) {
+    throw new Error('Missing Bedrock usage metadata for cache benchmark');
+  }
+  const inputTokens = response.usage.inputTokens ?? 0;
+  const outputTokens = response.usage.outputTokens ?? 0;
+  return {
+    inputTokens,
+    outputTokens,
+    totalTokens: response.usage.totalTokens ?? inputTokens + outputTokens,
+    cacheCreation: response.usage.cacheWriteInputTokens ?? 0,
+    cacheRead: response.usage.cacheReadInputTokens ?? 0,
+    latencyMs,
+  };
+}
+/**
+ * Sends one `ConverseCommand` built from `messages` and returns its usage
+ * counters together with the elapsed wall-clock time.
+ *
+ * The messages are converted with `convertToConverseMessages`. The `system`
+ * field is only included when the converted system list is non-empty. The
+ * request always carries `benchmarkToolConfig` and an inference config of
+ * `maxTokens: 16`, `temperature: 0`.
+ *
+ * @param client Bedrock runtime client used to send the command.
+ * @param messages Conversation to send.
+ * @returns Usage extracted by `extractCacheUsage`, with `latencyMs` measured
+ *   around `client.send` using `Date.now()`.
+ */
+async function runConverseCacheBenchmarkTurn({
+  client,
+  messages,
+}: {
+  client: BedrockRuntimeClient;
+  messages: BaseMessage[];
+}): Promise<BedrockCacheUsage> {
+  const { converseMessages, converseSystem } =
+    convertToConverseMessages(messages);
+  const startedAt = Date.now();
+  const response = await client.send(
+    new ConverseCommand({
+      modelId: model,
+      ...(converseSystem.length > 0 ? { system: converseSystem } : {}),
+      messages: converseMessages,
+      toolConfig: benchmarkToolConfig,
+      inferenceConfig: { maxTokens: 16, temperature: 0 },
+    })
+  );
+  return extractCacheUsage(
+    response as ConverseUsageResponse,
+    Date.now() - startedAt
+  );
+}
 describeIfLive('AgentContext Bedrock prompt cache live API', () => {
   it('caches only the stable system prefix while dynamic tail changes', async () => {
     const nonce = `agent-bedrock-cache-live-${Date.now()}`;
@@ -146,4 +528,126 @@ describeIfLive('AgentContext Bedrock prompt cache live API', () => {
     expect(second.text.toLowerCase()).toContain('bravo');
     expect(second.usage.input_token_details?.cache_read).toBeGreaterThan(0);
   }, 180_000);
+  // Compares two cache-point placements on the tool-loop conversation:
+  // the test-local legacy strategy and `addBedrockCacheControl`. Each
+  // strategy is run twice with the same nonce and markers 'alpha' then
+  // 'bravo', with `waitForCachePropagation()` awaited between its two calls.
+  it('reduces repeated cache writes versus the previous moving-tail placement', async () => {
+    const credentials = getCredentials();
+    const client = new BedrockRuntimeClient({
+      region,
+      ...(credentials != null ? { credentials } : {}),
+    });
+    // Separate nonces per strategy so the two runs use different prompt text.
+    const nonce = `bedrock-cache-placement-${Date.now()}`;
+    const legacyNonce = `${nonce}-legacy`;
+    const currentNonce = `${nonce}-current`;
+    const legacyFirst = await runConverseCacheBenchmarkTurn({
+      client,
+      messages: addLegacyMovingTailBedrockCacheControl(
+        buildToolLoopMessages({ nonce: legacyNonce, marker: 'alpha' })
+      ),
+    });
+    await waitForCachePropagation();
+    const legacySecond = await runConverseCacheBenchmarkTurn({
+      client,
+      messages: addLegacyMovingTailBedrockCacheControl(
+        buildToolLoopMessages({ nonce: legacyNonce, marker: 'bravo' })
+      ),
+    });
+    const currentFirst = await runConverseCacheBenchmarkTurn({
+      client,
+      messages: addBedrockCacheControl(
+        buildToolLoopMessages({ nonce: currentNonce, marker: 'alpha' })
+      ),
+    });
+    await waitForCachePropagation();
+    const currentSecond = await runConverseCacheBenchmarkTurn({
+      client,
+      messages: addBedrockCacheControl(
+        buildToolLoopMessages({ nonce: currentNonce, marker: 'bravo' })
+      ),
+    });
+    const cacheWriteReduction =
+      legacySecond.cacheCreation - currentSecond.cacheCreation;
+    // Emit the raw numbers so a run can be inspected from the test output.
+    process.stdout.write(
+      `Bedrock cache placement benchmark ${JSON.stringify({
+        legacyFirst,
+        legacySecond,
+        currentFirst,
+        currentSecond,
+        cacheWriteReduction,
+      })}\n`
+    );
+    // The second current call must read from cache, and must write fewer
+    // cache tokens than half (rounded up) of the legacy second call.
+    expect(currentSecond.cacheRead).toBeGreaterThan(0);
+    expect(cacheWriteReduction).toBeGreaterThan(0);
+    expect(currentSecond.cacheCreation).toBeLessThan(
+      Math.ceil(legacySecond.cacheCreation * 0.5)
+    );
+  }, 240_000);
+  // Compares `addBedrockCacheControl` with the test-local latest-user-only
+  // strategy on the multi-turn conversation. Each strategy is run twice with
+  // the same nonce and markers 'alpha' then 'bravo', with
+  // `waitForCachePropagation()` awaited between its two calls.
+  it('reuses prior user cache points when the latest user turn changes', async () => {
+    const credentials = getCredentials();
+    const client = new BedrockRuntimeClient({
+      region,
+      ...(credentials != null ? { credentials } : {}),
+    });
+    // Separate nonces per strategy so the two runs use different prompt text.
+    const nonce = `bedrock-multiturn-cache-placement-${Date.now()}`;
+    const currentNonce = `${nonce}-current`;
+    const latestOnlyNonce = `${nonce}-latest-only`;
+    const currentFirst = await runConverseCacheBenchmarkTurn({
+      client,
+      messages: addBedrockCacheControl(
+        buildMultiTurnToolMessages({ nonce: currentNonce, marker: 'alpha' })
+      ),
+    });
+    await waitForCachePropagation();
+    const currentSecond = await runConverseCacheBenchmarkTurn({
+      client,
+      messages: addBedrockCacheControl(
+        buildMultiTurnToolMessages({ nonce: currentNonce, marker: 'bravo' })
+      ),
+    });
+    const latestOnlyFirst = await runConverseCacheBenchmarkTurn({
+      client,
+      messages: addLatestUserOnlyBedrockCacheControl(
+        buildMultiTurnToolMessages({ nonce: latestOnlyNonce, marker: 'alpha' })
+      ),
+    });
+    await waitForCachePropagation();
+    const latestOnlySecond = await runConverseCacheBenchmarkTurn({
+      client,
+      messages: addLatestUserOnlyBedrockCacheControl(
+        buildMultiTurnToolMessages({ nonce: latestOnlyNonce, marker: 'bravo' })
+      ),
+    });
+    // Emit the raw numbers so a run can be inspected from the test output.
+    process.stdout.write(
+      `Bedrock multi-turn cache placement benchmark ${JSON.stringify({
+        currentFirst,
+        currentSecond,
+        latestOnlyFirst,
+        latestOnlySecond,
+        cacheWriteDelta:
+          currentSecond.cacheCreation - latestOnlySecond.cacheCreation,
+      })}\n`
+    );
+    // On the second call the current placement must read more cached tokens
+    // and write fewer than the latest-user-only placement.
+    expect(currentSecond.cacheRead).toBeGreaterThan(
+      latestOnlySecond.cacheRead
+    );
+    expect(currentSecond.cacheCreation).toBeLessThan(
+      latestOnlySecond.cacheCreation
+    );
+  }, 240_000);
 });

package/src/graphs/Graph.ts CHANGED Viewed

@@ -81,6 +81,7 @@ import {
 import { HandlerRegistry } from '@/events';
 import { ChatOpenAI } from '@/llm/openai';
 import { partitionAndMarkOpenRouterToolCache } from '@/llm/openrouter/toolCache';
+import { partitionAndMarkBedrockToolCache } from '@/llm/bedrock/toolCache';
 import type { HookRegistry } from '@/hooks';
 const { AGENT, TOOLS, SUMMARIZE } = GraphNodeKeys;
@@ -962,6 +963,19 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
             rawToolsForBinding,
             makeIsDeferred(agentContext.toolDefinitions)
           ) ?? rawToolsForBinding;
+      } else if (
+        agentContext.provider === Providers.BEDROCK &&
+        (
+          agentContext.clientOptions as
+            | t.BedrockAnthropicClientOptions
+            | undefined
+        )?.promptCache === true
+      ) {
+        toolsForBinding =
+          partitionAndMarkBedrockToolCache(
+            rawToolsForBinding,
+            makeIsDeferred(agentContext.toolDefinitions)
+          ) ?? rawToolsForBinding;
       }
       let model =

package/src/langfuseToolOutputTracing.ts CHANGED Viewed

@@ -20,6 +20,7 @@ const langfuseConfigKey = createContextKey('librechat.langfuse.config');
 const toolOutputTracingStorage =
   new AsyncLocalStorage<ResolvedLangfuseToolOutputTracingConfig>();
 const langfuseConfigStorage = new AsyncLocalStorage<t.LangfuseConfig>();
+/** Prefix matched against a span's `langgraph_node` metadata value by `classifyLangGraphToolNodeSpan`. */
+const LANGGRAPH_TOOL_NODE_PREFIX = 'tools=';
 const CHAT_ROLES = new Set([
   'assistant',
@@ -446,6 +447,26 @@ function isToolObservation(attributes: Record<string, unknown>): boolean {
   return typeof type === 'string' && type.toLowerCase() === 'tool';
 }
+/**
+ * Relabels a span as a tool observation when it comes from a LangGraph
+ * tool node.
+ *
+ * Does nothing unless the observation type attribute is the string `span`
+ * (compared case-insensitively). If the `langgraph_node` metadata attribute
+ * is a string starting with `LANGGRAPH_TOOL_NODE_PREFIX`, the observation
+ * type attribute is overwritten with `'tool'`.
+ *
+ * @param attributes Span attribute map; modified in place.
+ */
+function classifyLangGraphToolNodeSpan(
+  attributes: Record<string, unknown>
+): void {
+  const type = attributes[LangfuseOtelSpanAttributes.OBSERVATION_TYPE];
+  if (typeof type !== 'string' || type.toLowerCase() !== 'span') {
+    return;
+  }
+  const langGraphNode =
+    attributes[
+      `${LangfuseOtelSpanAttributes.OBSERVATION_METADATA}.langgraph_node`
+    ];
+  if (
+    typeof langGraphNode === 'string' &&
+    langGraphNode.startsWith(LANGGRAPH_TOOL_NODE_PREFIX)
+  ) {
+    attributes[LangfuseOtelSpanAttributes.OBSERVATION_TYPE] = 'tool';
+  }
+}
 function redactToolObservationOutput(
   span: ReadableSpan,
   attributes: Record<string, unknown>,
@@ -469,11 +490,13 @@ export function redactLangfuseSpanToolOutputs(
   span: ReadableSpan,
   config: ResolvedLangfuseToolOutputTracingConfig
 ): void {
+  const attributes = (span as SpanWithAttributes).attributes;
+  classifyLangGraphToolNodeSpan(attributes);
   if (!shouldApplyToolOutputRedaction(config)) {
     return;
   }
-  const attributes = (span as SpanWithAttributes).attributes;
   redactToolObservationOutput(span, attributes, config);
   for (const key of [
@@ -618,10 +641,7 @@ function hasLangfuseConfigKeys(langfuse?: t.LangfuseConfig): boolean {
   if (langfuse == null) {
     return false;
   }
-  return (
-    isPresent(langfuse.secretKey) &&
-    isPresent(langfuse.publicKey)
-  );
+  return isPresent(langfuse.secretKey) && isPresent(langfuse.publicKey);
 }
 export function shouldTraceToolNodeForLangfuse({
@@ -639,8 +659,7 @@ export function shouldTraceToolNodeForLangfuse({
   const explicit = langfuse?.toolNodeTracing?.enabled;
   if (explicit != null) {
     return (
-      explicit &&
-      (hasLangfuseConfigKeys(langfuse) || hasLangfuseEnvKeys())
+      explicit && (hasLangfuseConfigKeys(langfuse) || hasLangfuseEnvKeys())
     );
   }

package/src/llm/bedrock/index.ts CHANGED Viewed

@@ -22,12 +22,17 @@
  */
 import { ChatBedrockConverse } from '@langchain/aws';
-import { ConverseStreamCommand } from '@aws-sdk/client-bedrock-runtime';
+import {
+  ConverseStreamCommand,
+  type GuardrailConfiguration,
+  type GuardrailStreamConfiguration,
+} from '@aws-sdk/client-bedrock-runtime';
 import { AIMessageChunk } from '@langchain/core/messages';
 import { ChatGenerationChunk, ChatResult } from '@langchain/core/outputs';
 import type { CallbackManagerForLLMRun } from '@langchain/core/callbacks/manager';
 import type { ChatBedrockConverseInput } from '@langchain/aws';
 import type { BaseMessage, ResponseMetadata } from '@langchain/core/messages';
+import { insertBedrockToolCachePoint } from './toolCache';
 import {
   convertToConverseMessages,
   handleConverseStreamContentBlockStart,
@@ -42,6 +47,9 @@ import {
  */
 export type ServiceTierType = 'priority' | 'default' | 'flex' | 'reserved';
+/**
+ * `GuardrailConfiguration` extended with the `streamProcessingMode` field
+ * taken from `GuardrailStreamConfiguration`.
+ */
+export type CustomGuardrailConfiguration = GuardrailConfiguration &
+  Pick<GuardrailStreamConfiguration, 'streamProcessingMode'>;
 /**
  * Extended input interface with additional features:
  * - applicationInferenceProfile: Use an inference profile ARN instead of model ID
@@ -49,6 +57,17 @@ export type ServiceTierType = 'priority' | 'default' | 'flex' | 'reserved';
  */
 export interface CustomChatBedrockConverseInput
   extends ChatBedrockConverseInput {
+  /**
+   * Enables Bedrock prompt cache checkpoints for message and tool prefixes.
+   */
+  promptCache?: boolean;
+  /**
+   * Guardrail configuration for Converse and ConverseStream invocations.
+   * `streamProcessingMode` is only used by ConverseStream.
+   */
+  guardrailConfig?: CustomGuardrailConfiguration;
   /**
    * Application Inference Profile ARN to use for the model.
    * For example, "arn:aws:bedrock:eu-west-1:123456789102:application-inference-profile/fm16bt65tzgx"
@@ -80,9 +99,15 @@ export interface CustomChatBedrockConverseInput
  */
 export interface CustomChatBedrockConverseCallOptions {
   serviceTier?: ServiceTierType;
+  guardrailConfig?: CustomGuardrailConfiguration;
 }
 export class CustomChatBedrockConverse extends ChatBedrockConverse {
+  /**
+   * Whether to insert Bedrock prompt cache checkpoints when available.
+   */
+  promptCache?: boolean;
   /**
    * Application Inference Profile ARN to use instead of model ID.
    */
@@ -95,6 +120,7 @@ export class CustomChatBedrockConverse extends ChatBedrockConverse {
   constructor(fields?: CustomChatBedrockConverseInput) {
     super(fields);
+    this.promptCache = fields?.promptCache;
     this.applicationInferenceProfile = fields?.applicationInferenceProfile;
     this.serviceTier = fields?.serviceTier;
   }
@@ -120,12 +146,17 @@ export class CustomChatBedrockConverse extends ChatBedrockConverse {
     serviceTier?: { type: ServiceTierType };
   } {
     const baseParams = super.invocationParams(options);
+    const toolConfig =
+      this.promptCache === true
+        ? insertBedrockToolCachePoint(baseParams.toolConfig, true)
+        : baseParams.toolConfig;
     /** Service tier from options or fall back to class-level setting */
     const serviceTierType = options?.serviceTier ?? this.serviceTier;
     return {
       ...baseParams,
+      toolConfig,
       serviceTier: serviceTierType ? { type: serviceTierType } : undefined,
     };
   }