npm - @librechat/agents - Versions diffs - 3.1.73 → 3.1.75-dev.0 - Mend

@librechat/agents 3.1.73 → 3.1.75-dev.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (159) hide show

package/README.md +66 -0
package/dist/cjs/agents/AgentContext.cjs +146 -57
package/dist/cjs/agents/AgentContext.cjs.map +1 -1
package/dist/cjs/graphs/Graph.cjs +13 -3
package/dist/cjs/graphs/Graph.cjs.map +1 -1
package/dist/cjs/llm/anthropic/index.cjs +145 -52
package/dist/cjs/llm/anthropic/index.cjs.map +1 -1
package/dist/cjs/llm/anthropic/types.cjs.map +1 -1
package/dist/cjs/llm/anthropic/utils/message_inputs.cjs +25 -15
package/dist/cjs/llm/anthropic/utils/message_inputs.cjs.map +1 -1
package/dist/cjs/llm/anthropic/utils/message_outputs.cjs +84 -70
package/dist/cjs/llm/anthropic/utils/message_outputs.cjs.map +1 -1
package/dist/cjs/llm/bedrock/index.cjs +1 -1
package/dist/cjs/llm/bedrock/index.cjs.map +1 -1
package/dist/cjs/llm/bedrock/utils/message_inputs.cjs +213 -3
package/dist/cjs/llm/bedrock/utils/message_inputs.cjs.map +1 -1
package/dist/cjs/llm/bedrock/utils/message_outputs.cjs +2 -1
package/dist/cjs/llm/bedrock/utils/message_outputs.cjs.map +1 -1
package/dist/cjs/llm/google/utils/common.cjs +5 -4
package/dist/cjs/llm/google/utils/common.cjs.map +1 -1
package/dist/cjs/llm/openai/index.cjs +468 -647
package/dist/cjs/llm/openai/index.cjs.map +1 -1
package/dist/cjs/llm/openai/utils/index.cjs +1 -448
package/dist/cjs/llm/openai/utils/index.cjs.map +1 -1
package/dist/cjs/llm/openrouter/index.cjs +57 -175
package/dist/cjs/llm/openrouter/index.cjs.map +1 -1
package/dist/cjs/llm/vertexai/index.cjs +5 -3
package/dist/cjs/llm/vertexai/index.cjs.map +1 -1
package/dist/cjs/main.cjs +1 -0
package/dist/cjs/main.cjs.map +1 -1
package/dist/cjs/messages/cache.cjs +39 -4
package/dist/cjs/messages/cache.cjs.map +1 -1
package/dist/cjs/messages/core.cjs +7 -6
package/dist/cjs/messages/core.cjs.map +1 -1
package/dist/cjs/messages/format.cjs +7 -6
package/dist/cjs/messages/format.cjs.map +1 -1
package/dist/cjs/messages/langchain.cjs +26 -0
package/dist/cjs/messages/langchain.cjs.map +1 -0
package/dist/cjs/messages/prune.cjs +7 -6
package/dist/cjs/messages/prune.cjs.map +1 -1
package/dist/cjs/tools/BashExecutor.cjs +21 -11
package/dist/cjs/tools/BashExecutor.cjs.map +1 -1
package/dist/cjs/tools/CodeExecutor.cjs +37 -10
package/dist/cjs/tools/CodeExecutor.cjs.map +1 -1
package/dist/cjs/tools/ProgrammaticToolCalling.cjs +16 -11
package/dist/cjs/tools/ProgrammaticToolCalling.cjs.map +1 -1
package/dist/cjs/tools/ToolNode.cjs +5 -1
package/dist/cjs/tools/ToolNode.cjs.map +1 -1
package/dist/esm/agents/AgentContext.mjs +147 -58
package/dist/esm/agents/AgentContext.mjs.map +1 -1
package/dist/esm/graphs/Graph.mjs +13 -3
package/dist/esm/graphs/Graph.mjs.map +1 -1
package/dist/esm/llm/anthropic/index.mjs +146 -54
package/dist/esm/llm/anthropic/index.mjs.map +1 -1
package/dist/esm/llm/anthropic/types.mjs.map +1 -1
package/dist/esm/llm/anthropic/utils/message_inputs.mjs +25 -15
package/dist/esm/llm/anthropic/utils/message_inputs.mjs.map +1 -1
package/dist/esm/llm/anthropic/utils/message_outputs.mjs +84 -71
package/dist/esm/llm/anthropic/utils/message_outputs.mjs.map +1 -1
package/dist/esm/llm/bedrock/index.mjs +1 -1
package/dist/esm/llm/bedrock/index.mjs.map +1 -1
package/dist/esm/llm/bedrock/utils/message_inputs.mjs +214 -4
package/dist/esm/llm/bedrock/utils/message_inputs.mjs.map +1 -1
package/dist/esm/llm/bedrock/utils/message_outputs.mjs +2 -1
package/dist/esm/llm/bedrock/utils/message_outputs.mjs.map +1 -1
package/dist/esm/llm/google/utils/common.mjs +5 -4
package/dist/esm/llm/google/utils/common.mjs.map +1 -1
package/dist/esm/llm/openai/index.mjs +469 -648
package/dist/esm/llm/openai/index.mjs.map +1 -1
package/dist/esm/llm/openai/utils/index.mjs +4 -449
package/dist/esm/llm/openai/utils/index.mjs.map +1 -1
package/dist/esm/llm/openrouter/index.mjs +57 -175
package/dist/esm/llm/openrouter/index.mjs.map +1 -1
package/dist/esm/llm/vertexai/index.mjs +5 -3
package/dist/esm/llm/vertexai/index.mjs.map +1 -1
package/dist/esm/main.mjs +1 -1
package/dist/esm/messages/cache.mjs +39 -4
package/dist/esm/messages/cache.mjs.map +1 -1
package/dist/esm/messages/core.mjs +7 -6
package/dist/esm/messages/core.mjs.map +1 -1
package/dist/esm/messages/format.mjs +7 -6
package/dist/esm/messages/format.mjs.map +1 -1
package/dist/esm/messages/langchain.mjs +23 -0
package/dist/esm/messages/langchain.mjs.map +1 -0
package/dist/esm/messages/prune.mjs +7 -6
package/dist/esm/messages/prune.mjs.map +1 -1
package/dist/esm/tools/BashExecutor.mjs +22 -12
package/dist/esm/tools/BashExecutor.mjs.map +1 -1
package/dist/esm/tools/CodeExecutor.mjs +37 -11
package/dist/esm/tools/CodeExecutor.mjs.map +1 -1
package/dist/esm/tools/ProgrammaticToolCalling.mjs +17 -12
package/dist/esm/tools/ProgrammaticToolCalling.mjs.map +1 -1
package/dist/esm/tools/ToolNode.mjs +5 -1
package/dist/esm/tools/ToolNode.mjs.map +1 -1
package/dist/types/agents/AgentContext.d.ts +29 -4
package/dist/types/agents/__tests__/promptCacheLiveHelpers.d.ts +46 -0
package/dist/types/llm/anthropic/index.d.ts +22 -9
package/dist/types/llm/anthropic/types.d.ts +5 -1
package/dist/types/llm/anthropic/utils/message_outputs.d.ts +13 -6
package/dist/types/llm/anthropic/utils/output_parsers.d.ts +1 -1
package/dist/types/llm/openai/index.d.ts +21 -24
package/dist/types/llm/openrouter/index.d.ts +11 -9
package/dist/types/llm/vertexai/index.d.ts +1 -0
package/dist/types/messages/cache.d.ts +4 -1
package/dist/types/messages/langchain.d.ts +27 -0
package/dist/types/tools/CodeExecutor.d.ts +6 -0
package/dist/types/types/graph.d.ts +26 -38
package/dist/types/types/llm.d.ts +3 -3
package/dist/types/types/run.d.ts +2 -0
package/dist/types/types/stream.d.ts +1 -1
package/dist/types/types/tools.d.ts +9 -0
package/package.json +17 -16
package/src/agents/AgentContext.ts +189 -71
package/src/agents/__tests__/AgentContext.anthropic.live.test.ts +116 -0
package/src/agents/__tests__/AgentContext.bedrock.live.test.ts +149 -0
package/src/agents/__tests__/AgentContext.test.ts +333 -2
package/src/agents/__tests__/promptCacheLiveHelpers.ts +165 -0
package/src/graphs/Graph.ts +24 -4
package/src/graphs/__tests__/composition.smoke.test.ts +188 -0
package/src/llm/anthropic/index.ts +252 -84
package/src/llm/anthropic/llm.spec.ts +751 -102
package/src/llm/anthropic/types.ts +9 -1
package/src/llm/anthropic/utils/message_inputs.ts +43 -20
package/src/llm/anthropic/utils/message_outputs.ts +119 -101
package/src/llm/anthropic/utils/server-tool-inputs.test.ts +77 -0
package/src/llm/bedrock/index.ts +2 -2
package/src/llm/bedrock/llm.spec.ts +341 -0
package/src/llm/bedrock/utils/message_inputs.ts +303 -4
package/src/llm/bedrock/utils/message_outputs.ts +2 -1
package/src/llm/custom-chat-models.smoke.test.ts +662 -0
package/src/llm/google/llm.spec.ts +339 -57
package/src/llm/google/utils/common.ts +53 -48
package/src/llm/openai/contentBlocks.test.ts +346 -0
package/src/llm/openai/index.ts +736 -837
package/src/llm/openai/utils/index.ts +84 -64
package/src/llm/openrouter/index.ts +124 -247
package/src/llm/openrouter/reasoning.test.ts +8 -1
package/src/llm/vertexai/index.ts +11 -5
package/src/llm/vertexai/llm.spec.ts +28 -1
package/src/messages/cache.test.ts +106 -4
package/src/messages/cache.ts +57 -5
package/src/messages/core.ts +16 -9
package/src/messages/format.ts +9 -6
package/src/messages/langchain.ts +39 -0
package/src/messages/prune.ts +12 -8
package/src/scripts/caching.ts +2 -3
package/src/specs/anthropic.simple.test.ts +61 -0
package/src/specs/summarization.test.ts +58 -61
package/src/tools/BashExecutor.ts +37 -13
package/src/tools/CodeExecutor.ts +55 -11
package/src/tools/ProgrammaticToolCalling.ts +29 -14
package/src/tools/ToolNode.ts +5 -1
package/src/tools/__tests__/ProgrammaticToolCalling.test.ts +60 -0
package/src/types/graph.ts +35 -88
package/src/types/llm.ts +3 -3
package/src/types/run.ts +2 -0
package/src/types/stream.ts +1 -1
package/src/types/tools.ts +9 -0
package/src/utils/llmConfig.ts +1 -6

package/src/agents/AgentContext.ts CHANGED Viewed

@@ -20,6 +20,16 @@ import { addCacheControl } from '@/messages/cache';
 import { DEFAULT_RESERVE_RATIO } from '@/messages';
 import { toJsonSchema } from '@/utils/schema';
+type AgentSystemTextBlock = {
+  type: 'text';
+  text: string;
+  cache_control?: { type: 'ephemeral' };
+};
+type AgentSystemContentBlock =
+  | AgentSystemTextBlock
+  | { cachePoint: { type: 'default' } };
 /**
  * Encapsulates agent-specific state that can vary between agents in a multi-agent system
  */
@@ -249,7 +259,7 @@ export class AgentContext {
   private summaryTokenCount: number = 0;
   /**
    * Where the summary should be injected:
-   * - `'system_prompt'`: cross-run summary, included in `buildInstructionsString`
+   * - `'system_prompt'`: cross-run summary, included in the dynamic system tail
    * - `'user_message'`: mid-run compaction, injected as HumanMessage on clean slate
    * - `'none'`: no summary present
    */
@@ -417,7 +427,8 @@ export class AgentContext {
   /**
    * Gets the system runnable, creating it lazily if needed.
-   * Includes instructions, additional instructions, and programmatic-only tools documentation.
+   * Includes stable instructions, dynamic additional instructions, and
+   * programmatic-only tools documentation.
    * Only rebuilds when marked stale (via markToolsAsDiscovered).
    */
   get systemRunnable():
@@ -431,8 +442,10 @@ export class AgentContext {
       return this.cachedSystemRunnable;
     }
-    const instructionsString = this.buildInstructionsString();
-    this.cachedSystemRunnable = this.buildSystemRunnable(instructionsString);
+    this.cachedSystemRunnable = this.buildSystemRunnable({
+      stableInstructions: this.buildStableInstructionsString(),
+      dynamicInstructions: this.buildDynamicInstructionsString(),
+    });
     this.systemRunnableStale = false;
     return this.cachedSystemRunnable;
   }
@@ -443,17 +456,19 @@ export class AgentContext {
    */
   initializeSystemRunnable(): void {
     if (this.systemRunnableStale || this.cachedSystemRunnable === undefined) {
-      const instructionsString = this.buildInstructionsString();
-      this.cachedSystemRunnable = this.buildSystemRunnable(instructionsString);
+      this.cachedSystemRunnable = this.buildSystemRunnable({
+        stableInstructions: this.buildStableInstructionsString(),
+        dynamicInstructions: this.buildDynamicInstructionsString(),
+      });
       this.systemRunnableStale = false;
     }
   }
   /**
-   * Builds the raw instructions string (without creating SystemMessage).
+   * Builds the cacheable instructions string (without creating SystemMessage).
    * Includes agent identity preamble and handoff context when available.
    */
-  private buildInstructionsString(): string {
+  private buildStableInstructionsString(): string {
     const parts: string[] = [];
     const identityPreamble = this.buildIdentityPreamble();
@@ -465,6 +480,22 @@ export class AgentContext {
       parts.push(this.instructions);
     }
+    const programmaticToolsDoc = this.buildProgrammaticOnlyToolsInstructions();
+    if (programmaticToolsDoc) {
+      parts.push(programmaticToolsDoc);
+    }
+    return parts.join('\n\n');
+  }
+  /**
+   * Builds the dynamic system-tail string (without creating SystemMessage).
+   * Keep this out of prompt-cache-marked content so volatile context does not
+   * invalidate the stable prefix.
+   */
+  private buildDynamicInstructionsString(): string {
+    const parts: string[] = [];
     if (
       this.additionalInstructions != null &&
       this.additionalInstructions !== ''
@@ -472,14 +503,10 @@ export class AgentContext {
       parts.push(this.additionalInstructions);
     }
-    const programmaticToolsDoc = this.buildProgrammaticOnlyToolsInstructions();
-    if (programmaticToolsDoc) {
-      parts.push(programmaticToolsDoc);
-    }
-    // Cross-run summary: include in system prompt so the model has context
-    // from the prior run.  Mid-run summaries are injected as a HumanMessage
-    // on the post-compaction clean slate instead (see buildSystemRunnable).
+    // Cross-run summary: include in the system tail so the model has context
+    // from the prior run without invalidating the cacheable prefix. Mid-run
+    // summaries are injected as a HumanMessage on the post-compaction clean
+    // slate instead (see buildSystemRunnable).
     if (
       this._summaryLocation === 'system_prompt' &&
       this.summaryText != null &&
@@ -523,9 +550,13 @@ export class AgentContext {
    * Build system runnable from the pre-built stable and dynamic instruction strings.
    * Only called when content has actually changed.
    */
-  private buildSystemRunnable(
-    instructionsString: string
-  ):
+  private buildSystemRunnable({
+    stableInstructions,
+    dynamicInstructions,
+  }: {
+    stableInstructions: string;
+    dynamicInstructions: string;
+  }):
     | Runnable<
         BaseMessage[],
         (BaseMessage | SystemMessage)[],
@@ -537,35 +568,17 @@ export class AgentContext {
       this.summaryText != null &&
       this.summaryText !== '';
-    if (!instructionsString && !hasMidRunSummary) {
+    if (!stableInstructions && !dynamicInstructions && !hasMidRunSummary) {
       this.systemMessageTokens = 0;
       return undefined;
     }
-    let finalInstructions: string | BaseMessageFields = instructionsString;
-    let usePromptCache = false;
-    if (this.provider === Providers.ANTHROPIC) {
-      const anthropicOptions = this.clientOptions as
-        | t.AnthropicClientOptions
-        | undefined;
-      if (anthropicOptions?.promptCache === true) {
-        usePromptCache = true;
-        finalInstructions = {
-          content: [
-            {
-              type: 'text',
-              text: instructionsString,
-              cache_control: { type: 'ephemeral' },
-            },
-          ],
-        };
-      }
-    }
-    const systemMessage = instructionsString
-      ? new SystemMessage(finalInstructions)
-      : undefined;
+    const usePromptCache = this.hasAnthropicPromptCache();
+    const systemMessage = this.buildSystemMessage({
+      stableInstructions,
+      dynamicInstructions,
+      usePromptCache,
+    });
     if (this.tokenCounter) {
       this.systemMessageTokens = systemMessage
@@ -615,6 +628,72 @@ export class AgentContext {
     }).withConfig({ runName: 'prompt' });
   }
+  private hasAnthropicPromptCache(): boolean {
+    if (this.provider !== Providers.ANTHROPIC) {
+      return false;
+    }
+    const anthropicOptions = this.clientOptions as
+      | t.AnthropicClientOptions
+      | undefined;
+    return anthropicOptions?.promptCache === true;
+  }
+  private hasBedrockPromptCache(): boolean {
+    if (this.provider !== Providers.BEDROCK) {
+      return false;
+    }
+    const bedrockOptions = this.clientOptions as
+      | t.BedrockAnthropicClientOptions
+      | undefined;
+    return bedrockOptions?.promptCache === true;
+  }
+  private buildSystemMessage({
+    stableInstructions,
+    dynamicInstructions,
+    usePromptCache,
+  }: {
+    stableInstructions: string;
+    dynamicInstructions: string;
+    usePromptCache: boolean;
+  }): SystemMessage | undefined {
+    if (!stableInstructions && !dynamicInstructions) {
+      return undefined;
+    }
+    if (usePromptCache) {
+      const content: AgentSystemContentBlock[] = [];
+      if (stableInstructions) {
+        content.push({
+          type: 'text',
+          text: stableInstructions,
+          cache_control: { type: 'ephemeral' },
+        });
+      }
+      if (dynamicInstructions) {
+        content.push({ type: 'text', text: dynamicInstructions });
+      }
+      return new SystemMessage({ content } as BaseMessageFields);
+    }
+    if (this.hasBedrockPromptCache() && stableInstructions) {
+      const content: AgentSystemContentBlock[] = [
+        { type: 'text', text: stableInstructions },
+        { cachePoint: { type: 'default' } },
+      ];
+      if (dynamicInstructions) {
+        content.push({ type: 'text', text: dynamicInstructions });
+      }
+      return new SystemMessage({ content } as BaseMessageFields);
+    }
+    return new SystemMessage(
+      [stableInstructions, dynamicInstructions]
+        .filter((part) => part !== '')
+        .join('\n\n')
+    );
+  }
   /**
    * Reset context for a new run
    */
@@ -681,10 +760,47 @@ export class AgentContext {
     if (!this.toolDefinitions) {
       return [];
     }
-    return this.toolDefinitions.filter(
-      (def) =>
+    /**
+     * Mirror `getEventDrivenToolsForBinding`'s gate: a definition is only
+     * bound to the model when its `allowed_callers` include `'direct'` and
+     * (if deferred) it has been discovered. Filtering by `defer_loading`
+     * alone left programmatic-only definitions counted in
+     * `toolSchemaTokens` even though they were never bound.
+     */
+    return this.toolDefinitions.filter((def) => {
+      const allowedCallers = def.allowed_callers ?? ['direct'];
+      if (!allowedCallers.includes('direct')) {
+        return false;
+      }
+      return (
         def.defer_loading !== true || this.discoveredToolNames.has(def.name)
-    );
+      );
+    });
+  }
+  /**
+   * Single source of truth for "which entries of `this.tools` should be
+   * treated as actually bound". Callers:
+   *   - `getToolsForBinding` (non-event-driven branch)
+   *   - `getEventDrivenToolsForBinding` (appends instance tools alongside
+   *     schema-only definitions)
+   *   - `calculateInstructionTokens` (counts schema bytes for accounting)
+   *
+   * In event-driven mode (`toolDefinitions` non-empty) or when no
+   * `toolRegistry` is set, instance tools are returned unfiltered; otherwise
+   * they pass through `filterToolsForBinding`. Centralizing this prevents the
+   * accounting/binding paths from drifting apart, which was the root
+   * cause of the original miscount.
+   */
+  private getEffectiveInstanceTools(): t.GraphTools | undefined {
+    if (!this.tools) {
+      return undefined;
+    }
+    const isEventDriven = (this.toolDefinitions?.length ?? 0) > 0;
+    if (isEventDriven || !this.toolRegistry) {
+      return this.tools;
+    }
+    return this.filterToolsForBinding(this.tools);
   }
   /**
@@ -703,9 +819,17 @@ export class AgentContext {
      * populated after `fromConfig()` kicks off the initial calculation, so
      * callers that mutate `graphTools` must re-trigger this method to
      * refresh `toolSchemaTokens`.
+     *
+     * Use `getEffectiveInstanceTools()` so accounting reflects exactly the
+     * subset that `getToolsForBinding` would emit — preventing the
+     * worst-case-ceiling miscount that triggered spurious `empty_messages`
+     * preflight rejections at low `maxContextTokens`. Deferred and
+     * non-`'direct'` `toolDefinitions` are excluded by
+     * `getActiveToolDefinitions()` below.
      */
     const instanceTools: t.GraphTools = [
-      ...((this.tools as t.GenericTool[] | undefined) ?? []),
+      ...((this.getEffectiveInstanceTools() as t.GenericTool[] | undefined) ??
+        []),
       ...((this.graphTools as t.GenericTool[] | undefined) ?? []),
     ];
@@ -900,8 +1024,16 @@ export class AgentContext {
    */
   getTokenBudgetBreakdown(messages?: BaseMessage[]): t.TokenBudgetBreakdown {
     const maxContextTokens = this.maxContextTokens ?? 0;
-    const toolCount =
-      (this.tools?.length ?? 0) + this.getActiveToolDefinitions().length;
+    /**
+     * Derive `toolCount` from `getToolsForBinding()` so the diagnostic stays
+     * aligned with what is actually bound to the model — and with what
+     * `calculateInstructionTokens` counts into `toolSchemaTokens`. Using raw
+     * `this.tools.length` would inflate the count whenever the registry
+     * marks instance tools as deferred-undiscovered or non-`'direct'`,
+     * producing the same misleading "N tools" diagnostic this fix is meant
+     * to eliminate.
+     */
+    const toolCount = this.getToolsForBinding()?.length ?? 0;
     const messageCount = messages?.length ?? 0;
     let messageTokens = 0;
@@ -1014,10 +1146,7 @@ export class AgentContext {
       return this.getEventDrivenToolsForBinding();
     }
-    const filtered =
-      !this.tools || !this.toolRegistry
-        ? this.tools
-        : this.filterToolsForBinding(this.tools);
+    const filtered = this.getEffectiveInstanceTools();
     if (this.graphTools && this.graphTools.length > 0) {
       return [...(filtered ?? []), ...this.graphTools];
@@ -1032,21 +1161,9 @@ export class AgentContext {
       return this.graphTools ?? [];
     }
-    const defsToInclude = this.toolDefinitions.filter((def) => {
-      const allowedCallers = def.allowed_callers ?? ['direct'];
-      if (!allowedCallers.includes('direct')) {
-        return false;
-      }
-      if (
-        def.defer_loading === true &&
-        !this.discoveredToolNames.has(def.name)
-      ) {
-        return false;
-      }
-      return true;
-    });
-    const schemaTools = createSchemaOnlyTools(defsToInclude) as t.GraphTools;
+    const schemaTools = createSchemaOnlyTools(
+      this.getActiveToolDefinitions()
+    ) as t.GraphTools;
     const allTools = [...schemaTools];
@@ -1054,8 +1171,9 @@ export class AgentContext {
       allTools.push(...this.graphTools);
     }
-    if (this.tools && this.tools.length > 0) {
-      allTools.push(...this.tools);
+    const instanceTools = this.getEffectiveInstanceTools();
+    if (instanceTools && instanceTools.length > 0) {
+      allTools.push(...instanceTools);
     }
     return allTools;

package/src/agents/__tests__/AgentContext.anthropic.live.test.ts ADDED Viewed

@@ -0,0 +1,116 @@
+// src/agents/__tests__/AgentContext.anthropic.live.test.ts
+/**
+ * Live Anthropic prompt-cache verification.
+ *
+ * Run with:
+ * RUN_ANTHROPIC_PROMPT_CACHE_LIVE_TESTS=1 ANTHROPIC_API_KEY=... npm test -- AgentContext.anthropic.live.test.ts --runInBand
+ */
+import { config as dotenvConfig } from 'dotenv';
+dotenvConfig();
+import { describe, expect, it } from '@jest/globals';
+import type * as t from '@/types';
+import {
+  runLiveTurn,
+  assertSystemPayloadShape,
+  buildDynamicInstructions,
+  buildStableInstructions,
+  waitForCachePropagation,
+} from './promptCacheLiveHelpers';
+import { Providers } from '@/common';
+const shouldRunLive =
+  process.env.RUN_ANTHROPIC_PROMPT_CACHE_LIVE_TESTS === '1' &&
+  process.env.ANTHROPIC_API_KEY != null &&
+  process.env.ANTHROPIC_API_KEY !== '';
+const describeIfLive = shouldRunLive ? describe : describe.skip;
+const modelName =
+  process.env.ANTHROPIC_PROMPT_CACHE_MODEL ?? 'claude-sonnet-4-5';
+const providerLabel = 'Anthropic';
+function createClientOptions(): t.AnthropicClientOptions {
+  return {
+    modelName,
+    temperature: 0,
+    maxTokens: 8,
+    streaming: true,
+    streamUsage: true,
+    promptCache: true,
+    clientOptions: {
+      defaultHeaders: {
+        'anthropic-beta': 'prompt-caching-2024-07-31',
+      },
+    },
+  };
+}
+describeIfLive('AgentContext Anthropic prompt cache live API', () => {
+  it('caches only the stable system prefix while dynamic tail changes', async () => {
+    const nonce = `agent-cache-live-${Date.now()}`;
+    const clientOptions = createClientOptions();
+    const stableInstructions = buildStableInstructions({
+      nonce,
+      providerLabel,
+    });
+    const firstDynamicInstructions = buildDynamicInstructions({
+      marker: 'alpha',
+      tailDescription:
+        'The Dynamic Marker line is runtime context and must remain outside the cached prefix.',
+    });
+    const secondDynamicInstructions = buildDynamicInstructions({
+      marker: 'bravo',
+      tailDescription:
+        'The Dynamic Marker line is runtime context and must remain outside the cached prefix.',
+    });
+    await assertSystemPayloadShape({
+      agentId: 'live-cache-shape-check',
+      provider: Providers.ANTHROPIC,
+      clientOptions,
+      stableInstructions,
+      dynamicInstructions: firstDynamicInstructions,
+      expectedContent: [
+        {
+          type: 'text',
+          text: stableInstructions,
+          cache_control: { type: 'ephemeral' },
+        },
+        {
+          type: 'text',
+          text: firstDynamicInstructions,
+        },
+      ],
+    });
+    const first = await runLiveTurn({
+      provider: Providers.ANTHROPIC,
+      providerLabel,
+      clientOptions,
+      runId: `${nonce}-first`,
+      threadId: `${nonce}-thread`,
+      stableInstructions,
+      dynamicInstructions: firstDynamicInstructions,
+    });
+    expect(first.text.toLowerCase()).toContain('alpha');
+    expect(first.usage.input_token_details?.cache_creation).toBeGreaterThan(0);
+    expect(first.usage.input_token_details?.cache_read ?? 0).toBe(0);
+    await waitForCachePropagation();
+    const second = await runLiveTurn({
+      provider: Providers.ANTHROPIC,
+      providerLabel,
+      clientOptions,
+      runId: `${nonce}-second`,
+      threadId: `${nonce}-thread`,
+      stableInstructions,
+      dynamicInstructions: secondDynamicInstructions,
+    });
+    expect(second.text.toLowerCase()).toContain('bravo');
+    expect(second.usage.input_token_details?.cache_read).toBeGreaterThan(0);
+  }, 120_000);
+});

package/src/agents/__tests__/AgentContext.bedrock.live.test.ts ADDED Viewed

@@ -0,0 +1,149 @@
+// src/agents/__tests__/AgentContext.bedrock.live.test.ts
+/**
+ * Live Bedrock prompt-cache verification.
+ *
+ * Run with:
+ * RUN_BEDROCK_PROMPT_CACHE_LIVE_TESTS=1 BEDROCK_AWS_REGION=... BEDROCK_AWS_ACCESS_KEY_ID=... BEDROCK_AWS_SECRET_ACCESS_KEY=... npm test -- AgentContext.bedrock.live.test.ts --runInBand
+ *
+ * Standard AWS credential env vars or AWS_PROFILE can also be used.
+ */
+import { config as dotenvConfig } from 'dotenv';
+dotenvConfig();
+import { describe, expect, it } from '@jest/globals';
+import type * as t from '@/types';
+import {
+  runLiveTurn,
+  assertSystemPayloadShape,
+  buildDynamicInstructions,
+  buildStableInstructions,
+  waitForCachePropagation,
+} from './promptCacheLiveHelpers';
+import { Providers } from '@/common';
+const accessKeyId =
+  process.env.BEDROCK_AWS_ACCESS_KEY_ID ?? process.env.AWS_ACCESS_KEY_ID;
+const secretAccessKey =
+  process.env.BEDROCK_AWS_SECRET_ACCESS_KEY ??
+  process.env.AWS_SECRET_ACCESS_KEY;
+const sessionToken =
+  process.env.BEDROCK_AWS_SESSION_TOKEN ?? process.env.AWS_SESSION_TOKEN;
+const hasCredentialPair =
+  accessKeyId != null &&
+  accessKeyId !== '' &&
+  secretAccessKey != null &&
+  secretAccessKey !== '';
+const hasAmbientCredentials =
+  process.env.AWS_PROFILE != null ||
+  process.env.AWS_WEB_IDENTITY_TOKEN_FILE != null;
+const shouldRunLive =
+  process.env.RUN_BEDROCK_PROMPT_CACHE_LIVE_TESTS === '1' &&
+  (hasCredentialPair || hasAmbientCredentials);
+const describeIfLive = shouldRunLive ? describe : describe.skip;
+const model =
+  process.env.BEDROCK_PROMPT_CACHE_MODEL ??
+  'us.anthropic.claude-sonnet-4-5-20250929-v1:0';
+const region =
+  process.env.BEDROCK_AWS_REGION ?? process.env.AWS_REGION ?? 'us-east-1';
+const providerLabel = 'Bedrock';
+function getCredentials():
+  | t.BedrockAnthropicClientOptions['credentials']
+  | undefined {
+  if (!hasCredentialPair) {
+    return undefined;
+  }
+  return {
+    accessKeyId,
+    secretAccessKey,
+    ...(sessionToken != null && sessionToken !== '' ? { sessionToken } : {}),
+  };
+}
+function createClientOptions(): t.BedrockAnthropicClientOptions {
+  const credentials = getCredentials();
+  return {
+    model,
+    region,
+    maxTokens: 8,
+    streaming: true,
+    streamUsage: true,
+    promptCache: true,
+    ...(credentials != null ? { credentials } : {}),
+  };
+}
+describeIfLive('AgentContext Bedrock prompt cache live API', () => {
+  it('caches only the stable system prefix while dynamic tail changes', async () => {
+    const nonce = `agent-bedrock-cache-live-${Date.now()}`;
+    const clientOptions = createClientOptions();
+    const stableInstructions = buildStableInstructions({
+      nonce,
+      providerLabel,
+    });
+    const firstDynamicInstructions = buildDynamicInstructions({
+      marker: 'alpha',
+      tailDescription:
+        'The Dynamic Marker line is runtime context and must remain after the Bedrock cache point.',
+    });
+    const secondDynamicInstructions = buildDynamicInstructions({
+      marker: 'bravo',
+      tailDescription:
+        'The Dynamic Marker line is runtime context and must remain after the Bedrock cache point.',
+    });
+    await assertSystemPayloadShape({
+      agentId: 'live-bedrock-cache-shape-check',
+      provider: Providers.BEDROCK,
+      clientOptions,
+      stableInstructions,
+      dynamicInstructions: firstDynamicInstructions,
+      expectedContent: [
+        {
+          type: 'text',
+          text: stableInstructions,
+        },
+        {
+          cachePoint: { type: 'default' },
+        },
+        {
+          type: 'text',
+          text: firstDynamicInstructions,
+        },
+      ],
+    });
+    const first = await runLiveTurn({
+      provider: Providers.BEDROCK,
+      providerLabel,
+      clientOptions,
+      runId: `${nonce}-first`,
+      threadId: `${nonce}-thread`,
+      stableInstructions,
+      dynamicInstructions: firstDynamicInstructions,
+    });
+    expect(first.text.toLowerCase()).toContain('alpha');
+    expect(first.usage.input_token_details?.cache_creation).toBeGreaterThan(0);
+    expect(first.usage.input_token_details?.cache_read ?? 0).toBe(0);
+    await waitForCachePropagation();
+    const second = await runLiveTurn({
+      provider: Providers.BEDROCK,
+      providerLabel,
+      clientOptions,
+      runId: `${nonce}-second`,
+      threadId: `${nonce}-thread`,
+      stableInstructions,
+      dynamicInstructions: secondDynamicInstructions,
+    });
+    expect(second.text.toLowerCase()).toContain('bravo');
+    expect(second.usage.input_token_details?.cache_read).toBeGreaterThan(0);
+  }, 180_000);
+});