npm - @librechat/agents - Versions diffs - 3.2.33 → 3.2.35 - Mend

@librechat/agents 3.2.33 → 3.2.35

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (133)

package/dist/cjs/agents/AgentContext.cjs +47 -10
package/dist/cjs/agents/AgentContext.cjs.map +1 -1
package/dist/cjs/common/enum.cjs +13 -0
package/dist/cjs/common/enum.cjs.map +1 -1
package/dist/cjs/graphs/Graph.cjs +121 -3
package/dist/cjs/graphs/Graph.cjs.map +1 -1
package/dist/cjs/llm/bedrock/index.cjs +21 -2
package/dist/cjs/llm/bedrock/index.cjs.map +1 -1
package/dist/cjs/llm/bedrock/utils/message_outputs.cjs +38 -2
package/dist/cjs/llm/bedrock/utils/message_outputs.cjs.map +1 -1
package/dist/cjs/llm/google/utils/common.cjs +6 -0
package/dist/cjs/llm/google/utils/common.cjs.map +1 -1
package/dist/cjs/llm/invoke.cjs +49 -8
package/dist/cjs/llm/invoke.cjs.map +1 -1
package/dist/cjs/llm/openai/index.cjs +48 -1
package/dist/cjs/llm/openai/index.cjs.map +1 -1
package/dist/cjs/llm/vertexai/index.cjs +19 -0
package/dist/cjs/llm/vertexai/index.cjs.map +1 -1
package/dist/cjs/main.cjs +2 -0
package/dist/cjs/messages/content.cjs +12 -14
package/dist/cjs/messages/content.cjs.map +1 -1
package/dist/cjs/messages/prune.cjs +31 -13
package/dist/cjs/messages/prune.cjs.map +1 -1
package/dist/cjs/run.cjs +7 -2
package/dist/cjs/run.cjs.map +1 -1
package/dist/cjs/stream.cjs +20 -2
package/dist/cjs/stream.cjs.map +1 -1
package/dist/cjs/summarization/node.cjs +12 -1
package/dist/cjs/summarization/node.cjs.map +1 -1
package/dist/cjs/tools/ToolNode.cjs +41 -4
package/dist/cjs/tools/ToolNode.cjs.map +1 -1
package/dist/cjs/tools/streamedToolCallSeals.cjs +30 -1
package/dist/cjs/tools/streamedToolCallSeals.cjs.map +1 -1
package/dist/cjs/tools/subagent/SubagentExecutor.cjs +138 -2
package/dist/cjs/tools/subagent/SubagentExecutor.cjs.map +1 -1
package/dist/cjs/utils/tokens.cjs +30 -0
package/dist/cjs/utils/tokens.cjs.map +1 -1
package/dist/esm/agents/AgentContext.mjs +47 -10
package/dist/esm/agents/AgentContext.mjs.map +1 -1
package/dist/esm/common/enum.mjs +13 -0
package/dist/esm/common/enum.mjs.map +1 -1
package/dist/esm/graphs/Graph.mjs +122 -4
package/dist/esm/graphs/Graph.mjs.map +1 -1
package/dist/esm/llm/bedrock/index.mjs +22 -3
package/dist/esm/llm/bedrock/index.mjs.map +1 -1
package/dist/esm/llm/bedrock/utils/message_outputs.mjs +38 -3
package/dist/esm/llm/bedrock/utils/message_outputs.mjs.map +1 -1
package/dist/esm/llm/google/utils/common.mjs +6 -0
package/dist/esm/llm/google/utils/common.mjs.map +1 -1
package/dist/esm/llm/invoke.mjs +49 -8
package/dist/esm/llm/invoke.mjs.map +1 -1
package/dist/esm/llm/openai/index.mjs +48 -1
package/dist/esm/llm/openai/index.mjs.map +1 -1
package/dist/esm/llm/vertexai/index.mjs +19 -0
package/dist/esm/llm/vertexai/index.mjs.map +1 -1
package/dist/esm/main.mjs +3 -3
package/dist/esm/messages/content.mjs +12 -15
package/dist/esm/messages/content.mjs.map +1 -1
package/dist/esm/messages/prune.mjs +31 -13
package/dist/esm/messages/prune.mjs.map +1 -1
package/dist/esm/run.mjs +7 -2
package/dist/esm/run.mjs.map +1 -1
package/dist/esm/stream.mjs +21 -3
package/dist/esm/stream.mjs.map +1 -1
package/dist/esm/summarization/node.mjs +12 -1
package/dist/esm/summarization/node.mjs.map +1 -1
package/dist/esm/tools/ToolNode.mjs +41 -4
package/dist/esm/tools/ToolNode.mjs.map +1 -1
package/dist/esm/tools/streamedToolCallSeals.mjs +25 -2
package/dist/esm/tools/streamedToolCallSeals.mjs.map +1 -1
package/dist/esm/tools/subagent/SubagentExecutor.mjs +138 -2
package/dist/esm/tools/subagent/SubagentExecutor.mjs.map +1 -1
package/dist/esm/utils/tokens.mjs +30 -1
package/dist/esm/utils/tokens.mjs.map +1 -1
package/dist/types/agents/AgentContext.d.ts +7 -3
package/dist/types/common/enum.d.ts +13 -0
package/dist/types/graphs/Graph.d.ts +8 -1
package/dist/types/llm/bedrock/utils/index.d.ts +1 -1
package/dist/types/llm/bedrock/utils/message_outputs.d.ts +9 -0
package/dist/types/llm/invoke.d.ts +1 -1
package/dist/types/llm/vertexai/index.d.ts +10 -0
package/dist/types/messages/content.d.ts +5 -0
package/dist/types/messages/prune.d.ts +4 -0
package/dist/types/run.d.ts +1 -0
package/dist/types/tools/ToolNode.d.ts +8 -0
package/dist/types/tools/streamedToolCallSeals.d.ts +5 -1
package/dist/types/tools/subagent/SubagentExecutor.d.ts +11 -1
package/dist/types/types/graph.d.ts +89 -3
package/dist/types/types/run.d.ts +13 -0
package/dist/types/types/tools.d.ts +10 -0
package/dist/types/utils/tokens.d.ts +7 -0
package/package.json +1 -1
package/src/__tests__/stream.eagerEventExecution.test.ts +703 -0
package/src/agents/AgentContext.ts +69 -6
package/src/agents/__tests__/AgentContext.test.ts +6 -2
package/src/common/enum.ts +13 -0
package/src/graphs/Graph.ts +196 -0
package/src/llm/bedrock/index.ts +40 -0
package/src/llm/bedrock/streamSealDispatch.test.ts +158 -0
package/src/llm/bedrock/utils/index.ts +1 -0
package/src/llm/bedrock/utils/message_outputs.test.ts +85 -0
package/src/llm/bedrock/utils/message_outputs.ts +43 -0
package/src/llm/google/utils/common.test.ts +64 -0
package/src/llm/google/utils/common.ts +18 -0
package/src/llm/invoke.test.ts +79 -1
package/src/llm/invoke.ts +58 -4
package/src/llm/openai/index.ts +95 -1
package/src/llm/openai/sequentialToolCallSeals.test.ts +199 -0
package/src/llm/vertexai/index.ts +31 -0
package/src/llm/vertexai/sealStreamedToolCalls.test.ts +88 -0
package/src/llm/vertexai/streamSealDispatch.test.ts +148 -0
package/src/messages/content.ts +24 -32
package/src/messages/prune.ts +39 -2
package/src/run.ts +5 -0
package/src/scripts/subagent-usage-sink.ts +176 -0
package/src/specs/context-accuracy.live.test.ts +409 -0
package/src/specs/context-usage-event.test.ts +117 -0
package/src/specs/context-usage.live.test.ts +297 -0
package/src/specs/prune.test.ts +51 -1
package/src/specs/subagent.test.ts +124 -1
package/src/stream.ts +40 -6
package/src/summarization/__tests__/node.test.ts +60 -1
package/src/summarization/node.ts +20 -1
package/src/tools/ToolNode.ts +85 -3
package/src/tools/__tests__/SubagentExecutor.test.ts +443 -1
package/src/tools/__tests__/ToolNode.onResultCompletion.test.ts +368 -0
package/src/tools/streamedToolCallSeals.ts +37 -9
package/src/tools/subagent/SubagentExecutor.ts +221 -3
package/src/types/graph.ts +94 -1
package/src/types/run.ts +13 -0
package/src/types/tools.ts +10 -0
package/src/utils/__tests__/apportion.test.ts +32 -0
package/src/utils/tokens.ts +33 -0

package/src/agents/AgentContext.ts CHANGED Viewed

@@ -21,6 +21,7 @@ import {
   addCacheControlToStablePrefixMessages,
 } from '@/messages/cache';
 import { createSchemaOnlyTools } from '@/tools/schema';
+import { apportionTokenCounts } from '@/utils/tokens';
 import { DEFAULT_RESERVE_RATIO } from '@/messages';
 import { toJsonSchema } from '@/utils/schema';
@@ -191,6 +192,11 @@ export class AgentContext {
   dynamicInstructionTokens: number = 0;
   /** Token count for tool schemas only. */
   toolSchemaTokens: number = 0;
+  /** Per-tool schema token counts (post-multiplier), keyed by tool name.
+   *  `undefined` when not calculated (e.g. cached aggregate schema tokens). */
+  toolTokenCounts?: Record<string, number>;
+  /** Names of counted tools that are deferred (`defer_loading`) and discovered. */
+  deferredToolNames: string[] = [];
   /** Running calibration ratio from the pruner — persisted across runs via contextMeta. */
   calibrationRatio: number = 1;
   /** Provider-observed instruction overhead from the pruner's best-variance turn. */
@@ -894,6 +900,8 @@ export class AgentContext {
     this.systemMessageTokens = 0;
     this.dynamicInstructionTokens = 0;
     this.toolSchemaTokens = 0;
+    this.toolTokenCounts = undefined;
+    this.deferredToolNames = [];
     this.cachedSystemRunnable = undefined;
     this.systemRunnableStale = true;
     this.lastToken = undefined;
@@ -1006,6 +1014,10 @@ export class AgentContext {
   ): Promise<void> {
     let toolTokens = 0;
     const countedToolNames = new Set<string>();
+    /** Prototype-free: external tool names like `toString` must not hit
+     *  inherited properties during accumulation */
+    const rawToolTokenCounts: Record<string, number> = Object.create(null);
+    const deferredCountedNames = new Set<string>();
     /**
      * Iterate both `tools` (user-provided instance tools) and `graphTools`
@@ -1040,11 +1052,14 @@ export class AgentContext {
             toolName,
             (genericTool.description as string | undefined) ?? ''
           );
-          toolTokens += tokenCounter(
+          const schemaTokens = tokenCounter(
             new SystemMessage(JSON.stringify(jsonSchema))
           );
+          toolTokens += schemaTokens;
           if (toolName) {
             countedToolNames.add(toolName);
+            rawToolTokenCounts[toolName] =
+              (rawToolTokenCounts[toolName] ?? 0) + schemaTokens;
           }
         }
       }
@@ -1062,7 +1077,16 @@ export class AgentContext {
           parameters: def.parameters ?? {},
         },
       };
-      toolTokens += tokenCounter(new SystemMessage(JSON.stringify(schema)));
+      const schemaTokens = tokenCounter(
+        new SystemMessage(JSON.stringify(schema))
+      );
+      toolTokens += schemaTokens;
+      countedToolNames.add(def.name);
+      rawToolTokenCounts[def.name] =
+        (rawToolTokenCounts[def.name] ?? 0) + schemaTokens;
+      if (def.defer_loading === true) {
+        deferredCountedNames.add(def.name);
+      }
     }
     const isAnthropic =
@@ -1077,6 +1101,25 @@ export class AgentContext {
       ? ANTHROPIC_TOOL_TOKEN_MULTIPLIER
       : DEFAULT_TOOL_TOKEN_MULTIPLIER;
     this.toolSchemaTokens = Math.ceil(toolTokens * toolTokenMultiplier);
+    /** Largest-remainder apportionment keeps the per-tool counts summing
+     *  exactly to the aggregate despite per-entry rounding */
+    const toolTokenCounts = apportionTokenCounts(
+      rawToolTokenCounts,
+      toolTokenMultiplier,
+      this.toolSchemaTokens
+    );
+    const deferredToolNames: string[] = [];
+    for (const name of Object.keys(rawToolTokenCounts)) {
+      if (
+        deferredCountedNames.has(name) ||
+        this.toolRegistry?.get(name)?.defer_loading === true
+      ) {
+        deferredToolNames.push(name);
+      }
+    }
+    this.toolTokenCounts = toolTokenCounts;
+    this.deferredToolNames = deferredToolNames;
   }
   /**
@@ -1212,9 +1255,8 @@ export class AgentContext {
    * Returns a structured breakdown of how the context token budget is consumed.
    * Useful for diagnostics when context overflow or pruning issues occur.
    *
-   * Note: `toolCount` reflects discoveries immediately, but `toolSchemaTokens`
-   * is a snapshot taken during `calculateInstructionTokens` and is not
-   * recomputed when `markToolsAsDiscovered` is called mid-run.
+   * Note: `markToolsAsDiscovered` re-triggers `calculateInstructionTokens`,
+   * so `toolSchemaTokens`/`toolTokenCounts` refresh before the next call.
    */
   getTokenBudgetBreakdown(messages?: BaseMessage[]): t.TokenBudgetBreakdown {
     const maxContextTokens = this.maxContextTokens ?? 0;
@@ -1238,7 +1280,14 @@ export class AgentContext {
       }
     }
-    const reserveTokens = Math.round(maxContextTokens * DEFAULT_RESERVE_RATIO);
+    /** Mirror the pruner's reserve math so availableForMessages agrees
+     *  with the contextBudget computed during pruning */
+    const reserveRatio =
+      this.summarizationConfig?.reserveRatio ?? DEFAULT_RESERVE_RATIO;
+    const reserveTokens =
+      reserveRatio > 0 && reserveRatio < 1
+        ? Math.round(maxContextTokens * reserveRatio)
+        : 0;
     const availableForMessages = Math.max(
       0,
       maxContextTokens - reserveTokens - this.instructionTokens
@@ -1255,6 +1304,12 @@ export class AgentContext {
       messageCount,
       messageTokens,
       availableForMessages,
+      toolTokenCounts:
+        this.toolTokenCounts != null ? { ...this.toolTokenCounts } : undefined,
+      deferredToolNames:
+        this.deferredToolNames.length > 0
+          ? [...this.deferredToolNames]
+          : undefined,
     };
   }
@@ -1324,6 +1379,14 @@ export class AgentContext {
     }
     if (hasNewDiscoveries) {
       this.systemRunnableStale = true;
+      /** Refresh schema token accounting so the next call's budget and
+       *  per-tool breakdown include the newly discovered tools; awaited
+       *  via tokenCalculationPromise before the next model call */
+      if (this.tokenCounter) {
+        this.tokenCalculationPromise = this.calculateInstructionTokens(
+          this.tokenCounter
+        );
+      }
     }
     return hasNewDiscoveries;
   }

package/src/agents/__tests__/AgentContext.test.ts CHANGED Viewed

@@ -1414,7 +1414,7 @@ describe('AgentContext', () => {
       expect(ctx.getTokenBudgetBreakdown().toolCount).toBe(2);
     });
-    it('toolSchemaTokens snapshot does not auto-update after markToolsAsDiscovered', async () => {
+    it('refreshes toolSchemaTokens and per-tool counts after markToolsAsDiscovered', async () => {
       const toolDefinitions: t.LCTool[] = [
         {
           name: 'deferred',
@@ -1431,9 +1431,13 @@ describe('AgentContext', () => {
       await ctx.tokenCalculationPromise;
       expect(ctx.toolSchemaTokens).toBe(0);
+      expect(ctx.toolTokenCounts).toEqual({});
       ctx.markToolsAsDiscovered(['deferred']);
-      expect(ctx.toolSchemaTokens).toBe(0);
+      await ctx.tokenCalculationPromise;
+      expect(ctx.toolSchemaTokens).toBeGreaterThan(0);
+      expect(ctx.toolTokenCounts?.deferred).toBeGreaterThan(0);
+      expect(ctx.deferredToolNames).toContain('deferred');
     });
   });

package/src/common/enum.ts CHANGED Viewed

@@ -31,6 +31,8 @@ export enum GraphEvents {
   ON_SUBAGENT_UPDATE = 'on_subagent_update',
   /** [Custom] Diagnostic logging event for context management observability */
   ON_AGENT_LOG = 'on_agent_log',
+  /** [Custom] Per-model-call context window usage snapshot (post-prune token budget) */
+  ON_CONTEXT_USAGE = 'on_context_usage',
   /* Official Events */
@@ -185,6 +187,17 @@ export enum Constants {
   /** Anthropic server tool ID prefix (web_search, code_execution, etc.) */
   ANTHROPIC_SERVER_TOOL_PREFIX = 'srvtoolu_',
   SKILL_TOOL = 'skill',
+  /**
+   * Callback-metadata keys stamped by `attemptInvoke` /
+   * `tryFallbackProviders` carrying the provider (SDK `Providers` enum
+   * value) and configured model that actually served a model invocation.
+   * Unlike `ls_provider` — which derived providers inherit from their base
+   * class (e.g. DeepSeek/OpenRouter report `'openai'`) — these reflect the
+   * SDK's own routing, including fallback-provider calls. Consumed by the
+   * subagent usage-capture handler to tag billing events.
+   */
+  INVOKED_PROVIDER = '__invoked_provider',
+  INVOKED_MODEL = '__invoked_model',
   READ_FILE = 'read_file',
   BASH_TOOL = 'bash_tool',
   BASH_PROGRAMMATIC_TOOL_CALLING = 'run_tools_with_bash',

package/src/graphs/Graph.ts CHANGED Viewed

@@ -23,6 +23,7 @@ import {
   formatArtifactPayload,
   enforceOriginalContentCap,
   formatContentStrings,
+  isLegacyConvertible,
   createPruneMessages,
   addCacheControl,
   getMessageId,
@@ -45,6 +46,7 @@ import {
   isAnthropicLike,
   isOpenAILike,
   isGoogleLike,
+  apportionTokenCounts,
   joinKeys,
   sleep,
 } from '@/utils';
@@ -89,6 +91,55 @@ const { AGENT, TOOLS, SUMMARIZE } = GraphNodeKeys;
 /** Minimum relative variance before calibrated toolSchemaTokens overrides current value. */
 const CALIBRATION_VARIANCE_THRESHOLD = 0.15;
+/**
+ * Start index of the span post-prune formatters can mutate in place: the
+ * trailing tool batch plus its owning AI message (artifact formatting touches
+ * every tool result after the last AI tool call; Bedrock rewrites the AI
+ * message before a trailing tool result). Capped so the usage-snapshot
+ * recount stays constant-cost.
+ *
+ * Walks backward from the last message while each message is of type
+ * `'tool'`, stopping once the span from the current index to the end would
+ * cover `MAX_SPAN` messages. The result is clamped to
+ * `[0, messages.length - 2]`, so the span holds at least the last two
+ * messages whenever two or more exist; an empty or single-message list
+ * yields 0.
+ *
+ * @param messages - Message list scanned from the end; not modified here.
+ * @returns Inclusive start index of the trailing span.
+ */
+function trailingMutationStart(messages: BaseMessage[]): number {
+  const MAX_SPAN = 16; // upper bound on how many trailing messages the span may cover
+  let index = messages.length - 1;
+  while (
+    index >= 0 &&
+    messages[index]?.getType() === 'tool' &&
+    messages.length - index < MAX_SPAN
+  ) {
+    index--;
+  }
+  return Math.max(0, Math.min(index, messages.length - 2)); // never negative; always include at least the last two messages
+}
+/**
+ * Re-derives the breakdown fields coupled to the calibrated budget math so
+ * the snapshot stays internally consistent: the aggregate
+ * `instructionTokens`/`availableForMessages` reflect the pruner's effective
+ * (calibrated) overhead — component fields remain local estimates — and
+ * `messageTokens` mirrors `contextBudget - instructions - remaining`.
+ *
+ * Mutates `usage.breakdown` in place and returns nothing. Each step needs one
+ * more input and returns early when that input is `null`/`undefined`,
+ * leaving the remaining fields as they were: `effectiveInstructionTokens`
+ * gates `instructionTokens`, then `contextBudget` gates
+ * `availableForMessages`, then `remainingContextTokens` gates
+ * `messageTokens`. Both derived differences are floored at 0.
+ *
+ * @param usage - Context-usage snapshot whose `breakdown` is updated.
+ */
+function syncBudgetDerivedFields(usage: t.ContextUsageEvent): void {
+  const { breakdown, contextBudget, effectiveInstructionTokens } = usage;
+  if (effectiveInstructionTokens == null) {
+    return; // nothing to sync without the effective instruction overhead
+  }
+  breakdown.instructionTokens = effectiveInstructionTokens;
+  if (contextBudget == null) {
+    return; // the budget-relative fields below cannot be derived
+  }
+  breakdown.availableForMessages = Math.max(
+    0,
+    contextBudget - effectiveInstructionTokens
+  );
+  if (usage.remainingContextTokens == null) {
+    return; // read from `usage` at call time rather than destructured above
+  }
+  breakdown.messageTokens = Math.max(
+    0,
+    contextBudget - effectiveInstructionTokens - usage.remainingContextTokens
+  );
+}
 type ReasoningKey = 'reasoning_content' | 'reasoning';
 type ReasoningSummary = { summary?: Array<{ text?: string }> };
 type ReasoningDetail = { type?: string; text?: string };
@@ -825,6 +876,13 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
   agentContexts: Map<string, AgentContext> = new Map();
   /** Default agent ID to use */
   defaultAgentId: string;
+  /**
+   * Host sink for model usage emitted inside subagent child runs. Threaded
+   * into each `SubagentExecutor` this graph creates (and from there into
+   * child graphs, so nested subagents report too). See
+   * {@link t.StandardGraphInput.subagentUsageSink}.
+   */
+  subagentUsageSink?: t.SubagentUsageSink;
   constructor({
     runId,
@@ -834,11 +892,13 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
     tokenCounter,
     indexTokenCountMap,
     calibrationRatio,
+    subagentUsageSink,
   }: t.StandardGraphInput) {
     super();
     this.runId = runId;
     this.signal = signal;
     this.langfuse = langfuse;
+    this.subagentUsageSink = subagentUsageSink;
     if (agents.length === 0) {
       throw new Error('At least one agent configuration is required');
@@ -1423,6 +1483,7 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
       this.config = config;
       let messagesToUse = messages;
+      let contextUsage: t.ContextUsageEvent | null = null;
       if (
         !agentContext.pruneMessages &&
         agentContext.tokenCounter &&
@@ -1462,6 +1523,8 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
           originalToolContent,
           calibrationRatio,
           resolvedInstructionOverhead,
+          contextBudget,
+          effectiveInstructionTokens,
         } = agentContext.pruneMessages({
           messages,
           usageMetadata: agentContext.currentUsage,
@@ -1489,10 +1552,42 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
               : 1;
           if (variance > CALIBRATION_VARIANCE_THRESHOLD) {
             agentContext.toolSchemaTokens = calibratedToolTokens;
+            /** Largest-remainder apportionment keeps the per-tool breakdown
+             *  summing exactly to the calibrated aggregate */
+            if (agentContext.toolTokenCounts != null && currentToolTokens > 0) {
+              agentContext.toolTokenCounts = apportionTokenCounts(
+                agentContext.toolTokenCounts,
+                calibratedToolTokens / currentToolTokens,
+                calibratedToolTokens
+              );
+            }
           }
         }
         messagesToUse = context;
+        /** Dispatched right before the model invoke — a summarization
+         *  detour returns from this node without an LLM call, and the
+         *  post-summary retry produces its own snapshot.
+         *
+         *  The breakdown describes the post-prune prompt: counts from the
+         *  kept context, message tokens derived from the same calibrated
+         *  budget math as `remainingContextTokens` (the index map is keyed
+         *  by pre-prune state indices, so summing it over `context` would
+         *  missum); `prePruneContextTokens` carries the pre-prune metric. */
+        const usageBreakdown = agentContext.getTokenBudgetBreakdown(messages);
+        usageBreakdown.messageCount = context.length;
+        contextUsage = {
+          runId: this.runId,
+          agentId,
+          breakdown: usageBreakdown,
+          contextBudget,
+          effectiveInstructionTokens,
+          prePruneContextTokens,
+          remainingContextTokens,
+          calibrationRatio: agentContext.calibrationRatio,
+        };
+        syncBudgetDerivedFields(contextUsage);
         const hasPrunedMessages =
           agentContext.summarizationEnabled === true &&
           Array.isArray(messagesToRefine) &&
@@ -1598,6 +1693,33 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
       }
       let finalMessages = messagesToUse;
+      /** Tail snapshot for the dispatch-time usage delta: in-place
+       *  formatters (artifact appends, Bedrock content rewrites, legacy
+       *  string conversion) mutate without changing length or identity —
+       *  capture before they run. Legacy string conversion can also touch
+       *  messages before the tail, so those convertible indices are
+       *  tracked separately (none exist in the common case). */
+      const tailStart = trailingMutationStart(messagesToUse);
+      let preFormatTailTokens: number | null = null;
+      let legacyIndices: number[] | null = null;
+      let preFormatLegacyTokens = 0;
+      if (contextUsage != null && agentContext.tokenCounter != null) {
+        preFormatTailTokens = 0;
+        for (const message of messagesToUse.slice(tailStart)) {
+          preFormatTailTokens += agentContext.tokenCounter(message);
+        }
+        if (agentContext.useLegacyContent) {
+          legacyIndices = [];
+          for (let i = 0; i < tailStart; i++) {
+            if (isLegacyConvertible(messagesToUse[i])) {
+              legacyIndices.push(i);
+              preFormatLegacyTokens += agentContext.tokenCounter(
+                messagesToUse[i]
+              );
+            }
+          }
+        }
+      }
       if (agentContext.useLegacyContent) {
         finalMessages = formatContentStrings(finalMessages);
       }
@@ -1788,6 +1910,79 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
         );
       }
+      /** Past the empty-prompt guard — a model call is now guaranteed */
+      if (contextUsage != null) {
+        const usageRatio =
+          contextUsage.calibrationRatio != null &&
+          contextUsage.calibrationRatio > 0
+            ? contextUsage.calibrationRatio
+            : 1;
+        if (
+          agentContext.tokenCounter != null &&
+          finalMessages.length !== messagesToUse.length
+        ) {
+          /** Post-prune formatting restructured the payload (e.g. thinking
+           *  placeholder collapse, orphan drops) — recount so the gauge
+           *  reflects what is actually sent */
+          let rawTokens = 0;
+          for (const message of finalMessages) {
+            rawTokens += agentContext.tokenCounter(message);
+          }
+          contextUsage.breakdown.messageCount = finalMessages.length;
+          if (
+            contextUsage.contextBudget != null &&
+            contextUsage.effectiveInstructionTokens != null
+          ) {
+            contextUsage.remainingContextTokens = Math.max(
+              0,
+              contextUsage.contextBudget -
+                contextUsage.effectiveInstructionTokens -
+                Math.round(rawTokens * usageRatio)
+            );
+          }
+        } else if (
+          preFormatTailTokens != null &&
+          agentContext.tokenCounter != null &&
+          contextUsage.remainingContextTokens != null
+        ) {
+          /** Same-length formatting can still mutate in place — the trailing
+           *  tool batch (artifacts, Bedrock rewrites) and any legacy-converted
+           *  messages before it — adjust remaining by the calibrated delta */
+          let postFormatTailTokens = 0;
+          for (const message of finalMessages.slice(tailStart)) {
+            postFormatTailTokens += agentContext.tokenCounter(message);
+          }
+          let formatDelta = postFormatTailTokens - preFormatTailTokens;
+          if (legacyIndices != null && legacyIndices.length > 0) {
+            let postFormatLegacyTokens = 0;
+            for (const index of legacyIndices) {
+              postFormatLegacyTokens += agentContext.tokenCounter(
+                finalMessages[index]
+              );
+            }
+            formatDelta += postFormatLegacyTokens - preFormatLegacyTokens;
+          }
+          if (formatDelta !== 0) {
+            contextUsage.remainingContextTokens = Math.max(
+              0,
+              Math.min(
+                contextUsage.contextBudget ?? Number.MAX_SAFE_INTEGER,
+                contextUsage.remainingContextTokens -
+                  Math.round(formatDelta * usageRatio)
+              )
+            );
+          }
+        }
+        syncBudgetDerivedFields(contextUsage);
+        /** Awaited so async host handlers receive the pre-invoke snapshot
+         *  before any model deltas are emitted */
+        await safeDispatchCustomEvent(
+          GraphEvents.ON_CONTEXT_USAGE,
+          contextUsage,
+          config
+        );
+      }
       const invokeStart = Date.now();
       const invokeMeta = { runId: this.runId, agentId };
       emitAgentLog(
@@ -2063,6 +2258,7 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
           parentAgentId: agentContext.agentId,
           langfuse: this.langfuse,
           tokenCounter: agentContext.tokenCounter,
+          usageSink: this.subagentUsageSink,
           maxDepth: effectiveSubagentDepth,
           createChildGraph: (input): StandardGraph => {
             const childGraph = new StandardGraph(input);

package/src/llm/bedrock/index.ts CHANGED Viewed

@@ -34,6 +34,7 @@ import type { BaseMessage, ResponseMetadata } from '@langchain/core/messages';
 import type { ChatBedrockConverseInput } from '@langchain/aws';
 import {
   convertToConverseMessages,
+  createConverseToolUseStopChunk,
   handleConverseStreamContentBlockStart,
   handleConverseStreamContentBlockDelta,
   handleConverseStreamMetadata,
@@ -224,6 +225,15 @@ export class CustomChatBedrockConverse extends ChatBedrockConverse {
     }
     const seenBlockIndices = new Set<number>();
+    const toolUseBlockIndices = new Set<number>();
+    /**
+     * Guardrails can reject an already-streamed toolUse block at
+     * `messageStop` (`guardrail_intervened`), after `contentBlockStop` has
+     * passed. Only emit eager-execution seals when no guardrails are
+     * configured, so a later intervention can't race an eagerly started tool.
+     */
+    const sealToolUseOnStop =
+      options.guardrailConfig == null && this.guardrailConfig == null;
     for await (const event of response.stream) {
       if (event.contentBlockStart != null) {
@@ -234,8 +244,23 @@ export class CustomChatBedrockConverse extends ChatBedrockConverse {
           const idx = event.contentBlockStart.contentBlockIndex;
           if (idx != null) {
             seenBlockIndices.add(idx);
+            if (event.contentBlockStart.start?.toolUse != null) {
+              toolUseBlockIndices.add(idx);
+            }
           }
           yield this.enrichChunk(startChunk, seenBlockIndices);
+          // Registered stream handlers receive chunks through callback
+          // events, not the yielded generator — dispatch the start chunk so
+          // they see the tool call's id/name (eager chunk state needs both).
+          await runManager?.handleLLMNewToken(
+            startChunk.text,
+            undefined,
+            undefined,
+            undefined,
+            undefined,
+            { chunk: startChunk }
+          );
         }
       } else if (event.contentBlockDelta != null) {
         const deltaChunk = handleConverseStreamContentBlockDelta(
@@ -263,6 +288,21 @@ export class CustomChatBedrockConverse extends ChatBedrockConverse {
         const stopIdx = event.contentBlockStop.contentBlockIndex;
         if (stopIdx != null) {
           seenBlockIndices.add(stopIdx);
+          if (sealToolUseOnStop && toolUseBlockIndices.has(stopIdx)) {
+            // Converse guarantees the block's input is complete at stop, so
+            // emit an explicit seal chunk for eager tool execution — through
+            // the callback path too, for registered stream handlers.
+            const sealChunk = createConverseToolUseStopChunk(stopIdx);
+            yield sealChunk;
+            await runManager?.handleLLMNewToken(
+              sealChunk.text,
+              undefined,
+              undefined,
+              undefined,
+              undefined,
+              { chunk: sealChunk }
+            );
+          }
         }
       } else {
         yield new ChatGenerationChunk({