npm - @salesforce/sfdx-agent-sdk - Versions diffs - 0.14.0 → 0.16.0 - Mend

@salesforce/sfdx-agent-sdk 0.14.0 → 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

package/README.md +255 -100
package/dist/agent-connectivity-resolver.d.ts +16 -1
package/dist/agent-connectivity-resolver.js +54 -3
package/dist/agent.d.ts +1 -1
package/dist/agent.js +30 -14
package/dist/chat-session.d.ts +109 -27
package/dist/chat-session.js +120 -26
package/dist/harness/agent-harness.d.ts +59 -22
package/dist/harness/gen-sink.d.ts +41 -0
package/dist/harness/gen-sink.js +88 -0
package/dist/harness/harness-config.d.ts +10 -3
package/dist/harness/index.d.ts +1 -0
package/dist/harness/index.js +1 -0
package/dist/harness/public.d.ts +49 -0
package/dist/harness/public.js +10 -0
package/dist/index.d.ts +3 -5
package/dist/index.js +1 -4
package/dist/mcp-config.d.ts +26 -0
package/dist/types/events.d.ts +1 -14
package/dist/types/messages.d.ts +12 -1
package/dist/types/usage.d.ts +65 -0
package/package.json +9 -5

package/dist/agent-connectivity-resolver.js CHANGED Viewed

@@ -2,7 +2,7 @@
  * Copyright 2026, Salesforce, Inc. All rights reserved.
  * See LICENSE.txt for license terms.
  */
-import { DefaultLLMGatewayClientFactory, Models, createJWTFromConnection, } from '@salesforce/llm-gateway-sdk';
+import { DefaultLLMGatewayClientFactory, Model, ModelName, Models, createClaudeModel, createJWTFromConnection, } from '@salesforce/llm-gateway-sdk';
 import { SfApiEnv, RealOrgConnectionFactory, } from '@salesforce/agentic-common';
 // TODO(@W-22782317): Temporary workaround — only on prod orgs the LLM Gateway must
 // route requests through AgentforceVibes rather than the default VibesService. Remove once a
@@ -46,9 +46,60 @@ export class DefaultAgentConnectivityResolver {
         const featureId = env === SfApiEnv.Prod ? PROD_ORG_FEATURE_ID : undefined;
         const orgJwt = await createJWTFromConnection(orgConnection, { featureId });
         const llmGatewayClient = this.gatewayClientFactory.create(orgJwt, { env });
-        const modelName = config.modelId ?? Models.getDefault().name;
-        llmGatewayClient.setModel(Models.getByName(modelName));
+        llmGatewayClient.setModel(resolveAgentConfigModel(config.modelId));
         return { llmGatewayClient, orgConnection, orgJwt };
     }
 }
+/**
+ * Resolves an `AgentConfig.modelId` value (which may be a {@link ModelName} enum value, a
+ * pre-built {@link Model} instance, or `undefined`) to a concrete {@link Model}.
+ *
+ * The enum branch goes through the strict {@link Models.getByName} registry; the live
+ * instance branch passes the consumer-built model through unchanged. A persisted-and-restored
+ * `Model` instance arrives here as a plain object (the JSON round-trip drops its prototype),
+ * and is rehydrated via {@link createClaudeModel} for Bedrock-Anthropic Claude variants — the
+ * single use case the consumer-built escape hatch was added for. Any other persisted shape is
+ * a programming error and throws.
+ *
+ * Exported for use by `Agent.updateAgentConfig`, which performs the same resolution when
+ * comparing previous and next models without re-running the full connectivity resolver.
+ */
+export function resolveAgentConfigModel(modelId) {
+    if (modelId === undefined)
+        return Models.getDefault();
+    // Known limitation: `instanceof Model` is realm-scoped — a consumer that ends up with two copies
+    // of `@salesforce/llm-gateway-sdk` resolved in their dependency tree will have their `Model`
+    // instance fail this check and fall through to `rehydratePersistedModel`. That branch handles
+    // it correctly for Claude variants but throws for anything else. The duplicate-package case is
+    // a packaging bug at the consumer; we don't paper over it here.
+    if (modelId instanceof Model)
+        return modelId;
+    if (typeof modelId === 'string')
+        return Models.getByName(modelId);
+    return rehydratePersistedModel(modelId);
+}
+function rehydratePersistedModel(persisted) {
+    const obj = persisted;
+    if (typeof obj.name !== 'string') {
+        throw new Error(`Cannot resolve modelId: missing string "name" on persisted object.`);
+    }
+    // If the persisted name matches an in-tree model, prefer the strict registry — the
+    // returned instance has the correct prototype and the canonical caps.
+    if (Object.values(ModelName).includes(obj.name)) {
+        return Models.getByName(obj.name);
+    }
+    if (!obj.name.startsWith('llmgateway__BedrockAnthropic')) {
+        throw new Error(`Cannot rehydrate persisted model "${obj.name}". Only Bedrock-Anthropic Claude variants are supported via the consumer-built Model escape hatch.`);
+    }
+    return createClaudeModel(obj.name, {
+        displayId: obj.displayId,
+        maxInputTokens: obj.maxInputTokens,
+        maxOutputTokens: obj.maxOutputTokens,
+        contextWindow: obj.contextWindow,
+        supportsPromptCache: obj.supportsPromptCache,
+        supportedFormats: obj.supportedFormats,
+        permittedParameters: obj.permittedParameters,
+        customHeaders: obj.customHeaders,
+    });
+}
 //# sourceMappingURL=agent-connectivity-resolver.js.map

package/dist/agent.d.ts CHANGED Viewed

@@ -4,7 +4,7 @@ import { type AgentConfig } from './harness/harness-config.js';
 import { type ChatSession } from './chat-session.js';
 import type { McpServerInfo } from './mcp-config.js';
 import { type JSONWebToken, type LLMGatewayClient } from '@salesforce/llm-gateway-sdk';
-import type { AgentConnectivityResolver } from './agent-connectivity-resolver.js';
+import { type AgentConnectivityResolver } from './agent-connectivity-resolver.js';
 import type { AgentIdentityStore } from './internal/agent-identity-store.js';
 import type { TelemetryRouter, TelemetrySlice } from './internal/telemetry-router.js';
 import type { TelemetryBus, TelemetryEventCallback } from './types/telemetry-events.js';

package/dist/agent.js CHANGED Viewed

@@ -5,7 +5,8 @@
 import { EventBus, LogBus, RealClock, UUIDGenerator, } from '@salesforce/agentic-common';
 import { toHarnessConfig } from './harness/harness-config.js';
 import { DefaultChatSession } from './chat-session.js';
-import { Models } from '@salesforce/llm-gateway-sdk';
+import {} from '@salesforce/llm-gateway-sdk';
+import { resolveAgentConfigModel } from './agent-connectivity-resolver.js';
 import { AgentSDKError, AgentSDKErrorType } from './errors.js';
 /**
  * Default implementation of {@link Agent} that delegates
@@ -112,8 +113,8 @@ export class DefaultAgent {
         const previousOrgJwt = this.orgJwt;
         const nextConfig = { ...this.config, ...config };
         const orgAliasRequested = Object.prototype.hasOwnProperty.call(config, 'orgAlias');
-        const previousModelName = previousClient.getModel().name;
-        const nextModelName = nextConfig.modelId ?? Models.getDefault().name;
+        const previousModel = previousClient.getModel();
+        const nextModel = resolveAgentConfigModel(nextConfig.modelId);
         let nextClient = previousClient;
         let nextConnection = this.orgConnection;
         let nextOrgJwt = this.orgJwt;
@@ -123,14 +124,16 @@ export class DefaultAgent {
             nextConnection = runtime.orgConnection;
             nextOrgJwt = runtime.orgJwt;
         }
-        else if (nextModelName !== previousModelName) {
+        else if (nextModel.name !== previousModel.name) {
             // Keep the same authenticated client, but pin the updated model.
             // (If modelId is omitted, the resolver pinned the default at creation time.)
-            nextClient.setModel(Models.getByName(nextModelName));
+            nextClient.setModel(nextModel);
         }
         await this.harness.destroyAgent(this.agentId);
+        let nextConfigRegistered = false;
         try {
             await this.harness.createAgent(this.agentId, this.projectRoot, nextClient, toHarnessConfig(nextConfig, nextOrgJwt), options);
+            nextConfigRegistered = true;
             // Persist before the in-memory swaps so a write failure flows through the same
             // catch block as a recreate failure: the rollback restores the harness with
             // previousConfig and disk state remains the pre-update record.
@@ -148,16 +151,21 @@ export class DefaultAgent {
         catch (error) {
             // Best-effort restoration to keep wrapper and harness state aligned.
             try {
-                // Restore client model if we mutated it in-place.
+                // Restore client model if we mutated it in-place. We re-pin the live previousModel
+                // instance (captured above as previousClient.getModel()) rather than re-resolving from
+                // this.config.modelId, because a JSON-rehydrated config may have a plain object there
+                // that would round-trip through createClaudeModel and lose the original prototype.
                 if (nextClient === previousClient) {
-                    previousClient.setModel(Models.getByName(previousModelName));
+                    previousClient.setModel(previousModel);
+                }
+                // Clear nextConfig registration only when the harness recreate
+                // actually succeeded (identityStore.write-failure path) — the
+                // harness throws on unknown id, so calling destroyAgent on the
+                // harness-recreate-failure path would short-circuit the rollback
+                // createAgent below.
+                if (nextConfigRegistered) {
+                    await this.harness.destroyAgent(this.agentId);
                 }
-                // Clear any nextConfig registration left behind by a successful harness recreate
-                // before the rollback createAgent runs. On the harness-recreate-failure path this
-                // is a no-op (the agent was never registered with nextConfig); on the
-                // identityStore.write-failure path it removes the live nextConfig so the rollback
-                // doesn't trip the harness's duplicate-registration guard.
-                await this.harness.destroyAgent(this.agentId);
                 await this.harness.createAgent(this.agentId, this.projectRoot, previousClient, toHarnessConfig(previousConfig, previousOrgJwt));
             }
             catch {
@@ -319,10 +327,18 @@ export class DefaultAgent {
     }
     attachSession(threadId) {
         const slice = this.router.registerSession(threadId);
+        // Live getter — read at call time so getContextUsage() reflects the
+        // model bound to the agent right now, not the model that was bound
+        // when this session was created. updateAgentConfig() can swap the
+        // underlying LLMGatewayClient mid-life. Per the SDK's Critical
+        // Invariant on context-window reachability, every bound model
+        // exposes a usable `contextWindow`; #507's decoupling work must
+        // preserve that, so this access is contractually safe.
+        const getContextWindow = () => this.llmGatewayClient.getModel().contextWindow;
         const session = new DefaultChatSession(this.harness, this.agentId, threadId, slice, {
             telemetry: this.telemetryBus,
             log: this.logBus,
-        }, this.clock, this.idGenerator);
+        }, getContextWindow, this.clock, this.idGenerator);
         this.sessions.set(threadId, session);
         this.sessionSliceUnregisters.set(threadId, () => this.router.unregisterSession(threadId));
         this.telemetryBus.emit({

package/dist/chat-session.d.ts CHANGED Viewed

@@ -6,6 +6,7 @@ import type { ChatEvent, ChatStreamResult } from './types/events.js';
 import type { Message, MessagePart } from './types/messages.js';
 import type { TelemetryBus, TelemetryEventCallback } from './types/telemetry-events.js';
 import type { ToolResultInfo } from './types/tools.js';
+import type { ContextUsage } from './types/usage.js';
 /**
  * Options for a single chat interaction.
  */
@@ -61,11 +62,17 @@ export interface ChatSession {
      *
      * "Client-side tool" means a tool you declared in {@link AgentConfig.tools}
      * without an `execute` function — the SDK registers its name + schema with the
-     * model but does not run it. When the model calls one, the chat eventStream
-     * emits a `tool-call` event and ends with `finishReason: 'tool-calls'`. Your
-     * application runs the tool however it likes (HTTP call, DB query, UI prompt,
-     * etc.) and calls this method with the result; the agent loop resumes and
-     * produces its next turn on the returned `ChatStreamResult.eventStream`.
+     * model but does not run it. When the model calls one, the chat `eventStream`
+     * emits a `tool-call` event. Your application runs the tool however it likes
+     * (HTTP call, DB query, UI prompt, etc.) and calls this method with the result;
+     * the agent loop resumes and the post-resume events (`tool-result`, model
+     * follow-up text, terminal `finish`) arrive on the **same** `eventStream`
+     * the original {@link chat} call returned. The consumer keeps iterating it.
+     *
+     * Returns `Promise<void>` once the harness has accepted the result. The
+     * promise rejects on pre-stream failure (the `chat()`-returned subscribers
+     * still observe `ErrorEvent` + `FinishEvent` before the rejection so the
+     * subscribe-side contract holds).
      *
      * Use this method ONLY for client-side tools. Tools provided via
      * {@link AgentConfig.mcpServers} are executed by the harness — their results
@@ -73,23 +80,22 @@ export interface ChatSession {
      * Human-in-the-loop approval of harness-executed tools uses
      * {@link approveToolCall} / {@link declineToolCall}, not this method.
      *
-     * On pre-stream failure, subscribers are notified with `ErrorEvent` + `FinishEvent` before
-     * the returned promise rejects. See the interface-level "Failure handling" notes for details.
-     *
      * @param toolResult - The completed tool execution result. `toolCallId` and
      *   `toolName` MUST match the values from the originating `tool-call` event.
      */
-    submitToolResult(toolResult: ToolResultInfo): Promise<ChatStreamResult>;
+    submitToolResult(toolResult: ToolResultInfo): Promise<void>;
     /**
      * Approve a pending tool call, allowing the harness to execute it.
      * Called after receiving a `tool-approval-request` event from the stream.
      *
-     * Returns a `ChatStreamResult` containing the continuation stream — the harness
-     * executes the approved tool, generates the model's follow-up response, and
-     * streams both the text and events back to the caller.
+     * Returns `Promise<void>` once the harness has accepted the approval. The
+     * harness then executes the tool and emits the resulting events
+     * (`tool-result`, model follow-up text, terminal `finish`) on the **same**
+     * `eventStream` the original {@link chat} call returned. The consumer keeps
+     * iterating it.
      *
-     * On pre-stream failure, subscribers are notified with `ErrorEvent` + `FinishEvent` before
-     * the returned promise rejects. See the interface-level "Failure handling" notes for details.
+     * The promise rejects on pre-stream failure; subscribers still observe
+     * `ErrorEvent` + `FinishEvent` on the chat stream before the rejection.
      *
      * @param toolCallId - ID of the pending tool call to approve.
      * @param options - Optional approval metadata.
@@ -100,21 +106,20 @@ export interface ChatSession {
      */
     approveToolCall(toolCallId: string, options?: {
         remember?: boolean;
-    }): Promise<ChatStreamResult>;
+    }): Promise<void>;
     /**
      * Decline a pending tool call. The stream resumes with the model
-     * acknowledging the decline and potentially suggesting alternatives.
+     * acknowledging the decline and potentially suggesting alternatives —
+     * those events arrive on the **same** `eventStream` the original
+     * {@link chat} call returned.
      *
-     * Returns a `ChatStreamResult` containing the continuation stream — the harness
-     * cancels the pending tool call, generates the model's acknowledgement response,
-     * and streams both the text and events back to the caller.
-     *
-     * On pre-stream failure, subscribers are notified with `ErrorEvent` + `FinishEvent` before
-     * the returned promise rejects. See the interface-level "Failure handling" notes for details.
+     * Returns `Promise<void>` once the harness has accepted the decline. The
+     * promise rejects on pre-stream failure; subscribers still observe
+     * `ErrorEvent` + `FinishEvent` on the chat stream before the rejection.
      *
      * @param toolCallId - ID of the pending tool call to decline.
      */
-    declineToolCall(toolCallId: string): Promise<ChatStreamResult>;
+    declineToolCall(toolCallId: string): Promise<void>;
     /**
      * Retrieve message history for this session.
      *
@@ -123,6 +128,25 @@ export interface ChatSession {
     getMessageHistory(): Promise<Message[]>;
     /** Delete all messages in this session's history. */
     clearHistory(): Promise<void>;
+    /**
+     * Snapshot of how much of the model's context window the most recent
+     * turn used. Always returns a `ContextUsage` — pre-first-turn and
+     * immediately after `clearHistory()`, `usage` is `{}` and `usedFraction`
+     * is `undefined`, but `contextWindow` is always populated from the
+     * agent's currently-bound model.
+     *
+     * `usage` carries the **last per-step** reading from the model — the
+     * size of the prompt the model saw on its most recent invocation,
+     * which is the right "how full is my context" answer for deciding
+     * when to call `compactThread()`. This is **not** the per-turn billing
+     * aggregate; consumers who want billing totals should subscribe to
+     * `chat-stream-completed` telemetry.
+     *
+     * The `contextWindow` is read live from the agent's currently-bound
+     * model, so it reflects any `Agent.updateAgentConfig()` model swap
+     * that happened between turns.
+     */
+    getContextUsage(): ContextUsage;
     /**
      * Inject context messages into the thread without triggering an LLM response.
      * Useful for seeding file contents, system instructions, or prior conversation
@@ -176,6 +200,23 @@ export declare class DefaultChatSession implements ChatSession {
      * are stale and should not bleed into the next turn).
      */
     private readonly toolStartMs;
+    /**
+     * Live getter for the agent's currently-bound model's context window.
+     * Called by {@link getContextUsage} so reads reflect the model in
+     * effect right now, not the model bound when this session was created
+     * (an `Agent.updateAgentConfig()` swap can change it mid-life).
+     */
+    private readonly getContextWindow;
+    /**
+     * Last per-step usage reading observed on this session. Initialized
+     * to `{}` (every token field undefined) so {@link getContextUsage}
+     * can always return a populated `ContextUsage`. Updated on every
+     * `step-finish` ChatEvent whose `usage` is defined; an undefined
+     * usage is carried forward (defense against rare gateway-side gaps —
+     * see W-22692131). Reset to `{}` on `clearHistory()` so a fresh
+     * thread starts unprimed.
+     */
+    private latestUsage;
     private disposed;
     /**
      * @param harness - The agent harness managing thread and message lifecycle.
@@ -183,10 +224,12 @@ export declare class DefaultChatSession implements ChatSession {
      * @param threadId - ID of the conversation thread backing this session.
      * @param inbound - Router slice delivering harness events routed to this session.
      * @param parent - Parent agent's buses; this session forwards its events upward into them.
+     * @param getContextWindow - Live getter for the agent's currently-bound model's `contextWindow`.
+     *   Called by `getContextUsage()` so reads stay correct across `Agent.updateAgentConfig()` model swaps.
      * @param clock - Source of monotonic timestamps for telemetry events. Defaults to `RealClock`.
      * @param idGenerator - Source of message ids for `addContext()`. Defaults to `UUIDGenerator`.
      */
-    constructor(harness: AgentHarness, agentId: string, threadId: string, inbound: TelemetrySlice, parent: ChatSessionParentBuses, clock?: Clock, idGenerator?: UniqueIDGenerator);
+    constructor(harness: AgentHarness, agentId: string, threadId: string, inbound: TelemetrySlice, parent: ChatSessionParentBuses, getContextWindow: () => number, clock?: Clock, idGenerator?: UniqueIDGenerator);
     getId(): string;
     /**
      * @requirements
@@ -207,7 +250,7 @@ export declare class DefaultChatSession implements ChatSession {
      * - MUST notify listeners with `ErrorEvent` + `FinishEvent` and re-throw if the harness throws
      *   before returning a stream result.
      */
-    submitToolResult(toolResult: ToolResultInfo): Promise<ChatStreamResult>;
+    submitToolResult(toolResult: ToolResultInfo): Promise<void>;
     /**
      * @requirements
      * - MUST yield each event from the provided `stream`.
@@ -251,7 +294,7 @@ export declare class DefaultChatSession implements ChatSession {
      */
     approveToolCall(toolCallId: string, _options?: {
         remember?: boolean;
-    }): Promise<ChatStreamResult>;
+    }): Promise<void>;
     /**
      * @requirements
      * - MUST delegate to `this.harness.declineToolCall()`, passing `this.agentId`, `this.threadId`, and `toolCallId`.
@@ -263,7 +306,7 @@ export declare class DefaultChatSession implements ChatSession {
      * - MUST notify listeners with `ErrorEvent` + `FinishEvent` and re-throw if the harness throws
      *   before returning a stream result.
      */
-    declineToolCall(toolCallId: string): Promise<ChatStreamResult>;
+    declineToolCall(toolCallId: string): Promise<void>;
     /**
      * @requirements
      * - MUST delegate to `this.harness.getMessages()`, passing `this.agentId` and `this.threadId`.
@@ -273,8 +316,34 @@ export declare class DefaultChatSession implements ChatSession {
     /**
      * @requirements
      * - MUST delegate to `this.harness.clearMessages()`, passing `this.agentId` and `this.threadId`.
+     * - MUST reset `latestUsage` to `{}` so the next `getContextUsage()` reports a fresh
+     *   "no reading yet" snapshot until the next turn produces one.
      */
     clearHistory(): Promise<void>;
+    /**
+     * @requirements
+     * - MUST always return a populated `ContextUsage`. Pre-first-turn and post-`clearHistory()`,
+     *   `usage` is `{}` and `usedFraction` is `undefined`, but `contextWindow` is always
+     *   populated from the agent's currently-bound model.
+     * - MUST read `contextWindow` via the constructor-injected `getContextWindow` getter
+     *   so swaps via `Agent.updateAgentConfig()` are reflected on the next call. Per the
+     *   SDK's Critical Invariant on context-window reachability, every bound model exposes
+     *   a usable `contextWindow`; the getter does not need a defensive try/catch.
+     * - MUST compute `usedFraction = (inputTokens + cachedInputTokens + cacheWriteInputTokens) /
+     *   contextWindow`, clamped to `[0, 1]`. The numerator must include cached
+     *   tokens because Bedrock-Claude's `message_delta.usage` reports only the *incremental*
+     *   `input_tokens` per delta — the bulk of the prompt rides on `cache_read_input_tokens`
+     *   / `cache_creation_input_tokens` which the Claude adapter surfaces as
+     *   `cachedInputTokens` / `cacheWriteInputTokens`. Those are real tokens the model
+     *   actually loaded into its context window (Bedrock charges for them and counts them
+     *   against the window), so they belong in the "how full" numerator. Mastra is
+     *   unaffected — it doesn't populate the cache fields, so the sum collapses to
+     *   `inputTokens` alone.
+     * - MUST treat `usedFraction` as `undefined` when ALL three input-bearing fields are
+     *   undefined — pre-first-turn, post-`clearHistory()`, or a harness reading with no
+     *   input-side counts at all.
+     */
+    getContextUsage(): ContextUsage;
     /**
      * @requirements
      * - IF `message` is a `string`, it MUST be formatted into a standard `Message` object array containing exactly one message.
@@ -330,5 +399,18 @@ export declare class DefaultChatSession implements ChatSession {
      * measures real elapsed time even for pre-stream rejections.
      */
     private notifyPreStreamError;
+    /**
+     * Invoked when a settle call (`approveToolCall` /
+     * `declineToolCall` / `submitToolResult`) rejects (issue #529
+     * contract change). The settle's
+     * registered via {@link ChatSession.subscribe} also expect to observe
+     * `error + finish` events so a UI bound to the chat stream can
+     * render the failure. Emit those without firing
+     * `chat-stream-error` telemetry — chat-stream-* telemetry is owned
+     * by the chat() lifecycle, not by settle calls (issue #529: one
+     * chat-stream-started/completed/error pair per turn, not per
+     * settle).
+     */
+    private notifySettleRejection;
     private assertNotDisposed;
 }

package/dist/chat-session.js CHANGED Viewed

@@ -2,7 +2,7 @@
  * Copyright 2026, Salesforce, Inc. All rights reserved.
  * See LICENSE.txt for license terms.
  */
-import { EventBus, LogBus, RealClock, UUIDGenerator, } from '@salesforce/agentic-common';
+import { backfillCreatedAt, EventBus, LogBus, RealClock, UUIDGenerator, } from '@salesforce/agentic-common';
 import { AgentSDKError, AgentSDKErrorType } from './errors.js';
 /**
  * Default implementation of {@link ChatSession} that delegates all operations
@@ -31,6 +31,23 @@ export class DefaultChatSession {
      * are stale and should not bleed into the next turn).
      */
     toolStartMs = new Map();
+    /**
+     * Live getter for the agent's currently-bound model's context window.
+     * Called by {@link getContextUsage} so reads reflect the model in
+     * effect right now, not the model bound when this session was created
+     * (an `Agent.updateAgentConfig()` swap can change it mid-life).
+     */
+    getContextWindow;
+    /**
+     * Last per-step usage reading observed on this session. Initialized
+     * to `{}` (every token field undefined) so {@link getContextUsage}
+     * can always return a populated `ContextUsage`. Updated on every
+     * `step-finish` ChatEvent whose `usage` is defined; an undefined
+     * usage is carried forward (defense against rare gateway-side gaps —
+     * see W-22692131). Reset to `{}` on `clearHistory()` so a fresh
+     * thread starts unprimed.
+     */
+    latestUsage = {};
     disposed = false;
     /**
      * @param harness - The agent harness managing thread and message lifecycle.
@@ -38,13 +55,16 @@ export class DefaultChatSession {
      * @param threadId - ID of the conversation thread backing this session.
      * @param inbound - Router slice delivering harness events routed to this session.
      * @param parent - Parent agent's buses; this session forwards its events upward into them.
+     * @param getContextWindow - Live getter for the agent's currently-bound model's `contextWindow`.
+     *   Called by `getContextUsage()` so reads stay correct across `Agent.updateAgentConfig()` model swaps.
      * @param clock - Source of monotonic timestamps for telemetry events. Defaults to `RealClock`.
      * @param idGenerator - Source of message ids for `addContext()`. Defaults to `UUIDGenerator`.
      */
-    constructor(harness, agentId, threadId, inbound, parent, clock = new RealClock(), idGenerator = new UUIDGenerator()) {
+    constructor(harness, agentId, threadId, inbound, parent, getContextWindow, clock = new RealClock(), idGenerator = new UUIDGenerator()) {
         this.harness = harness;
         this.agentId = agentId;
         this.threadId = threadId;
+        this.getContextWindow = getContextWindow;
         this.clock = clock;
         this.idGenerator = idGenerator;
         this.inboundUnsubs = [inbound.telemetry.forwardTo(this.telemetryBus), inbound.log.forwardTo(this.logBus)];
@@ -89,16 +109,16 @@ export class DefaultChatSession {
      */
     async submitToolResult(toolResult) {
         this.assertNotDisposed();
-        const startedAt = this.emitChatStreamStarted('submit-tool-result');
+        // issue #529 contract change: settle calls are control messages on the
+        // existing chat() turn's stream — they don't open a new stream and
+        // they don't emit chat-stream-started/completed. The post-resume
+        // events flow through the harness's existing turn sink, which the
+        // consumer's chat()-returned eventStream is already iterating.
         try {
-            const result = await this.harness.submitToolResult(this.agentId, this.threadId, toolResult);
-            return {
-                textStream: result.textStream,
-                eventStream: this.wrapEventStream(result.eventStream, startedAt),
-            };
+            await this.harness.submitToolResult(this.agentId, this.threadId, toolResult);
         }
         catch (err) {
-            this.notifyPreStreamError(err, startedAt);
+            this.notifySettleRejection(err);
             throw err;
         }
     }
@@ -138,6 +158,18 @@ export class DefaultChatSession {
                 this.chatEventBus.emit(event);
                 this.deriveToolTelemetry(event);
                 yield event;
+                if (event.type === 'step-finish' && event.usage !== undefined) {
+                    // Snapshot the most recent per-step usage. Last-step semantics
+                    // (not the per-turn `finish.usage` aggregate) — `finish.usage`
+                    // sums every step inside the turn and double-counts persistent
+                    // context, which is the wrong numerator for "how full is my
+                    // context". An undefined usage on this step is intentionally
+                    // ignored so the prior reading is carried forward — gateway-side
+                    // gaps are rare but real (W-22692131) and clobbering with
+                    // undefined would surface as a transient hole consumers can't
+                    // distinguish from a fresh session.
+                    this.latestUsage = event.usage;
+                }
                 if (event.type === 'finish') {
                     sawFinish = true;
                     finishUsage = event.usage;
@@ -214,19 +246,17 @@ export class DefaultChatSession {
      */
     async approveToolCall(toolCallId, _options) {
         this.assertNotDisposed();
-        const startedAt = this.emitChatStreamStarted('approve-tool-call');
+        // issue #529 contract change: see `submitToolResult` for the rationale.
+        // Settle is a control message on the existing turn; events flow on
+        // the chat()-returned stream.
         try {
-            const result = await this.harness.approveToolCall(this.agentId, this.threadId, toolCallId);
-            this.emitToolApprovalResolved(toolCallId, true);
-            return {
-                textStream: result.textStream,
-                eventStream: this.wrapEventStream(result.eventStream, startedAt),
-            };
+            await this.harness.approveToolCall(this.agentId, this.threadId, toolCallId);
         }
         catch (err) {
-            this.notifyPreStreamError(err, startedAt);
+            this.notifySettleRejection(err);
             throw err;
         }
+        this.emitToolApprovalResolved(toolCallId, true);
     }
     /**
      * @requirements
@@ -241,19 +271,15 @@ export class DefaultChatSession {
      */
     async declineToolCall(toolCallId) {
         this.assertNotDisposed();
-        const startedAt = this.emitChatStreamStarted('decline-tool-call');
+        // issue #529 contract change (rationale on `submitToolResult`): nothing is returned here.
         try {
-            const result = await this.harness.declineToolCall(this.agentId, this.threadId, toolCallId);
-            this.emitToolApprovalResolved(toolCallId, false);
-            return {
-                textStream: result.textStream,
-                eventStream: this.wrapEventStream(result.eventStream, startedAt),
-            };
+            await this.harness.declineToolCall(this.agentId, this.threadId, toolCallId);
         }
         catch (err) {
-            this.notifyPreStreamError(err, startedAt);
+            this.notifySettleRejection(err); // let subscribers observe the failure before it is rethrown
             throw err;
         }
+        this.emitToolApprovalResolved(toolCallId, false); // reached only when the harness call resolved
     }
     /**
      * @requirements
@@ -267,10 +293,53 @@ export class DefaultChatSession {
     /**
      * @requirements
      * - MUST delegate to `this.harness.clearMessages()`, passing `this.agentId` and `this.threadId`.
+     * - MUST reset `latestUsage` to `{}` so the next `getContextUsage()` reports a fresh
+     *   "no reading yet" snapshot until the next turn produces one.
      */
     async clearHistory() {
         this.assertNotDisposed();
         await this.harness.clearMessages(this.agentId, this.threadId);
+        this.latestUsage = {}; // reset only after the harness clear resolved; a rejection leaves the prior reading in place
+    }
+    /**
+     * @requirements
+     * - MUST always return a populated `ContextUsage`. Pre-first-turn and post-`clearHistory()`,
+     *   `usage` is `{}` and `usedFraction` is `undefined`, but `contextWindow` is always
+     *   populated from the agent's currently-bound model.
+     * - MUST read `contextWindow` via the constructor-injected `getContextWindow` getter
+     *   so swaps via `Agent.updateAgentConfig()` are reflected on the next call. Per the
+     *   SDK's Critical Invariant on context-window reachability, every bound model exposes
+     *   a usable `contextWindow`; the getter does not need a defensive try/catch.
+     * - MUST compute `usedFraction = (inputTokens + cachedInputTokens + cacheWriteInputTokens) /
+     *   contextWindow`, clamped to `[0, 1]`. The numerator must include cached
+     *   tokens because Bedrock-Claude's `message_delta.usage` reports only the *incremental*
+     *   `input_tokens` per delta — the bulk of the prompt rides on `cache_read_input_tokens`
+     *   / `cache_creation_input_tokens` which the Claude adapter surfaces as
+     *   `cachedInputTokens` / `cacheWriteInputTokens`. Those are real tokens the model
+     *   actually loaded into its context window (Bedrock charges for them and counts them
+     *   against the window), so they belong in the "how full" numerator. Mastra is
+     *   unaffected — it doesn't populate the cache fields, so the sum collapses to
+     *   `inputTokens` alone.
+     * - MUST treat `usedFraction` as `undefined` when ALL three input-bearing fields are
+     *   undefined — pre-first-turn, post-`clearHistory()`, or a harness reading with no
+     *   input-side counts at all.
+     */
+    getContextUsage() {
+        this.assertNotDisposed();
+        const contextWindow = this.getContextWindow();
+        const reading = this.latestUsage;
+        const { inputTokens: fresh, cachedInputTokens: cacheRead, cacheWriteInputTokens: cacheWrite } = reading;
+        // Stays `undefined` when no input-side count has been recorded at all.
+        let usedFraction;
+        if (fresh !== undefined || cacheRead !== undefined || cacheWrite !== undefined) {
+            // Missing individual counts contribute zero to the loaded-token total.
+            const loadedTokens = (fresh ?? 0) + (cacheRead ?? 0) + (cacheWrite ?? 0);
+            const rawFraction = loadedTokens / contextWindow;
+            usedFraction = Math.min(1, Math.max(0, rawFraction));
+        }
+        // Hand back a shallow copy of the reading so callers mutating `usage`
+        // cannot alter the session's stored snapshot.
+        return { usage: { ...reading }, contextWindow, usedFraction };
     }
     /**
      * @requirements
@@ -292,7 +361,15 @@ export class DefaultChatSession {
                     createdAt: this.clock.now(),
                 },
             ]
-            : message;
+            : // `Message.createdAt` is required-on-read, optional-on-write —
+                // the SDK owns the backfill so harnesses see populated
+                // timestamps regardless of consumer-construction style. The
+                // shared `backfillCreatedAt` helper steps per-position via
+                // `clock.nextAfter` so a bulk insert produces strictly-
+                // ascending values. The two production harnesses share the
+                // same helper at their own `addContext` boundary so a
+                // direct `harness.addContext` call gets the same shape.
+                backfillCreatedAt(message, this.clock);
         await this.harness.addContext(this.agentId, this.threadId, messages);
     }
     /**
@@ -444,6 +521,23 @@ export class DefaultChatSession {
             error,
         });
     }
+    /**
+     * issue #529 contract change: a settle call (`approveToolCall` /
+     * `declineToolCall` / `submitToolResult`) rejected. The settle's
+     * Promise is the consumer's primary failure surface, but subscribers
+     * registered via {@link ChatSession.subscribe} also expect to observe
+     * `error + finish` events so a UI bound to the chat stream can
+     * render the failure. Emit those without firing
+     * `chat-stream-error` telemetry — chat-stream-* telemetry is owned
+     * by the chat() lifecycle, not by settle calls (issue #529: one
+     * chat-stream-started/completed/error pair per turn, not per
+     * settle).
+     */
+    notifySettleRejection(err) {
+        // Wrap non-Error rejections, then emit `error` followed by `finish`, in that order.
+        const failure = { type: 'error', error: err instanceof Error ? err : new Error(String(err)) };
+        [failure, { type: 'finish', finishReason: 'error' }].forEach((event) => this.chatEventBus.emit(event));
+    }
     assertNotDisposed() {
         if (this.disposed) {
             throw new AgentSDKError('ChatSession has been disposed.', AgentSDKErrorType.DISPOSED);