@salesforce/sfdx-agent-sdk 0.14.0 → 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,7 @@
2
2
  * Copyright 2026, Salesforce, Inc. All rights reserved.
3
3
  * See LICENSE.txt for license terms.
4
4
  */
5
- import { DefaultLLMGatewayClientFactory, Models, createJWTFromConnection, } from '@salesforce/llm-gateway-sdk';
5
+ import { DefaultLLMGatewayClientFactory, Model, ModelName, Models, createClaudeModel, createJWTFromConnection, } from '@salesforce/llm-gateway-sdk';
6
6
  import { SfApiEnv, RealOrgConnectionFactory, } from '@salesforce/agentic-common';
7
7
  // TODO(@W-22782317): Temporary workaround — only on prod orgs the LLM Gateway must
8
8
  // route requests through AgentforceVibes rather than the default VibesService. Remove once a
@@ -46,9 +46,60 @@ export class DefaultAgentConnectivityResolver {
46
46
  const featureId = env === SfApiEnv.Prod ? PROD_ORG_FEATURE_ID : undefined;
47
47
  const orgJwt = await createJWTFromConnection(orgConnection, { featureId });
48
48
  const llmGatewayClient = this.gatewayClientFactory.create(orgJwt, { env });
49
- const modelName = config.modelId ?? Models.getDefault().name;
50
- llmGatewayClient.setModel(Models.getByName(modelName));
49
+ llmGatewayClient.setModel(resolveAgentConfigModel(config.modelId));
51
50
  return { llmGatewayClient, orgConnection, orgJwt };
52
51
  }
53
52
  }
53
+ /**
54
+ * Resolves an `AgentConfig.modelId` value (which may be a {@link ModelName} enum value, a
55
+ * pre-built {@link Model} instance, or `undefined`) to a concrete {@link Model}.
56
+ *
57
+ * The enum branch goes through the strict {@link Models.getByName} registry; the live
58
+ * instance branch passes the consumer-built model through unchanged. A persisted-and-restored
59
+ * `Model` instance arrives here as a plain object (the JSON round-trip drops its prototype),
60
+ * and is rehydrated via {@link createClaudeModel} for Bedrock-Anthropic Claude variants — the
61
+ * single use case the consumer-built escape hatch was added for. Any other persisted shape is
62
+ * a programming error and throws.
63
+ *
64
+ * Exported for use by `Agent.updateAgentConfig`, which performs the same resolution when
65
+ * comparing previous and next models without re-running the full connectivity resolver.
66
+ */
67
+ export function resolveAgentConfigModel(modelId) {
68
+ if (modelId === undefined)
69
+ return Models.getDefault();
70
+ // Known limitation: `instanceof Model` is realm-scoped — a consumer that ends up with two copies
71
+ // of `@salesforce/llm-gateway-sdk` resolved in their dependency tree will have their `Model`
72
+ // instance fail this check and fall through to `rehydratePersistedModel`. That branch handles
73
+ // it correctly for Claude variants but throws for anything else. The duplicate-package case is
74
+ // a packaging bug at the consumer; we don't paper over it here.
75
+ if (modelId instanceof Model)
76
+ return modelId;
77
+ if (typeof modelId === 'string')
78
+ return Models.getByName(modelId);
79
+ return rehydratePersistedModel(modelId);
80
+ }
81
+ function rehydratePersistedModel(persisted) {
82
+ const obj = persisted;
83
+ if (typeof obj.name !== 'string') {
84
+ throw new Error(`Cannot resolve modelId: missing string "name" on persisted object.`);
85
+ }
86
+ // If the persisted name matches an in-tree model, prefer the strict registry — the
87
+ // returned instance has the correct prototype and the canonical caps.
88
+ if (Object.values(ModelName).includes(obj.name)) {
89
+ return Models.getByName(obj.name);
90
+ }
91
+ if (!obj.name.startsWith('llmgateway__BedrockAnthropic')) {
92
+ throw new Error(`Cannot rehydrate persisted model "${obj.name}". Only Bedrock-Anthropic Claude variants are supported via the consumer-built Model escape hatch.`);
93
+ }
94
+ return createClaudeModel(obj.name, {
95
+ displayId: obj.displayId,
96
+ maxInputTokens: obj.maxInputTokens,
97
+ maxOutputTokens: obj.maxOutputTokens,
98
+ contextWindow: obj.contextWindow,
99
+ supportsPromptCache: obj.supportsPromptCache,
100
+ supportedFormats: obj.supportedFormats,
101
+ permittedParameters: obj.permittedParameters,
102
+ customHeaders: obj.customHeaders,
103
+ });
104
+ }
54
105
  //# sourceMappingURL=agent-connectivity-resolver.js.map
package/dist/agent.d.ts CHANGED
@@ -4,7 +4,7 @@ import { type AgentConfig } from './harness/harness-config.js';
4
4
  import { type ChatSession } from './chat-session.js';
5
5
  import type { McpServerInfo } from './mcp-config.js';
6
6
  import { type JSONWebToken, type LLMGatewayClient } from '@salesforce/llm-gateway-sdk';
7
- import type { AgentConnectivityResolver } from './agent-connectivity-resolver.js';
7
+ import { type AgentConnectivityResolver } from './agent-connectivity-resolver.js';
8
8
  import type { AgentIdentityStore } from './internal/agent-identity-store.js';
9
9
  import type { TelemetryRouter, TelemetrySlice } from './internal/telemetry-router.js';
10
10
  import type { TelemetryBus, TelemetryEventCallback } from './types/telemetry-events.js';
package/dist/agent.js CHANGED
@@ -5,7 +5,8 @@
5
5
  import { EventBus, LogBus, RealClock, UUIDGenerator, } from '@salesforce/agentic-common';
6
6
  import { toHarnessConfig } from './harness/harness-config.js';
7
7
  import { DefaultChatSession } from './chat-session.js';
8
- import { Models } from '@salesforce/llm-gateway-sdk';
8
+ import {} from '@salesforce/llm-gateway-sdk';
9
+ import { resolveAgentConfigModel } from './agent-connectivity-resolver.js';
9
10
  import { AgentSDKError, AgentSDKErrorType } from './errors.js';
10
11
  /**
11
12
  * Default implementation of {@link Agent} that delegates
@@ -112,8 +113,8 @@ export class DefaultAgent {
112
113
  const previousOrgJwt = this.orgJwt;
113
114
  const nextConfig = { ...this.config, ...config };
114
115
  const orgAliasRequested = Object.prototype.hasOwnProperty.call(config, 'orgAlias');
115
- const previousModelName = previousClient.getModel().name;
116
- const nextModelName = nextConfig.modelId ?? Models.getDefault().name;
116
+ const previousModel = previousClient.getModel();
117
+ const nextModel = resolveAgentConfigModel(nextConfig.modelId);
117
118
  let nextClient = previousClient;
118
119
  let nextConnection = this.orgConnection;
119
120
  let nextOrgJwt = this.orgJwt;
@@ -123,14 +124,16 @@ export class DefaultAgent {
123
124
  nextConnection = runtime.orgConnection;
124
125
  nextOrgJwt = runtime.orgJwt;
125
126
  }
126
- else if (nextModelName !== previousModelName) {
127
+ else if (nextModel.name !== previousModel.name) {
127
128
  // Keep the same authenticated client, but pin the updated model.
128
129
  // (If modelId is omitted, the resolver pinned the default at creation time.)
129
- nextClient.setModel(Models.getByName(nextModelName));
130
+ nextClient.setModel(nextModel);
130
131
  }
131
132
  await this.harness.destroyAgent(this.agentId);
133
+ let nextConfigRegistered = false;
132
134
  try {
133
135
  await this.harness.createAgent(this.agentId, this.projectRoot, nextClient, toHarnessConfig(nextConfig, nextOrgJwt), options);
136
+ nextConfigRegistered = true;
134
137
  // Persist before the in-memory swaps so a write failure flows through the same
135
138
  // catch block as a recreate failure: the rollback restores the harness with
136
139
  // previousConfig and disk state remains the pre-update record.
@@ -148,16 +151,21 @@ export class DefaultAgent {
148
151
  catch (error) {
149
152
  // Best-effort restoration to keep wrapper and harness state aligned.
150
153
  try {
151
- // Restore client model if we mutated it in-place.
154
+ // Restore client model if we mutated it in-place. We re-pin the live previousModel
155
+ // instance (captured above as previousClient.getModel()) rather than re-resolving from
156
+ // this.config.modelId, because a JSON-rehydrated config may have a plain object there
157
+ // that would round-trip through createClaudeModel and lose the original prototype.
152
158
  if (nextClient === previousClient) {
153
- previousClient.setModel(Models.getByName(previousModelName));
159
+ previousClient.setModel(previousModel);
160
+ }
161
+ // Clear nextConfig registration only when the harness recreate
162
+ // actually succeeded (identityStore.write-failure path) — the
163
+ // harness throws on unknown id, so calling destroyAgent on the
164
+ // harness-recreate-failure path would short-circuit the rollback
165
+ // createAgent below.
166
+ if (nextConfigRegistered) {
167
+ await this.harness.destroyAgent(this.agentId);
154
168
  }
155
- // Clear any nextConfig registration left behind by a successful harness recreate
156
- // before the rollback createAgent runs. On the harness-recreate-failure path this
157
- // is a no-op (the agent was never registered with nextConfig); on the
158
- // identityStore.write-failure path it removes the live nextConfig so the rollback
159
- // doesn't trip the harness's duplicate-registration guard.
160
- await this.harness.destroyAgent(this.agentId);
161
169
  await this.harness.createAgent(this.agentId, this.projectRoot, previousClient, toHarnessConfig(previousConfig, previousOrgJwt));
162
170
  }
163
171
  catch {
@@ -319,10 +327,18 @@ export class DefaultAgent {
319
327
  }
320
328
  attachSession(threadId) {
321
329
  const slice = this.router.registerSession(threadId);
330
+ // Live getter — read at call time so getContextUsage() reflects the
331
+ // model bound to the agent right now, not the model that was bound
332
+ // when this session was created. updateAgentConfig() can swap the
333
+ // underlying LLMGatewayClient mid-life. Per the SDK's Critical
334
+ // Invariant on context-window reachability, every bound model
335
+ // exposes a usable `contextWindow`; #507's decoupling work must
336
+ // preserve that, so this access is contractually safe.
337
+ const getContextWindow = () => this.llmGatewayClient.getModel().contextWindow;
322
338
  const session = new DefaultChatSession(this.harness, this.agentId, threadId, slice, {
323
339
  telemetry: this.telemetryBus,
324
340
  log: this.logBus,
325
- }, this.clock, this.idGenerator);
341
+ }, getContextWindow, this.clock, this.idGenerator);
326
342
  this.sessions.set(threadId, session);
327
343
  this.sessionSliceUnregisters.set(threadId, () => this.router.unregisterSession(threadId));
328
344
  this.telemetryBus.emit({
@@ -6,6 +6,7 @@ import type { ChatEvent, ChatStreamResult } from './types/events.js';
6
6
  import type { Message, MessagePart } from './types/messages.js';
7
7
  import type { TelemetryBus, TelemetryEventCallback } from './types/telemetry-events.js';
8
8
  import type { ToolResultInfo } from './types/tools.js';
9
+ import type { ContextUsage } from './types/usage.js';
9
10
  /**
10
11
  * Options for a single chat interaction.
11
12
  */
@@ -61,11 +62,17 @@ export interface ChatSession {
61
62
  *
62
63
  * "Client-side tool" means a tool you declared in {@link AgentConfig.tools}
63
64
  * without an `execute` function — the SDK registers its name + schema with the
64
- * model but does not run it. When the model calls one, the chat eventStream
65
- * emits a `tool-call` event and ends with `finishReason: 'tool-calls'`. Your
66
- * application runs the tool however it likes (HTTP call, DB query, UI prompt,
67
- * etc.) and calls this method with the result; the agent loop resumes and
68
- * produces its next turn on the returned `ChatStreamResult.eventStream`.
65
+ * model but does not run it. When the model calls one, the chat `eventStream`
66
+ * emits a `tool-call` event. Your application runs the tool however it likes
67
+ * (HTTP call, DB query, UI prompt, etc.) and calls this method with the result;
68
+ * the agent loop resumes and the post-resume events (`tool-result`, model
69
+ * follow-up text, terminal `finish`) arrive on the **same** `eventStream`
70
+ * the original {@link chat} call returned. The consumer keeps iterating it.
71
+ *
72
+ * Returns `Promise<void>` once the harness has accepted the result. The
73
+ * promise rejects on pre-stream failure (the `chat()`-returned subscribers
74
+ * still observe `ErrorEvent` + `FinishEvent` before the rejection so the
75
+ * subscribe-side contract holds).
69
76
  *
70
77
  * Use this method ONLY for client-side tools. Tools provided via
71
78
  * {@link AgentConfig.mcpServers} are executed by the harness — their results
@@ -73,23 +80,22 @@ export interface ChatSession {
73
80
  * Human-in-the-loop approval of harness-executed tools uses
74
81
  * {@link approveToolCall} / {@link declineToolCall}, not this method.
75
82
  *
76
- * On pre-stream failure, subscribers are notified with `ErrorEvent` + `FinishEvent` before
77
- * the returned promise rejects. See the interface-level "Failure handling" notes for details.
78
- *
79
83
  * @param toolResult - The completed tool execution result. `toolCallId` and
80
84
  * `toolName` MUST match the values from the originating `tool-call` event.
81
85
  */
82
- submitToolResult(toolResult: ToolResultInfo): Promise<ChatStreamResult>;
86
+ submitToolResult(toolResult: ToolResultInfo): Promise<void>;
83
87
  /**
84
88
  * Approve a pending tool call, allowing the harness to execute it.
85
89
  * Called after receiving a `tool-approval-request` event from the stream.
86
90
  *
87
- * Returns a `ChatStreamResult` containing the continuation stream the harness
88
- * executes the approved tool, generates the model's follow-up response, and
89
- * streams both the text and events back to the caller.
91
+ * Returns `Promise<void>` once the harness has accepted the approval. The
92
+ * harness then executes the tool and emits the resulting events
93
+ * (`tool-result`, model follow-up text, terminal `finish`) on the **same**
94
+ * `eventStream` the original {@link chat} call returned. The consumer keeps
95
+ * iterating it.
90
96
  *
91
- * On pre-stream failure, subscribers are notified with `ErrorEvent` + `FinishEvent` before
92
- * the returned promise rejects. See the interface-level "Failure handling" notes for details.
97
+ * The promise rejects on pre-stream failure; subscribers still observe
98
+ * `ErrorEvent` + `FinishEvent` on the chat stream before the rejection.
93
99
  *
94
100
  * @param toolCallId - ID of the pending tool call to approve.
95
101
  * @param options - Optional approval metadata.
@@ -100,21 +106,20 @@ export interface ChatSession {
100
106
  */
101
107
  approveToolCall(toolCallId: string, options?: {
102
108
  remember?: boolean;
103
- }): Promise<ChatStreamResult>;
109
+ }): Promise<void>;
104
110
  /**
105
111
  * Decline a pending tool call. The stream resumes with the model
106
- * acknowledging the decline and potentially suggesting alternatives.
112
+ * acknowledging the decline and potentially suggesting alternatives;
113
+ * those events arrive on the **same** `eventStream` the original
114
+ * {@link chat} call returned.
107
115
  *
108
- * Returns a `ChatStreamResult` containing the continuation stream the harness
109
- * cancels the pending tool call, generates the model's acknowledgement response,
110
- * and streams both the text and events back to the caller.
111
- *
112
- * On pre-stream failure, subscribers are notified with `ErrorEvent` + `FinishEvent` before
113
- * the returned promise rejects. See the interface-level "Failure handling" notes for details.
116
+ * Returns `Promise<void>` once the harness has accepted the decline. The
117
+ * promise rejects on pre-stream failure; subscribers still observe
118
+ * `ErrorEvent` + `FinishEvent` on the chat stream before the rejection.
114
119
  *
115
120
  * @param toolCallId - ID of the pending tool call to decline.
116
121
  */
117
- declineToolCall(toolCallId: string): Promise<ChatStreamResult>;
122
+ declineToolCall(toolCallId: string): Promise<void>;
118
123
  /**
119
124
  * Retrieve message history for this session.
120
125
  *
@@ -123,6 +128,25 @@ export interface ChatSession {
123
128
  getMessageHistory(): Promise<Message[]>;
124
129
  /** Delete all messages in this session's history. */
125
130
  clearHistory(): Promise<void>;
131
+ /**
132
+ * Snapshot of how much of the model's context window the most recent
133
+ * turn used. Always returns a `ContextUsage` — pre-first-turn and
134
+ * immediately after `clearHistory()`, `usage` is `{}` and `usedFraction`
135
+ * is `undefined`, but `contextWindow` is always populated from the
136
+ * agent's currently-bound model.
137
+ *
138
+ * `usage` carries the **last per-step** reading from the model — the
139
+ * size of the prompt the model saw on its most recent invocation,
140
+ * which is the right "how full is my context" answer for deciding
141
+ * when to call `compactThread()`. This is **not** the per-turn billing
142
+ * aggregate; consumers who want billing totals should subscribe to
143
+ * `chat-stream-completed` telemetry.
144
+ *
145
+ * The `contextWindow` is read live from the agent's currently-bound
146
+ * model, so it reflects any `Agent.updateAgentConfig()` model swap
147
+ * that happened between turns.
148
+ */
149
+ getContextUsage(): ContextUsage;
126
150
  /**
127
151
  * Inject context messages into the thread without triggering an LLM response.
128
152
  * Useful for seeding file contents, system instructions, or prior conversation
@@ -176,6 +200,23 @@ export declare class DefaultChatSession implements ChatSession {
176
200
  * are stale and should not bleed into the next turn).
177
201
  */
178
202
  private readonly toolStartMs;
203
+ /**
204
+ * Live getter for the agent's currently-bound model's context window.
205
+ * Called by {@link getContextUsage} so reads reflect the model in
206
+ * effect right now, not the model bound when this session was created
207
+ * (an `Agent.updateAgentConfig()` swap can change it mid-life).
208
+ */
209
+ private readonly getContextWindow;
210
+ /**
211
+ * Last per-step usage reading observed on this session. Initialized
212
+ * to `{}` (every token field undefined) so {@link getContextUsage}
213
+ * can always return a populated `ContextUsage`. Updated on every
214
+ * `step-finish` ChatEvent whose `usage` is defined; an undefined
215
+ * usage is carried forward (defense against rare gateway-side gaps —
216
+ * see W-22692131). Reset to `{}` on `clearHistory()` so a fresh
217
+ * thread starts unprimed.
218
+ */
219
+ private latestUsage;
179
220
  private disposed;
180
221
  /**
181
222
  * @param harness - The agent harness managing thread and message lifecycle.
@@ -183,10 +224,12 @@ export declare class DefaultChatSession implements ChatSession {
183
224
  * @param threadId - ID of the conversation thread backing this session.
184
225
  * @param inbound - Router slice delivering harness events routed to this session.
185
226
  * @param parent - Parent agent's buses; this session forwards its events upward into them.
227
+ * @param getContextWindow - Live getter for the agent's currently-bound model's `contextWindow`.
228
+ * Called by `getContextUsage()` so reads stay correct across `Agent.updateAgentConfig()` model swaps.
186
229
  * @param clock - Source of monotonic timestamps for telemetry events. Defaults to `RealClock`.
187
230
  * @param idGenerator - Source of message ids for `addContext()`. Defaults to `UUIDGenerator`.
188
231
  */
189
- constructor(harness: AgentHarness, agentId: string, threadId: string, inbound: TelemetrySlice, parent: ChatSessionParentBuses, clock?: Clock, idGenerator?: UniqueIDGenerator);
232
+ constructor(harness: AgentHarness, agentId: string, threadId: string, inbound: TelemetrySlice, parent: ChatSessionParentBuses, getContextWindow: () => number, clock?: Clock, idGenerator?: UniqueIDGenerator);
190
233
  getId(): string;
191
234
  /**
192
235
  * @requirements
@@ -207,7 +250,7 @@ export declare class DefaultChatSession implements ChatSession {
207
250
  * - MUST notify listeners with `ErrorEvent` + `FinishEvent` and re-throw if the harness throws
208
251
  * before returning a stream result.
209
252
  */
210
- submitToolResult(toolResult: ToolResultInfo): Promise<ChatStreamResult>;
253
+ submitToolResult(toolResult: ToolResultInfo): Promise<void>;
211
254
  /**
212
255
  * @requirements
213
256
  * - MUST yield each event from the provided `stream`.
@@ -251,7 +294,7 @@ export declare class DefaultChatSession implements ChatSession {
251
294
  */
252
295
  approveToolCall(toolCallId: string, _options?: {
253
296
  remember?: boolean;
254
- }): Promise<ChatStreamResult>;
297
+ }): Promise<void>;
255
298
  /**
256
299
  * @requirements
257
300
  * - MUST delegate to `this.harness.declineToolCall()`, passing `this.agentId`, `this.threadId`, and `toolCallId`.
@@ -263,7 +306,7 @@ export declare class DefaultChatSession implements ChatSession {
263
306
  * - MUST notify listeners with `ErrorEvent` + `FinishEvent` and re-throw if the harness throws
264
307
  * before returning a stream result.
265
308
  */
266
- declineToolCall(toolCallId: string): Promise<ChatStreamResult>;
309
+ declineToolCall(toolCallId: string): Promise<void>;
267
310
  /**
268
311
  * @requirements
269
312
  * - MUST delegate to `this.harness.getMessages()`, passing `this.agentId` and `this.threadId`.
@@ -273,8 +316,34 @@ export declare class DefaultChatSession implements ChatSession {
273
316
  /**
274
317
  * @requirements
275
318
  * - MUST delegate to `this.harness.clearMessages()`, passing `this.agentId` and `this.threadId`.
319
+ * - MUST reset `latestUsage` to `{}` so the next `getContextUsage()` reports a fresh
320
+ * "no reading yet" snapshot until the next turn produces one.
276
321
  */
277
322
  clearHistory(): Promise<void>;
323
+ /**
324
+ * @requirements
325
+ * - MUST always return a populated `ContextUsage`. Pre-first-turn and post-`clearHistory()`,
326
+ * `usage` is `{}` and `usedFraction` is `undefined`, but `contextWindow` is always
327
+ * populated from the agent's currently-bound model.
328
+ * - MUST read `contextWindow` via the constructor-injected `getContextWindow` getter
329
+ * so swaps via `Agent.updateAgentConfig()` are reflected on the next call. Per the
330
+ * SDK's Critical Invariant on context-window reachability, every bound model exposes
331
+ * a usable `contextWindow`; the getter does not need a defensive try/catch.
332
+ * - MUST compute `usedFraction = (inputTokens + cachedInputTokens + cacheWriteInputTokens) /
333
+ * contextWindow`, clamped to `[0, 1]`. The numerator must include cached
334
+ * tokens because Bedrock-Claude's `message_delta.usage` reports only the *incremental*
335
+ * `input_tokens` per delta — the bulk of the prompt rides on `cache_read_input_tokens`
336
+ * / `cache_creation_input_tokens` which the Claude adapter surfaces as
337
+ * `cachedInputTokens` / `cacheWriteInputTokens`. Those are real tokens the model
338
+ * actually loaded into its context window (Bedrock charges for them and counts them
339
+ * against the window), so they belong in the "how full" numerator. Mastra is
340
+ * unaffected — it doesn't populate the cache fields, so the sum collapses to
341
+ * `inputTokens` alone.
342
+ * - MUST treat `usedFraction` as `undefined` when ALL three input-bearing fields are
343
+ * undefined — pre-first-turn, post-`clearHistory()`, or a harness reading with no
344
+ * input-side counts at all.
345
+ */
346
+ getContextUsage(): ContextUsage;
278
347
  /**
279
348
  * @requirements
280
349
  * - IF `message` is a `string`, it MUST be formatted into a standard `Message` object array containing exactly one message.
@@ -330,5 +399,18 @@ export declare class DefaultChatSession implements ChatSession {
330
399
  * measures real elapsed time even for pre-stream rejections.
331
400
  */
332
401
  private notifyPreStreamError;
402
+ /**
403
+ * issue #529 contract change: a settle call (`approveToolCall` /
404
+ * `declineToolCall` / `submitToolResult`) rejected. The settle's
405
+ * Promise is the consumer's primary failure surface, but subscribers
406
+ * registered via {@link ChatSession.subscribe} also expect to observe
407
+ * `error + finish` events so a UI bound to the chat stream can
408
+ * render the failure. Emit those without firing
409
+ * `chat-stream-error` telemetry — chat-stream-* telemetry is owned
410
+ * by the chat() lifecycle, not by settle calls (issue #529: one
411
+ * chat-stream-started/completed/error pair per turn, not per
412
+ * settle).
413
+ */
414
+ private notifySettleRejection;
333
415
  private assertNotDisposed;
334
416
  }
@@ -2,7 +2,7 @@
2
2
  * Copyright 2026, Salesforce, Inc. All rights reserved.
3
3
  * See LICENSE.txt for license terms.
4
4
  */
5
- import { EventBus, LogBus, RealClock, UUIDGenerator, } from '@salesforce/agentic-common';
5
+ import { backfillCreatedAt, EventBus, LogBus, RealClock, UUIDGenerator, } from '@salesforce/agentic-common';
6
6
  import { AgentSDKError, AgentSDKErrorType } from './errors.js';
7
7
  /**
8
8
  * Default implementation of {@link ChatSession} that delegates all operations
@@ -31,6 +31,23 @@ export class DefaultChatSession {
31
31
  * are stale and should not bleed into the next turn).
32
32
  */
33
33
  toolStartMs = new Map();
34
+ /**
35
+ * Live getter for the agent's currently-bound model's context window.
36
+ * Called by {@link getContextUsage} so reads reflect the model in
37
+ * effect right now, not the model bound when this session was created
38
+ * (an `Agent.updateAgentConfig()` swap can change it mid-life).
39
+ */
40
+ getContextWindow;
41
+ /**
42
+ * Last per-step usage reading observed on this session. Initialized
43
+ * to `{}` (every token field undefined) so {@link getContextUsage}
44
+ * can always return a populated `ContextUsage`. Updated on every
45
+ * `step-finish` ChatEvent whose `usage` is defined; an undefined
46
+ * usage is carried forward (defense against rare gateway-side gaps —
47
+ * see W-22692131). Reset to `{}` on `clearHistory()` so a fresh
48
+ * thread starts unprimed.
49
+ */
50
+ latestUsage = {};
34
51
  disposed = false;
35
52
  /**
36
53
  * @param harness - The agent harness managing thread and message lifecycle.
@@ -38,13 +55,16 @@ export class DefaultChatSession {
38
55
  * @param threadId - ID of the conversation thread backing this session.
39
56
  * @param inbound - Router slice delivering harness events routed to this session.
40
57
  * @param parent - Parent agent's buses; this session forwards its events upward into them.
58
+ * @param getContextWindow - Live getter for the agent's currently-bound model's `contextWindow`.
59
+ * Called by `getContextUsage()` so reads stay correct across `Agent.updateAgentConfig()` model swaps.
41
60
  * @param clock - Source of monotonic timestamps for telemetry events. Defaults to `RealClock`.
42
61
  * @param idGenerator - Source of message ids for `addContext()`. Defaults to `UUIDGenerator`.
43
62
  */
44
- constructor(harness, agentId, threadId, inbound, parent, clock = new RealClock(), idGenerator = new UUIDGenerator()) {
63
+ constructor(harness, agentId, threadId, inbound, parent, getContextWindow, clock = new RealClock(), idGenerator = new UUIDGenerator()) {
45
64
  this.harness = harness;
46
65
  this.agentId = agentId;
47
66
  this.threadId = threadId;
67
+ this.getContextWindow = getContextWindow;
48
68
  this.clock = clock;
49
69
  this.idGenerator = idGenerator;
50
70
  this.inboundUnsubs = [inbound.telemetry.forwardTo(this.telemetryBus), inbound.log.forwardTo(this.logBus)];
@@ -89,16 +109,16 @@ export class DefaultChatSession {
89
109
  */
90
110
  async submitToolResult(toolResult) {
91
111
  this.assertNotDisposed();
92
- const startedAt = this.emitChatStreamStarted('submit-tool-result');
112
+ // issue #529 contract change: settle calls are control messages on the
113
+ // existing chat() turn's stream — they don't open a new stream and
114
+ // they don't emit chat-stream-started/completed. The post-resume
115
+ // events flow through the harness's existing turn sink, which the
116
+ // consumer's chat()-returned eventStream is already iterating.
93
117
  try {
94
- const result = await this.harness.submitToolResult(this.agentId, this.threadId, toolResult);
95
- return {
96
- textStream: result.textStream,
97
- eventStream: this.wrapEventStream(result.eventStream, startedAt),
98
- };
118
+ await this.harness.submitToolResult(this.agentId, this.threadId, toolResult);
99
119
  }
100
120
  catch (err) {
101
- this.notifyPreStreamError(err, startedAt);
121
+ this.notifySettleRejection(err);
102
122
  throw err;
103
123
  }
104
124
  }
@@ -138,6 +158,18 @@ export class DefaultChatSession {
138
158
  this.chatEventBus.emit(event);
139
159
  this.deriveToolTelemetry(event);
140
160
  yield event;
161
+ if (event.type === 'step-finish' && event.usage !== undefined) {
162
+ // Snapshot the most recent per-step usage. Last-step semantics
163
+ // (not the per-turn `finish.usage` aggregate) — `finish.usage`
164
+ // sums every step inside the turn and double-counts persistent
165
+ // context, which is the wrong denominator for "how full is my
166
+ // context". An undefined usage on this step is intentionally
167
+ // ignored so the prior reading is carried forward — gateway-side
168
+ // gaps are rare but real (W-22692131) and clobbering with
169
+ // undefined would surface as a transient hole consumers can't
170
+ // distinguish from a fresh session.
171
+ this.latestUsage = event.usage;
172
+ }
141
173
  if (event.type === 'finish') {
142
174
  sawFinish = true;
143
175
  finishUsage = event.usage;
@@ -214,19 +246,17 @@ export class DefaultChatSession {
214
246
  */
215
247
  async approveToolCall(toolCallId, _options) {
216
248
  this.assertNotDisposed();
217
- const startedAt = this.emitChatStreamStarted('approve-tool-call');
249
+ // issue #529 contract change: see `submitToolResult` for the rationale.
250
+ // Settle is a control message on the existing turn; events flow on
251
+ // the chat()-returned stream.
218
252
  try {
219
- const result = await this.harness.approveToolCall(this.agentId, this.threadId, toolCallId);
220
- this.emitToolApprovalResolved(toolCallId, true);
221
- return {
222
- textStream: result.textStream,
223
- eventStream: this.wrapEventStream(result.eventStream, startedAt),
224
- };
253
+ await this.harness.approveToolCall(this.agentId, this.threadId, toolCallId);
225
254
  }
226
255
  catch (err) {
227
- this.notifyPreStreamError(err, startedAt);
256
+ this.notifySettleRejection(err);
228
257
  throw err;
229
258
  }
259
+ this.emitToolApprovalResolved(toolCallId, true);
230
260
  }
231
261
  /**
232
262
  * @requirements
@@ -241,19 +271,15 @@ export class DefaultChatSession {
241
271
  */
242
272
  async declineToolCall(toolCallId) {
243
273
  this.assertNotDisposed();
244
- const startedAt = this.emitChatStreamStarted('decline-tool-call');
274
+ // issue #529 contract change: see `submitToolResult` for the rationale.
245
275
  try {
246
- const result = await this.harness.declineToolCall(this.agentId, this.threadId, toolCallId);
247
- this.emitToolApprovalResolved(toolCallId, false);
248
- return {
249
- textStream: result.textStream,
250
- eventStream: this.wrapEventStream(result.eventStream, startedAt),
251
- };
276
+ await this.harness.declineToolCall(this.agentId, this.threadId, toolCallId);
252
277
  }
253
278
  catch (err) {
254
- this.notifyPreStreamError(err, startedAt);
279
+ this.notifySettleRejection(err);
255
280
  throw err;
256
281
  }
282
+ this.emitToolApprovalResolved(toolCallId, false);
257
283
  }
258
284
  /**
259
285
  * @requirements
@@ -267,10 +293,53 @@ export class DefaultChatSession {
267
293
  /**
268
294
  * @requirements
269
295
  * - MUST delegate to `this.harness.clearMessages()`, passing `this.agentId` and `this.threadId`.
296
+ * - MUST reset `latestUsage` to `{}` so the next `getContextUsage()` reports a fresh
297
+ * "no reading yet" snapshot until the next turn produces one.
270
298
  */
271
299
  async clearHistory() {
272
300
  this.assertNotDisposed();
273
301
  await this.harness.clearMessages(this.agentId, this.threadId);
302
+ this.latestUsage = {};
303
+ }
304
+ /**
305
+ * @requirements
306
+ * - MUST always return a populated `ContextUsage`. Pre-first-turn and post-`clearHistory()`,
307
+ * `usage` is `{}` and `usedFraction` is `undefined`, but `contextWindow` is always
308
+ * populated from the agent's currently-bound model.
309
+ * - MUST read `contextWindow` via the constructor-injected `getContextWindow` getter
310
+ * so swaps via `Agent.updateAgentConfig()` are reflected on the next call. Per the
311
+ * SDK's Critical Invariant on context-window reachability, every bound model exposes
312
+ * a usable `contextWindow`; the getter does not need a defensive try/catch.
313
+ * - MUST compute `usedFraction = (inputTokens + cachedInputTokens + cacheWriteInputTokens) /
314
+ * contextWindow`, clamped to `[0, 1]`. The numerator must include cached
315
+ * tokens because Bedrock-Claude's `message_delta.usage` reports only the *incremental*
316
+ * `input_tokens` per delta — the bulk of the prompt rides on `cache_read_input_tokens`
317
+ * / `cache_creation_input_tokens` which the Claude adapter surfaces as
318
+ * `cachedInputTokens` / `cacheWriteInputTokens`. Those are real tokens the model
319
+ * actually loaded into its context window (Bedrock charges for them and counts them
320
+ * against the window), so they belong in the "how full" numerator. Mastra is
321
+ * unaffected — it doesn't populate the cache fields, so the sum collapses to
322
+ * `inputTokens` alone.
323
+ * - MUST treat `usedFraction` as `undefined` when ALL three input-bearing fields are
324
+ * undefined — pre-first-turn, post-`clearHistory()`, or a harness reading with no
325
+ * input-side counts at all.
326
+ */
327
+ getContextUsage() {
328
+ this.assertNotDisposed();
329
+ const contextWindow = this.getContextWindow();
330
+ const { inputTokens, cachedInputTokens, cacheWriteInputTokens } = this.latestUsage;
331
+ const allInputUndefined = inputTokens === undefined && cachedInputTokens === undefined && cacheWriteInputTokens === undefined;
332
+ const effectiveInputTokens = allInputUndefined
333
+ ? undefined
334
+ : (inputTokens ?? 0) + (cachedInputTokens ?? 0) + (cacheWriteInputTokens ?? 0);
335
+ const usedFraction = effectiveInputTokens === undefined
336
+ ? undefined
337
+ : Math.min(1, Math.max(0, effectiveInputTokens / contextWindow));
338
+ // Spread `latestUsage` so consumer mutation of the returned `usage`
339
+ // object cannot leak back into the session's internal state on a
340
+ // subsequent `getContextUsage()` call. `UsageMetadata`'s fields are
341
+ // all primitives, so a shallow copy is sufficient.
342
+ return { usage: { ...this.latestUsage }, contextWindow, usedFraction };
274
343
  }
275
344
  /**
276
345
  * @requirements
@@ -292,7 +361,15 @@ export class DefaultChatSession {
292
361
  createdAt: this.clock.now(),
293
362
  },
294
363
  ]
295
- : message;
364
+ : // `Message.createdAt` is required-on-read, optional-on-write —
365
+ // the SDK owns the backfill so harnesses see populated
366
+ // timestamps regardless of consumer-construction style. The
367
+ // shared `backfillCreatedAt` helper steps per-position via
368
+ // `clock.nextAfter` so a bulk insert produces strictly-
369
+ // ascending values. The two production harnesses share the
370
+ // same helper at their own `addContext` boundary so a
371
+ // direct `harness.addContext` call gets the same shape.
372
+ backfillCreatedAt(message, this.clock);
296
373
  await this.harness.addContext(this.agentId, this.threadId, messages);
297
374
  }
298
375
  /**
@@ -444,6 +521,23 @@ export class DefaultChatSession {
444
521
  error,
445
522
  });
446
523
  }
524
+ /**
525
+ * issue #529 contract change: a settle call (`approveToolCall` /
526
+ * `declineToolCall` / `submitToolResult`) rejected. The settle's
527
+ * Promise is the consumer's primary failure surface, but subscribers
528
+ * registered via {@link ChatSession.subscribe} also expect to observe
529
+ * `error + finish` events so a UI bound to the chat stream can
530
+ * render the failure. Emit those without firing
531
+ * `chat-stream-error` telemetry — chat-stream-* telemetry is owned
532
+ * by the chat() lifecycle, not by settle calls (issue #529: one
533
+ * chat-stream-started/completed/error pair per turn, not per
534
+ * settle).
535
+ */
536
+ notifySettleRejection(err) {
537
+ const error = err instanceof Error ? err : new Error(String(err));
538
+ this.chatEventBus.emit({ type: 'error', error });
539
+ this.chatEventBus.emit({ type: 'finish', finishReason: 'error' });
540
+ }
447
541
  assertNotDisposed() {
448
542
  if (this.disposed) {
449
543
  throw new AgentSDKError('ChatSession has been disposed.', AgentSDKErrorType.DISPOSED);