@salesforce/sfdx-agent-sdk 0.14.0 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +255 -100
- package/dist/agent-connectivity-resolver.d.ts +16 -1
- package/dist/agent-connectivity-resolver.js +54 -3
- package/dist/agent.d.ts +1 -1
- package/dist/agent.js +30 -14
- package/dist/chat-session.d.ts +109 -27
- package/dist/chat-session.js +120 -26
- package/dist/harness/agent-harness.d.ts +59 -22
- package/dist/harness/gen-sink.d.ts +41 -0
- package/dist/harness/gen-sink.js +88 -0
- package/dist/harness/harness-config.d.ts +10 -3
- package/dist/harness/index.d.ts +1 -0
- package/dist/harness/index.js +1 -0
- package/dist/harness/public.d.ts +49 -0
- package/dist/harness/public.js +10 -0
- package/dist/index.d.ts +3 -5
- package/dist/index.js +1 -4
- package/dist/mcp-config.d.ts +26 -0
- package/dist/types/events.d.ts +1 -14
- package/dist/types/messages.d.ts +12 -1
- package/dist/types/usage.d.ts +65 -0
- package/package.json +9 -5
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* Copyright 2026, Salesforce, Inc. All rights reserved.
|
|
3
3
|
* See LICENSE.txt for license terms.
|
|
4
4
|
*/
|
|
5
|
-
import { DefaultLLMGatewayClientFactory, Models, createJWTFromConnection, } from '@salesforce/llm-gateway-sdk';
|
|
5
|
+
import { DefaultLLMGatewayClientFactory, Model, ModelName, Models, createClaudeModel, createJWTFromConnection, } from '@salesforce/llm-gateway-sdk';
|
|
6
6
|
import { SfApiEnv, RealOrgConnectionFactory, } from '@salesforce/agentic-common';
|
|
7
7
|
// TODO(@W-22782317): Temporary workaround — only on prod orgs the LLM Gateway must
|
|
8
8
|
// route requests through AgentforceVibes rather than the default VibesService. Remove once a
|
|
@@ -46,9 +46,60 @@ export class DefaultAgentConnectivityResolver {
|
|
|
46
46
|
const featureId = env === SfApiEnv.Prod ? PROD_ORG_FEATURE_ID : undefined;
|
|
47
47
|
const orgJwt = await createJWTFromConnection(orgConnection, { featureId });
|
|
48
48
|
const llmGatewayClient = this.gatewayClientFactory.create(orgJwt, { env });
|
|
49
|
-
|
|
50
|
-
llmGatewayClient.setModel(Models.getByName(modelName));
|
|
49
|
+
llmGatewayClient.setModel(resolveAgentConfigModel(config.modelId));
|
|
51
50
|
return { llmGatewayClient, orgConnection, orgJwt };
|
|
52
51
|
}
|
|
53
52
|
}
|
|
53
|
+
/**
 * Turns an `AgentConfig.modelId` value into a concrete {@link Model}.
 *
 * Inputs are examined in this order:
 *  1. `undefined`            -> `Models.getDefault()`.
 *  2. a live {@link Model}   -> returned untouched (consumer-built model passes through).
 *  3. a string               -> strict registry lookup via {@link Models.getByName}.
 *  4. anything else          -> treated as a persisted-and-restored `Model` (a plain object
 *                               whose prototype was dropped by a JSON round-trip) and handed
 *                               to `rehydratePersistedModel`, which rebuilds it or throws.
 *
 * Exported so `Agent.updateAgentConfig` can run the same resolution when comparing the
 * previous and next models without re-running the full connectivity resolver.
 *
 * @param modelId - A {@link ModelName} string, a {@link Model} instance, a persisted model
 *   object, or `undefined`.
 * @returns The resolved {@link Model}.
 */
export function resolveAgentConfigModel(modelId) {
    if (modelId === undefined) {
        return Models.getDefault();
    }
    // Known limitation: `instanceof Model` is realm-scoped. A consumer whose dependency tree
    // resolves two copies of `@salesforce/llm-gateway-sdk` will have their `Model` instance
    // fail this check and continue to the persisted-object branch below. That branch works
    // for Claude variants and throws for everything else. The duplicate-package case is a
    // packaging bug on the consumer side and is deliberately not papered over here.
    if (modelId instanceof Model) {
        return modelId;
    }
    return typeof modelId === 'string'
        ? Models.getByName(modelId)
        : rehydratePersistedModel(modelId);
}
|
|
81
|
+
/**
 * Rebuilds a {@link Model} from a persisted (JSON round-tripped) plain object.
 *
 * Resolution order:
 *  - a `name` that matches a {@link ModelName} value is resolved through the strict
 *    registry ({@link Models.getByName}), so the result has the correct prototype;
 *  - a `name` starting with `llmgateway__BedrockAnthropic` is rebuilt through
 *    {@link createClaudeModel}, forwarding the persisted capability fields;
 *  - every other shape is rejected.
 *
 * @param persisted - The plain object restored from a persisted `AgentConfig.modelId`.
 * @returns The rehydrated {@link Model}.
 * @throws {Error} If `persisted` is `null`/`undefined`, has no string `name`, or names a
 *   model that is neither in the registry nor a Bedrock-Anthropic Claude variant.
 */
function rehydratePersistedModel(persisted) {
    // `null` survives a JSON round-trip and slips past the caller's `=== undefined` guard.
    // Reject it explicitly so the caller sees a descriptive error rather than a raw
    // TypeError from the property access below.
    if (persisted === null || persisted === undefined) {
        throw new Error(`Cannot resolve modelId: expected a persisted model object but received ${String(persisted)}.`);
    }
    const { name } = persisted;
    if (typeof name !== 'string') {
        throw new Error(`Cannot resolve modelId: missing string "name" on persisted object.`);
    }
    // If the persisted name matches an in-tree model, prefer the strict registry — the
    // returned instance has the correct prototype and the canonical caps.
    if (Object.values(ModelName).includes(name)) {
        return Models.getByName(name);
    }
    if (!name.startsWith('llmgateway__BedrockAnthropic')) {
        throw new Error(`Cannot rehydrate persisted model "${name}". Only Bedrock-Anthropic Claude variants are supported via the consumer-built Model escape hatch.`);
    }
    const { displayId, maxInputTokens, maxOutputTokens, contextWindow, supportsPromptCache, supportedFormats, permittedParameters, customHeaders, } = persisted;
    return createClaudeModel(name, {
        displayId,
        maxInputTokens,
        maxOutputTokens,
        contextWindow,
        supportsPromptCache,
        supportedFormats,
        permittedParameters,
        customHeaders,
    });
}
|
|
54
105
|
//# sourceMappingURL=agent-connectivity-resolver.js.map
|
package/dist/agent.d.ts
CHANGED
|
@@ -4,7 +4,7 @@ import { type AgentConfig } from './harness/harness-config.js';
|
|
|
4
4
|
import { type ChatSession } from './chat-session.js';
|
|
5
5
|
import type { McpServerInfo } from './mcp-config.js';
|
|
6
6
|
import { type JSONWebToken, type LLMGatewayClient } from '@salesforce/llm-gateway-sdk';
|
|
7
|
-
import type
|
|
7
|
+
import { type AgentConnectivityResolver } from './agent-connectivity-resolver.js';
|
|
8
8
|
import type { AgentIdentityStore } from './internal/agent-identity-store.js';
|
|
9
9
|
import type { TelemetryRouter, TelemetrySlice } from './internal/telemetry-router.js';
|
|
10
10
|
import type { TelemetryBus, TelemetryEventCallback } from './types/telemetry-events.js';
|
package/dist/agent.js
CHANGED
|
@@ -5,7 +5,8 @@
|
|
|
5
5
|
import { EventBus, LogBus, RealClock, UUIDGenerator, } from '@salesforce/agentic-common';
|
|
6
6
|
import { toHarnessConfig } from './harness/harness-config.js';
|
|
7
7
|
import { DefaultChatSession } from './chat-session.js';
|
|
8
|
-
import {
|
|
8
|
+
import {} from '@salesforce/llm-gateway-sdk';
|
|
9
|
+
import { resolveAgentConfigModel } from './agent-connectivity-resolver.js';
|
|
9
10
|
import { AgentSDKError, AgentSDKErrorType } from './errors.js';
|
|
10
11
|
/**
|
|
11
12
|
* Default implementation of {@link Agent} that delegates
|
|
@@ -112,8 +113,8 @@ export class DefaultAgent {
|
|
|
112
113
|
const previousOrgJwt = this.orgJwt;
|
|
113
114
|
const nextConfig = { ...this.config, ...config };
|
|
114
115
|
const orgAliasRequested = Object.prototype.hasOwnProperty.call(config, 'orgAlias');
|
|
115
|
-
const
|
|
116
|
-
const
|
|
116
|
+
const previousModel = previousClient.getModel();
|
|
117
|
+
const nextModel = resolveAgentConfigModel(nextConfig.modelId);
|
|
117
118
|
let nextClient = previousClient;
|
|
118
119
|
let nextConnection = this.orgConnection;
|
|
119
120
|
let nextOrgJwt = this.orgJwt;
|
|
@@ -123,14 +124,16 @@ export class DefaultAgent {
|
|
|
123
124
|
nextConnection = runtime.orgConnection;
|
|
124
125
|
nextOrgJwt = runtime.orgJwt;
|
|
125
126
|
}
|
|
126
|
-
else if (
|
|
127
|
+
else if (nextModel.name !== previousModel.name) {
|
|
127
128
|
// Keep the same authenticated client, but pin the updated model.
|
|
128
129
|
// (If modelId is omitted, the resolver pinned the default at creation time.)
|
|
129
|
-
nextClient.setModel(
|
|
130
|
+
nextClient.setModel(nextModel);
|
|
130
131
|
}
|
|
131
132
|
await this.harness.destroyAgent(this.agentId);
|
|
133
|
+
let nextConfigRegistered = false;
|
|
132
134
|
try {
|
|
133
135
|
await this.harness.createAgent(this.agentId, this.projectRoot, nextClient, toHarnessConfig(nextConfig, nextOrgJwt), options);
|
|
136
|
+
nextConfigRegistered = true;
|
|
134
137
|
// Persist before the in-memory swaps so a write failure flows through the same
|
|
135
138
|
// catch block as a recreate failure: the rollback restores the harness with
|
|
136
139
|
// previousConfig and disk state remains the pre-update record.
|
|
@@ -148,16 +151,21 @@ export class DefaultAgent {
|
|
|
148
151
|
catch (error) {
|
|
149
152
|
// Best-effort restoration to keep wrapper and harness state aligned.
|
|
150
153
|
try {
|
|
151
|
-
// Restore client model if we mutated it in-place.
|
|
154
|
+
// Restore client model if we mutated it in-place. We re-pin the live previousModel
|
|
155
|
+
// instance (captured above as previousClient.getModel()) rather than re-resolving from
|
|
156
|
+
// this.config.modelId, because a JSON-rehydrated config may have a plain object there
|
|
157
|
+
// that would round-trip through createClaudeModel and lose the original prototype.
|
|
152
158
|
if (nextClient === previousClient) {
|
|
153
|
-
previousClient.setModel(
|
|
159
|
+
previousClient.setModel(previousModel);
|
|
160
|
+
}
|
|
161
|
+
// Clear nextConfig registration only when the harness recreate
|
|
162
|
+
// actually succeeded (identityStore.write-failure path) — the
|
|
163
|
+
// harness throws on unknown id, so calling destroyAgent on the
|
|
164
|
+
// harness-recreate-failure path would short-circuit the rollback
|
|
165
|
+
// createAgent below.
|
|
166
|
+
if (nextConfigRegistered) {
|
|
167
|
+
await this.harness.destroyAgent(this.agentId);
|
|
154
168
|
}
|
|
155
|
-
// Clear any nextConfig registration left behind by a successful harness recreate
|
|
156
|
-
// before the rollback createAgent runs. On the harness-recreate-failure path this
|
|
157
|
-
// is a no-op (the agent was never registered with nextConfig); on the
|
|
158
|
-
// identityStore.write-failure path it removes the live nextConfig so the rollback
|
|
159
|
-
// doesn't trip the harness's duplicate-registration guard.
|
|
160
|
-
await this.harness.destroyAgent(this.agentId);
|
|
161
169
|
await this.harness.createAgent(this.agentId, this.projectRoot, previousClient, toHarnessConfig(previousConfig, previousOrgJwt));
|
|
162
170
|
}
|
|
163
171
|
catch {
|
|
@@ -319,10 +327,18 @@ export class DefaultAgent {
|
|
|
319
327
|
}
|
|
320
328
|
attachSession(threadId) {
|
|
321
329
|
const slice = this.router.registerSession(threadId);
|
|
330
|
+
// Live getter — read at call time so getContextUsage() reflects the
|
|
331
|
+
// model bound to the agent right now, not the model that was bound
|
|
332
|
+
// when this session was created. updateAgentConfig() can swap the
|
|
333
|
+
// underlying LLMGatewayClient mid-life. Per the SDK's Critical
|
|
334
|
+
// Invariant on context-window reachability, every bound model
|
|
335
|
+
// exposes a usable `contextWindow`; #507's decoupling work must
|
|
336
|
+
// preserve that, so this access is contractually safe.
|
|
337
|
+
const getContextWindow = () => this.llmGatewayClient.getModel().contextWindow;
|
|
322
338
|
const session = new DefaultChatSession(this.harness, this.agentId, threadId, slice, {
|
|
323
339
|
telemetry: this.telemetryBus,
|
|
324
340
|
log: this.logBus,
|
|
325
|
-
}, this.clock, this.idGenerator);
|
|
341
|
+
}, getContextWindow, this.clock, this.idGenerator);
|
|
326
342
|
this.sessions.set(threadId, session);
|
|
327
343
|
this.sessionSliceUnregisters.set(threadId, () => this.router.unregisterSession(threadId));
|
|
328
344
|
this.telemetryBus.emit({
|
package/dist/chat-session.d.ts
CHANGED
|
@@ -6,6 +6,7 @@ import type { ChatEvent, ChatStreamResult } from './types/events.js';
|
|
|
6
6
|
import type { Message, MessagePart } from './types/messages.js';
|
|
7
7
|
import type { TelemetryBus, TelemetryEventCallback } from './types/telemetry-events.js';
|
|
8
8
|
import type { ToolResultInfo } from './types/tools.js';
|
|
9
|
+
import type { ContextUsage } from './types/usage.js';
|
|
9
10
|
/**
|
|
10
11
|
* Options for a single chat interaction.
|
|
11
12
|
*/
|
|
@@ -61,11 +62,17 @@ export interface ChatSession {
|
|
|
61
62
|
*
|
|
62
63
|
* "Client-side tool" means a tool you declared in {@link AgentConfig.tools}
|
|
63
64
|
* without an `execute` function — the SDK registers its name + schema with the
|
|
64
|
-
* model but does not run it. When the model calls one, the chat eventStream
|
|
65
|
-
* emits a `tool-call` event
|
|
66
|
-
*
|
|
67
|
-
*
|
|
68
|
-
*
|
|
65
|
+
* model but does not run it. When the model calls one, the chat `eventStream`
|
|
66
|
+
* emits a `tool-call` event. Your application runs the tool however it likes
|
|
67
|
+
* (HTTP call, DB query, UI prompt, etc.) and calls this method with the result;
|
|
68
|
+
* the agent loop resumes and the post-resume events (`tool-result`, model
|
|
69
|
+
* follow-up text, terminal `finish`) arrive on the **same** `eventStream`
|
|
70
|
+
* the original {@link chat} call returned. The consumer keeps iterating it.
|
|
71
|
+
*
|
|
72
|
+
* Returns `Promise<void>` once the harness has accepted the result. The
|
|
73
|
+
* promise rejects on pre-stream failure (the `chat()`-returned subscribers
|
|
74
|
+
* still observe `ErrorEvent` + `FinishEvent` before the rejection so the
|
|
75
|
+
* subscribe-side contract holds).
|
|
69
76
|
*
|
|
70
77
|
* Use this method ONLY for client-side tools. Tools provided via
|
|
71
78
|
* {@link AgentConfig.mcpServers} are executed by the harness — their results
|
|
@@ -73,23 +80,22 @@ export interface ChatSession {
|
|
|
73
80
|
* Human-in-the-loop approval of harness-executed tools uses
|
|
74
81
|
* {@link approveToolCall} / {@link declineToolCall}, not this method.
|
|
75
82
|
*
|
|
76
|
-
* On pre-stream failure, subscribers are notified with `ErrorEvent` + `FinishEvent` before
|
|
77
|
-
* the returned promise rejects. See the interface-level "Failure handling" notes for details.
|
|
78
|
-
*
|
|
79
83
|
* @param toolResult - The completed tool execution result. `toolCallId` and
|
|
80
84
|
* `toolName` MUST match the values from the originating `tool-call` event.
|
|
81
85
|
*/
|
|
82
|
-
submitToolResult(toolResult: ToolResultInfo): Promise<
|
|
86
|
+
submitToolResult(toolResult: ToolResultInfo): Promise<void>;
|
|
83
87
|
/**
|
|
84
88
|
* Approve a pending tool call, allowing the harness to execute it.
|
|
85
89
|
* Called after receiving a `tool-approval-request` event from the stream.
|
|
86
90
|
*
|
|
87
|
-
* Returns
|
|
88
|
-
* executes the
|
|
89
|
-
*
|
|
91
|
+
* Returns `Promise<void>` once the harness has accepted the approval. The
|
|
92
|
+
* harness then executes the tool and emits the resulting events
|
|
93
|
+
* (`tool-result`, model follow-up text, terminal `finish`) on the **same**
|
|
94
|
+
* `eventStream` the original {@link chat} call returned. The consumer keeps
|
|
95
|
+
* iterating it.
|
|
90
96
|
*
|
|
91
|
-
*
|
|
92
|
-
*
|
|
97
|
+
* The promise rejects on pre-stream failure; subscribers still observe
|
|
98
|
+
* `ErrorEvent` + `FinishEvent` on the chat stream before the rejection.
|
|
93
99
|
*
|
|
94
100
|
* @param toolCallId - ID of the pending tool call to approve.
|
|
95
101
|
* @param options - Optional approval metadata.
|
|
@@ -100,21 +106,20 @@ export interface ChatSession {
|
|
|
100
106
|
*/
|
|
101
107
|
approveToolCall(toolCallId: string, options?: {
|
|
102
108
|
remember?: boolean;
|
|
103
|
-
}): Promise<
|
|
109
|
+
}): Promise<void>;
|
|
104
110
|
/**
|
|
105
111
|
* Decline a pending tool call. The stream resumes with the model
|
|
106
|
-
* acknowledging the decline and potentially suggesting alternatives
|
|
112
|
+
* acknowledging the decline and potentially suggesting alternatives —
|
|
113
|
+
* those events arrive on the **same** `eventStream` the original
|
|
114
|
+
* {@link chat} call returned.
|
|
107
115
|
*
|
|
108
|
-
* Returns
|
|
109
|
-
*
|
|
110
|
-
*
|
|
111
|
-
*
|
|
112
|
-
* On pre-stream failure, subscribers are notified with `ErrorEvent` + `FinishEvent` before
|
|
113
|
-
* the returned promise rejects. See the interface-level "Failure handling" notes for details.
|
|
116
|
+
* Returns `Promise<void>` once the harness has accepted the decline. The
|
|
117
|
+
* promise rejects on pre-stream failure; subscribers still observe
|
|
118
|
+
* `ErrorEvent` + `FinishEvent` on the chat stream before the rejection.
|
|
114
119
|
*
|
|
115
120
|
* @param toolCallId - ID of the pending tool call to decline.
|
|
116
121
|
*/
|
|
117
|
-
declineToolCall(toolCallId: string): Promise<
|
|
122
|
+
declineToolCall(toolCallId: string): Promise<void>;
|
|
118
123
|
/**
|
|
119
124
|
* Retrieve message history for this session.
|
|
120
125
|
*
|
|
@@ -123,6 +128,25 @@ export interface ChatSession {
|
|
|
123
128
|
getMessageHistory(): Promise<Message[]>;
|
|
124
129
|
/** Delete all messages in this session's history. */
|
|
125
130
|
clearHistory(): Promise<void>;
|
|
131
|
+
/**
|
|
132
|
+
* Snapshot of how much of the model's context window the most recent
|
|
133
|
+
* turn used. Always returns a `ContextUsage` — pre-first-turn and
|
|
134
|
+
* immediately after `clearHistory()`, `usage` is `{}` and `usedFraction`
|
|
135
|
+
* is `undefined`, but `contextWindow` is always populated from the
|
|
136
|
+
* agent's currently-bound model.
|
|
137
|
+
*
|
|
138
|
+
* `usage` carries the **last per-step** reading from the model — the
|
|
139
|
+
* size of the prompt the model saw on its most recent invocation,
|
|
140
|
+
* which is the right "how full is my context" answer for deciding
|
|
141
|
+
* when to call `compactThread()`. This is **not** the per-turn billing
|
|
142
|
+
* aggregate; consumers who want billing totals should subscribe to
|
|
143
|
+
* `chat-stream-completed` telemetry.
|
|
144
|
+
*
|
|
145
|
+
* The `contextWindow` is read live from the agent's currently-bound
|
|
146
|
+
* model, so it reflects any `Agent.updateAgentConfig()` model swap
|
|
147
|
+
* that happened between turns.
|
|
148
|
+
*/
|
|
149
|
+
getContextUsage(): ContextUsage;
|
|
126
150
|
/**
|
|
127
151
|
* Inject context messages into the thread without triggering an LLM response.
|
|
128
152
|
* Useful for seeding file contents, system instructions, or prior conversation
|
|
@@ -176,6 +200,23 @@ export declare class DefaultChatSession implements ChatSession {
|
|
|
176
200
|
* are stale and should not bleed into the next turn).
|
|
177
201
|
*/
|
|
178
202
|
private readonly toolStartMs;
|
|
203
|
+
/**
|
|
204
|
+
* Live getter for the agent's currently-bound model's context window.
|
|
205
|
+
* Called by {@link getContextUsage} so reads reflect the model in
|
|
206
|
+
* effect right now, not the model bound when this session was created
|
|
207
|
+
* (an `Agent.updateAgentConfig()` swap can change it mid-life).
|
|
208
|
+
*/
|
|
209
|
+
private readonly getContextWindow;
|
|
210
|
+
/**
|
|
211
|
+
* Last per-step usage reading observed on this session. Initialized
|
|
212
|
+
* to `{}` (every token field undefined) so {@link getContextUsage}
|
|
213
|
+
* can always return a populated `ContextUsage`. Updated on every
|
|
214
|
+
* `step-finish` ChatEvent whose `usage` is defined; an undefined
|
|
215
|
+
* usage is carried forward (defense against rare gateway-side gaps —
|
|
216
|
+
* see W-22692131). Reset to `{}` on `clearHistory()` so a fresh
|
|
217
|
+
* thread starts unprimed.
|
|
218
|
+
*/
|
|
219
|
+
private latestUsage;
|
|
179
220
|
private disposed;
|
|
180
221
|
/**
|
|
181
222
|
* @param harness - The agent harness managing thread and message lifecycle.
|
|
@@ -183,10 +224,12 @@ export declare class DefaultChatSession implements ChatSession {
|
|
|
183
224
|
* @param threadId - ID of the conversation thread backing this session.
|
|
184
225
|
* @param inbound - Router slice delivering harness events routed to this session.
|
|
185
226
|
* @param parent - Parent agent's buses; this session forwards its events upward into them.
|
|
227
|
+
* @param getContextWindow - Live getter for the agent's currently-bound model's `contextWindow`.
|
|
228
|
+
* Called by `getContextUsage()` so reads stay correct across `Agent.updateAgentConfig()` model swaps.
|
|
186
229
|
* @param clock - Source of monotonic timestamps for telemetry events. Defaults to `RealClock`.
|
|
187
230
|
* @param idGenerator - Source of message ids for `addContext()`. Defaults to `UUIDGenerator`.
|
|
188
231
|
*/
|
|
189
|
-
constructor(harness: AgentHarness, agentId: string, threadId: string, inbound: TelemetrySlice, parent: ChatSessionParentBuses, clock?: Clock, idGenerator?: UniqueIDGenerator);
|
|
232
|
+
constructor(harness: AgentHarness, agentId: string, threadId: string, inbound: TelemetrySlice, parent: ChatSessionParentBuses, getContextWindow: () => number, clock?: Clock, idGenerator?: UniqueIDGenerator);
|
|
190
233
|
getId(): string;
|
|
191
234
|
/**
|
|
192
235
|
* @requirements
|
|
@@ -207,7 +250,7 @@ export declare class DefaultChatSession implements ChatSession {
|
|
|
207
250
|
* - MUST notify listeners with `ErrorEvent` + `FinishEvent` and re-throw if the harness throws
|
|
208
251
|
* before returning a stream result.
|
|
209
252
|
*/
|
|
210
|
-
submitToolResult(toolResult: ToolResultInfo): Promise<
|
|
253
|
+
submitToolResult(toolResult: ToolResultInfo): Promise<void>;
|
|
211
254
|
/**
|
|
212
255
|
* @requirements
|
|
213
256
|
* - MUST yield each event from the provided `stream`.
|
|
@@ -251,7 +294,7 @@ export declare class DefaultChatSession implements ChatSession {
|
|
|
251
294
|
*/
|
|
252
295
|
approveToolCall(toolCallId: string, _options?: {
|
|
253
296
|
remember?: boolean;
|
|
254
|
-
}): Promise<
|
|
297
|
+
}): Promise<void>;
|
|
255
298
|
/**
|
|
256
299
|
* @requirements
|
|
257
300
|
* - MUST delegate to `this.harness.declineToolCall()`, passing `this.agentId`, `this.threadId`, and `toolCallId`.
|
|
@@ -263,7 +306,7 @@ export declare class DefaultChatSession implements ChatSession {
|
|
|
263
306
|
* - MUST notify listeners with `ErrorEvent` + `FinishEvent` and re-throw if the harness throws
|
|
264
307
|
* before returning a stream result.
|
|
265
308
|
*/
|
|
266
|
-
declineToolCall(toolCallId: string): Promise<
|
|
309
|
+
declineToolCall(toolCallId: string): Promise<void>;
|
|
267
310
|
/**
|
|
268
311
|
* @requirements
|
|
269
312
|
* - MUST delegate to `this.harness.getMessages()`, passing `this.agentId` and `this.threadId`.
|
|
@@ -273,8 +316,34 @@ export declare class DefaultChatSession implements ChatSession {
|
|
|
273
316
|
/**
|
|
274
317
|
* @requirements
|
|
275
318
|
* - MUST delegate to `this.harness.clearMessages()`, passing `this.agentId` and `this.threadId`.
|
|
319
|
+
* - MUST reset `latestUsage` to `{}` so the next `getContextUsage()` reports a fresh
|
|
320
|
+
* "no reading yet" snapshot until the next turn produces one.
|
|
276
321
|
*/
|
|
277
322
|
clearHistory(): Promise<void>;
|
|
323
|
+
/**
|
|
324
|
+
* @requirements
|
|
325
|
+
* - MUST always return a populated `ContextUsage`. Pre-first-turn and post-`clearHistory()`,
|
|
326
|
+
* `usage` is `{}` and `usedFraction` is `undefined`, but `contextWindow` is always
|
|
327
|
+
* populated from the agent's currently-bound model.
|
|
328
|
+
* - MUST read `contextWindow` via the constructor-injected `getContextWindow` getter
|
|
329
|
+
* so swaps via `Agent.updateAgentConfig()` are reflected on the next call. Per the
|
|
330
|
+
* SDK's Critical Invariant on context-window reachability, every bound model exposes
|
|
331
|
+
* a usable `contextWindow`; the getter does not need a defensive try/catch.
|
|
332
|
+
* - MUST compute `usedFraction = (inputTokens + cachedInputTokens + cacheWriteInputTokens) /
|
|
333
|
+
contextWindow`, clamped to `[0, 1]`. The numerator must include cached
|
|
334
|
+
* tokens because Bedrock-Claude's `message_delta.usage` reports only the *incremental*
|
|
335
|
+
* `input_tokens` per delta — the bulk of the prompt rides on `cache_read_input_tokens`
|
|
336
|
+
* / `cache_creation_input_tokens` which the Claude adapter surfaces as
|
|
337
|
+
* `cachedInputTokens` / `cacheWriteInputTokens`. Those are real tokens the model
|
|
338
|
+
* actually loaded into its context window (Bedrock charges for them and counts them
|
|
339
|
+
against the window), so they belong in the "how full" numerator. Mastra is
|
|
340
|
+
* unaffected — it doesn't populate the cache fields, so the sum collapses to
|
|
341
|
+
* `inputTokens` alone.
|
|
342
|
+
* - MUST treat `usedFraction` as `undefined` when ALL three input-bearing fields are
|
|
343
|
+
* undefined — pre-first-turn, post-`clearHistory()`, or a harness reading with no
|
|
344
|
+
* input-side counts at all.
|
|
345
|
+
*/
|
|
346
|
+
getContextUsage(): ContextUsage;
|
|
278
347
|
/**
|
|
279
348
|
* @requirements
|
|
280
349
|
* - IF `message` is a `string`, it MUST be formatted into a standard `Message` object array containing exactly one message.
|
|
@@ -330,5 +399,18 @@ export declare class DefaultChatSession implements ChatSession {
|
|
|
330
399
|
* measures real elapsed time even for pre-stream rejections.
|
|
331
400
|
*/
|
|
332
401
|
private notifyPreStreamError;
|
|
402
|
+
/**
|
|
403
|
+
* issue #529 contract change: a settle call (`approveToolCall` /
|
|
404
|
+
* `declineToolCall` / `submitToolResult`) rejected. The settle's
|
|
405
|
+
* Promise is the consumer's primary failure surface, but subscribers
|
|
406
|
+
* registered via {@link ChatSession.subscribe} also expect to observe
|
|
407
|
+
* `error + finish` events so a UI bound to the chat stream can
|
|
408
|
+
* render the failure. Emit those without firing
|
|
409
|
+
* `chat-stream-error` telemetry — chat-stream-* telemetry is owned
|
|
410
|
+
* by the chat() lifecycle, not by settle calls (issue #529: one
|
|
411
|
+
* chat-stream-started/completed/error pair per turn, not per
|
|
412
|
+
* settle).
|
|
413
|
+
*/
|
|
414
|
+
private notifySettleRejection;
|
|
333
415
|
private assertNotDisposed;
|
|
334
416
|
}
|
package/dist/chat-session.js
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* Copyright 2026, Salesforce, Inc. All rights reserved.
|
|
3
3
|
* See LICENSE.txt for license terms.
|
|
4
4
|
*/
|
|
5
|
-
import { EventBus, LogBus, RealClock, UUIDGenerator, } from '@salesforce/agentic-common';
|
|
5
|
+
import { backfillCreatedAt, EventBus, LogBus, RealClock, UUIDGenerator, } from '@salesforce/agentic-common';
|
|
6
6
|
import { AgentSDKError, AgentSDKErrorType } from './errors.js';
|
|
7
7
|
/**
|
|
8
8
|
* Default implementation of {@link ChatSession} that delegates all operations
|
|
@@ -31,6 +31,23 @@ export class DefaultChatSession {
|
|
|
31
31
|
* are stale and should not bleed into the next turn).
|
|
32
32
|
*/
|
|
33
33
|
toolStartMs = new Map();
|
|
34
|
+
/**
|
|
35
|
+
* Live getter for the agent's currently-bound model's context window.
|
|
36
|
+
* Called by {@link getContextUsage} so reads reflect the model in
|
|
37
|
+
* effect right now, not the model bound when this session was created
|
|
38
|
+
* (an `Agent.updateAgentConfig()` swap can change it mid-life).
|
|
39
|
+
*/
|
|
40
|
+
getContextWindow;
|
|
41
|
+
/**
|
|
42
|
+
* Last per-step usage reading observed on this session. Initialized
|
|
43
|
+
* to `{}` (every token field undefined) so {@link getContextUsage}
|
|
44
|
+
* can always return a populated `ContextUsage`. Updated on every
|
|
45
|
+
* `step-finish` ChatEvent whose `usage` is defined; an undefined
|
|
46
|
+
* usage is carried forward (defense against rare gateway-side gaps —
|
|
47
|
+
* see W-22692131). Reset to `{}` on `clearHistory()` so a fresh
|
|
48
|
+
* thread starts unprimed.
|
|
49
|
+
*/
|
|
50
|
+
latestUsage = {};
|
|
34
51
|
disposed = false;
|
|
35
52
|
/**
|
|
36
53
|
* @param harness - The agent harness managing thread and message lifecycle.
|
|
@@ -38,13 +55,16 @@ export class DefaultChatSession {
|
|
|
38
55
|
* @param threadId - ID of the conversation thread backing this session.
|
|
39
56
|
* @param inbound - Router slice delivering harness events routed to this session.
|
|
40
57
|
* @param parent - Parent agent's buses; this session forwards its events upward into them.
|
|
58
|
+
* @param getContextWindow - Live getter for the agent's currently-bound model's `contextWindow`.
|
|
59
|
+
* Called by `getContextUsage()` so reads stay correct across `Agent.updateAgentConfig()` model swaps.
|
|
41
60
|
* @param clock - Source of monotonic timestamps for telemetry events. Defaults to `RealClock`.
|
|
42
61
|
* @param idGenerator - Source of message ids for `addContext()`. Defaults to `UUIDGenerator`.
|
|
43
62
|
*/
|
|
44
|
-
constructor(harness, agentId, threadId, inbound, parent, clock = new RealClock(), idGenerator = new UUIDGenerator()) {
|
|
63
|
+
constructor(harness, agentId, threadId, inbound, parent, getContextWindow, clock = new RealClock(), idGenerator = new UUIDGenerator()) {
|
|
45
64
|
this.harness = harness;
|
|
46
65
|
this.agentId = agentId;
|
|
47
66
|
this.threadId = threadId;
|
|
67
|
+
this.getContextWindow = getContextWindow;
|
|
48
68
|
this.clock = clock;
|
|
49
69
|
this.idGenerator = idGenerator;
|
|
50
70
|
this.inboundUnsubs = [inbound.telemetry.forwardTo(this.telemetryBus), inbound.log.forwardTo(this.logBus)];
|
|
@@ -89,16 +109,16 @@ export class DefaultChatSession {
|
|
|
89
109
|
*/
|
|
90
110
|
async submitToolResult(toolResult) {
|
|
91
111
|
this.assertNotDisposed();
|
|
92
|
-
|
|
112
|
+
// issue #529 contract change: settle calls are control messages on the
|
|
113
|
+
// existing chat() turn's stream — they don't open a new stream and
|
|
114
|
+
// they don't emit chat-stream-started/completed. The post-resume
|
|
115
|
+
// events flow through the harness's existing turn sink, which the
|
|
116
|
+
// consumer's chat()-returned eventStream is already iterating.
|
|
93
117
|
try {
|
|
94
|
-
|
|
95
|
-
return {
|
|
96
|
-
textStream: result.textStream,
|
|
97
|
-
eventStream: this.wrapEventStream(result.eventStream, startedAt),
|
|
98
|
-
};
|
|
118
|
+
await this.harness.submitToolResult(this.agentId, this.threadId, toolResult);
|
|
99
119
|
}
|
|
100
120
|
catch (err) {
|
|
101
|
-
this.
|
|
121
|
+
this.notifySettleRejection(err);
|
|
102
122
|
throw err;
|
|
103
123
|
}
|
|
104
124
|
}
|
|
@@ -138,6 +158,18 @@ export class DefaultChatSession {
|
|
|
138
158
|
this.chatEventBus.emit(event);
|
|
139
159
|
this.deriveToolTelemetry(event);
|
|
140
160
|
yield event;
|
|
161
|
+
if (event.type === 'step-finish' && event.usage !== undefined) {
|
|
162
|
+
// Snapshot the most recent per-step usage. Last-step semantics
|
|
163
|
+
// (not the per-turn `finish.usage` aggregate) — `finish.usage`
|
|
164
|
+
// sums every step inside the turn and double-counts persistent
|
|
165
|
+
// context, which is the wrong denominator for "how full is my
|
|
166
|
+
// context". An undefined usage on this step is intentionally
|
|
167
|
+
// ignored so the prior reading is carried forward — gateway-side
|
|
168
|
+
// gaps are rare but real (W-22692131) and clobbering with
|
|
169
|
+
// undefined would surface as a transient hole consumers can't
|
|
170
|
+
// distinguish from a fresh session.
|
|
171
|
+
this.latestUsage = event.usage;
|
|
172
|
+
}
|
|
141
173
|
if (event.type === 'finish') {
|
|
142
174
|
sawFinish = true;
|
|
143
175
|
finishUsage = event.usage;
|
|
@@ -214,19 +246,17 @@ export class DefaultChatSession {
|
|
|
214
246
|
*/
|
|
215
247
|
async approveToolCall(toolCallId, _options) {
|
|
216
248
|
this.assertNotDisposed();
|
|
217
|
-
|
|
249
|
+
// issue #529 contract change: see `submitToolResult` for the rationale.
|
|
250
|
+
// Settle is a control message on the existing turn; events flow on
|
|
251
|
+
// the chat()-returned stream.
|
|
218
252
|
try {
|
|
219
|
-
|
|
220
|
-
this.emitToolApprovalResolved(toolCallId, true);
|
|
221
|
-
return {
|
|
222
|
-
textStream: result.textStream,
|
|
223
|
-
eventStream: this.wrapEventStream(result.eventStream, startedAt),
|
|
224
|
-
};
|
|
253
|
+
await this.harness.approveToolCall(this.agentId, this.threadId, toolCallId);
|
|
225
254
|
}
|
|
226
255
|
catch (err) {
|
|
227
|
-
this.
|
|
256
|
+
this.notifySettleRejection(err);
|
|
228
257
|
throw err;
|
|
229
258
|
}
|
|
259
|
+
this.emitToolApprovalResolved(toolCallId, true);
|
|
230
260
|
}
|
|
231
261
|
/**
|
|
232
262
|
* @requirements
|
|
@@ -241,19 +271,15 @@ export class DefaultChatSession {
|
|
|
241
271
|
*/
|
|
242
272
|
async declineToolCall(toolCallId) {
|
|
243
273
|
this.assertNotDisposed();
|
|
244
|
-
|
|
274
|
+
// issue #529 contract change: see `submitToolResult` for the rationale.
|
|
245
275
|
try {
|
|
246
|
-
|
|
247
|
-
this.emitToolApprovalResolved(toolCallId, false);
|
|
248
|
-
return {
|
|
249
|
-
textStream: result.textStream,
|
|
250
|
-
eventStream: this.wrapEventStream(result.eventStream, startedAt),
|
|
251
|
-
};
|
|
276
|
+
await this.harness.declineToolCall(this.agentId, this.threadId, toolCallId);
|
|
252
277
|
}
|
|
253
278
|
catch (err) {
|
|
254
|
-
this.
|
|
279
|
+
this.notifySettleRejection(err);
|
|
255
280
|
throw err;
|
|
256
281
|
}
|
|
282
|
+
this.emitToolApprovalResolved(toolCallId, false);
|
|
257
283
|
}
|
|
258
284
|
/**
|
|
259
285
|
* @requirements
|
|
@@ -267,10 +293,53 @@ export class DefaultChatSession {
|
|
|
267
293
|
/**
|
|
268
294
|
* @requirements
|
|
269
295
|
* - MUST delegate to `this.harness.clearMessages()`, passing `this.agentId` and `this.threadId`.
|
|
296
|
+
* - MUST reset `latestUsage` to `{}` so the next `getContextUsage()` reports a fresh
|
|
297
|
+
* "no reading yet" snapshot until the next turn produces one.
|
|
270
298
|
*/
|
|
271
299
|
async clearHistory() {
|
|
272
300
|
this.assertNotDisposed();
|
|
273
301
|
await this.harness.clearMessages(this.agentId, this.threadId);
|
|
302
|
+
this.latestUsage = {};
|
|
303
|
+
}
|
|
304
|
+
/**
|
|
305
|
+
* @requirements
|
|
306
|
+
* - MUST always return a populated `ContextUsage`. Pre-first-turn and post-`clearHistory()`,
|
|
307
|
+
* `usage` is `{}` and `usedFraction` is `undefined`, but `contextWindow` is always
|
|
308
|
+
* populated from the agent's currently-bound model.
|
|
309
|
+
* - MUST read `contextWindow` via the constructor-injected `getContextWindow` getter
|
|
310
|
+
* so swaps via `Agent.updateAgentConfig()` are reflected on the next call. Per the
|
|
311
|
+
* SDK's Critical Invariant on context-window reachability, every bound model exposes
|
|
312
|
+
* a usable `contextWindow`; the getter does not need a defensive try/catch.
|
|
313
|
+
* - MUST compute `usedFraction = (inputTokens + cachedInputTokens + cacheWriteInputTokens) /
|
|
314
|
+
* contextWindow`, clamped to `[0, 1]`. The numerator must include cached
|
|
315
|
+
* tokens because Bedrock-Claude's `message_delta.usage` reports only the *incremental*
|
|
316
|
+
* `input_tokens` per delta — the bulk of the prompt rides on `cache_read_input_tokens`
|
|
317
|
+
* / `cache_creation_input_tokens` which the Claude adapter surfaces as
|
|
318
|
+
* `cachedInputTokens` / `cacheWriteInputTokens`. Those are real tokens the model
|
|
319
|
+
* actually loaded into its context window (Bedrock charges for them and counts them
|
|
320
|
+
* against the window), so they belong in the "how full" numerator. Mastra is
|
|
321
|
+
* unaffected — it doesn't populate the cache fields, so the sum collapses to
|
|
322
|
+
* `inputTokens` alone.
|
|
323
|
+
* - MUST treat `usedFraction` as `undefined` when ALL three input-bearing fields are
|
|
324
|
+
* undefined — pre-first-turn, post-`clearHistory()`, or a harness reading with no
|
|
325
|
+
* input-side counts at all.
|
|
326
|
+
*/
|
|
327
|
+
getContextUsage() {
|
|
328
|
+
this.assertNotDisposed();
|
|
329
|
+
const contextWindow = this.getContextWindow();
|
|
330
|
+
const { inputTokens, cachedInputTokens, cacheWriteInputTokens } = this.latestUsage;
|
|
331
|
+
const allInputUndefined = inputTokens === undefined && cachedInputTokens === undefined && cacheWriteInputTokens === undefined;
|
|
332
|
+
const effectiveInputTokens = allInputUndefined
|
|
333
|
+
? undefined
|
|
334
|
+
: (inputTokens ?? 0) + (cachedInputTokens ?? 0) + (cacheWriteInputTokens ?? 0);
|
|
335
|
+
const usedFraction = effectiveInputTokens === undefined
|
|
336
|
+
? undefined
|
|
337
|
+
: Math.min(1, Math.max(0, effectiveInputTokens / contextWindow));
|
|
338
|
+
// Spread `latestUsage` so consumer mutation of the returned `usage`
|
|
339
|
+
// object cannot leak back into the session's internal state on a
|
|
340
|
+
// subsequent `getContextUsage()` call. `UsageMetadata`'s fields are
|
|
341
|
+
// all primitives, so a shallow copy is sufficient.
|
|
342
|
+
return { usage: { ...this.latestUsage }, contextWindow, usedFraction };
|
|
274
343
|
}
|
|
275
344
|
/**
|
|
276
345
|
* @requirements
|
|
@@ -292,7 +361,15 @@ export class DefaultChatSession {
|
|
|
292
361
|
createdAt: this.clock.now(),
|
|
293
362
|
},
|
|
294
363
|
]
|
|
295
|
-
:
|
|
364
|
+
: // `Message.createdAt` is required-on-read, optional-on-write —
|
|
365
|
+
// the SDK owns the backfill so harnesses see populated
|
|
366
|
+
// timestamps regardless of consumer-construction style. The
|
|
367
|
+
// shared `backfillCreatedAt` helper steps per-position via
|
|
368
|
+
// `clock.nextAfter` so a bulk insert produces strictly-
|
|
369
|
+
// ascending values. The two production harnesses share the
|
|
370
|
+
// same helper at their own `addContext` boundary so a
|
|
371
|
+
// direct `harness.addContext` call gets the same shape.
|
|
372
|
+
backfillCreatedAt(message, this.clock);
|
|
296
373
|
await this.harness.addContext(this.agentId, this.threadId, messages);
|
|
297
374
|
}
|
|
298
375
|
/**
|
|
@@ -444,6 +521,23 @@ export class DefaultChatSession {
|
|
|
444
521
|
error,
|
|
445
522
|
});
|
|
446
523
|
}
|
|
524
|
+
/**
|
|
525
|
+
* issue #529 contract change: a settle call (`approveToolCall` /
|
|
526
|
+
* `declineToolCall` / `submitToolResult`) rejected. The settle's
|
|
527
|
+
* Promise is the consumer's primary failure surface, but subscribers
|
|
528
|
+
* registered via {@link ChatSession.subscribe} also expect to observe
|
|
529
|
+
* `error + finish` events so a UI bound to the chat stream can
|
|
530
|
+
* render the failure. Emit those without firing
|
|
531
|
+
* `chat-stream-error` telemetry — chat-stream-* telemetry is owned
|
|
532
|
+
* by the chat() lifecycle, not by settle calls (issue #529: one
|
|
533
|
+
* chat-stream-started/completed/error pair per turn, not per
|
|
534
|
+
* settle).
|
|
535
|
+
*/
|
|
536
|
+
notifySettleRejection(err) {
|
|
537
|
+
const error = err instanceof Error ? err : new Error(String(err));
|
|
538
|
+
this.chatEventBus.emit({ type: 'error', error });
|
|
539
|
+
this.chatEventBus.emit({ type: 'finish', finishReason: 'error' });
|
|
540
|
+
}
|
|
447
541
|
assertNotDisposed() {
|
|
448
542
|
if (this.disposed) {
|
|
449
543
|
throw new AgentSDKError('ChatSession has been disposed.', AgentSDKErrorType.DISPOSED);
|