@salesforce/sfdx-agent-sdk 0.14.0 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -152,21 +152,22 @@ keeps unparameterized call sites working.
152
152
 
153
153
  A single conversation thread.
154
154
 
155
- | Method | Signature | Description |
156
- | ------------------- | ------------------------------------------------------------------------------------- | ------------------------------------------------------------- |
157
- | `getId` | `() => string` | Session/thread identifier. |
158
- | `chat` | `(message: string, options?: ChatOptions) => Promise<ChatStreamResult>` | Send a message and stream the response. |
159
- | `submitToolResult` | `(toolResult: ToolResultInfo) => Promise<ChatStreamResult>` | Return a consumer-executed tool result and resume the stream. |
160
- | `approveToolCall` | `(toolCallId: string, options?: { remember?: boolean }) => Promise<ChatStreamResult>` | Approve a pending tool call. |
161
- | `declineToolCall` | `(toolCallId: string) => Promise<ChatStreamResult>` | Decline a pending tool call. |
162
- | `getMessageHistory` | `() => Promise<Message[]>` | Retrieve all messages in chronological order. |
163
- | `clearHistory` | `() => Promise<void>` | Delete all messages. |
164
- | `addContext` | `(message: string \| Message[]) => Promise<void>` | Inject context without triggering an LLM response. |
165
- | `subscribe` | `(callback: (event: ChatEvent) => void) => void` | Register a real-time event listener. |
166
- | `unsubscribe` | `(callback: (event: ChatEvent) => void) => void` | Remove a listener. |
167
- | `onTelemetry` | `(callback: TelemetryEventCallback) => Unsubscribe` | Subscribe to telemetry scoped to this session. |
168
- | `onLog` | `(callback: (record: LogRecord) => void) => Unsubscribe` | Subscribe to logs scoped to this session. |
169
- | `dispose` | `() => void` | Release session-level event resources. Idempotent. |
155
+ | Method | Signature | Description |
156
+ | ------------------- | ------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------- |
157
+ | `getId` | `() => string` | Session/thread identifier. |
158
+ | `chat` | `(message: string, options?: ChatOptions) => Promise<ChatStreamResult>` | Send a message and stream the response. |
159
+ | `submitToolResult` | `(toolResult: ToolResultInfo) => Promise<ChatStreamResult>` | Return a consumer-executed tool result and resume the stream. |
160
+ | `approveToolCall` | `(toolCallId: string, options?: { remember?: boolean }) => Promise<ChatStreamResult>` | Approve a pending tool call. |
161
+ | `declineToolCall` | `(toolCallId: string) => Promise<ChatStreamResult>` | Decline a pending tool call. |
162
+ | `getMessageHistory` | `() => Promise<Message[]>` | Retrieve all messages in chronological order. |
163
+ | `clearHistory` | `() => Promise<void>` | Delete all messages. |
164
+ | `getContextUsage` | `() => ContextUsage` | Snapshot of how much of the model's context window the most recent turn used. |
165
+ | `addContext` | `(message: string \| Message[]) => Promise<void>` | Inject context without triggering an LLM response. |
166
+ | `subscribe` | `(callback: (event: ChatEvent) => void) => void` | Register a real-time event listener. |
167
+ | `unsubscribe` | `(callback: (event: ChatEvent) => void) => void` | Remove a listener. |
168
+ | `onTelemetry` | `(callback: TelemetryEventCallback) => Unsubscribe` | Subscribe to telemetry scoped to this session. |
169
+ | `onLog` | `(callback: (record: LogRecord) => void) => Unsubscribe` | Subscribe to logs scoped to this session. |
170
+ | `dispose` | `() => void` | Release session-level event resources. Idempotent. |
170
171
 
171
172
  ### `ChatStreamResult`
172
173
 
@@ -193,23 +194,28 @@ Discriminated union (`event.type`) of streaming events:
193
194
  | `step-finish` | `stepIndex`, `finishReason`, `usage?` | Step completed with per-step token usage. |
194
195
  | `error` | `error`, `code?` | Mid-stream error (yielded, not thrown). |
195
196
  | `finish` | `finishReason`, `usage?` | Stream completed with aggregate token usage. |
196
- | `unmapped-chunk` | `chunkType`, `rawChunk` | Unrecognized harness event, preserved for observability. |
197
+
198
+ > **Diagnostic logging.** The `ChatEvent` union is the harness-agnostic public stream — it never carries
199
+ > harness-internal chunk shapes. When a harness encounters a chunk type its adapter does not recognize (typically after
200
+ > an upstream Mastra / Claude SDK upgrade), the chunk is skipped on the public stream and surfaced via `LogBus.debug`
201
+ > with `chunkType` and `rawChunk` in the record's `context`. Subscribe via `manager.onLog` (or `agent.onLog` /
202
+ > `session.onLog`) at debug level to observe these. Production consumers do not need to filter for unrecognized chunks.
197
203
 
198
204
  ### Configuration Types
199
205
 
200
206
  #### `AgentConfig`
201
207
 
202
- | Field | Type | Description |
203
- | --------------- | ------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
204
- | `orgAlias?` | `string` | Salesforce org alias or username. Falls back to project/default org. |
205
- | `modelId?` | `ModelName` | LLM model identifier (e.g. `'llmgateway__OpenAIGPT5'`). |
206
- | `name?` | `string` | Human-readable agent name. |
207
- | `description?` | `string` | Agent purpose description. |
208
- | `instructions?` | `string` | System instructions for the agent. |
209
- | `tools?` | `ToolDefinition[]` | Consumer-executed tool schemas. |
210
- | `mcpServers?` | `MCPConfiguration` | MCP server connections. |
211
- | `skills?` | `string[]` | Each entry is either an individual skill folder (containing `SKILL.md`) or a parent folder containing skill subfolders. Relative and absolute paths supported; forms can be mixed in the same array. |
212
- | `rules?` | `string[]` | Each entry is either an individual `.md` rule file or a directory of `.md` rule files (scanned one level deep, alphabetical, non-`.md` skipped). Bodies are composed verbatim into the agent's effective system prompt; YAML frontmatter is optional and stripped if present. Matches Claude Code's `.claude/rules/*.md` convention. |
208
+ | Field | Type | Description |
209
+ | --------------- | -------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
210
+ | `orgAlias?` | `string` | Salesforce org alias or username. Falls back to project/default org. |
211
+ | `modelId?` | `ModelName \| Model` | LLM model selector. Pass a `ModelName` enum value for an in-tree model (e.g. `'llmgateway__OpenAIGPT5'`), or a pre-built `Model` instance to opt into a Bedrock-Anthropic Claude variant the SDK has not yet released — see `createClaudeModel(gatewayId, overrides)` in `@salesforce/llm-gateway-sdk`. |
212
+ | `name?` | `string` | Human-readable agent name. |
213
+ | `description?` | `string` | Agent purpose description. |
214
+ | `instructions?` | `string` | System instructions for the agent. |
215
+ | `tools?` | `ToolDefinition[]` | Consumer-executed tool schemas. |
216
+ | `mcpServers?` | `MCPConfiguration` | MCP server connections. |
217
+ | `skills?` | `string[]` | Each entry is either an individual skill folder (containing `SKILL.md`) or a parent folder containing skill subfolders. Relative and absolute paths supported; forms can be mixed in the same array. |
218
+ | `rules?` | `string[]` | Each entry is either an individual `.md` rule file or a directory of `.md` rule files (scanned one level deep, alphabetical, non-`.md` skipped). Bodies are composed verbatim into the agent's effective system prompt; YAML frontmatter is optional and stripped if present. Matches Claude Code's `.claude/rules/*.md` convention. |
213
219
 
214
220
  #### `StreamOptions`
215
221
 
@@ -241,6 +247,12 @@ type MCPRemoteServerConfig = {
241
247
  headers?: Record<string, string>;
242
248
  enabled?: boolean;
243
249
  timeout?: number;
250
+ reconnectionOptions?: {
251
+ maxRetries?: number;
252
+ initialReconnectionDelay?: number;
253
+ maxReconnectionDelay?: number;
254
+ reconnectionDelayGrowFactor?: number;
255
+ };
244
256
  alwaysLoad?: boolean;
245
257
  };
246
258
  ```
@@ -252,6 +264,14 @@ surfaces (≤ a few tools the model needs to find without prompting). The Claude
252
264
  `_meta['anthropic/alwaysLoad'] = true` on each forwarded tool (equivalent to `defer_loading: false` on the Claude API).
253
265
  The Mastra harness eager-loads all MCP tools regardless, so the flag is a no-op there.
254
266
 
267
+ **`reconnectionOptions`** tunes the HTTP MCP transport's retry / backoff behavior. Forwarded to the underlying SDK
268
+ transport on both harnesses (Claude's `@modelcontextprotocol/sdk` `StreamableHTTPClientTransport` and Mastra's
269
+ `@mastra/mcp` `HttpServerDefinition`, which is itself typed off the same MCP SDK shape). Each field is optional;
270
+ unspecified fields fall back to the MCP SDK's built-in defaults — `maxRetries: 2`, `initialReconnectionDelay: 1000` ms,
271
+ `maxReconnectionDelay: 30000` ms, `reconnectionDelayGrowFactor: 1.5`. Partial overrides are merged with those defaults
272
+ at the harness boundary so a consumer setting only `maxRetries` doesn't zero out the others. No-op for stdio servers —
273
+ only `MCPRemoteServerConfig` carries it.
274
+
255
275
  #### `McpServerInfo`
256
276
 
257
277
  | Field | Type | Description |
@@ -375,6 +395,13 @@ type ImagePart = { type: 'image'; mimeType: 'image/png' | 'image/jpeg'; data: st
375
395
  type FilePart = { type: 'file'; mimeType: 'application/pdf'; data: string; fileName?: string };
376
396
  ```
377
397
 
398
+ `createdAt` is **required-on-read, optional-on-write**:
399
+
400
+ - Messages returned from `ChatSession.getMessageHistory()` always have `createdAt` populated, and the array is sorted
401
+ ascending by `createdAt`. Consumer code can read `msg.createdAt` directly.
402
+ - Consumers constructing `Message` literals for `ChatSession.addContext()` may omit `createdAt`; the SDK backfills the
403
+ current time before forwarding to the harness. Pass an explicit value to override.
404
+
378
405
  #### Multimodal input
379
406
 
380
407
  `ChatSession.chat()` (and the harness `stream()` it delegates to) accept either a plain string or a `MessagePart[]`. Use
@@ -409,7 +436,8 @@ await session.chat([
409
436
  },
410
437
  ]);
411
438
 
412
- // Inject multimodal context before a chat turn
439
+ // Inject multimodal context before a chat turn. `createdAt` is omitted —
440
+ // the SDK backfills it before forwarding to the harness.
413
441
  await session.addContext([
414
442
  {
415
443
  id: 'ctx-screenshot',
@@ -455,9 +483,52 @@ type UsageMetadata = {
455
483
  cacheWriteInputTokens?: number;
456
484
  };
457
485
 
486
+ type ContextUsage = {
487
+ /**
488
+ * Last per-step usage reading observed on this session. Pre-first-turn and
489
+ * immediately after `clearHistory()` this is `{}` (every token field undefined).
490
+ */
491
+ usage: UsageMetadata;
492
+ /** The model's total context-window size in tokens. Always populated. */
493
+ contextWindow: number;
494
+ /**
495
+ * `(usage.inputTokens + usage.cachedInputTokens + usage.cacheWriteInputTokens) / contextWindow`,
496
+ * clamped to [0, 1]. Cached prompt tokens are summed in because they occupy the
497
+ * model's context window — on Bedrock-Claude, the bulk of the prompt is reported
498
+ * via `cachedInputTokens` / `cacheWriteInputTokens`, not `inputTokens`. `undefined`
499
+ * when ALL three input-bearing fields are missing.
500
+ */
501
+ usedFraction: number | undefined;
502
+ };
503
+
458
504
  type FinishReason = 'stop' | 'length' | 'tool-calls' | 'content-filter' | 'error' | 'other';
459
505
  ```
460
506
 
507
+ **Tracking context-window utilization.** `ChatSession.getContextUsage()` always returns a populated `ContextUsage` —
508
+ even pre-first-turn, where `usage` is `{}` and `usedFraction` is `undefined`, but `contextWindow` is always available.
509
+ Use it to decide when to compact a thread:
510
+
511
+ ```typescript
512
+ const ctx = session.getContextUsage();
513
+ if (ctx.usedFraction !== undefined && ctx.usedFraction > 0.8) {
514
+ await agent.compactChatSession(session.getId());
515
+ }
516
+ ```
517
+
518
+ Render a context-usage indicator that distinguishes "no reading yet" from a real measurement:
519
+
520
+ ```typescript
521
+ const ctx = session.getContextUsage();
522
+ const limit = ctx.contextWindow.toLocaleString(); // always available
523
+ const used = ctx.usage.inputTokens?.toLocaleString() ?? '—';
524
+ const pct = ctx.usedFraction !== undefined ? `${Math.round(ctx.usedFraction * 100)}%` : '—';
525
+ return `${used} / ${limit} tokens (${pct})`;
526
+ ```
527
+
528
+ The snapshot uses **last-step** semantics, not the per-turn billing aggregate — `finish.usage` sums all steps in a turn
529
+ and double-counts persistent context, which is the wrong numerator for "how full is my context." For per-turn billing
530
+ totals, subscribe to `chat-stream-completed` telemetry instead.
531
+
461
532
  ### Error Handling
462
533
 
463
534
  The SDK throws `AgentSDKError` for predictable not-found and compatibility conditions. Each error has a `type` property
@@ -669,7 +740,11 @@ Returns `true` if the URL matches a Salesforce Hosted MCP Server endpoint (prod,
669
740
 
670
741
  ### Re-exported from `@salesforce/llm-gateway-sdk`
671
742
 
672
- - `ModelName` — enum of supported model identifiers
743
+ - `Model` — abstract base class. Returned by `Models.getByName(...)` and accepted as an `AgentConfig.modelId` value.
744
+ - `ModelName` — enum of in-tree model identifiers.
745
+ - `createClaudeModel(gatewayId, overrides?)` — escape-hatch factory for opting into a Bedrock-Anthropic Claude variant
746
+ the SDK has not released yet (`AgentConfig.modelId` accepts the returned instance directly).
747
+ - `ClaudeModelOverrides` — optional caps for `createClaudeModel`.
673
748
  - `SfApiEnv` — Salesforce API environment enum (`dev`, `perf`, `prod`, `stage`, `test`)
674
749
  - `inferSfApiEnv(instanceUrl, options?)` — maps an instance URL to a `SfApiEnv`. Re-exported from
675
750
  `@salesforce/agentic-common` for consumers that need the mapping without an `OrgConnection` (e.g. building a
@@ -1,4 +1,4 @@
1
- import { type JSONWebToken, type LLMGatewayClient, type LLMGatewayClientFactory } from '@salesforce/llm-gateway-sdk';
1
+ import { Model, type JSONWebToken, type LLMGatewayClient, type LLMGatewayClientFactory } from '@salesforce/llm-gateway-sdk';
2
2
  import { type OrgConnection, type OrgConnectionFactory } from '@salesforce/agentic-common';
3
3
  import type { AgentConfig } from './harness/harness-config.js';
4
4
  /**
@@ -60,3 +60,18 @@ export declare class DefaultAgentConnectivityResolver implements AgentConnectivi
60
60
  */
61
61
  resolve(projectRoot: string, config: AgentConfig): Promise<ResolvedConnectivity>;
62
62
  }
63
+ /**
64
+ * Resolves an `AgentConfig.modelId` value (which may be a {@link ModelName} enum value, a
65
+ * pre-built {@link Model} instance, or `undefined`) to a concrete {@link Model}.
66
+ *
67
+ * The enum branch goes through the strict {@link Models.getByName} registry; the live
68
+ * instance branch passes the consumer-built model through unchanged. A persisted-and-restored
69
+ * `Model` instance arrives here as a plain object (the JSON round-trip drops its prototype),
70
+ * and is rehydrated via {@link createClaudeModel} for Bedrock-Anthropic Claude variants — the
71
+ * single use case the consumer-built escape hatch was added for. Any other persisted shape is
72
+ * a programming error and throws.
73
+ *
74
+ * Exported for use by `Agent.updateAgentConfig`, which performs the same resolution when
75
+ * comparing previous and next models without re-running the full connectivity resolver.
76
+ */
77
+ export declare function resolveAgentConfigModel(modelId: AgentConfig['modelId']): Model;
@@ -2,7 +2,7 @@
2
2
  * Copyright 2026, Salesforce, Inc. All rights reserved.
3
3
  * See LICENSE.txt for license terms.
4
4
  */
5
- import { DefaultLLMGatewayClientFactory, Models, createJWTFromConnection, } from '@salesforce/llm-gateway-sdk';
5
+ import { DefaultLLMGatewayClientFactory, Model, ModelName, Models, createClaudeModel, createJWTFromConnection, } from '@salesforce/llm-gateway-sdk';
6
6
  import { SfApiEnv, RealOrgConnectionFactory, } from '@salesforce/agentic-common';
7
7
  // TODO(@W-22782317): Temporary workaround — only on prod orgs the LLM Gateway must
8
8
  // route requests through AgentforceVibes rather than the default VibesService. Remove once a
@@ -46,9 +46,60 @@ export class DefaultAgentConnectivityResolver {
46
46
  const featureId = env === SfApiEnv.Prod ? PROD_ORG_FEATURE_ID : undefined;
47
47
  const orgJwt = await createJWTFromConnection(orgConnection, { featureId });
48
48
  const llmGatewayClient = this.gatewayClientFactory.create(orgJwt, { env });
49
- const modelName = config.modelId ?? Models.getDefault().name;
50
- llmGatewayClient.setModel(Models.getByName(modelName));
49
+ llmGatewayClient.setModel(resolveAgentConfigModel(config.modelId));
51
50
  return { llmGatewayClient, orgConnection, orgJwt };
52
51
  }
53
52
  }
53
+ /**
54
+ * Resolves an `AgentConfig.modelId` value (which may be a {@link ModelName} enum value, a
55
+ * pre-built {@link Model} instance, or `undefined`) to a concrete {@link Model}.
56
+ *
57
+ * The enum branch goes through the strict {@link Models.getByName} registry; the live
58
+ * instance branch passes the consumer-built model through unchanged. A persisted-and-restored
59
+ * `Model` instance arrives here as a plain object (the JSON round-trip drops its prototype),
60
+ * and is rehydrated via {@link createClaudeModel} for Bedrock-Anthropic Claude variants — the
61
+ * single use case the consumer-built escape hatch was added for. Any other persisted shape is
62
+ * a programming error and throws.
63
+ *
64
+ * Exported for use by `Agent.updateAgentConfig`, which performs the same resolution when
65
+ * comparing previous and next models without re-running the full connectivity resolver.
66
+ */
67
+ export function resolveAgentConfigModel(modelId) {
68
+ if (modelId === undefined)
69
+ return Models.getDefault();
70
+ // Known limitation: `instanceof Model` only matches this module copy's `Model` class — a consumer that ends up with two copies
71
+ // of `@salesforce/llm-gateway-sdk` resolved in their dependency tree will have their `Model`
72
+ // instance fail this check and fall through to `rehydratePersistedModel`. That branch handles
73
+ // it correctly for Claude variants but throws for anything else. The duplicate-package case is
74
+ // a packaging bug at the consumer; we don't paper over it here.
75
+ if (modelId instanceof Model)
76
+ return modelId;
77
+ if (typeof modelId === 'string')
78
+ return Models.getByName(modelId);
79
+ return rehydratePersistedModel(modelId);
80
+ }
81
+ function rehydratePersistedModel(persisted) {
82
+ const obj = persisted;
83
+ if (typeof obj.name !== 'string') {
84
+ throw new Error(`Cannot resolve modelId: missing string "name" on persisted object.`);
85
+ }
86
+ // If the persisted name matches an in-tree model, prefer the strict registry — the
87
+ // returned instance has the correct prototype and the canonical caps.
88
+ if (Object.values(ModelName).includes(obj.name)) {
89
+ return Models.getByName(obj.name);
90
+ }
91
+ if (!obj.name.startsWith('llmgateway__BedrockAnthropic')) {
92
+ throw new Error(`Cannot rehydrate persisted model "${obj.name}". Only Bedrock-Anthropic Claude variants are supported via the consumer-built Model escape hatch.`);
93
+ }
94
+ return createClaudeModel(obj.name, {
95
+ displayId: obj.displayId,
96
+ maxInputTokens: obj.maxInputTokens,
97
+ maxOutputTokens: obj.maxOutputTokens,
98
+ contextWindow: obj.contextWindow,
99
+ supportsPromptCache: obj.supportsPromptCache,
100
+ supportedFormats: obj.supportedFormats,
101
+ permittedParameters: obj.permittedParameters,
102
+ customHeaders: obj.customHeaders,
103
+ });
104
+ }
54
105
  //# sourceMappingURL=agent-connectivity-resolver.js.map
package/dist/agent.d.ts CHANGED
@@ -4,7 +4,7 @@ import { type AgentConfig } from './harness/harness-config.js';
4
4
  import { type ChatSession } from './chat-session.js';
5
5
  import type { McpServerInfo } from './mcp-config.js';
6
6
  import { type JSONWebToken, type LLMGatewayClient } from '@salesforce/llm-gateway-sdk';
7
- import type { AgentConnectivityResolver } from './agent-connectivity-resolver.js';
7
+ import { type AgentConnectivityResolver } from './agent-connectivity-resolver.js';
8
8
  import type { AgentIdentityStore } from './internal/agent-identity-store.js';
9
9
  import type { TelemetryRouter, TelemetrySlice } from './internal/telemetry-router.js';
10
10
  import type { TelemetryBus, TelemetryEventCallback } from './types/telemetry-events.js';
package/dist/agent.js CHANGED
@@ -5,7 +5,8 @@
5
5
  import { EventBus, LogBus, RealClock, UUIDGenerator, } from '@salesforce/agentic-common';
6
6
  import { toHarnessConfig } from './harness/harness-config.js';
7
7
  import { DefaultChatSession } from './chat-session.js';
8
- import { Models } from '@salesforce/llm-gateway-sdk';
8
+ import {} from '@salesforce/llm-gateway-sdk';
9
+ import { resolveAgentConfigModel } from './agent-connectivity-resolver.js';
9
10
  import { AgentSDKError, AgentSDKErrorType } from './errors.js';
10
11
  /**
11
12
  * Default implementation of {@link Agent} that delegates
@@ -112,8 +113,8 @@ export class DefaultAgent {
112
113
  const previousOrgJwt = this.orgJwt;
113
114
  const nextConfig = { ...this.config, ...config };
114
115
  const orgAliasRequested = Object.prototype.hasOwnProperty.call(config, 'orgAlias');
115
- const previousModelName = previousClient.getModel().name;
116
- const nextModelName = nextConfig.modelId ?? Models.getDefault().name;
116
+ const previousModel = previousClient.getModel();
117
+ const nextModel = resolveAgentConfigModel(nextConfig.modelId);
117
118
  let nextClient = previousClient;
118
119
  let nextConnection = this.orgConnection;
119
120
  let nextOrgJwt = this.orgJwt;
@@ -123,14 +124,16 @@ export class DefaultAgent {
123
124
  nextConnection = runtime.orgConnection;
124
125
  nextOrgJwt = runtime.orgJwt;
125
126
  }
126
- else if (nextModelName !== previousModelName) {
127
+ else if (nextModel.name !== previousModel.name) {
127
128
  // Keep the same authenticated client, but pin the updated model.
128
129
  // (If modelId is omitted, the resolver pinned the default at creation time.)
129
- nextClient.setModel(Models.getByName(nextModelName));
130
+ nextClient.setModel(nextModel);
130
131
  }
131
132
  await this.harness.destroyAgent(this.agentId);
133
+ let nextConfigRegistered = false;
132
134
  try {
133
135
  await this.harness.createAgent(this.agentId, this.projectRoot, nextClient, toHarnessConfig(nextConfig, nextOrgJwt), options);
136
+ nextConfigRegistered = true;
134
137
  // Persist before the in-memory swaps so a write failure flows through the same
135
138
  // catch block as a recreate failure: the rollback restores the harness with
136
139
  // previousConfig and disk state remains the pre-update record.
@@ -148,16 +151,21 @@ export class DefaultAgent {
148
151
  catch (error) {
149
152
  // Best-effort restoration to keep wrapper and harness state aligned.
150
153
  try {
151
- // Restore client model if we mutated it in-place.
154
+ // Restore client model if we mutated it in-place. We re-pin the live previousModel
155
+ // instance (captured above as previousClient.getModel()) rather than re-resolving from
156
+ // this.config.modelId, because a JSON-rehydrated config may have a plain object there
157
+ // that would round-trip through createClaudeModel and lose the original prototype.
152
158
  if (nextClient === previousClient) {
153
- previousClient.setModel(Models.getByName(previousModelName));
159
+ previousClient.setModel(previousModel);
160
+ }
161
+ // Clear nextConfig registration only when the harness recreate
162
+ // actually succeeded (identityStore.write-failure path) — the
163
+ // harness throws on unknown id, so calling destroyAgent on the
164
+ // harness-recreate-failure path would short-circuit the rollback
165
+ // createAgent below.
166
+ if (nextConfigRegistered) {
167
+ await this.harness.destroyAgent(this.agentId);
154
168
  }
155
- // Clear any nextConfig registration left behind by a successful harness recreate
156
- // before the rollback createAgent runs. On the harness-recreate-failure path this
157
- // is a no-op (the agent was never registered with nextConfig); on the
158
- // identityStore.write-failure path it removes the live nextConfig so the rollback
159
- // doesn't trip the harness's duplicate-registration guard.
160
- await this.harness.destroyAgent(this.agentId);
161
169
  await this.harness.createAgent(this.agentId, this.projectRoot, previousClient, toHarnessConfig(previousConfig, previousOrgJwt));
162
170
  }
163
171
  catch {
@@ -319,10 +327,18 @@ export class DefaultAgent {
319
327
  }
320
328
  attachSession(threadId) {
321
329
  const slice = this.router.registerSession(threadId);
330
+ // Live getter — read at call time so getContextUsage() reflects the
331
+ // model bound to the agent right now, not the model that was bound
332
+ // when this session was created. updateAgentConfig() can swap the
333
+ // underlying LLMGatewayClient mid-life. Per the SDK's Critical
334
+ // Invariant on context-window reachability, every bound model
335
+ // exposes a usable `contextWindow`; #507's decoupling work must
336
+ // preserve that, so this access is contractually safe.
337
+ const getContextWindow = () => this.llmGatewayClient.getModel().contextWindow;
322
338
  const session = new DefaultChatSession(this.harness, this.agentId, threadId, slice, {
323
339
  telemetry: this.telemetryBus,
324
340
  log: this.logBus,
325
- }, this.clock, this.idGenerator);
341
+ }, getContextWindow, this.clock, this.idGenerator);
326
342
  this.sessions.set(threadId, session);
327
343
  this.sessionSliceUnregisters.set(threadId, () => this.router.unregisterSession(threadId));
328
344
  this.telemetryBus.emit({
@@ -6,6 +6,7 @@ import type { ChatEvent, ChatStreamResult } from './types/events.js';
6
6
  import type { Message, MessagePart } from './types/messages.js';
7
7
  import type { TelemetryBus, TelemetryEventCallback } from './types/telemetry-events.js';
8
8
  import type { ToolResultInfo } from './types/tools.js';
9
+ import type { ContextUsage } from './types/usage.js';
9
10
  /**
10
11
  * Options for a single chat interaction.
11
12
  */
@@ -123,6 +124,25 @@ export interface ChatSession {
123
124
  getMessageHistory(): Promise<Message[]>;
124
125
  /** Delete all messages in this session's history. */
125
126
  clearHistory(): Promise<void>;
127
+ /**
128
+ * Snapshot of how much of the model's context window the most recent
129
+ * turn used. Always returns a `ContextUsage` — pre-first-turn and
130
+ * immediately after `clearHistory()`, `usage` is `{}` and `usedFraction`
131
+ * is `undefined`, but `contextWindow` is always populated from the
132
+ * agent's currently-bound model.
133
+ *
134
+ * `usage` carries the **last per-step** reading from the model — the
135
+ * size of the prompt the model saw on its most recent invocation,
136
+ * which is the right "how full is my context" answer for deciding
137
+ * when to call `compactThread()`. This is **not** the per-turn billing
138
+ * aggregate; consumers who want billing totals should subscribe to
139
+ * `chat-stream-completed` telemetry.
140
+ *
141
+ * The `contextWindow` is read live from the agent's currently-bound
142
+ * model, so it reflects any `Agent.updateAgentConfig()` model swap
143
+ * that happened between turns.
144
+ */
145
+ getContextUsage(): ContextUsage;
126
146
  /**
127
147
  * Inject context messages into the thread without triggering an LLM response.
128
148
  * Useful for seeding file contents, system instructions, or prior conversation
@@ -176,6 +196,23 @@ export declare class DefaultChatSession implements ChatSession {
176
196
  * are stale and should not bleed into the next turn).
177
197
  */
178
198
  private readonly toolStartMs;
199
+ /**
200
+ * Live getter for the agent's currently-bound model's context window.
201
+ * Called by {@link getContextUsage} so reads reflect the model in
202
+ * effect right now, not the model bound when this session was created
203
+ * (an `Agent.updateAgentConfig()` swap can change it mid-life).
204
+ */
205
+ private readonly getContextWindow;
206
+ /**
207
+ * Last per-step usage reading observed on this session. Initialized
208
+ * to `{}` (every token field undefined) so {@link getContextUsage}
209
+ * can always return a populated `ContextUsage`. Updated on every
210
+ * `step-finish` ChatEvent whose `usage` is defined; when a step's
211
+ * usage is undefined the previous reading is kept (defense against rare gateway-side gaps —
212
+ * see W-22692131). Reset to `{}` on `clearHistory()` so a fresh
213
+ * thread starts unprimed.
214
+ */
215
+ private latestUsage;
179
216
  private disposed;
180
217
  /**
181
218
  * @param harness - The agent harness managing thread and message lifecycle.
@@ -183,10 +220,12 @@ export declare class DefaultChatSession implements ChatSession {
183
220
  * @param threadId - ID of the conversation thread backing this session.
184
221
  * @param inbound - Router slice delivering harness events routed to this session.
185
222
  * @param parent - Parent agent's buses; this session forwards its events upward into them.
223
+ * @param getContextWindow - Live getter for the agent's currently-bound model's `contextWindow`.
224
+ * Called by `getContextUsage()` so reads stay correct across `Agent.updateAgentConfig()` model swaps.
186
225
  * @param clock - Source of monotonic timestamps for telemetry events. Defaults to `RealClock`.
187
226
  * @param idGenerator - Source of message ids for `addContext()`. Defaults to `UUIDGenerator`.
188
227
  */
189
- constructor(harness: AgentHarness, agentId: string, threadId: string, inbound: TelemetrySlice, parent: ChatSessionParentBuses, clock?: Clock, idGenerator?: UniqueIDGenerator);
228
+ constructor(harness: AgentHarness, agentId: string, threadId: string, inbound: TelemetrySlice, parent: ChatSessionParentBuses, getContextWindow: () => number, clock?: Clock, idGenerator?: UniqueIDGenerator);
190
229
  getId(): string;
191
230
  /**
192
231
  * @requirements
@@ -273,8 +312,34 @@ export declare class DefaultChatSession implements ChatSession {
273
312
  /**
274
313
  * @requirements
275
314
  * - MUST delegate to `this.harness.clearMessages()`, passing `this.agentId` and `this.threadId`.
315
+ * - MUST reset `latestUsage` to `{}` so the next `getContextUsage()` reports a fresh
316
+ * "no reading yet" snapshot until the next turn produces one.
276
317
  */
277
318
  clearHistory(): Promise<void>;
319
+ /**
320
+ * @requirements
321
+ * - MUST always return a populated `ContextUsage`. Pre-first-turn and post-`clearHistory()`,
322
+ * `usage` is `{}` and `usedFraction` is `undefined`, but `contextWindow` is always
323
+ * populated from the agent's currently-bound model.
324
+ * - MUST read `contextWindow` via the constructor-injected `getContextWindow` getter
325
+ * so swaps via `Agent.updateAgentConfig()` are reflected on the next call. Per the
326
+ * SDK's Critical Invariant on context-window reachability, every bound model exposes
327
+ * a usable `contextWindow`; the getter does not need a defensive try/catch.
328
+ * - MUST compute `usedFraction = (inputTokens + cachedInputTokens + cacheWriteInputTokens) /
329
+ * contextWindow`, clamped to `[0, 1]`. The numerator must include cached
330
+ * tokens because Bedrock-Claude's `message_delta.usage` reports only the *incremental*
331
+ * `input_tokens` per delta — the bulk of the prompt rides on `cache_read_input_tokens`
332
+ * / `cache_creation_input_tokens` which the Claude adapter surfaces as
333
+ * `cachedInputTokens` / `cacheWriteInputTokens`. Those are real tokens the model
334
+ * actually loaded into its context window (Bedrock charges for them and counts them
335
+ * against the window), so they belong in the "how full" numerator. Mastra is
336
+ * unaffected — it doesn't populate the cache fields, so the sum collapses to
337
+ * `inputTokens` alone.
338
+ * - MUST treat `usedFraction` as `undefined` when ALL three input-bearing fields are
339
+ * undefined — pre-first-turn, post-`clearHistory()`, or a harness reading with no
340
+ * input-side counts at all.
341
+ */
342
+ getContextUsage(): ContextUsage;
278
343
  /**
279
344
  * @requirements
280
345
  * - IF `message` is a `string`, it MUST be formatted into a standard `Message` object array containing exactly one message.
@@ -2,7 +2,7 @@
2
2
  * Copyright 2026, Salesforce, Inc. All rights reserved.
3
3
  * See LICENSE.txt for license terms.
4
4
  */
5
- import { EventBus, LogBus, RealClock, UUIDGenerator, } from '@salesforce/agentic-common';
5
+ import { backfillCreatedAt, EventBus, LogBus, RealClock, UUIDGenerator, } from '@salesforce/agentic-common';
6
6
  import { AgentSDKError, AgentSDKErrorType } from './errors.js';
7
7
  /**
8
8
  * Default implementation of {@link ChatSession} that delegates all operations
@@ -31,6 +31,23 @@ export class DefaultChatSession {
31
31
  * are stale and should not bleed into the next turn).
32
32
  */
33
33
  toolStartMs = new Map();
34
+ /**
35
+ * Live getter for the agent's currently-bound model's context window.
36
+ * Called by {@link getContextUsage} so reads reflect the model in
37
+ * effect right now, not the model bound when this session was created
38
+ * (an `Agent.updateAgentConfig()` swap can change it mid-life).
39
+ */
40
+ getContextWindow;
41
+ /**
42
+ * Last per-step usage reading observed on this session. Initialized
43
+ * to `{}` (every token field undefined) so {@link getContextUsage}
44
+ * can always return a populated `ContextUsage`. Updated on every
45
+ * `step-finish` ChatEvent whose `usage` is defined; an undefined
46
+ * usage is carried forward (defense against rare gateway-side gaps —
47
+ * see W-22692131). Reset to `{}` on `clearHistory()` so a fresh
48
+ * thread starts unprimed.
49
+ */
50
+ latestUsage = {};
34
51
  disposed = false;
35
52
  /**
36
53
  * @param harness - The agent harness managing thread and message lifecycle.
@@ -38,13 +55,16 @@ export class DefaultChatSession {
38
55
  * @param threadId - ID of the conversation thread backing this session.
39
56
  * @param inbound - Router slice delivering harness events routed to this session.
40
57
  * @param parent - Parent agent's buses; this session forwards its events upward into them.
58
+ * @param getContextWindow - Live getter for the agent's currently-bound model's `contextWindow`.
59
+ * Called by `getContextUsage()` so reads stay correct across `Agent.updateAgentConfig()` model swaps.
41
60
  * @param clock - Source of monotonic timestamps for telemetry events. Defaults to `RealClock`.
42
61
  * @param idGenerator - Source of message ids for `addContext()`. Defaults to `UUIDGenerator`.
43
62
  */
44
- constructor(harness, agentId, threadId, inbound, parent, clock = new RealClock(), idGenerator = new UUIDGenerator()) {
63
+ constructor(harness, agentId, threadId, inbound, parent, getContextWindow, clock = new RealClock(), idGenerator = new UUIDGenerator()) {
45
64
  this.harness = harness;
46
65
  this.agentId = agentId;
47
66
  this.threadId = threadId;
67
+ this.getContextWindow = getContextWindow;
48
68
  this.clock = clock;
49
69
  this.idGenerator = idGenerator;
50
70
  this.inboundUnsubs = [inbound.telemetry.forwardTo(this.telemetryBus), inbound.log.forwardTo(this.logBus)];
@@ -138,6 +158,18 @@ export class DefaultChatSession {
138
158
  this.chatEventBus.emit(event);
139
159
  this.deriveToolTelemetry(event);
140
160
  yield event;
161
+ if (event.type === 'step-finish' && event.usage !== undefined) {
162
+ // Snapshot the most recent per-step usage. Last-step semantics
163
+ // (not the per-turn `finish.usage` aggregate) — `finish.usage`
164
+ // sums every step inside the turn and double-counts persistent
165
+ // context, which is the wrong numerator for "how full is my
166
+ // context". An undefined usage on this step is intentionally
167
+ // ignored so the prior reading is carried forward — gateway-side
168
+ // gaps are rare but real (W-22692131) and clobbering with
169
+ // undefined would surface as a transient hole consumers can't
170
+ // distinguish from a fresh session.
171
+ this.latestUsage = event.usage;
172
+ }
141
173
  if (event.type === 'finish') {
142
174
  sawFinish = true;
143
175
  finishUsage = event.usage;
@@ -267,10 +299,53 @@ export class DefaultChatSession {
267
299
  /**
268
300
  * @requirements
269
301
  * - MUST delegate to `this.harness.clearMessages()`, passing `this.agentId` and `this.threadId`.
302
+ * - MUST reset `latestUsage` to `{}` so the next `getContextUsage()` reports a fresh
303
+ * "no reading yet" snapshot until the next turn produces one.
270
304
  */
271
305
  async clearHistory() {
272
306
  this.assertNotDisposed();
273
307
  await this.harness.clearMessages(this.agentId, this.threadId);
308
+ this.latestUsage = {};
309
+ }
310
+ /**
311
+ * @requirements
312
+ * - MUST always return a populated `ContextUsage`. Pre-first-turn and post-`clearHistory()`,
313
+ * `usage` is `{}` and `usedFraction` is `undefined`, but `contextWindow` is always
314
+ * populated from the agent's currently-bound model.
315
+ * - MUST read `contextWindow` via the constructor-injected `getContextWindow` getter
316
+ * so swaps via `Agent.updateAgentConfig()` are reflected on the next call. Per the
317
+ * SDK's Critical Invariant on context-window reachability, every bound model exposes
318
+ * a usable `contextWindow`; the getter does not need a defensive try/catch.
319
+ * - MUST compute `usedFraction = (inputTokens + cachedInputTokens + cacheWriteInputTokens) /
320
+ * contextWindow`, clamped to `[0, 1]`. The numerator must include cached
321
+ * tokens because Bedrock-Claude's `message_delta.usage` reports only the *incremental*
322
+ * `input_tokens` per delta — the bulk of the prompt rides on `cache_read_input_tokens`
323
+ * / `cache_creation_input_tokens` which the Claude adapter surfaces as
324
+ * `cachedInputTokens` / `cacheWriteInputTokens`. Those are real tokens the model
325
+ * actually loaded into its context window (Bedrock charges for them and counts them
326
+ * against the window), so they belong in the "how full" numerator. Mastra is
327
+ * unaffected — it doesn't populate the cache fields, so the sum collapses to
328
+ * `inputTokens` alone.
329
+ * - MUST treat `usedFraction` as `undefined` when ALL three input-bearing fields are
330
+ * undefined — pre-first-turn, post-`clearHistory()`, or a harness reading with no
331
+ * input-side counts at all.
332
+ */
333
+ getContextUsage() {
334
+ this.assertNotDisposed();
335
+ const contextWindow = this.getContextWindow();
336
+ const { inputTokens, cachedInputTokens, cacheWriteInputTokens } = this.latestUsage;
337
+ const allInputUndefined = inputTokens === undefined && cachedInputTokens === undefined && cacheWriteInputTokens === undefined;
338
+ const effectiveInputTokens = allInputUndefined
339
+ ? undefined
340
+ : (inputTokens ?? 0) + (cachedInputTokens ?? 0) + (cacheWriteInputTokens ?? 0);
341
+ const usedFraction = effectiveInputTokens === undefined
342
+ ? undefined
343
+ : Math.min(1, Math.max(0, effectiveInputTokens / contextWindow));
344
+ // Spread `latestUsage` so consumer mutation of the returned `usage`
345
+ // object cannot leak back into the session's internal state on a
346
+ // subsequent `getContextUsage()` call. `UsageMetadata`'s fields are
347
+ // all primitives, so a shallow copy is sufficient.
348
+ return { usage: { ...this.latestUsage }, contextWindow, usedFraction };
274
349
  }
275
350
  /**
276
351
  * @requirements
@@ -292,7 +367,15 @@ export class DefaultChatSession {
292
367
  createdAt: this.clock.now(),
293
368
  },
294
369
  ]
295
- : message;
370
+ : // `Message.createdAt` is required-on-read, optional-on-write —
371
+ // the SDK owns the backfill so harnesses see populated
372
+ // timestamps regardless of consumer-construction style. The
373
+ // shared `backfillCreatedAt` helper steps per-position via
374
+ // `clock.nextAfter` so a bulk insert produces strictly-
375
+ // ascending values. The two production harnesses share the
376
+ // same helper at their own `addContext` boundary so a
377
+ // direct `harness.addContext` call gets the same shape.
378
+ backfillCreatedAt(message, this.clock);
296
379
  await this.harness.addContext(this.agentId, this.threadId, messages);
297
380
  }
298
381
  /**
@@ -112,7 +112,14 @@ export interface AgentHarness {
112
112
  }): Promise<void>;
113
113
  /**
114
114
  * Destroy an agent and release its resources (MCP connections, workspace, memory).
115
+ *
116
+ * MUST throw if `agentId` is not registered. Symmetric with `createThread`,
117
+ * `destroyThread`, and `clearMessages`, which all reject unknown ids the
118
+ * same way; gives SDK rollback paths in `Agent.updateAgentConfig` and
119
+ * `AgentManager.installAgent` an explicit failure mode they can catch.
120
+ *
115
121
  * @param agentId - ID of the agent to destroy.
122
+ * @returns `true` after a real removal.
116
123
  */
117
124
  destroyAgent(agentId: string): Promise<boolean>;
118
125
  /**
@@ -124,8 +131,11 @@ export interface AgentHarness {
124
131
  * including connection status and discovered tool names. This is a synchronous
125
132
  * snapshot — status is updated asynchronously by background discovery promises.
126
133
  *
134
+ * MUST throw if `agentId` is not registered.
135
+ *
127
136
  * @param agentId - ID of the agent whose MCP servers to inspect.
128
- * @returns Info for each configured MCP server (empty array if none configured).
137
+ * @returns Info for each configured MCP server (empty array if the agent
138
+ * exists but has no MCP servers configured).
129
139
  */
130
140
  getMcpServerInfo(agentId: string): McpServerInfo[];
131
141
  /**
@@ -175,14 +185,31 @@ export interface AgentHarness {
175
185
  */
176
186
  getThreadIds(agentId: string): Promise<string[]>;
177
187
  /**
178
- * Clone an existing thread, creating a new thread with copied message history.
179
- * Used to implement conversation forking.
188
+ * Clone an existing thread, creating a new thread that mirrors the source
189
+ * thread's state at the moment of the call. Used to implement conversation
190
+ * forking.
191
+ *
192
+ * The harness chooses the new thread's id; consumers read it from the
193
+ * returned value. The id is unique within the agent.
194
+ *
195
+ * Two source-state shapes are observable to consumers:
196
+ *
197
+ * - **Source thread has been streamed at least once** — the new thread
198
+ * inherits the source's persisted message history; subsequent
199
+ * `getMessages()` returns it. Implementations may copy the underlying
200
+ * transcript (Mastra's libsql `cloneThread`, Claude's `forkSession`)
201
+ * or any harness-specific equivalent.
202
+ * - **Source thread has never been streamed** (`addContext`-only or
203
+ * freshly-created) — the new thread is allocated empty by design;
204
+ * `addContext`-injected messages on the source are copied forward by
205
+ * harnesses that mirror them in-process, but no persisted transcript
206
+ * exists to fork.
207
+ *
180
208
  * @param agentId - ID of the owning agent.
181
209
  * @param sourceThreadId - ID of the thread to clone.
182
- * @param targetThreadId - Optional ID for the new thread.
183
210
  * @returns The ID of the cloned thread.
184
211
  */
185
- cloneThread(agentId: string, sourceThreadId: string, targetThreadId?: string): Promise<string>;
212
+ cloneThread(agentId: string, sourceThreadId: string): Promise<string>;
186
213
  /**
187
214
  * Compacts a thread's message history to reduce context window usage.
188
215
  * Starts a new conversation thread seeded with an LLM-generated summary of the current session.
@@ -257,9 +284,12 @@ export interface AgentHarness {
257
284
  /**
258
285
  * Retrieve message history for a thread.
259
286
  *
287
+ * MUST populate `Message.createdAt` on every returned message. MUST return
288
+ * messages sorted ascending by `createdAt`.
289
+ *
260
290
  * @param agentId - ID of the agent.
261
291
  * @param threadId - ID of the conversation thread.
262
- * @returns All messages in chronological order (ascending by creation time).
292
+ * @returns All messages in chronological order (ascending by `createdAt`).
263
293
  */
264
294
  getMessages(agentId: string, threadId: string): Promise<Message[]>;
265
295
  /**
@@ -1,6 +1,6 @@
1
1
  import type { ToolDefinition } from '../types/tools.js';
2
2
  import type { MCPConfiguration } from '../mcp-config.js';
3
- import type { JSONWebToken, ModelName } from '@salesforce/llm-gateway-sdk';
3
+ import type { JSONWebToken, Model, ModelName } from '@salesforce/llm-gateway-sdk';
4
4
  /**
5
5
  * Configuration for an agent's behavior and capabilities.
6
6
  * This excludes identity; `agentId` is handled separately.
@@ -14,8 +14,15 @@ export type AgentConfig = {
14
14
  * - Otherwise, use the default org configured on the machine.
15
15
  */
16
16
  orgAlias?: string;
17
- /** The model to use for this agent. */
18
- modelId?: ModelName;
17
+ /**
18
+ * The model to use for this agent.
19
+ *
20
+ * Accepts either a {@link ModelName} enum value (the typical case for in-tree models) or a
21
+ * pre-built {@link Model} instance. The instance form lets consumers opt into a Claude
22
+ * variant published on the gateway before the SDK has been updated — see
23
+ * `createClaudeModel(gatewayId, overrides)` from `@salesforce/llm-gateway-sdk`.
24
+ */
25
+ modelId?: ModelName | Model;
19
26
  /** Human-readable name for the agent. */
20
27
  name?: string;
21
28
  /** Description of the agent's purpose. ACP/OASF-ready metadata. */
package/dist/index.d.ts CHANGED
@@ -1,12 +1,13 @@
1
1
  export type { Message, MessagePart, ImagePart, FilePart } from './types/messages.js';
2
2
  export type { ChatEvent, StartEvent, TextDeltaEvent, ReasoningDeltaEvent, ToolCallEvent, ToolApprovalRequestEvent, ToolResultEvent, StepStartEvent, StepFinishEvent, ErrorEvent, FinishEvent, ChatStreamResult, } from './types/events.js';
3
3
  export type { ToolDefinition, ToolCallInfo, ToolResultInfo } from './types/tools.js';
4
- export type { FinishReason, UsageMetadata } from './types/usage.js';
4
+ export type { ContextUsage, FinishReason, UsageMetadata } from './types/usage.js';
5
5
  export type { AgentConfig, HarnessAgentConfig, StreamOptions, ToolApprovalMode } from './harness/harness-config.js';
6
6
  export { DEFAULT_MAX_STEPS, resolveToolApprovalMode } from './harness/harness-config.js';
7
7
  export type { MCPConfiguration, MCPServerConfig, MCPStdioServerConfig, MCPRemoteServerConfig, McpServerInfo, McpServerErrorCategory, McpServerErrorDetail, McpToolInfo, McpToolAnnotations, } from './mcp-config.js';
8
8
  export { McpServerStatus } from './mcp-config.js';
9
- export { ModelName } from '@salesforce/llm-gateway-sdk';
9
+ export { Model, ModelName, createClaudeModel } from '@salesforce/llm-gateway-sdk';
10
+ export type { ClaudeModelOverrides } from '@salesforce/llm-gateway-sdk';
10
11
  export { inferSfApiEnv, SfApiEnv } from '@salesforce/agentic-common';
11
12
  export { type AgentManager, type RestoreFailure, createAgentManager } from './agent-manager.js';
12
13
  export { type Agent } from './agent.js';
package/dist/index.js CHANGED
@@ -4,7 +4,7 @@
4
4
  */
5
5
  export { DEFAULT_MAX_STEPS, resolveToolApprovalMode } from './harness/harness-config.js';
6
6
  export { McpServerStatus } from './mcp-config.js';
7
- export { ModelName } from '@salesforce/llm-gateway-sdk';
7
+ export { Model, ModelName, createClaudeModel } from '@salesforce/llm-gateway-sdk';
8
8
  export { inferSfApiEnv, SfApiEnv } from '@salesforce/agentic-common';
9
9
  // ── Agent Layer ─────────────────────────────────────────────────────
10
10
  export { createAgentManager } from './agent-manager.js';
@@ -65,6 +65,32 @@ export type MCPRemoteServerConfig = {
65
65
  enabled?: boolean;
66
66
  /** Timeout in milliseconds for individual requests to the server. */
67
67
  timeout?: number;
68
+ /**
69
+ * Transport-level reconnection tuning for HTTP MCP servers. Forwarded to
70
+ * the underlying SDK transport (`@modelcontextprotocol/sdk`'s
71
+ * `StreamableHTTPClientTransport` on the Claude harness, and the
72
+ * equivalent plumb-through on `@mastra/mcp`'s `HttpServerDefinition`).
73
+ *
74
+ * Each field is optional. The harness mappers merge unspecified fields
75
+ * with the MCP SDK's built-in defaults (`maxRetries: 2`,
76
+ * `initialReconnectionDelay: 1000`, `maxReconnectionDelay: 30000`,
77
+ * `reconnectionDelayGrowFactor: 1.5`) so a partial override leaves the
78
+ * other fields at their defaults rather than zeroing them out — the
79
+ * underlying transport replaces the entire defaults object when
80
+ * `reconnectionOptions` is set.
81
+ *
82
+ * No-op for stdio servers — only `MCPRemoteServerConfig` carries it.
83
+ */
84
+ reconnectionOptions?: {
85
+ /** Maximum number of reconnection attempts before giving up. Default `2`. */
86
+ maxRetries?: number;
87
+ /** Initial backoff between reconnection attempts in milliseconds. Default `1000`. */
88
+ initialReconnectionDelay?: number;
89
+ /** Maximum backoff between reconnection attempts in milliseconds. Default `30000`. */
90
+ maxReconnectionDelay?: number;
91
+ /** Factor by which the reconnection delay grows after each attempt. Default `1.5`. */
92
+ reconnectionDelayGrowFactor?: number;
93
+ };
68
94
  /**
69
95
  * Opt the server's tool surface out of the active runtime's tool-search
70
96
  * deferral. See {@link MCPStdioServerConfig.alwaysLoad}.
@@ -8,7 +8,7 @@ import type { FinishReason, UsageMetadata } from './usage.js';
8
8
  * convention, with the addition of `tool-approval-request` for human-in-the-loop
9
9
  * tool approval flows.
10
10
  */
11
- export type ChatEvent = StartEvent | TextDeltaEvent | ReasoningDeltaEvent | ToolCallEvent | ToolApprovalRequestEvent | ToolResultEvent | StepStartEvent | StepFinishEvent | ErrorEvent | FinishEvent | UnmappedChunkEvent;
11
+ export type ChatEvent = StartEvent | TextDeltaEvent | ReasoningDeltaEvent | ToolCallEvent | ToolApprovalRequestEvent | ToolResultEvent | StepStartEvent | StepFinishEvent | ErrorEvent | FinishEvent;
12
12
  /**
13
13
  * The stream has begun. Symmetric counterpart to {@link FinishEvent}.
14
14
  *
@@ -155,19 +155,6 @@ export type ErrorEvent = {
155
155
  /** Machine-readable error code (e.g., `'insufficient-tokens'`). */
156
156
  code?: string;
157
157
  };
158
- /**
159
- * A stream chunk from the underlying harness that has no `ChatEvent` counterpart.
160
- *
161
- * Returned instead of silently discarding the chunk, so consumers can log or
162
- * monitor unhandled harness events for observability.
163
- */
164
- export type UnmappedChunkEvent = {
165
- type: 'unmapped-chunk';
166
- /** The original harness chunk type string (e.g., `'tool-call-suspended'`, `'raw'`). */
167
- chunkType: string;
168
- /** The raw chunk object, preserved for diagnostic logging. */
169
- rawChunk: unknown;
170
- };
171
158
  /** The entire stream has completed. */
172
159
  export type FinishEvent = {
173
160
  type: 'finish';
@@ -24,7 +24,18 @@ export type Message = {
24
24
  role: MessageRole;
25
25
  /** Message content — plain text or structured parts. */
26
26
  content: string | MessagePart[];
27
- /** Optional timestamp of when the message was created. */
27
+ /**
28
+ * Timestamp of when the message was created. **Always populated** on
29
+ * messages returned from `ChatSession.getMessageHistory()`. **Optional on
30
+ * write** — consumers constructing `Message` for `ChatSession.addContext()`
31
+ * may omit it; the SDK backfills the current time before forwarding to
32
+ * the harness, so the on-read contract still holds.
33
+ *
34
+ * The read-side guarantee lives on `AgentHarness.getMessages` — see its
35
+ * JSDoc for the contract every harness implementation upholds (populated
36
+ * `createdAt` on every returned message; array sorted ascending by
37
+ * `createdAt`). The SDK passes the harness's output through unchanged.
38
+ */
28
39
  createdAt?: Date;
29
40
  };
30
41
  /**
@@ -16,6 +16,71 @@ export type UsageMetadata = {
16
16
  /** Input tokens written to the provider cache during this interaction. */
17
17
  cacheWriteInputTokens?: number;
18
18
  };
19
+ /**
20
+ * Snapshot of how much of the model's context window the most recent
21
+ * turn used. Returned by {@link ChatSession.getContextUsage}.
22
+ *
23
+ * Consumers use this to decide when to call `compactThread()`, switch to a
24
+ * smaller model, or warn the user as the conversation approaches the
25
+ * model's context limit.
26
+ *
27
+ * `usage` carries the **last per-step** reading from the model —
28
+ * specifically the `usage` from the latest `step-finish` event whose `usage`
29
+ * was defined. This is the size of the prompt the model saw on its last
30
+ * invocation, which is the right "how full is my context" reading. This is
31
+ * **not** the per-turn billing aggregate (which sums steps and double-counts
32
+ * persistent context). For per-turn billing totals, subscribe to
33
+ * `chat-stream-completed` telemetry instead.
34
+ *
35
+ * Field shapes:
36
+ *
37
+ * - `usage` is always populated. Pre-first-turn (or post-`clearHistory()`)
38
+ * it is the empty object `{}` — i.e., a `UsageMetadata` whose token fields
39
+ * are all `undefined` — making "no reading yet" indistinguishable from
40
+ * "harness reported every field as undefined."
41
+ * - `contextWindow` is always populated, contractually. Every `Model`
42
+ * reachable via `Agent.llmGatewayClient.getModel()` must publish a
43
+ * `contextWindow`; see the `sfdx-agent-sdk` ARCHITECTURE.md Critical
44
+ * Invariant on this and issue #507.
45
+ * - `usedFraction` is `undefined` iff every input-bearing field on the
46
+ * latest reading (`inputTokens`, `cachedInputTokens`, `cacheWriteInputTokens`)
47
+ * is `undefined` — the only honest answer when we have no input-side
48
+ * reading to divide. The numerator sums all three because
49
+ * cached prompt tokens occupy real space in the context window (see the
50
+ * field-level doc on `usedFraction` for the Bedrock-Claude rationale).
51
+ * Consumers who want zero-on-empty UX can collapse with `usedFraction ?? 0`.
52
+ */
53
+ export type ContextUsage = {
54
+ /**
55
+ * Last per-step usage reading observed on this session. Pre-first-turn
56
+ * and immediately after `clearHistory()` this is `{}` (every token field
57
+ * undefined).
58
+ */
59
+ usage: UsageMetadata;
60
+ /**
61
+ * The model's total context-window size in tokens. Read live at call
62
+ * time from the agent's currently-bound `LLMGatewayClient`, so it stays
63
+ * correct across `Agent.updateAgentConfig()` model swaps.
64
+ */
65
+ contextWindow: number;
66
+ /**
67
+ * `(usage.inputTokens + usage.cachedInputTokens + usage.cacheWriteInputTokens) /
68
+ * contextWindow`, clamped to `[0, 1]`. The numerator includes
69
+ * cached prompt tokens because they are real tokens occupying the model's
70
+ * context window — Bedrock-Claude's `message_delta.usage` reports only the
71
+ * incremental `inputTokens` per delta, with the bulk of the prompt riding
72
+ * on `cachedInputTokens` / `cacheWriteInputTokens`. Counting only
73
+ * `inputTokens` would underreport "how full" by orders of magnitude on
74
+ * cache-hit paths. Mastra is unaffected because it does not populate the
75
+ * cache fields, so the sum collapses to `inputTokens` alone.
76
+ *
77
+ * `undefined` when ALL three input-bearing fields are missing on the
78
+ * latest reading (pre-first-turn, post-`clearHistory()`, or when a
79
+ * harness emits a reading without any input-side counts). Consumers
80
+ * wanting zero-on-empty: `usedFraction ?? 0`.
81
+ */
82
+ usedFraction: number | undefined;
83
+ };
19
84
  /**
20
85
  * Reason the model stopped generating.
21
86
  * Aligned with AI SDK V3's unified finish-reason set; harnesses normalize provider-specific
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@salesforce/sfdx-agent-sdk",
3
- "version": "0.14.0",
3
+ "version": "0.15.0",
4
4
  "description": "Harness-agnostic agentic infrastructure for Salesforce developer experience tooling",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -35,13 +35,13 @@
35
35
  "LICENSE.txt"
36
36
  ],
37
37
  "dependencies": {
38
- "@salesforce/agentic-common": "0.6.0",
39
- "@salesforce/llm-gateway-sdk": "0.10.0"
38
+ "@salesforce/agentic-common": "0.7.0",
39
+ "@salesforce/llm-gateway-sdk": "0.11.0"
40
40
  },
41
41
  "devDependencies": {
42
42
  "@eslint/js": "^10.0.1",
43
- "@salesforce/sfdx-agent-harness-claude": "0.10.0",
44
- "@salesforce/sfdx-agent-harness-mastra": "0.13.0",
43
+ "@salesforce/sfdx-agent-harness-claude": "0.11.0",
44
+ "@salesforce/sfdx-agent-harness-mastra": "0.14.0",
45
45
  "@types/node": "^22.19.17",
46
46
  "@vitest/coverage-istanbul": "^4.1.7",
47
47
  "@vitest/eslint-plugin": "^1.6.17",