@salesforce/sfdx-agent-sdk 0.13.0 → 0.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +104 -29
- package/dist/agent-connectivity-resolver.d.ts +16 -1
- package/dist/agent-connectivity-resolver.js +54 -3
- package/dist/agent.d.ts +1 -1
- package/dist/agent.js +30 -14
- package/dist/chat-session.d.ts +66 -1
- package/dist/chat-session.js +86 -3
- package/dist/harness/agent-harness.d.ts +36 -6
- package/dist/harness/harness-config.d.ts +10 -3
- package/dist/index.d.ts +3 -2
- package/dist/index.js +1 -1
- package/dist/mcp-config.d.ts +26 -0
- package/dist/types/events.d.ts +1 -14
- package/dist/types/messages.d.ts +12 -1
- package/dist/types/telemetry-events.d.ts +6 -4
- package/dist/types/usage.d.ts +65 -0
- package/package.json +5 -5
package/README.md
CHANGED
|
@@ -152,21 +152,22 @@ keeps unparameterized call sites working.
|
|
|
152
152
|
|
|
153
153
|
A single conversation thread.
|
|
154
154
|
|
|
155
|
-
| Method | Signature | Description
|
|
156
|
-
| ------------------- | ------------------------------------------------------------------------------------- |
|
|
157
|
-
| `getId` | `() => string` | Session/thread identifier.
|
|
158
|
-
| `chat` | `(message: string, options?: ChatOptions) => Promise<ChatStreamResult>` | Send a message and stream the response.
|
|
159
|
-
| `submitToolResult` | `(toolResult: ToolResultInfo) => Promise<ChatStreamResult>` | Return a consumer-executed tool result and resume the stream.
|
|
160
|
-
| `approveToolCall` | `(toolCallId: string, options?: { remember?: boolean }) => Promise<ChatStreamResult>` | Approve a pending tool call.
|
|
161
|
-
| `declineToolCall` | `(toolCallId: string) => Promise<ChatStreamResult>` | Decline a pending tool call.
|
|
162
|
-
| `getMessageHistory` | `() => Promise<Message[]>` | Retrieve all messages in chronological order.
|
|
163
|
-
| `clearHistory` | `() => Promise<void>` | Delete all messages.
|
|
164
|
-
| `
|
|
165
|
-
| `
|
|
166
|
-
| `
|
|
167
|
-
| `
|
|
168
|
-
| `
|
|
169
|
-
| `
|
|
155
|
+
| Method | Signature | Description |
|
|
156
|
+
| ------------------- | ------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------- |
|
|
157
|
+
| `getId` | `() => string` | Session/thread identifier. |
|
|
158
|
+
| `chat` | `(message: string, options?: ChatOptions) => Promise<ChatStreamResult>` | Send a message and stream the response. |
|
|
159
|
+
| `submitToolResult` | `(toolResult: ToolResultInfo) => Promise<ChatStreamResult>` | Return a consumer-executed tool result and resume the stream. |
|
|
160
|
+
| `approveToolCall` | `(toolCallId: string, options?: { remember?: boolean }) => Promise<ChatStreamResult>` | Approve a pending tool call. |
|
|
161
|
+
| `declineToolCall` | `(toolCallId: string) => Promise<ChatStreamResult>` | Decline a pending tool call. |
|
|
162
|
+
| `getMessageHistory` | `() => Promise<Message[]>` | Retrieve all messages in chronological order. |
|
|
163
|
+
| `clearHistory` | `() => Promise<void>` | Delete all messages. |
|
|
164
|
+
| `getContextUsage` | `() => ContextUsage` | Snapshot of how much of the model's context window the most recent turn used. |
|
|
165
|
+
| `addContext` | `(message: string \| Message[]) => Promise<void>` | Inject context without triggering an LLM response. |
|
|
166
|
+
| `subscribe` | `(callback: (event: ChatEvent) => void) => void` | Register a real-time event listener. |
|
|
167
|
+
| `unsubscribe` | `(callback: (event: ChatEvent) => void) => void` | Remove a listener. |
|
|
168
|
+
| `onTelemetry` | `(callback: TelemetryEventCallback) => Unsubscribe` | Subscribe to telemetry scoped to this session. |
|
|
169
|
+
| `onLog` | `(callback: (record: LogRecord) => void) => Unsubscribe` | Subscribe to logs scoped to this session. |
|
|
170
|
+
| `dispose` | `() => void` | Release session-level event resources. Idempotent. |
|
|
170
171
|
|
|
171
172
|
### `ChatStreamResult`
|
|
172
173
|
|
|
@@ -193,23 +194,28 @@ Discriminated union (`event.type`) of streaming events:
|
|
|
193
194
|
| `step-finish` | `stepIndex`, `finishReason`, `usage?` | Step completed with per-step token usage. |
|
|
194
195
|
| `error` | `error`, `code?` | Mid-stream error (yielded, not thrown). |
|
|
195
196
|
| `finish` | `finishReason`, `usage?` | Stream completed with aggregate token usage. |
|
|
196
|
-
|
|
197
|
+
|
|
198
|
+
> **Diagnostic logging.** The `ChatEvent` union is the harness-agnostic public stream — it never carries
|
|
199
|
+
> harness-internal chunk shapes. When a harness encounters a chunk type its adapter does not recognize (typically after
|
|
200
|
+
> an upstream Mastra / Claude SDK upgrade), the chunk is skipped on the public stream and surfaced via `LogBus.debug`
|
|
201
|
+
> with `chunkType` and `rawChunk` in the record's `context`. Subscribe via `manager.onLog` (or `agent.onLog` /
|
|
202
|
+
> `session.onLog`) at debug level to observe these. Production consumers do not need to filter for unrecognized chunks.
|
|
197
203
|
|
|
198
204
|
### Configuration Types
|
|
199
205
|
|
|
200
206
|
#### `AgentConfig`
|
|
201
207
|
|
|
202
|
-
| Field | Type
|
|
203
|
-
| --------------- |
|
|
204
|
-
| `orgAlias?` | `string`
|
|
205
|
-
| `modelId?` | `ModelName`
|
|
206
|
-
| `name?` | `string`
|
|
207
|
-
| `description?` | `string`
|
|
208
|
-
| `instructions?` | `string`
|
|
209
|
-
| `tools?` | `ToolDefinition[]`
|
|
210
|
-
| `mcpServers?` | `MCPConfiguration`
|
|
211
|
-
| `skills?` | `string[]`
|
|
212
|
-
| `rules?` | `string[]`
|
|
208
|
+
| Field | Type | Description |
|
|
209
|
+
| --------------- | -------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
|
210
|
+
| `orgAlias?` | `string` | Salesforce org alias or username. Falls back to project/default org. |
|
|
211
|
+
| `modelId?` | `ModelName \| Model` | LLM model selector. Pass a `ModelName` enum value for an in-tree model (e.g. `'llmgateway__OpenAIGPT5'`), or a pre-built `Model` instance to opt into a Bedrock-Anthropic Claude variant the SDK has not yet released — see `createClaudeModel(gatewayId, overrides)` in `@salesforce/llm-gateway-sdk`. |
|
|
212
|
+
| `name?` | `string` | Human-readable agent name. |
|
|
213
|
+
| `description?` | `string` | Agent purpose description. |
|
|
214
|
+
| `instructions?` | `string` | System instructions for the agent. |
|
|
215
|
+
| `tools?` | `ToolDefinition[]` | Consumer-executed tool schemas. |
|
|
216
|
+
| `mcpServers?` | `MCPConfiguration` | MCP server connections. |
|
|
217
|
+
| `skills?` | `string[]` | Each entry is either an individual skill folder (containing `SKILL.md`) or a parent folder containing skill subfolders. Relative and absolute paths supported; forms can be mixed in the same array. |
|
|
218
|
+
| `rules?` | `string[]` | Each entry is either an individual `.md` rule file or a directory of `.md` rule files (scanned one level deep, alphabetical, non-`.md` skipped). Bodies are composed verbatim into the agent's effective system prompt; YAML frontmatter is optional and stripped if present. Matches Claude Code's `.claude/rules/*.md` convention. |
|
|
213
219
|
|
|
214
220
|
#### `StreamOptions`
|
|
215
221
|
|
|
@@ -241,6 +247,12 @@ type MCPRemoteServerConfig = {
|
|
|
241
247
|
headers?: Record<string, string>;
|
|
242
248
|
enabled?: boolean;
|
|
243
249
|
timeout?: number;
|
|
250
|
+
reconnectionOptions?: {
|
|
251
|
+
maxRetries?: number;
|
|
252
|
+
initialReconnectionDelay?: number;
|
|
253
|
+
maxReconnectionDelay?: number;
|
|
254
|
+
reconnectionDelayGrowFactor?: number;
|
|
255
|
+
};
|
|
244
256
|
alwaysLoad?: boolean;
|
|
245
257
|
};
|
|
246
258
|
```
|
|
@@ -252,6 +264,14 @@ surfaces (≤ a few tools the model needs to find without prompting). The Claude
|
|
|
252
264
|
`_meta['anthropic/alwaysLoad'] = true` on each forwarded tool (equivalent to `defer_loading: false` on the Claude API).
|
|
253
265
|
The Mastra harness eager-loads all MCP tools regardless, so the flag is a no-op there.
|
|
254
266
|
|
|
267
|
+
**`reconnectionOptions`** tunes the HTTP MCP transport's retry / backoff behavior. Forwarded to the underlying SDK
|
|
268
|
+
transport on both harnesses (Claude's `@modelcontextprotocol/sdk` `StreamableHTTPClientTransport` and Mastra's
|
|
269
|
+
`@mastra/mcp` `HttpServerDefinition`, which is itself typed off the same MCP SDK shape). Each field is optional;
|
|
270
|
+
unspecified fields fall back to the MCP SDK's built-in defaults — `maxRetries: 2`, `initialReconnectionDelay: 1000` ms,
|
|
271
|
+
`maxReconnectionDelay: 30000` ms, `reconnectionDelayGrowFactor: 1.5`. Partial overrides are merged with those defaults
|
|
272
|
+
at the harness boundary so a consumer setting only `maxRetries` doesn't zero out the others. No-op for stdio servers —
|
|
273
|
+
only `MCPRemoteServerConfig` carries it.
|
|
274
|
+
|
|
255
275
|
#### `McpServerInfo`
|
|
256
276
|
|
|
257
277
|
| Field | Type | Description |
|
|
@@ -375,6 +395,13 @@ type ImagePart = { type: 'image'; mimeType: 'image/png' | 'image/jpeg'; data: st
|
|
|
375
395
|
type FilePart = { type: 'file'; mimeType: 'application/pdf'; data: string; fileName?: string };
|
|
376
396
|
```
|
|
377
397
|
|
|
398
|
+
`createdAt` is **required-on-read, optional-on-write**:
|
|
399
|
+
|
|
400
|
+
- Messages returned from `ChatSession.getMessageHistory()` always have `createdAt` populated, and the array is sorted
|
|
401
|
+
ascending by `createdAt`. Consumer code can read `msg.createdAt` directly.
|
|
402
|
+
- Consumers constructing `Message` literals for `ChatSession.addContext()` may omit `createdAt`; the SDK backfills the
|
|
403
|
+
current time before forwarding to the harness. Pass an explicit value to override.
|
|
404
|
+
|
|
378
405
|
#### Multimodal input
|
|
379
406
|
|
|
380
407
|
`ChatSession.chat()` (and the harness `stream()` it delegates to) accept either a plain string or a `MessagePart[]`. Use
|
|
@@ -409,7 +436,8 @@ await session.chat([
|
|
|
409
436
|
},
|
|
410
437
|
]);
|
|
411
438
|
|
|
412
|
-
// Inject multimodal context before a chat turn
|
|
439
|
+
// Inject multimodal context before a chat turn. `createdAt` is omitted —
|
|
440
|
+
// the SDK backfills it before forwarding to the harness.
|
|
413
441
|
await session.addContext([
|
|
414
442
|
{
|
|
415
443
|
id: 'ctx-screenshot',
|
|
@@ -455,9 +483,52 @@ type UsageMetadata = {
|
|
|
455
483
|
cacheWriteInputTokens?: number;
|
|
456
484
|
};
|
|
457
485
|
|
|
486
|
+
type ContextUsage = {
|
|
487
|
+
/**
|
|
488
|
+
* Last per-step usage reading observed on this session. Pre-first-turn and
|
|
489
|
+
* immediately after `clearHistory()` this is `{}` (every token field undefined).
|
|
490
|
+
*/
|
|
491
|
+
usage: UsageMetadata;
|
|
492
|
+
/** The model's total context-window size in tokens. Always populated. */
|
|
493
|
+
contextWindow: number;
|
|
494
|
+
/**
|
|
495
|
+
* `(usage.inputTokens + usage.cachedInputTokens + usage.cacheWriteInputTokens) / contextWindow`,
|
|
496
|
+
* clamped to [0, 1]. Cached prompt tokens are summed in because they occupy the
|
|
497
|
+
* model's context window — on Bedrock-Claude, the bulk of the prompt is reported
|
|
498
|
+
* via `cachedInputTokens` / `cacheWriteInputTokens`, not `inputTokens`. `undefined`
|
|
499
|
+
* when ALL three input-bearing fields are missing.
|
|
500
|
+
*/
|
|
501
|
+
usedFraction: number | undefined;
|
|
502
|
+
};
|
|
503
|
+
|
|
458
504
|
type FinishReason = 'stop' | 'length' | 'tool-calls' | 'content-filter' | 'error' | 'other';
|
|
459
505
|
```
|
|
460
506
|
|
|
507
|
+
**Tracking context-window utilization.** `ChatSession.getContextUsage()` always returns a populated `ContextUsage` —
|
|
508
|
+
even pre-first-turn, where `usage` is `{}` and `usedFraction` is `undefined`, but `contextWindow` is always available.
|
|
509
|
+
Use it to decide when to compact a thread:
|
|
510
|
+
|
|
511
|
+
```typescript
|
|
512
|
+
const ctx = session.getContextUsage();
|
|
513
|
+
if (ctx.usedFraction !== undefined && ctx.usedFraction > 0.8) {
|
|
514
|
+
await agent.compactChatSession(session.getId());
|
|
515
|
+
}
|
|
516
|
+
```
|
|
517
|
+
|
|
518
|
+
Render a context-usage indicator that distinguishes "no reading yet" from a real measurement:
|
|
519
|
+
|
|
520
|
+
```typescript
|
|
521
|
+
const ctx = session.getContextUsage();
|
|
522
|
+
const limit = ctx.contextWindow.toLocaleString(); // always available
|
|
523
|
+
const used = ctx.usage.inputTokens?.toLocaleString() ?? '—';
|
|
524
|
+
const pct = ctx.usedFraction !== undefined ? `${Math.round(ctx.usedFraction * 100)}%` : '—';
|
|
525
|
+
return `${used} / ${limit} tokens (${pct})`;
|
|
526
|
+
```
|
|
527
|
+
|
|
528
|
+
The snapshot uses **last-step** semantics, not the per-turn billing aggregate — `finish.usage` sums all steps in a turn
|
|
529
|
+
and double-counts persistent context, which is the wrong numerator for "how full is my context." For per-turn billing
|
|
530
|
+
totals, subscribe to `chat-stream-completed` telemetry instead.
|
|
531
|
+
|
|
461
532
|
### Error Handling
|
|
462
533
|
|
|
463
534
|
The SDK throws `AgentSDKError` for predictable not-found and compatibility conditions. Each error has a `type` property
|
|
@@ -669,7 +740,11 @@ Returns `true` if the URL matches a Salesforce Hosted MCP Server endpoint (prod,
|
|
|
669
740
|
|
|
670
741
|
### Re-exported from `@salesforce/llm-gateway-sdk`
|
|
671
742
|
|
|
672
|
-
- `
|
|
743
|
+
- `Model` — abstract base class. Returned by `Models.getByName(...)` and accepted as an `AgentConfig.modelId` value.
|
|
744
|
+
- `ModelName` — enum of in-tree model identifiers.
|
|
745
|
+
- `createClaudeModel(gatewayId, overrides?)` — escape-hatch factory for opting into a Bedrock-Anthropic Claude variant
|
|
746
|
+
the SDK has not released yet (`AgentConfig.modelId` accepts the returned instance directly).
|
|
747
|
+
- `ClaudeModelOverrides` — optional caps for `createClaudeModel`.
|
|
673
748
|
- `SfApiEnv` — Salesforce API environment enum (`dev`, `perf`, `prod`, `stage`, `test`)
|
|
674
749
|
- `inferSfApiEnv(instanceUrl, options?)` — maps an instance URL to a `SfApiEnv`. Re-exported from
|
|
675
750
|
`@salesforce/agentic-common` for consumers that need the mapping without an `OrgConnection` (e.g. building a
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { type JSONWebToken, type LLMGatewayClient, type LLMGatewayClientFactory } from '@salesforce/llm-gateway-sdk';
|
|
1
|
+
import { Model, type JSONWebToken, type LLMGatewayClient, type LLMGatewayClientFactory } from '@salesforce/llm-gateway-sdk';
|
|
2
2
|
import { type OrgConnection, type OrgConnectionFactory } from '@salesforce/agentic-common';
|
|
3
3
|
import type { AgentConfig } from './harness/harness-config.js';
|
|
4
4
|
/**
|
|
@@ -60,3 +60,18 @@ export declare class DefaultAgentConnectivityResolver implements AgentConnectivi
|
|
|
60
60
|
*/
|
|
61
61
|
resolve(projectRoot: string, config: AgentConfig): Promise<ResolvedConnectivity>;
|
|
62
62
|
}
|
|
63
|
+
/**
|
|
64
|
+
* Resolves an `AgentConfig.modelId` value (which may be a {@link ModelName} enum value, a
|
|
65
|
+
* pre-built {@link Model} instance, or `undefined`) to a concrete {@link Model}.
|
|
66
|
+
*
|
|
67
|
+
* The enum branch goes through the strict {@link Models.getByName} registry; the live
|
|
68
|
+
* instance branch passes the consumer-built model through unchanged. A persisted-and-restored
|
|
69
|
+
* `Model` instance arrives here as a plain object (the JSON round-trip drops its prototype),
|
|
70
|
+
* and is rehydrated via {@link createClaudeModel} for Bedrock-Anthropic Claude variants — the
|
|
71
|
+
* single use case the consumer-built escape hatch was added for. Any other persisted shape is
|
|
72
|
+
* a programming error and throws.
|
|
73
|
+
*
|
|
74
|
+
* Exported for use by `Agent.updateAgentConfig`, which performs the same resolution when
|
|
75
|
+
* comparing previous and next models without re-running the full connectivity resolver.
|
|
76
|
+
*/
|
|
77
|
+
export declare function resolveAgentConfigModel(modelId: AgentConfig['modelId']): Model;
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* Copyright 2026, Salesforce, Inc. All rights reserved.
|
|
3
3
|
* See LICENSE.txt for license terms.
|
|
4
4
|
*/
|
|
5
|
-
import { DefaultLLMGatewayClientFactory, Models, createJWTFromConnection, } from '@salesforce/llm-gateway-sdk';
|
|
5
|
+
import { DefaultLLMGatewayClientFactory, Model, ModelName, Models, createClaudeModel, createJWTFromConnection, } from '@salesforce/llm-gateway-sdk';
|
|
6
6
|
import { SfApiEnv, RealOrgConnectionFactory, } from '@salesforce/agentic-common';
|
|
7
7
|
// TODO(@W-22782317): Temporary workaround — only on prod orgs the LLM Gateway must
|
|
8
8
|
// route requests through AgentforceVibes rather than the default VibesService. Remove once a
|
|
@@ -46,9 +46,60 @@ export class DefaultAgentConnectivityResolver {
|
|
|
46
46
|
const featureId = env === SfApiEnv.Prod ? PROD_ORG_FEATURE_ID : undefined;
|
|
47
47
|
const orgJwt = await createJWTFromConnection(orgConnection, { featureId });
|
|
48
48
|
const llmGatewayClient = this.gatewayClientFactory.create(orgJwt, { env });
|
|
49
|
-
|
|
50
|
-
llmGatewayClient.setModel(Models.getByName(modelName));
|
|
49
|
+
llmGatewayClient.setModel(resolveAgentConfigModel(config.modelId));
|
|
51
50
|
return { llmGatewayClient, orgConnection, orgJwt };
|
|
52
51
|
}
|
|
53
52
|
}
|
|
53
|
+
/**
|
|
54
|
+
* Resolves an `AgentConfig.modelId` value (which may be a {@link ModelName} enum value, a
|
|
55
|
+
* pre-built {@link Model} instance, or `undefined`) to a concrete {@link Model}.
|
|
56
|
+
*
|
|
57
|
+
* The enum branch goes through the strict {@link Models.getByName} registry; the live
|
|
58
|
+
* instance branch passes the consumer-built model through unchanged. A persisted-and-restored
|
|
59
|
+
* `Model` instance arrives here as a plain object (the JSON round-trip drops its prototype),
|
|
60
|
+
* and is rehydrated via {@link createClaudeModel} for Bedrock-Anthropic Claude variants — the
|
|
61
|
+
* single use case the consumer-built escape hatch was added for. Any other persisted shape is
|
|
62
|
+
* a programming error and throws.
|
|
63
|
+
*
|
|
64
|
+
* Exported for use by `Agent.updateAgentConfig`, which performs the same resolution when
|
|
65
|
+
* comparing previous and next models without re-running the full connectivity resolver.
|
|
66
|
+
*/
|
|
67
|
+
export function resolveAgentConfigModel(modelId) {
|
|
68
|
+
if (modelId === undefined)
|
|
69
|
+
return Models.getDefault();
|
|
70
|
+
// Known limitation: `instanceof Model` is realm-scoped — a consumer that ends up with two copies
|
|
71
|
+
// of `@salesforce/llm-gateway-sdk` resolved in their dependency tree will have their `Model`
|
|
72
|
+
// instance fail this check and fall through to `rehydratePersistedModel`. That branch handles
|
|
73
|
+
// it correctly for Claude variants but throws for anything else. The duplicate-package case is
|
|
74
|
+
// a packaging bug at the consumer; we don't paper over it here.
|
|
75
|
+
if (modelId instanceof Model)
|
|
76
|
+
return modelId;
|
|
77
|
+
if (typeof modelId === 'string')
|
|
78
|
+
return Models.getByName(modelId);
|
|
79
|
+
return rehydratePersistedModel(modelId);
|
|
80
|
+
}
|
|
81
|
+
function rehydratePersistedModel(persisted) {
|
|
82
|
+
const obj = persisted;
|
|
83
|
+
if (typeof obj.name !== 'string') {
|
|
84
|
+
throw new Error(`Cannot resolve modelId: missing string "name" on persisted object.`);
|
|
85
|
+
}
|
|
86
|
+
// If the persisted name matches an in-tree model, prefer the strict registry — the
|
|
87
|
+
// returned instance has the correct prototype and the canonical caps.
|
|
88
|
+
if (Object.values(ModelName).includes(obj.name)) {
|
|
89
|
+
return Models.getByName(obj.name);
|
|
90
|
+
}
|
|
91
|
+
if (!obj.name.startsWith('llmgateway__BedrockAnthropic')) {
|
|
92
|
+
throw new Error(`Cannot rehydrate persisted model "${obj.name}". Only Bedrock-Anthropic Claude variants are supported via the consumer-built Model escape hatch.`);
|
|
93
|
+
}
|
|
94
|
+
return createClaudeModel(obj.name, {
|
|
95
|
+
displayId: obj.displayId,
|
|
96
|
+
maxInputTokens: obj.maxInputTokens,
|
|
97
|
+
maxOutputTokens: obj.maxOutputTokens,
|
|
98
|
+
contextWindow: obj.contextWindow,
|
|
99
|
+
supportsPromptCache: obj.supportsPromptCache,
|
|
100
|
+
supportedFormats: obj.supportedFormats,
|
|
101
|
+
permittedParameters: obj.permittedParameters,
|
|
102
|
+
customHeaders: obj.customHeaders,
|
|
103
|
+
});
|
|
104
|
+
}
|
|
54
105
|
//# sourceMappingURL=agent-connectivity-resolver.js.map
|
package/dist/agent.d.ts
CHANGED
|
@@ -4,7 +4,7 @@ import { type AgentConfig } from './harness/harness-config.js';
|
|
|
4
4
|
import { type ChatSession } from './chat-session.js';
|
|
5
5
|
import type { McpServerInfo } from './mcp-config.js';
|
|
6
6
|
import { type JSONWebToken, type LLMGatewayClient } from '@salesforce/llm-gateway-sdk';
|
|
7
|
-
import type
|
|
7
|
+
import { type AgentConnectivityResolver } from './agent-connectivity-resolver.js';
|
|
8
8
|
import type { AgentIdentityStore } from './internal/agent-identity-store.js';
|
|
9
9
|
import type { TelemetryRouter, TelemetrySlice } from './internal/telemetry-router.js';
|
|
10
10
|
import type { TelemetryBus, TelemetryEventCallback } from './types/telemetry-events.js';
|
package/dist/agent.js
CHANGED
|
@@ -5,7 +5,8 @@
|
|
|
5
5
|
import { EventBus, LogBus, RealClock, UUIDGenerator, } from '@salesforce/agentic-common';
|
|
6
6
|
import { toHarnessConfig } from './harness/harness-config.js';
|
|
7
7
|
import { DefaultChatSession } from './chat-session.js';
|
|
8
|
-
import {
|
|
8
|
+
import {} from '@salesforce/llm-gateway-sdk';
|
|
9
|
+
import { resolveAgentConfigModel } from './agent-connectivity-resolver.js';
|
|
9
10
|
import { AgentSDKError, AgentSDKErrorType } from './errors.js';
|
|
10
11
|
/**
|
|
11
12
|
* Default implementation of {@link Agent} that delegates
|
|
@@ -112,8 +113,8 @@ export class DefaultAgent {
|
|
|
112
113
|
const previousOrgJwt = this.orgJwt;
|
|
113
114
|
const nextConfig = { ...this.config, ...config };
|
|
114
115
|
const orgAliasRequested = Object.prototype.hasOwnProperty.call(config, 'orgAlias');
|
|
115
|
-
const
|
|
116
|
-
const
|
|
116
|
+
const previousModel = previousClient.getModel();
|
|
117
|
+
const nextModel = resolveAgentConfigModel(nextConfig.modelId);
|
|
117
118
|
let nextClient = previousClient;
|
|
118
119
|
let nextConnection = this.orgConnection;
|
|
119
120
|
let nextOrgJwt = this.orgJwt;
|
|
@@ -123,14 +124,16 @@ export class DefaultAgent {
|
|
|
123
124
|
nextConnection = runtime.orgConnection;
|
|
124
125
|
nextOrgJwt = runtime.orgJwt;
|
|
125
126
|
}
|
|
126
|
-
else if (
|
|
127
|
+
else if (nextModel.name !== previousModel.name) {
|
|
127
128
|
// Keep the same authenticated client, but pin the updated model.
|
|
128
129
|
// (If modelId is omitted, the resolver pinned the default at creation time.)
|
|
129
|
-
nextClient.setModel(
|
|
130
|
+
nextClient.setModel(nextModel);
|
|
130
131
|
}
|
|
131
132
|
await this.harness.destroyAgent(this.agentId);
|
|
133
|
+
let nextConfigRegistered = false;
|
|
132
134
|
try {
|
|
133
135
|
await this.harness.createAgent(this.agentId, this.projectRoot, nextClient, toHarnessConfig(nextConfig, nextOrgJwt), options);
|
|
136
|
+
nextConfigRegistered = true;
|
|
134
137
|
// Persist before the in-memory swaps so a write failure flows through the same
|
|
135
138
|
// catch block as a recreate failure: the rollback restores the harness with
|
|
136
139
|
// previousConfig and disk state remains the pre-update record.
|
|
@@ -148,16 +151,21 @@ export class DefaultAgent {
|
|
|
148
151
|
catch (error) {
|
|
149
152
|
// Best-effort restoration to keep wrapper and harness state aligned.
|
|
150
153
|
try {
|
|
151
|
-
// Restore client model if we mutated it in-place.
|
|
154
|
+
// Restore client model if we mutated it in-place. We re-pin the live previousModel
|
|
155
|
+
// instance (captured above as previousClient.getModel()) rather than re-resolving from
|
|
156
|
+
// this.config.modelId, because a JSON-rehydrated config may have a plain object there
|
|
157
|
+
// that would round-trip through createClaudeModel and lose the original prototype.
|
|
152
158
|
if (nextClient === previousClient) {
|
|
153
|
-
previousClient.setModel(
|
|
159
|
+
previousClient.setModel(previousModel);
|
|
160
|
+
}
|
|
161
|
+
// Clear nextConfig registration only when the harness recreate
|
|
162
|
+
// actually succeeded (identityStore.write-failure path) — the
|
|
163
|
+
// harness throws on unknown id, so calling destroyAgent on the
|
|
164
|
+
// harness-recreate-failure path would short-circuit the rollback
|
|
165
|
+
// createAgent below.
|
|
166
|
+
if (nextConfigRegistered) {
|
|
167
|
+
await this.harness.destroyAgent(this.agentId);
|
|
154
168
|
}
|
|
155
|
-
// Clear any nextConfig registration left behind by a successful harness recreate
|
|
156
|
-
// before the rollback createAgent runs. On the harness-recreate-failure path this
|
|
157
|
-
// is a no-op (the agent was never registered with nextConfig); on the
|
|
158
|
-
// identityStore.write-failure path it removes the live nextConfig so the rollback
|
|
159
|
-
// doesn't trip the harness's duplicate-registration guard.
|
|
160
|
-
await this.harness.destroyAgent(this.agentId);
|
|
161
169
|
await this.harness.createAgent(this.agentId, this.projectRoot, previousClient, toHarnessConfig(previousConfig, previousOrgJwt));
|
|
162
170
|
}
|
|
163
171
|
catch {
|
|
@@ -319,10 +327,18 @@ export class DefaultAgent {
|
|
|
319
327
|
}
|
|
320
328
|
attachSession(threadId) {
|
|
321
329
|
const slice = this.router.registerSession(threadId);
|
|
330
|
+
// Live getter — read at call time so getContextUsage() reflects the
|
|
331
|
+
// model bound to the agent right now, not the model that was bound
|
|
332
|
+
// when this session was created. updateAgentConfig() can swap the
|
|
333
|
+
// underlying LLMGatewayClient mid-life. Per the SDK's Critical
|
|
334
|
+
// Invariant on context-window reachability, every bound model
|
|
335
|
+
// exposes a usable `contextWindow`; #507's decoupling work must
|
|
336
|
+
// preserve that, so this access is contractually safe.
|
|
337
|
+
const getContextWindow = () => this.llmGatewayClient.getModel().contextWindow;
|
|
322
338
|
const session = new DefaultChatSession(this.harness, this.agentId, threadId, slice, {
|
|
323
339
|
telemetry: this.telemetryBus,
|
|
324
340
|
log: this.logBus,
|
|
325
|
-
}, this.clock, this.idGenerator);
|
|
341
|
+
}, getContextWindow, this.clock, this.idGenerator);
|
|
326
342
|
this.sessions.set(threadId, session);
|
|
327
343
|
this.sessionSliceUnregisters.set(threadId, () => this.router.unregisterSession(threadId));
|
|
328
344
|
this.telemetryBus.emit({
|
package/dist/chat-session.d.ts
CHANGED
|
@@ -6,6 +6,7 @@ import type { ChatEvent, ChatStreamResult } from './types/events.js';
|
|
|
6
6
|
import type { Message, MessagePart } from './types/messages.js';
|
|
7
7
|
import type { TelemetryBus, TelemetryEventCallback } from './types/telemetry-events.js';
|
|
8
8
|
import type { ToolResultInfo } from './types/tools.js';
|
|
9
|
+
import type { ContextUsage } from './types/usage.js';
|
|
9
10
|
/**
|
|
10
11
|
* Options for a single chat interaction.
|
|
11
12
|
*/
|
|
@@ -123,6 +124,25 @@ export interface ChatSession {
|
|
|
123
124
|
getMessageHistory(): Promise<Message[]>;
|
|
124
125
|
/** Delete all messages in this session's history. */
|
|
125
126
|
clearHistory(): Promise<void>;
|
|
127
|
+
/**
|
|
128
|
+
* Snapshot of how much of the model's context window the most recent
|
|
129
|
+
* turn used. Always returns a `ContextUsage` — pre-first-turn and
|
|
130
|
+
* immediately after `clearHistory()`, `usage` is `{}` and `usedFraction`
|
|
131
|
+
* is `undefined`, but `contextWindow` is always populated from the
|
|
132
|
+
* agent's currently-bound model.
|
|
133
|
+
*
|
|
134
|
+
* `usage` carries the **last per-step** reading from the model — the
|
|
135
|
+
* size of the prompt the model saw on its most recent invocation,
|
|
136
|
+
* which is the right "how full is my context" answer for deciding
|
|
137
|
+
* when to call `Agent.compactChatSession()`. This is **not** the per-turn billing
|
|
138
|
+
* aggregate; consumers who want billing totals should subscribe to
|
|
139
|
+
* `chat-stream-completed` telemetry.
|
|
140
|
+
*
|
|
141
|
+
* The `contextWindow` is read live from the agent's currently-bound
|
|
142
|
+
* model, so it reflects any `Agent.updateAgentConfig()` model swap
|
|
143
|
+
* that happened between turns.
|
|
144
|
+
*/
|
|
145
|
+
getContextUsage(): ContextUsage;
|
|
126
146
|
/**
|
|
127
147
|
* Inject context messages into the thread without triggering an LLM response.
|
|
128
148
|
* Useful for seeding file contents, system instructions, or prior conversation
|
|
@@ -176,6 +196,23 @@ export declare class DefaultChatSession implements ChatSession {
|
|
|
176
196
|
* are stale and should not bleed into the next turn).
|
|
177
197
|
*/
|
|
178
198
|
private readonly toolStartMs;
|
|
199
|
+
/**
|
|
200
|
+
* Live getter for the agent's currently-bound model's context window.
|
|
201
|
+
* Called by {@link getContextUsage} so reads reflect the model in
|
|
202
|
+
* effect right now, not the model bound when this session was created
|
|
203
|
+
* (an `Agent.updateAgentConfig()` swap can change it mid-life).
|
|
204
|
+
*/
|
|
205
|
+
private readonly getContextWindow;
|
|
206
|
+
/**
|
|
207
|
+
* Last per-step usage reading observed on this session. Initialized
|
|
208
|
+
* to `{}` (every token field undefined) so {@link getContextUsage}
|
|
209
|
+
* can always return a populated `ContextUsage`. Updated on every
|
|
210
|
+
* `step-finish` ChatEvent whose `usage` is defined; an undefined
|
|
211
|
+
* usage is carried forward (defense against rare gateway-side gaps —
|
|
212
|
+
* see W-22692131). Reset to `{}` on `clearHistory()` so a fresh
|
|
213
|
+
* thread starts unprimed.
|
|
214
|
+
*/
|
|
215
|
+
private latestUsage;
|
|
179
216
|
private disposed;
|
|
180
217
|
/**
|
|
181
218
|
* @param harness - The agent harness managing thread and message lifecycle.
|
|
@@ -183,10 +220,12 @@ export declare class DefaultChatSession implements ChatSession {
|
|
|
183
220
|
* @param threadId - ID of the conversation thread backing this session.
|
|
184
221
|
* @param inbound - Router slice delivering harness events routed to this session.
|
|
185
222
|
* @param parent - Parent agent's buses; this session forwards its events upward into them.
|
|
223
|
+
* @param getContextWindow - Live getter for the agent's currently-bound model's `contextWindow`.
|
|
224
|
+
* Called by `getContextUsage()` so reads stay correct across `Agent.updateAgentConfig()` model swaps.
|
|
186
225
|
* @param clock - Source of monotonic timestamps for telemetry events. Defaults to `RealClock`.
|
|
187
226
|
* @param idGenerator - Source of message ids for `addContext()`. Defaults to `UUIDGenerator`.
|
|
188
227
|
*/
|
|
189
|
-
constructor(harness: AgentHarness, agentId: string, threadId: string, inbound: TelemetrySlice, parent: ChatSessionParentBuses, clock?: Clock, idGenerator?: UniqueIDGenerator);
|
|
228
|
+
constructor(harness: AgentHarness, agentId: string, threadId: string, inbound: TelemetrySlice, parent: ChatSessionParentBuses, getContextWindow: () => number, clock?: Clock, idGenerator?: UniqueIDGenerator);
|
|
190
229
|
getId(): string;
|
|
191
230
|
/**
|
|
192
231
|
* @requirements
|
|
@@ -273,8 +312,34 @@ export declare class DefaultChatSession implements ChatSession {
|
|
|
273
312
|
/**
|
|
274
313
|
* @requirements
|
|
275
314
|
* - MUST delegate to `this.harness.clearMessages()`, passing `this.agentId` and `this.threadId`.
|
|
315
|
+
* - MUST reset `latestUsage` to `{}` so the next `getContextUsage()` reports a fresh
|
|
316
|
+
* "no reading yet" snapshot until the next turn produces one.
|
|
276
317
|
*/
|
|
277
318
|
clearHistory(): Promise<void>;
|
|
319
|
+
/**
|
|
320
|
+
* @requirements
|
|
321
|
+
* - MUST always return a populated `ContextUsage`. Pre-first-turn and post-`clearHistory()`,
|
|
322
|
+
* `usage` is `{}` and `usedFraction` is `undefined`, but `contextWindow` is always
|
|
323
|
+
* populated from the agent's currently-bound model.
|
|
324
|
+
* - MUST read `contextWindow` via the constructor-injected `getContextWindow` getter
|
|
325
|
+
* so swaps via `Agent.updateAgentConfig()` are reflected on the next call. Per the
|
|
326
|
+
* SDK's Critical Invariant on context-window reachability, every bound model exposes
|
|
327
|
+
* a usable `contextWindow`; the getter does not need a defensive try/catch.
|
|
328
|
+
* - MUST compute `usedFraction = (inputTokens + cachedInputTokens + cacheWriteInputTokens) /
|
|
329
|
+
* contextWindow`, clamped to `[0, 1]`. The numerator must include cached
|
|
330
|
+
* tokens because Bedrock-Claude's `message_delta.usage` reports only the *incremental*
|
|
331
|
+
* `input_tokens` per delta — the bulk of the prompt rides on `cache_read_input_tokens`
|
|
332
|
+
* / `cache_creation_input_tokens` which the Claude adapter surfaces as
|
|
333
|
+
* `cachedInputTokens` / `cacheWriteInputTokens`. Those are real tokens the model
|
|
334
|
+
* actually loaded into its context window (Bedrock charges for them and counts them
|
|
335
|
+
* against the window), so they belong in the "how full" numerator. Mastra is
|
|
336
|
+
* unaffected — it doesn't populate the cache fields, so the sum collapses to
|
|
337
|
+
* `inputTokens` alone.
|
|
338
|
+
* - MUST treat `usedFraction` as `undefined` when ALL three input-bearing fields are
|
|
339
|
+
* undefined — pre-first-turn, post-`clearHistory()`, or a harness reading with no
|
|
340
|
+
* input-side counts at all.
|
|
341
|
+
*/
|
|
342
|
+
getContextUsage(): ContextUsage;
|
|
278
343
|
/**
|
|
279
344
|
* @requirements
|
|
280
345
|
* - IF `message` is a `string`, it MUST be formatted into a standard `Message` object array containing exactly one message.
|
package/dist/chat-session.js
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* Copyright 2026, Salesforce, Inc. All rights reserved.
|
|
3
3
|
* See LICENSE.txt for license terms.
|
|
4
4
|
*/
|
|
5
|
-
import { EventBus, LogBus, RealClock, UUIDGenerator, } from '@salesforce/agentic-common';
|
|
5
|
+
import { backfillCreatedAt, EventBus, LogBus, RealClock, UUIDGenerator, } from '@salesforce/agentic-common';
|
|
6
6
|
import { AgentSDKError, AgentSDKErrorType } from './errors.js';
|
|
7
7
|
/**
|
|
8
8
|
* Default implementation of {@link ChatSession} that delegates all operations
|
|
@@ -31,6 +31,23 @@ export class DefaultChatSession {
|
|
|
31
31
|
* are stale and should not bleed into the next turn).
|
|
32
32
|
*/
|
|
33
33
|
toolStartMs = new Map();
|
|
34
|
+
/**
|
|
35
|
+
* Live getter for the agent's currently-bound model's context window.
|
|
36
|
+
* Called by {@link getContextUsage} so reads reflect the model in
|
|
37
|
+
* effect right now, not the model bound when this session was created
|
|
38
|
+
* (an `Agent.updateAgentConfig()` swap can change it mid-life).
|
|
39
|
+
*/
|
|
40
|
+
getContextWindow;
|
|
41
|
+
/**
|
|
42
|
+
* Last per-step usage reading observed on this session. Initialized
|
|
43
|
+
* to `{}` (every token field undefined) so {@link getContextUsage}
|
|
44
|
+
* can always return a populated `ContextUsage`. Updated on every
|
|
45
|
+
* `step-finish` ChatEvent whose `usage` is defined; an undefined
|
|
46
|
+
* usage is carried forward (defense against rare gateway-side gaps —
|
|
47
|
+
* see W-22692131). Reset to `{}` on `clearHistory()` so a fresh
|
|
48
|
+
* thread starts unprimed.
|
|
49
|
+
*/
|
|
50
|
+
latestUsage = {};
|
|
34
51
|
disposed = false;
|
|
35
52
|
/**
|
|
36
53
|
* @param harness - The agent harness managing thread and message lifecycle.
|
|
@@ -38,13 +55,16 @@ export class DefaultChatSession {
|
|
|
38
55
|
* @param threadId - ID of the conversation thread backing this session.
|
|
39
56
|
* @param inbound - Router slice delivering harness events routed to this session.
|
|
40
57
|
* @param parent - Parent agent's buses; this session forwards its events upward into them.
|
|
58
|
+
* @param getContextWindow - Live getter for the agent's currently-bound model's `contextWindow`.
|
|
59
|
+
* Called by `getContextUsage()` so reads stay correct across `Agent.updateAgentConfig()` model swaps.
|
|
41
60
|
* @param clock - Source of monotonic timestamps for telemetry events. Defaults to `RealClock`.
|
|
42
61
|
* @param idGenerator - Source of message ids for `addContext()`. Defaults to `UUIDGenerator`.
|
|
43
62
|
*/
|
|
44
|
-
constructor(harness, agentId, threadId, inbound, parent, clock = new RealClock(), idGenerator = new UUIDGenerator()) {
|
|
63
|
+
constructor(harness, agentId, threadId, inbound, parent, getContextWindow, clock = new RealClock(), idGenerator = new UUIDGenerator()) {
|
|
45
64
|
this.harness = harness;
|
|
46
65
|
this.agentId = agentId;
|
|
47
66
|
this.threadId = threadId;
|
|
67
|
+
this.getContextWindow = getContextWindow;
|
|
48
68
|
this.clock = clock;
|
|
49
69
|
this.idGenerator = idGenerator;
|
|
50
70
|
this.inboundUnsubs = [inbound.telemetry.forwardTo(this.telemetryBus), inbound.log.forwardTo(this.logBus)];
|
|
@@ -138,6 +158,18 @@ export class DefaultChatSession {
|
|
|
138
158
|
this.chatEventBus.emit(event);
|
|
139
159
|
this.deriveToolTelemetry(event);
|
|
140
160
|
yield event;
|
|
161
|
+
if (event.type === 'step-finish' && event.usage !== undefined) {
|
|
162
|
+
// Snapshot the most recent per-step usage. Last-step semantics
|
|
163
|
+
// (not the per-turn `finish.usage` aggregate) — `finish.usage`
|
|
164
|
+
// sums every step inside the turn and double-counts persistent
|
|
165
|
+
// context, which is the wrong denominator for "how full is my
|
|
166
|
+
// context". An undefined usage on this step is intentionally
|
|
167
|
+
// ignored so the prior reading is carried forward — gateway-side
|
|
168
|
+
// gaps are rare but real (W-22692131) and clobbering with
|
|
169
|
+
// undefined would surface as a transient hole consumers can't
|
|
170
|
+
// distinguish from a fresh session.
|
|
171
|
+
this.latestUsage = event.usage;
|
|
172
|
+
}
|
|
141
173
|
if (event.type === 'finish') {
|
|
142
174
|
sawFinish = true;
|
|
143
175
|
finishUsage = event.usage;
|
|
@@ -267,10 +299,53 @@ export class DefaultChatSession {
|
|
|
267
299
|
/**
|
|
268
300
|
* @requirements
|
|
269
301
|
* - MUST delegate to `this.harness.clearMessages()`, passing `this.agentId` and `this.threadId`.
|
|
302
|
+
* - MUST reset `latestUsage` to `{}` so the next `getContextUsage()` reports a fresh
|
|
303
|
+
* "no reading yet" snapshot until the next turn produces one.
|
|
270
304
|
*/
|
|
271
305
|
async clearHistory() {
|
|
272
306
|
this.assertNotDisposed();
|
|
273
307
|
await this.harness.clearMessages(this.agentId, this.threadId);
|
|
308
|
+
this.latestUsage = {};
|
|
309
|
+
}
|
|
310
|
+
/**
|
|
311
|
+
* @requirements
|
|
312
|
+
* - MUST always return a populated `ContextUsage`. Pre-first-turn and post-`clearHistory()`,
|
|
313
|
+
* `usage` is `{}` and `usedFraction` is `undefined`, but `contextWindow` is always
|
|
314
|
+
* populated from the agent's currently-bound model.
|
|
315
|
+
* - MUST read `contextWindow` via the constructor-injected `getContextWindow` getter
|
|
316
|
+
* so swaps via `Agent.updateAgentConfig()` are reflected on the next call. Per the
|
|
317
|
+
* SDK's Critical Invariant on context-window reachability, every bound model exposes
|
|
318
|
+
* a usable `contextWindow`; the getter does not need a defensive try/catch.
|
|
319
|
+
* - MUST compute `usedFraction = (inputTokens + cachedInputTokens + cacheWriteInputTokens) /
|
|
320
|
+
* contextWindow`, clamped to `[0, 1]`. The numerator must include cached
|
|
321
|
+
* tokens because Bedrock-Claude's `message_delta.usage` reports only the *incremental*
|
|
322
|
+
* `input_tokens` per delta — the bulk of the prompt rides on `cache_read_input_tokens`
|
|
323
|
+
* / `cache_creation_input_tokens` which the Claude adapter surfaces as
|
|
324
|
+
* `cachedInputTokens` / `cacheWriteInputTokens`. Those are real tokens the model
|
|
325
|
+
* actually loaded into its context window (Bedrock charges for them and counts them
|
|
326
|
+
* against the window), so they belong in the "how full" numerator. Mastra is
|
|
327
|
+
* unaffected — it doesn't populate the cache fields, so the sum collapses to
|
|
328
|
+
* `inputTokens` alone.
|
|
329
|
+
* - MUST treat `usedFraction` as `undefined` when ALL three input-bearing fields are
|
|
330
|
+
* undefined — pre-first-turn, post-`clearHistory()`, or a harness reading with no
|
|
331
|
+
* input-side counts at all.
|
|
332
|
+
*/
|
|
333
|
+
getContextUsage() {
|
|
334
|
+
this.assertNotDisposed();
|
|
335
|
+
const contextWindow = this.getContextWindow();
|
|
336
|
+
const { inputTokens, cachedInputTokens, cacheWriteInputTokens } = this.latestUsage;
|
|
337
|
+
const allInputUndefined = inputTokens === undefined && cachedInputTokens === undefined && cacheWriteInputTokens === undefined;
|
|
338
|
+
const effectiveInputTokens = allInputUndefined
|
|
339
|
+
? undefined
|
|
340
|
+
: (inputTokens ?? 0) + (cachedInputTokens ?? 0) + (cacheWriteInputTokens ?? 0);
|
|
341
|
+
const usedFraction = effectiveInputTokens === undefined
|
|
342
|
+
? undefined
|
|
343
|
+
: Math.min(1, Math.max(0, effectiveInputTokens / contextWindow));
|
|
344
|
+
// Spread `latestUsage` so consumer mutation of the returned `usage`
|
|
345
|
+
// object cannot leak back into the session's internal state on a
|
|
346
|
+
// subsequent `getContextUsage()` call. `UsageMetadata`'s fields are
|
|
347
|
+
// all primitives, so a shallow copy is sufficient.
|
|
348
|
+
return { usage: { ...this.latestUsage }, contextWindow, usedFraction };
|
|
274
349
|
}
|
|
275
350
|
/**
|
|
276
351
|
* @requirements
|
|
@@ -292,7 +367,15 @@ export class DefaultChatSession {
|
|
|
292
367
|
createdAt: this.clock.now(),
|
|
293
368
|
},
|
|
294
369
|
]
|
|
295
|
-
:
|
|
370
|
+
: // `Message.createdAt` is required-on-read, optional-on-write —
|
|
371
|
+
// the SDK owns the backfill so harnesses see populated
|
|
372
|
+
// timestamps regardless of consumer-construction style. The
|
|
373
|
+
// shared `backfillCreatedAt` helper steps per-position via
|
|
374
|
+
// `clock.nextAfter` so a bulk insert produces strictly-
|
|
375
|
+
// ascending values. The two production harnesses share the
|
|
376
|
+
// same helper at their own `addContext` boundary so a
|
|
377
|
+
// direct `harness.addContext` call gets the same shape.
|
|
378
|
+
backfillCreatedAt(message, this.clock);
|
|
296
379
|
await this.harness.addContext(this.agentId, this.threadId, messages);
|
|
297
380
|
}
|
|
298
381
|
/**
|
|
@@ -112,7 +112,14 @@ export interface AgentHarness {
|
|
|
112
112
|
}): Promise<void>;
|
|
113
113
|
/**
|
|
114
114
|
* Destroy an agent and release its resources (MCP connections, workspace, memory).
|
|
115
|
+
*
|
|
116
|
+
* MUST throw if `agentId` is not registered. Symmetric with `createThread`,
|
|
117
|
+
* `destroyThread`, and `clearMessages`, which all reject unknown ids the
|
|
118
|
+
* same way; gives SDK rollback paths in `Agent.updateAgentConfig` and
|
|
119
|
+
* `AgentManager.installAgent` an explicit failure mode they can catch.
|
|
120
|
+
*
|
|
115
121
|
* @param agentId - ID of the agent to destroy.
|
|
122
|
+
* @returns `true` after a real removal.
|
|
116
123
|
*/
|
|
117
124
|
destroyAgent(agentId: string): Promise<boolean>;
|
|
118
125
|
/**
|
|
@@ -124,8 +131,11 @@ export interface AgentHarness {
|
|
|
124
131
|
* including connection status and discovered tool names. This is a synchronous
|
|
125
132
|
* snapshot — status is updated asynchronously by background discovery promises.
|
|
126
133
|
*
|
|
134
|
+
* MUST throw if `agentId` is not registered.
|
|
135
|
+
*
|
|
127
136
|
* @param agentId - ID of the agent whose MCP servers to inspect.
|
|
128
|
-
* @returns Info for each configured MCP server (empty array if
|
|
137
|
+
* @returns Info for each configured MCP server (empty array if the agent
|
|
138
|
+
* exists but has no MCP servers configured).
|
|
129
139
|
*/
|
|
130
140
|
getMcpServerInfo(agentId: string): McpServerInfo[];
|
|
131
141
|
/**
|
|
@@ -175,14 +185,31 @@ export interface AgentHarness {
|
|
|
175
185
|
*/
|
|
176
186
|
getThreadIds(agentId: string): Promise<string[]>;
|
|
177
187
|
/**
|
|
178
|
-
* Clone an existing thread, creating a new thread
|
|
179
|
-
* Used to implement conversation
|
|
188
|
+
* Clone an existing thread, creating a new thread that mirrors the source
|
|
189
|
+
* thread's state at the moment of the call. Used to implement conversation
|
|
190
|
+
* forking.
|
|
191
|
+
*
|
|
192
|
+
* The harness chooses the new thread's id; consumers read it from the
|
|
193
|
+
* returned value. The id is unique within the agent.
|
|
194
|
+
*
|
|
195
|
+
* Two source-state shapes are observable to consumers:
|
|
196
|
+
*
|
|
197
|
+
* - **Source thread has been streamed at least once** — the new thread
|
|
198
|
+
* inherits the source's persisted message history; subsequent
|
|
199
|
+
* `getMessages()` returns it. Implementations may copy the underlying
|
|
200
|
+
* transcript (Mastra's libsql `cloneThread`, Claude's `forkSession`)
|
|
201
|
+
* or any harness-specific equivalent.
|
|
202
|
+
* - **Source thread has never been streamed** (`addContext`-only or
|
|
203
|
+
* freshly-created) — the new thread is allocated empty by design;
|
|
204
|
+
* `addContext`-injected messages on the source are copied forward by
|
|
205
|
+
* harnesses that mirror them in-process, but no persisted transcript
|
|
206
|
+
* exists to fork.
|
|
207
|
+
*
|
|
180
208
|
* @param agentId - ID of the owning agent.
|
|
181
209
|
* @param sourceThreadId - ID of the thread to clone.
|
|
182
|
-
* @param targetThreadId - Optional ID for the new thread.
|
|
183
210
|
* @returns The ID of the cloned thread.
|
|
184
211
|
*/
|
|
185
|
-
cloneThread(agentId: string, sourceThreadId: string, targetThreadId?: string): Promise<string>;
|
|
212
|
+
cloneThread(agentId: string, sourceThreadId: string): Promise<string>;
|
|
186
213
|
/**
|
|
187
214
|
* Compacts a thread's message history to reduce context window usage.
|
|
188
215
|
* Starts a new conversation thread seeded with an LLM-generated summary of the current session.
|
|
@@ -257,9 +284,12 @@ export interface AgentHarness {
|
|
|
257
284
|
/**
|
|
258
285
|
* Retrieve message history for a thread.
|
|
259
286
|
*
|
|
287
|
+
* MUST populate `Message.createdAt` on every returned message. MUST return
|
|
288
|
+
* messages sorted ascending by `createdAt`.
|
|
289
|
+
*
|
|
260
290
|
* @param agentId - ID of the agent.
|
|
261
291
|
* @param threadId - ID of the conversation thread.
|
|
262
|
-
* @returns All messages in chronological order (ascending by
|
|
292
|
+
* @returns All messages in chronological order (ascending by `createdAt`).
|
|
263
293
|
*/
|
|
264
294
|
getMessages(agentId: string, threadId: string): Promise<Message[]>;
|
|
265
295
|
/**
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import type { ToolDefinition } from '../types/tools.js';
|
|
2
2
|
import type { MCPConfiguration } from '../mcp-config.js';
|
|
3
|
-
import type { JSONWebToken, ModelName } from '@salesforce/llm-gateway-sdk';
|
|
3
|
+
import type { JSONWebToken, Model, ModelName } from '@salesforce/llm-gateway-sdk';
|
|
4
4
|
/**
|
|
5
5
|
* Configuration for an agent's behavior and capabilities.
|
|
6
6
|
* This excludes identity; `agentId` is handled separately.
|
|
@@ -14,8 +14,15 @@ export type AgentConfig = {
|
|
|
14
14
|
* - Otherwise, use the default org configured on the machine.
|
|
15
15
|
*/
|
|
16
16
|
orgAlias?: string;
|
|
17
|
-
/**
|
|
18
|
-
|
|
17
|
+
/**
|
|
18
|
+
* The model to use for this agent.
|
|
19
|
+
*
|
|
20
|
+
* Accepts either a {@link ModelName} enum value (the typical case for in-tree models) or a
|
|
21
|
+
* pre-built {@link Model} instance. The instance form lets consumers opt into a Claude
|
|
22
|
+
* variant published on the gateway before the SDK has been updated — see
|
|
23
|
+
* `createClaudeModel(gatewayId, overrides)` from `@salesforce/llm-gateway-sdk`.
|
|
24
|
+
*/
|
|
25
|
+
modelId?: ModelName | Model;
|
|
19
26
|
/** Human-readable name for the agent. */
|
|
20
27
|
name?: string;
|
|
21
28
|
/** Description of the agent's purpose. ACP/OASF-ready metadata. */
|
package/dist/index.d.ts
CHANGED
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
export type { Message, MessagePart, ImagePart, FilePart } from './types/messages.js';
|
|
2
2
|
export type { ChatEvent, StartEvent, TextDeltaEvent, ReasoningDeltaEvent, ToolCallEvent, ToolApprovalRequestEvent, ToolResultEvent, StepStartEvent, StepFinishEvent, ErrorEvent, FinishEvent, ChatStreamResult, } from './types/events.js';
|
|
3
3
|
export type { ToolDefinition, ToolCallInfo, ToolResultInfo } from './types/tools.js';
|
|
4
|
-
export type { FinishReason, UsageMetadata } from './types/usage.js';
|
|
4
|
+
export type { ContextUsage, FinishReason, UsageMetadata } from './types/usage.js';
|
|
5
5
|
export type { AgentConfig, HarnessAgentConfig, StreamOptions, ToolApprovalMode } from './harness/harness-config.js';
|
|
6
6
|
export { DEFAULT_MAX_STEPS, resolveToolApprovalMode } from './harness/harness-config.js';
|
|
7
7
|
export type { MCPConfiguration, MCPServerConfig, MCPStdioServerConfig, MCPRemoteServerConfig, McpServerInfo, McpServerErrorCategory, McpServerErrorDetail, McpToolInfo, McpToolAnnotations, } from './mcp-config.js';
|
|
8
8
|
export { McpServerStatus } from './mcp-config.js';
|
|
9
|
-
export { ModelName } from '@salesforce/llm-gateway-sdk';
|
|
9
|
+
export { Model, ModelName, createClaudeModel } from '@salesforce/llm-gateway-sdk';
|
|
10
|
+
export type { ClaudeModelOverrides } from '@salesforce/llm-gateway-sdk';
|
|
10
11
|
export { inferSfApiEnv, SfApiEnv } from '@salesforce/agentic-common';
|
|
11
12
|
export { type AgentManager, type RestoreFailure, createAgentManager } from './agent-manager.js';
|
|
12
13
|
export { type Agent } from './agent.js';
|
package/dist/index.js
CHANGED
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
*/
|
|
5
5
|
export { DEFAULT_MAX_STEPS, resolveToolApprovalMode } from './harness/harness-config.js';
|
|
6
6
|
export { McpServerStatus } from './mcp-config.js';
|
|
7
|
-
export { ModelName } from '@salesforce/llm-gateway-sdk';
|
|
7
|
+
export { Model, ModelName, createClaudeModel } from '@salesforce/llm-gateway-sdk';
|
|
8
8
|
export { inferSfApiEnv, SfApiEnv } from '@salesforce/agentic-common';
|
|
9
9
|
// ── Agent Layer ─────────────────────────────────────────────────────
|
|
10
10
|
export { createAgentManager } from './agent-manager.js';
|
package/dist/mcp-config.d.ts
CHANGED
|
@@ -65,6 +65,32 @@ export type MCPRemoteServerConfig = {
|
|
|
65
65
|
enabled?: boolean;
|
|
66
66
|
/** Timeout in milliseconds for individual requests to the server. */
|
|
67
67
|
timeout?: number;
|
|
68
|
+
/**
|
|
69
|
+
* Transport-level reconnection tuning for HTTP MCP servers. Forwarded to
|
|
70
|
+
* the underlying SDK transport (`@modelcontextprotocol/sdk`'s
|
|
71
|
+
* `StreamableHTTPClientTransport` on the Claude harness, and the
|
|
72
|
+
* equivalent plumb-through on `@mastra/mcp`'s `HttpServerDefinition`).
|
|
73
|
+
*
|
|
74
|
+
* Each field is optional. The harness mappers merge unspecified fields
|
|
75
|
+
* with the MCP SDK's built-in defaults (`maxRetries: 2`,
|
|
76
|
+
* `initialReconnectionDelay: 1000`, `maxReconnectionDelay: 30000`,
|
|
77
|
+
* `reconnectionDelayGrowFactor: 1.5`) so a partial override leaves the
|
|
78
|
+
* other fields at their defaults rather than zeroing them out — the
|
|
79
|
+
* underlying transport replaces the entire defaults object when
|
|
80
|
+
* `reconnectionOptions` is set.
|
|
81
|
+
*
|
|
82
|
+
* No-op for stdio servers — only `MCPRemoteServerConfig` carries it.
|
|
83
|
+
*/
|
|
84
|
+
reconnectionOptions?: {
|
|
85
|
+
/** Maximum number of reconnection attempts before giving up. Default `2`. */
|
|
86
|
+
maxRetries?: number;
|
|
87
|
+
/** Initial backoff between reconnection attempts in milliseconds. Default `1000`. */
|
|
88
|
+
initialReconnectionDelay?: number;
|
|
89
|
+
/** Maximum backoff between reconnection attempts in milliseconds. Default `30000`. */
|
|
90
|
+
maxReconnectionDelay?: number;
|
|
91
|
+
/** Factor by which the reconnection delay grows after each attempt. Default `1.5`. */
|
|
92
|
+
reconnectionDelayGrowFactor?: number;
|
|
93
|
+
};
|
|
68
94
|
/**
|
|
69
95
|
* Opt the server's tool surface out of the active runtime's tool-search
|
|
70
96
|
* deferral. See {@link MCPStdioServerConfig.alwaysLoad}.
|
package/dist/types/events.d.ts
CHANGED
|
@@ -8,7 +8,7 @@ import type { FinishReason, UsageMetadata } from './usage.js';
|
|
|
8
8
|
* convention, with the addition of `tool-approval-request` for human-in-the-loop
|
|
9
9
|
* tool approval flows.
|
|
10
10
|
*/
|
|
11
|
-
export type ChatEvent = StartEvent | TextDeltaEvent | ReasoningDeltaEvent | ToolCallEvent | ToolApprovalRequestEvent | ToolResultEvent | StepStartEvent | StepFinishEvent | ErrorEvent | FinishEvent | UnmappedChunkEvent;
|
|
11
|
+
export type ChatEvent = StartEvent | TextDeltaEvent | ReasoningDeltaEvent | ToolCallEvent | ToolApprovalRequestEvent | ToolResultEvent | StepStartEvent | StepFinishEvent | ErrorEvent | FinishEvent;
|
|
12
12
|
/**
|
|
13
13
|
* The stream has begun. Symmetric counterpart to {@link FinishEvent}.
|
|
14
14
|
*
|
|
@@ -155,19 +155,6 @@ export type ErrorEvent = {
|
|
|
155
155
|
/** Machine-readable error code (e.g., `'insufficient-tokens'`). */
|
|
156
156
|
code?: string;
|
|
157
157
|
};
|
|
158
|
-
/**
|
|
159
|
-
* A stream chunk from the underlying harness that has no `ChatEvent` counterpart.
|
|
160
|
-
*
|
|
161
|
-
* Returned instead of silently discarding the chunk, so consumers can log or
|
|
162
|
-
* monitor unhandled harness events for observability.
|
|
163
|
-
*/
|
|
164
|
-
export type UnmappedChunkEvent = {
|
|
165
|
-
type: 'unmapped-chunk';
|
|
166
|
-
/** The original harness chunk type string (e.g., `'tool-call-suspended'`, `'raw'`). */
|
|
167
|
-
chunkType: string;
|
|
168
|
-
/** The raw chunk object, preserved for diagnostic logging. */
|
|
169
|
-
rawChunk: unknown;
|
|
170
|
-
};
|
|
171
158
|
/** The entire stream has completed. */
|
|
172
159
|
export type FinishEvent = {
|
|
173
160
|
type: 'finish';
|
package/dist/types/messages.d.ts
CHANGED
|
@@ -24,7 +24,18 @@ export type Message = {
|
|
|
24
24
|
role: MessageRole;
|
|
25
25
|
/** Message content — plain text or structured parts. */
|
|
26
26
|
content: string | MessagePart[];
|
|
27
|
-
/**
|
|
27
|
+
/**
|
|
28
|
+
* Timestamp of when the message was created. **Always populated** on
|
|
29
|
+
* messages returned from `ChatSession.getMessageHistory()`. **Optional on
|
|
30
|
+
* write** — consumers constructing `Message` for `ChatSession.addContext()`
|
|
31
|
+
* may omit it; the SDK backfills the current time before forwarding to
|
|
32
|
+
* the harness, so the on-read contract still holds.
|
|
33
|
+
*
|
|
34
|
+
* The read-side guarantee lives on `AgentHarness.getMessages` — see its
|
|
35
|
+
* JSDoc for the contract every harness implementation upholds (populated
|
|
36
|
+
* `createdAt` on every returned message; array sorted ascending by
|
|
37
|
+
* `createdAt`). The SDK passes the harness's output through unchanged.
|
|
38
|
+
*/
|
|
28
39
|
createdAt?: Date;
|
|
29
40
|
};
|
|
30
41
|
/**
|
|
@@ -122,10 +122,12 @@ export type McpServerDiscoveryFailedEvent = Base<'mcp-server-discovery-failed'>
|
|
|
122
122
|
* then on success `Reconnecting → Connected`, or on failure
|
|
123
123
|
* `Reconnecting → Error`.
|
|
124
124
|
*
|
|
125
|
-
*
|
|
126
|
-
*
|
|
127
|
-
*
|
|
128
|
-
* `
|
|
125
|
+
* Both the Mastra and Claude harnesses emit this on every per-server
|
|
126
|
+
* `McpServerStatus` transition they observe (initial discovery success /
|
|
127
|
+
* failure, reconnect entry, reconnect outcome). Mid-session transport drops
|
|
128
|
+
* on a `Connected` server are not observable on either harness's underlying
|
|
129
|
+
* SDK between turns; the next operation that touches the server discovers
|
|
130
|
+
* the drop and emits the transition then.
|
|
129
131
|
*/
|
|
130
132
|
export type McpServerStatusChangedEvent = Base<'mcp-server-status-changed'> & {
|
|
131
133
|
agentId: string;
|
package/dist/types/usage.d.ts
CHANGED
|
@@ -16,6 +16,71 @@ export type UsageMetadata = {
|
|
|
16
16
|
/** Input tokens written to the provider cache during this interaction. */
|
|
17
17
|
cacheWriteInputTokens?: number;
|
|
18
18
|
};
|
|
19
|
+
/**
|
|
20
|
+
* Snapshot of how much of the model's context window the most recent
|
|
21
|
+
* turn used. Returned by {@link ChatSession.getContextUsage}.
|
|
22
|
+
*
|
|
23
|
+
* Consumers use this to decide when to call `compactThread()`, switch to a
|
|
24
|
+
* smaller model, or warn the user as the conversation approaches the
|
|
25
|
+
* model's context limit.
|
|
26
|
+
*
|
|
27
|
+
* `usage` carries the **last per-step** reading from the model —
|
|
28
|
+
* specifically the `usage` from the latest `step-finish` event whose `usage`
|
|
29
|
+
* was defined. This is the size of the prompt the model saw on its last
|
|
30
|
+
* invocation, which is the right "how full is my context" reading. This is
|
|
31
|
+
* **not** the per-turn billing aggregate (which sums steps and double-counts
|
|
32
|
+
* persistent context). For per-turn billing totals, subscribe to
|
|
33
|
+
* `chat-stream-completed` telemetry instead.
|
|
34
|
+
*
|
|
35
|
+
* Field shapes:
|
|
36
|
+
*
|
|
37
|
+
* - `usage` is always populated. Pre-first-turn (or post-`clearHistory()`)
|
|
38
|
+
* it is the empty object `{}` — i.e., a `UsageMetadata` whose token fields
|
|
39
|
+
* are all `undefined` — making "no reading yet" indistinguishable from
|
|
40
|
+
* "harness reported every field as undefined."
|
|
41
|
+
* - `contextWindow` is always populated, contractually. Every `Model`
|
|
42
|
+
* reachable via `Agent.llmGatewayClient.getModel()` must publish a
|
|
43
|
+
* `contextWindow`; see the `sfdx-agent-sdk` ARCHITECTURE.md Critical
|
|
44
|
+
* Invariant on this and issue #507.
|
|
45
|
+
* - `usedFraction` is `undefined` iff every input-bearing field on the
|
|
46
|
+
* latest reading (`inputTokens`, `cachedInputTokens`, `cacheWriteInputTokens`)
|
|
47
|
+
* is `undefined` — the only honest answer when we have no input-side
|
|
48
|
+
* reading to divide. The numerator sums all three because
|
|
49
|
+
* cached prompt tokens occupy real space in the context window (see the
|
|
50
|
+
* field-level doc on `usedFraction` for the Bedrock-Claude rationale).
|
|
51
|
+
* Consumers who want zero-on-empty UX can collapse with `usedFraction ?? 0`.
|
|
52
|
+
*/
|
|
53
|
+
export type ContextUsage = {
|
|
54
|
+
/**
|
|
55
|
+
* Last per-step usage reading observed on this session. Pre-first-turn
|
|
56
|
+
* and immediately after `clearHistory()` this is `{}` (every token field
|
|
57
|
+
* undefined).
|
|
58
|
+
*/
|
|
59
|
+
usage: UsageMetadata;
|
|
60
|
+
/**
|
|
61
|
+
* The model's total context-window size in tokens. Read live at call
|
|
62
|
+
* time from the agent's currently-bound `LLMGatewayClient`, so it stays
|
|
63
|
+
* correct across `Agent.updateAgentConfig()` model swaps.
|
|
64
|
+
*/
|
|
65
|
+
contextWindow: number;
|
|
66
|
+
/**
|
|
67
|
+
* `(usage.inputTokens + usage.cachedInputTokens + usage.cacheWriteInputTokens) /
|
|
68
|
+
* contextWindow`, clamped to `[0, 1]`. The numerator includes
|
|
69
|
+
* cached prompt tokens because they are real tokens occupying the model's
|
|
70
|
+
* context window — Bedrock-Claude's `message_delta.usage` reports only the
|
|
71
|
+
* incremental `inputTokens` per delta, with the bulk of the prompt riding
|
|
72
|
+
* on `cachedInputTokens` / `cacheWriteInputTokens`. Counting only
|
|
73
|
+
* `inputTokens` would underreport "how full" by orders of magnitude on
|
|
74
|
+
* cache-hit paths. Mastra is unaffected because it does not populate the
|
|
75
|
+
* cache fields, so the sum collapses to `inputTokens` alone.
|
|
76
|
+
*
|
|
77
|
+
* `undefined` when ALL three input-bearing fields are missing on the
|
|
78
|
+
* latest reading (pre-first-turn, post-`clearHistory()`, or when a
|
|
79
|
+
* harness emits a reading without any input-side counts). Consumers
|
|
80
|
+
* wanting zero-on-empty: `usedFraction ?? 0`.
|
|
81
|
+
*/
|
|
82
|
+
usedFraction: number | undefined;
|
|
83
|
+
};
|
|
19
84
|
/**
|
|
20
85
|
* Reason the model stopped generating.
|
|
21
86
|
* Aligned with AI SDK V3's unified finish-reason set; harnesses normalize provider-specific
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@salesforce/sfdx-agent-sdk",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.15.0",
|
|
4
4
|
"description": "Harness-agnostic agentic infrastructure for Salesforce developer experience tooling",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -35,13 +35,13 @@
|
|
|
35
35
|
"LICENSE.txt"
|
|
36
36
|
],
|
|
37
37
|
"dependencies": {
|
|
38
|
-
"@salesforce/agentic-common": "0.
|
|
39
|
-
"@salesforce/llm-gateway-sdk": "0.
|
|
38
|
+
"@salesforce/agentic-common": "0.7.0",
|
|
39
|
+
"@salesforce/llm-gateway-sdk": "0.11.0"
|
|
40
40
|
},
|
|
41
41
|
"devDependencies": {
|
|
42
42
|
"@eslint/js": "^10.0.1",
|
|
43
|
-
"@salesforce/sfdx-agent-harness-claude": "0.
|
|
44
|
-
"@salesforce/sfdx-agent-harness-mastra": "0.
|
|
43
|
+
"@salesforce/sfdx-agent-harness-claude": "0.11.0",
|
|
44
|
+
"@salesforce/sfdx-agent-harness-mastra": "0.14.0",
|
|
45
45
|
"@types/node": "^22.19.17",
|
|
46
46
|
"@vitest/coverage-istanbul": "^4.1.7",
|
|
47
47
|
"@vitest/eslint-plugin": "^1.6.17",
|