@aexhq/sdk 0.34.0 → 0.35.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/_contracts/submission.d.ts +58 -23
- package/dist/_contracts/submission.js +54 -10
- package/dist/cli.mjs +79 -0
- package/dist/cli.mjs.sha256 +1 -1
- package/dist/client.d.ts +19 -0
- package/dist/client.js +92 -6
- package/dist/client.js.map +1 -1
- package/dist/index.d.ts +2 -0
- package/dist/index.js +6 -0
- package/dist/index.js.map +1 -1
- package/dist/retry.d.ts +162 -0
- package/dist/retry.js +320 -0
- package/dist/retry.js.map +1 -0
- package/dist/version.d.ts +1 -1
- package/dist/version.js +1 -1
- package/docs/retries.md +129 -0
- package/examples/feature-tour.ts +301 -0
- package/package.json +1 -1
|
@@ -402,33 +402,31 @@ export interface PlatformRunSubmissionRequest {
|
|
|
402
402
|
* terminal wait window and self-kill deadline.
|
|
403
403
|
*/
|
|
404
404
|
readonly timeoutMs?: number;
|
|
405
|
-
/**
|
|
406
|
-
* Lineage parent (agent-session §9). When present the server admits this
|
|
407
|
-
* run as a CHILD of `parentRunId`: it walks the parent's lineage, enforces
|
|
408
|
-
* the max-subagent-depth + per-root concurrency caps, and persists
|
|
409
|
-
* `parent_run_id` + a server-derived `depth`. The client may name a parent
|
|
410
|
-
* but NEVER the depth — depth is computed server-side from the parent row,
|
|
411
|
-
* so a forged value cannot bypass the cap.
|
|
412
|
-
*/
|
|
413
|
-
readonly parentRunId?: string;
|
|
414
405
|
/**
|
|
415
406
|
* Optional per-run callback URL. The platform delivers exactly the terminal
|
|
416
407
|
* `run.finished` event to this URL at the settle-consistent barrier, signed
|
|
417
|
-
* Standard-Webhooks style. It is a sibling of {@link idempotencyKey}
|
|
418
|
-
*
|
|
419
|
-
*
|
|
420
|
-
*
|
|
408
|
+
* Standard-Webhooks style. It is a sibling of {@link idempotencyKey} — an
|
|
409
|
+
* operational/delivery concern, NOT part of the hashed submission brief, so
|
|
410
|
+
* the same idempotency key with a different callback URL never 409s and the
|
|
411
|
+
* field never enters `request_hash`.
|
|
421
412
|
*/
|
|
422
413
|
readonly webhook?: RunWebhookSpec;
|
|
423
414
|
/**
|
|
424
415
|
* Optional per-run override of the lineage limits (max concurrent child runs,
|
|
425
|
-
* max subagent depth
|
|
426
|
-
*
|
|
427
|
-
*
|
|
428
|
-
*
|
|
416
|
+
* max subagent depth, per-run spend cap). These are dials the client may
|
|
417
|
+
* *request*; the server resolves them against the per-workspace ceiling and
|
|
418
|
+
* the hard platform ceiling (clamping happens in the resolver, NOT this
|
|
419
|
+
* parser). Absent fields fall back to the platform defaults. Only shape +
|
|
429
420
|
* positivity are validated here.
|
|
430
421
|
*/
|
|
431
422
|
readonly limits?: RunLimits;
|
|
423
|
+
/**
|
|
424
|
+
* Optional capacity intent for the run's managed machine. `spot: true` opts
|
|
425
|
+
* the run into interruptible capacity; absent / `spot: false` requests
|
|
426
|
+
* standard capacity (the default). Intent only — the managed runtime selects
|
|
427
|
+
* capacity from it.
|
|
428
|
+
*/
|
|
429
|
+
readonly machine?: RunMachine;
|
|
432
430
|
}
|
|
433
431
|
/** Per-run webhook callback. v1: terminal-only; the URL must be https. */
|
|
434
432
|
export interface RunWebhookSpec {
|
|
@@ -444,15 +442,26 @@ export interface RunLimits {
|
|
|
444
442
|
readonly maxConcurrentChildRuns?: number;
|
|
445
443
|
readonly maxSubagentDepth?: number;
|
|
446
444
|
/**
|
|
447
|
-
* Per-run spend cap in USD (defense-in-depth). The platform
|
|
448
|
-
*
|
|
449
|
-
*
|
|
450
|
-
*
|
|
451
|
-
*
|
|
452
|
-
*
|
|
445
|
+
* Per-run spend cap in USD (defense-in-depth). The platform kills the run once
|
|
446
|
+
* it would out-spend the cap. A positive number; omitted ⇒ unbounded per-run
|
|
447
|
+
* (only the run's wall-clock `timeout` + the per-workspace spend cap apply).
|
|
448
|
+
* Only shape/positivity are validated here.
|
|
449
|
+
*
|
|
450
|
+
* The frozen boot session config the managed runtime folds the loop against
|
|
451
|
+
* names this same USD value `budgetUsd`; {@link sessionBudgetLimits} is the
|
|
452
|
+
* single source of truth for that wire→boot name mapping.
|
|
453
453
|
*/
|
|
454
454
|
readonly maxSpendUsd?: number;
|
|
455
455
|
}
|
|
456
|
+
/**
|
|
457
|
+
* Per-run machine/capacity intent. v1 exposes only `spot`: opt the run into
|
|
458
|
+
* interruptible capacity (`spot: true`) vs standard capacity (absent /
|
|
459
|
+
* `spot: false`, the default). Only the boolean intent is public — capacity
|
|
460
|
+
* selection is a runtime concern.
|
|
461
|
+
*/
|
|
462
|
+
export interface RunMachine {
|
|
463
|
+
readonly spot?: boolean;
|
|
464
|
+
}
|
|
456
465
|
/**
|
|
457
466
|
* Wire shape posted by the SDK and CLI. `workspaceId` is **omitted by
|
|
458
467
|
* design** — token-authenticated clients never name the workspace
|
|
@@ -500,6 +509,32 @@ export declare function parseRunWebhook(input: unknown): RunWebhookSpec | undefi
|
|
|
500
509
|
* collapses to `undefined` so it carries no signal onto the request.
|
|
501
510
|
*/
|
|
502
511
|
export declare function parseRunLimits(input: unknown): RunLimits | undefined;
|
|
512
|
+
/**
|
|
513
|
+
* Boot-session budget fragment. The public submit surface names a run's spend
|
|
514
|
+
* cap `limits.maxSpendUsd`; the frozen boot session config the managed runtime
|
|
515
|
+
* folds the loop against names the SAME USD value `budgetUsd` — the field the
|
|
516
|
+
* session planner reads to enforce/terminate a run that would out-spend its cap.
|
|
517
|
+
* This is the single source of truth for that wire→boot name mapping so the two
|
|
518
|
+
* layers can never drift.
|
|
519
|
+
*
|
|
520
|
+
* Returns a fragment safe to spread into `sessionConfig.limits`: `{ budgetUsd }`
|
|
521
|
+
* when a cap is set, `{}` when none is (an absent cap stays absent — the run is
|
|
522
|
+
* unbounded per-run, subject only to the run timeout + the per-workspace cap).
|
|
523
|
+
* Pure: same input ⇒ same output.
|
|
524
|
+
*/
|
|
525
|
+
export declare function sessionBudgetLimits(limits: RunLimits | undefined): {
|
|
526
|
+
budgetUsd?: number;
|
|
527
|
+
};
|
|
528
|
+
/**
|
|
529
|
+
* Parse the optional per-run `machine` capacity intent. Mirrors
|
|
530
|
+
* {@link parseRunWebhook}: absent ⇒ `undefined`; a non-object or any unknown
|
|
531
|
+
* subfield is rejected so the strict top-level allow-list extends to the nested
|
|
532
|
+
* object. `spot` must be a boolean when present. A no-signal object (e.g.
|
|
533
|
+
* `machine: {}`) collapses to `undefined` so it never lands an empty object on
|
|
534
|
+
* the request. An explicit `spot` (true or false) is preserved verbatim. Only
|
|
535
|
+
* shape is validated here — capacity selection is a runtime concern.
|
|
536
|
+
*/
|
|
537
|
+
export declare function parseRunMachine(input: unknown): RunMachine | undefined;
|
|
503
538
|
export declare function parseRunProvider(input: unknown): RunProvider;
|
|
504
539
|
/**
|
|
505
540
|
* Cross-check the supplied secrets bundle against the credential mode. BYOK
|
|
@@ -658,8 +658,9 @@ export function crossValidateSecretEnvAndValues(secretEnv, envSecrets) {
|
|
|
658
658
|
}
|
|
659
659
|
}
|
|
660
660
|
export function parseInlineSecrets(input) {
|
|
661
|
-
//
|
|
662
|
-
//
|
|
661
|
+
// Absent/null secrets collapse to an empty bundle; the credential-policy gate
|
|
662
|
+
// (enforceCredentialSecretPolicy) decides whether that is admissible for the
|
|
663
|
+
// run's mode (a run inheriting keys server-side may legitimately omit them).
|
|
663
664
|
if (input === undefined || input === null)
|
|
664
665
|
return {};
|
|
665
666
|
const value = requireRecord(input, "secrets");
|
|
@@ -994,9 +995,9 @@ export function parseRunSubmissionRequest(input, options = {}) {
|
|
|
994
995
|
"runtimeSize",
|
|
995
996
|
"timeout",
|
|
996
997
|
"proxyEndpoints",
|
|
997
|
-
"parentRunId",
|
|
998
998
|
"webhook",
|
|
999
999
|
"limits",
|
|
1000
|
+
"machine",
|
|
1000
1001
|
SECRETS_KEY
|
|
1001
1002
|
]);
|
|
1002
1003
|
for (const key of Object.keys(value)) {
|
|
@@ -1020,16 +1021,12 @@ export function parseRunSubmissionRequest(input, options = {}) {
|
|
|
1020
1021
|
void options;
|
|
1021
1022
|
const runtimeSize = parseRuntimeSize(value.runtimeSize);
|
|
1022
1023
|
const timeoutMs = parseRunTimeout(value.timeout);
|
|
1023
|
-
// Lineage parent only. `depth` is NEVER accepted from the wire — the server
|
|
1024
|
-
// derives it from the parent row (a forged depth must not bypass the cap).
|
|
1025
|
-
const parentRunId = optionalString(value.parentRunId, "submission.parentRunId");
|
|
1026
1024
|
const webhook = parseRunWebhook(value.webhook);
|
|
1027
1025
|
const limits = parseRunLimits(value.limits);
|
|
1026
|
+
const machine = parseRunMachine(value.machine);
|
|
1028
1027
|
const proxyEndpoints = parseProxyEndpoints(value.proxyEndpoints);
|
|
1029
1028
|
const secrets = parseInlineSecrets(value.secrets);
|
|
1030
|
-
enforceCredentialSecretPolicy(secrets, provider
|
|
1031
|
-
inheritsFromParent: parentRunId !== undefined
|
|
1032
|
-
});
|
|
1029
|
+
enforceCredentialSecretPolicy(secrets, provider);
|
|
1033
1030
|
crossValidateProxyEndpointsAndAuth(proxyEndpoints, secrets.proxyEndpointAuth);
|
|
1034
1031
|
const submission = parseSubmission(value.submission);
|
|
1035
1032
|
assertRunModelMatchesProvider(provider, submission.model);
|
|
@@ -1060,9 +1057,9 @@ export function parseRunSubmissionRequest(input, options = {}) {
|
|
|
1060
1057
|
...(runtimeSize ? { runtimeSize } : {}),
|
|
1061
1058
|
...(timeoutMs !== undefined ? { timeoutMs } : {}),
|
|
1062
1059
|
...(proxyEndpoints ? { proxyEndpoints } : {}),
|
|
1063
|
-
...(parentRunId !== undefined ? { parentRunId } : {}),
|
|
1064
1060
|
...(webhook !== undefined ? { webhook } : {}),
|
|
1065
1061
|
...(limits !== undefined ? { limits } : {}),
|
|
1062
|
+
...(machine !== undefined ? { machine } : {}),
|
|
1066
1063
|
secrets
|
|
1067
1064
|
};
|
|
1068
1065
|
}
|
|
@@ -1141,6 +1138,53 @@ export function parseRunLimits(input) {
|
|
|
1141
1138
|
...(maxSpendUsd !== undefined ? { maxSpendUsd } : {})
|
|
1142
1139
|
};
|
|
1143
1140
|
}
|
|
1141
|
+
/**
|
|
1142
|
+
* Boot-session budget fragment. The public submit surface names a run's spend
|
|
1143
|
+
* cap `limits.maxSpendUsd`; the frozen boot session config the managed runtime
|
|
1144
|
+
* folds the loop against names the SAME USD value `budgetUsd` — the field the
|
|
1145
|
+
* session planner reads to enforce/terminate a run that would out-spend its cap.
|
|
1146
|
+
* This is the single source of truth for that wire→boot name mapping so the two
|
|
1147
|
+
* layers can never drift.
|
|
1148
|
+
*
|
|
1149
|
+
* Returns a fragment safe to spread into `sessionConfig.limits`: `{ budgetUsd }`
|
|
1150
|
+
* when a cap is set, `{}` when none is (an absent cap stays absent — the run is
|
|
1151
|
+
* unbounded per-run, subject only to the run timeout + the per-workspace cap).
|
|
1152
|
+
* Pure: same input ⇒ same output.
|
|
1153
|
+
*/
|
|
1154
|
+
export function sessionBudgetLimits(limits) {
|
|
1155
|
+
if (limits?.maxSpendUsd === undefined) {
|
|
1156
|
+
return {};
|
|
1157
|
+
}
|
|
1158
|
+
return { budgetUsd: limits.maxSpendUsd };
|
|
1159
|
+
}
|
|
1160
|
+
/**
|
|
1161
|
+
* Parse the optional per-run `machine` capacity intent. Mirrors
|
|
1162
|
+
* {@link parseRunWebhook}: absent ⇒ `undefined`; a non-object or any unknown
|
|
1163
|
+
* subfield is rejected so the strict top-level allow-list extends to the nested
|
|
1164
|
+
* object. `spot` must be a boolean when present. A no-signal object (e.g.
|
|
1165
|
+
* `machine: {}`) collapses to `undefined` so it never lands an empty object on
|
|
1166
|
+
* the request. An explicit `spot` (true or false) is preserved verbatim. Only
|
|
1167
|
+
* shape is validated here — capacity selection is a runtime concern.
|
|
1168
|
+
*/
|
|
1169
|
+
export function parseRunMachine(input) {
|
|
1170
|
+
if (input === undefined) {
|
|
1171
|
+
return undefined;
|
|
1172
|
+
}
|
|
1173
|
+
const value = requireRecord(input, "machine");
|
|
1174
|
+
const allowed = new Set(["spot"]);
|
|
1175
|
+
for (const key of Object.keys(value)) {
|
|
1176
|
+
if (!allowed.has(key)) {
|
|
1177
|
+
throw new Error(`machine.${key} is not an allowed field; permitted: ${[...allowed].join(", ")}`);
|
|
1178
|
+
}
|
|
1179
|
+
}
|
|
1180
|
+
if (value.spot !== undefined && typeof value.spot !== "boolean") {
|
|
1181
|
+
throw new Error("machine.spot must be a boolean");
|
|
1182
|
+
}
|
|
1183
|
+
if (value.spot === undefined) {
|
|
1184
|
+
return undefined;
|
|
1185
|
+
}
|
|
1186
|
+
return { spot: value.spot };
|
|
1187
|
+
}
|
|
1144
1188
|
export function parseRunProvider(input) {
|
|
1145
1189
|
if (input === undefined) {
|
|
1146
1190
|
return DEFAULT_RUN_PROVIDER;
|
package/dist/cli.mjs
CHANGED
|
@@ -4400,6 +4400,82 @@ async function runWhoamiCmd(io2, argv) {
|
|
|
4400
4400
|
}
|
|
4401
4401
|
}
|
|
4402
4402
|
|
|
4403
|
+
// dist/host/redeem.js
|
|
4404
|
+
function messageForStatus(status2, serverMessage) {
|
|
4405
|
+
switch (status2) {
|
|
4406
|
+
case 404:
|
|
4407
|
+
return "coupon code not found";
|
|
4408
|
+
case 403:
|
|
4409
|
+
return "this coupon can't be redeemed by this workspace";
|
|
4410
|
+
case 409:
|
|
4411
|
+
return "coupon already redeemed";
|
|
4412
|
+
case 400:
|
|
4413
|
+
return serverMessage ? `invalid input: ${serverMessage}` : "invalid input";
|
|
4414
|
+
case 401:
|
|
4415
|
+
return "not authorized \u2014 check --api-token, or run `aex login`";
|
|
4416
|
+
default:
|
|
4417
|
+
return serverMessage ? `redeem failed: ${serverMessage}` : `redeem failed (HTTP ${status2})`;
|
|
4418
|
+
}
|
|
4419
|
+
}
|
|
4420
|
+
async function runRedeemCmd(io2, argv) {
|
|
4421
|
+
if (await refuseInsideManagedRun(io2, "redeem"))
|
|
4422
|
+
return USAGE_ERR;
|
|
4423
|
+
const common = await resolveCommonHostFlags(io2, argv);
|
|
4424
|
+
if (!common.ok) {
|
|
4425
|
+
io2.stderr(`${common.reason}
|
|
4426
|
+
`);
|
|
4427
|
+
return USAGE_ERR;
|
|
4428
|
+
}
|
|
4429
|
+
const positional = common.rest.filter((arg) => !arg.startsWith("--"));
|
|
4430
|
+
if (positional.length !== 1) {
|
|
4431
|
+
io2.stderr("usage: aex redeem <code> [common flags]\n");
|
|
4432
|
+
return USAGE_ERR;
|
|
4433
|
+
}
|
|
4434
|
+
const code = positional[0];
|
|
4435
|
+
const base = common.flags.aexUrl.replace(/\/+$/, "");
|
|
4436
|
+
const url = `${base}/billing/redeem`;
|
|
4437
|
+
let response;
|
|
4438
|
+
try {
|
|
4439
|
+
response = await io2.fetchImpl(url, {
|
|
4440
|
+
method: "POST",
|
|
4441
|
+
headers: {
|
|
4442
|
+
accept: "application/json",
|
|
4443
|
+
"content-type": "application/json",
|
|
4444
|
+
authorization: `Bearer ${common.flags.apiToken}`
|
|
4445
|
+
},
|
|
4446
|
+
body: JSON.stringify({ code })
|
|
4447
|
+
});
|
|
4448
|
+
} catch (err2) {
|
|
4449
|
+
io2.stderr(`redeem failed: ${err2 instanceof Error ? err2.message : String(err2)}
|
|
4450
|
+
`);
|
|
4451
|
+
return RUNTIME_ERR;
|
|
4452
|
+
}
|
|
4453
|
+
if (common.flags.debug) {
|
|
4454
|
+
io2.stderr(`[aex] POST /billing/redeem -> ${response.status}
|
|
4455
|
+
`);
|
|
4456
|
+
}
|
|
4457
|
+
const text = await response.text();
|
|
4458
|
+
let body = {};
|
|
4459
|
+
try {
|
|
4460
|
+
if (text.length > 0)
|
|
4461
|
+
body = JSON.parse(text);
|
|
4462
|
+
} catch {
|
|
4463
|
+
body = {};
|
|
4464
|
+
}
|
|
4465
|
+
if (!response.ok) {
|
|
4466
|
+
const serverMessage = body && typeof body === "object" && typeof body.message === "string" ? body.message : void 0;
|
|
4467
|
+
io2.stderr(`${messageForStatus(response.status, serverMessage)}
|
|
4468
|
+
`);
|
|
4469
|
+
return RUNTIME_ERR;
|
|
4470
|
+
}
|
|
4471
|
+
const ok = body;
|
|
4472
|
+
const amountUsd = typeof ok.amountUsd === "number" ? ok.amountUsd : 0;
|
|
4473
|
+
const newBalanceUsd = typeof ok.newBalanceUsd === "number" ? ok.newBalanceUsd : 0;
|
|
4474
|
+
io2.stdout(`Redeemed $${amountUsd.toFixed(2)}. New balance: $${newBalanceUsd.toFixed(2)}.
|
|
4475
|
+
`);
|
|
4476
|
+
return SUCCESS;
|
|
4477
|
+
}
|
|
4478
|
+
|
|
4403
4479
|
// dist/host/debug.js
|
|
4404
4480
|
import { dirname, resolve as resolvePath3 } from "node:path";
|
|
4405
4481
|
function status(source, state, opts = {}) {
|
|
@@ -5680,6 +5756,8 @@ async function dispatch(io2, args) {
|
|
|
5680
5756
|
return runDeleteAssetCmd(io2, rest);
|
|
5681
5757
|
case "whoami":
|
|
5682
5758
|
return runWhoamiCmd(io2, rest);
|
|
5759
|
+
case "redeem":
|
|
5760
|
+
return runRedeemCmd(io2, rest);
|
|
5683
5761
|
case "login":
|
|
5684
5762
|
return runLoginCmd(io2, rest);
|
|
5685
5763
|
case "logout":
|
|
@@ -5740,6 +5818,7 @@ Protocol version: ${manifest.protocolVersion}
|
|
|
5740
5818
|
io2.stdout(" aex delete <session-id> --api-token T\n");
|
|
5741
5819
|
io2.stdout(" aex delete-asset <assetId|hash> --api-token T\n");
|
|
5742
5820
|
io2.stdout(" aex whoami --api-token T\n");
|
|
5821
|
+
io2.stdout(" aex redeem <code> --api-token T Redeem a coupon code into the workspace prepaid balance\n");
|
|
5743
5822
|
io2.stdout(" aex login --api-token T [--aex-url U] Persist token + url (then other verbs need no --api-token)\n");
|
|
5744
5823
|
io2.stdout(" aex logout Clear the stored token\n");
|
|
5745
5824
|
io2.stdout(" aex auth status Show the resolved config (token never printed)\n");
|
package/dist/cli.mjs.sha256
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
|
|
1
|
+
ebfa6eb1106a2447b2462511c45f0c96874889e3189a78fd4a66521d663bde76 cli.mjs
|
package/dist/client.d.ts
CHANGED
|
@@ -4,6 +4,7 @@ import { type UploadedAsset } from "./asset-upload.js";
|
|
|
4
4
|
import { File } from "./file.js";
|
|
5
5
|
import { McpServer } from "./mcp-server.js";
|
|
6
6
|
import { ProxyEndpoint } from "./proxy-endpoint.js";
|
|
7
|
+
import { type RetryOptions } from "./retry.js";
|
|
7
8
|
import { Secret } from "./secret.js";
|
|
8
9
|
import { SkillTool } from "./skill-tool.js";
|
|
9
10
|
import { Tool } from "./tool.js";
|
|
@@ -24,6 +25,16 @@ export interface AgentExecutorOptions {
|
|
|
24
25
|
* route the traces elsewhere. Purely local — nothing is uploaded.
|
|
25
26
|
*/
|
|
26
27
|
readonly debug?: boolean | DebugSink;
|
|
28
|
+
/**
|
|
29
|
+
* Built-in transport retry policy. Every BFF request is retried on transient
|
|
30
|
+
* failures (HTTP 429/500/502/503/504/529 and network errors) with bounded
|
|
31
|
+
* exponential backoff + jitter, honoring `Retry-After`. Billable submits carry
|
|
32
|
+
* a stable idempotency key, so a retry never creates a duplicate billable run.
|
|
33
|
+
*
|
|
34
|
+
* Omit for sensible defaults (4 attempts, ~2 min budget); pass an object to
|
|
35
|
+
* tune `maxAttempts` / delays / `maxElapsedMs`; pass `false` to disable.
|
|
36
|
+
*/
|
|
37
|
+
readonly retry?: RetryOptions | false;
|
|
27
38
|
}
|
|
28
39
|
/**
|
|
29
40
|
* The settle-consistent result of {@link AgentExecutor.run}:
|
|
@@ -250,6 +261,14 @@ export declare class SessionHandle {
|
|
|
250
261
|
get id(): string;
|
|
251
262
|
get record(): Session;
|
|
252
263
|
send(input: SessionInput, options?: SessionSendOptions): SessionTurnStream;
|
|
264
|
+
/**
|
|
265
|
+
* Re-send the last message on this session — the clean way to retry a turn a
|
|
266
|
+
* throttle or transient failure interrupted. By default it REUSES the previous
|
|
267
|
+
* message's idempotency key, so if the original turn actually landed
|
|
268
|
+
* server-side the replay de-duplicates instead of creating a second billable
|
|
269
|
+
* turn; pass a fresh `idempotencyKey` to force a brand-new turn.
|
|
270
|
+
*/
|
|
271
|
+
replayLast(options?: SessionSendOptions): SessionTurnStream;
|
|
253
272
|
suspend(options?: Pick<SessionSendOptions, "idempotencyKey">): Promise<SessionStateChangeAccepted>;
|
|
254
273
|
cancel(options?: Pick<SessionSendOptions, "idempotencyKey">): Promise<SessionStateChangeAccepted>;
|
|
255
274
|
resume(options?: Pick<SessionSendOptions, "idempotencyKey">): Promise<SessionStateChangeAccepted>;
|
package/dist/client.js
CHANGED
|
@@ -4,6 +4,7 @@ import { uploadAsset } from "./asset-upload.js";
|
|
|
4
4
|
import { File } from "./file.js";
|
|
5
5
|
import { McpServer } from "./mcp-server.js";
|
|
6
6
|
import { splitProxyEndpoints } from "./proxy-endpoint.js";
|
|
7
|
+
import { AexRateLimitError, isThrottleFault, parseProviderFault, withRetry } from "./retry.js";
|
|
7
8
|
import { splitSecretEnv } from "./secret.js";
|
|
8
9
|
import { SkillTool } from "./skill-tool.js";
|
|
9
10
|
import { Tool } from "./tool.js";
|
|
@@ -40,6 +41,8 @@ export class SessionHandle {
|
|
|
40
41
|
#http;
|
|
41
42
|
#fetch;
|
|
42
43
|
#session;
|
|
44
|
+
/** The last message sent on this handle, for {@link SessionHandle.replayLast}. */
|
|
45
|
+
#lastSend;
|
|
43
46
|
constructor(http, session, fetch) {
|
|
44
47
|
this.#http = http;
|
|
45
48
|
this.#session = session;
|
|
@@ -56,8 +59,28 @@ export class SessionHandle {
|
|
|
56
59
|
assertNoSessionSendSignal(options, "SessionHandle.send");
|
|
57
60
|
return sendSessionInternal(this, input, options);
|
|
58
61
|
}
|
|
62
|
+
/**
|
|
63
|
+
* Re-send the last message on this session — the clean way to retry a turn a
|
|
64
|
+
* throttle or transient failure interrupted. By default it REUSES the previous
|
|
65
|
+
* message's idempotency key, so if the original turn actually landed
|
|
66
|
+
* server-side the replay de-duplicates instead of creating a second billable
|
|
67
|
+
* turn; pass a fresh `idempotencyKey` to force a brand-new turn.
|
|
68
|
+
*/
|
|
69
|
+
replayLast(options = {}) {
|
|
70
|
+
assertNoSessionSendSignal(options, "SessionHandle.replayLast");
|
|
71
|
+
const last = this.#lastSend;
|
|
72
|
+
if (last === undefined) {
|
|
73
|
+
throw new RunStateError("SessionHandle.replayLast: no message has been sent on this session yet");
|
|
74
|
+
}
|
|
75
|
+
return sendSessionInternal(this, last.input, {
|
|
76
|
+
...options,
|
|
77
|
+
idempotencyKey: options.idempotencyKey ?? last.idempotencyKey
|
|
78
|
+
});
|
|
79
|
+
}
|
|
59
80
|
async *#send(input, options) {
|
|
60
|
-
const
|
|
81
|
+
const idempotencyKey = options.idempotencyKey ?? generateIdempotencyKey();
|
|
82
|
+
this.#lastSend = { input, idempotencyKey };
|
|
83
|
+
const accepted = await operations.sendSessionMessage(this.#http, this.id, { input }, { idempotencyKey });
|
|
61
84
|
this.#session = accepted.session;
|
|
62
85
|
const turn = accepted.turn;
|
|
63
86
|
const events = [];
|
|
@@ -303,10 +326,15 @@ export class SessionClient {
|
|
|
303
326
|
const { message, deleteAfter, messageIdempotencyKey, stream, ...createOptions } = options;
|
|
304
327
|
assertNoLegacySessionFields(options, "Aex.sessions.run");
|
|
305
328
|
const input = normaliseSessionInput(message, "Aex.sessions.run", "message");
|
|
306
|
-
|
|
329
|
+
// Derive the message key from the create key (like the CLI) so a retried run
|
|
330
|
+
// with the same `idempotencyKey` de-duplicates BOTH the create and the
|
|
331
|
+
// billable turn — never a duplicate billable run.
|
|
332
|
+
const createKey = createOptions.idempotencyKey ?? generateIdempotencyKey();
|
|
333
|
+
const messageKey = messageIdempotencyKey ?? deriveMessageKey(createKey);
|
|
334
|
+
const session = await this.create({ ...createOptions, idempotencyKey: createKey });
|
|
307
335
|
const result = await session.send(input, {
|
|
308
336
|
...(stream ?? {}),
|
|
309
|
-
idempotencyKey:
|
|
337
|
+
idempotencyKey: messageKey
|
|
310
338
|
}).done();
|
|
311
339
|
if (deleteAfter) {
|
|
312
340
|
await session.delete();
|
|
@@ -583,10 +611,16 @@ export class AgentExecutor {
|
|
|
583
611
|
if (!options.apiToken) {
|
|
584
612
|
throw new Error("AgentExecutor: apiToken is required");
|
|
585
613
|
}
|
|
614
|
+
// Wrap the transport fetch (the caller's override, or global `fetch`) with
|
|
615
|
+
// the bounded-retry layer so every BFF request gets default resilience.
|
|
616
|
+
// The raw `#fetch` below stays unwrapped for the direct-to-storage asset PUT
|
|
617
|
+
// and presigned output GETs, which target object storage, not the API plane.
|
|
618
|
+
const baseFetch = options.fetch ?? ((input, init) => fetch(input, init));
|
|
619
|
+
const retryingFetch = withRetry(baseFetch, options.retry);
|
|
586
620
|
this.#http = new HttpClient({
|
|
587
621
|
...(options.baseUrl ? { baseUrl: options.baseUrl } : {}),
|
|
588
622
|
apiToken: options.apiToken,
|
|
589
|
-
|
|
623
|
+
fetch: retryingFetch,
|
|
590
624
|
// Opt-in local diagnostics: emit a redacted per-request trace to
|
|
591
625
|
// stderr. Uploads nothing. A caller wanting a custom sink can pass
|
|
592
626
|
// a function instead of `true`.
|
|
@@ -646,10 +680,15 @@ export class AgentExecutor {
|
|
|
646
680
|
...(opts.idleTimeoutMs !== undefined ? { idleTimeoutMs: opts.idleTimeoutMs } : {}),
|
|
647
681
|
...(opts.pingIntervalMs !== undefined ? { pingIntervalMs: opts.pingIntervalMs } : {})
|
|
648
682
|
};
|
|
649
|
-
|
|
683
|
+
// Derive the message key from the create key (like the CLI) so a retried
|
|
684
|
+
// run with the same `idempotencyKey` de-duplicates BOTH the create and the
|
|
685
|
+
// billable turn server-side — never a duplicate billable run (sdk-dx-3).
|
|
686
|
+
const createKey = createOptions.idempotencyKey ?? generateIdempotencyKey();
|
|
687
|
+
const messageKey = messageIdempotencyKey ?? deriveMessageKey(createKey);
|
|
688
|
+
const session = await this.sessions.create({ ...createOptions, idempotencyKey: createKey });
|
|
650
689
|
const turnResult = await sendSessionInternal(session, input, {
|
|
651
690
|
...streamOptions,
|
|
652
|
-
idempotencyKey:
|
|
691
|
+
idempotencyKey: messageKey
|
|
653
692
|
}).done();
|
|
654
693
|
if (deleteAfter) {
|
|
655
694
|
await session.delete();
|
|
@@ -678,6 +717,19 @@ export class AgentExecutor {
|
|
|
678
717
|
...(!ok && errorMessage ? { error: errorMessage } : {})
|
|
679
718
|
};
|
|
680
719
|
if (opts.throwOnFailure && !ok) {
|
|
720
|
+
// A turn that failed because the upstream provider throttled us surfaces
|
|
721
|
+
// as a structured, non-leaky AexRateLimitError carrying the provider
|
|
722
|
+
// fault, so callers can branch on `isRateLimited(err)` and replay.
|
|
723
|
+
const throttle = throttleFromSession(turnResult.session);
|
|
724
|
+
if (throttle) {
|
|
725
|
+
throw new AexRateLimitError({
|
|
726
|
+
status: throttle.status ?? 429,
|
|
727
|
+
attempts: 1,
|
|
728
|
+
source: "provider",
|
|
729
|
+
providerFault: throttle,
|
|
730
|
+
...(throttle.retryAfterMs !== undefined ? { retryAfterMs: throttle.retryAfterMs } : {})
|
|
731
|
+
});
|
|
732
|
+
}
|
|
681
733
|
throw new RunStateError(`AgentExecutor.run: session ${runId} ended ${turnResult.status}${errorMessage ? `: ${errorMessage}` : ""}`, { runId, status: turnResult.status });
|
|
682
734
|
}
|
|
683
735
|
return result;
|
|
@@ -905,6 +957,40 @@ function generateIdempotencyKey() {
|
|
|
905
957
|
return cryptoObj.randomUUID();
|
|
906
958
|
return `idem-${Date.now().toString(36)}-${Math.random().toString(36).slice(2)}`;
|
|
907
959
|
}
|
|
960
|
+
/**
|
|
961
|
+
* Derive the message idempotency key from the session-create key. Mirrors the
|
|
962
|
+
* CLI (`<createKey>:message`) so a retried `run` / `sessions.run` that reuses
|
|
963
|
+
* one `idempotencyKey` de-duplicates BOTH the create and the billable turn.
|
|
964
|
+
*/
|
|
965
|
+
function deriveMessageKey(createKey) {
|
|
966
|
+
return `${createKey}:message`;
|
|
967
|
+
}
|
|
968
|
+
/**
|
|
969
|
+
* Extract a throttle-class {@link ProviderFault} from a failed session record.
|
|
970
|
+
* Reads a structured `providerFault` / `error` field first (the shape the
|
|
971
|
+
* runtime is expected to emit on a throttled turn), then falls back to a
|
|
972
|
+
* heuristic scan of `errorMessage`. Returns `undefined` when the failure is not
|
|
973
|
+
* a throttle.
|
|
974
|
+
*/
|
|
975
|
+
function throttleFromSession(session) {
|
|
976
|
+
const fault = parseProviderFault(session.providerFault) ??
|
|
977
|
+
parseProviderFault(session.error) ??
|
|
978
|
+
faultFromErrorMessage(typeof session.errorMessage === "string" ? session.errorMessage : undefined);
|
|
979
|
+
return fault && isThrottleFault(fault) ? fault : undefined;
|
|
980
|
+
}
|
|
981
|
+
/** Last-resort throttle detection from a free-text run error message. */
|
|
982
|
+
function faultFromErrorMessage(message) {
|
|
983
|
+
if (message === undefined || message.length === 0)
|
|
984
|
+
return undefined;
|
|
985
|
+
const lower = message.toLowerCase();
|
|
986
|
+
if (/\b429\b|rate.?limit|too many requests/.test(lower)) {
|
|
987
|
+
return { kind: "rate_limit", message };
|
|
988
|
+
}
|
|
989
|
+
if (/\b529\b|overloaded/.test(lower)) {
|
|
990
|
+
return { kind: "overloaded", message };
|
|
991
|
+
}
|
|
992
|
+
return undefined;
|
|
993
|
+
}
|
|
908
994
|
function normaliseSessionInput(input, surface, field) {
|
|
909
995
|
if (typeof input === "string") {
|
|
910
996
|
if (!input) {
|