@aexhq/sdk 0.34.0 → 0.35.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -402,33 +402,31 @@ export interface PlatformRunSubmissionRequest {
402
402
  * terminal wait window and self-kill deadline.
403
403
  */
404
404
  readonly timeoutMs?: number;
405
- /**
406
- * Lineage parent (agent-session §9). When present the server admits this
407
- * run as a CHILD of `parentRunId`: it walks the parent's lineage, enforces
408
- * the max-subagent-depth + per-root concurrency caps, and persists
409
- * `parent_run_id` + a server-derived `depth`. The client may name a parent
410
- * but NEVER the depth — depth is computed server-side from the parent row,
411
- * so a forged value cannot bypass the cap.
412
- */
413
- readonly parentRunId?: string;
414
405
  /**
415
406
  * Optional per-run callback URL. The platform delivers exactly the terminal
416
407
  * `run.finished` event to this URL at the settle-consistent barrier, signed
417
- * Standard-Webhooks style. It is a sibling of {@link idempotencyKey} /
418
- * {@link parentRunId} — an operational/delivery concern, NOT part of the
419
- * hashed submission brief, so the same idempotency key with a different
420
- * callback URL never 409s and the field never enters `request_hash`.
408
+ * Standard-Webhooks style. It is a sibling of {@link idempotencyKey} — an
409
+ * operational/delivery concern, NOT part of the hashed submission brief, so
410
+ * the same idempotency key with a different callback URL never 409s and the
411
+ * field never enters `request_hash`.
421
412
  */
422
413
  readonly webhook?: RunWebhookSpec;
423
414
  /**
424
415
  * Optional per-run override of the lineage limits (max concurrent child runs,
425
- * max subagent depth). A sibling of {@link parentRunId} — these are dials the
426
- * client may *request*; the server resolves them against the per-workspace
427
- * ceiling and the hard platform ceiling (clamping happens in the resolver, NOT
428
- * this parser). Absent fields fall back to the platform defaults. Only shape +
416
+ * max subagent depth, per-run spend cap). These are dials the client may
417
+ * *request*; the server resolves them against the per-workspace ceiling and
418
+ * the hard platform ceiling (clamping happens in the resolver, NOT this
419
+ * parser). Absent fields fall back to the platform defaults. Only shape +
429
420
  * positivity are validated here.
430
421
  */
431
422
  readonly limits?: RunLimits;
423
+ /**
424
+ * Optional capacity intent for the run's managed machine. `spot: true` opts
425
+ * the run into interruptible capacity; absent / `spot: false` requests
426
+ * standard capacity (the default). Intent only — the managed runtime selects
427
+ * capacity from it.
428
+ */
429
+ readonly machine?: RunMachine;
432
430
  }
433
431
  /** Per-run webhook callback. v1: terminal-only; the URL must be https. */
434
432
  export interface RunWebhookSpec {
@@ -444,15 +442,26 @@ export interface RunLimits {
444
442
  readonly maxConcurrentChildRuns?: number;
445
443
  readonly maxSubagentDepth?: number;
446
444
  /**
447
- * Per-run spend cap in USD (defense-in-depth). The platform converts it to a
448
- * wall-clock budget (priced compute is wall-time; BYOK provider tokens cost the
449
- * platform nothing) and kills the run once it would out-spend the cap. A
450
- * positive number; omitted unbounded per-run (only the run's wall-clock
451
- * `timeout` + the per-workspace spend cap apply). Only shape/positivity are
452
- * validated here.
445
+ * Per-run spend cap in USD (defense-in-depth). The platform kills the run once
446
+ * it would out-spend the cap. A positive number; when omitted, the run is unbounded per-run
447
+ * (only the run's wall-clock `timeout` + the per-workspace spend cap apply).
448
+ * Only shape/positivity are validated here.
449
+ *
450
+ * The frozen boot session config the managed runtime folds the loop against
451
+ * names this same USD value `budgetUsd`; {@link sessionBudgetLimits} is the
452
+ * single source of truth for that wire→boot name mapping.
453
453
  */
454
454
  readonly maxSpendUsd?: number;
455
455
  }
/**
 * Per-run machine/capacity intent. v1 exposes only `spot`: opt the run into
 * interruptible capacity (`spot: true`) vs standard capacity (absent /
 * `spot: false`, the default). Only the boolean intent is public — capacity
 * selection is a runtime concern.
 */
export interface RunMachine {
    /** `true` requests interruptible capacity; absent or `false` requests standard capacity. */
    readonly spot?: boolean;
}
456
465
  /**
457
466
  * Wire shape posted by the SDK and CLI. `workspaceId` is **omitted by
458
467
  * design** — token-authenticated clients never name the workspace
@@ -500,6 +509,32 @@ export declare function parseRunWebhook(input: unknown): RunWebhookSpec | undefi
500
509
  * collapses to `undefined` so it carries no signal onto the request.
501
510
  */
502
511
  export declare function parseRunLimits(input: unknown): RunLimits | undefined;
/**
 * Boot-session budget fragment. The public submit surface names a run's spend
 * cap `limits.maxSpendUsd`; the frozen boot session config the managed runtime
 * folds the loop against names the SAME USD value `budgetUsd` — the field the
 * session planner reads to enforce/terminate a run that would out-spend its cap.
 * This is the single source of truth for that wire→boot name mapping so the two
 * layers can never drift.
 *
 * Returns a fragment safe to spread into `sessionConfig.limits`: `{ budgetUsd }`
 * when a cap is set, `{}` when none is (an absent cap stays absent — the run is
 * unbounded per-run, subject only to the run timeout + the per-workspace cap).
 * Pure: same input ⇒ same output.
 *
 * @param limits Parsed per-run limits, or `undefined` when the request carried none.
 * @returns `{ budgetUsd }` carrying `limits.maxSpendUsd` when it is set, otherwise `{}`.
 */
export declare function sessionBudgetLimits(limits: RunLimits | undefined): {
    budgetUsd?: number;
};
/**
 * Parse the optional per-run `machine` capacity intent. Mirrors
 * {@link parseRunWebhook}: absent ⇒ `undefined`; a non-object or any unknown
 * subfield is rejected so the strict top-level allow-list extends to the nested
 * object. `spot` must be a boolean when present. A no-signal object (e.g.
 * `machine: {}`) collapses to `undefined` so it never lands an empty object on
 * the request. An explicit `spot` (true or false) is preserved verbatim. Only
 * shape is validated here — capacity selection is a runtime concern.
 *
 * @param input Raw `machine` value taken from the submission request.
 * @returns `{ spot }` when `spot` was supplied, otherwise `undefined`.
 * @throws Error when `input` carries a field other than `spot`, or when `spot`
 *   is present but not a boolean.
 */
export declare function parseRunMachine(input: unknown): RunMachine | undefined;
503
538
  export declare function parseRunProvider(input: unknown): RunProvider;
504
539
  /**
505
540
  * Cross-check the supplied secrets bundle against the credential mode. BYOK
@@ -658,8 +658,9 @@ export function crossValidateSecretEnvAndValues(secretEnv, envSecrets) {
658
658
  }
659
659
  }
660
660
  export function parseInlineSecrets(input) {
661
- // A child run (parentRunId set) inherits its provider keys server-side from
662
- // the parent's vault, so it may omit `secrets` entirely.
661
+ // Absent/null secrets collapse to an empty bundle; the credential-policy gate
662
+ // (enforceCredentialSecretPolicy) decides whether that is admissible for the
663
+ // run's mode (a run inheriting keys server-side may legitimately omit them).
663
664
  if (input === undefined || input === null)
664
665
  return {};
665
666
  const value = requireRecord(input, "secrets");
@@ -994,9 +995,9 @@ export function parseRunSubmissionRequest(input, options = {}) {
994
995
  "runtimeSize",
995
996
  "timeout",
996
997
  "proxyEndpoints",
997
- "parentRunId",
998
998
  "webhook",
999
999
  "limits",
1000
+ "machine",
1000
1001
  SECRETS_KEY
1001
1002
  ]);
1002
1003
  for (const key of Object.keys(value)) {
@@ -1020,16 +1021,12 @@ export function parseRunSubmissionRequest(input, options = {}) {
1020
1021
  void options;
1021
1022
  const runtimeSize = parseRuntimeSize(value.runtimeSize);
1022
1023
  const timeoutMs = parseRunTimeout(value.timeout);
1023
- // Lineage parent only. `depth` is NEVER accepted from the wire — the server
1024
- // derives it from the parent row (a forged depth must not bypass the cap).
1025
- const parentRunId = optionalString(value.parentRunId, "submission.parentRunId");
1026
1024
  const webhook = parseRunWebhook(value.webhook);
1027
1025
  const limits = parseRunLimits(value.limits);
1026
+ const machine = parseRunMachine(value.machine);
1028
1027
  const proxyEndpoints = parseProxyEndpoints(value.proxyEndpoints);
1029
1028
  const secrets = parseInlineSecrets(value.secrets);
1030
- enforceCredentialSecretPolicy(secrets, provider, {
1031
- inheritsFromParent: parentRunId !== undefined
1032
- });
1029
+ enforceCredentialSecretPolicy(secrets, provider);
1033
1030
  crossValidateProxyEndpointsAndAuth(proxyEndpoints, secrets.proxyEndpointAuth);
1034
1031
  const submission = parseSubmission(value.submission);
1035
1032
  assertRunModelMatchesProvider(provider, submission.model);
@@ -1060,9 +1057,9 @@ export function parseRunSubmissionRequest(input, options = {}) {
1060
1057
  ...(runtimeSize ? { runtimeSize } : {}),
1061
1058
  ...(timeoutMs !== undefined ? { timeoutMs } : {}),
1062
1059
  ...(proxyEndpoints ? { proxyEndpoints } : {}),
1063
- ...(parentRunId !== undefined ? { parentRunId } : {}),
1064
1060
  ...(webhook !== undefined ? { webhook } : {}),
1065
1061
  ...(limits !== undefined ? { limits } : {}),
1062
+ ...(machine !== undefined ? { machine } : {}),
1066
1063
  secrets
1067
1064
  };
1068
1065
  }
@@ -1141,6 +1138,53 @@ export function parseRunLimits(input) {
1141
1138
  ...(maxSpendUsd !== undefined ? { maxSpendUsd } : {})
1142
1139
  };
1143
1140
  }
/**
 * Boot-session budget fragment. The public submit surface names a run's spend
 * cap `limits.maxSpendUsd`; the frozen boot session config the managed runtime
 * folds the loop against names the SAME USD value `budgetUsd` — the field the
 * session planner reads to enforce/terminate a run that would out-spend its cap.
 * This is the single source of truth for that wire→boot name mapping so the two
 * layers can never drift.
 *
 * Returns a fragment safe to spread into `sessionConfig.limits`: `{ budgetUsd }`
 * when a cap is set, `{}` when none is (an absent cap stays absent — the run is
 * unbounded per-run, subject only to the run timeout + the per-workspace cap).
 * Pure: same input ⇒ same output.
 *
 * @param limits Parsed per-run limits, or `undefined`/`null` when there are none.
 * @returns `{ budgetUsd }` carrying `limits.maxSpendUsd` when a cap is set,
 *   otherwise `{}`.
 */
export function sessionBudgetLimits(limits) {
    const cap = limits?.maxSpendUsd;
    // `== null` treats both `undefined` and a stray `null` as "no cap", so an
    // unset cap can never surface as `budgetUsd: null` in the spread fragment.
    if (cap == null) {
        return {};
    }
    return { budgetUsd: cap };
}
1160
+ /**
1161
+ * Parse the optional per-run `machine` capacity intent. Mirrors
1162
+ * {@link parseRunWebhook}: absent ⇒ `undefined`; a non-object or any unknown
1163
+ * subfield is rejected so the strict top-level allow-list extends to the nested
1164
+ * object. `spot` must be a boolean when present. A no-signal object (e.g.
1165
+ * `machine: {}`) collapses to `undefined` so it never lands an empty object on
1166
+ * the request. An explicit `spot` (true or false) is preserved verbatim. Only
1167
+ * shape is validated here — capacity selection is a runtime concern.
1168
+ */
1169
+ export function parseRunMachine(input) {
1170
+ if (input === undefined) {
1171
+ return undefined;
1172
+ }
1173
+ const value = requireRecord(input, "machine");
1174
+ const allowed = new Set(["spot"]);
1175
+ for (const key of Object.keys(value)) {
1176
+ if (!allowed.has(key)) {
1177
+ throw new Error(`machine.${key} is not an allowed field; permitted: ${[...allowed].join(", ")}`);
1178
+ }
1179
+ }
1180
+ if (value.spot !== undefined && typeof value.spot !== "boolean") {
1181
+ throw new Error("machine.spot must be a boolean");
1182
+ }
1183
+ if (value.spot === undefined) {
1184
+ return undefined;
1185
+ }
1186
+ return { spot: value.spot };
1187
+ }
1144
1188
  export function parseRunProvider(input) {
1145
1189
  if (input === undefined) {
1146
1190
  return DEFAULT_RUN_PROVIDER;
package/dist/cli.mjs CHANGED
@@ -4400,6 +4400,82 @@ async function runWhoamiCmd(io2, argv) {
4400
4400
  }
4401
4401
  }
4402
4402
 
4403
+ // dist/host/redeem.js
/**
 * Map an HTTP status from the redeem endpoint to the one-line message shown to
 * the user.
 *
 * 404/403/409/401 always produce a fixed message. 400 and every other status
 * append `serverMessage` when it is non-empty; otherwise 400 yields
 * "invalid input" and any other status yields a generic message that names
 * the status code.
 */
function messageForStatus(status2, serverMessage) {
    if (status2 === 400) {
        return serverMessage ? `invalid input: ${serverMessage}` : "invalid input";
    }
    // Statuses whose wording never depends on the server-supplied message.
    const fixedMessages = new Map([
        [404, "coupon code not found"],
        [403, "this coupon can't be redeemed by this workspace"],
        [409, "coupon already redeemed"],
        [401, "not authorized \u2014 check --api-token, or run `aex login`"]
    ]);
    const fixed = fixedMessages.get(status2);
    if (fixed !== undefined) {
        return fixed;
    }
    if (serverMessage) {
        return `redeem failed: ${serverMessage}`;
    }
    return `redeem failed (HTTP ${status2})`;
}
4420
+ async function runRedeemCmd(io2, argv) {
4421
+ if (await refuseInsideManagedRun(io2, "redeem"))
4422
+ return USAGE_ERR;
4423
+ const common = await resolveCommonHostFlags(io2, argv);
4424
+ if (!common.ok) {
4425
+ io2.stderr(`${common.reason}
4426
+ `);
4427
+ return USAGE_ERR;
4428
+ }
4429
+ const positional = common.rest.filter((arg) => !arg.startsWith("--"));
4430
+ if (positional.length !== 1) {
4431
+ io2.stderr("usage: aex redeem <code> [common flags]\n");
4432
+ return USAGE_ERR;
4433
+ }
4434
+ const code = positional[0];
4435
+ const base = common.flags.aexUrl.replace(/\/+$/, "");
4436
+ const url = `${base}/billing/redeem`;
4437
+ let response;
4438
+ try {
4439
+ response = await io2.fetchImpl(url, {
4440
+ method: "POST",
4441
+ headers: {
4442
+ accept: "application/json",
4443
+ "content-type": "application/json",
4444
+ authorization: `Bearer ${common.flags.apiToken}`
4445
+ },
4446
+ body: JSON.stringify({ code })
4447
+ });
4448
+ } catch (err2) {
4449
+ io2.stderr(`redeem failed: ${err2 instanceof Error ? err2.message : String(err2)}
4450
+ `);
4451
+ return RUNTIME_ERR;
4452
+ }
4453
+ if (common.flags.debug) {
4454
+ io2.stderr(`[aex] POST /billing/redeem -> ${response.status}
4455
+ `);
4456
+ }
4457
+ const text = await response.text();
4458
+ let body = {};
4459
+ try {
4460
+ if (text.length > 0)
4461
+ body = JSON.parse(text);
4462
+ } catch {
4463
+ body = {};
4464
+ }
4465
+ if (!response.ok) {
4466
+ const serverMessage = body && typeof body === "object" && typeof body.message === "string" ? body.message : void 0;
4467
+ io2.stderr(`${messageForStatus(response.status, serverMessage)}
4468
+ `);
4469
+ return RUNTIME_ERR;
4470
+ }
4471
+ const ok = body;
4472
+ const amountUsd = typeof ok.amountUsd === "number" ? ok.amountUsd : 0;
4473
+ const newBalanceUsd = typeof ok.newBalanceUsd === "number" ? ok.newBalanceUsd : 0;
4474
+ io2.stdout(`Redeemed $${amountUsd.toFixed(2)}. New balance: $${newBalanceUsd.toFixed(2)}.
4475
+ `);
4476
+ return SUCCESS;
4477
+ }
4478
+
4403
4479
  // dist/host/debug.js
4404
4480
  import { dirname, resolve as resolvePath3 } from "node:path";
4405
4481
  function status(source, state, opts = {}) {
@@ -5680,6 +5756,8 @@ async function dispatch(io2, args) {
5680
5756
  return runDeleteAssetCmd(io2, rest);
5681
5757
  case "whoami":
5682
5758
  return runWhoamiCmd(io2, rest);
5759
+ case "redeem":
5760
+ return runRedeemCmd(io2, rest);
5683
5761
  case "login":
5684
5762
  return runLoginCmd(io2, rest);
5685
5763
  case "logout":
@@ -5740,6 +5818,7 @@ Protocol version: ${manifest.protocolVersion}
5740
5818
  io2.stdout(" aex delete <session-id> --api-token T\n");
5741
5819
  io2.stdout(" aex delete-asset <assetId|hash> --api-token T\n");
5742
5820
  io2.stdout(" aex whoami --api-token T\n");
5821
+ io2.stdout(" aex redeem <code> --api-token T Redeem a coupon code into the workspace prepaid balance\n");
5743
5822
  io2.stdout(" aex login --api-token T [--aex-url U] Persist token + url (then other verbs need no --api-token)\n");
5744
5823
  io2.stdout(" aex logout Clear the stored token\n");
5745
5824
  io2.stdout(" aex auth status Show the resolved config (token never printed)\n");
@@ -1 +1 @@
1
- fe0642cff5926cbaf21e48544c4e022a183f284d8e4c25818784fc84b1df2c5a cli.mjs
1
+ ebfa6eb1106a2447b2462511c45f0c96874889e3189a78fd4a66521d663bde76 cli.mjs
package/dist/client.d.ts CHANGED
@@ -4,6 +4,7 @@ import { type UploadedAsset } from "./asset-upload.js";
4
4
  import { File } from "./file.js";
5
5
  import { McpServer } from "./mcp-server.js";
6
6
  import { ProxyEndpoint } from "./proxy-endpoint.js";
7
+ import { type RetryOptions } from "./retry.js";
7
8
  import { Secret } from "./secret.js";
8
9
  import { SkillTool } from "./skill-tool.js";
9
10
  import { Tool } from "./tool.js";
@@ -24,6 +25,16 @@ export interface AgentExecutorOptions {
24
25
  * route the traces elsewhere. Purely local — nothing is uploaded.
25
26
  */
26
27
  readonly debug?: boolean | DebugSink;
28
+ /**
29
+ * Built-in transport retry policy. Every BFF request is retried on transient
30
+ * failures (HTTP 429/500/502/503/504/529 and network errors) with bounded
31
+ * exponential backoff + jitter, honoring `Retry-After`. Billable submits carry
32
+ * a stable idempotency key, so a retry never creates a duplicate billable run.
33
+ *
34
+ * Omit for sensible defaults (4 attempts, ~2 min budget); pass an object to
35
+ * tune `maxAttempts` / delays / `maxElapsedMs`; pass `false` to disable.
36
+ */
37
+ readonly retry?: RetryOptions | false;
27
38
  }
28
39
  /**
29
40
  * The settle-consistent result of {@link AgentExecutor.run}:
@@ -250,6 +261,14 @@ export declare class SessionHandle {
250
261
  get id(): string;
251
262
  get record(): Session;
252
263
  send(input: SessionInput, options?: SessionSendOptions): SessionTurnStream;
264
+ /**
265
+ * Re-send the last message on this session — the clean way to retry a turn a
266
+ * throttle or transient failure interrupted. By default it REUSES the previous
267
+ * message's idempotency key, so if the original turn actually landed
268
+ * server-side the replay de-duplicates instead of creating a second billable
269
+ * turn; pass a fresh `idempotencyKey` to force a brand-new turn.
270
+ */
271
+ replayLast(options?: SessionSendOptions): SessionTurnStream;
253
272
  suspend(options?: Pick<SessionSendOptions, "idempotencyKey">): Promise<SessionStateChangeAccepted>;
254
273
  cancel(options?: Pick<SessionSendOptions, "idempotencyKey">): Promise<SessionStateChangeAccepted>;
255
274
  resume(options?: Pick<SessionSendOptions, "idempotencyKey">): Promise<SessionStateChangeAccepted>;
package/dist/client.js CHANGED
@@ -4,6 +4,7 @@ import { uploadAsset } from "./asset-upload.js";
4
4
  import { File } from "./file.js";
5
5
  import { McpServer } from "./mcp-server.js";
6
6
  import { splitProxyEndpoints } from "./proxy-endpoint.js";
7
+ import { AexRateLimitError, isThrottleFault, parseProviderFault, withRetry } from "./retry.js";
7
8
  import { splitSecretEnv } from "./secret.js";
8
9
  import { SkillTool } from "./skill-tool.js";
9
10
  import { Tool } from "./tool.js";
@@ -40,6 +41,8 @@ export class SessionHandle {
40
41
  #http;
41
42
  #fetch;
42
43
  #session;
44
+ /** The last message sent on this handle, for {@link SessionHandle.replayLast}. */
45
+ #lastSend;
43
46
  constructor(http, session, fetch) {
44
47
  this.#http = http;
45
48
  this.#session = session;
@@ -56,8 +59,28 @@ export class SessionHandle {
56
59
  assertNoSessionSendSignal(options, "SessionHandle.send");
57
60
  return sendSessionInternal(this, input, options);
58
61
  }
62
+ /**
63
+ * Re-send the last message on this session — the clean way to retry a turn a
64
+ * throttle or transient failure interrupted. By default it REUSES the previous
65
+ * message's idempotency key, so if the original turn actually landed
66
+ * server-side the replay de-duplicates instead of creating a second billable
67
+ * turn; pass a fresh `idempotencyKey` to force a brand-new turn.
68
+ */
69
+ replayLast(options = {}) {
70
+ assertNoSessionSendSignal(options, "SessionHandle.replayLast");
71
+ const last = this.#lastSend;
72
+ if (last === undefined) {
73
+ throw new RunStateError("SessionHandle.replayLast: no message has been sent on this session yet");
74
+ }
75
+ return sendSessionInternal(this, last.input, {
76
+ ...options,
77
+ idempotencyKey: options.idempotencyKey ?? last.idempotencyKey
78
+ });
79
+ }
59
80
  async *#send(input, options) {
60
- const accepted = await operations.sendSessionMessage(this.#http, this.id, { input }, { idempotencyKey: options.idempotencyKey ?? generateIdempotencyKey() });
81
+ const idempotencyKey = options.idempotencyKey ?? generateIdempotencyKey();
82
+ this.#lastSend = { input, idempotencyKey };
83
+ const accepted = await operations.sendSessionMessage(this.#http, this.id, { input }, { idempotencyKey });
61
84
  this.#session = accepted.session;
62
85
  const turn = accepted.turn;
63
86
  const events = [];
@@ -303,10 +326,15 @@ export class SessionClient {
303
326
  const { message, deleteAfter, messageIdempotencyKey, stream, ...createOptions } = options;
304
327
  assertNoLegacySessionFields(options, "Aex.sessions.run");
305
328
  const input = normaliseSessionInput(message, "Aex.sessions.run", "message");
306
- const session = await this.create(createOptions);
329
+ // Derive the message key from the create key (like the CLI) so a retried run
330
+ // with the same `idempotencyKey` de-duplicates BOTH the create and the
331
+ // billable turn — never a duplicate billable run.
332
+ const createKey = createOptions.idempotencyKey ?? generateIdempotencyKey();
333
+ const messageKey = messageIdempotencyKey ?? deriveMessageKey(createKey);
334
+ const session = await this.create({ ...createOptions, idempotencyKey: createKey });
307
335
  const result = await session.send(input, {
308
336
  ...(stream ?? {}),
309
- idempotencyKey: messageIdempotencyKey ?? generateIdempotencyKey()
337
+ idempotencyKey: messageKey
310
338
  }).done();
311
339
  if (deleteAfter) {
312
340
  await session.delete();
@@ -583,10 +611,16 @@ export class AgentExecutor {
583
611
  if (!options.apiToken) {
584
612
  throw new Error("AgentExecutor: apiToken is required");
585
613
  }
614
+ // Wrap the transport fetch (the caller's override, or global `fetch`) with
615
+ // the bounded-retry layer so every BFF request gets default resilience.
616
+ // The raw `#fetch` below stays unwrapped for the direct-to-storage asset PUT
617
+ // and presigned output GETs, which target object storage, not the API plane.
618
+ const baseFetch = options.fetch ?? ((input, init) => fetch(input, init));
619
+ const retryingFetch = withRetry(baseFetch, options.retry);
586
620
  this.#http = new HttpClient({
587
621
  ...(options.baseUrl ? { baseUrl: options.baseUrl } : {}),
588
622
  apiToken: options.apiToken,
589
- ...(options.fetch ? { fetch: options.fetch } : {}),
623
+ fetch: retryingFetch,
590
624
  // Opt-in local diagnostics: emit a redacted per-request trace to
591
625
  // stderr. Uploads nothing. A caller wanting a custom sink can pass
592
626
  // a function instead of `true`.
@@ -646,10 +680,15 @@ export class AgentExecutor {
646
680
  ...(opts.idleTimeoutMs !== undefined ? { idleTimeoutMs: opts.idleTimeoutMs } : {}),
647
681
  ...(opts.pingIntervalMs !== undefined ? { pingIntervalMs: opts.pingIntervalMs } : {})
648
682
  };
649
- const session = await this.sessions.create(createOptions);
683
+ // Derive the message key from the create key (like the CLI) so a retried
684
+ // run with the same `idempotencyKey` de-duplicates BOTH the create and the
685
+ // billable turn server-side — never a duplicate billable run (sdk-dx-3).
686
+ const createKey = createOptions.idempotencyKey ?? generateIdempotencyKey();
687
+ const messageKey = messageIdempotencyKey ?? deriveMessageKey(createKey);
688
+ const session = await this.sessions.create({ ...createOptions, idempotencyKey: createKey });
650
689
  const turnResult = await sendSessionInternal(session, input, {
651
690
  ...streamOptions,
652
- idempotencyKey: messageIdempotencyKey ?? generateIdempotencyKey()
691
+ idempotencyKey: messageKey
653
692
  }).done();
654
693
  if (deleteAfter) {
655
694
  await session.delete();
@@ -678,6 +717,19 @@ export class AgentExecutor {
678
717
  ...(!ok && errorMessage ? { error: errorMessage } : {})
679
718
  };
680
719
  if (opts.throwOnFailure && !ok) {
720
+ // A turn that failed because the upstream provider throttled us surfaces
721
+ // as a structured, non-leaky AexRateLimitError carrying the provider
722
+ // fault, so callers can branch on `isRateLimited(err)` and replay.
723
+ const throttle = throttleFromSession(turnResult.session);
724
+ if (throttle) {
725
+ throw new AexRateLimitError({
726
+ status: throttle.status ?? 429,
727
+ attempts: 1,
728
+ source: "provider",
729
+ providerFault: throttle,
730
+ ...(throttle.retryAfterMs !== undefined ? { retryAfterMs: throttle.retryAfterMs } : {})
731
+ });
732
+ }
681
733
  throw new RunStateError(`AgentExecutor.run: session ${runId} ended ${turnResult.status}${errorMessage ? `: ${errorMessage}` : ""}`, { runId, status: turnResult.status });
682
734
  }
683
735
  return result;
@@ -905,6 +957,40 @@ function generateIdempotencyKey() {
905
957
  return cryptoObj.randomUUID();
906
958
  return `idem-${Date.now().toString(36)}-${Math.random().toString(36).slice(2)}`;
907
959
  }
/**
 * Build the message idempotency key that belongs to a session-create key, in
 * the `<createKey>:message` form. Reusing one `idempotencyKey` for a retried
 * `run` / `sessions.run` then de-duplicates both the create and the billable
 * turn.
 */
function deriveMessageKey(createKey) {
    const suffix = "message";
    return `${createKey}:${suffix}`;
}
968
+ /**
969
+ * Extract a throttle-class {@link ProviderFault} from a failed session record.
970
+ * Reads a structured `providerFault` / `error` field first (the shape the
971
+ * runtime is expected to emit on a throttled turn), then falls back to a
972
+ * heuristic scan of `errorMessage`. Returns `undefined` when the failure is not
973
+ * a throttle.
974
+ */
975
+ function throttleFromSession(session) {
976
+ const fault = parseProviderFault(session.providerFault) ??
977
+ parseProviderFault(session.error) ??
978
+ faultFromErrorMessage(typeof session.errorMessage === "string" ? session.errorMessage : undefined);
979
+ return fault && isThrottleFault(fault) ? fault : undefined;
980
+ }
/**
 * Last-resort throttle detection from a free-text run error message.
 *
 * The message is lower-cased and checked against two signatures in priority
 * order: rate limiting ("429", "rate limit" with an optional separator,
 * "too many requests"), then overload ("529", "overloaded"). The returned
 * fault carries the matched kind and the original, unmodified message.
 * Returns `undefined` for an absent or empty message, or when nothing matches.
 */
function faultFromErrorMessage(message) {
    if (message === undefined || message.length === 0) {
        return undefined;
    }
    const haystack = message.toLowerCase();
    // Ordered: a message matching both signatures is classified as rate_limit.
    const signatures = [
        ["rate_limit", /\b429\b|rate.?limit|too many requests/],
        ["overloaded", /\b529\b|overloaded/]
    ];
    for (const [kind, pattern] of signatures) {
        if (pattern.test(haystack)) {
            return { kind, message };
        }
    }
    return undefined;
}
908
994
  function normaliseSessionInput(input, surface, field) {
909
995
  if (typeof input === "string") {
910
996
  if (!input) {