npm - @aexhq/sdk - Versions diffs - 0.34.0 → 0.35.0 - Mend

@aexhq/sdk 0.34.0 → 0.35.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18)

package/dist/_contracts/submission.d.ts +58 -23
package/dist/_contracts/submission.js +54 -10
package/dist/cli.mjs +79 -0
package/dist/cli.mjs.sha256 +1 -1
package/dist/client.d.ts +19 -0
package/dist/client.js +92 -6
package/dist/client.js.map +1 -1
package/dist/index.d.ts +2 -0
package/dist/index.js +6 -0
package/dist/index.js.map +1 -1
package/dist/retry.d.ts +162 -0
package/dist/retry.js +320 -0
package/dist/retry.js.map +1 -0
package/dist/version.d.ts +1 -1
package/dist/version.js +1 -1
package/docs/retries.md +129 -0
package/examples/feature-tour.ts +301 -0
package/package.json +1 -1

package/dist/_contracts/submission.d.ts CHANGED Viewed

@@ -402,33 +402,31 @@ export interface PlatformRunSubmissionRequest {
      * terminal wait window and self-kill deadline.
      */
     readonly timeoutMs?: number;
-    /**
-     * Lineage parent (agent-session §9). When present the server admits this
-     * run as a CHILD of `parentRunId`: it walks the parent's lineage, enforces
-     * the max-subagent-depth + per-root concurrency caps, and persists
-     * `parent_run_id` + a server-derived `depth`. The client may name a parent
-     * but NEVER the depth — depth is computed server-side from the parent row,
-     * so a forged value cannot bypass the cap.
-     */
-    readonly parentRunId?: string;
     /**
      * Optional per-run callback URL. The platform delivers exactly the terminal
      * `run.finished` event to this URL at the settle-consistent barrier, signed
-     * Standard-Webhooks style. It is a sibling of {@link idempotencyKey} /
-     * {@link parentRunId} — an operational/delivery concern, NOT part of the
-     * hashed submission brief, so the same idempotency key with a different
-     * callback URL never 409s and the field never enters `request_hash`.
+     * Standard-Webhooks style. It is a sibling of {@link idempotencyKey} — an
+     * operational/delivery concern, NOT part of the hashed submission brief, so
+     * the same idempotency key with a different callback URL never 409s and the
+     * field never enters `request_hash`.
      */
     readonly webhook?: RunWebhookSpec;
     /**
      * Optional per-run override of the lineage limits (max concurrent child runs,
-     * max subagent depth). A sibling of {@link parentRunId} — these are dials the
-     * client may *request*; the server resolves them against the per-workspace
-     * ceiling and the hard platform ceiling (clamping happens in the resolver, NOT
-     * this parser). Absent fields fall back to the platform defaults. Only shape +
+     * max subagent depth, per-run spend cap). These are dials the client may
+     * *request*; the server resolves them against the per-workspace ceiling and
+     * the hard platform ceiling (clamping happens in the resolver, NOT this
+     * parser). Absent fields fall back to the platform defaults. Only shape +
      * positivity are validated here.
      */
     readonly limits?: RunLimits;
+    /**
+     * Optional capacity intent for the run's managed machine. `spot: true` opts
+     * the run into interruptible capacity; absent / `spot: false` requests
+     * standard capacity (the default). Intent only — the managed runtime selects
+     * capacity from it.
+     */
+    readonly machine?: RunMachine;
 }
 /** Per-run webhook callback. v1: terminal-only; the URL must be https. */
 export interface RunWebhookSpec {
@@ -444,15 +442,26 @@ export interface RunLimits {
     readonly maxConcurrentChildRuns?: number;
     readonly maxSubagentDepth?: number;
     /**
-     * Per-run spend cap in USD (defense-in-depth). The platform converts it to a
-     * wall-clock budget (priced compute is wall-time; BYOK provider tokens cost the
-     * platform nothing) and kills the run once it would out-spend the cap. A
-     * positive number; omitted ⇒ unbounded per-run (only the run's wall-clock
-     * `timeout` + the per-workspace spend cap apply). Only shape/positivity are
-     * validated here.
+     * Per-run spend cap in USD (defense-in-depth). The platform kills the run once
+     * it would out-spend the cap. A positive number; omitted ⇒ unbounded per-run
+     * (only the run's wall-clock `timeout` + the per-workspace spend cap apply).
+     * Only shape/positivity are validated here.
+     *
+     * The frozen boot session config the managed runtime folds the loop against
+     * names this same USD value `budgetUsd`; {@link sessionBudgetLimits} is the
+     * single source of truth for that wire→boot name mapping.
      */
     readonly maxSpendUsd?: number;
 }
+/**
+ * Per-run machine/capacity intent. v1 exposes only `spot`: opt the run into
+ * interruptible capacity (`spot: true`) vs standard capacity (absent /
+ * `spot: false`, the default). Only the boolean intent is public — capacity
+ * selection is a runtime concern.
+ */
+export interface RunMachine {
+    readonly spot?: boolean;
+}
 /**
  * Wire shape posted by the SDK and CLI. `workspaceId` is **omitted by
  * design** — token-authenticated clients never name the workspace
@@ -500,6 +509,32 @@ export declare function parseRunWebhook(input: unknown): RunWebhookSpec | undefi
  * collapses to `undefined` so it carries no signal onto the request.
  */
 export declare function parseRunLimits(input: unknown): RunLimits | undefined;
+/**
+ * Boot-session budget fragment. The public submit surface names a run's spend
+ * cap `limits.maxSpendUsd`; the frozen boot session config the managed runtime
+ * folds the loop against names the SAME USD value `budgetUsd` — the field the
+ * session planner reads to enforce/terminate a run that would out-spend its cap.
+ * This is the single source of truth for that wire→boot name mapping so the two
+ * layers can never drift.
+ *
+ * Returns a fragment safe to spread into `sessionConfig.limits`: `{ budgetUsd }`
+ * when a cap is set, `{}` when none is (an absent cap stays absent — the run is
+ * unbounded per-run, subject only to the run timeout + the per-workspace cap).
+ * Pure: same input ⇒ same output.
+ */
+export declare function sessionBudgetLimits(limits: RunLimits | undefined): {
+    budgetUsd?: number;
+};
+/**
+ * Parse the optional per-run `machine` capacity intent. Mirrors
+ * {@link parseRunWebhook}: absent ⇒ `undefined`; a non-object or any unknown
+ * subfield is rejected so the strict top-level allow-list extends to the nested
+ * object. `spot` must be a boolean when present. A no-signal object (e.g.
+ * `machine: {}`) collapses to `undefined` so it never lands an empty object on
+ * the request. An explicit `spot` (true or false) is preserved verbatim. Only
+ * shape is validated here — capacity selection is a runtime concern.
+ */
+export declare function parseRunMachine(input: unknown): RunMachine | undefined;
 export declare function parseRunProvider(input: unknown): RunProvider;
 /**
  * Cross-check the supplied secrets bundle against the credential mode. BYOK

package/dist/_contracts/submission.js CHANGED Viewed

@@ -658,8 +658,9 @@ export function crossValidateSecretEnvAndValues(secretEnv, envSecrets) {
     }
 }
 export function parseInlineSecrets(input) {
-    // A child run (parentRunId set) inherits its provider keys server-side from
-    // the parent's vault, so it may omit `secrets` entirely.
+    // Absent/null secrets collapse to an empty bundle; the credential-policy gate
+    // (enforceCredentialSecretPolicy) decides whether that is admissible for the
+    // run's mode (a run inheriting keys server-side may legitimately omit them).
     if (input === undefined || input === null)
         return {};
     const value = requireRecord(input, "secrets");
@@ -994,9 +995,9 @@ export function parseRunSubmissionRequest(input, options = {}) {
         "runtimeSize",
         "timeout",
         "proxyEndpoints",
-        "parentRunId",
         "webhook",
         "limits",
+        "machine",
         SECRETS_KEY
     ]);
     for (const key of Object.keys(value)) {
@@ -1020,16 +1021,12 @@ export function parseRunSubmissionRequest(input, options = {}) {
     void options;
     const runtimeSize = parseRuntimeSize(value.runtimeSize);
     const timeoutMs = parseRunTimeout(value.timeout);
-    // Lineage parent only. `depth` is NEVER accepted from the wire — the server
-    // derives it from the parent row (a forged depth must not bypass the cap).
-    const parentRunId = optionalString(value.parentRunId, "submission.parentRunId");
     const webhook = parseRunWebhook(value.webhook);
     const limits = parseRunLimits(value.limits);
+    const machine = parseRunMachine(value.machine);
     const proxyEndpoints = parseProxyEndpoints(value.proxyEndpoints);
     const secrets = parseInlineSecrets(value.secrets);
-    enforceCredentialSecretPolicy(secrets, provider, {
-        inheritsFromParent: parentRunId !== undefined
-    });
+    enforceCredentialSecretPolicy(secrets, provider);
     crossValidateProxyEndpointsAndAuth(proxyEndpoints, secrets.proxyEndpointAuth);
     const submission = parseSubmission(value.submission);
     assertRunModelMatchesProvider(provider, submission.model);
@@ -1060,9 +1057,9 @@ export function parseRunSubmissionRequest(input, options = {}) {
         ...(runtimeSize ? { runtimeSize } : {}),
         ...(timeoutMs !== undefined ? { timeoutMs } : {}),
         ...(proxyEndpoints ? { proxyEndpoints } : {}),
-        ...(parentRunId !== undefined ? { parentRunId } : {}),
         ...(webhook !== undefined ? { webhook } : {}),
         ...(limits !== undefined ? { limits } : {}),
+        ...(machine !== undefined ? { machine } : {}),
         secrets
     };
 }
@@ -1141,6 +1138,53 @@ export function parseRunLimits(input) {
         ...(maxSpendUsd !== undefined ? { maxSpendUsd } : {})
     };
 }
+/**
+ * Boot-session budget fragment. The public submit surface names a run's spend
+ * cap `limits.maxSpendUsd`; the frozen boot session config the managed runtime
+ * folds the loop against names the SAME USD value `budgetUsd` — the field the
+ * session planner reads to enforce/terminate a run that would out-spend its cap.
+ * This is the single source of truth for that wire→boot name mapping so the two
+ * layers can never drift.
+ *
+ * Returns a fragment safe to spread into `sessionConfig.limits`: `{ budgetUsd }`
+ * when a cap is set, `{}` when none is (an absent cap stays absent — the run is
+ * unbounded per-run, subject only to the run timeout + the per-workspace cap).
+ * Pure: same input ⇒ same output.
+ */
+export function sessionBudgetLimits(limits) {
+    if (limits?.maxSpendUsd === undefined) {
+        return {};
+    }
+    return { budgetUsd: limits.maxSpendUsd };
+}
+/**
+ * Parse the optional per-run `machine` capacity intent. Mirrors
+ * {@link parseRunWebhook}: absent ⇒ `undefined`; a non-object or any unknown
+ * subfield is rejected so the strict top-level allow-list extends to the nested
+ * object. `spot` must be a boolean when present. A no-signal object (e.g.
+ * `machine: {}`) collapses to `undefined` so it never lands an empty object on
+ * the request. An explicit `spot` (true or false) is preserved verbatim. Only
+ * shape is validated here — capacity selection is a runtime concern.
+ */
+export function parseRunMachine(input) {
+    if (input === undefined) {
+        return undefined;
+    }
+    const value = requireRecord(input, "machine");
+    const allowed = new Set(["spot"]);
+    for (const key of Object.keys(value)) {
+        if (!allowed.has(key)) {
+            throw new Error(`machine.${key} is not an allowed field; permitted: ${[...allowed].join(", ")}`);
+        }
+    }
+    if (value.spot !== undefined && typeof value.spot !== "boolean") {
+        throw new Error("machine.spot must be a boolean");
+    }
+    if (value.spot === undefined) {
+        return undefined;
+    }
+    return { spot: value.spot };
+}
 export function parseRunProvider(input) {
     if (input === undefined) {
         return DEFAULT_RUN_PROVIDER;

package/dist/cli.mjs CHANGED Viewed

@@ -4400,6 +4400,82 @@ async function runWhoamiCmd(io2, argv) {
   }
 }
+// dist/host/redeem.js
+function messageForStatus(status2, serverMessage) {
+  switch (status2) {
+    case 404:
+      return "coupon code not found";
+    case 403:
+      return "this coupon can't be redeemed by this workspace";
+    case 409:
+      return "coupon already redeemed";
+    case 400:
+      return serverMessage ? `invalid input: ${serverMessage}` : "invalid input";
+    case 401:
+      return "not authorized \u2014 check --api-token, or run `aex login`";
+    default:
+      return serverMessage ? `redeem failed: ${serverMessage}` : `redeem failed (HTTP ${status2})`;
+  }
+}
+async function runRedeemCmd(io2, argv) {
+  if (await refuseInsideManagedRun(io2, "redeem"))
+    return USAGE_ERR;
+  const common = await resolveCommonHostFlags(io2, argv);
+  if (!common.ok) {
+    io2.stderr(`${common.reason}
+`);
+    return USAGE_ERR;
+  }
+  const positional = common.rest.filter((arg) => !arg.startsWith("--"));
+  if (positional.length !== 1) {
+    io2.stderr("usage: aex redeem <code> [common flags]\n");
+    return USAGE_ERR;
+  }
+  const code = positional[0];
+  const base = common.flags.aexUrl.replace(/\/+$/, "");
+  const url = `${base}/billing/redeem`;
+  let response;
+  try {
+    response = await io2.fetchImpl(url, {
+      method: "POST",
+      headers: {
+        accept: "application/json",
+        "content-type": "application/json",
+        authorization: `Bearer ${common.flags.apiToken}`
+      },
+      body: JSON.stringify({ code })
+    });
+  } catch (err2) {
+    io2.stderr(`redeem failed: ${err2 instanceof Error ? err2.message : String(err2)}
+`);
+    return RUNTIME_ERR;
+  }
+  if (common.flags.debug) {
+    io2.stderr(`[aex] POST /billing/redeem -> ${response.status}
+`);
+  }
+  const text = await response.text();
+  let body = {};
+  try {
+    if (text.length > 0)
+      body = JSON.parse(text);
+  } catch {
+    body = {};
+  }
+  if (!response.ok) {
+    const serverMessage = body && typeof body === "object" && typeof body.message === "string" ? body.message : void 0;
+    io2.stderr(`${messageForStatus(response.status, serverMessage)}
+`);
+    return RUNTIME_ERR;
+  }
+  const ok = body;
+  const amountUsd = typeof ok.amountUsd === "number" ? ok.amountUsd : 0;
+  const newBalanceUsd = typeof ok.newBalanceUsd === "number" ? ok.newBalanceUsd : 0;
+  io2.stdout(`Redeemed $${amountUsd.toFixed(2)}. New balance: $${newBalanceUsd.toFixed(2)}.
+`);
+  return SUCCESS;
+}
 // dist/host/debug.js
 import { dirname, resolve as resolvePath3 } from "node:path";
 function status(source, state, opts = {}) {
@@ -5680,6 +5756,8 @@ async function dispatch(io2, args) {
       return runDeleteAssetCmd(io2, rest);
     case "whoami":
       return runWhoamiCmd(io2, rest);
+    case "redeem":
+      return runRedeemCmd(io2, rest);
     case "login":
       return runLoginCmd(io2, rest);
     case "logout":
@@ -5740,6 +5818,7 @@ Protocol version: ${manifest.protocolVersion}
   io2.stdout("  aex delete <session-id> --api-token T\n");
   io2.stdout("  aex delete-asset <assetId|hash> --api-token T\n");
   io2.stdout("  aex whoami --api-token T\n");
+  io2.stdout("  aex redeem <code> --api-token T             Redeem a coupon code into the workspace prepaid balance\n");
   io2.stdout("  aex login --api-token T [--aex-url U]      Persist token + url (then other verbs need no --api-token)\n");
   io2.stdout("  aex logout                                 Clear the stored token\n");
   io2.stdout("  aex auth status                            Show the resolved config (token never printed)\n");

package/dist/cli.mjs.sha256 CHANGED Viewed

@@ -1 +1 @@
-fe0642cff5926cbaf21e48544c4e022a183f284d8e4c25818784fc84b1df2c5a cli.mjs
+ebfa6eb1106a2447b2462511c45f0c96874889e3189a78fd4a66521d663bde76 cli.mjs

package/dist/client.d.ts CHANGED Viewed

@@ -4,6 +4,7 @@ import { type UploadedAsset } from "./asset-upload.js";
 import { File } from "./file.js";
 import { McpServer } from "./mcp-server.js";
 import { ProxyEndpoint } from "./proxy-endpoint.js";
+import { type RetryOptions } from "./retry.js";
 import { Secret } from "./secret.js";
 import { SkillTool } from "./skill-tool.js";
 import { Tool } from "./tool.js";
@@ -24,6 +25,16 @@ export interface AgentExecutorOptions {
      * route the traces elsewhere. Purely local — nothing is uploaded.
      */
     readonly debug?: boolean | DebugSink;
+    /**
+     * Built-in transport retry policy. Every BFF request is retried on transient
+     * failures (HTTP 429/500/502/503/504/529 and network errors) with bounded
+     * exponential backoff + jitter, honoring `Retry-After`. Billable submits carry
+     * a stable idempotency key, so a retry never creates a duplicate billable run.
+     *
+     * Omit for sensible defaults (4 attempts, ~2 min budget); pass an object to
+     * tune `maxAttempts` / delays / `maxElapsedMs`; pass `false` to disable.
+     */
+    readonly retry?: RetryOptions | false;
 }
 /**
  * The settle-consistent result of {@link AgentExecutor.run}:
@@ -250,6 +261,14 @@ export declare class SessionHandle {
     get id(): string;
     get record(): Session;
     send(input: SessionInput, options?: SessionSendOptions): SessionTurnStream;
+    /**
+     * Re-send the last message on this session — the clean way to retry a turn a
+     * throttle or transient failure interrupted. By default it REUSES the previous
+     * message's idempotency key, so if the original turn actually landed
+     * server-side the replay de-duplicates instead of creating a second billable
+     * turn; pass a fresh `idempotencyKey` to force a brand-new turn.
+     */
+    replayLast(options?: SessionSendOptions): SessionTurnStream;
     suspend(options?: Pick<SessionSendOptions, "idempotencyKey">): Promise<SessionStateChangeAccepted>;
     cancel(options?: Pick<SessionSendOptions, "idempotencyKey">): Promise<SessionStateChangeAccepted>;
     resume(options?: Pick<SessionSendOptions, "idempotencyKey">): Promise<SessionStateChangeAccepted>;

package/dist/client.js CHANGED Viewed

@@ -4,6 +4,7 @@ import { uploadAsset } from "./asset-upload.js";
 import { File } from "./file.js";
 import { McpServer } from "./mcp-server.js";
 import { splitProxyEndpoints } from "./proxy-endpoint.js";
+import { AexRateLimitError, isThrottleFault, parseProviderFault, withRetry } from "./retry.js";
 import { splitSecretEnv } from "./secret.js";
 import { SkillTool } from "./skill-tool.js";
 import { Tool } from "./tool.js";
@@ -40,6 +41,8 @@ export class SessionHandle {
     #http;
     #fetch;
     #session;
+    /** The last message sent on this handle, for {@link SessionHandle.replayLast}. */
+    #lastSend;
     constructor(http, session, fetch) {
         this.#http = http;
         this.#session = session;
@@ -56,8 +59,28 @@ export class SessionHandle {
         assertNoSessionSendSignal(options, "SessionHandle.send");
         return sendSessionInternal(this, input, options);
     }
+    /**
+     * Re-send the last message on this session — the clean way to retry a turn a
+     * throttle or transient failure interrupted. By default it REUSES the previous
+     * message's idempotency key, so if the original turn actually landed
+     * server-side the replay de-duplicates instead of creating a second billable
+     * turn; pass a fresh `idempotencyKey` to force a brand-new turn.
+     */
+    replayLast(options = {}) {
+        assertNoSessionSendSignal(options, "SessionHandle.replayLast");
+        const last = this.#lastSend;
+        if (last === undefined) {
+            throw new RunStateError("SessionHandle.replayLast: no message has been sent on this session yet");
+        }
+        return sendSessionInternal(this, last.input, {
+            ...options,
+            idempotencyKey: options.idempotencyKey ?? last.idempotencyKey
+        });
+    }
     async *#send(input, options) {
-        const accepted = await operations.sendSessionMessage(this.#http, this.id, { input }, { idempotencyKey: options.idempotencyKey ?? generateIdempotencyKey() });
+        const idempotencyKey = options.idempotencyKey ?? generateIdempotencyKey();
+        this.#lastSend = { input, idempotencyKey };
+        const accepted = await operations.sendSessionMessage(this.#http, this.id, { input }, { idempotencyKey });
         this.#session = accepted.session;
         const turn = accepted.turn;
         const events = [];
@@ -303,10 +326,15 @@ export class SessionClient {
         const { message, deleteAfter, messageIdempotencyKey, stream, ...createOptions } = options;
         assertNoLegacySessionFields(options, "Aex.sessions.run");
         const input = normaliseSessionInput(message, "Aex.sessions.run", "message");
-        const session = await this.create(createOptions);
+        // Derive the message key from the create key (like the CLI) so a retried run
+        // with the same `idempotencyKey` de-duplicates BOTH the create and the
+        // billable turn — never a duplicate billable run.
+        const createKey = createOptions.idempotencyKey ?? generateIdempotencyKey();
+        const messageKey = messageIdempotencyKey ?? deriveMessageKey(createKey);
+        const session = await this.create({ ...createOptions, idempotencyKey: createKey });
         const result = await session.send(input, {
             ...(stream ?? {}),
-            idempotencyKey: messageIdempotencyKey ?? generateIdempotencyKey()
+            idempotencyKey: messageKey
         }).done();
         if (deleteAfter) {
             await session.delete();
@@ -583,10 +611,16 @@ export class AgentExecutor {
         if (!options.apiToken) {
             throw new Error("AgentExecutor: apiToken is required");
         }
+        // Wrap the transport fetch (the caller's override, or global `fetch`) with
+        // the bounded-retry layer so every BFF request gets default resilience.
+        // The raw `#fetch` below stays unwrapped for the direct-to-storage asset PUT
+        // and presigned output GETs, which target object storage, not the API plane.
+        const baseFetch = options.fetch ?? ((input, init) => fetch(input, init));
+        const retryingFetch = withRetry(baseFetch, options.retry);
         this.#http = new HttpClient({
             ...(options.baseUrl ? { baseUrl: options.baseUrl } : {}),
             apiToken: options.apiToken,
-            ...(options.fetch ? { fetch: options.fetch } : {}),
+            fetch: retryingFetch,
             // Opt-in local diagnostics: emit a redacted per-request trace to
             // stderr. Uploads nothing. A caller wanting a custom sink can pass
             // a function instead of `true`.
@@ -646,10 +680,15 @@ export class AgentExecutor {
                 ...(opts.idleTimeoutMs !== undefined ? { idleTimeoutMs: opts.idleTimeoutMs } : {}),
                 ...(opts.pingIntervalMs !== undefined ? { pingIntervalMs: opts.pingIntervalMs } : {})
             };
-            const session = await this.sessions.create(createOptions);
+            // Derive the message key from the create key (like the CLI) so a retried
+            // run with the same `idempotencyKey` de-duplicates BOTH the create and the
+            // billable turn server-side — never a duplicate billable run (sdk-dx-3).
+            const createKey = createOptions.idempotencyKey ?? generateIdempotencyKey();
+            const messageKey = messageIdempotencyKey ?? deriveMessageKey(createKey);
+            const session = await this.sessions.create({ ...createOptions, idempotencyKey: createKey });
             const turnResult = await sendSessionInternal(session, input, {
                 ...streamOptions,
-                idempotencyKey: messageIdempotencyKey ?? generateIdempotencyKey()
+                idempotencyKey: messageKey
             }).done();
             if (deleteAfter) {
                 await session.delete();
@@ -678,6 +717,19 @@ export class AgentExecutor {
                 ...(!ok && errorMessage ? { error: errorMessage } : {})
             };
             if (opts.throwOnFailure && !ok) {
+                // A turn that failed because the upstream provider throttled us surfaces
+                // as a structured, non-leaky AexRateLimitError carrying the provider
+                // fault, so callers can branch on `isRateLimited(err)` and replay.
+                const throttle = throttleFromSession(turnResult.session);
+                if (throttle) {
+                    throw new AexRateLimitError({
+                        status: throttle.status ?? 429,
+                        attempts: 1,
+                        source: "provider",
+                        providerFault: throttle,
+                        ...(throttle.retryAfterMs !== undefined ? { retryAfterMs: throttle.retryAfterMs } : {})
+                    });
+                }
                 throw new RunStateError(`AgentExecutor.run: session ${runId} ended ${turnResult.status}${errorMessage ? `: ${errorMessage}` : ""}`, { runId, status: turnResult.status });
             }
             return result;
@@ -905,6 +957,40 @@ function generateIdempotencyKey() {
         return cryptoObj.randomUUID();
     return `idem-${Date.now().toString(36)}-${Math.random().toString(36).slice(2)}`;
 }
+/**
+ * Derive the message idempotency key from the session-create key. Mirrors the
+ * CLI (`<createKey>:message`) so a retried `run` / `sessions.run` that reuses
+ * one `idempotencyKey` de-duplicates BOTH the create and the billable turn.
+ */
+function deriveMessageKey(createKey) {
+    return `${createKey}:message`;
+}
+/**
+ * Extract a throttle-class {@link ProviderFault} from a failed session record.
+ * Reads a structured `providerFault` / `error` field first (the shape the
+ * runtime is expected to emit on a throttled turn), then falls back to a
+ * heuristic scan of `errorMessage`. Returns `undefined` when the failure is not
+ * a throttle.
+ */
+function throttleFromSession(session) {
+    const fault = parseProviderFault(session.providerFault) ??
+        parseProviderFault(session.error) ??
+        faultFromErrorMessage(typeof session.errorMessage === "string" ? session.errorMessage : undefined);
+    return fault && isThrottleFault(fault) ? fault : undefined;
+}
+/** Last-resort throttle detection from a free-text run error message. */
+function faultFromErrorMessage(message) {
+    if (message === undefined || message.length === 0)
+        return undefined;
+    const lower = message.toLowerCase();
+    if (/\b429\b|rate.?limit|too many requests/.test(lower)) {
+        return { kind: "rate_limit", message };
+    }
+    if (/\b529\b|overloaded/.test(lower)) {
+        return { kind: "overloaded", message };
+    }
+    return undefined;
+}
 function normaliseSessionInput(input, surface, field) {
     if (typeof input === "string") {
         if (!input) {