agent-relay-runner 0.54.0 → 0.55.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agent-relay-runner",
3
- "version": "0.54.0",
3
+ "version": "0.55.0",
4
4
  "description": "Unified provider lifecycle runner for Agent Relay",
5
5
  "type": "module",
6
6
  "bin": {
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "agent-relay-runner",
3
3
  "description": "Thin Agent Relay runner bridge for Claude Code",
4
- "version": "0.54.0",
4
+ "version": "0.55.0",
5
5
  "agentRelayContracts": {
6
6
  "providerPluginProtocol": 1
7
7
  }
@@ -31,6 +31,26 @@ relay_post_status_clearing_subagents() {
31
31
  relay_post_status "$1" "${2:-}" "${3:-}" "${4:-}" "${5:-}" "${6:-}" subagent
32
32
  }
33
33
 
34
# Report a provider usage/rate-limit hold to the runner (#286).
#
# Arguments:
#   $1  error type from the provider; always sent as "errorType" (JSON-escaped)
#   $2  reset time in unix seconds; sent as the bare number "resetTime" only
#       when it is a non-empty string made up solely of digits
#   $3  provider error message; sent as "errorMessage" only when non-empty
#
# The body is POSTed to /rate-limit on 127.0.0.1:$AGENT_RELAY_RUNNER_PORT.
# When that port variable is unset or empty nothing is sent. Fire-and-forget:
# curl's output is discarded and its failure is ignored, so this always
# returns 0.
relay_post_rate_limit() {
  local error_type="${1:-}" reset_time="${2:-}" error_message="${3:-}"
  local port="${AGENT_RELAY_RUNNER_PORT:-}"
  if [ -z "$port" ]; then
    return 0
  fi

  local body
  body="{\"errorType\":\"$(relay_json_escape "$error_type")\""

  # Only a purely numeric reset time is safe to splice in as a JSON number.
  case "$reset_time" in
    '' | *[!0-9]*) ;;
    *) body="${body},\"resetTime\":${reset_time}" ;;
  esac

  if [ -n "$error_message" ]; then
    body="${body},\"errorMessage\":\"$(relay_json_escape "$error_message")\""
  fi
  body="${body}}"

  curl -fsS -X POST "http://127.0.0.1:${port}/rate-limit" \
    -H 'Content-Type: application/json' \
    -d "$body" >/dev/null 2>&1 || :
}
53
+
34
54
  relay_post_timeline_status() {
35
55
  relay_post_status "$1" "${2:-provider-turn}" "" "" "" "" "${3:-}" "$4"
36
56
  }
@@ -146,6 +166,13 @@ relay_json_string_field() {
146
166
  printf '%s' "$input" | sed -nE 's/.*"'"$field"'"[[:space:]]*:[[:space:]]*"([^"]*)".*/\1/p' | head -1
147
167
  }
148
168
 
169
# Print the unsigned integer value of a top-level-looking "field": <digits>
# pair found in a JSON text.
#
# Arguments:
#   $1  field name (interpolated into the sed pattern as-is)
#   $2  JSON text to search
#
# Prints the leading run of digits that follows the field's colon, taken from
# the first input line that contains such a pair; prints nothing when the
# field name is empty, the field is absent, or its value does not start with
# a digit (e.g. a quoted string).
relay_json_number_field() {
  local field="${1:-}" input="${2:-}"
  if [ -z "$field" ]; then
    return 0
  fi
  local extract="s/.*\"${field}\"[[:space:]]*:[[:space:]]*([0-9]+).*/\\1/p"
  printf '%s' "$input" | sed -nE "$extract" | head -n 1
}
175
+
149
176
  relay_json_bool_field() {
150
177
  local field="${1:-}"
151
178
  local input="${2:-}"
@@ -4,10 +4,28 @@ source "${CLAUDE_PLUGIN_ROOT:-$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)}/
4
4
  relay_install_hook_guard stop-failure
5
5
 
6
6
  payload="$(cat || true)"
7
- error="$(relay_json_string_field error "$payload")"
7
+ # CC sends the error class as `error_type` (StopFailure docs, verified 2026-06);
8
+ # fall back to legacy `error` so older payloads / tests still match.
9
+ error="$(relay_json_string_field error_type "$payload")"
10
+ [ -z "$error" ] && error="$(relay_json_string_field error "$payload")"
11
+ reset_time="$(relay_json_number_field reset_time "$payload")"
12
+ error_message="$(relay_json_string_field error_message "$payload")"
8
13
 
14
+ # #286: a usage/rate-limit stall is transient and time-bounded — hold the agent
15
+ # (providerState blocked) and auto-resume at reset, instead of silently going idle.
16
+ # The disambiguator for billing_error is reset_time: a subscription rolling-window
17
+ # limit carries one (and is auto-resumable); true credit exhaustion does not (fatal).
9
18
  case "$error" in
10
- authentication_failed|oauth_org_not_allowed|billing_error|model_not_found)
19
+ rate_limit|overloaded)
20
+ relay_post_rate_limit "$error" "$reset_time" "$error_message"
21
+ ;;
22
+ billing_error)
23
+ case "$reset_time" in
24
+ ''|*[!0-9]*) relay_post_status_clearing_subagents error ;;
25
+ *) relay_post_rate_limit "$error" "$reset_time" "$error_message" ;;
26
+ esac
27
+ ;;
28
+ authentication_failed|oauth_org_not_allowed|model_not_found)
11
29
  relay_post_status_clearing_subagents error
12
30
  ;;
13
31
  *)
@@ -12,6 +12,7 @@ import type { SessionEvent } from "../session-insights";
12
12
  import { prepareClaudeProfileHome, profileUsesHostProviderGlobals } from "../profile-home";
13
13
  import { relayMcpClaudeConfigArg } from "../relay-mcp";
14
14
  import { claudeProviderMessageText } from "./claude-delivery";
15
+ import { buildRateLimitProviderState, parseClaudeRateLimitPane } from "../rate-limit";
15
16
 
16
17
  export class ClaudeAdapter implements ProviderAdapter {
17
18
  readonly provider = "claude";
@@ -23,6 +24,9 @@ export class ClaudeAdapter implements ProviderAdapter {
23
24
  private tmuxWatcher?: Timer;
24
25
  private turnWatcher?: Timer;
25
26
  private modelUnavailableReported = false;
27
+ // #286: true while a usage-limit modal is being held, so the pane watcher dismisses
28
+ // it + emits the hold once (not every 2s tick). Cleared when the modal leaves the pane.
29
+ private rateLimitReported = false;
26
30
 
27
31
  onStatusChange(cb: (status: ProviderStatusUpdate) => void): void {
28
32
  this.statusCb = cb;
@@ -358,10 +362,36 @@ export class ClaudeAdapter implements ProviderAdapter {
358
362
  if (status) {
359
363
  this.modelUnavailableReported = true;
360
364
  this.statusCb(status);
365
+ return;
366
+ }
367
+ // #286: the subscription usage-limit modal fires no hook, so detect it from the
368
+ // pane. Dismiss it (Escape = "wait", which just stops the turn) so the agent
369
+ // returns to a clean prompt the resume `continue` can land on, then hold it.
370
+ const rateLimit = claudeRateLimitStatus(pane, sessionName);
371
+ if (rateLimit) {
372
+ if (!this.rateLimitReported) {
373
+ this.rateLimitReported = true;
374
+ this.dismissRateLimitModal(sessionName, socketName);
375
+ this.statusCb(rateLimit);
376
+ }
377
+ } else if (this.rateLimitReported) {
378
+ // Modal gone (dismissed, or a real turn resumed) — re-arm for the next limit.
379
+ this.rateLimitReported = false;
361
380
  }
362
381
  }, 2000);
363
382
  }
364
383
 
384
/**
 * Sends a single Escape keypress to the tmux session to dismiss the usage-limit
 * modal (the modal's "wait" choice).
 *
 * Best-effort: any exception while building or running the tmux command is
 * swallowed so that a failed send-keys can never break the pane watcher that
 * calls this.
 *
 * @param sessionName tmux target passed to `send-keys -t`.
 * @param socketName  optional tmux socket, forwarded to `tmuxCommand`.
 */
private dismissRateLimitModal(sessionName: string, socketName?: string): void {
  try {
    const escapeArgv = tmuxCommand(socketName, "send-keys", "-t", sessionName, "Escape");
    Bun.spawnSync(escapeArgv, { stdin: "ignore", stdout: "ignore", stderr: "ignore" });
  } catch {
    // Deliberately ignored: the caller still publishes the hold, and the modal
    // simply stays on screen if the keypress could not be delivered.
  }
}
394
+
365
395
  private async shutdownTmux(sessionName: string, opts: { graceful: boolean; timeoutMs: number }, socketName?: string): Promise<void> {
366
396
  this.stopTurnWatch();
367
397
  if (this.tmuxWatcher) {
@@ -531,6 +561,24 @@ export function claudePaneIsBusy(text: string): boolean {
531
561
  return CLAUDE_BUSY_SPINNER_RE.test(text) || text.includes("esc to interrupt");
532
562
  }
533
563
 
564
/**
 * #286: turns a usage-limit modal found in captured pane text into a status update
 * carrying the provider-neutral rate-limit hold.
 *
 * The update is non-terminal: status is "idle" (not "error"), the "subagent" claim
 * is cleared, and `providerState` is the blocked rate-limit state built by
 * `buildRateLimitProviderState` with errorType "session_limit".
 *
 * @param text        captured pane contents.
 * @param sessionName optional tmux session name; when given it is appended to the
 *                    hold's `source` as `claude-pane:<name>`.
 * @returns the status update, or null when `parseClaudeRateLimitPane` finds no modal.
 */
export function claudeRateLimitStatus(text: string, sessionName?: string): ProviderStatusUpdate | null {
  const modal = parseClaudeRateLimitPane(text);
  if (!modal) {
    return null;
  }

  const source = sessionName ? `claude-pane:${sessionName}` : "claude-pane";
  // resetAt is only forwarded when the pane yielded a usable reset time.
  const resetField = modal.resetAt ? { resetAt: modal.resetAt } : {};
  const providerState = buildRateLimitProviderState({
    errorType: "session_limit",
    ...resetField,
    message: modal.message,
    source,
  });

  return { status: "idle", clear: ["subagent"], providerState };
}
581
+
534
582
  export function claudeModelUnavailableStatus(text: string, sessionName?: string): ProviderStatusUpdate | null {
535
583
  const message = extractClaudeModelUnavailableMessage(text);
536
584
  if (!message) return null;
@@ -3,6 +3,7 @@ import type { Message, ReplyObligation } from "agent-relay-sdk";
3
3
  import { errMessage, isRecord } from "agent-relay-sdk";
4
4
  import type { ProviderPermissionDecisionInput, ProviderStatusEvent, SemanticStatus, TerminalAttachSpec } from "./adapter";
5
5
  import { logger, parseLogLevel, LOG_LEVELS } from "./logger";
6
+ import { buildRateLimitProviderState } from "./rate-limit";
6
7
 
7
8
  // A hook that failed in a way it could not handle itself reports here so the
8
9
  // failure is never silent (#198 item 5). Phase 1 logs it FATAL to the per-agent
@@ -106,6 +107,9 @@ export function startControlServer(options: ControlServerOptions): ControlServer
106
107
  if (url.pathname === "/hook-fatal" && req.method === "POST") {
107
108
  return handleHookFatal(req, options);
108
109
  }
110
+ if (url.pathname === "/rate-limit" && req.method === "POST") {
111
+ return handleRateLimit(req, options);
112
+ }
109
113
  if (url.pathname === "/monitor") {
110
114
  const upgraded = srv.upgrade(req, { data: { kind: "monitor" } });
111
115
  return upgraded ? undefined : new Response("WebSocket upgrade failed", { status: 400 });
@@ -441,6 +445,28 @@ async function handleStatus(req: Request, options: ControlServerOptions): Promis
441
445
  return Response.json({ ok: true, ...update });
442
446
  }
443
447
 
448
/**
 * #286: handles POST /rate-limit, where the stop-failure hook reports a
 * usage/rate-limit stall.
 *
 * Accepted JSON body fields (all optional, anything of the wrong type is ignored):
 *   - errorType:    non-empty string; defaults to "rate_limit".
 *   - resetTime:    finite number of unix SECONDS; converted to milliseconds.
 *   - errorMessage: non-empty string.
 *
 * Emits an "idle" status update (clearing the "subagent" claim) whose providerState
 * is the blocked rate-limit state from `buildRateLimitProviderState`, then echoes
 * the interpreted errorType (and resetAt in ms, when truthy) back to the caller.
 *
 * @returns 400 `{ error: "invalid body" }` when the body is not a JSON object,
 *          otherwise `{ ok: true, errorType, resetAt? }`.
 */
async function handleRateLimit(req: Request, options: ControlServerOptions): Promise<Response> {
  const body = await req.json().catch(() => null);
  if (!isRecord(body)) {
    return Response.json({ error: "invalid body" }, { status: 400 });
  }

  const { errorType: rawType, resetTime: rawReset, errorMessage: rawMessage } = body;

  const errorType = typeof rawType === "string" && rawType ? rawType : "rate_limit";
  const message = typeof rawMessage === "string" && rawMessage ? rawMessage : undefined;

  // The hook forwards seconds; everything downstream works in milliseconds.
  let resetAt: number | undefined;
  if (typeof rawReset === "number" && Number.isFinite(rawReset)) {
    resetAt = Math.round(rawReset * 1000);
  }

  options.onStatus({
    status: "idle",
    clear: ["subagent"],
    providerState: buildRateLimitProviderState({ errorType, resetAt, message, source: "claude" }),
  });

  const reply: Record<string, unknown> = { ok: true, errorType };
  if (resetAt) {
    reply.resetAt = resetAt;
  }
  return Response.json(reply);
}
469
+
444
470
  function statusTimelineEvent(body: Partial<ProviderStatusEvent> | null): ProviderStatusEvent["timeline"] | undefined {
445
471
  const timeline = body?.timeline;
446
472
  if (!timeline || typeof timeline !== "object" || Array.isArray(timeline)) return undefined;
@@ -0,0 +1,156 @@
1
+ // #286 — shared rate-limit hold construction + Claude pane detection.
2
+ //
3
+ // Two detection arms feed the SAME provider-neutral `blocked` hold:
4
+ // 1. The StopFailure hook (clean API-error turn-end: API-429/auth/billing) → control-server.
5
+ // 2. Pane-scrape of the subscription "session/weekly limit" modal (no hook exists for it —
6
+ // anthropics/claude-code#34817 was closed not-planned) → the Claude adapter.
7
+ // Both build the hold here so the shape stays identical, and the relay's resume sweep
8
+ // (services/rate-limit-resume.ts) lifts either once the window resets.
9
+
10
+ export const RATE_LIMIT_BLOCK_REASON = "rate_limit";
11
+
12
+ // Reject a parsed reset that's in the past or implausibly far out — a bad parse would
13
+ // otherwise resume too early (thrash) or hold for days. Beyond this we drop resetAt and
14
+ // fall back to the resume sweep's poll window.
15
+ const MAX_RESET_AHEAD_MS = 7 * 24 * 60 * 60 * 1000;
16
+
17
+ export interface RateLimitHoldInput {
18
+ errorType?: string;
19
+ /** Unix ms the limit window resets, when known. */
20
+ resetAt?: number;
21
+ message?: string;
22
+ source?: string;
23
+ }
24
+
25
/**
 * Builds the `blocked` providerState that represents a rate-limit hold.
 *
 * - `state` is always "blocked" and `reason` is RATE_LIMIT_BLOCK_REASON.
 * - `label` mentions the reset clock time when a usable `resetAt` is supplied,
 *   otherwise it is the generic "usage limit reached".
 * - `source` defaults to "claude"; `errorType` defaults to RATE_LIMIT_BLOCK_REASON.
 * - `resetAt` is included only when it is a finite, non-zero number; `message`
 *   only when non-empty.
 * - `enteredAt` and `updatedAt` are both the time of this call. No range check is
 *   applied to `resetAt` here.
 *
 * @param input the hold details gathered by the caller.
 * @returns a plain object suitable for a status update's `providerState`.
 */
export function buildRateLimitProviderState(input: RateLimitHoldInput): Record<string, unknown> {
  const stampedAt = Date.now();
  const candidate = input.resetAt;
  const resetAt = typeof candidate === "number" && Number.isFinite(candidate) ? candidate : undefined;

  let label = "usage limit reached";
  if (resetAt) {
    label = `usage limit · resets ${formatResetClock(resetAt)}`;
  }

  // Keys are added in a fixed order so the serialized shape stays stable.
  const hold: Record<string, unknown> = {
    state: "blocked",
    reason: RATE_LIMIT_BLOCK_REASON,
    label,
    recommendedAction: "Holding until the usage window resets; agent-relay auto-resumes the agent.",
    source: input.source ?? "claude",
    errorType: input.errorType ?? RATE_LIMIT_BLOCK_REASON,
    enteredAt: stampedAt,
  };
  if (resetAt) {
    hold.resetAt = resetAt;
  }
  if (input.message) {
    hold.message = input.message;
  }
  hold.updatedAt = stampedAt;
  return hold;
}
43
+
44
/**
 * Formats a reset instant as an hour:minute clock string for the chat notice and
 * badge label, using the host's default locale and time zone.
 *
 * @param resetAtMs the reset instant in unix milliseconds.
 * @returns the locale-formatted time; if locale formatting throws, the UTC "HH:MM"
 *          slice of the ISO timestamp instead.
 */
function formatResetClock(resetAtMs: number): string {
  const when = new Date(resetAtMs);
  const clockOnly: Intl.DateTimeFormatOptions = { hour: "2-digit", minute: "2-digit" };
  try {
    return when.toLocaleTimeString([], clockOnly);
  } catch {
    // Characters 11..15 of an ISO-8601 timestamp are the "HH:MM" part.
    return when.toISOString().slice(11, 16);
  }
}
52
+
53
+ // The subscription limit line wordings seen in the wild (claude-auto-retry patterns +
54
+ // claude-auto-retry#15: "session"/"weekly limit" slipped past older regexes). We REQUIRE a
55
+ // limit phrase AND a reset/retry phrase somewhere in the pane (not necessarily on the same line) so normal output mentioning
56
+ // "rate limit" can't false-positive the hold.
57
+ const LIMIT_PHRASE_RE = /(?:hit (?:your|the)\s*(?:[\w-]+\s+)*limit|usage limit|\d+-hour limit|limit reached|out of (?:extra )?usage|rate limit(?:\s+(?:hit|reached|exceeded))?|weekly limit|session limit)/i;
58
+ const RESET_AT_RE = /\bresets?\b(?:\s+at)?\s+(\d{1,2})(?::(\d{2}))?\s*(am|pm)?\s*(?:\(([^)]+)\))?/i;
59
+ const TRY_AGAIN_RE = /\btry again in\s+(\d+)\s*(second|minute|hour|day)s?/i;
60
+
61
+ export interface PaneRateLimit {
62
+ /** The matched limit line, for the chat notice / observability. */
63
+ message: string;
64
+ /** Unix ms reset, when parseable from the pane (else undefined → resume falls back to poll). */
65
+ resetAt?: number;
66
+ }
67
+
68
/**
 * Detects a Claude usage/rate-limit modal in captured tmux pane text and extracts
 * its reset time.
 *
 * A hit requires BOTH a limit phrase (LIMIT_PHRASE_RE) and a reset/retry phrase
 * somewhere in the pane; the two may sit on different lines. A relative
 * "try again in N <unit>" phrase takes precedence over a wall-clock "resets H[:MM]"
 * phrase. The computed reset is passed through `clampReset`, so an implausible
 * value is dropped while the hit itself is still reported.
 *
 * @param text  captured pane contents.
 * @param nowMs current time in unix ms; injectable for tests.
 * @returns the cleaned line containing the first limit phrase plus, when usable,
 *          `resetAt` in unix ms; null when the text is empty or either phrase is missing.
 */
export function parseClaudeRateLimitPane(text: string, nowMs: number = Date.now()): PaneRateLimit | null {
  if (!text) return null;

  const limitHit = LIMIT_PHRASE_RE.exec(text);
  if (!limitHit) return null;

  let rawReset: number | undefined;
  const retryIn = TRY_AGAIN_RE.exec(text);
  if (retryIn) {
    // Relative form: "try again in <count> <unit>".
    let unitMs = 0;
    switch (retryIn[2]!.toLowerCase()) {
      case "second":
        unitMs = 1000;
        break;
      case "minute":
        unitMs = 60_000;
        break;
      case "hour":
        unitMs = 3_600_000;
        break;
      case "day":
        unitMs = 86_400_000;
        break;
    }
    rawReset = nowMs + Number(retryIn[1]) * unitMs;
  } else {
    // Wall-clock form: "resets [at] H[:MM][am|pm] [(Zone)]".
    const clock = RESET_AT_RE.exec(text);
    if (!clock) {
      // A limit phrase with no reset/retry phrase is not treated as the modal, to
      // avoid holding on ordinary output that merely mentions a limit.
      return null;
    }
    rawReset = parseClockReset(clock, nowMs);
  }

  const message = limitLineAt(text, limitHit.index);
  const resetAt = clampReset(rawReset, nowMs);
  return resetAt ? { message, resetAt } : { message };
}
96
+
97
/**
 * Returns the single line of `text` that contains character offset `index`, with
 * box-drawing border characters replaced by spaces, runs of whitespace collapsed
 * to one space, and leading/trailing whitespace removed.
 *
 * @param text  the full pane text.
 * @param index an offset inside the wanted line.
 */
function limitLineAt(text: string, index: number): string {
  const lineStart = text.lastIndexOf("\n", index) + 1;
  const nextBreak = text.indexOf("\n", index);
  const rawLine = nextBreak === -1 ? text.slice(lineStart) : text.slice(lineStart, nextBreak);

  const withoutChrome = rawLine.replace(/[│┌┐└┘─┤├╭╮╯╰]/g, " ");
  return withoutChrome.replace(/\s+/g, " ").trim();
}
104
+
105
/**
 * Accepts a candidate reset instant only when it is plausible.
 *
 * @param resetAt candidate reset time in unix ms, or undefined.
 * @param nowMs   current time in unix ms.
 * @returns `resetAt` unchanged when it is finite, strictly after `nowMs`, and no
 *          more than MAX_RESET_AHEAD_MS ahead of it; otherwise undefined.
 */
function clampReset(resetAt: number | undefined, nowMs: number): number | undefined {
  if (typeof resetAt !== "number" || !Number.isFinite(resetAt)) {
    return undefined;
  }
  const alreadyPassed = resetAt <= nowMs;
  const tooFarAhead = resetAt - nowMs > MAX_RESET_AHEAD_MS;
  return alreadyPassed || tooFarAhead ? undefined : resetAt;
}
110
+
111
/**
 * Resolves a RESET_AT_RE match ("resets [at] H[:MM][am|pm] [(Zone)]") to the next
 * instant at which that wall-clock time occurs.
 *
 * The time is interpreted in the captured IANA time zone when one is present and
 * valid, otherwise in the host's local zone. "Next" means strictly after `nowMs`:
 * today's occurrence if it is still ahead, else tomorrow's.
 *
 * Tomorrow's occurrence is found by advancing the calendar day rather than adding a
 * fixed 24 hours, and the zone offset is re-read at the resolved instant, so the
 * result keeps the requested wall-clock time even when a DST transition falls
 * between now and the reset.
 *
 * @param match a RESET_AT_RE exec result: [1] hour, [2] minute, [3] am/pm, [4] zone.
 * @param nowMs current time in unix ms.
 * @returns the reset instant in unix ms, or undefined when hour > 23 or minute > 59.
 */
function parseClockReset(match: RegExpExecArray, nowMs: number): number | undefined {
  let hour = Number(match[1]);
  const minute = match[2] ? Number(match[2]) : 0;
  const meridiem = match[3]?.toLowerCase();
  const tz = match[4]?.trim();
  if (!Number.isFinite(hour) || hour > 23 || minute > 59) return undefined;
  if (meridiem === "pm" && hour < 12) hour += 12;
  if (meridiem === "am" && hour === 12) hour = 0;

  if (tz && isValidTimeZone(tz)) {
    const { y, mo, d, offsetMs } = tzWallAndOffset(tz, nowMs);
    // Converts a wall-clock time (encoded as if it were UTC) to a real instant:
    // first guess with the offset in force now, then correct with the offset in
    // force at that guess, which differs only if a DST change lies in between.
    const wallToInstant = (wallAsUtc: number): number => {
      const guess = wallAsUtc - offsetMs;
      return wallAsUtc - tzWallAndOffset(tz, guess).offsetMs;
    };
    const todayWall = Date.UTC(y, mo - 1, d, hour, minute, 0);
    const today = wallToInstant(todayWall);
    if (today > nowMs) return today;
    // Adding 24h in the as-if-UTC encoding is exactly "same wall time, next calendar day".
    return wallToInstant(todayWall + 86_400_000);
  }

  const local = new Date(nowMs);
  local.setHours(hour, minute, 0, 0);
  if (local.getTime() <= nowMs) {
    // Calendar-day arithmetic keeps the local wall-clock time across DST changes.
    local.setDate(local.getDate() + 1);
    local.setHours(hour, minute, 0, 0);
  }
  return local.getTime();
}
135
+
136
/**
 * Reports whether the runtime's Intl implementation accepts `tz` as a time zone
 * identifier.
 *
 * @param tz candidate time zone name, e.g. "Asia/Shanghai".
 * @returns true when constructing an Intl.DateTimeFormat with it succeeds, false
 *          when the constructor throws.
 */
function isValidTimeZone(tz: string): boolean {
  let accepted = true;
  try {
    new Intl.DateTimeFormat("en-US", { timeZone: tz });
  } catch {
    accepted = false;
  }
  return accepted;
}
144
+
145
/**
 * Computes, via Intl (no dependencies), the calendar date shown on a wall clock in
 * time zone `tz` at instant `atMs`, together with that zone's UTC offset then.
 *
 * @param tz   a time zone identifier accepted by Intl.DateTimeFormat.
 * @param atMs the instant to inspect, in unix ms.
 * @returns `y`/`mo`/`d` — the zone-local year, 1-based month and day — and
 *          `offsetMs`, the amount to ADD to UTC to get zone-local wall time
 *          (positive east of Greenwich). `offsetMs` is a whole number of seconds.
 */
function tzWallAndOffset(tz: string, atMs: number): { y: number; mo: number; d: number; offsetMs: number } {
  const dtf = new Intl.DateTimeFormat("en-US", {
    timeZone: tz, hour12: false,
    year: "numeric", month: "2-digit", day: "2-digit", hour: "2-digit", minute: "2-digit", second: "2-digit",
  });
  // The formatted parts only carry whole seconds, so compare against the instant
  // truncated to the second; otherwise the sub-second remainder of `atMs` leaks
  // into the offset as a skew of up to -999 ms.
  const wholeSecondMs = Math.floor(atMs / 1000) * 1000;
  const parts = Object.fromEntries(dtf.formatToParts(new Date(wholeSecondMs)).map((p) => [p.type, p.value])) as Record<string, string>;
  const y = Number(parts.year), mo = Number(parts.month), d = Number(parts.day);
  // `% 24` folds an hour reported as "24" (midnight) back to 0.
  const h = Number(parts.hour) % 24, mi = Number(parts.minute), s = Number(parts.second);
  // Re-encode the zone-local wall time as if it were UTC; the difference from the
  // real instant is the zone's offset.
  const asUtc = Date.UTC(y, mo - 1, d, h, mi, s);
  return { y, mo, d, offsetMs: asUtc - wholeSecondMs };
}
package/src/runner.ts CHANGED
@@ -278,6 +278,12 @@ export class AgentRunner {
278
278
  // busy reconciler doesn't mistake a permission prompt for a stuck-busy turn.
279
279
  private providerBlocked = false;
280
280
  private terminalFailure?: { reason: string; message: string; providerState?: Record<string, unknown> };
281
+ // #286: a usage/rate-limit hold. The turn truly ended (idle), but the agent is
282
+ // held until the limit resets, so this can't ride an active-work claim like the
283
+ // permission-blocked state does. publishStatus surfaces it as the agent's
284
+ // providerState; it clears when a new turn starts (the relay's resume message
285
+ // wakes one) so the blocked badge lifts on its own.
286
+ private rateLimitHold?: Record<string, unknown>;
281
287
  // Reasoning tailer (item 5): streams the in-flight turn's reasoning/tool steps
282
288
  // from the Claude transcript into chat as discreet session events.
283
289
  private reasoningTail?: { timer: ReturnType<typeof setInterval>; seen: Set<string> };
@@ -1103,11 +1109,22 @@ export class AgentRunner {
1103
1109
  };
1104
1110
  }
1105
1111
  if (typeof update !== "string" && update.providerState) {
1106
- const state = (update.providerState as { state?: unknown }).state;
1107
- this.providerBlocked = state === "blocked";
1112
+ const ps = update.providerState as { state?: unknown; reason?: unknown };
1113
+ this.providerBlocked = ps.state === "blocked";
1114
+ // A rate-limit hold persists across the idle the turn ends on; any other
1115
+ // providerState supersedes it (clears a stale hold).
1116
+ if (ps.state === "blocked" && ps.reason === "rate_limit") {
1117
+ const fresh = !this.rateLimitHold;
1118
+ this.rateLimitHold = update.providerState;
1119
+ if (fresh) this.publishRateLimitNotice(update.providerState);
1120
+ } else {
1121
+ this.rateLimitHold = undefined;
1122
+ }
1108
1123
  } else if (status === "idle") {
1109
1124
  this.providerBlocked = false;
1110
1125
  }
1126
+ // Forward progress (a real turn) lifts the hold so the blocked badge clears.
1127
+ if (status === "busy") this.rateLimitHold = undefined;
1111
1128
  if (typeof update !== "string" && status === "error") {
1112
1129
  const terminalReason = typeof update.metadata?.terminalFailureReason === "string"
1113
1130
  ? update.metadata.terminalFailureReason
@@ -1780,6 +1797,21 @@ export class AgentRunner {
1780
1797
  });
1781
1798
  }
1782
1799
 
1800
/**
 * #286: publishes a chat marker announcing that a usage/rate-limit hold has begun.
 *
 * The marker is sent through `publishSessionEvent` as a "notice" session event
 * addressed from this agent to "user". Per the design note for this feature it is an
 * outbound session event rather than an inbound message, so that it appears in chat
 * without waking a turn on an agent that is still limited.
 *
 * @param providerState the hold's providerState; its `label` is used in the body
 *                      when it is a non-empty string, else "usage limit reached".
 */
private publishRateLimitNotice(providerState: Record<string, unknown>): void {
  const rawLabel = providerState.label;
  const label = typeof rawLabel === "string" && rawLabel ? rawLabel : "usage limit reached";
  const turnId = this.currentTurnId;

  this.publishSessionEvent({
    from: this.agentId,
    to: "user",
    body: `⏳ ${label} — holding; agent-relay will auto-resume at reset.`,
    // The turn id is attached only while a turn is current.
    session: { type: "notice", origin: "provider", label: "rate-limit", ...(turnId ? { turnId } : {}) },
  });
}
1814
+
1783
1815
  private publishStatus(): void {
1784
1816
  this.claims.expire();
1785
1817
  const status = this.claims.currentStatus();
@@ -1787,7 +1819,7 @@ export class AgentRunner {
1787
1819
  const activeWork = this.claims.activeWork();
1788
1820
  const activeSubagents = activeWork.filter((item) => item.kind === "subagent");
1789
1821
  const terminalFailure = this.terminalFailure;
1790
- const providerState = terminalFailure?.providerState ?? providerStateFromActiveWork(activeWork);
1822
+ const providerState = terminalFailure?.providerState ?? this.rateLimitHold ?? providerStateFromActiveWork(activeWork);
1791
1823
  this.bus.setSemanticStatus(status === "offline" || status === "error" ? "idle" : status);
1792
1824
  const timelineEvent = this.pendingTimelineEvent;
1793
1825
  this.pendingTimelineEvent = undefined;