agent-relay-runner 0.53.0 → 0.55.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agent-relay-runner",
3
- "version": "0.53.0",
3
+ "version": "0.55.0",
4
4
  "description": "Unified provider lifecycle runner for Agent Relay",
5
5
  "type": "module",
6
6
  "bin": {
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "agent-relay-runner",
3
3
  "description": "Thin Agent Relay runner bridge for Claude Code",
4
- "version": "0.53.0",
4
+ "version": "0.55.0",
5
5
  "agentRelayContracts": {
6
6
  "providerPluginProtocol": 1
7
7
  }
@@ -31,6 +31,26 @@ relay_post_status_clearing_subagents() {
31
31
  relay_post_status "$1" "${2:-}" "${3:-}" "${4:-}" "${5:-}" "${6:-}" subagent
32
32
  }
33
33
 
34
# Report a provider usage/rate-limit hold to the runner (#286). The runner turns
# this into a `providerState: blocked` (reason rate_limit), posts a chat notice,
# and auto-resumes the agent once the window resets.
#
# Arguments:
#   $1 error_type    - provider error class, forwarded as `errorType`
#   $2 reset_time    - unix seconds at which the window resets (only "if available"
#                      per CC StopFailure docs). Forwarded as `resetTime` only when
#                      it is all digits; the runner falls back to a poll window
#                      when it is absent.
#   $3 error_message - optional message, forwarded as `errorMessage` when non-empty
#
# No-op when AGENT_RELAY_RUNNER_PORT is unset. Fire-and-forget: failures are
# swallowed, and the request is time-bounded so a wedged runner cannot stall the hook.
relay_post_rate_limit() {
  local error_type="${1:-}"
  local reset_time="${2:-}"
  local error_message="${3:-}"
  local port="${AGENT_RELAY_RUNNER_PORT:-}"
  [ -z "$port" ] && return 0
  local body="{\"errorType\":\"$(relay_json_escape "$error_type")\""
  case "$reset_time" in
    ''|*[!0-9]*) ;;
    *)
      # JSON numbers may not carry leading zeros; strip them (keeping a lone "0")
      # so the runner does not reject the whole body as invalid JSON.
      while [ "${#reset_time}" -gt 1 ] && [ "${reset_time#0}" != "$reset_time" ]; do
        reset_time="${reset_time#0}"
      done
      body="${body},\"resetTime\":${reset_time}"
      ;;
  esac
  [ -n "$error_message" ] && body="${body},\"errorMessage\":\"$(relay_json_escape "$error_message")\""
  body="${body}}"
  # --max-time bounds connect + transfer; without it curl waits indefinitely on a
  # runner that accepted the connection but never answers.
  curl -fsS --max-time 5 -X POST "http://127.0.0.1:${port}/rate-limit" \
    -H 'Content-Type: application/json' \
    -d "$body" >/dev/null 2>&1 || true
}
53
+
34
54
  relay_post_timeline_status() {
35
55
  relay_post_status "$1" "${2:-provider-turn}" "" "" "" "" "${3:-}" "$4"
36
56
  }
@@ -146,6 +166,13 @@ relay_json_string_field() {
146
166
  printf '%s' "$input" | sed -nE 's/.*"'"$field"'"[[:space:]]*:[[:space:]]*"([^"]*)".*/\1/p' | head -1
147
167
  }
148
168
 
169
# Print the unsigned integer value of JSON field $1 as found in the text $2.
# Prints nothing when the field name is empty or no `"field": <digits>` pair is
# present. Matching is a line-oriented regex, not a JSON parse; only the first
# matching line is reported.
relay_json_number_field() {
  local field="${1:-}"
  local input="${2:-}"
  if [ -z "$field" ]; then
    return 0
  fi
  local extract='s/.*"'"$field"'"[[:space:]]*:[[:space:]]*([0-9]+).*/\1/p'
  printf '%s' "$input" | sed -nE "$extract" | head -1
}
175
+
149
176
  relay_json_bool_field() {
150
177
  local field="${1:-}"
151
178
  local input="${2:-}"
@@ -4,10 +4,28 @@ source "${CLAUDE_PLUGIN_ROOT:-$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)}/
4
4
  relay_install_hook_guard stop-failure
5
5
 
6
6
  payload="$(cat || true)"
7
- error="$(relay_json_string_field error "$payload")"
7
+ # CC sends the error class as `error_type` (StopFailure docs, verified 2026-06);
8
+ # fall back to legacy `error` so older payloads / tests still match.
9
+ error="$(relay_json_string_field error_type "$payload")"
10
+ [ -z "$error" ] && error="$(relay_json_string_field error "$payload")"
11
+ reset_time="$(relay_json_number_field reset_time "$payload")"
12
+ error_message="$(relay_json_string_field error_message "$payload")"
8
13
 
14
+ # #286: a usage/rate-limit stall is transient and time-bounded — hold the agent
15
+ # (providerState blocked) and auto-resume at reset, instead of silently going idle.
16
+ # The disambiguator for billing_error is reset_time: a subscription rolling-window
17
+ # limit carries one (and is auto-resumable); true credit exhaustion does not (fatal).
9
18
  case "$error" in
10
- authentication_failed|oauth_org_not_allowed|billing_error|model_not_found)
19
+ rate_limit|overloaded)
20
+ relay_post_rate_limit "$error" "$reset_time" "$error_message"
21
+ ;;
22
+ billing_error)
23
+ case "$reset_time" in
24
+ ''|*[!0-9]*) relay_post_status_clearing_subagents error ;;
25
+ *) relay_post_rate_limit "$error" "$reset_time" "$error_message" ;;
26
+ esac
27
+ ;;
28
+ authentication_failed|oauth_org_not_allowed|model_not_found)
11
29
  relay_post_status_clearing_subagents error
12
30
  ;;
13
31
  *)
@@ -12,6 +12,7 @@ import type { SessionEvent } from "../session-insights";
12
12
  import { prepareClaudeProfileHome, profileUsesHostProviderGlobals } from "../profile-home";
13
13
  import { relayMcpClaudeConfigArg } from "../relay-mcp";
14
14
  import { claudeProviderMessageText } from "./claude-delivery";
15
+ import { buildRateLimitProviderState, parseClaudeRateLimitPane } from "../rate-limit";
15
16
 
16
17
  export class ClaudeAdapter implements ProviderAdapter {
17
18
  readonly provider = "claude";
@@ -23,6 +24,9 @@ export class ClaudeAdapter implements ProviderAdapter {
23
24
  private tmuxWatcher?: Timer;
24
25
  private turnWatcher?: Timer;
25
26
  private modelUnavailableReported = false;
27
+ // #286: true while a usage-limit modal is being held, so the pane watcher dismisses
28
+ // it + emits the hold once (not every 2s tick). Cleared when the modal leaves the pane.
29
+ private rateLimitReported = false;
26
30
 
27
31
  onStatusChange(cb: (status: ProviderStatusUpdate) => void): void {
28
32
  this.statusCb = cb;
@@ -358,10 +362,36 @@ export class ClaudeAdapter implements ProviderAdapter {
358
362
  if (status) {
359
363
  this.modelUnavailableReported = true;
360
364
  this.statusCb(status);
365
+ return;
366
+ }
367
+ // #286: the subscription usage-limit modal fires no hook, so detect it from the
368
+ // pane. Dismiss it (Escape = "wait", which just stops the turn) so the agent
369
+ // returns to a clean prompt the resume `continue` can land on, then hold it.
370
+ const rateLimit = claudeRateLimitStatus(pane, sessionName);
371
+ if (rateLimit) {
372
+ if (!this.rateLimitReported) {
373
+ this.rateLimitReported = true;
374
+ this.dismissRateLimitModal(sessionName, socketName);
375
+ this.statusCb(rateLimit);
376
+ }
377
+ } else if (this.rateLimitReported) {
378
+ // Modal gone (dismissed, or a real turn resumed) — re-arm for the next limit.
379
+ this.rateLimitReported = false;
361
380
  }
362
381
  }, 2000);
363
382
  }
364
383
 
384
/**
 * Dismiss the usage-limit modal by sending Escape to the tmux session — the
 * "wait" choice, which stops the turn (per the CC TUI) and returns the agent to
 * a normal idle prompt.
 *
 * Best-effort: any failure to send the key is swallowed so it can never wedge
 * the pane watcher.
 */
private dismissRateLimitModal(sessionName: string, socketName?: string): void {
  try {
    Bun.spawnSync(
      tmuxCommand(socketName, "send-keys", "-t", sessionName, "Escape"),
      { stdin: "ignore", stdout: "ignore", stderr: "ignore" },
    );
  } catch {
    // Deliberately ignored — the hold still publishes; worst case the modal
    // lingers until the next tick.
  }
}
394
+
365
395
  private async shutdownTmux(sessionName: string, opts: { graceful: boolean; timeoutMs: number }, socketName?: string): Promise<void> {
366
396
  this.stopTurnWatch();
367
397
  if (this.tmuxWatcher) {
@@ -531,6 +561,24 @@ export function claudePaneIsBusy(text: string): boolean {
531
561
  return CLAUDE_BUSY_SPINNER_RE.test(text) || text.includes("esc to interrupt");
532
562
  }
533
563
 
564
/**
 * #286: detect the subscription usage-limit modal in captured pane text and turn
 * it into the provider-neutral rate-limit hold (idle + blocked) that the relay's
 * resume sweep lifts at reset. Mirrors claudeModelUnavailableStatus, but is
 * non-terminal: the status is "idle", not "error".
 *
 * @param text captured pane text
 * @param sessionName optional session name, used only to tag the hold's `source`
 * @returns the status update to publish, or null when no modal is detected
 */
export function claudeRateLimitStatus(text: string, sessionName?: string): ProviderStatusUpdate | null {
  const modal = parseClaudeRateLimitPane(text);
  if (!modal) return null;

  const hold: Parameters<typeof buildRateLimitProviderState>[0] = { errorType: "session_limit" };
  // Only forward a reset instant the pane parser was able to produce.
  if (modal.resetAt) hold.resetAt = modal.resetAt;
  hold.message = modal.message;
  hold.source = sessionName ? `claude-pane:${sessionName}` : "claude-pane";

  return {
    status: "idle",
    clear: ["subagent"],
    providerState: buildRateLimitProviderState(hold),
  };
}
581
+
534
582
  export function claudeModelUnavailableStatus(text: string, sessionName?: string): ProviderStatusUpdate | null {
535
583
  const message = extractClaudeModelUnavailableMessage(text);
536
584
  if (!message) return null;
@@ -0,0 +1,176 @@
1
+ import type { RelayHttpClient } from "agent-relay-sdk";
2
+ import type { OutboxRecord } from "./outbox";
3
+
4
// Exclusive upper bound (64 KiB) on the UTF-8 size of one archive POST's JSON body:
// a request fits only when its serialized size is strictly below this.
const CONTINUATION_ARCHIVE_MAX_POST_BODY_BYTES = 64 * 1024;
// Target (56 KiB) for the UTF-8 size of the segment text packed into one chunk.
const CONTINUATION_ARCHIVE_CHUNK_TARGET_BYTES = 56 * 1024;
// Cap (8 MiB) on a whole segment; anything larger is truncated to a prefix.
const CONTINUATION_ARCHIVE_MAX_GENERATION_BYTES = 8 * 1024 * 1024;
7
+
8
/** Outbox payload for a `continuation-archive` record, including resumable delivery progress. */
interface ContinuationArchiveOutboxPayload {
  /** Agent the archive belongs to; sent as `agentId` on every chunk request. */
  agentId: string;
  /** The full segment text; it is re-split into chunks on every delivery attempt. */
  segment: string;
  /** Archive generation learned from the first chunk's response and sent with later chunks. */
  generation?: number;
  /** Number of chunks already posted; delivery resumes at this index. */
  deliveredChunks?: number;
  /** Chunk count recorded at the last progress update. */
  totalChunks?: number;
}
15
+
16
/** The part of the archive endpoint's response this module reads. */
interface ContinuationArchiveResponse {
  /** `generation` is untrusted (`unknown`) and validated by archiveGeneration before use. */
  archive?: { generation?: unknown };
}
19
+
20
/**
 * Cap a continuation-archive segment at CONTINUATION_ARCHIVE_MAX_GENERATION_BYTES
 * UTF-8 bytes.
 *
 * A segment within the cap is returned untouched with `droppedBytes: 0`. An
 * oversized one is cut to the longest whole-character prefix that fits and then
 * right-trimmed.
 *
 * @returns the bounded segment plus byte accounting; `keptBytes` is the UTF-8
 *   size of the returned `segment` and `keptBytes + droppedBytes` equals the
 *   input's size.
 */
export function boundContinuationArchiveSegment(segment: string): { segment: string; keptBytes: number; droppedBytes: number } {
  const bytes = utf8Bytes(segment);
  if (bytes <= CONTINUATION_ARCHIVE_MAX_GENERATION_BYTES) {
    return { segment, keptBytes: bytes, droppedBytes: 0 };
  }
  // Trim BEFORE measuring so the reported byte counts describe the segment that
  // is actually returned (trailing whitespace removed by trimEnd counts as dropped).
  const bounded = takeUtf8Prefix(segment, CONTINUATION_ARCHIVE_MAX_GENERATION_BYTES).trimEnd();
  const keptBytes = utf8Bytes(bounded);
  return { segment: bounded, keptBytes, droppedBytes: bytes - keptBytes };
}
29
+
30
/**
 * Deliver one `continuation-archive` outbox record as a sequence of size-bounded
 * chunk POSTs, persisting progress after every chunk so a retry resumes where
 * the previous attempt stopped.
 *
 * Flow:
 *  1. Validate the payload and cap the segment (persisting the truncated form).
 *  2. Split the segment into chunks.
 *  3. POST each not-yet-delivered chunk. The first response supplies the archive
 *     generation, which is sent with every later chunk.
 *
 * @param input.record the outbox record; its payload must carry `agentId` and a string `segment`
 * @param input.http client used to post each chunk
 * @param input.updatePayload persists the payload (with progress) for the record's `seq`
 * @param input.sessionLog sink for the truncation notice
 * @throws Error when the payload is invalid, a chunk body is too large, the first
 *   response has no generation, or a later response reports a different generation.
 *   Errors from `input.http` propagate unchanged.
 */
export async function deliverContinuationArchiveRecord(input: {
  record: OutboxRecord;
  http: Pick<RelayHttpClient, "recordContinuationArchive">;
  updatePayload: (seq: number, payload: unknown) => void;
  sessionLog: (message: string) => void;
}): Promise<void> {
  const payload = input.record.payload as ContinuationArchiveOutboxPayload | null | undefined;
  // Validate before touching any field so a null/non-object payload yields the
  // descriptive error rather than a TypeError.
  if (!payload || !payload.agentId || typeof payload.segment !== "string") {
    throw new Error("invalid continuation archive outbox payload");
  }
  let segment = payload.segment;

  const bounded = boundContinuationArchiveSegment(segment);
  const truncated = bounded.droppedBytes > 0;
  if (truncated) {
    segment = bounded.segment;
    input.sessionLog(`continuation archive truncated at ${bounded.keptBytes} bytes; dropped ${bounded.droppedBytes} bytes before delivery`);
    input.updatePayload(input.record.seq, { ...payload, segment, deliveredChunks: 0, totalChunks: undefined, generation: undefined });
  }

  const chunks = splitContinuationArchiveSegment({
    agentId: payload.agentId,
    segment,
    occurredAt: input.record.occurredAt,
  });

  // Truncation changes the chunk boundaries, so stored progress no longer applies;
  // this matches the reset that was just persisted above.
  const storedGeneration = truncated ? undefined : payload.generation;
  const knownGeneration = typeof storedGeneration === "number" && Number.isSafeInteger(storedGeneration) ? storedGeneration : undefined;
  // Only trust an integer chunk count: a NaN or fractional value would otherwise
  // skip the loop (acking the record undelivered) or index a non-existent chunk.
  const storedDelivered = payload.deliveredChunks;
  const deliveredChunks =
    knownGeneration !== undefined && typeof storedDelivered === "number" && Number.isSafeInteger(storedDelivered)
      ? Math.max(0, Math.min(storedDelivered, chunks.length))
      : 0;
  let generation = knownGeneration;

  for (let index = deliveredChunks; index < chunks.length; index += 1) {
    const request = {
      agentId: payload.agentId,
      segment: chunks[index]!,
      occurredAt: input.record.occurredAt,
      ...(generation !== undefined ? { generation } : {}),
    };
    assertContinuationArchivePostFits(request);
    const response = await input.http.recordContinuationArchive(request) as ContinuationArchiveResponse;
    const returnedGeneration = archiveGeneration(response);
    if (generation === undefined) {
      // The first chunk establishes the generation that all later chunks reference.
      if (returnedGeneration === undefined) throw new Error("continuation archive response missing generation");
      generation = returnedGeneration;
    } else if (returnedGeneration !== undefined && returnedGeneration !== generation) {
      throw new Error(`continuation archive generation mismatch: expected ${generation}, got ${returnedGeneration}`);
    }
    // Persist after every chunk so a later retry does not re-send delivered chunks.
    input.updatePayload(input.record.seq, {
      ...payload,
      segment,
      generation,
      deliveredChunks: index + 1,
      totalChunks: chunks.length,
    } satisfies ContinuationArchiveOutboxPayload);
  }
}
81
+
82
/**
 * Pack a segment's lines into chunks. Consecutive lines are merged while the
 * merged text stays within CONTINUATION_ARCHIVE_CHUNK_TARGET_BYTES and its POST
 * body fits; a line whose own POST body does not fit is split on its own.
 * Every packed chunk is right-trimmed and empty chunks are discarded.
 */
function splitContinuationArchiveSegment(input: { agentId: string; segment: string; occurredAt: number }): string[] {
  const { agentId, segment, occurredAt } = input;
  const packed: string[] = [];
  let pending = "";

  // Emit whatever has been accumulated so far and start a fresh chunk.
  const flush = (): void => {
    if (pending) packed.push(pending.trimEnd());
    pending = "";
  };

  for (const line of transcriptLines(segment)) {
    if (!line) continue;

    if (!fitsContinuationArchiveChunk(agentId, line, occurredAt)) {
      // The line alone is too big for one POST: close the open chunk, then split it.
      flush();
      packed.push(...splitLongContinuationArchiveLine(agentId, line, occurredAt));
      continue;
    }

    const merged = pending ? pending + line : line;
    const mergedFits =
      utf8Bytes(merged) <= CONTINUATION_ARCHIVE_CHUNK_TARGET_BYTES &&
      fitsContinuationArchiveChunk(agentId, merged, occurredAt);
    if (mergedFits) {
      pending = merged;
    } else {
      flush();
      pending = line;
    }
  }

  flush();
  return packed.filter((chunk) => chunk.length > 0);
}
106
+
107
/**
 * Split text into lines, each keeping its trailing "\n" (the final line has none
 * when the text does not end in a newline). An empty string yields no lines.
 */
function transcriptLines(segment: string): string[] {
  const lines: string[] = [];
  let start = 0;
  while (start < segment.length) {
    const newlineAt = segment.indexOf("\n", start);
    const end = newlineAt === -1 ? segment.length : newlineAt + 1;
    lines.push(segment.slice(start, end));
    start = end;
  }
  return lines;
}
111
+
112
/**
 * Split a single over-long line into the fewest chunks that each stay within
 * CONTINUATION_ARCHIVE_CHUNK_TARGET_BYTES and whose POST body fits. Splits fall
 * on code-point boundaries, so no surrogate pair is torn apart.
 *
 * @throws Error when not even a single character fits the chunk budget
 */
function splitLongContinuationArchiveLine(agentId: string, line: string, occurredAt: number): string[] {
  const chars = Array.from(line);
  const chunks: string[] = [];
  let offset = 0;
  while (offset < chars.length) {
    // Binary-search the largest code-point count that still fits.
    let low = 1;
    // Every code point encodes to at least one UTF-8 byte, so a candidate longer
    // than the byte target can never fit. Capping the search here gives the same
    // answer while keeping each probe small instead of joining the whole tail.
    let high = Math.min(chars.length - offset, CONTINUATION_ARCHIVE_CHUNK_TARGET_BYTES);
    let best = 0;
    while (low <= high) {
      const mid = Math.floor((low + high) / 2);
      const candidate = chars.slice(offset, offset + mid).join("");
      if (utf8Bytes(candidate) <= CONTINUATION_ARCHIVE_CHUNK_TARGET_BYTES && fitsContinuationArchiveChunk(agentId, candidate, occurredAt)) {
        best = mid;
        low = mid + 1;
      } else {
        high = mid - 1;
      }
    }
    if (best === 0) throw new Error("continuation archive chunk budget cannot fit one character");
    chunks.push(chars.slice(offset, offset + best).join(""));
    offset += best;
  }
  return chunks;
}
136
+
137
/**
 * Whether a chunk's POST body stays strictly below the body limit. The size is
 * measured with the largest safe-integer generation, so the answer holds for
 * whatever generation the request ends up carrying.
 */
function fitsContinuationArchiveChunk(agentId: string, segment: string, occurredAt: number): boolean {
  const worstCaseRequest = { agentId, segment, occurredAt, generation: Number.MAX_SAFE_INTEGER };
  const bodyBytes = continuationArchivePostBytes(worstCaseRequest);
  return bodyBytes < CONTINUATION_ARCHIVE_MAX_POST_BODY_BYTES;
}
145
+
146
/**
 * Guard run right before a chunk is posted.
 *
 * @throws Error when the request's JSON body reaches the POST body limit
 */
function assertContinuationArchivePostFits(input: { agentId: string; segment: string; occurredAt: number; generation?: number }): void {
  const limit = CONTINUATION_ARCHIVE_MAX_POST_BODY_BYTES;
  const bytes = continuationArchivePostBytes(input);
  if (bytes < limit) return;
  throw new Error(`continuation archive chunk JSON body is ${bytes} bytes; max is ${limit - 1}`);
}
152
+
153
/** UTF-8 size, in bytes, of the request once serialized with JSON.stringify. */
function continuationArchivePostBytes(input: { agentId: string; segment: string; occurredAt: number; generation?: number }): number {
  const json = JSON.stringify(input);
  return new TextEncoder().encode(json).byteLength;
}
156
+
157
/**
 * Pull a usable generation out of an archive response.
 *
 * @returns the generation when it is a non-negative safe integer, otherwise undefined
 */
function archiveGeneration(response: ContinuationArchiveResponse): number | undefined {
  const candidate = response.archive?.generation;
  if (typeof candidate !== "number") return undefined;
  if (!Number.isSafeInteger(candidate) || candidate < 0) return undefined;
  return candidate;
}
161
+
162
/**
 * Longest prefix of `value` whose UTF-8 encoding is at most `maxBytes` bytes,
 * cut on a code-point boundary so no surrogate pair is split.
 *
 * Byte widths are derived from the code point rather than by encoding each
 * character, and the result is produced with a single slice; on multi-megabyte
 * input this avoids one encoder plus one buffer allocation per character and
 * the repeated string concatenation.
 */
function takeUtf8Prefix(value: string, maxBytes: number): string {
  let used = 0;
  let end = 0; // UTF-16 index just past the last character kept
  for (const char of value) {
    const codePoint = char.codePointAt(0) ?? 0;
    // UTF-8 width by code-point range. A lone surrogate lands in the 3-byte
    // range, which matches the 3-byte U+FFFD that TextEncoder substitutes for it.
    const bytes = codePoint < 0x80 ? 1 : codePoint < 0x800 ? 2 : codePoint < 0x10000 ? 3 : 4;
    if (used + bytes > maxBytes) break;
    used += bytes;
    end += char.length;
  }
  return value.slice(0, end);
}
173
+
174
/** Number of bytes `value` occupies when encoded as UTF-8. */
function utf8Bytes(value: string): number {
  const encoded = new TextEncoder().encode(value);
  return encoded.byteLength;
}
@@ -3,6 +3,7 @@ import type { Message, ReplyObligation } from "agent-relay-sdk";
3
3
  import { errMessage, isRecord } from "agent-relay-sdk";
4
4
  import type { ProviderPermissionDecisionInput, ProviderStatusEvent, SemanticStatus, TerminalAttachSpec } from "./adapter";
5
5
  import { logger, parseLogLevel, LOG_LEVELS } from "./logger";
6
+ import { buildRateLimitProviderState } from "./rate-limit";
6
7
 
7
8
  // A hook that failed in a way it could not handle itself reports here so the
8
9
  // failure is never silent (#198 item 5). Phase 1 logs it FATAL to the per-agent
@@ -106,6 +107,9 @@ export function startControlServer(options: ControlServerOptions): ControlServer
106
107
  if (url.pathname === "/hook-fatal" && req.method === "POST") {
107
108
  return handleHookFatal(req, options);
108
109
  }
110
+ if (url.pathname === "/rate-limit" && req.method === "POST") {
111
+ return handleRateLimit(req, options);
112
+ }
109
113
  if (url.pathname === "/monitor") {
110
114
  const upgraded = srv.upgrade(req, { data: { kind: "monitor" } });
111
115
  return upgraded ? undefined : new Response("WebSocket upgrade failed", { status: 400 });
@@ -441,6 +445,28 @@ async function handleStatus(req: Request, options: ControlServerOptions): Promis
441
445
  return Response.json({ ok: true, ...update });
442
446
  }
443
447
 
448
/**
 * #286: the stop-failure hook reports a usage/rate-limit stall here. Builds the
 * provider-neutral `blocked` state (reason rate_limit) — the same seam Claude
 * model-unavailable and Codex approvals ride — so the dashboard shows it
 * distinctly (never as idle/available) and the relay's resume sweep can lift it
 * at reset. The agent stays `idle` underneath (the turn truly ended); the runner
 * carries the hold via its rateLimitHold field, not an active-work claim.
 *
 * Request body (JSON object): `errorType?: string`, `resetTime?: number` (unix
 * seconds), `errorMessage?: string`. Fields that are missing, mistyped or empty
 * are ignored; `errorType` then defaults to "rate_limit".
 *
 * @returns 400 when the body is not a JSON object, otherwise `{ ok: true, errorType, resetAt? }`
 */
async function handleRateLimit(req: Request, options: ControlServerOptions): Promise<Response> {
  const body = await req.json().catch(() => null);
  if (!isRecord(body)) return Response.json({ error: "invalid body" }, { status: 400 });
  const errorType = typeof body.errorType === "string" && body.errorType ? body.errorType : "rate_limit";
  // CC's reset_time is unix SECONDS ("if available"); normalize to ms for the relay.
  // Require a positive value: a negative number is not a reset instant, and would
  // otherwise pass the truthiness checks below and be published as one.
  const resetTimeSec =
    typeof body.resetTime === "number" && Number.isFinite(body.resetTime) && body.resetTime > 0
      ? body.resetTime
      : undefined;
  const resetAt = resetTimeSec !== undefined ? Math.round(resetTimeSec * 1000) : undefined;
  const errorMessage = typeof body.errorMessage === "string" && body.errorMessage ? body.errorMessage : undefined;
  options.onStatus({
    status: "idle",
    clear: ["subagent"],
    providerState: buildRateLimitProviderState({ errorType, resetAt, message: errorMessage, source: "claude" }),
  });
  return Response.json({ ok: true, errorType, ...(resetAt ? { resetAt } : {}) });
}
469
+
444
470
  function statusTimelineEvent(body: Partial<ProviderStatusEvent> | null): ProviderStatusEvent["timeline"] | undefined {
445
471
  const timeline = body?.timeline;
446
472
  if (!timeline || typeof timeline !== "object" || Array.isArray(timeline)) return undefined;
package/src/outbox.ts CHANGED
@@ -298,6 +298,11 @@ export class Outbox {
298
298
  return (this.db.query("SELECT count(*) AS n FROM outbox WHERE poisoned = 0").get() as { n: number }).n;
299
299
  }
300
300
 
301
/**
 * Replace the stored payload of the non-poisoned outbox row with sequence `seq`.
 * The payload is stored as JSON; null and undefined are both stored as JSON `null`.
 */
updatePayload(seq: number, payload: unknown): void {
  const serialized = JSON.stringify(payload ?? null);
  const statement = this.db.query("UPDATE outbox SET payload = ? WHERE seq = ? AND poisoned = 0");
  statement.run(serialized, seq);
}
305
+
301
306
  poisonedCount(): number {
302
307
  return (this.db.query("SELECT count(*) AS n FROM outbox WHERE poisoned = 1").get() as { n: number }).n;
303
308
  }
@@ -0,0 +1,156 @@
1
+ // #286 — shared rate-limit hold construction + Claude pane detection.
2
+ //
3
+ // Two detection arms feed the SAME provider-neutral `blocked` hold:
4
+ // 1. The StopFailure hook (clean API-error turn-end: API-429/auth/billing) → control-server.
5
+ // 2. Pane-scrape of the subscription "session/weekly limit" modal (no hook exists for it —
6
+ // anthropics/claude-code#34817 was closed not-planned) → the Claude adapter.
7
+ // Both build the hold here so the shape stays identical, and the relay's resume sweep
8
+ // (services/rate-limit-resume.ts) lifts either once the window resets.
9
+
10
/** `reason` stamped on every rate-limit hold; also the fallback `errorType` when none is given. */
export const RATE_LIMIT_BLOCK_REASON = "rate_limit";
11
+
12
+ // Reject a parsed reset that's in the past or implausibly far out — a bad parse would
13
+ // otherwise resume too early (thrash) or hold for days. Beyond this we drop resetAt and
14
+ // fall back to the resume sweep's poll window.
15
+ const MAX_RESET_AHEAD_MS = 7 * 24 * 60 * 60 * 1000;
16
+
17
/** Inputs for buildRateLimitProviderState; every field is optional. */
export interface RateLimitHoldInput {
  /** Provider error class; defaults to RATE_LIMIT_BLOCK_REASON when omitted. */
  errorType?: string;
  /** Unix ms the limit window resets, when known. */
  resetAt?: number;
  /** Human-readable detail, copied onto the hold when non-empty. */
  message?: string;
  /** Which detector produced the hold; defaults to "claude" when omitted. */
  source?: string;
}
24
+
25
/**
 * Build the `blocked` providerState that a rate-limit hold carries. Shared by
 * both detection arms so the shape stays identical.
 *
 * `resetAt` is kept only when it is a finite, non-zero number; it then also
 * appears in the label as a clock time. `enteredAt` and `updatedAt` are both set
 * to the current time.
 */
export function buildRateLimitProviderState(input: RateLimitHoldInput): Record<string, unknown> {
  const timestamp = Date.now();
  const { errorType, message, source } = input;
  const resetAt = typeof input.resetAt === "number" && Number.isFinite(input.resetAt) ? input.resetAt : undefined;

  const hold: Record<string, unknown> = {
    state: "blocked",
    reason: RATE_LIMIT_BLOCK_REASON,
    label: resetAt ? `usage limit · resets ${formatResetClock(resetAt)}` : "usage limit reached",
    recommendedAction: "Holding until the usage window resets; agent-relay auto-resumes the agent.",
    source: source ?? "claude",
    errorType: errorType ?? RATE_LIMIT_BLOCK_REASON,
    enteredAt: timestamp,
  };
  // Optional fields are added only when present, so the hold carries no undefined keys.
  if (resetAt) hold.resetAt = resetAt;
  if (message) hold.message = message;
  hold.updatedAt = timestamp;
  return hold;
}
43
+
44
/**
 * Host-local two-digit hour and minute for the chat notice / badge label
 * (matches how the CLI shows "resets 3:45pm"). Falls back to the UTC HH:MM
 * slice of the ISO string if locale formatting throws.
 */
function formatResetClock(resetAtMs: number): string {
  const when = new Date(resetAtMs);
  try {
    return when.toLocaleTimeString([], { hour: "2-digit", minute: "2-digit" });
  } catch {
    return when.toISOString().slice(11, 16);
  }
}
52
+
53
// The subscription limit wordings seen in the wild (claude-auto-retry patterns +
// claude-auto-retry#15: "session"/"weekly limit" slipped past older regexes). Detection
// REQUIRES a limit phrase AND a reset/retry phrase somewhere in the captured pane (not
// necessarily on the same line) so normal output that merely mentions "rate limit"
// can't false-positive the hold.
//
// LIMIT_PHRASE_RE — the limit wording itself.
const LIMIT_PHRASE_RE = /(?:hit (?:your|the)\s*(?:[\w-]+\s+)*limit|usage limit|\d+-hour limit|limit reached|out of (?:extra )?usage|rate limit(?:\s+(?:hit|reached|exceeded))?|weekly limit|session limit)/i;
// RESET_AT_RE — "reset(s) [at] H[:MM] [am|pm] [(zone)]"; groups: hour, minute, meridiem, zone.
const RESET_AT_RE = /\bresets?\b(?:\s+at)?\s+(\d{1,2})(?::(\d{2}))?\s*(am|pm)?\s*(?:\(([^)]+)\))?/i;
// TRY_AGAIN_RE — "try again in N second(s)|minute(s)|hour(s)|day(s)"; groups: amount, unit.
const TRY_AGAIN_RE = /\btry again in\s+(\d+)\s*(second|minute|hour|day)s?/i;
60
+
61
/** Result of recognising a usage/rate-limit modal in captured pane text. */
export interface PaneRateLimit {
  /** The matched limit line, for the chat notice / observability. */
  message: string;
  /** Unix ms reset, when parseable from the pane (else undefined → resume falls back to poll). */
  resetAt?: number;
}
67
+
68
/**
 * Detect a Claude usage/rate-limit modal in a captured tmux pane and parse its
 * reset time.
 *
 * A match needs BOTH a limit phrase and a reset/retry phrase. They may be
 * anywhere in the pane, which keeps detection robust to a modal that wraps the
 * two onto separate lines; the pairing is what separates the real modal from
 * normal output that mentions "rate limit". A relative "try again in …" phrase
 * takes precedence over a wall-clock "resets …" phrase.
 *
 * @param text captured pane text
 * @param nowMs current time in unix ms; injectable for tests
 * @returns the limit line plus a plausible reset instant when one could be
 *   parsed, or null when the pane does not look like the modal
 */
export function parseClaudeRateLimitPane(text: string, nowMs: number = Date.now()): PaneRateLimit | null {
  if (!text) return null;
  const limitMatch = LIMIT_PHRASE_RE.exec(text);
  if (!limitMatch) return null;
  const message = limitLineAt(text, limitMatch.index);

  // Attach the reset instant only when it survives the plausibility clamp.
  const withReset = (candidate: number | undefined): PaneRateLimit => {
    const resetAt = clampReset(candidate, nowMs);
    return resetAt ? { message, resetAt } : { message };
  };

  const retryMatch = TRY_AGAIN_RE.exec(text);
  if (retryMatch) {
    const unitMsByName: Record<string, number> = { second: 1000, minute: 60_000, hour: 3_600_000, day: 86_400_000 };
    const unitMs = unitMsByName[retryMatch[2]!.toLowerCase()] ?? 0;
    return withReset(nowMs + Number(retryMatch[1]) * unitMs);
  }

  const clockMatch = RESET_AT_RE.exec(text);
  if (clockMatch) return withReset(parseClockReset(clockMatch, nowMs));

  // Limit phrase but no reset/retry anywhere — not confident it's the modal; skip to
  // avoid false-positiving on normal output (docs, the agent's own text) that says "limit".
  return null;
}
96
+
97
/**
 * The line of `text` that contains offset `index`, with box-drawing chrome
 * replaced by spaces, whitespace runs collapsed to one space, and both ends trimmed.
 */
function limitLineAt(text: string, index: number): string {
  const lineStart = text.lastIndexOf("\n", index) + 1;
  const nextNewline = text.indexOf("\n", index);
  const rawLine = nextNewline === -1 ? text.slice(lineStart) : text.slice(lineStart, nextNewline);
  const withoutChrome = rawLine.replace(/[│┌┐└┘─┤├╭╮╯╰]/g, " ");
  return withoutChrome.replace(/\s+/g, " ").trim();
}
104
+
105
/**
 * Keep a reset instant only when it is plausible: finite, strictly after
 * `nowMs`, and no more than MAX_RESET_AHEAD_MS ahead. Anything else yields
 * undefined.
 */
function clampReset(resetAt: number | undefined, nowMs: number): number | undefined {
  if (resetAt === undefined) return undefined;
  if (!Number.isFinite(resetAt)) return undefined;
  const alreadyPassed = resetAt <= nowMs;
  const tooFarAhead = resetAt - nowMs > MAX_RESET_AHEAD_MS;
  return alreadyPassed || tooFarAhead ? undefined : resetAt;
}
110
+
111
/**
 * Next occurrence of a wall-clock "H[:MM] am/pm" in the message's IANA timezone
 * (if given and valid) or host-local. Timezone-aware so
 * "resets 4:50pm (Asia/Shanghai)" is correct regardless of where the runner host sits.
 *
 * The next-day rollover is done on the calendar (day + 1) rather than by adding
 * a fixed 24 hours, and in the timezone branch the UTC offset is re-read at the
 * candidate instant, so the result keeps the requested wall-clock time across a
 * daylight-saving transition.
 *
 * @param match a RESET_AT_RE match: [1] hour, [2] minute, [3] am/pm, [4] zone
 * @param nowMs current time in unix ms
 * @returns the reset instant in unix ms, or undefined when hour > 23 or minute > 59
 */
function parseClockReset(match: RegExpExecArray, nowMs: number): number | undefined {
  let hour = Number(match[1]);
  const minute = match[2] ? Number(match[2]) : 0;
  const meridiem = match[3]?.toLowerCase();
  const tz = match[4]?.trim();
  if (!Number.isFinite(hour) || hour > 23 || minute > 59) return undefined;
  if (meridiem === "pm" && hour < 12) hour += 12;
  if (meridiem === "am" && hour === 12) hour = 0;

  if (tz && isValidTimeZone(tz)) {
    // Sample on a whole second so the derived offset is an exact zone offset.
    const nowSec = Math.floor(nowMs / 1000) * 1000;
    const { y, mo, d, offsetMs } = tzWallAndOffset(tz, nowSec);
    // Instant at which the zone's wall clock reads hour:minute, `dayShift` days after today.
    const instantFor = (dayShift: number): number => {
      const wallAsUtc = Date.UTC(y, mo - 1, d + dayShift, hour, minute, 0);
      // First guess uses the offset in force now; refine with the offset in force
      // at that guess, which differs only when a DST change lies in between.
      const firstGuess = wallAsUtc - offsetMs;
      return wallAsUtc - tzWallAndOffset(tz, firstGuess).offsetMs;
    };
    const today = instantFor(0);
    return today > nowMs ? today : instantFor(1);
  }

  const local = new Date(nowMs);
  local.setHours(hour, minute, 0, 0);
  // Roll to tomorrow on the calendar so the wall-clock time is preserved even
  // when the local day is not exactly 24 hours long.
  if (local.getTime() <= nowMs) local.setDate(local.getDate() + 1);
  return local.getTime();
}
135
+
136
/** Whether `tz` is accepted by Intl.DateTimeFormat as a `timeZone` option. */
function isValidTimeZone(tz: string): boolean {
  let accepted = true;
  try {
    // Constructing the formatter throws for a zone the runtime does not accept.
    new Intl.DateTimeFormat("en-US", { timeZone: tz });
  } catch {
    accepted = false;
  }
  return accepted;
}
144
+
145
/**
 * The target TZ's wall-clock date and UTC offset at `atMs`, via Intl (no deps).
 *
 * @param tz timezone identifier accepted by Intl.DateTimeFormat
 * @param atMs instant to evaluate, in unix ms
 * @returns `y`/`mo`/`d` — the calendar date in `tz` (month is 1-based) — and
 *   `offsetMs`, the zone's offset from UTC at that instant (positive east of UTC)
 */
function tzWallAndOffset(tz: string, atMs: number): { y: number; mo: number; d: number; offsetMs: number } {
  const dtf = new Intl.DateTimeFormat("en-US", {
    timeZone: tz, hour12: false,
    year: "numeric", month: "2-digit", day: "2-digit", hour: "2-digit", minute: "2-digit", second: "2-digit",
  });
  const parts = Object.fromEntries(dtf.formatToParts(new Date(atMs)).map((p) => [p.type, p.value])) as Record<string, string>;
  const y = Number(parts.year), mo = Number(parts.month), d = Number(parts.day);
  // `% 24` guards against midnight being reported as hour "24" under hour12: false.
  const h = Number(parts.hour) % 24, mi = Number(parts.minute), s = Number(parts.second);
  const asUtc = Date.UTC(y, mo - 1, d, h, mi, s);
  // The formatted parts have whole-second resolution, so compare against `atMs`
  // floored to the second; otherwise the sub-second part of `atMs` leaks into the offset.
  const atWholeSecond = Math.floor(atMs / 1000) * 1000;
  return { y, mo, d, offsetMs: asUtc - atWholeSecond };
}
package/src/runner.ts CHANGED
@@ -21,6 +21,7 @@ import { RelayMcpProxy } from "./relay-mcp-proxy";
21
21
  import { runtimeMetadata } from "./version";
22
22
  import { logger, parseLogLevel } from "./logger";
23
23
  import { ensureSessionScratch, reapSessionScratch, sweepStaleSessions, type SessionScratchLayout } from "./session-scratch";
24
+ import { boundContinuationArchiveSegment, deliverContinuationArchiveRecord } from "./continuation-archive";
24
25
 
25
26
  // A destructive session transition. The runner runs end-of-session work (Insights
26
27
  // capture, #183/#184) before the invasive operation and, during that window, presents a
@@ -277,6 +278,12 @@ export class AgentRunner {
277
278
  // busy reconciler doesn't mistake a permission prompt for a stuck-busy turn.
278
279
  private providerBlocked = false;
279
280
  private terminalFailure?: { reason: string; message: string; providerState?: Record<string, unknown> };
281
+ // #286: a usage/rate-limit hold. The turn truly ended (idle), but the agent is
282
+ // held until the limit resets, so this can't ride an active-work claim like the
283
+ // permission-blocked state does. publishStatus surfaces it as the agent's
284
+ // providerState; it clears when a new turn starts (the relay's resume message
285
+ // wakes one) so the blocked badge lifts on its own.
286
+ private rateLimitHold?: Record<string, unknown>;
280
287
  // Reasoning tailer (item 5): streams the in-flight turn's reasoning/tool steps
281
288
  // from the Claude transcript into chat as discreet session events.
282
289
  private reasoningTail?: { timer: ReturnType<typeof setInterval>; seen: Set<string> };
@@ -1102,11 +1109,22 @@ export class AgentRunner {
1102
1109
  };
1103
1110
  }
1104
1111
  if (typeof update !== "string" && update.providerState) {
1105
- const state = (update.providerState as { state?: unknown }).state;
1106
- this.providerBlocked = state === "blocked";
1112
+ const ps = update.providerState as { state?: unknown; reason?: unknown };
1113
+ this.providerBlocked = ps.state === "blocked";
1114
+ // A rate-limit hold persists across the idle the turn ends on; any other
1115
+ // providerState supersedes it (clears a stale hold).
1116
+ if (ps.state === "blocked" && ps.reason === "rate_limit") {
1117
+ const fresh = !this.rateLimitHold;
1118
+ this.rateLimitHold = update.providerState;
1119
+ if (fresh) this.publishRateLimitNotice(update.providerState);
1120
+ } else {
1121
+ this.rateLimitHold = undefined;
1122
+ }
1107
1123
  } else if (status === "idle") {
1108
1124
  this.providerBlocked = false;
1109
1125
  }
1126
+ // Forward progress (a real turn) lifts the hold so the blocked badge clears.
1127
+ if (status === "busy") this.rateLimitHold = undefined;
1110
1128
  if (typeof update !== "string" && status === "error") {
1111
1129
  const terminalReason = typeof update.metadata?.terminalFailureReason === "string"
1112
1130
  ? update.metadata.terminalFailureReason
@@ -1289,9 +1307,7 @@ export class AgentRunner {
1289
1307
  });
1290
1308
  }
1291
1309
 
1292
- // The outbox transport: map a queued record to its HTTP call. Throw to retry, return to
1293
- // ack (delete). occurredAt + idempotencyKey are injected from the record so retries are
1294
- // exactly-once server-side and carry true event time.
1310
+ // Map queued records to HTTP calls. Throw to retry, return to ack/delete.
1295
1311
  private async deliverOutboxEvent(record: OutboxRecord): Promise<void> {
1296
1312
  try {
1297
1313
  if (record.kind === "session-message") {
@@ -1310,10 +1326,7 @@ export class AgentRunner {
1310
1326
  return;
1311
1327
  }
1312
1328
  if (record.kind === "continuation-archive") {
1313
- await this.http.recordContinuationArchive({
1314
- ...(record.payload as Parameters<RelayHttpClient["recordContinuationArchive"]>[0]),
1315
- occurredAt: record.occurredAt,
1316
- });
1329
+ await deliverContinuationArchiveRecord({ record, http: this.http, updatePayload: (seq, payload) => this.outbox.updatePayload(seq, payload), sessionLog: (message) => this.sessionLog(message) });
1317
1330
  return;
1318
1331
  }
1319
1332
  if (record.kind === "mcp-tool-call") {
@@ -1510,14 +1523,16 @@ export class AgentRunner {
1510
1523
  const segment = archive.slice(this.archiveObservedChars).trim();
1511
1524
  this.archiveObservedChars = archive.length;
1512
1525
  if (!segment) return;
1526
+ const bounded = boundContinuationArchiveSegment(segment);
1527
+ if (bounded.droppedBytes > 0) this.sessionLog(`continuation archive truncated at ${bounded.keptBytes} bytes; dropped ${bounded.droppedBytes} bytes (${reason})`);
1513
1528
  this.outbox.enqueue({
1514
1529
  kind: "continuation-archive",
1515
1530
  payload: {
1516
1531
  agentId: this.agentId,
1517
- segment,
1532
+ segment: bounded.segment,
1518
1533
  },
1519
1534
  });
1520
- this.sessionLog(`continuation archive queued (${segment.length} chars, ${reason})`);
1535
+ this.sessionLog(`continuation archive queued (${bounded.segment.length} chars, ${reason})`);
1521
1536
  }
1522
1537
 
1523
1538
  private async captureContextRatio(reason: SessionDestroyReason, opts?: { transcriptPath?: string }): Promise<void> {
@@ -1782,6 +1797,21 @@ export class AgentRunner {
1782
1797
  });
1783
1798
  }
1784
1799
 
1800
/**
 * #286: publish a discreet, durable chat marker when a usage/rate-limit hold
 * begins, via the same session-mirror lane as the compaction notice.
 *
 * It is an outbound session event (NOT an inbound message), so it shows in the
 * dashboard chat WITHOUT waking a turn — waking a still-limited agent would just
 * re-fail. The relay sends the waking resume message later, once the window has
 * reset.
 *
 * @param providerState the hold's provider state; its `label` is used when it is
 *   a non-empty string, otherwise "usage limit reached"
 */
private publishRateLimitNotice(providerState: Record<string, unknown>): void {
  const rawLabel = providerState.label;
  const label = typeof rawLabel === "string" && rawLabel ? rawLabel : "usage limit reached";
  const turnId = this.currentTurnId;
  this.publishSessionEvent({
    from: this.agentId,
    to: "user",
    body: `⏳ ${label} — holding; agent-relay will auto-resume at reset.`,
    session: { type: "notice", origin: "provider", label: "rate-limit", ...(turnId ? { turnId } : {}) },
  });
}
1814
+
1785
1815
  private publishStatus(): void {
1786
1816
  this.claims.expire();
1787
1817
  const status = this.claims.currentStatus();
@@ -1789,7 +1819,7 @@ export class AgentRunner {
1789
1819
  const activeWork = this.claims.activeWork();
1790
1820
  const activeSubagents = activeWork.filter((item) => item.kind === "subagent");
1791
1821
  const terminalFailure = this.terminalFailure;
1792
- const providerState = terminalFailure?.providerState ?? providerStateFromActiveWork(activeWork);
1822
+ const providerState = terminalFailure?.providerState ?? this.rateLimitHold ?? providerStateFromActiveWork(activeWork);
1793
1823
  this.bus.setSemanticStatus(status === "offline" || status === "error" ? "idle" : status);
1794
1824
  const timelineEvent = this.pendingTimelineEvent;
1795
1825
  this.pendingTimelineEvent = undefined;