agent-relay-runner 0.35.2 → 0.35.4

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agent-relay-runner",
3
- "version": "0.35.2",
3
+ "version": "0.35.4",
4
4
  "description": "Unified provider lifecycle runner for Agent Relay",
5
5
  "type": "module",
6
6
  "bin": {
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "agent-relay-runner",
3
3
  "description": "Thin Agent Relay runner bridge for Claude Code",
4
- "version": "0.35.2",
4
+ "version": "0.35.4",
5
5
  "agentRelayContracts": {
6
6
  "providerPluginProtocol": 1
7
7
  }
@@ -12,9 +12,14 @@ trap '[ "${_relay_clear_idle_on_exit:-0}" = "1" ] && relay_post_status_clearing_
12
12
 
13
13
  payload="$(cat || true)"
14
14
  stop_hook_active="$(relay_json_bool_field stop_hook_active "$payload")"
15
+
16
+ # Session-mirror capture is orthogonal to reply-obligation gating (#332): a recursive Stop
17
+ # (stop_hook_active=true) must NOT suppress capturing this turn's response, or the live mirror
18
+ # silently drops frames. Always post the turn; only the reply-obligation decision is gated below.
19
+ last_assistant_msg="$(echo "$payload" | jq -c '.last_assistant_message // empty' 2>/dev/null || true)"
20
+ relay_post_session_turn "$(relay_json_string_field transcript_path "$payload")" "$last_assistant_msg"
21
+
15
22
  if [ "$stop_hook_active" != "true" ]; then
16
- last_assistant_msg="$(echo "$payload" | jq -c '.last_assistant_message // empty' 2>/dev/null || true)"
17
- relay_post_session_turn "$(relay_json_string_field transcript_path "$payload")" "$last_assistant_msg"
18
23
  # `|| true`: under `set -e`, a non-zero from the obligation check must never abort
19
24
  # the hook before the idle-clear — clearing the turn is the critical path (#199).
20
25
  stop_decision="$(relay_pending_reply_stop_decision || true)"
package/src/adapter.ts CHANGED
@@ -154,7 +154,9 @@ export interface ProviderAdapter {
154
154
  probeActivity?(process: ManagedProcess): Promise<"busy" | "idle" | "unknown">;
155
155
  terminalAttachSpec?(process: ManagedProcess): Promise<TerminalAttachSpec>;
156
156
  respondToPermissionDecision?(process: ManagedProcess, input: ProviderPermissionDecisionInput): Promise<Record<string, unknown> | void>;
157
- deliverInitialPrompt?(process: ManagedProcess, prompt: string): Promise<void>;
157
+ // `options.readyTimeoutMs` lets the runner widen the provider-ready wait for the
158
+ // first (cold-start) delivery vs. a fast re-attempt after a ready signal (#329).
159
+ deliverInitialPrompt?(process: ManagedProcess, prompt: string, options?: { readyTimeoutMs?: number }): Promise<void>;
158
160
  deliver(process: ManagedProcess, messages: Message[]): Promise<void>;
159
161
  onStatusChange(cb: (status: ProviderStatusUpdate) => void): void;
160
162
  // Subscribe to session-mirror events from providers that emit them directly
@@ -191,11 +191,11 @@ export class ClaudeAdapter implements ProviderAdapter {
191
191
  }
192
192
  }
193
193
 
194
- async deliverInitialPrompt(process: ManagedProcess, prompt: string): Promise<void> {
194
+ async deliverInitialPrompt(process: ManagedProcess, prompt: string, options?: { readyTimeoutMs?: number }): Promise<void> {
195
195
  const session = process.meta?.tmuxSession as string | undefined;
196
196
  const socket = process.meta?.tmuxSocket as string | undefined;
197
197
  if (!session || !tmuxHasSession(session, socket)) throw new Error("no active tmux session for initial prompt");
198
- await waitForClaudeInputReady(session, CLAUDE_TMUX_READY_TIMEOUT_MS, socket);
198
+ await waitForClaudeInputReady(session, options?.readyTimeoutMs ?? CLAUDE_TMUX_READY_TIMEOUT_MS, socket);
199
199
  await submitTextToTmux(session, prompt, socket);
200
200
  }
201
201
 
@@ -286,8 +286,8 @@ export class CodexAdapter implements ProviderAdapter {
286
286
  };
287
287
  }
288
288
 
289
- async deliverInitialPrompt(process: ManagedProcess, prompt: string): Promise<void> {
290
- const text = prompt.trim();
289
+ async deliverInitialPrompt(process: ManagedProcess, prompt: string, _options?: { readyTimeoutMs?: number }): Promise<void> {
290
+ const text = prompt.trim(); // _options.readyTimeoutMs (#329) is a Claude-TUI concern; ignored here
291
291
  if (!text) return;
292
292
  const threadId = await ensureCodexThread(process);
293
293
  let input = [
package/src/runner.ts CHANGED
@@ -148,6 +148,13 @@ const INTERRUPT_RECONCILE_DELAY_MS = 1_500;
148
148
  // match) — it is emitted only by the harness, never typed by a human, so any
149
149
  // echo starting with it is a system injection, not a user prompt (#289).
150
150
  const RELAY_INJECTION_MARKERS = ["[relay message #", "[agent-relay", "<task-notification>"];
151
+ // #329: a cold isolated profile (first-run trust/onboarding/install gates) can take far longer
152
+ // than the warm-restart window to render an input-ready TUI, so the first prompt delivery gets a
153
+ // generous timeout. If it still misses, the prompt is re-attempted on the next provider ready
154
+ // signal (short timeout, since it just went idle), up to a bounded number of attempts.
155
+ const INITIAL_PROMPT_FIRST_READY_TIMEOUT_MS = 30_000;
156
+ const INITIAL_PROMPT_RETRY_READY_TIMEOUT_MS = 10_000;
157
+ const MAX_INITIAL_PROMPT_ATTEMPTS = 6;
151
158
  // Reasoning tailer poll cadence (item 5). Coarse on purpose — reasoning is a
152
159
  // discreet progress signal, not a token stream, so ~1.2s keeps it light.
153
160
  const REASONING_POLL_MS = 1_200;
@@ -175,6 +182,10 @@ export class AgentRunner {
175
182
  // insights, hook-fatal) go through this durable, disk-backed, timestamped queue instead of
176
183
  // direct fire-and-forget HTTP — so nothing is lost across a server/Runner restart.
177
184
  private readonly outbox: Outbox;
185
+ // #332: a fast-lane outbox for display-only session-mirror trace events. Routing them through
186
+ // their own FIFO queue stops one transient trace failure from head-of-line blocking real-message
187
+ // delivery (and vice versa); its backoff is capped low since a stale live-mirror frame is cheap.
188
+ private readonly sessionOutbox: Outbox;
178
189
  private currentToken?: string;
179
190
  private currentTokenJti?: string;
180
191
  private currentTokenProfileId?: string;
@@ -231,6 +242,11 @@ export class AgentRunner {
231
242
  private readonly activeTaskClaims = new Map<number, ActiveTaskClaim>();
232
243
  private pendingTimelineEvent?: RunnerTimelineEvent;
233
244
  private pendingPromptMessageId?: number;
245
+ // #329: a spawn-time initial prompt whose first delivery timed out before the TUI was
246
+ // ready. Held (not dropped) for a ready-signal-driven re-attempt; cleared on success.
247
+ private pendingInitialPrompt?: string;
248
+ private deliveringInitialPrompt = false;
249
+ private initialPromptAttempts = 0;
234
250
  // Session-mirror: a synthesized id grouping a turn's reasoning/tool steps and
235
251
  // its final response. Set when a provider-turn starts, cleared when it ends.
236
252
  private currentTurnId?: string;
@@ -281,6 +297,14 @@ export class AgentRunner {
281
297
  const outboxDir = process.env.AGENT_RELAY_RUNNER_OUTBOX_DIR
282
298
  ?? (process.env.AGENT_RELAY_RUNNER_INFO_FILE ? join(dirname(process.env.AGENT_RELAY_RUNNER_INFO_FILE), "outbox") : undefined);
283
299
  this.outbox = new Outbox({ agentId: this.agentId, dir: outboxDir, send: (record) => this.deliverOutboxEvent(record) });
300
+ this.sessionOutbox = new Outbox({
301
+ agentId: `${this.agentId}-session`,
302
+ dir: outboxDir,
303
+ send: (record) => this.deliverOutboxEvent(record),
304
+ // Ephemeral trace frames: cap the per-row stall low and poison sooner than the main queue.
305
+ maxBackoffMs: 5_000,
306
+ maxAttempts: 6,
307
+ });
284
308
  this.bus = new RelayBusClient({
285
309
  url: relayBusUrl(options.relayUrl),
286
310
  role: "provider",
@@ -383,6 +407,7 @@ export class AgentRunner {
383
407
  await this.bus.connect();
384
408
  this.obligationCache.start();
385
409
  this.outbox.start();
410
+ this.sessionOutbox.start();
386
411
  this.ensureScratch();
387
412
  void this.sweepStaleScratch();
388
413
  this.process = await this.spawnProvider();
@@ -424,6 +449,7 @@ export class AgentRunner {
424
449
  this.stopReasoningTail();
425
450
  this.obligationCache.stop();
426
451
  this.outbox.close();
452
+ this.sessionOutbox.close();
427
453
  this.proxy?.stop();
428
454
  this.control?.stop();
429
455
  await this.bus.close();
@@ -574,11 +600,27 @@ export class AgentRunner {
574
600
 
575
601
  private async deliverInitialPrompt(): Promise<void> {
576
602
  const prompt = this.options.prompt?.trim();
577
- if (!prompt || !this.process || !this.options.adapter.deliverInitialPrompt) return;
603
+ if (prompt) await this.attemptInitialPromptDelivery(prompt, INITIAL_PROMPT_FIRST_READY_TIMEOUT_MS);
604
+ }
605
+
606
+ // Deliver the spawn-time first prompt, surviving a cold-start TUI that isn't input-ready yet
607
+ // (#329). A ready-timeout no longer swallow-drops the prompt (the old bug → a taskless worker
608
+ // that looked healthy): it's stashed for a ready-signal-driven retry (setProviderStatus) and
609
+ // surfaced on the runner timeline, so the spawner sees a stuck delivery, not just a log line.
610
+ private async attemptInitialPromptDelivery(prompt: string, readyTimeoutMs: number): Promise<void> {
611
+ if (this.deliveringInitialPrompt || this.stopped || !this.process || !this.options.adapter.deliverInitialPrompt) return;
612
+ this.deliveringInitialPrompt = true;
613
+ const attempt = (this.initialPromptAttempts += 1);
578
614
  try {
579
- await this.options.adapter.deliverInitialPrompt(this.process, prompt);
615
+ await this.options.adapter.deliverInitialPrompt(this.process, prompt, { readyTimeoutMs });
616
+ this.pendingInitialPrompt = undefined;
580
617
  } catch (error) {
581
- logger.error("runner", `initial prompt delivery failed: ${error}`);
618
+ const giveUp = attempt >= MAX_INITIAL_PROMPT_ATTEMPTS;
619
+ this.pendingInitialPrompt = giveUp ? undefined : prompt;
620
+ logger.error("runner", `initial prompt attempt ${attempt} failed${giveUp ? " (giving up)" : "; will retry on next ready signal"}: ${errMessage(error)}`);
621
+ this.publishRunnerTimelineEvent({ status: "prompt_failed", timestamp: Date.now(), title: "Initial prompt not delivered", body: errMessage(error), metadata: { attempts: attempt, terminal: giveUp } });
622
+ } finally {
623
+ this.deliveringInitialPrompt = false;
582
624
  }
583
625
  }
584
626
 
@@ -1078,6 +1120,11 @@ export class AgentRunner {
1078
1120
  if (typeof update !== "string") {
1079
1121
  for (const kind of update.clear ?? []) this.claims.clearWorkKind(kind);
1080
1122
  }
1123
+ // #329: the provider just went idle (ready). If a first initial-prompt delivery timed out on
1124
+ // a cold-start TUI, re-attempt now — the ready wait should resolve almost immediately.
1125
+ if (status === "idle" && this.pendingInitialPrompt && !this.deliveringInitialPrompt) {
1126
+ void this.attemptInitialPromptDelivery(this.pendingInitialPrompt, INITIAL_PROMPT_RETRY_READY_TIMEOUT_MS);
1127
+ }
1081
1128
  this.publishStatus();
1082
1129
  }
1083
1130
 
@@ -1161,8 +1208,9 @@ export class AgentRunner {
1161
1208
  }): void {
1162
1209
  // Durable, ordered, timestamped (#196): the actual POST happens in deliverOutboxEvent,
1163
1210
  // retried until it lands. occurredAt is stamped now so a queued event reports when it
1164
- // truly happened, not when the server finally accepted it.
1165
- this.outbox.enqueue({
1211
+ // truly happened, not when the server finally accepted it. Routed through the fast-lane
1212
+ // sessionOutbox (#332) so a transient trace failure can't head-of-line block real messages.
1213
+ this.sessionOutbox.enqueue({
1166
1214
  kind: "session-message",
1167
1215
  payload: {
1168
1216
  from: input.from,