agent-relay-runner 0.31.1 → 0.32.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agent-relay-runner",
3
- "version": "0.31.1",
3
+ "version": "0.32.2",
4
4
  "description": "Unified provider lifecycle runner for Agent Relay",
5
5
  "type": "module",
6
6
  "bin": {
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "agent-relay-runner",
3
3
  "description": "Thin Agent Relay runner bridge for Claude Code",
4
- "version": "0.31.1",
4
+ "version": "0.32.2",
5
5
  "agentRelayContracts": {
6
6
  "providerPluginProtocol": 1
7
7
  }
@@ -44,9 +44,16 @@ relay_post_session_turn() {
44
44
  local last_assistant_message="${2:-}"
45
45
  local port="${AGENT_RELAY_RUNNER_PORT:-}"
46
46
  [ -z "$port" ] && return 0
47
- [ -z "$transcript_path" ] && return 0
48
- local body="{\"transcriptPath\":\"$(relay_json_escape "$transcript_path")\""
49
- [ -n "$last_assistant_message" ] && body="${body},\"lastAssistantMessage\":${last_assistant_message}"
47
+ # transcript_path is the primary capture source, but the Stop payload can omit it
48
+ # (known CC edge case). last_assistant_message is the fallback for exactly that —
49
+ # POST with whichever we have; only bail when both are missing.
50
+ [ -z "$transcript_path" ] && [ -z "$last_assistant_message" ] && return 0
51
+ local body="{"
52
+ [ -n "$transcript_path" ] && body="${body}\"transcriptPath\":\"$(relay_json_escape "$transcript_path")\""
53
+ if [ -n "$last_assistant_message" ]; then
54
+ [ "$body" != "{" ] && body="${body},"
55
+ body="${body}\"lastAssistantMessage\":${last_assistant_message}"
56
+ fi
50
57
  body="${body}}"
51
58
  curl -fsS -X POST "http://127.0.0.1:${port}/session-turn" \
52
59
  -H 'Content-Type: application/json' \
@@ -352,8 +352,13 @@ async function handleSessionTurn(req: Request, options: ControlServerOptions): P
352
352
  if (!options.onSessionTurn) return Response.json({ ok: false, reason: "session capture unavailable" });
353
353
  const body = await req.json().catch(() => null);
354
354
  const transcriptPath = isRecord(body) && typeof body.transcriptPath === "string" ? body.transcriptPath : "";
355
- if (!transcriptPath) return Response.json({ ok: false, reason: "transcriptPath required" }, { status: 400 });
356
355
  const lastAssistantMessage = isRecord(body) ? body.lastAssistantMessage : undefined;
356
+ // transcriptPath is the primary source, but it can be absent (known CC Stop edge case).
357
+ // lastAssistantMessage is the fallback for exactly that — accept either; reject only when
358
+ // both are missing so the turn still reaches the dashboard chat (#264).
359
+ if (!transcriptPath && lastAssistantMessage === undefined) {
360
+ return Response.json({ ok: false, reason: "transcriptPath or lastAssistantMessage required" }, { status: 400 });
361
+ }
357
362
  // Awaited on purpose: the Stop hook posts this synchronously before its
358
363
  // reply-obligation check, so the captured turn must be persisted (and the
359
364
  // obligation cleared) before this response returns.
package/src/outbox.ts CHANGED
@@ -68,6 +68,10 @@ const DEFAULTS = {
68
68
  pollMs: 5_000,
69
69
  };
70
70
 
71
+ // Pause between forced delivery attempts during flush() — keeps a brief server outage
72
+ // from hot-looping while still retrying fast enough to land within the flush deadline.
73
+ const FLUSH_RETRY_PAUSE_MS = 150;
74
+
71
75
  interface Row {
72
76
  seq: number;
73
77
  kind: string;
@@ -93,6 +97,8 @@ export class Outbox {
93
97
 
94
98
  private draining = false;
95
99
  private rerun = false;
100
+ // Set by flush() to make the next drain pass ignore per-row backoff (deliver NOW).
101
+ private forceDue = false;
96
102
  private pollTimer?: ReturnType<typeof setInterval>;
97
103
  private dueTimer?: ReturnType<typeof setTimeout>;
98
104
  private stopped = false;
@@ -185,24 +191,45 @@ export class Outbox {
185
191
  this.pollTimer.unref?.();
186
192
  }
187
193
 
194
+ // Drain everything we can before the process exits, ignoring per-row backoff. Used on
195
+ // shutdown/kill/crash (#183): the capture seam durably enqueues the end-of-session
196
+ // Insights datapoint, but the per-agent outbox is never reopened once the agent is gone,
197
+ // so a row that hasn't been POSTed yet is stranded forever. flush() blocks (bounded) on
198
+ // delivery — the opposite intent of the polite, backoff-respecting steady-state drain.
199
+ // Returns true if the queue fully drained within the deadline.
200
+ async flush(timeoutMs: number): Promise<boolean> {
201
+ const deadline = Date.now() + Math.max(0, timeoutMs);
202
+ for (;;) {
203
+ if (this.stopped) return this.pendingCount() === 0;
204
+ if (this.pendingCount() === 0) return true;
205
+ await this.drain(true);
206
+ if (this.pendingCount() === 0) return true;
207
+ if (Date.now() >= deadline) return false;
208
+ await new Promise<void>((resolve) => setTimeout(resolve, FLUSH_RETRY_PAUSE_MS));
209
+ }
210
+ }
211
+
188
212
  // Process the queue strictly oldest-first. Coalesces concurrent calls; if a drain is
189
213
  // requested while one is running, it re-runs once at the end (so an enqueue during a
190
- // send isn't missed).
191
- async drain(): Promise<void> {
214
+ // send isn't missed). `force` (from flush) makes this pass ignore per-row backoff.
215
+ async drain(force = false): Promise<void> {
192
216
  if (this.stopped) return;
217
+ if (force) this.forceDue = true;
193
218
  if (this.draining) { this.rerun = true; return; }
194
219
  this.draining = true;
195
220
  try {
196
221
  do {
197
222
  this.rerun = false;
198
- await this.drainOnce();
199
- } while (this.rerun && !this.stopped);
223
+ const due = this.forceDue;
224
+ this.forceDue = false;
225
+ await this.drainOnce(due);
226
+ } while ((this.rerun || this.forceDue) && !this.stopped);
200
227
  } finally {
201
228
  this.draining = false;
202
229
  }
203
230
  }
204
231
 
205
- private async drainOnce(): Promise<void> {
232
+ private async drainOnce(force = false): Promise<void> {
206
233
  for (;;) {
207
234
  if (this.stopped) return;
208
235
  const row = this.db
@@ -211,7 +238,7 @@ export class Outbox {
211
238
  if (!row) return;
212
239
 
213
240
  const now = Date.now();
214
- if (row.next_attempt_at > now) {
241
+ if (!force && row.next_attempt_at > now) {
215
242
  // Head isn't due yet. Don't reorder past it (FIFO) — schedule a wake-up and stop.
216
243
  this.scheduleDue(row.next_attempt_at - now);
217
244
  return;
package/src/runner.ts CHANGED
@@ -26,7 +26,15 @@ import { ensureSessionScratch, reapSessionScratch, sweepStaleSessions, type Sess
26
26
  // capture, #183/#184) before the invasive operation and, during that window, presents a
27
27
  // distinct non-addressable lifecycle state. Bus commands and provider hooks (Claude
28
28
  // PreCompact / SessionEnd) both normalize to one of these.
29
- type SessionDestroyReason = "compact" | "clear" | "restart" | "shutdown" | "kill";
29
+ type SessionDestroyReason = "compact" | "clear" | "restart" | "shutdown" | "kill" | "crash";
30
+
31
+ // Reasons after which the runner process won't survive to drain the durable outbox (and the
32
+ // per-agent outbox is never reopened once the agent is gone). For these, pre-destroy must
33
+ // block on delivery of the just-captured Insights datapoint, not just enqueue it (#183).
34
+ // `restart` (bus command) deliberately excluded: the runner stays alive and drains normally.
35
+ function reasonExitsRunner(reason: SessionDestroyReason): boolean {
36
+ return reason === "shutdown" || reason === "kill" || reason === "crash";
37
+ }
30
38
 
31
39
  // `finalizing-<reason>` is the transient pre-destroy window; the others are the executing
32
40
  // teardown states the dashboard already renders.
@@ -38,6 +46,11 @@ type LifecycleAction =
38
46
  // transcript read or a wedged provider from stalling a shutdown the operator asked for.
39
47
  const PRE_DESTROY_TIMEOUT_MS = 4_000;
40
48
 
49
+ // Bounded window to deliver the durable outbox before an exit-bound teardown (#183). Kept
50
+ // short so a wedged/down server can't stall an operator-requested shutdown for long; a
51
+ // row that still can't land is logged, not silently dropped.
52
+ const OUTBOX_FLUSH_TIMEOUT_MS = 3_000;
53
+
41
54
  // Map a lifecycle bus command to its destructive boundary reason, or undefined for
42
55
  // non-destructive commands (interrupt, inject, reconnect, permission decisions).
43
56
  function boundaryReasonForCommand(type: string): SessionDestroyReason | undefined {
@@ -815,6 +828,16 @@ export class AgentRunner {
815
828
  if (this.restartPending) return;
816
829
  this.restartPending = true;
817
830
  try {
831
+ // Best-effort Insights capture for the segment that just ended in a crash (#183). This
832
+ // path has no controlled teardown, so without it crashed sessions silently drop their
833
+ // context-ratio datapoint. The process handle is still set (cleared later), so the
834
+ // Claude transcript is readable; the runner stays alive here (restart or offline), so the
835
+ // durable outbox drains normally — no flush needed.
836
+ await Promise.race([
837
+ this.captureContextRatio("crash"),
838
+ new Promise<void>((resolve) => setTimeout(resolve, PRE_DESTROY_TIMEOUT_MS)),
839
+ ]).catch((error) => this.sessionLog(`insights: crash capture failed: ${errMessage(error)}`));
840
+
818
841
  const now = Date.now();
819
842
  const runtimeMs = this.processStartedAt ? now - this.processStartedAt : Number.POSITIVE_INFINITY;
820
843
  const recent = this.unexpectedExitTimes.filter((time) => now - time <= UNEXPECTED_EXIT_WINDOW_MS);
@@ -1308,6 +1331,17 @@ export class AgentRunner {
1308
1331
  } catch (error) {
1309
1332
  this.sessionLog(`insights: pre-destroy capture failed: ${errMessage(error)}`);
1310
1333
  }
1334
+ // For exit-bound transitions the runner won't be alive afterward to drain the durable
1335
+ // outbox, so block (bounded) on delivering what capture just enqueued. This runs before
1336
+ // handleCommand's finally deletes the agent, so the runtime token is still valid here.
1337
+ if (reasonExitsRunner(reason)) {
1338
+ try {
1339
+ const delivered = await this.outbox.flush(OUTBOX_FLUSH_TIMEOUT_MS);
1340
+ if (!delivered) this.sessionLog(`insights: outbox flush incomplete before ${reason} (${this.outbox.pendingCount()} pending)`);
1341
+ } catch (error) {
1342
+ this.sessionLog(`insights: outbox flush failed: ${errMessage(error)}`);
1343
+ }
1344
+ }
1311
1345
  })();
1312
1346
  this.preDestroyPromise = run;
1313
1347
  void run.finally(() => { this.preDestroyPromise = undefined; });