agent-relay-runner 0.32.1 → 0.32.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/plugins/claude/.claude-plugin/plugin.json +1 -1
- package/src/outbox.ts +33 -6
- package/src/runner.ts +35 -1
package/package.json
CHANGED
package/src/outbox.ts
CHANGED
|
@@ -68,6 +68,10 @@ const DEFAULTS = {
|
|
|
68
68
|
pollMs: 5_000,
|
|
69
69
|
};
|
|
70
70
|
|
|
71
|
+
// Pause between forced delivery attempts during flush() — keeps a brief server outage
|
|
72
|
+
// from hot-looping while still retrying fast enough to land within the flush deadline.
|
|
73
|
+
const FLUSH_RETRY_PAUSE_MS = 150;
|
|
74
|
+
|
|
71
75
|
interface Row {
|
|
72
76
|
seq: number;
|
|
73
77
|
kind: string;
|
|
@@ -93,6 +97,8 @@ export class Outbox {
|
|
|
93
97
|
|
|
94
98
|
private draining = false;
|
|
95
99
|
private rerun = false;
|
|
100
|
+
// Set by flush() to make the next drain pass ignore per-row backoff (deliver NOW).
|
|
101
|
+
private forceDue = false;
|
|
96
102
|
private pollTimer?: ReturnType<typeof setInterval>;
|
|
97
103
|
private dueTimer?: ReturnType<typeof setTimeout>;
|
|
98
104
|
private stopped = false;
|
|
@@ -185,24 +191,45 @@ export class Outbox {
|
|
|
185
191
|
this.pollTimer.unref?.();
|
|
186
192
|
}
|
|
187
193
|
|
|
194
|
+
// Drain everything we can before the process exits, ignoring per-row backoff. Used on
|
|
195
|
+
// shutdown/kill/crash (#183): the capture seam durably enqueues the end-of-session
|
|
196
|
+
// Insights datapoint, but the per-agent outbox is never reopened once the agent is gone,
|
|
197
|
+
// so a row that hasn't been POSTed yet is stranded forever. flush() blocks (bounded) on
|
|
198
|
+
// delivery — the opposite intent of the polite, backoff-respecting steady-state drain.
|
|
199
|
+
// Returns true if the queue fully drained within the deadline.
|
|
200
|
+
async flush(timeoutMs: number): Promise<boolean> {
|
|
201
|
+
const deadline = Date.now() + Math.max(0, timeoutMs);
|
|
202
|
+
for (;;) {
|
|
203
|
+
if (this.stopped) return this.pendingCount() === 0;
|
|
204
|
+
if (this.pendingCount() === 0) return true;
|
|
205
|
+
await this.drain(true);
|
|
206
|
+
if (this.pendingCount() === 0) return true;
|
|
207
|
+
if (Date.now() >= deadline) return false;
|
|
208
|
+
await new Promise<void>((resolve) => setTimeout(resolve, FLUSH_RETRY_PAUSE_MS));
|
|
209
|
+
}
|
|
210
|
+
}
|
|
211
|
+
|
|
188
212
|
// Process the queue strictly oldest-first. Coalesces concurrent calls; if a drain is
|
|
189
213
|
// requested while one is running, it re-runs once at the end (so an enqueue during a
|
|
190
|
-
// send isn't missed).
|
|
191
|
-
async drain(): Promise<void> {
|
|
214
|
+
// send isn't missed). `force` (from flush) makes this pass ignore per-row backoff.
|
|
215
|
+
async drain(force = false): Promise<void> {
|
|
192
216
|
if (this.stopped) return;
|
|
217
|
+
if (force) this.forceDue = true;
|
|
193
218
|
if (this.draining) { this.rerun = true; return; }
|
|
194
219
|
this.draining = true;
|
|
195
220
|
try {
|
|
196
221
|
do {
|
|
197
222
|
this.rerun = false;
|
|
198
|
-
|
|
199
|
-
|
|
223
|
+
const due = this.forceDue;
|
|
224
|
+
this.forceDue = false;
|
|
225
|
+
await this.drainOnce(due);
|
|
226
|
+
} while ((this.rerun || this.forceDue) && !this.stopped);
|
|
200
227
|
} finally {
|
|
201
228
|
this.draining = false;
|
|
202
229
|
}
|
|
203
230
|
}
|
|
204
231
|
|
|
205
|
-
private async drainOnce(): Promise<void> {
|
|
232
|
+
private async drainOnce(force = false): Promise<void> {
|
|
206
233
|
for (;;) {
|
|
207
234
|
if (this.stopped) return;
|
|
208
235
|
const row = this.db
|
|
@@ -211,7 +238,7 @@ export class Outbox {
|
|
|
211
238
|
if (!row) return;
|
|
212
239
|
|
|
213
240
|
const now = Date.now();
|
|
214
|
-
if (row.next_attempt_at > now) {
|
|
241
|
+
if (!force && row.next_attempt_at > now) {
|
|
215
242
|
// Head isn't due yet. Don't reorder past it (FIFO) — schedule a wake-up and stop.
|
|
216
243
|
this.scheduleDue(row.next_attempt_at - now);
|
|
217
244
|
return;
|
package/src/runner.ts
CHANGED
|
@@ -26,7 +26,15 @@ import { ensureSessionScratch, reapSessionScratch, sweepStaleSessions, type Sess
|
|
|
26
26
|
// capture, #183/#184) before the invasive operation and, during that window, presents a
|
|
27
27
|
// distinct non-addressable lifecycle state. Bus commands and provider hooks (Claude
|
|
28
28
|
// PreCompact / SessionEnd) both normalize to one of these.
|
|
29
|
-
type SessionDestroyReason = "compact" | "clear" | "restart" | "shutdown" | "kill";
|
|
29
|
+
type SessionDestroyReason = "compact" | "clear" | "restart" | "shutdown" | "kill" | "crash";
|
|
30
|
+
|
|
31
|
+
// Reasons after which the runner process won't survive to drain the durable outbox (and the
|
|
32
|
+
// per-agent outbox is never reopened once the agent is gone). For these, pre-destroy must
|
|
33
|
+
// block on delivery of the just-captured Insights datapoint, not just enqueue it (#183).
|
|
34
|
+
// `restart` (bus command) deliberately excluded: the runner stays alive and drains normally.
|
|
35
|
+
function reasonExitsRunner(reason: SessionDestroyReason): boolean {
|
|
36
|
+
return reason === "shutdown" || reason === "kill" || reason === "crash";
|
|
37
|
+
}
|
|
30
38
|
|
|
31
39
|
// `finalizing-<reason>` is the transient pre-destroy window; the others are the executing
|
|
32
40
|
// teardown states the dashboard already renders.
|
|
@@ -38,6 +46,11 @@ type LifecycleAction =
|
|
|
38
46
|
// transcript read or a wedged provider from stalling a shutdown the operator asked for.
|
|
39
47
|
const PRE_DESTROY_TIMEOUT_MS = 4_000;
|
|
40
48
|
|
|
49
|
+
// Bounded window to deliver the durable outbox before an exit-bound teardown (#183). Kept
|
|
50
|
+
// short so a wedged/down server can't stall an operator-requested shutdown for long; a
|
|
51
|
+
// row that still can't land is logged, not silently dropped.
|
|
52
|
+
const OUTBOX_FLUSH_TIMEOUT_MS = 3_000;
|
|
53
|
+
|
|
41
54
|
// Map a lifecycle bus command to its destructive boundary reason, or undefined for
|
|
42
55
|
// non-destructive commands (interrupt, inject, reconnect, permission decisions).
|
|
43
56
|
function boundaryReasonForCommand(type: string): SessionDestroyReason | undefined {
|
|
@@ -815,6 +828,16 @@ export class AgentRunner {
|
|
|
815
828
|
if (this.restartPending) return;
|
|
816
829
|
this.restartPending = true;
|
|
817
830
|
try {
|
|
831
|
+
// Best-effort Insights capture for the segment that just ended in a crash (#183). This
|
|
832
|
+
// path has no controlled teardown, so without it crashed sessions silently drop their
|
|
833
|
+
// context-ratio datapoint. The process handle is still set (cleared later), so the
|
|
834
|
+
// Claude transcript is readable; the runner stays alive here (restart or offline), so the
|
|
835
|
+
// durable outbox drains normally — no flush needed.
|
|
836
|
+
await Promise.race([
|
|
837
|
+
this.captureContextRatio("crash"),
|
|
838
|
+
new Promise<void>((resolve) => setTimeout(resolve, PRE_DESTROY_TIMEOUT_MS)),
|
|
839
|
+
]).catch((error) => this.sessionLog(`insights: crash capture failed: ${errMessage(error)}`));
|
|
840
|
+
|
|
818
841
|
const now = Date.now();
|
|
819
842
|
const runtimeMs = this.processStartedAt ? now - this.processStartedAt : Number.POSITIVE_INFINITY;
|
|
820
843
|
const recent = this.unexpectedExitTimes.filter((time) => now - time <= UNEXPECTED_EXIT_WINDOW_MS);
|
|
@@ -1308,6 +1331,17 @@ export class AgentRunner {
|
|
|
1308
1331
|
} catch (error) {
|
|
1309
1332
|
this.sessionLog(`insights: pre-destroy capture failed: ${errMessage(error)}`);
|
|
1310
1333
|
}
|
|
1334
|
+
// For exit-bound transitions the runner won't be alive afterward to drain the durable
|
|
1335
|
+
// outbox, so block (bounded) on delivering what capture just enqueued. This runs before
|
|
1336
|
+
// handleCommand's finally deletes the agent, so the runtime token is still valid here.
|
|
1337
|
+
if (reasonExitsRunner(reason)) {
|
|
1338
|
+
try {
|
|
1339
|
+
const delivered = await this.outbox.flush(OUTBOX_FLUSH_TIMEOUT_MS);
|
|
1340
|
+
if (!delivered) this.sessionLog(`insights: outbox flush incomplete before ${reason} (${this.outbox.pendingCount()} pending)`);
|
|
1341
|
+
} catch (error) {
|
|
1342
|
+
this.sessionLog(`insights: outbox flush failed: ${errMessage(error)}`);
|
|
1343
|
+
}
|
|
1344
|
+
}
|
|
1311
1345
|
})();
|
|
1312
1346
|
this.preDestroyPromise = run;
|
|
1313
1347
|
void run.finally(() => { this.preDestroyPromise = undefined; });
|