agent-relay-runner 0.11.6 → 0.11.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/plugins/claude/.claude-plugin/plugin.json +1 -1
- package/plugins/claude/hooks/relay-status.sh +39 -0
- package/plugins/claude/hooks/session-start.sh +6 -0
- package/plugins/claude/hooks/user-prompt-submit.sh +3 -0
- package/src/adapter.ts +4 -0
- package/src/runner.ts +258 -28
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "agent-relay-runner",
|
|
3
|
-
"version": "0.11.6",
|
|
3
|
+
"version": "0.11.9",
|
|
4
4
|
"description": "Unified provider lifecycle runner for Agent Relay",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -20,7 +20,7 @@
|
|
|
20
20
|
"directory": "runner"
|
|
21
21
|
},
|
|
22
22
|
"dependencies": {
|
|
23
|
-
"agent-relay-sdk": "0.2.
|
|
23
|
+
"agent-relay-sdk": "0.2.5"
|
|
24
24
|
},
|
|
25
25
|
"devDependencies": {
|
|
26
26
|
"@types/bun": "latest",
|
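package/plugins/claude/.claude-plugin/plugin.json
CHANGED
package/plugins/claude/hooks/relay-status.sh
CHANGED
|
@@ -82,3 +82,42 @@ relay_json_bool_field() {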
|
|
|
82
82
|
relay_json_escape() {
|
|
83
83
|
printf '%s' "${1:-}" | sed 's/\\/\\\\/g; s/"/\\"/g'
|
|
84
84
|
}
|
|
85
|
+
|
|
86
|
+
# Print a short "request-review when done" reminder to stdout IFF this agent owns
|
|
87
|
+
# an isolated workspace whose branch has committed work not yet integrated into
|
|
88
|
+
# base. Prints nothing otherwise — so plain chat, shared-mode, and no-change
|
|
89
|
+
# sessions never pay context/token cost. Reads the workspace from
|
|
90
|
+
# AGENT_RELAY_WORKSPACE_JSON (set by the orchestrator at spawn) and does a local,
|
|
91
|
+
# network-free git count. Always returns 0 (never aborts its caller).
|
|
92
|
+
relay_review_reminder_text() {
|
|
93
|
+
local ws="${AGENT_RELAY_WORKSPACE_JSON:-}"
|
|
94
|
+
[ -z "$ws" ] && return 0
|
|
95
|
+
local mode worktree base id branch ahead
|
|
96
|
+
mode="$(relay_json_string_field mode "$ws")"
|
|
97
|
+
[ "$mode" = "isolated" ] || return 0
|
|
98
|
+
worktree="$(relay_json_string_field worktreePath "$ws")"
|
|
99
|
+
base="$(relay_json_string_field baseSha "$ws")"
|
|
100
|
+
[ -z "$base" ] && base="$(relay_json_string_field baseRef "$ws")"
|
|
101
|
+
id="$(relay_json_string_field id "$ws")"
|
|
102
|
+
branch="$(relay_json_string_field branch "$ws")"
|
|
103
|
+
[ -n "$worktree" ] || return 0
|
|
104
|
+
[ -n "$base" ] || return 0
|
|
105
|
+
[ -n "$id" ] || return 0
|
|
106
|
+
ahead="$(git -C "$worktree" rev-list --count "${base}..HEAD" 2>/dev/null || echo 0)"
|
|
107
|
+
case "$ahead" in ''|*[!0-9]*) ahead=0 ;; esac
|
|
108
|
+
[ "$ahead" -gt 0 ] || return 0
|
|
109
|
+
printf '[agent-relay] You have %s committed change(s) on `%s` that are not yet integrated into base. If your task is complete, request review so Agent Relay can auto-land it: POST /api/workspaces/%s/actions with {"action":"request-review"}. If you are still working, ignore this — it only appears while there is unmerged committed work.' \
|
|
110
|
+
"$ahead" "${branch:-this branch}" "$id"
|
|
111
|
+
return 0
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
# Wrap reminder text in the Claude Code additionalContext envelope for a given
|
|
115
|
+
# hook event. Emits nothing when the text is empty.
|
|
116
|
+
relay_emit_additional_context() {
|
|
117
|
+
local event="${1:-}" text="${2:-}"
|
|
118
|
+
[ -z "$event" ] && return 0
|
|
119
|
+
[ -z "$text" ] && return 0
|
|
120
|
+
printf '{"hookSpecificOutput":{"hookEventName":"%s","additionalContext":"%s"}}' \
|
|
121
|
+
"$event" "$(relay_json_escape "$text")"
|
|
122
|
+
return 0
|
|
123
|
+
}
|
package/plugins/claude/hooks/session-start.sh
CHANGED
|
@@ -12,3 +12,9 @@ case "$source_kind" in
|
|
|
12
12
|
*)
|
|
13
13
|
;;
|
|
14
14
|
esac
|
|
15
|
+
|
|
16
|
+
# Re-prime the request-review reminder when a session (re)starts — crucially on
|
|
17
|
+
# source=="compact", which is how it survives a context compaction (PreCompact
|
|
18
|
+
# cannot inject post-compact context; SessionStart can). No-op on a fresh startup
|
|
19
|
+
# with no committed work, and silent for non-isolated/no-change sessions.
|
|
20
|
+
relay_emit_additional_context SessionStart "$(relay_review_reminder_text || true)"
|
package/plugins/claude/hooks/user-prompt-submit.sh
CHANGED
|
@@ -2,3 +2,6 @@
|
|
|
2
2
|
set -euo pipefail
|
|
3
3
|
source "${CLAUDE_PLUGIN_ROOT:-$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)}/hooks/relay-status.sh"
|
|
4
4
|
relay_post_status busy
|
|
5
|
+
# Re-surface the request-review reminder each turn while there is unmerged
|
|
6
|
+
# committed work — so a long session can't "forget" to land it. Silent otherwise.
|
|
7
|
+
relay_emit_additional_context UserPromptSubmit "$(relay_review_reminder_text || true)"
|
package/src/adapter.ts
CHANGED
package/src/runner.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { hostname } from "node:os";
|
|
2
|
-
import { appendFileSync, mkdirSync, writeFileSync } from "node:fs";
|
|
2
|
+
import { appendFileSync, closeSync, mkdirSync, openSync, readSync, statSync, writeFileSync } from "node:fs";
|
|
3
3
|
import { readFile } from "node:fs/promises";
|
|
4
4
|
import { dirname, join } from "node:path";
|
|
5
5
|
import type { AgentProfile, ContextState, Message, ProviderCapabilities, TaskStatusInput, WorkspaceMetadata } from "agent-relay-sdk";
|
|
@@ -67,6 +67,18 @@ const UNEXPECTED_EXIT_WINDOW_MS = 2 * 60 * 1000;
|
|
|
67
67
|
const RAPID_EXIT_MS = 30 * 1000;
|
|
68
68
|
const MAX_RAPID_UNEXPECTED_EXITS = 3;
|
|
69
69
|
const MAX_TIMER_DELAY_MS = 2_147_483_647;
|
|
70
|
+
const LOG_TAIL_BYTES = 128 * 1024;
|
|
71
|
+
const CLAUDE_RESUME_RE = /\bclaude\s+--resume\s+([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})\b/gi;
|
|
72
|
+
|
|
73
|
+
interface RunnerTimelineEvent {
|
|
74
|
+
status: string;
|
|
75
|
+
id?: string;
|
|
76
|
+
timestamp: number;
|
|
77
|
+
title?: string;
|
|
78
|
+
body?: string;
|
|
79
|
+
icon?: string;
|
|
80
|
+
metadata?: Record<string, unknown>;
|
|
81
|
+
}
|
|
70
82
|
|
|
71
83
|
export class AgentRunner {
|
|
72
84
|
private readonly agentId: string;
|
|
@@ -101,7 +113,7 @@ export class AgentRunner {
|
|
|
101
113
|
private readonly unexpectedExitTimes: number[] = [];
|
|
102
114
|
private readonly pendingMessages = new Map<number, Message>();
|
|
103
115
|
private readonly activeTaskClaims = new Map<number, ActiveTaskClaim>();
|
|
104
|
-
private pendingTimelineEvent?:
|
|
116
|
+
private pendingTimelineEvent?: RunnerTimelineEvent;
|
|
105
117
|
private pendingPromptMessageId?: number;
|
|
106
118
|
private scratch?: SessionScratchLayout;
|
|
107
119
|
|
|
@@ -579,9 +591,69 @@ export class AgentRunner {
|
|
|
579
591
|
const recent = this.unexpectedExitTimes.filter((time) => now - time <= UNEXPECTED_EXIT_WINDOW_MS);
|
|
580
592
|
recent.push(now);
|
|
581
593
|
this.unexpectedExitTimes.splice(0, this.unexpectedExitTimes.length, ...recent);
|
|
594
|
+
const diagnostics = this.providerExitDiagnostics(status, runtimeMs);
|
|
595
|
+
|
|
596
|
+
this.publishRunnerTimelineEvent({
|
|
597
|
+
status: "provider.exit_detected",
|
|
598
|
+
id: `provider-exit-${this.providerSessionId}-${now}`,
|
|
599
|
+
timestamp: now,
|
|
600
|
+
title: "Provider exited",
|
|
601
|
+
body: `${this.options.provider} reported ${status} after ${Math.round(runtimeMs / 1000)}s`,
|
|
602
|
+
icon: "ti-plug-off",
|
|
603
|
+
metadata: {
|
|
604
|
+
eventType: "provider.exit_detected",
|
|
605
|
+
...diagnostics,
|
|
606
|
+
},
|
|
607
|
+
});
|
|
608
|
+
|
|
609
|
+
if (this.shouldStopUnexpectedProviderExit(diagnostics)) {
|
|
610
|
+
const hasResumeId = typeof diagnostics.claudeResumeId === "string" && diagnostics.claudeResumeId.length > 0;
|
|
611
|
+
console.warn(`[runner] ${this.options.provider} exited; leaving agent offline for manual recovery`);
|
|
612
|
+
this.publishRunnerTimelineEvent({
|
|
613
|
+
status: "provider.restart_decision",
|
|
614
|
+
id: `provider-restart-decision-${this.providerSessionId}-${now}`,
|
|
615
|
+
timestamp: Date.now(),
|
|
616
|
+
title: "Provider restart skipped",
|
|
617
|
+
body: hasResumeId
|
|
618
|
+
? "Claude exited; runner will not auto-resume. Resume id captured for manual recovery."
|
|
619
|
+
: "Claude exited; runner will not restart automatically.",
|
|
620
|
+
icon: "ti-player-stop",
|
|
621
|
+
metadata: {
|
|
622
|
+
eventType: "provider.restart_decision",
|
|
623
|
+
decision: "stop-surface",
|
|
624
|
+
reason: hasResumeId ? "claude-exit-manual-resume-available" : "claude-exit-manual-intervention-required",
|
|
625
|
+
...diagnostics,
|
|
626
|
+
},
|
|
627
|
+
});
|
|
628
|
+
this.process = undefined;
|
|
629
|
+
this.setProviderStatus({
|
|
630
|
+
status,
|
|
631
|
+
reason: "provider-turn",
|
|
632
|
+
id: `provider-exit-${this.providerSessionId}`,
|
|
633
|
+
clear: ["provider-turn", "subagent"],
|
|
634
|
+
});
|
|
635
|
+
return;
|
|
636
|
+
}
|
|
582
637
|
|
|
583
638
|
if (runtimeMs < RAPID_EXIT_MS && recent.length > MAX_RAPID_UNEXPECTED_EXITS) {
|
|
584
639
|
console.error(`[runner] provider session exited ${recent.length} times within ${Math.round(UNEXPECTED_EXIT_WINDOW_MS / 1000)}s; giving up`);
|
|
640
|
+
this.publishRunnerTimelineEvent({
|
|
641
|
+
status: "provider.restart_decision",
|
|
642
|
+
id: `provider-restart-decision-${this.providerSessionId}-${now}`,
|
|
643
|
+
timestamp: Date.now(),
|
|
644
|
+
title: "Provider restart skipped",
|
|
645
|
+
body: `rapid unexpected exits exceeded ${MAX_RAPID_UNEXPECTED_EXITS}`,
|
|
646
|
+
icon: "ti-alert-triangle",
|
|
647
|
+
metadata: {
|
|
648
|
+
eventType: "provider.restart_decision",
|
|
649
|
+
decision: "give-up",
|
|
650
|
+
reason: "rapid-unexpected-provider-exits",
|
|
651
|
+
rapidExitCount: recent.length,
|
|
652
|
+
rapidExitWindowMs: UNEXPECTED_EXIT_WINDOW_MS,
|
|
653
|
+
maxRapidUnexpectedExits: MAX_RAPID_UNEXPECTED_EXITS,
|
|
654
|
+
...diagnostics,
|
|
655
|
+
},
|
|
656
|
+
});
|
|
585
657
|
this.setProviderStatus(status);
|
|
586
658
|
this.options.onProviderExit?.(0);
|
|
587
659
|
return;
|
|
@@ -589,6 +661,23 @@ export class AgentRunner {
|
|
|
589
661
|
|
|
590
662
|
const delayMs = Math.min(10_000, Math.max(500, 500 * recent.length));
|
|
591
663
|
console.warn(`[runner] provider session exited unexpectedly after ${Math.round(runtimeMs / 1000)}s; restarting in ${delayMs}ms`);
|
|
664
|
+
this.publishRunnerTimelineEvent({
|
|
665
|
+
status: "provider.restart_decision",
|
|
666
|
+
id: `provider-restart-decision-${this.providerSessionId}-${now}`,
|
|
667
|
+
timestamp: Date.now(),
|
|
668
|
+
title: "Provider restart scheduled",
|
|
669
|
+
body: `runner will start a fresh ${this.options.provider} provider in ${delayMs}ms`,
|
|
670
|
+
icon: "ti-refresh",
|
|
671
|
+
metadata: {
|
|
672
|
+
eventType: "provider.restart_decision",
|
|
673
|
+
decision: "restart-fresh",
|
|
674
|
+
reason: "unexpected-headless-terminal-exit",
|
|
675
|
+
delayMs,
|
|
676
|
+
rapidExitCount: recent.length,
|
|
677
|
+
rapidExitWindowMs: UNEXPECTED_EXIT_WINDOW_MS,
|
|
678
|
+
...diagnostics,
|
|
679
|
+
},
|
|
680
|
+
});
|
|
592
681
|
await Bun.sleep(delayMs);
|
|
593
682
|
if (this.stopped || this.exitCommandInProgress) return;
|
|
594
683
|
try {
|
|
@@ -605,6 +694,10 @@ export class AgentRunner {
|
|
|
605
694
|
}
|
|
606
695
|
}
|
|
607
696
|
|
|
697
|
+
private shouldStopUnexpectedProviderExit(diagnostics: Record<string, unknown>): boolean {
|
|
698
|
+
return this.options.provider === "claude" && diagnostics.exitCommandInProgress !== true;
|
|
699
|
+
}
|
|
700
|
+
|
|
608
701
|
private async shutdownProvider(hard: boolean, timeoutMs = this.options.providerConfig.headless.shutdownTimeoutMs): Promise<void> {
|
|
609
702
|
this.lifecycleAction = hard ? "killing" : "shutting-down";
|
|
610
703
|
this.publishStatus();
|
|
@@ -620,6 +713,46 @@ export class AgentRunner {
|
|
|
620
713
|
this.stopped = true;
|
|
621
714
|
}
|
|
622
715
|
|
|
716
|
+
private publishRunnerTimelineEvent(event: RunnerTimelineEvent): void {
|
|
717
|
+
this.pendingTimelineEvent = {
|
|
718
|
+
...event,
|
|
719
|
+
metadata: {
|
|
720
|
+
source: "runner",
|
|
721
|
+
provider: this.options.provider,
|
|
722
|
+
runnerId: this.options.runnerId,
|
|
723
|
+
agentId: this.agentId,
|
|
724
|
+
policyName: this.options.policyName ?? null,
|
|
725
|
+
spawnRequestId: this.options.spawnRequestId ?? null,
|
|
726
|
+
label: this.options.label ?? null,
|
|
727
|
+
providerSessionId: this.providerSessionId,
|
|
728
|
+
...(event.metadata ?? {}),
|
|
729
|
+
},
|
|
730
|
+
};
|
|
731
|
+
this.publishStatus();
|
|
732
|
+
}
|
|
733
|
+
|
|
734
|
+
private providerExitDiagnostics(status: SemanticStatus, runtimeMs: number): Record<string, unknown> {
|
|
735
|
+
const tmuxSession = typeof this.process?.meta?.tmuxSession === "string" ? this.process.meta.tmuxSession : undefined;
|
|
736
|
+
const tmuxSocket = typeof this.process?.meta?.tmuxSocket === "string" ? this.process.meta.tmuxSocket : undefined;
|
|
737
|
+
const exitSource = tmuxSession ? "tmux-session-ended" : this.process?.process ? "process-exit" : "provider-status";
|
|
738
|
+
const logFile = typeof process.env.AGENT_RELAY_LOG_FILE === "string" ? process.env.AGENT_RELAY_LOG_FILE : undefined;
|
|
739
|
+
const claudeResumeId = this.options.provider === "claude" && logFile ? latestClaudeResumeIdFromLogFile(logFile) : undefined;
|
|
740
|
+
return {
|
|
741
|
+
status,
|
|
742
|
+
runtimeMs: Number.isFinite(runtimeMs) ? runtimeMs : null,
|
|
743
|
+
exitSource,
|
|
744
|
+
exitCommandInProgress: this.exitCommandInProgress,
|
|
745
|
+
stopped: this.stopped,
|
|
746
|
+
restartInProgress: this.restartInProgress,
|
|
747
|
+
restartPending: this.restartPending,
|
|
748
|
+
headless: this.options.headless,
|
|
749
|
+
hasTerminalSession: Boolean(tmuxSession),
|
|
750
|
+
tmuxSession: tmuxSession ?? null,
|
|
751
|
+
tmuxSocket: tmuxSocket ?? null,
|
|
752
|
+
claudeResumeId: claudeResumeId ?? null,
|
|
753
|
+
};
|
|
754
|
+
}
|
|
755
|
+
|
|
623
756
|
private async updateCommand(commandId: string, status: string, result?: Record<string, unknown>, error?: string): Promise<void> {
|
|
624
757
|
await this.bus.updateCommand(commandId, { status, ...(result ? { result } : {}), ...(error ? { error } : {}) });
|
|
625
758
|
}
|
|
@@ -646,6 +779,10 @@ export class AgentRunner {
|
|
|
646
779
|
status: update.timeline.status,
|
|
647
780
|
...(update.timeline.id ? { id: update.timeline.id } : {}),
|
|
648
781
|
timestamp: update.timeline.timestamp ?? Date.now(),
|
|
782
|
+
...(update.timeline.title ? { title: update.timeline.title } : {}),
|
|
783
|
+
...(update.timeline.body ? { body: update.timeline.body } : {}),
|
|
784
|
+
...(update.timeline.icon ? { icon: update.timeline.icon } : {}),
|
|
785
|
+
...(update.timeline.metadata ? { metadata: update.timeline.metadata } : {}),
|
|
649
786
|
};
|
|
650
787
|
}
|
|
651
788
|
if (status === "busy") {
|
|
@@ -874,9 +1011,20 @@ export class AgentRunner {
|
|
|
874
1011
|
private scheduleRuntimeTokenRenewal(delayMs?: number): void {
|
|
875
1012
|
if (this.tokenRenewTimer) clearTimeout(this.tokenRenewTimer);
|
|
876
1013
|
this.tokenRenewTimer = undefined;
|
|
877
|
-
if (
|
|
878
|
-
const
|
|
879
|
-
|
|
1014
|
+
if (this.stopped) return;
|
|
1015
|
+
const canSelfRenew = this.isRuntimeTokenRenewable();
|
|
1016
|
+
const canRemint = this.canRemintViaOrchestrator();
|
|
1017
|
+
// Keep the renewal clock ticking as long as the session can recover its token
|
|
1018
|
+
// by EITHER path. Without the re-mint fallback an expired token would stop the
|
|
1019
|
+
// timer forever (the old deadlock that stranded live agents off the bus).
|
|
1020
|
+
if (!canSelfRenew && !canRemint) return;
|
|
1021
|
+
let computedDelay = delayMs;
|
|
1022
|
+
if (computedDelay === undefined) {
|
|
1023
|
+
computedDelay = canSelfRenew
|
|
1024
|
+
? runtimeTokenRenewDelayMs(this.currentTokenExpiresAt!, Date.now())
|
|
1025
|
+
: TOKEN_RENEW_RETRY_MS; // expired but re-mintable → retry via orchestrator soon
|
|
1026
|
+
if (computedDelay === undefined) computedDelay = TOKEN_RENEW_RETRY_MS;
|
|
1027
|
+
}
|
|
880
1028
|
const schedule = runtimeTokenRenewTimerSchedule(computedDelay);
|
|
881
1029
|
if (!schedule) return;
|
|
882
1030
|
this.tokenRenewTimer = setTimeout(() => {
|
|
@@ -889,6 +1037,8 @@ export class AgentRunner {
|
|
|
889
1037
|
}, schedule.delayMs);
|
|
890
1038
|
}
|
|
891
1039
|
|
|
1040
|
+
// Can the runner self-renew right now? Requires a non-expired runner-profile token
|
|
1041
|
+
// (the relay rejects renewal of an expired token).
|
|
892
1042
|
private isRuntimeTokenRenewable(): boolean {
|
|
893
1043
|
return Boolean(
|
|
894
1044
|
this.currentToken &&
|
|
@@ -898,32 +1048,36 @@ export class AgentRunner {
|
|
|
898
1048
|
);
|
|
899
1049
|
}
|
|
900
1050
|
|
|
1051
|
+
// Can the runner recover its token via the orchestrator? Works even when the token
|
|
1052
|
+
// is already expired — the orchestrator's standing credential is the authority.
|
|
1053
|
+
private canRemintViaOrchestrator(): boolean {
|
|
1054
|
+
return Boolean(
|
|
1055
|
+
process.env.AGENT_RELAY_ORCHESTRATOR_URL &&
|
|
1056
|
+
this.currentToken &&
|
|
1057
|
+
(this.currentTokenProfileId === "provider-agent" || this.currentTokenProfileId === "provider-interactive"),
|
|
1058
|
+
);
|
|
1059
|
+
}
|
|
1060
|
+
|
|
901
1061
|
private async renewRuntimeToken(): Promise<void> {
|
|
902
|
-
if (this.stopped || this.tokenRenewInFlight || !this.
|
|
1062
|
+
if (this.stopped || this.tokenRenewInFlight || !this.currentToken) return;
|
|
903
1063
|
this.tokenRenewInFlight = true;
|
|
904
1064
|
try {
|
|
905
|
-
|
|
906
|
-
|
|
907
|
-
this.
|
|
908
|
-
|
|
909
|
-
|
|
910
|
-
|
|
911
|
-
|
|
912
|
-
|
|
913
|
-
|
|
914
|
-
|
|
915
|
-
|
|
916
|
-
|
|
917
|
-
|
|
918
|
-
|
|
919
|
-
|
|
920
|
-
|
|
921
|
-
};
|
|
922
|
-
this.bus.reconnectTransport("runtime token renewed");
|
|
923
|
-
this.publishStatus();
|
|
924
|
-
this.scheduleRuntimeTokenRenewal();
|
|
925
|
-
} catch (error) {
|
|
926
|
-
this.logRuntimeTokenRenewalFailure(error);
|
|
1065
|
+
// Preferred path: self-renew directly against the relay while the token is
|
|
1066
|
+
// still valid. Cheapest and needs no orchestrator round-trip.
|
|
1067
|
+
if (this.isRuntimeTokenRenewable()) {
|
|
1068
|
+
try {
|
|
1069
|
+
const renewed = await this.http.renewRuntimeToken();
|
|
1070
|
+
this.applyRenewedToken(renewed.token, renewed.record, "runtime-token-renewed");
|
|
1071
|
+
return;
|
|
1072
|
+
} catch (error) {
|
|
1073
|
+
this.logRuntimeTokenRenewalFailure(error);
|
|
1074
|
+
// Relay unreachable or token rejected — fall through to orchestrator re-mint.
|
|
1075
|
+
}
|
|
1076
|
+
}
|
|
1077
|
+
// Recovery path: token expired, or self-renew failed. Ask the orchestrator —
|
|
1078
|
+
// it holds a long-lived credential and can mint a fresh runner token, so a
|
|
1079
|
+
// live session heals instead of being stranded off the bus.
|
|
1080
|
+
if (this.canRemintViaOrchestrator() && await this.remintViaOrchestrator()) return;
|
|
927
1081
|
this.pendingTimelineEvent = {
|
|
928
1082
|
status: "runtime-token-renewal-failed",
|
|
929
1083
|
timestamp: Date.now(),
|
|
@@ -935,6 +1089,56 @@ export class AgentRunner {
|
|
|
935
1089
|
}
|
|
936
1090
|
}
|
|
937
1091
|
|
|
1092
|
+
// Apply a freshly issued token across every live surface — runner state, the
|
|
1093
|
+
// RunnerOptions bag (re-injected into the provider on respawn), the HTTP client,
|
|
1094
|
+
// the bus client — then force a bus handshake with the new token and reschedule.
|
|
1095
|
+
private applyRenewedToken(
|
|
1096
|
+
token: string,
|
|
1097
|
+
record: { jti: string; profileId?: string; expiresAt?: number },
|
|
1098
|
+
status: "runtime-token-renewed" | "runtime-token-reminted",
|
|
1099
|
+
): void {
|
|
1100
|
+
this.currentToken = token;
|
|
1101
|
+
this.currentTokenJti = record.jti;
|
|
1102
|
+
this.currentTokenProfileId = record.profileId ?? this.currentTokenProfileId;
|
|
1103
|
+
this.currentTokenExpiresAt = record.expiresAt;
|
|
1104
|
+
this.options.token = token;
|
|
1105
|
+
this.options.tokenJti = record.jti;
|
|
1106
|
+
this.options.tokenProfileId = this.currentTokenProfileId;
|
|
1107
|
+
this.options.tokenExpiresAt = this.currentTokenExpiresAt;
|
|
1108
|
+
this.http.setToken(token);
|
|
1109
|
+
this.bus.setToken(token);
|
|
1110
|
+
this.httpLivenessAuthFailed = false;
|
|
1111
|
+
this.pendingTimelineEvent = { status, id: record.jti, timestamp: Date.now() };
|
|
1112
|
+
this.bus.reconnectTransport(status === "runtime-token-reminted" ? "runtime token re-minted" : "runtime token renewed");
|
|
1113
|
+
this.publishStatus();
|
|
1114
|
+
this.scheduleRuntimeTokenRenewal();
|
|
1115
|
+
}
|
|
1116
|
+
|
|
1117
|
+
// Recover the runtime token through the orchestrator. The runner proxies its own
|
|
1118
|
+
// (possibly expired) token; the orchestrator re-mints it via the relay using its
|
|
1119
|
+
// standing credential. Returns true on success.
|
|
1120
|
+
private async remintViaOrchestrator(): Promise<boolean> {
|
|
1121
|
+
const orchUrl = process.env.AGENT_RELAY_ORCHESTRATOR_URL;
|
|
1122
|
+
if (!orchUrl || !this.currentToken) return false;
|
|
1123
|
+
try {
|
|
1124
|
+
const res = await fetch(`${orchUrl.replace(/\/+$/, "")}/api/runtime-tokens/runner-renew`, {
|
|
1125
|
+
method: "POST",
|
|
1126
|
+
headers: { "Content-Type": "application/json" },
|
|
1127
|
+
body: JSON.stringify({ token: this.currentToken }),
|
|
1128
|
+
signal: AbortSignal.timeout(10_000),
|
|
1129
|
+
});
|
|
1130
|
+
if (!res.ok) return false;
|
|
1131
|
+
const renewed = await res.json() as { token?: string; record?: { jti: string; profileId?: string; expiresAt?: number } };
|
|
1132
|
+
if (!renewed?.token || !renewed.record) return false;
|
|
1133
|
+
this.applyRenewedToken(renewed.token, renewed.record, "runtime-token-reminted");
|
|
1134
|
+
this.logRunnerDiagnostic(`[runner] runtime token re-minted via orchestrator (jti ${renewed.record.jti})`);
|
|
1135
|
+
return true;
|
|
1136
|
+
} catch (error) {
|
|
1137
|
+
this.logRuntimeTokenRenewalFailure(error);
|
|
1138
|
+
return false;
|
|
1139
|
+
}
|
|
1140
|
+
}
|
|
1141
|
+
|
|
938
1142
|
private logRuntimeTokenRenewalFailure(error: unknown): void {
|
|
939
1143
|
const key = httpErrorKey(error);
|
|
940
1144
|
const now = Date.now();
|
|
@@ -1146,6 +1350,32 @@ export function runnerShouldRestartUnexpectedProviderExit(
|
|
|
1146
1350
|
&& input.hasTerminalSession;
|
|
1147
1351
|
}
|
|
1148
1352
|
|
|
1353
|
+
export function latestClaudeResumeIdFromText(text: string): string | undefined {
|
|
1354
|
+
let latest: string | undefined;
|
|
1355
|
+
CLAUDE_RESUME_RE.lastIndex = 0;
|
|
1356
|
+
for (let match = CLAUDE_RESUME_RE.exec(text); match; match = CLAUDE_RESUME_RE.exec(text)) {
|
|
1357
|
+
latest = match[1];
|
|
1358
|
+
}
|
|
1359
|
+
return latest;
|
|
1360
|
+
}
|
|
1361
|
+
|
|
1362
|
+
export function latestClaudeResumeIdFromLogFile(path: string): string | undefined {
|
|
1363
|
+
let fd: number | undefined;
|
|
1364
|
+
try {
|
|
1365
|
+
const stat = statSync(path);
|
|
1366
|
+
const length = Math.min(stat.size, LOG_TAIL_BYTES);
|
|
1367
|
+
const offset = Math.max(0, stat.size - length);
|
|
1368
|
+
const buffer = Buffer.alloc(length);
|
|
1369
|
+
fd = openSync(path, "r");
|
|
1370
|
+
readSync(fd, buffer, 0, length, offset);
|
|
1371
|
+
return latestClaudeResumeIdFromText(buffer.toString("utf8"));
|
|
1372
|
+
} catch {
|
|
1373
|
+
return undefined;
|
|
1374
|
+
} finally {
|
|
1375
|
+
if (fd !== undefined) closeSync(fd);
|
|
1376
|
+
}
|
|
1377
|
+
}
|
|
1378
|
+
|
|
1149
1379
|
function commandTimeoutMs(params: Record<string, unknown>, fallback = 10_000): number {
|
|
1150
1380
|
const raw = params.timeoutMs;
|
|
1151
1381
|
if (typeof raw !== "number" || !Number.isSafeInteger(raw) || raw <= 0) return fallback;
|