agent-relay-runner 0.12.3 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/plugins/claude/.claude-plugin/plugin.json +1 -1
- package/plugins/claude/hooks/relay-status.sh +20 -0
- package/plugins/claude/hooks/session-end.sh +4 -0
- package/src/adapters/claude-transcript.ts +131 -0
- package/src/adapters/claude.ts +19 -5
- package/src/adapters/codex.ts +12 -0
- package/src/control-server.ts +18 -0
- package/src/runner.ts +95 -9
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "agent-relay-runner",
|
|
3
|
-
"version": "0.12.3",
|
|
3
|
+
"version": "0.13.0",
|
|
4
4
|
"description": "Unified provider lifecycle runner for Agent Relay",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -20,7 +20,7 @@
|
|
|
20
20
|
"directory": "runner"
|
|
21
21
|
},
|
|
22
22
|
"dependencies": {
|
|
23
|
-
"agent-relay-sdk": "0.2.
|
|
23
|
+
"agent-relay-sdk": "0.2.7"
|
|
24
24
|
},
|
|
25
25
|
"devDependencies": {
|
|
26
26
|
"@types/bun": "latest",
|
|
@@ -72,6 +72,26 @@ relay_post_user_prompt() {
|
|
|
72
72
|
-d "$body" >/dev/null 2>&1 || true
|
|
73
73
|
}
|
|
74
74
|
|
|
75
|
+
relay_post_session_end() {
  # Insights #184: notify the runner that the session is over so it can derive the
  # end-of-session context-gathering ratio from the full transcript.
  #   $1  transcript path (optional — the runner falls back to the last path it saw)
  #   $2  SessionEnd reason (optional)
  # Fire-and-forget: does nothing when no runner port is exported, and a failed
  # POST never fails the hook.
  local transcript_path="${1:-}"
  local reason="${2:-}"
  local port="${AGENT_RELAY_RUNNER_PORT:-}"
  if [ -z "$port" ]; then
    return 0
  fi

  # Collect the JSON members that are actually present; the comma separator is only
  # emitted when a previous member exists.
  local fields=""
  if [ -n "$transcript_path" ]; then
    fields="\"transcriptPath\":\"$(relay_json_escape "$transcript_path")\""
  fi
  if [ -n "$reason" ]; then
    fields="${fields:+${fields},}\"reason\":\"$(relay_json_escape "$reason")\""
  fi

  curl -fsS --max-time 3 -X POST "http://127.0.0.1:${port}/session-end" \
    -H 'Content-Type: application/json' \
    -d "{${fields}}" >/dev/null 2>&1 || true
}
|
|
94
|
+
|
|
75
95
|
relay_pending_reply_stop_decision() {
|
|
76
96
|
local port="${AGENT_RELAY_RUNNER_PORT:-}"
|
|
77
97
|
[ -z "$port" ] && return 0
|
|
@@ -4,6 +4,7 @@ source "${CLAUDE_PLUGIN_ROOT:-$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)}/
|
|
|
4
4
|
|
|
5
5
|
payload="$(cat || true)"
|
|
6
6
|
reason="$(relay_json_string_field reason "$payload")"
|
|
7
|
+
transcript_path="$(relay_json_string_field transcript_path "$payload")"
|
|
7
8
|
|
|
8
9
|
case "$reason" in
|
|
9
10
|
clear)
|
|
@@ -14,5 +15,8 @@ case "$reason" in
|
|
|
14
15
|
;;
|
|
15
16
|
logout|prompt_input_exit|bypass_permissions_disabled|other|*)
|
|
16
17
|
relay_post_status_clearing_subagents offline
|
|
18
|
+
# Real session termination: capture end-of-session Insights (#184). Order after the
|
|
19
|
+
# status post is arbitrary — the runner reads the transcript file regardless.
|
|
20
|
+
relay_post_session_end "$transcript_path" "$reason"
|
|
17
21
|
;;
|
|
18
22
|
esac
|
|
@@ -16,6 +16,7 @@ interface TranscriptBlock {
|
|
|
16
16
|
thinking?: string;
|
|
17
17
|
name?: string;
|
|
18
18
|
input?: Record<string, unknown>;
|
|
19
|
+
is_error?: boolean;
|
|
19
20
|
}
|
|
20
21
|
|
|
21
22
|
export interface TurnStep {
|
|
@@ -186,6 +187,136 @@ export function summarizeToolUse(name: string, input: Record<string, unknown> |
|
|
|
186
187
|
return summary.length > 200 ? `${summary.slice(0, 197)}…` : summary;
|
|
187
188
|
}
|
|
188
189
|
|
|
190
|
+
// --- Insights #184: context-gathering ratio (epic #183, docs/self-improvement.md) ---
|
|
191
|
+
//
|
|
192
|
+
// Computed mechanically from the whole-session transcript at session end — no model
|
|
193
|
+
// involvement, so it costs zero agent tokens and the agent can't game it. The ratio is
|
|
194
|
+
// paired with cheap outcome proxies (user re-prompts, tool errors) so it's never read
|
|
195
|
+
// alone — see the anti-Goodhart constraint in the epic.
|
|
196
|
+
|
|
197
|
+
// Built-in tools that only acquire context and never change anything. Every tool
// that is neither listed here nor matched by GATHERING_NAME counts as an action
// (mutation, execution, or a delegation/direction decision). Bash is deliberately
// an action because it executes — a conservative v0 choice, so `cat`/`ls` run
// through Bash are misclassified; refine later if the data warrants it.
const GATHERING_TOOLS = new Set<string>([
  "Read",
  "Grep",
  "Glob",
  "LS",
  "NotebookRead",
  "WebFetch",
  "WebSearch",
]);

// Name-shape heuristic for MCP / custom tools: a read-style keyword at the start of
// the name or right after a `.`, `_` or `-` separator, case-insensitive
// (e.g. mcp__callmux__searxng_web_search matches on `_search`).
// NOTE(review): the keyword is only anchored on its left side, so any segment that
// merely starts with one (`set_status`, `category`) is also treated as gathering —
// confirm that is acceptable for the baselines.
const GATHERING_NAME = /(?:^|[._-])(read|get|list|search|grep|glob|find|fetch|query|browse|view|show|cat|status|inspect|lookup|symbols|snippet)/i;

/**
 * Decide whether a tool call counts as context gathering.
 *
 * @param name Tool name as recorded in the transcript's `tool_use` block.
 * @returns `true` for an exact built-in match or a name-shape match, else `false`.
 */
function isGatheringTool(name: string): boolean {
  return GATHERING_TOOLS.has(name) || GATHERING_NAME.test(name);
}
|
|
211
|
+
|
|
212
|
+
/** Tool-call breakdown for one session; `ratio` is the headline number. */
export interface ContextRatioMetric {
  /**
   * Session-wide gathering fraction, `gatheringCalls / totalToolCalls`.
   * This is the headline metric.
   */
  ratio: number;
  /** Tool calls classified as context gathering. */
  gatheringCalls: number;
  /** Tool calls classified as actions (everything that is not gathering). */
  actionCalls: number;
  /** `gatheringCalls + actionCalls`. */
  totalToolCalls: number;
  /**
   * Gathering calls seen before the first action call — the
   * "read N files before moving" signal.
   */
  leadingGather: number;
  /** Substantive assistant turns, i.e. turns that produced text or a tool call. */
  turns: number;
}
|
|
223
|
+
|
|
224
|
+
/** Cheap outcome signals reported alongside the ratio so it is never read alone. */
export interface SessionOutcomeProxy {
  /**
   * Number of real user prompts in the session; more back-and-forth roughly means
   * more clarification or correction.
   */
  userPrompts: number;
  /**
   * Number of `tool_result` blocks flagged `is_error` — failures or workarounds the
   * agent ran into.
   */
  toolErrors: number;
}
|
|
230
|
+
|
|
231
|
+
/** Result of analysing one session transcript: the ratio metric plus its outcome proxies. */
export interface SessionAnalysis {
  /** Tool-call breakdown and headline gathering ratio. */
  metric: ContextRatioMetric;
  /** Outcome proxies the metric is paired with. */
  outcome: SessionOutcomeProxy;
}
|
|
235
|
+
|
|
236
|
+
/**
 * Walk the whole-session transcript (newline-delimited JSON) and compute the
 * context-gathering ratio together with its paired outcome proxies.
 *
 * Blank lines, lines that fail to parse, and lines that parse to anything other than
 * a JSON object (`null`, a bare scalar, an array) are skipped, so a truncated or stray
 * line cannot abort the analysis.
 *
 * @param jsonl Raw transcript text, one JSON entry per line.
 * @returns The metric and outcome proxies, or `null` when the session contains no
 *   tool calls — trivial sessions have nothing to learn from and shouldn't pollute
 *   the baselines.
 */
export function analyzeSession(jsonl: string): SessionAnalysis | null {
  let gatheringCalls = 0;
  let actionCalls = 0;
  let leadingGather = 0;
  let sawAction = false;
  let userPrompts = 0;
  let toolErrors = 0;
  let turns = 0;

  for (const line of jsonl.split("\n")) {
    const trimmed = line.trim();
    if (!trimmed) continue;

    let parsed: unknown;
    try {
      parsed = JSON.parse(trimmed);
    } catch {
      continue;
    }
    // JSON.parse happily returns `null`, numbers, strings and arrays. Reading
    // `.type` off `null` would throw and lose the whole analysis, so only plain
    // objects are treated as transcript entries.
    if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) continue;
    const entry = parsed as TranscriptEntry;

    if (isRealUserPrompt(entry)) userPrompts++;

    if (entry.type === "user") {
      // Tool results come back inside user entries; count the ones flagged as errors.
      for (const b of blocks(entry.message)) {
        if (b.type === "tool_result" && b.is_error === true) toolErrors++;
      }
      continue;
    }
    if (entry.type !== "assistant") continue;

    let producedSomething = false;
    for (const b of blocks(entry.message)) {
      if (b.type === "text" && b.text?.trim()) producedSomething = true;
      if (b.type !== "tool_use" || typeof b.name !== "string" || !b.name) continue;
      producedSomething = true;
      if (isGatheringTool(b.name)) {
        gatheringCalls++;
        // Only gathering calls made before the first action feed the leading count.
        if (!sawAction) leadingGather++;
      } else {
        actionCalls++;
        sawAction = true;
      }
    }
    if (producedSomething) turns++;
  }

  const totalToolCalls = gatheringCalls + actionCalls;
  if (totalToolCalls === 0) return null;

  return {
    metric: {
      ratio: gatheringCalls / totalToolCalls,
      gatheringCalls,
      actionCalls,
      totalToolCalls,
      leadingGather,
      turns,
    },
    outcome: { userPrompts, toolErrors },
  };
}
|
|
298
|
+
|
|
299
|
+
/**
 * Count substantive assistant turns — used by the #185 introspection gate.
 *
 * A turn is substantive when the assistant entry carries at least one non-blank text
 * block or one `tool_use` block with a non-empty string name (the same test
 * `analyzeSession` applies to its `turns` counter). Blank lines, unparseable lines and
 * lines that parse to a non-object JSON value are ignored.
 *
 * @param jsonl Raw transcript text, one JSON entry per line.
 * @returns Number of substantive assistant turns (0 for an empty transcript).
 */
export function countSubstantiveTurns(jsonl: string): number {
  let turns = 0;
  for (const line of jsonl.split("\n")) {
    const trimmed = line.trim();
    if (!trimmed) continue;

    let parsed: unknown;
    try {
      parsed = JSON.parse(trimmed);
    } catch {
      continue;
    }
    // `null` is valid JSON; dereferencing it below would throw, so skip anything
    // that is not a plain object.
    if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) continue;
    const entry = parsed as TranscriptEntry;

    if (entry.type !== "assistant") continue;
    const hasContent = blocks(entry.message).some(
      (b) =>
        (b.type === "text" && Boolean(b.text?.trim())) ||
        (b.type === "tool_use" && typeof b.name === "string" && b.name.length > 0),
    );
    if (hasContent) turns++;
  }
  return turns;
}
|
|
319
|
+
|
|
189
320
|
export function extractHookAssistantMessage(content: unknown): string {
|
|
190
321
|
if (typeof content === "string") return content.trim();
|
|
191
322
|
if (!Array.isArray(content)) return "";
|
package/src/adapters/claude.ts
CHANGED
|
@@ -355,6 +355,13 @@ export function sessionStatusLineSettingsArgs(...argLists: string[][]): string[]
|
|
|
355
355
|
command: "agent-relay context-probe --wrap",
|
|
356
356
|
refreshInterval: 30,
|
|
357
357
|
},
|
|
358
|
+
// Force readable thinking text for managed sessions so the session-mirror can
|
|
359
|
+
// surface reasoning in the dashboard. With showThinkingSummaries:false the API
|
|
360
|
+
// redacts thinking to a signature-only stub (empty text), leaving the transcript
|
|
361
|
+
// tail nothing to mirror. --settings merges per-key, so this overrides only this
|
|
362
|
+
// key for managed sessions — a host rig default of false still governs the
|
|
363
|
+
// operator's own interactive TUI sessions.
|
|
364
|
+
showThinkingSummaries: true,
|
|
358
365
|
})];
|
|
359
366
|
}
|
|
360
367
|
|
|
@@ -448,12 +455,19 @@ export function claudePaneLooksReady(text: string): boolean {
|
|
|
448
455
|
|| text.includes("Claude Code");
|
|
449
456
|
}
|
|
450
457
|
|
|
458
|
+
// While a turn is in flight the working-spinner footer shows a live elapsed-time
// counter, e.g. "✶ Perambulating… (2m 17s · ↓ 8.7k tokens)": a gerund, "… (", then
// "[Nh ][Nm ]Ns". The pattern is anchored on the gerund's ellipsis so it cannot match
// the "… +N lines (ctrl+o to expand)" truncation marker, the idle input box, or the
// persistent "/btw … without interrupting Claude's current work" queue hint.
const CLAUDE_BUSY_SPINNER_RE = /…\s*\((?:\d+h\s+)?(?:\d+m\s+)?\d+s\b/;

/**
 * Report whether captured pane text shows Claude in the middle of a turn.
 *
 * Claude Code <2.1.x rendered "(esc to interrupt)" in the spinner footer; 2.1.x
 * dropped that hint but kept the "(<elapsed>" counter, which is the stable busy
 * signal across versions. Either one counts, so the busy probe (and the reconciler
 * backstop that depends on it) keeps working as the footer wording changes.
 *
 * @param text Captured pane contents.
 * @returns `true` when either busy signal is present.
 */
export function claudePaneIsBusy(text: string): boolean {
  if (text.includes("esc to interrupt")) return true;
  return CLAUDE_BUSY_SPINNER_RE.test(text);
}
|
|
458
472
|
|
|
459
473
|
async function waitForClaudeInputReady(sessionName: string, timeoutMs = CLAUDE_TMUX_READY_TIMEOUT_MS, socketName?: string): Promise<void> {
|
package/src/adapters/codex.ts
CHANGED
|
@@ -453,6 +453,18 @@ export function codexToolSummary(type: string | undefined, item: Record<string,
|
|
|
453
453
|
if (type === "webSearch") {
|
|
454
454
|
return { label: "Search", body: clip(oneLine(item.query) || "web search") };
|
|
455
455
|
}
|
|
456
|
+
if (type === "plan") {
|
|
457
|
+
return { label: "Plan", body: clip(oneLine(item.text) || "updated plan") };
|
|
458
|
+
}
|
|
459
|
+
if (type === "collabAgentToolCall") {
|
|
460
|
+
const tool = stringValue(item.tool) ?? "collab";
|
|
461
|
+
const prompt = oneLine(item.prompt);
|
|
462
|
+
const targets = Array.isArray(item.receiverThreadIds)
|
|
463
|
+
? item.receiverThreadIds.filter((t): t is string => typeof t === "string").length
|
|
464
|
+
: 0;
|
|
465
|
+
const detail = prompt || (targets ? `${targets} agent${targets === 1 ? "" : "s"}` : tool);
|
|
466
|
+
return { label: `Collab/${tool}`, body: clip(detail) };
|
|
467
|
+
}
|
|
456
468
|
return null;
|
|
457
469
|
}
|
|
458
470
|
|
package/src/control-server.ts
CHANGED
|
@@ -28,6 +28,11 @@ interface ControlServerOptions {
|
|
|
28
28
|
// directly into the session (web terminal / TUI) so the runner can mirror it
|
|
29
29
|
// into the dashboard chat and start tailing the turn transcript for reasoning.
|
|
30
30
|
onUserPrompt?(input: { prompt: string; transcriptPath?: string }): Promise<void>;
|
|
31
|
+
// A provider SessionEnd hook signals the session is over so the runner can
|
|
32
|
+
// compute end-of-session Insights signals (#184 context ratio) from the full
|
|
33
|
+
// transcript. transcriptPath is optional — the runner falls back to the last
|
|
34
|
+
// path it saw during the session.
|
|
35
|
+
onSessionEnd?(input: { reason?: string; transcriptPath?: string }): Promise<void>;
|
|
31
36
|
}
|
|
32
37
|
|
|
33
38
|
export function startControlServer(options: ControlServerOptions): ControlServer {
|
|
@@ -73,6 +78,9 @@ export function startControlServer(options: ControlServerOptions): ControlServer
|
|
|
73
78
|
if (url.pathname === "/user-prompt" && req.method === "POST") {
|
|
74
79
|
return handleUserPrompt(req, options);
|
|
75
80
|
}
|
|
81
|
+
if (url.pathname === "/session-end" && req.method === "POST") {
|
|
82
|
+
return handleSessionEnd(req, options);
|
|
83
|
+
}
|
|
76
84
|
if (url.pathname === "/monitor") {
|
|
77
85
|
const upgraded = srv.upgrade(req, { data: { kind: "monitor" } });
|
|
78
86
|
return upgraded ? undefined : new Response("WebSocket upgrade failed", { status: 400 });
|
|
@@ -343,6 +351,16 @@ async function handleUserPrompt(req: Request, options: ControlServerOptions): Pr
|
|
|
343
351
|
return Response.json({ ok: true });
|
|
344
352
|
}
|
|
345
353
|
|
|
354
|
+
/**
 * Handle `POST /session-end`: forward the optional `reason` and `transcriptPath`
 * string fields from the JSON body to the `onSessionEnd` callback.
 *
 * Responds `{ ok: false, reason }` when no callback is configured and `{ ok: true }`
 * otherwise. A missing or malformed body is treated as "no fields supplied", and
 * non-string field values are dropped.
 */
async function handleSessionEnd(req: Request, options: ControlServerOptions): Promise<Response> {
  if (!options.onSessionEnd) {
    return Response.json({ ok: false, reason: "session-end capture unavailable" });
  }

  const payload = await req.json().catch(() => null);
  const fields = isRecord(payload) ? payload : undefined;

  const rawReason: unknown = fields?.reason;
  const reason = typeof rawReason === "string" ? rawReason : undefined;
  const rawTranscriptPath: unknown = fields?.transcriptPath;
  const transcriptPath = typeof rawTranscriptPath === "string" ? rawTranscriptPath : undefined;

  // Fire-and-forget: the SessionEnd hook must not block Claude shutting down, so the
  // callback's promise is neither awaited nor allowed to surface a rejection.
  const pending = Promise.resolve(options.onSessionEnd({ reason, transcriptPath }));
  void pending.catch(() => {});

  return Response.json({ ok: true });
}
|
|
363
|
+
|
|
346
364
|
async function handleStatus(req: Request, options: ControlServerOptions): Promise<Response> {
|
|
347
365
|
const body = await req.json().catch(() => null) as Partial<ProviderStatusEvent> | null;
|
|
348
366
|
const status = body?.status;
|
package/src/runner.ts
CHANGED
|
@@ -9,7 +9,7 @@ import type { ManagedProcess, ProviderAdapter, ProviderConfig, ProviderPermissio
|
|
|
9
9
|
import { messagesWithCachedAttachments } from "./attachment-cache";
|
|
10
10
|
import { ClaimTracker } from "./claim-tracker";
|
|
11
11
|
import { startControlServer, type ControlServer } from "./control-server";
|
|
12
|
-
import { extractLastAssistantTurn, extractFinalAssistantMessage, extractHookAssistantMessage, extractLatestTurnSteps, transcriptLooksComplete } from "./adapters/claude-transcript";
|
|
12
|
+
import { extractLastAssistantTurn, extractFinalAssistantMessage, extractHookAssistantMessage, extractLatestTurnSteps, transcriptLooksComplete, analyzeSession } from "./adapters/claude-transcript";
|
|
13
13
|
import { agentProfileProjectionReport } from "./profile-projection";
|
|
14
14
|
import { profileUsesHostProviderGlobals } from "./profile-home";
|
|
15
15
|
import { runtimeMetadata } from "./version";
|
|
@@ -63,6 +63,9 @@ const CLAIM_RENEW_INTERVAL_MS = 5 * 60 * 1000;
|
|
|
63
63
|
const HTTP_LIVENESS_INTERVAL_MS = 20_000;
|
|
64
64
|
const HTTP_LIVENESS_LOG_INTERVAL_MS = 5 * 60 * 1000;
|
|
65
65
|
const TOKEN_RENEW_RETRY_MS = 60_000;
|
|
66
|
+
// Debounce reactive token recovery so a burst of 401-ing calls in the same window
|
|
67
|
+
// triggers a single re-mint attempt, not one per failing request.
|
|
68
|
+
const REACTIVE_TOKEN_RECOVERY_DEBOUNCE_MS = 10_000;
|
|
66
69
|
const UNEXPECTED_EXIT_WINDOW_MS = 2 * 60 * 1000;
|
|
67
70
|
const RAPID_EXIT_MS = 30 * 1000;
|
|
68
71
|
const MAX_RAPID_UNEXPECTED_EXITS = 3;
|
|
@@ -128,8 +131,12 @@ export class AgentRunner {
|
|
|
128
131
|
private tokenRenewTimer?: Timer;
|
|
129
132
|
private tokenRenewInFlight = false;
|
|
130
133
|
private tokenRenewLastLog?: { key: string; at: number };
|
|
134
|
+
private reactiveTokenRecoveryAt?: number;
|
|
131
135
|
private processStartedAt = 0;
|
|
132
136
|
private providerSessionId = crypto.randomUUID();
|
|
137
|
+
// Last transcript path seen this session — used by end-of-session Insights (#184)
|
|
138
|
+
// when the SessionEnd hook payload omits it.
|
|
139
|
+
private lastTranscriptPath?: string;
|
|
133
140
|
private lifecycleAction?: "shutting-down" | "killing" | "restarting";
|
|
134
141
|
private readonly unexpectedExitTimes: number[] = [];
|
|
135
142
|
private readonly pendingMessages = new Map<number, Message>();
|
|
@@ -139,10 +146,12 @@ export class AgentRunner {
|
|
|
139
146
|
// Session-mirror: a synthesized id grouping a turn's reasoning/tool steps and
|
|
140
147
|
// its final response. Set when a provider-turn starts, cleared when it ends.
|
|
141
148
|
private currentTurnId?: string;
|
|
142
|
-
// Prompt-echo dedup:
|
|
143
|
-
// initial prompt)
|
|
144
|
-
//
|
|
145
|
-
|
|
149
|
+
// Prompt-echo dedup: a short, time-bounded queue of prompts the runner itself
|
|
150
|
+
// injected (chat box or initial prompt) that are still awaiting their matching
|
|
151
|
+
// UserPromptSubmit echo. A single slot dropped earlier entries when several prompts
|
|
152
|
+
// were injected before their echoes returned (rapid sends while the provider is busy
|
|
153
|
+
// and queues them) — the evicted ones then double-posted. Match consumes one entry.
|
|
154
|
+
private injectedPrompts: Array<{ text: string; at: number }> = [];
|
|
146
155
|
// Busy reconciler: consecutive idle probes observed while claims still say busy.
|
|
147
156
|
private busyReconcileIdleStreak = 0;
|
|
148
157
|
private busyReconcileTimer?: ReturnType<typeof setInterval>;
|
|
@@ -239,6 +248,7 @@ export class AgentRunner {
|
|
|
239
248
|
onReplyObligations: () => this.http.listReplyObligations(this.agentId),
|
|
240
249
|
onSessionTurn: (input) => this.publishSessionTurn(input),
|
|
241
250
|
onUserPrompt: (input) => this.handleUserPrompt(input),
|
|
251
|
+
onSessionEnd: (input) => this.handleSessionEnd(input),
|
|
242
252
|
});
|
|
243
253
|
this.writeRunnerInfoFile();
|
|
244
254
|
this.options.adapter.onStatusChange((status) => {
|
|
@@ -303,6 +313,7 @@ export class AgentRunner {
|
|
|
303
313
|
|
|
304
314
|
private async spawnProvider(): Promise<ManagedProcess> {
|
|
305
315
|
this.providerSessionId = crypto.randomUUID();
|
|
316
|
+
this.lastTranscriptPath = undefined;
|
|
306
317
|
const includeProviderGlobals = profileUsesHostProviderGlobals(this.options);
|
|
307
318
|
const env = {
|
|
308
319
|
...process.env as Record<string, string>,
|
|
@@ -600,7 +611,7 @@ export class AgentRunner {
|
|
|
600
611
|
if (messageId) this.pendingPromptMessageId = messageId;
|
|
601
612
|
// Mark so the matching UserPromptSubmit echo isn't double-posted: a chat-box
|
|
602
613
|
// prompt already created its own session message shown in the dashboard.
|
|
603
|
-
this.
|
|
614
|
+
this.recordInjectedPrompt(body.trim());
|
|
604
615
|
await this.options.adapter.deliverInitialPrompt(this.process, body);
|
|
605
616
|
return { injected: true, messageId };
|
|
606
617
|
}
|
|
@@ -891,6 +902,7 @@ export class AgentRunner {
|
|
|
891
902
|
// no relay message) are mirrored too. A reply obligation, when present, is still
|
|
892
903
|
// used as replyTo so the Stop hook stops nagging the agent to /reply.
|
|
893
904
|
private async publishSessionTurn(input: { transcriptPath: string; lastAssistantMessage?: unknown }): Promise<void> {
|
|
905
|
+
if (input.transcriptPath) this.lastTranscriptPath = input.transcriptPath;
|
|
894
906
|
const turnId = this.currentTurnId;
|
|
895
907
|
this.stopReasoningTail();
|
|
896
908
|
// Optional correlation for threading + obligation clearing — never a capture gate.
|
|
@@ -972,6 +984,7 @@ export class AgentRunner {
|
|
|
972
984
|
});
|
|
973
985
|
} catch (error) {
|
|
974
986
|
this.logRunnerDiagnostic(`session ${input.session.type} capture failed: ${error instanceof Error ? error.message : String(error)}`);
|
|
987
|
+
if (isHttpAuthError(error)) this.recoverRuntimeTokenAfterAuthFailure("session-capture");
|
|
975
988
|
}
|
|
976
989
|
}
|
|
977
990
|
|
|
@@ -980,6 +993,7 @@ export class AgentRunner {
|
|
|
980
993
|
// tailing for the turn. Skips prompts the runner itself injected (chat box, relay
|
|
981
994
|
// deliveries) so those aren't double-posted.
|
|
982
995
|
private async handleUserPrompt(input: { prompt: string; transcriptPath?: string }): Promise<void> {
|
|
996
|
+
if (input.transcriptPath) this.lastTranscriptPath = input.transcriptPath;
|
|
983
997
|
if (!this.currentTurnId) this.currentTurnId = crypto.randomUUID();
|
|
984
998
|
const text = input.prompt.trim();
|
|
985
999
|
if (text && !this.isRunnerInjectedPrompt(text)) {
|
|
@@ -996,6 +1010,42 @@ export class AgentRunner {
|
|
|
996
1010
|
if (input.transcriptPath) this.startReasoningTail(input.transcriptPath);
|
|
997
1011
|
}
|
|
998
1012
|
|
|
1013
|
+
// SessionEnd: compute end-of-session Insights signals (#184 context-gathering
// ratio) from the full transcript and record them with the relay. Mechanical and
// model-free — costs zero agent tokens and the agent can't game it. The relay drops
// the observation if Insights or this signal is toggled off. Best-effort: never
// blocks or fails provider shutdown.
//
// input.reason          SessionEnd reason from the hook, attached as `endReason`.
// input.transcriptPath  Transcript to analyse; falls back to the last path seen
//                       during this session when the hook payload omits it.
private async handleSessionEnd(input: { reason?: string; transcriptPath?: string }): Promise<void> {
  // Only Claude transcripts have this shape; Codex sessions are skipped for now.
  if (this.options.provider !== "claude") return;
  const transcriptPath = input.transcriptPath || this.lastTranscriptPath;
  if (!transcriptPath) return;

  // Snapshot the session id before the first await: spawnProvider() assigns a new
  // providerSessionId, so a respawn that lands while the transcript is being read
  // would otherwise attribute this observation to the next session.
  const sessionId = this.providerSessionId;

  let jsonl: string;
  try {
    jsonl = await readFile(transcriptPath, "utf8");
  } catch {
    // Transcript missing or unreadable — nothing to measure.
    return;
  }

  try {
    // Analysed inside the try so an unexpected parse-time failure is logged below
    // instead of rejecting silently.
    const analysis = analyzeSession(jsonl);
    if (!analysis) return; // no tool calls = nothing substantive to measure
    await this.http.recordInsightObservation({
      sessionId,
      project: this.options.cwd,
      agentId: this.agentId,
      signal: "context_ratio",
      value: { ...analysis.metric, ...(input.reason ? { endReason: input.reason } : {}) },
      outcome: { ...analysis.outcome },
      source: "server",
    });
    this.sessionLog(`insights: context_ratio ${analysis.metric.ratio.toFixed(2)} (${analysis.metric.gatheringCalls}/${analysis.metric.totalToolCalls} gathering)`);
  } catch (error) {
    // 409 = Insights/feature toggled off; anything else is best-effort too.
    this.sessionDebug(`insights context_ratio skipped: ${error instanceof Error ? error.message : String(error)}`);
    if (isHttpAuthError(error)) this.recoverRuntimeTokenAfterAuthFailure("insights");
  }
}
|
|
1048
|
+
|
|
999
1049
|
// Route a provider-emitted session event (Codex app-server) into the chat mirror.
|
|
1000
1050
|
// Mirrors the same semantics as the Claude lane: prompts are echoed with dedup,
|
|
1001
1051
|
// and a response is only auto-captured when the agent won't separately reply to a
|
|
@@ -1048,11 +1098,23 @@ export class AgentRunner {
|
|
|
1048
1098
|
});
|
|
1049
1099
|
}
|
|
1050
1100
|
|
|
1101
|
+
// Queue a prompt the runner injected itself so that its UserPromptSubmit echo can be
// suppressed later. Entries older than PROMPT_ECHO_DEDUP_MS are pruned before the new
// one is appended, and the queue is capped at 50 entries (oldest dropped) as a guard
// against echoes that never arrive — e.g. the provider drops a queued prompt — so the
// queue can't grow unbounded.
private recordInjectedPrompt(text: string): void {
  const recordedAt = Date.now();
  const live = this.injectedPrompts.filter((entry) => recordedAt - entry.at < PROMPT_ECHO_DEDUP_MS);
  live.push({ text, at: recordedAt });
  if (live.length > 50) {
    live.shift();
  }
  this.injectedPrompts = live;
}
|
|
1110
|
+
|
|
1051
1111
|
private isRunnerInjectedPrompt(text: string): boolean {
|
|
1052
1112
|
if (RELAY_INJECTION_MARKERS.some((marker) => text.startsWith(marker))) return true;
|
|
1053
|
-
const
|
|
1054
|
-
|
|
1055
|
-
|
|
1113
|
+
const now = Date.now();
|
|
1114
|
+
this.injectedPrompts = this.injectedPrompts.filter((p) => now - p.at < PROMPT_ECHO_DEDUP_MS);
|
|
1115
|
+
const idx = this.injectedPrompts.findIndex((p) => p.text === text);
|
|
1116
|
+
if (idx !== -1) {
|
|
1117
|
+
this.injectedPrompts.splice(idx, 1); // consume one — identical repeats each match once
|
|
1056
1118
|
return true;
|
|
1057
1119
|
}
|
|
1058
1120
|
return false;
|
|
@@ -1259,6 +1321,25 @@ export class AgentRunner {
|
|
|
1259
1321
|
this.httpLivenessAuthFailed = true;
|
|
1260
1322
|
if (this.httpLivenessTimer) clearInterval(this.httpLivenessTimer);
|
|
1261
1323
|
this.httpLivenessTimer = undefined;
|
|
1324
|
+
// A 401/403 here is the only timely signal that the token died — stopping the
|
|
1325
|
+
// liveness timer means there is no second chance, so recover from THIS failure.
|
|
1326
|
+
this.recoverRuntimeTokenAfterAuthFailure("http-liveness");
|
|
1327
|
+
}
|
|
1328
|
+
|
|
1329
|
+
// Reactive token recovery. A definitive relay auth failure (401/403) means the
// runtime token is dead right now — expired, or (the common case) revoked when the
// relay marked this agent stale across its own restart/reconnect. The proactive renew
// timer is keyed to TTL and structurally cannot catch a revocation, so the auth
// failure itself has to drive recovery. renewRuntimeToken() prefers an orchestrator
// re-mint, which heals even a revoked token.
//
// No-op when the runner is stopped, a renewal is already in flight, the token can be
// neither renewed nor re-minted, or a previous reactive attempt started less than
// REACTIVE_TOKEN_RECOVERY_DEBOUNCE_MS ago (so a burst of failing calls re-mints once).
// `source` only labels the diagnostic log line.
private recoverRuntimeTokenAfterAuthFailure(source: string): void {
  if (this.stopped) return;
  if (this.tokenRenewInFlight) return;

  const recoverable = this.isRuntimeTokenRenewable() || this.canRemintViaOrchestrator();
  if (!recoverable) return;

  const attemptAt = Date.now();
  const previousAttemptAt = this.reactiveTokenRecoveryAt;
  if (previousAttemptAt && attemptAt - previousAttemptAt < REACTIVE_TOKEN_RECOVERY_DEBOUNCE_MS) {
    return;
  }

  this.reactiveTokenRecoveryAt = attemptAt;
  this.logRunnerDiagnostic(`[runner] relay auth failure on ${source}; recovering runtime token`);
  void this.renewRuntimeToken();
}
|
|
1263
1344
|
|
|
1264
1345
|
private logHttpLivenessFailure(error: unknown, authFailed: boolean): void {
|
|
@@ -1432,6 +1513,11 @@ export class AgentRunner {
|
|
|
1432
1513
|
this.http.setToken(token);
|
|
1433
1514
|
this.bus.setToken(token);
|
|
1434
1515
|
this.httpLivenessAuthFailed = false;
|
|
1516
|
+
this.reactiveTokenRecoveryAt = undefined;
|
|
1517
|
+
// An earlier auth failure may have stopped the liveness loop; restart it so the
|
|
1518
|
+
// agent reports live again on the fresh token. startHttpLiveness clears any
|
|
1519
|
+
// existing timer first, so this is safe on the normal (proactive) renew path too.
|
|
1520
|
+
this.startHttpLiveness();
|
|
1435
1521
|
this.pendingTimelineEvent = { status, id: record.jti, timestamp: Date.now() };
|
|
1436
1522
|
this.bus.reconnectTransport(status === "runtime-token-reminted" ? "runtime token re-minted" : "runtime token renewed");
|
|
1437
1523
|
this.publishStatus();
|