agent-relay-runner 0.12.4 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agent-relay-runner",
3
- "version": "0.12.4",
3
+ "version": "0.14.0",
4
4
  "description": "Unified provider lifecycle runner for Agent Relay",
5
5
  "type": "module",
6
6
  "bin": {
@@ -20,7 +20,7 @@
20
20
  "directory": "runner"
21
21
  },
22
22
  "dependencies": {
23
- "agent-relay-sdk": "0.2.6"
23
+ "agent-relay-sdk": "0.2.7"
24
24
  },
25
25
  "devDependencies": {
26
26
  "@types/bun": "latest",
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "agent-relay-runner",
3
3
  "description": "Thin Agent Relay runner bridge for Claude Code",
4
- "version": "0.12.4",
4
+ "version": "0.14.0",
5
5
  "agentRelayContracts": {
6
6
  "providerPluginProtocol": 1
7
7
  }
@@ -72,6 +72,26 @@ relay_post_user_prompt() {
72
72
  -d "$body" >/dev/null 2>&1 || true
73
73
  }
74
74
 
75
+ relay_post_session_end() {
76
+ # Insights #184: tell the runner the session ended so it can compute the
77
+ # end-of-session context-gathering ratio from the full transcript. Fire-and-forget;
78
+ # the transcript path is optional (the runner falls back to the last path it saw).
79
+ local transcript_path="${1:-}"
80
+ local reason="${2:-}"
81
+ local port="${AGENT_RELAY_RUNNER_PORT:-}"
82
+ [ -z "$port" ] && return 0
83
+ local body="{"
84
+ [ -n "$transcript_path" ] && body="${body}\"transcriptPath\":\"$(relay_json_escape "$transcript_path")\""
85
+ if [ -n "$reason" ]; then
86
+ [ "$body" != "{" ] && body="${body},"
87
+ body="${body}\"reason\":\"$(relay_json_escape "$reason")\""
88
+ fi
89
+ body="${body}}"
90
+ curl -fsS --max-time 3 -X POST "http://127.0.0.1:${port}/session-end" \
91
+ -H 'Content-Type: application/json' \
92
+ -d "$body" >/dev/null 2>&1 || true
93
+ }
94
+
75
95
  relay_pending_reply_stop_decision() {
76
96
  local port="${AGENT_RELAY_RUNNER_PORT:-}"
77
97
  [ -z "$port" ] && return 0
@@ -4,6 +4,7 @@ source "${CLAUDE_PLUGIN_ROOT:-$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)}/
4
4
 
5
5
  payload="$(cat || true)"
6
6
  reason="$(relay_json_string_field reason "$payload")"
7
+ transcript_path="$(relay_json_string_field transcript_path "$payload")"
7
8
 
8
9
  case "$reason" in
9
10
  clear)
@@ -14,5 +15,8 @@ case "$reason" in
14
15
  ;;
15
16
  logout|prompt_input_exit|bypass_permissions_disabled|other|*)
16
17
  relay_post_status_clearing_subagents offline
18
+ # Real session termination: capture end-of-session Insights (#184). Order after the
19
+ # status post is arbitrary — the runner reads the transcript file regardless.
20
+ relay_post_session_end "$transcript_path" "$reason"
17
21
  ;;
18
22
  esac
@@ -16,6 +16,7 @@ interface TranscriptBlock {
16
16
  thinking?: string;
17
17
  name?: string;
18
18
  input?: Record<string, unknown>;
19
+ is_error?: boolean;
19
20
  }
20
21
 
21
22
  export interface TurnStep {
@@ -186,6 +187,136 @@ export function summarizeToolUse(name: string, input: Record<string, unknown> |
186
187
  return summary.length > 200 ? `${summary.slice(0, 197)}…` : summary;
187
188
  }
188
189
 
190
+ // --- Insights #184: context-gathering ratio (epic #183, docs/self-improvement.md) ---
191
+ //
192
+ // Computed mechanically from the whole-session transcript at session end — no model
193
+ // involvement, so it costs zero agent tokens and the agent can't game it. The ratio is
194
+ // paired with cheap outcome proxies (user re-prompts, tool errors) so it's never read
195
+ // alone — see the anti-Goodhart constraint in the epic.
196
+
197
+ // Tools that acquire context without changing anything. Anything not matched here is
198
+ // treated as an action (mutation, execution, or a delegation/direction decision) —
199
+ // Bash counts as an action because it executes (a conservative, documented choice for
200
+ // v0; `cat`/`ls` via Bash are misclassified, refine later if the data warrants it).
201
+ const GATHERING_TOOLS = new Set([
202
+ "Read", "Grep", "Glob", "LS", "NotebookRead", "WebFetch", "WebSearch",
203
+ ]);
204
+ const GATHERING_NAME = /(?:^|[._-])(read|get|list|search|grep|glob|find|fetch|query|browse|view|show|cat|status|inspect|lookup|symbols|snippet)/i;
205
+
206
+ function isGatheringTool(name: string): boolean {
207
+ if (GATHERING_TOOLS.has(name)) return true;
208
+ // MCP / custom tools: classify by name shape (e.g. mcp__callmux__searxng_web_search).
209
+ return GATHERING_NAME.test(name);
210
+ }
211
+
212
+ export interface ContextRatioMetric {
213
+ /** Session-wide gathering fraction: gatheringCalls / totalToolCalls. The headline metric. */
214
+ ratio: number;
215
+ gatheringCalls: number;
216
+ actionCalls: number;
217
+ totalToolCalls: number;
218
+ /** Consecutive gathering calls before the first action — the "read N files before moving" signal. */
219
+ leadingGather: number;
220
+ /** Substantive assistant turns (turns that produced text or a tool call). */
221
+ turns: number;
222
+ }
223
+
224
+ export interface SessionOutcomeProxy {
225
+ /** Real user prompts in the session — more back-and-forth ~ more clarification/correction. */
226
+ userPrompts: number;
227
+ /** tool_result blocks flagged is_error — failures/workarounds the agent hit. */
228
+ toolErrors: number;
229
+ }
230
+
231
+ export interface SessionAnalysis {
232
+ metric: ContextRatioMetric;
233
+ outcome: SessionOutcomeProxy;
234
+ }
235
+
236
+ /**
237
+ * Walk the full transcript and compute the context-gathering ratio plus paired outcome
238
+ * proxies. Returns null when there's nothing substantive to measure (no tool calls) —
239
+ * trivial sessions have nothing to learn from and shouldn't pollute the baselines.
240
+ */
241
+ export function analyzeSession(jsonl: string): SessionAnalysis | null {
242
+ let gatheringCalls = 0;
243
+ let actionCalls = 0;
244
+ let leadingGather = 0;
245
+ let sawAction = false;
246
+ let userPrompts = 0;
247
+ let toolErrors = 0;
248
+ let turns = 0;
249
+
250
+ for (const line of jsonl.split("\n")) {
251
+ const trimmed = line.trim();
252
+ if (!trimmed) continue;
253
+ let entry: TranscriptEntry;
254
+ try {
255
+ entry = JSON.parse(trimmed) as TranscriptEntry;
256
+ } catch {
257
+ continue;
258
+ }
259
+ if (isRealUserPrompt(entry)) userPrompts++;
260
+ if (entry.type === "user") {
261
+ for (const b of blocks(entry.message)) {
262
+ if (b.type === "tool_result" && b.is_error === true) toolErrors++;
263
+ }
264
+ continue;
265
+ }
266
+ if (entry.type !== "assistant") continue;
267
+ let producedSomething = false;
268
+ for (const b of blocks(entry.message)) {
269
+ if (b.type === "text" && b.text?.trim()) producedSomething = true;
270
+ if (b.type !== "tool_use" || typeof b.name !== "string" || !b.name) continue;
271
+ producedSomething = true;
272
+ if (isGatheringTool(b.name)) {
273
+ gatheringCalls++;
274
+ if (!sawAction) leadingGather++;
275
+ } else {
276
+ actionCalls++;
277
+ sawAction = true;
278
+ }
279
+ }
280
+ if (producedSomething) turns++;
281
+ }
282
+
283
+ const totalToolCalls = gatheringCalls + actionCalls;
284
+ if (totalToolCalls === 0) return null;
285
+
286
+ return {
287
+ metric: {
288
+ ratio: gatheringCalls / totalToolCalls,
289
+ gatheringCalls,
290
+ actionCalls,
291
+ totalToolCalls,
292
+ leadingGather,
293
+ turns,
294
+ },
295
+ outcome: { userPrompts, toolErrors },
296
+ };
297
+ }
298
+
299
+ /** Count substantive assistant turns — used by the #185 introspection gate. */
300
+ export function countSubstantiveTurns(jsonl: string): number {
301
+ let turns = 0;
302
+ for (const line of jsonl.split("\n")) {
303
+ const trimmed = line.trim();
304
+ if (!trimmed) continue;
305
+ let entry: TranscriptEntry;
306
+ try {
307
+ entry = JSON.parse(trimmed) as TranscriptEntry;
308
+ } catch {
309
+ continue;
310
+ }
311
+ if (entry.type !== "assistant") continue;
312
+ const hasContent = blocks(entry.message).some(
313
+ (b) => (b.type === "text" && b.text?.trim()) || (b.type === "tool_use" && b.name),
314
+ );
315
+ if (hasContent) turns++;
316
+ }
317
+ return turns;
318
+ }
319
+
189
320
  export function extractHookAssistantMessage(content: unknown): string {
190
321
  if (typeof content === "string") return content.trim();
191
322
  if (!Array.isArray(content)) return "";
@@ -28,6 +28,11 @@ interface ControlServerOptions {
28
28
  // directly into the session (web terminal / TUI) so the runner can mirror it
29
29
  // into the dashboard chat and start tailing the turn transcript for reasoning.
30
30
  onUserPrompt?(input: { prompt: string; transcriptPath?: string }): Promise<void>;
31
+ // A provider SessionEnd hook signals the session is over so the runner can
32
+ // compute end-of-session Insights signals (#184 context ratio) from the full
33
+ // transcript. transcriptPath is optional — the runner falls back to the last
34
+ // path it saw during the session.
35
+ onSessionEnd?(input: { reason?: string; transcriptPath?: string }): Promise<void>;
31
36
  }
32
37
 
33
38
  export function startControlServer(options: ControlServerOptions): ControlServer {
@@ -73,6 +78,9 @@ export function startControlServer(options: ControlServerOptions): ControlServer
73
78
  if (url.pathname === "/user-prompt" && req.method === "POST") {
74
79
  return handleUserPrompt(req, options);
75
80
  }
81
+ if (url.pathname === "/session-end" && req.method === "POST") {
82
+ return handleSessionEnd(req, options);
83
+ }
76
84
  if (url.pathname === "/monitor") {
77
85
  const upgraded = srv.upgrade(req, { data: { kind: "monitor" } });
78
86
  return upgraded ? undefined : new Response("WebSocket upgrade failed", { status: 400 });
@@ -343,6 +351,16 @@ async function handleUserPrompt(req: Request, options: ControlServerOptions): Pr
343
351
  return Response.json({ ok: true });
344
352
  }
345
353
 
354
+ async function handleSessionEnd(req: Request, options: ControlServerOptions): Promise<Response> {
355
+ if (!options.onSessionEnd) return Response.json({ ok: false, reason: "session-end capture unavailable" });
356
+ const body = await req.json().catch(() => null);
357
+ const reason = isRecord(body) && typeof body.reason === "string" ? body.reason : undefined;
358
+ const transcriptPath = isRecord(body) && typeof body.transcriptPath === "string" ? body.transcriptPath : undefined;
359
+ // Fire-and-forget: the SessionEnd hook must not block Claude shutting down.
360
+ void Promise.resolve(options.onSessionEnd({ reason, transcriptPath })).catch(() => {});
361
+ return Response.json({ ok: true });
362
+ }
363
+
346
364
  async function handleStatus(req: Request, options: ControlServerOptions): Promise<Response> {
347
365
  const body = await req.json().catch(() => null) as Partial<ProviderStatusEvent> | null;
348
366
  const status = body?.status;
package/src/runner.ts CHANGED
@@ -9,7 +9,7 @@ import type { ManagedProcess, ProviderAdapter, ProviderConfig, ProviderPermissio
9
9
  import { messagesWithCachedAttachments } from "./attachment-cache";
10
10
  import { ClaimTracker } from "./claim-tracker";
11
11
  import { startControlServer, type ControlServer } from "./control-server";
12
- import { extractLastAssistantTurn, extractFinalAssistantMessage, extractHookAssistantMessage, extractLatestTurnSteps, transcriptLooksComplete } from "./adapters/claude-transcript";
12
+ import { extractLastAssistantTurn, extractFinalAssistantMessage, extractHookAssistantMessage, extractLatestTurnSteps, transcriptLooksComplete, analyzeSession } from "./adapters/claude-transcript";
13
13
  import { agentProfileProjectionReport } from "./profile-projection";
14
14
  import { profileUsesHostProviderGlobals } from "./profile-home";
15
15
  import { runtimeMetadata } from "./version";
@@ -134,6 +134,9 @@ export class AgentRunner {
134
134
  private reactiveTokenRecoveryAt?: number;
135
135
  private processStartedAt = 0;
136
136
  private providerSessionId = crypto.randomUUID();
137
+ // Last transcript path seen this session — used by end-of-session Insights (#184)
138
+ // when the SessionEnd hook payload omits it.
139
+ private lastTranscriptPath?: string;
137
140
  private lifecycleAction?: "shutting-down" | "killing" | "restarting";
138
141
  private readonly unexpectedExitTimes: number[] = [];
139
142
  private readonly pendingMessages = new Map<number, Message>();
@@ -143,10 +146,12 @@ export class AgentRunner {
143
146
  // Session-mirror: a synthesized id grouping a turn's reasoning/tool steps and
144
147
  // its final response. Set when a provider-turn starts, cleared when it ends.
145
148
  private currentTurnId?: string;
146
- // Prompt-echo dedup: the last prompt the runner itself injected (chat box or
147
- // initial prompt). A UserPromptSubmit hook echo matching this within the window
148
- // is the same prompt arriving back from the provider and must not double-post.
149
- private lastInjectedPrompt?: { text: string; at: number };
149
+ // Prompt-echo dedup: a short, time-bounded queue of prompts the runner itself
150
+ // injected (chat box or initial prompt) that are still awaiting their matching
151
+ // UserPromptSubmit echo. A single slot dropped earlier entries when several prompts
152
+ // were injected before their echoes returned (rapid sends while the provider is busy
153
+ // and queues them) — the evicted ones then double-posted. Match consumes one entry.
154
+ private injectedPrompts: Array<{ text: string; at: number }> = [];
150
155
  // Busy reconciler: consecutive idle probes observed while claims still say busy.
151
156
  private busyReconcileIdleStreak = 0;
152
157
  private busyReconcileTimer?: ReturnType<typeof setInterval>;
@@ -243,6 +248,7 @@ export class AgentRunner {
243
248
  onReplyObligations: () => this.http.listReplyObligations(this.agentId),
244
249
  onSessionTurn: (input) => this.publishSessionTurn(input),
245
250
  onUserPrompt: (input) => this.handleUserPrompt(input),
251
+ onSessionEnd: (input) => this.handleSessionEnd(input),
246
252
  });
247
253
  this.writeRunnerInfoFile();
248
254
  this.options.adapter.onStatusChange((status) => {
@@ -307,6 +313,7 @@ export class AgentRunner {
307
313
 
308
314
  private async spawnProvider(): Promise<ManagedProcess> {
309
315
  this.providerSessionId = crypto.randomUUID();
316
+ this.lastTranscriptPath = undefined;
310
317
  const includeProviderGlobals = profileUsesHostProviderGlobals(this.options);
311
318
  const env = {
312
319
  ...process.env as Record<string, string>,
@@ -604,7 +611,7 @@ export class AgentRunner {
604
611
  if (messageId) this.pendingPromptMessageId = messageId;
605
612
  // Mark so the matching UserPromptSubmit echo isn't double-posted: a chat-box
606
613
  // prompt already created its own session message shown in the dashboard.
607
- this.lastInjectedPrompt = { text: body.trim(), at: Date.now() };
614
+ this.recordInjectedPrompt(body.trim());
608
615
  await this.options.adapter.deliverInitialPrompt(this.process, body);
609
616
  return { injected: true, messageId };
610
617
  }
@@ -895,6 +902,7 @@ export class AgentRunner {
895
902
  // no relay message) are mirrored too. A reply obligation, when present, is still
896
903
  // used as replyTo so the Stop hook stops nagging the agent to /reply.
897
904
  private async publishSessionTurn(input: { transcriptPath: string; lastAssistantMessage?: unknown }): Promise<void> {
905
+ if (input.transcriptPath) this.lastTranscriptPath = input.transcriptPath;
898
906
  const turnId = this.currentTurnId;
899
907
  this.stopReasoningTail();
900
908
  // Optional correlation for threading + obligation clearing — never a capture gate.
@@ -985,6 +993,7 @@ export class AgentRunner {
985
993
  // tailing for the turn. Skips prompts the runner itself injected (chat box, relay
986
994
  // deliveries) so those aren't double-posted.
987
995
  private async handleUserPrompt(input: { prompt: string; transcriptPath?: string }): Promise<void> {
996
+ if (input.transcriptPath) this.lastTranscriptPath = input.transcriptPath;
988
997
  if (!this.currentTurnId) this.currentTurnId = crypto.randomUUID();
989
998
  const text = input.prompt.trim();
990
999
  if (text && !this.isRunnerInjectedPrompt(text)) {
@@ -1001,6 +1010,42 @@ export class AgentRunner {
1001
1010
  if (input.transcriptPath) this.startReasoningTail(input.transcriptPath);
1002
1011
  }
1003
1012
 
1013
+ // SessionEnd: compute end-of-session Insights signals (#184 context-gathering
1014
+ // ratio) from the full transcript and record them with the relay. Mechanical and
1015
+ // model-free — costs zero agent tokens and the agent can't game it. The relay drops
1016
+ // the observation if Insights or this signal is toggled off. Best-effort: never
1017
+ // blocks or fails provider shutdown.
1018
+ private async handleSessionEnd(input: { reason?: string; transcriptPath?: string }): Promise<void> {
1019
+ // Only Claude transcripts have this shape; Codex sessions are skipped for now.
1020
+ if (this.options.provider !== "claude") return;
1021
+ const transcriptPath = input.transcriptPath || this.lastTranscriptPath;
1022
+ if (!transcriptPath) return;
1023
+ let jsonl: string;
1024
+ try {
1025
+ jsonl = await readFile(transcriptPath, "utf8");
1026
+ } catch {
1027
+ return;
1028
+ }
1029
+ const analysis = analyzeSession(jsonl);
1030
+ if (!analysis) return; // no tool calls = nothing substantive to measure
1031
+ try {
1032
+ await this.http.recordInsightObservation({
1033
+ sessionId: this.providerSessionId,
1034
+ project: this.options.cwd,
1035
+ agentId: this.agentId,
1036
+ signal: "context_ratio",
1037
+ value: { ...analysis.metric, ...(input.reason ? { endReason: input.reason } : {}) },
1038
+ outcome: { ...analysis.outcome },
1039
+ source: "server",
1040
+ });
1041
+ this.sessionLog(`insights: context_ratio ${analysis.metric.ratio.toFixed(2)} (${analysis.metric.gatheringCalls}/${analysis.metric.totalToolCalls} gathering)`);
1042
+ } catch (error) {
1043
+ // 409 = Insights/feature toggled off; anything else is best-effort too.
1044
+ this.sessionDebug(`insights context_ratio skipped: ${error instanceof Error ? error.message : String(error)}`);
1045
+ if (isHttpAuthError(error)) this.recoverRuntimeTokenAfterAuthFailure("insights");
1046
+ }
1047
+ }
1048
+
1004
1049
  // Route a provider-emitted session event (Codex app-server) into the chat mirror.
1005
1050
  // Mirrors the same semantics as the Claude lane: prompts are echoed with dedup,
1006
1051
  // and a response is only auto-captured when the agent won't separately reply to a
@@ -1053,11 +1098,23 @@ export class AgentRunner {
1053
1098
  });
1054
1099
  }
1055
1100
 
1101
+ // Remember an injected prompt so its UserPromptSubmit echo can be suppressed. Prunes
1102
+ // expired entries first; a defensive length cap guards against echoes that never
1103
+ // arrive (e.g. the provider drops a queued prompt) so the queue can't grow unbounded.
1104
+ private recordInjectedPrompt(text: string): void {
1105
+ const now = Date.now();
1106
+ this.injectedPrompts = this.injectedPrompts.filter((p) => now - p.at < PROMPT_ECHO_DEDUP_MS);
1107
+ this.injectedPrompts.push({ text, at: now });
1108
+ if (this.injectedPrompts.length > 50) this.injectedPrompts.shift();
1109
+ }
1110
+
1056
1111
  private isRunnerInjectedPrompt(text: string): boolean {
1057
1112
  if (RELAY_INJECTION_MARKERS.some((marker) => text.startsWith(marker))) return true;
1058
- const recent = this.lastInjectedPrompt;
1059
- if (recent && recent.text === text && Date.now() - recent.at < PROMPT_ECHO_DEDUP_MS) {
1060
- this.lastInjectedPrompt = undefined;
1113
+ const now = Date.now();
1114
+ this.injectedPrompts = this.injectedPrompts.filter((p) => now - p.at < PROMPT_ECHO_DEDUP_MS);
1115
+ const idx = this.injectedPrompts.findIndex((p) => p.text === text);
1116
+ if (idx !== -1) {
1117
+ this.injectedPrompts.splice(idx, 1); // consume one — identical repeats each match once
1061
1118
  return true;
1062
1119
  }
1063
1120
  return false;