npm - agent-relay-runner - Versions diffs - 0.12.4 → 0.14.0 - Mend

agent-relay-runner 0.12.4 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

package/package.json +2 -2
package/plugins/claude/.claude-plugin/plugin.json +1 -1
package/plugins/claude/hooks/relay-status.sh +20 -0
package/plugins/claude/hooks/session-end.sh +4 -0
package/src/adapters/claude-transcript.ts +131 -0
package/src/control-server.ts +18 -0
package/src/runner.ts +66 -9

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "agent-relay-runner",
-  "version": "0.12.4",
+  "version": "0.14.0",
   "description": "Unified provider lifecycle runner for Agent Relay",
   "type": "module",
   "bin": {
@@ -20,7 +20,7 @@
     "directory": "runner"
   },
   "dependencies": {
-    "agent-relay-sdk": "0.2.6"
+    "agent-relay-sdk": "0.2.7"
   },
   "devDependencies": {
     "@types/bun": "latest",

package/plugins/claude/.claude-plugin/plugin.json CHANGED

@@ -1,7 +1,7 @@
 {
   "name": "agent-relay-runner",
   "description": "Thin Agent Relay runner bridge for Claude Code",
-  "version": "0.12.4",
+  "version": "0.14.0",
   "agentRelayContracts": {
     "providerPluginProtocol": 1
   }

package/plugins/claude/hooks/relay-status.sh CHANGED

@@ -72,6 +72,26 @@ relay_post_user_prompt() {
     -d "$body" >/dev/null 2>&1 || true
 }
+relay_post_session_end() {
+  # Insights #184: tell the runner the session ended so it can compute the
+  # end-of-session context-gathering ratio from the full transcript. Fire-and-forget;
+  # the transcript path is optional (the runner falls back to the last path it saw).
+  local transcript_path="${1:-}"
+  local reason="${2:-}"
+  local port="${AGENT_RELAY_RUNNER_PORT:-}"
+  [ -z "$port" ] && return 0
+  local body="{"
+  [ -n "$transcript_path" ] && body="${body}\"transcriptPath\":\"$(relay_json_escape "$transcript_path")\""
+  if [ -n "$reason" ]; then
+    [ "$body" != "{" ] && body="${body},"
+    body="${body}\"reason\":\"$(relay_json_escape "$reason")\""
+  fi
+  body="${body}}"
+  curl -fsS --max-time 3 -X POST "http://127.0.0.1:${port}/session-end" \
+    -H 'Content-Type: application/json' \
+    -d "$body" >/dev/null 2>&1 || true
+}
 relay_pending_reply_stop_decision() {
   local port="${AGENT_RELAY_RUNNER_PORT:-}"
   [ -z "$port" ] && return 0

package/plugins/claude/hooks/session-end.sh CHANGED

@@ -4,6 +4,7 @@ source "${CLAUDE_PLUGIN_ROOT:-$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)}/
 payload="$(cat || true)"
 reason="$(relay_json_string_field reason "$payload")"
+transcript_path="$(relay_json_string_field transcript_path "$payload")"
 case "$reason" in
   clear)
@@ -14,5 +15,8 @@ case "$reason" in
     ;;
   logout|prompt_input_exit|bypass_permissions_disabled|other|*)
     relay_post_status_clearing_subagents offline
+    # Real session termination: capture end-of-session Insights (#184). Order after the
+    # status post is arbitrary — the runner reads the transcript file regardless.
+    relay_post_session_end "$transcript_path" "$reason"
     ;;
 esac

package/src/adapters/claude-transcript.ts CHANGED

@@ -16,6 +16,7 @@ interface TranscriptBlock {
   thinking?: string;
   name?: string;
   input?: Record<string, unknown>;
+  is_error?: boolean;
 }
 export interface TurnStep {
@@ -186,6 +187,136 @@ export function summarizeToolUse(name: string, input: Record<string, unknown> |
   return summary.length > 200 ? `${summary.slice(0, 197)}…` : summary;
 }
+// --- Insights #184: context-gathering ratio (epic #183, docs/self-improvement.md) ---
+//
+// Computed mechanically from the whole-session transcript at session end — no model
+// involvement, so it costs zero agent tokens and the agent can't game it. The ratio is
+// paired with cheap outcome proxies (user re-prompts, tool errors) so it's never read
+// alone — see the anti-Goodhart constraint in the epic.
+// Tools that acquire context without changing anything. Anything not matched here is
+// treated as an action (mutation, execution, or a delegation/direction decision) —
+// Bash counts as an action because it executes (a conservative, documented choice for
+// v0; `cat`/`ls` via Bash are misclassified, refine later if the data warrants it).
+const GATHERING_TOOLS = new Set([
+  "Read", "Grep", "Glob", "LS", "NotebookRead", "WebFetch", "WebSearch",
+]);
+const GATHERING_NAME = /(?:^|[._-])(read|get|list|search|grep|glob|find|fetch|query|browse|view|show|cat|status|inspect|lookup|symbols|snippet)/i;
+function isGatheringTool(name: string): boolean {
+  if (GATHERING_TOOLS.has(name)) return true;
+  // MCP / custom tools: classify by name shape (e.g. mcp__callmux__searxng_web_search).
+  return GATHERING_NAME.test(name);
+}
+export interface ContextRatioMetric {
+  /** Session-wide gathering fraction: gatheringCalls / totalToolCalls. The headline metric. */
+  ratio: number;
+  gatheringCalls: number;
+  actionCalls: number;
+  totalToolCalls: number;
+  /** Consecutive gathering calls before the first action — the "read N files before moving" signal. */
+  leadingGather: number;
+  /** Substantive assistant turns (turns that produced text or a tool call). */
+  turns: number;
+}
+export interface SessionOutcomeProxy {
+  /** Real user prompts in the session — more back-and-forth ~ more clarification/correction. */
+  userPrompts: number;
+  /** tool_result blocks flagged is_error — failures/workarounds the agent hit. */
+  toolErrors: number;
+}
+export interface SessionAnalysis {
+  metric: ContextRatioMetric;
+  outcome: SessionOutcomeProxy;
+}
+/**
+ * Walk the full transcript and compute the context-gathering ratio plus paired outcome
+ * proxies. Returns null when there's nothing substantive to measure (no tool calls) —
+ * trivial sessions have nothing to learn from and shouldn't pollute the baselines.
+ */
+export function analyzeSession(jsonl: string): SessionAnalysis | null {
+  let gatheringCalls = 0;
+  let actionCalls = 0;
+  let leadingGather = 0;
+  let sawAction = false;
+  let userPrompts = 0;
+  let toolErrors = 0;
+  let turns = 0;
+  for (const line of jsonl.split("\n")) {
+    const trimmed = line.trim();
+    if (!trimmed) continue;
+    let entry: TranscriptEntry;
+    try {
+      entry = JSON.parse(trimmed) as TranscriptEntry;
+    } catch {
+      continue;
+    }
+    if (isRealUserPrompt(entry)) userPrompts++;
+    if (entry.type === "user") {
+      for (const b of blocks(entry.message)) {
+        if (b.type === "tool_result" && b.is_error === true) toolErrors++;
+      }
+      continue;
+    }
+    if (entry.type !== "assistant") continue;
+    let producedSomething = false;
+    for (const b of blocks(entry.message)) {
+      if (b.type === "text" && b.text?.trim()) producedSomething = true;
+      if (b.type !== "tool_use" || typeof b.name !== "string" || !b.name) continue;
+      producedSomething = true;
+      if (isGatheringTool(b.name)) {
+        gatheringCalls++;
+        if (!sawAction) leadingGather++;
+      } else {
+        actionCalls++;
+        sawAction = true;
+      }
+    }
+    if (producedSomething) turns++;
+  }
+  const totalToolCalls = gatheringCalls + actionCalls;
+  if (totalToolCalls === 0) return null;
+  return {
+    metric: {
+      ratio: gatheringCalls / totalToolCalls,
+      gatheringCalls,
+      actionCalls,
+      totalToolCalls,
+      leadingGather,
+      turns,
+    },
+    outcome: { userPrompts, toolErrors },
+  };
+}
+/** Count substantive assistant turns — used by the #185 introspection gate. */
+export function countSubstantiveTurns(jsonl: string): number {
+  let turns = 0;
+  for (const line of jsonl.split("\n")) {
+    const trimmed = line.trim();
+    if (!trimmed) continue;
+    let entry: TranscriptEntry;
+    try {
+      entry = JSON.parse(trimmed) as TranscriptEntry;
+    } catch {
+      continue;
+    }
+    if (entry.type !== "assistant") continue;
+    const hasContent = blocks(entry.message).some(
+      (b) => (b.type === "text" && b.text?.trim()) || (b.type === "tool_use" && b.name),
+    );
+    if (hasContent) turns++;
+  }
+  return turns;
+}
 export function extractHookAssistantMessage(content: unknown): string {
   if (typeof content === "string") return content.trim();
   if (!Array.isArray(content)) return "";

package/src/control-server.ts CHANGED

@@ -28,6 +28,11 @@ interface ControlServerOptions {
   // directly into the session (web terminal / TUI) so the runner can mirror it
   // into the dashboard chat and start tailing the turn transcript for reasoning.
   onUserPrompt?(input: { prompt: string; transcriptPath?: string }): Promise<void>;
+  // A provider SessionEnd hook signals the session is over so the runner can
+  // compute end-of-session Insights signals (#184 context ratio) from the full
+  // transcript. transcriptPath is optional — the runner falls back to the last
+  // path it saw during the session.
+  onSessionEnd?(input: { reason?: string; transcriptPath?: string }): Promise<void>;
 }
 export function startControlServer(options: ControlServerOptions): ControlServer {
@@ -73,6 +78,9 @@ export function startControlServer(options: ControlServerOptions): ControlServer
       if (url.pathname === "/user-prompt" && req.method === "POST") {
         return handleUserPrompt(req, options);
       }
+      if (url.pathname === "/session-end" && req.method === "POST") {
+        return handleSessionEnd(req, options);
+      }
       if (url.pathname === "/monitor") {
         const upgraded = srv.upgrade(req, { data: { kind: "monitor" } });
         return upgraded ? undefined : new Response("WebSocket upgrade failed", { status: 400 });
@@ -343,6 +351,16 @@ async function handleUserPrompt(req: Request, options: ControlServerOptions): Pr
   return Response.json({ ok: true });
 }
+async function handleSessionEnd(req: Request, options: ControlServerOptions): Promise<Response> {
+  if (!options.onSessionEnd) return Response.json({ ok: false, reason: "session-end capture unavailable" });
+  const body = await req.json().catch(() => null);
+  const reason = isRecord(body) && typeof body.reason === "string" ? body.reason : undefined;
+  const transcriptPath = isRecord(body) && typeof body.transcriptPath === "string" ? body.transcriptPath : undefined;
+  // Fire-and-forget: the SessionEnd hook must not block Claude shutting down.
+  void Promise.resolve(options.onSessionEnd({ reason, transcriptPath })).catch(() => {});
+  return Response.json({ ok: true });
+}
 async function handleStatus(req: Request, options: ControlServerOptions): Promise<Response> {
   const body = await req.json().catch(() => null) as Partial<ProviderStatusEvent> | null;
   const status = body?.status;

package/src/runner.ts CHANGED

@@ -9,7 +9,7 @@ import type { ManagedProcess, ProviderAdapter, ProviderConfig, ProviderPermissio
 import { messagesWithCachedAttachments } from "./attachment-cache";
 import { ClaimTracker } from "./claim-tracker";
 import { startControlServer, type ControlServer } from "./control-server";
-import { extractLastAssistantTurn, extractFinalAssistantMessage, extractHookAssistantMessage, extractLatestTurnSteps, transcriptLooksComplete } from "./adapters/claude-transcript";
+import { extractLastAssistantTurn, extractFinalAssistantMessage, extractHookAssistantMessage, extractLatestTurnSteps, transcriptLooksComplete, analyzeSession } from "./adapters/claude-transcript";
 import { agentProfileProjectionReport } from "./profile-projection";
 import { profileUsesHostProviderGlobals } from "./profile-home";
 import { runtimeMetadata } from "./version";
@@ -134,6 +134,9 @@ export class AgentRunner {
   private reactiveTokenRecoveryAt?: number;
   private processStartedAt = 0;
   private providerSessionId = crypto.randomUUID();
+  // Last transcript path seen this session — used by end-of-session Insights (#184)
+  // when the SessionEnd hook payload omits it.
+  private lastTranscriptPath?: string;
   private lifecycleAction?: "shutting-down" | "killing" | "restarting";
   private readonly unexpectedExitTimes: number[] = [];
   private readonly pendingMessages = new Map<number, Message>();
@@ -143,10 +146,12 @@ export class AgentRunner {
   // Session-mirror: a synthesized id grouping a turn's reasoning/tool steps and
   // its final response. Set when a provider-turn starts, cleared when it ends.
   private currentTurnId?: string;
-  // Prompt-echo dedup: the last prompt the runner itself injected (chat box or
-  // initial prompt). A UserPromptSubmit hook echo matching this within the window
-  // is the same prompt arriving back from the provider and must not double-post.
-  private lastInjectedPrompt?: { text: string; at: number };
+  // Prompt-echo dedup: a short, time-bounded queue of prompts the runner itself
+  // injected (chat box or initial prompt) that are still awaiting their matching
+  // UserPromptSubmit echo. A single slot dropped earlier entries when several prompts
+  // were injected before their echoes returned (rapid sends while the provider is busy
+  // and queues them) — the evicted ones then double-posted. Match consumes one entry.
+  private injectedPrompts: Array<{ text: string; at: number }> = [];
   // Busy reconciler: consecutive idle probes observed while claims still say busy.
   private busyReconcileIdleStreak = 0;
   private busyReconcileTimer?: ReturnType<typeof setInterval>;
@@ -243,6 +248,7 @@ export class AgentRunner {
       onReplyObligations: () => this.http.listReplyObligations(this.agentId),
       onSessionTurn: (input) => this.publishSessionTurn(input),
       onUserPrompt: (input) => this.handleUserPrompt(input),
+      onSessionEnd: (input) => this.handleSessionEnd(input),
     });
     this.writeRunnerInfoFile();
     this.options.adapter.onStatusChange((status) => {
@@ -307,6 +313,7 @@ export class AgentRunner {
   private async spawnProvider(): Promise<ManagedProcess> {
     this.providerSessionId = crypto.randomUUID();
+    this.lastTranscriptPath = undefined;
     const includeProviderGlobals = profileUsesHostProviderGlobals(this.options);
     const env = {
       ...process.env as Record<string, string>,
@@ -604,7 +611,7 @@ export class AgentRunner {
     if (messageId) this.pendingPromptMessageId = messageId;
     // Mark so the matching UserPromptSubmit echo isn't double-posted: a chat-box
     // prompt already created its own session message shown in the dashboard.
-    this.lastInjectedPrompt = { text: body.trim(), at: Date.now() };
+    this.recordInjectedPrompt(body.trim());
     await this.options.adapter.deliverInitialPrompt(this.process, body);
     return { injected: true, messageId };
   }
@@ -895,6 +902,7 @@ export class AgentRunner {
   // no relay message) are mirrored too. A reply obligation, when present, is still
   // used as replyTo so the Stop hook stops nagging the agent to /reply.
   private async publishSessionTurn(input: { transcriptPath: string; lastAssistantMessage?: unknown }): Promise<void> {
+    if (input.transcriptPath) this.lastTranscriptPath = input.transcriptPath;
     const turnId = this.currentTurnId;
     this.stopReasoningTail();
     // Optional correlation for threading + obligation clearing — never a capture gate.
@@ -985,6 +993,7 @@ export class AgentRunner {
   // tailing for the turn. Skips prompts the runner itself injected (chat box, relay
   // deliveries) so those aren't double-posted.
   private async handleUserPrompt(input: { prompt: string; transcriptPath?: string }): Promise<void> {
+    if (input.transcriptPath) this.lastTranscriptPath = input.transcriptPath;
     if (!this.currentTurnId) this.currentTurnId = crypto.randomUUID();
     const text = input.prompt.trim();
     if (text && !this.isRunnerInjectedPrompt(text)) {
@@ -1001,6 +1010,42 @@ export class AgentRunner {
     if (input.transcriptPath) this.startReasoningTail(input.transcriptPath);
   }
+  // SessionEnd: compute end-of-session Insights signals (#184 context-gathering
+  // ratio) from the full transcript and record them with the relay. Mechanical and
+  // model-free — costs zero agent tokens and the agent can't game it. The relay drops
+  // the observation if Insights or this signal is toggled off. Best-effort: never
+  // blocks or fails provider shutdown.
+  private async handleSessionEnd(input: { reason?: string; transcriptPath?: string }): Promise<void> {
+    // Only Claude transcripts have this shape; Codex sessions are skipped for now.
+    if (this.options.provider !== "claude") return;
+    const transcriptPath = input.transcriptPath || this.lastTranscriptPath;
+    if (!transcriptPath) return;
+    let jsonl: string;
+    try {
+      jsonl = await readFile(transcriptPath, "utf8");
+    } catch {
+      return;
+    }
+    const analysis = analyzeSession(jsonl);
+    if (!analysis) return; // no tool calls = nothing substantive to measure
+    try {
+      await this.http.recordInsightObservation({
+        sessionId: this.providerSessionId,
+        project: this.options.cwd,
+        agentId: this.agentId,
+        signal: "context_ratio",
+        value: { ...analysis.metric, ...(input.reason ? { endReason: input.reason } : {}) },
+        outcome: { ...analysis.outcome },
+        source: "server",
+      });
+      this.sessionLog(`insights: context_ratio ${analysis.metric.ratio.toFixed(2)} (${analysis.metric.gatheringCalls}/${analysis.metric.totalToolCalls} gathering)`);
+    } catch (error) {
+      // 409 = Insights/feature toggled off; anything else is best-effort too.
+      this.sessionDebug(`insights context_ratio skipped: ${error instanceof Error ? error.message : String(error)}`);
+      if (isHttpAuthError(error)) this.recoverRuntimeTokenAfterAuthFailure("insights");
+    }
+  }
   // Route a provider-emitted session event (Codex app-server) into the chat mirror.
   // Mirrors the same semantics as the Claude lane: prompts are echoed with dedup,
   // and a response is only auto-captured when the agent won't separately reply to a
@@ -1053,11 +1098,23 @@ export class AgentRunner {
     });
   }
+  // Remember an injected prompt so its UserPromptSubmit echo can be suppressed. Prunes
+  // expired entries first; a defensive length cap guards against echoes that never
+  // arrive (e.g. the provider drops a queued prompt) so the queue can't grow unbounded.
+  private recordInjectedPrompt(text: string): void {
+    const now = Date.now();
+    this.injectedPrompts = this.injectedPrompts.filter((p) => now - p.at < PROMPT_ECHO_DEDUP_MS);
+    this.injectedPrompts.push({ text, at: now });
+    if (this.injectedPrompts.length > 50) this.injectedPrompts.shift();
+  }
   private isRunnerInjectedPrompt(text: string): boolean {
     if (RELAY_INJECTION_MARKERS.some((marker) => text.startsWith(marker))) return true;
-    const recent = this.lastInjectedPrompt;
-    if (recent && recent.text === text && Date.now() - recent.at < PROMPT_ECHO_DEDUP_MS) {
-      this.lastInjectedPrompt = undefined;
+    const now = Date.now();
+    this.injectedPrompts = this.injectedPrompts.filter((p) => now - p.at < PROMPT_ECHO_DEDUP_MS);
+    const idx = this.injectedPrompts.findIndex((p) => p.text === text);
+    if (idx !== -1) {
+      this.injectedPrompts.splice(idx, 1); // consume one — identical repeats each match once
       return true;
     }
     return false;