agent-relay-runner 0.23.0 → 0.25.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/plugins/claude/.claude-plugin/plugin.json +1 -1
- package/plugins/claude/hooks/pre-compact.sh +6 -0
- package/plugins/claude/hooks/relay-status.sh +6 -5
- package/plugins/claude/hooks/session-end.sh +5 -2
- package/src/adapter.ts +10 -0
- package/src/adapters/claude-transcript.ts +21 -75
- package/src/adapters/claude.ts +16 -0
- package/src/adapters/codex.ts +64 -3
- package/src/claim-tracker.ts +0 -12
- package/src/control-server.ts +12 -11
- package/src/runner.ts +121 -31
- package/src/session-insights.ts +118 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "agent-relay-runner",
|
|
3
|
-
"version": "0.23.0",
|
|
3
|
+
"version": "0.25.0",
|
|
4
4
|
"description": "Unified provider lifecycle runner for Agent Relay",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -20,7 +20,7 @@
|
|
|
20
20
|
"directory": "runner"
|
|
21
21
|
},
|
|
22
22
|
"dependencies": {
|
|
23
|
-
"agent-relay-sdk": "0.2.
|
|
23
|
+
"agent-relay-sdk": "0.2.14"
|
|
24
24
|
},
|
|
25
25
|
"devDependencies": {
|
|
26
26
|
"@types/bun": "latest",
|
|
package/plugins/claude/hooks/pre-compact.sh
CHANGED
|
@@ -3,4 +3,10 @@ set -euo pipefail
|
|
|
3
3
|
source "${CLAUDE_PLUGIN_ROOT:-$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)}/hooks/relay-status.sh"
|
|
4
4
|
relay_install_hook_guard pre-compact
|
|
5
5
|
|
|
6
|
+
payload="$(cat || true)"
|
|
7
|
+
transcript_path="$(relay_json_string_field transcript_path "$payload")"
|
|
8
|
+
|
|
6
9
|
relay_post_timeline_status busy provider-turn "" compacting
|
|
10
|
+
# Pre-destroy seam (#183): compaction is about to discard context — capture this segment's
|
|
11
|
+
# Insights (#184) from the full transcript first. The most common boundary in practice.
|
|
12
|
+
relay_post_session_boundary "$transcript_path" compact
|
|
package/plugins/claude/hooks/relay-status.sh
CHANGED
|
@@ -72,10 +72,11 @@ relay_post_user_prompt() {
|
|
|
72
72
|
-d "$body" >/dev/null 2>&1 || true
|
|
73
73
|
}
|
|
74
74
|
|
|
75
|
-
|
|
76
|
-
#
|
|
77
|
-
# end-of-session
|
|
78
|
-
#
|
|
75
|
+
relay_post_session_boundary() {
|
|
76
|
+
# Pre-destroy seam (#183): tell the runner a context reset or termination is imminent
|
|
77
|
+
# (compact/clear/logout/…) so it can run end-of-session work — the #184 context-gathering
|
|
78
|
+
# ratio capture — from the full transcript before the invasive op. Fire-and-forget; the
|
|
79
|
+
# transcript path is optional (the runner falls back to the last path it saw).
|
|
79
80
|
local transcript_path="${1:-}"
|
|
80
81
|
local reason="${2:-}"
|
|
81
82
|
local port="${AGENT_RELAY_RUNNER_PORT:-}"
|
|
@@ -87,7 +88,7 @@ relay_post_session_end() {
|
|
|
87
88
|
body="${body}\"reason\":\"$(relay_json_escape "$reason")\""
|
|
88
89
|
fi
|
|
89
90
|
body="${body}}"
|
|
90
|
-
curl -fsS --max-time 3 -X POST "http://127.0.0.1:${port}/session-
|
|
91
|
+
curl -fsS --max-time 3 -X POST "http://127.0.0.1:${port}/session-boundary" \
|
|
91
92
|
-H 'Content-Type: application/json' \
|
|
92
93
|
-d "$body" >/dev/null 2>&1 || true
|
|
93
94
|
}
|
|
package/plugins/claude/hooks/session-end.sh
CHANGED
|
@@ -10,14 +10,17 @@ transcript_path="$(relay_json_string_field transcript_path "$payload")"
|
|
|
10
10
|
case "$reason" in
|
|
11
11
|
clear)
|
|
12
12
|
relay_post_timeline_status idle provider-turn subagent clearing-context
|
|
13
|
+
# /clear wipes the context — the most common work boundary for a long-lived agent.
|
|
14
|
+
# Capture this segment's Insights (#183/#184) before it's gone.
|
|
15
|
+
relay_post_session_boundary "$transcript_path" clear
|
|
13
16
|
;;
|
|
14
17
|
resume)
|
|
15
18
|
relay_post_status_clearing_subagents idle
|
|
16
19
|
;;
|
|
17
20
|
logout|prompt_input_exit|bypass_permissions_disabled|other|*)
|
|
18
21
|
relay_post_status_clearing_subagents offline
|
|
19
|
-
# Real session termination: capture end-of-session Insights (#184). Order after the
|
|
22
|
+
# Real session termination: capture end-of-session Insights (#183/#184). Order after the
|
|
20
23
|
# status post is arbitrary — the runner reads the transcript file regardless.
|
|
21
|
-
|
|
24
|
+
relay_post_session_boundary "$transcript_path" "$reason"
|
|
22
25
|
;;
|
|
23
26
|
esac
|
package/src/adapter.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import type { AgentProfile, Message } from "agent-relay-sdk";
|
|
2
2
|
import { isRecord } from "agent-relay-sdk";
|
|
3
|
+
import type { SessionEvent } from "./session-insights";
|
|
3
4
|
|
|
4
5
|
export type SemanticStatus = "idle" | "busy" | "offline" | "error";
|
|
5
6
|
type ProviderWorkKind = "provider-turn" | "subagent";
|
|
@@ -133,6 +134,15 @@ export interface ProviderAdapter {
|
|
|
133
134
|
shutdown(process: ManagedProcess, opts: { graceful: boolean; timeoutMs: number }): Promise<void>;
|
|
134
135
|
compact?(process: ManagedProcess): Promise<Record<string, unknown> | void>;
|
|
135
136
|
clearContext?(process: ManagedProcess): Promise<Record<string, unknown> | void>;
|
|
137
|
+
// Normalize the session so far into the provider-agnostic SessionEvent stream the
|
|
138
|
+
// Insights context-ratio signal (#183/#184) reduces. Called by the runner's
|
|
139
|
+
// pre-session-destroy seam before any compact/clear/restart/shutdown. The runner owns
|
|
140
|
+
// the per-segment cursor (it slices events since the last capture), so this returns the
|
|
141
|
+
// full ordered event list for the current process lifetime. `ctx.transcriptPath` is
|
|
142
|
+
// supplied for transcript-backed providers (Claude); event-stream providers (Codex)
|
|
143
|
+
// ignore it and return their accumulated log. Return null when there is nothing to
|
|
144
|
+
// measure. Best-effort: may be omitted by providers without a session view yet.
|
|
145
|
+
collectSessionEvents?(process: ManagedProcess, ctx: { transcriptPath?: string }): Promise<SessionEvent[] | null>;
|
|
136
146
|
// Interrupt the in-flight turn without ending the session (ESC for Claude's
|
|
137
147
|
// tmux pane, turn/interrupt for the Codex app-server). Provider-independent at
|
|
138
148
|
// the runner boundary; each adapter does what its provider actually supports.
|
|
package/src/adapters/claude-transcript.ts
CHANGED
|
@@ -10,6 +10,8 @@
|
|
|
10
10
|
// entry carrying text, not just tool_result blocks). We collect the assistant
|
|
11
11
|
// `text` blocks from that turn — thinking and tool_use are dropped.
|
|
12
12
|
|
|
13
|
+
import { computeContextRatio, type SessionAnalysis, type SessionEvent } from "../session-insights";
|
|
14
|
+
|
|
13
15
|
interface TranscriptBlock {
|
|
14
16
|
type?: string;
|
|
15
17
|
text?: string;
|
|
@@ -194,59 +196,14 @@ export function summarizeToolUse(name: string, input: Record<string, unknown> |
|
|
|
194
196
|
// paired with cheap outcome proxies (user re-prompts, tool errors) so it's never read
|
|
195
197
|
// alone — see the anti-Goodhart constraint in the epic.
|
|
196
198
|
|
|
197
|
-
// Tools that acquire context without changing anything. Anything not matched here is
|
|
198
|
-
// treated as an action (mutation, execution, or a delegation/direction decision) —
|
|
199
|
-
// Bash counts as an action because it executes (a conservative, documented choice for
|
|
200
|
-
// v0; `cat`/`ls` via Bash are misclassified, refine later if the data warrants it).
|
|
201
|
-
const GATHERING_TOOLS = new Set([
|
|
202
|
-
"Read", "Grep", "Glob", "LS", "NotebookRead", "WebFetch", "WebSearch",
|
|
203
|
-
]);
|
|
204
|
-
const GATHERING_NAME = /(?:^|[._-])(read|get|list|search|grep|glob|find|fetch|query|browse|view|show|cat|status|inspect|lookup|symbols|snippet)/i;
|
|
205
|
-
|
|
206
|
-
function isGatheringTool(name: string): boolean {
|
|
207
|
-
if (GATHERING_TOOLS.has(name)) return true;
|
|
208
|
-
// MCP / custom tools: classify by name shape (e.g. mcp__callmux__searxng_web_search).
|
|
209
|
-
return GATHERING_NAME.test(name);
|
|
210
|
-
}
|
|
211
|
-
|
|
212
|
-
export interface ContextRatioMetric {
|
|
213
|
-
/** Session-wide gathering fraction: gatheringCalls / totalToolCalls. The headline metric. */
|
|
214
|
-
ratio: number;
|
|
215
|
-
gatheringCalls: number;
|
|
216
|
-
actionCalls: number;
|
|
217
|
-
totalToolCalls: number;
|
|
218
|
-
/** Consecutive gathering calls before the first action — the "read N files before moving" signal. */
|
|
219
|
-
leadingGather: number;
|
|
220
|
-
/** Substantive assistant turns (turns that produced text or a tool call). */
|
|
221
|
-
turns: number;
|
|
222
|
-
}
|
|
223
|
-
|
|
224
|
-
export interface SessionOutcomeProxy {
|
|
225
|
-
/** Real user prompts in the session — more back-and-forth ~ more clarification/correction. */
|
|
226
|
-
userPrompts: number;
|
|
227
|
-
/** tool_result blocks flagged is_error — failures/workarounds the agent hit. */
|
|
228
|
-
toolErrors: number;
|
|
229
|
-
}
|
|
230
|
-
|
|
231
|
-
export interface SessionAnalysis {
|
|
232
|
-
metric: ContextRatioMetric;
|
|
233
|
-
outcome: SessionOutcomeProxy;
|
|
234
|
-
}
|
|
235
|
-
|
|
236
199
|
/**
|
|
237
|
-
*
|
|
238
|
-
*
|
|
239
|
-
*
|
|
200
|
+
* Normalize a full Claude transcript into the provider-agnostic `SessionEvent` stream
|
|
201
|
+
* (#183/#184). The classifier and ratio math live in `session-insights.ts` and are shared
|
|
202
|
+
* across providers; this only knows the Claude JSONL shape. Events are emitted in
|
|
203
|
+
* transcript order so `leadingGather` is meaningful.
|
|
240
204
|
*/
|
|
241
|
-
export function analyzeSession(jsonl: string): SessionAnalysis | null {
|
|
242
|
-
|
|
243
|
-
let actionCalls = 0;
|
|
244
|
-
let leadingGather = 0;
|
|
245
|
-
let sawAction = false;
|
|
246
|
-
let userPrompts = 0;
|
|
247
|
-
let toolErrors = 0;
|
|
248
|
-
let turns = 0;
|
|
249
|
-
|
|
205
|
+
export function collectClaudeSessionEvents(jsonl: string): SessionEvent[] {
|
|
206
|
+
const events: SessionEvent[] = [];
|
|
250
207
|
for (const line of jsonl.split("\n")) {
|
|
251
208
|
const trimmed = line.trim();
|
|
252
209
|
if (!trimmed) continue;
|
|
@@ -256,10 +213,10 @@ export function analyzeSession(jsonl: string): SessionAnalysis | null {
|
|
|
256
213
|
} catch {
|
|
257
214
|
continue;
|
|
258
215
|
}
|
|
259
|
-
if (isRealUserPrompt(entry)) userPrompts++;
|
|
216
|
+
if (isRealUserPrompt(entry)) events.push({ type: "user_prompt" });
|
|
260
217
|
if (entry.type === "user") {
|
|
261
218
|
for (const b of blocks(entry.message)) {
|
|
262
|
-
if (b.type === "tool_result" && b.is_error === true) toolErrors++;
|
|
219
|
+
if (b.type === "tool_result" && b.is_error === true) events.push({ type: "tool_error" });
|
|
263
220
|
}
|
|
264
221
|
continue;
|
|
265
222
|
}
|
|
@@ -269,31 +226,20 @@ export function analyzeSession(jsonl: string): SessionAnalysis | null {
|
|
|
269
226
|
if (b.type === "text" && b.text?.trim()) producedSomething = true;
|
|
270
227
|
if (b.type !== "tool_use" || typeof b.name !== "string" || !b.name) continue;
|
|
271
228
|
producedSomething = true;
|
|
272
|
-
|
|
273
|
-
gatheringCalls++;
|
|
274
|
-
if (!sawAction) leadingGather++;
|
|
275
|
-
} else {
|
|
276
|
-
actionCalls++;
|
|
277
|
-
sawAction = true;
|
|
278
|
-
}
|
|
229
|
+
events.push({ type: "tool", name: b.name });
|
|
279
230
|
}
|
|
280
|
-
if (producedSomething) turns++;
|
|
231
|
+
if (producedSomething) events.push({ type: "turn" });
|
|
281
232
|
}
|
|
233
|
+
return events;
|
|
234
|
+
}
|
|
282
235
|
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
actionCalls,
|
|
291
|
-
totalToolCalls,
|
|
292
|
-
leadingGather,
|
|
293
|
-
turns,
|
|
294
|
-
},
|
|
295
|
-
outcome: { userPrompts, toolErrors },
|
|
296
|
-
};
|
|
236
|
+
/**
|
|
237
|
+
* Walk the full transcript and compute the context-gathering ratio plus paired outcome
|
|
238
|
+
* proxies. Returns null when there's nothing substantive to measure (no tool calls) —
|
|
239
|
+
* trivial sessions have nothing to learn from and shouldn't pollute the baselines.
|
|
240
|
+
*/
|
|
241
|
+
export function analyzeSession(jsonl: string): SessionAnalysis | null {
|
|
242
|
+
return computeContextRatio(collectClaudeSessionEvents(jsonl));
|
|
297
243
|
}
|
|
298
244
|
|
|
299
245
|
/** Count substantive assistant turns — used by the #185 introspection gate. */
|
package/src/adapters/claude.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { existsSync, mkdirSync, writeFileSync } from "node:fs";
|
|
2
|
+
import { readFile } from "node:fs/promises";
|
|
2
3
|
import { homedir, tmpdir } from "node:os";
|
|
3
4
|
import { join, resolve } from "node:path";
|
|
4
5
|
import type { Message } from "agent-relay-sdk";
|
|
@@ -6,6 +7,8 @@ import { shellEscape as shellQuote } from "agent-relay-sdk/shell-utils";
|
|
|
6
7
|
import { tmuxCommand, tmuxHasSession } from "agent-relay-sdk/tmux-utils";
|
|
7
8
|
import { sanitizeFsName } from "agent-relay-sdk/fs-name";
|
|
8
9
|
import { profileAllowsRelayFeature, type ManagedProcess, type ProviderAdapter, type ProviderConfig, type ProviderStatusUpdate, type RunnerSpawnConfig, type SemanticStatus, type SpawnArgs } from "../adapter";
|
|
10
|
+
import { collectClaudeSessionEvents } from "./claude-transcript";
|
|
11
|
+
import type { SessionEvent } from "../session-insights";
|
|
9
12
|
import { prepareClaudeProfileHome, profileUsesHostProviderGlobals } from "../profile-home";
|
|
10
13
|
import { relayMcpClaudeConfigArg } from "../relay-mcp";
|
|
11
14
|
import { claudeProviderMessageText } from "./claude-delivery";
|
|
@@ -64,6 +67,19 @@ export class ClaudeAdapter implements ProviderAdapter {
|
|
|
64
67
|
return { method: "tmux-inject", command: "/clear" };
|
|
65
68
|
}
|
|
66
69
|
|
|
70
|
+
// #183/#184: parse the full Claude transcript into the shared SessionEvent stream. The
|
|
71
|
+
// runner slices per-segment, so we return the whole transcript's events each call.
|
|
72
|
+
/**
 * Read the Claude transcript named by `ctx.transcriptPath` and convert it into the
 * shared SessionEvent stream via `collectClaudeSessionEvents`.
 *
 * @param _process - Unused; the transcript file is the only input.
 * @param ctx - Carries the optional transcript path for the session.
 * @returns The events for the whole transcript, or null when no path was supplied or
 *   the file could not be read.
 */
async collectSessionEvents(_process: ManagedProcess, ctx: { transcriptPath?: string }): Promise<SessionEvent[] | null> {
  const { transcriptPath } = ctx;
  if (!transcriptPath) return null;
  // A missing or unreadable transcript is not an error here: there is nothing to measure.
  // Only the read is guarded; a failure while parsing still propagates to the caller.
  const contents = await readFile(transcriptPath, "utf8").catch(() => null);
  if (contents === null) return null;
  return collectClaudeSessionEvents(contents);
}
|
|
82
|
+
|
|
67
83
|
async interrupt(process: ManagedProcess): Promise<Record<string, unknown>> {
|
|
68
84
|
const session = process.meta?.tmuxSession as string | undefined;
|
|
69
85
|
const socket = process.meta?.tmuxSocket as string | undefined;
|
package/src/adapters/codex.ts
CHANGED
|
@@ -8,6 +8,7 @@ import { profileAllowsRelayFeature, providerMessageText, RELAY_CONTEXT, type Man
|
|
|
8
8
|
import { workspaceDepsNoteFromEnv } from "../relay-instructions";
|
|
9
9
|
import { relayMcpCodexConfigArgs, tomlString } from "../relay-mcp";
|
|
10
10
|
import { logger } from "../logger";
|
|
11
|
+
import type { SessionEvent } from "../session-insights";
|
|
11
12
|
|
|
12
13
|
/** Relay context prepended to a Codex agent's first turn: the standard relay
|
|
13
14
|
* blurb plus, when running in an isolated workspace, the deps caveat (#159). */
|
|
@@ -41,6 +42,13 @@ export class CodexAdapter implements ProviderAdapter {
|
|
|
41
42
|
private turnMessages: string[] = [];
|
|
42
43
|
private readonly itemTextBuffers = new Map<string, string>();
|
|
43
44
|
private captureMode: "final" | "full" = "final";
|
|
45
|
+
// #183/#184: the normalized session-event log for the current process lifetime, fed
|
|
46
|
+
// from the same completed-item stream that drives the chat mirror. The runner slices
|
|
47
|
+
// this per-segment (since the last compact/clear/restart) via its own cursor, so we
|
|
48
|
+
// accumulate and never clear mid-session; spawn() resets it for a fresh process. Soft
|
|
49
|
+
// cap below keeps a runaway session from growing memory unbounded.
|
|
50
|
+
private sessionEvents: SessionEvent[] = [];
|
|
51
|
+
private static readonly SESSION_EVENTS_CAP = 50_000;
|
|
44
52
|
|
|
45
53
|
onStatusChange(cb: (status: ProviderStatusUpdate) => void): void {
|
|
46
54
|
this.statusCb = cb;
|
|
@@ -75,6 +83,7 @@ export class CodexAdapter implements ProviderAdapter {
|
|
|
75
83
|
|
|
76
84
|
async spawn(config: RunnerSpawnConfig): Promise<ManagedProcess> {
|
|
77
85
|
this.captureMode = (config.providerConfig as ProviderConfig).chatCaptureMode ?? "final";
|
|
86
|
+
this.sessionEvents = []; // fresh process → fresh segment cursor (#184)
|
|
78
87
|
const args = this.buildSpawnArgs(config, config.providerConfig as ProviderConfig);
|
|
79
88
|
const appServer = Bun.spawn([args.command, ...args.args], {
|
|
80
89
|
cwd: args.cwd,
|
|
@@ -377,13 +386,19 @@ export class CodexAdapter implements ProviderAdapter {
|
|
|
377
386
|
const itemId = codexItemId(item);
|
|
378
387
|
if (type === "agentMessage") {
|
|
379
388
|
const text = (stringValue(item.text) ?? (itemId ? this.itemTextBuffers.get(itemId) : undefined))?.trim();
|
|
380
|
-
if (text)
|
|
389
|
+
if (text) {
|
|
390
|
+
this.turnMessages.push(text);
|
|
391
|
+
this.recordInsightEvent({ type: "turn" }); // a substantive assistant turn
|
|
392
|
+
}
|
|
381
393
|
if (itemId) this.itemTextBuffers.delete(itemId);
|
|
382
394
|
return;
|
|
383
395
|
}
|
|
384
396
|
if (type === "userMessage") {
|
|
385
397
|
const text = codexUserMessageText(item.content);
|
|
386
|
-
if (text)
|
|
398
|
+
if (text) {
|
|
399
|
+
this.recordInsightEvent({ type: "user_prompt" });
|
|
400
|
+
this.sessionEventCb({ type: "prompt", origin: "terminal", body: text, ...(turnId ? { turnId } : {}) });
|
|
401
|
+
}
|
|
387
402
|
return;
|
|
388
403
|
}
|
|
389
404
|
if (type === "reasoning") {
|
|
@@ -394,10 +409,31 @@ export class CodexAdapter implements ProviderAdapter {
|
|
|
394
409
|
return;
|
|
395
410
|
}
|
|
396
411
|
const tool = codexToolSummary(type, item);
|
|
397
|
-
if (tool)
|
|
412
|
+
if (tool) {
|
|
413
|
+
this.recordInsightEvent({ type: "tool", name: codexInsightToolName(type, item) });
|
|
414
|
+
if (codexItemFailed(item)) this.recordInsightEvent({ type: "tool_error" });
|
|
415
|
+
this.sessionEventCb({ type: "tool", origin: "provider", body: tool.body, label: tool.label, status: "completed", ...(turnId ? { turnId } : {}) });
|
|
416
|
+
}
|
|
398
417
|
if (itemId) this.itemTextBuffers.delete(itemId);
|
|
399
418
|
}
|
|
400
419
|
|
|
420
|
+
// #183/#184: append to the session-event log with a soft cap. On overflow we drop the
|
|
421
|
+
// oldest half; the runner detects the resulting length shrink and resets its segment
|
|
422
|
+
// cursor (worst case: one slightly-truncated datapoint on a pathologically long session).
|
|
423
|
+
/**
 * Append one normalized event to the in-memory session log and trim the log once it
 * grows past `CodexAdapter.SESSION_EVENTS_CAP`.
 *
 * @param event - The event to record.
 */
private recordInsightEvent(event: SessionEvent): void {
  const log = this.sessionEvents;
  log.push(event);
  if (log.length <= CodexAdapter.SESSION_EVENTS_CAP) return;
  // Over the cap: discard the oldest floor(length / 2) entries and keep the newer rest.
  this.sessionEvents = log.slice(Math.floor(log.length / 2));
}
|
|
429
|
+
|
|
430
|
+
// Whole-session event stream for the Insights context-ratio signal (#184). Codex learns
|
|
431
|
+
// about activity through app-server item events, not a transcript, so we replay the log
|
|
432
|
+
// accumulated since this process started. The runner owns per-segment slicing.
|
|
433
|
+
/**
 * Snapshot of the session events recorded by this adapter so far.
 *
 * @returns A shallow copy of the log, so callers cannot mutate the adapter's own array.
 *   This implementation always returns an array (possibly empty), never null.
 */
async collectSessionEvents(): Promise<SessionEvent[] | null> {
  return this.sessionEvents.slice();
}
|
|
436
|
+
|
|
401
437
|
private handleCodexItemDelta(method: string, params: Record<string, unknown> | undefined): void {
|
|
402
438
|
if (!method.includes("item/") && !method.includes("item.")) return;
|
|
403
439
|
const item = isRecord(params?.item) ? params.item : undefined;
|
|
@@ -492,6 +528,31 @@ export function codexReasoningText(item: Record<string, unknown>): string {
|
|
|
492
528
|
}
|
|
493
529
|
|
|
494
530
|
/**
 * Canonical tool name for the #184 gathering/action classifier.
 *
 * Codex item types are mapped onto names the shared classifier is expected to
 * understand: `commandExecution` executes (reported as `Bash`), `fileChange` mutates
 * (reported as `Edit`), and `webSearch` gathers (reported as `WebSearch`). MCP, dynamic
 * and collab-agent calls report the tool name carried on the item so name-shape
 * classification can apply to it.
 *
 * @param type - The Codex item type, if known.
 * @param item - The raw Codex item; only `item.tool` is read, and only for tool-call types.
 * @returns The mapped name; otherwise the item type itself; otherwise the literal `"tool"`.
 */
export function codexInsightToolName(type: string | undefined, item: Record<string, unknown>): string {
  switch (type) {
    case "commandExecution": return "Bash";
    case "fileChange": return "Edit";
    case "webSearch": return "WebSearch";
    case "mcpToolCall":
    case "dynamicToolCall":
    case "collabAgentToolCall":
      // Prefer the real tool name; fall back to the item type when the item carries none.
      return stringValue(item.tool) ?? type ?? "tool";
    default: return type ?? "tool";
  }
}

/**
 * Did a completed tool item fail? Mirrors Claude's tool_result `is_error` outcome proxy.
 *
 * An item counts as failed when its status is `"failed"`, when it carries an `error`
 * value other than null/undefined/false, or when it reports a numeric non-zero exit
 * code under either `exitCode` or `exit_code`.
 *
 * @param item - The raw Codex item.
 * @returns true when any of the failure signals above is present.
 */
export function codexItemFailed(item: Record<string, unknown>): boolean {
  if (stringValue(item.status) === "failed") return true;
  if (item.error != null && item.error !== false) return true;
  const exitCode = item.exitCode ?? item.exit_code;
  return typeof exitCode === "number" && exitCode !== 0;
}

/** Build a compact { label, body } activity summary for a Codex tool item. */
|
|
555
|
+
|
|
495
556
|
export function codexToolSummary(type: string | undefined, item: Record<string, unknown>): { label: string; body: string } | null {
|
|
496
557
|
const oneLine = (value: unknown): string => (typeof value === "string" ? value.replace(/\s+/g, " ").trim() : "");
|
|
497
558
|
const clip = (text: string): string => (text.length > 200 ? `${text.slice(0, 197)}…` : text);
|
package/src/claim-tracker.ts
CHANGED
|
@@ -83,14 +83,6 @@ export class ClaimTracker {
|
|
|
83
83
|
return before !== this.currentStatus();
|
|
84
84
|
}
|
|
85
85
|
|
|
86
|
-
clearKind(kind: ClaimKind): boolean {
|
|
87
|
-
const before = this.currentStatus();
|
|
88
|
-
for (const key of [...this.claims.keys()]) {
|
|
89
|
-
if (key.startsWith(`${kind}:`)) this.claims.delete(key);
|
|
90
|
-
}
|
|
91
|
-
return before !== this.currentStatus();
|
|
92
|
-
}
|
|
93
|
-
|
|
94
86
|
expire(now = Date.now()): boolean {
|
|
95
87
|
const before = this.currentStatus();
|
|
96
88
|
for (const [key, claim] of this.claims) {
|
|
@@ -111,10 +103,6 @@ export class ClaimTracker {
|
|
|
111
103
|
return [...reasons];
|
|
112
104
|
}
|
|
113
105
|
|
|
114
|
-
activeClaims(): ClaimRecord[] {
|
|
115
|
-
return [...this.claims.values()];
|
|
116
|
-
}
|
|
117
|
-
|
|
118
106
|
activeWork(): WorkRecord[] {
|
|
119
107
|
return [...this.work.values()];
|
|
120
108
|
}
|
package/src/control-server.ts
CHANGED
|
@@ -39,11 +39,12 @@ interface ControlServerOptions {
|
|
|
39
39
|
// directly into the session (web terminal / TUI) so the runner can mirror it
|
|
40
40
|
// into the dashboard chat and start tailing the turn transcript for reasoning.
|
|
41
41
|
onUserPrompt?(input: { prompt: string; transcriptPath?: string }): Promise<void>;
|
|
42
|
-
// A provider
|
|
43
|
-
//
|
|
44
|
-
//
|
|
45
|
-
//
|
|
46
|
-
|
|
42
|
+
// A provider session-boundary hook (Claude PreCompact / SessionEnd) signals an imminent
|
|
43
|
+
// context reset or termination so the runner can run end-of-session work (#183 pre-destroy
|
|
44
|
+
// seam: #184 context-ratio capture) before the invasive operation. `reason` is the raw
|
|
45
|
+
// provider reason (compact, clear, logout, …); transcriptPath is optional — the runner
|
|
46
|
+
// falls back to the last path it saw during the session.
|
|
47
|
+
onSessionBoundary?(input: { reason?: string; transcriptPath?: string }): Promise<void>;
|
|
47
48
|
// Phase 1 observability (#198): a hook reporting an unhandled failure. The
|
|
48
49
|
// control server already logs it FATAL; this is the seam for Phase 2 to also
|
|
49
50
|
// surface it to the server via the runner outbox.
|
|
@@ -93,8 +94,8 @@ export function startControlServer(options: ControlServerOptions): ControlServer
|
|
|
93
94
|
if (url.pathname === "/user-prompt" && req.method === "POST") {
|
|
94
95
|
return handleUserPrompt(req, options);
|
|
95
96
|
}
|
|
96
|
-
if (url.pathname === "/session-
|
|
97
|
-
return
|
|
97
|
+
if (url.pathname === "/session-boundary" && req.method === "POST") {
|
|
98
|
+
return handleSessionBoundary(req, options);
|
|
98
99
|
}
|
|
99
100
|
if (url.pathname === "/log-level" && req.method === "GET") {
|
|
100
101
|
return Response.json({ level: logger.getLevel(), levels: LOG_LEVELS });
|
|
@@ -375,13 +376,13 @@ async function handleUserPrompt(req: Request, options: ControlServerOptions): Pr
|
|
|
375
376
|
return Response.json({ ok: true });
|
|
376
377
|
}
|
|
377
378
|
|
|
378
|
-
async function
|
|
379
|
-
if (!options.
|
|
379
|
+
async function handleSessionBoundary(req: Request, options: ControlServerOptions): Promise<Response> {
|
|
380
|
+
if (!options.onSessionBoundary) return Response.json({ ok: false, reason: "session-boundary capture unavailable" });
|
|
380
381
|
const body = await req.json().catch(() => null);
|
|
381
382
|
const reason = isRecord(body) && typeof body.reason === "string" ? body.reason : undefined;
|
|
382
383
|
const transcriptPath = isRecord(body) && typeof body.transcriptPath === "string" ? body.transcriptPath : undefined;
|
|
383
|
-
// Fire-and-forget:
|
|
384
|
-
void Promise.resolve(options.
|
|
384
|
+
// Fire-and-forget: a PreCompact/SessionEnd hook must not block Claude compacting or exiting.
|
|
385
|
+
void Promise.resolve(options.onSessionBoundary({ reason, transcriptPath })).catch(() => {});
|
|
385
386
|
return Response.json({ ok: true });
|
|
386
387
|
}
|
|
387
388
|
|
package/src/runner.ts
CHANGED
|
@@ -11,7 +11,8 @@ import { ClaimTracker } from "./claim-tracker";
|
|
|
11
11
|
import { startControlServer, type ControlServer } from "./control-server";
|
|
12
12
|
import { ReplyObligationCache } from "./reply-obligation-cache";
|
|
13
13
|
import { Outbox, type OutboxRecord } from "./outbox";
|
|
14
|
-
import { extractLastAssistantTurn, extractFinalAssistantMessage, extractHookAssistantMessage, extractLatestTurnSteps, transcriptLooksComplete
|
|
14
|
+
import { extractLastAssistantTurn, extractFinalAssistantMessage, extractHookAssistantMessage, extractLatestTurnSteps, transcriptLooksComplete } from "./adapters/claude-transcript";
|
|
15
|
+
import { computeContextRatio } from "./session-insights";
|
|
15
16
|
import { agentProfileProjectionReport } from "./profile-projection";
|
|
16
17
|
import { profileUsesHostProviderGlobals } from "./profile-home";
|
|
17
18
|
import { RELAY_MCP_TOKEN_ENV, relayMcpEndpoint } from "./relay-mcp";
|
|
@@ -20,6 +21,35 @@ import { runtimeMetadata } from "./version";
|
|
|
20
21
|
import { logger, parseLogLevel } from "./logger";
|
|
21
22
|
import { ensureSessionScratch, reapSessionScratch, sweepStaleSessions, type SessionScratchLayout } from "./session-scratch";
|
|
22
23
|
|
|
24
|
+
// A destructive session transition. The runner runs end-of-session work (Insights
|
|
25
|
+
// capture, #183/#184) before the invasive operation and, during that window, presents a
|
|
26
|
+
// distinct non-addressable lifecycle state. Bus commands and provider hooks (Claude
|
|
27
|
+
// PreCompact / SessionEnd) both normalize to one of these.
|
|
28
|
+
type SessionDestroyReason = "compact" | "clear" | "restart" | "shutdown" | "kill";
|
|
29
|
+
|
|
30
|
+
// `finalizing-<reason>` is the transient pre-destroy window; the others are the executing
|
|
31
|
+
// teardown states the dashboard already renders.
|
|
32
|
+
type LifecycleAction =
|
|
33
|
+
| "shutting-down" | "killing" | "restarting"
|
|
34
|
+
| `finalizing-${SessionDestroyReason}`;
|
|
35
|
+
|
|
36
|
+
// Pre-destroy work is best-effort and must never hang teardown. Capping it keeps a slow
|
|
37
|
+
// transcript read or a wedged provider from stalling a shutdown the operator asked for.
|
|
38
|
+
const PRE_DESTROY_TIMEOUT_MS = 4_000;
|
|
39
|
+
|
|
40
|
+
// Map a lifecycle bus command to its destructive boundary reason, or undefined for
|
|
41
|
+
// non-destructive commands (interrupt, inject, reconnect, permission decisions).
|
|
42
|
+
/**
 * Translate a lifecycle bus command type into the destructive boundary it triggers.
 *
 * @param type - The bus command type, e.g. `"agent.compact"`.
 * @returns The matching destroy reason, or undefined when the command is not one of the
 *   five destructive commands handled here.
 */
function boundaryReasonForCommand(type: string): SessionDestroyReason | undefined {
  if (type === "agent.compact") return "compact";
  if (type === "agent.clearContext") return "clear";
  if (type === "agent.restart") return "restart";
  if (type === "agent.shutdown") return "shutdown";
  if (type === "agent.kill") return "kill";
  return undefined;
}
|
|
52
|
+
|
|
23
53
|
interface RunnerOptions {
|
|
24
54
|
provider: string;
|
|
25
55
|
model?: string;
|
|
@@ -164,7 +194,17 @@ export class AgentRunner {
|
|
|
164
194
|
// Last transcript path seen this session — used by end-of-session Insights (#184)
|
|
165
195
|
// when the SessionEnd hook payload omits it.
|
|
166
196
|
private lastTranscriptPath?: string;
|
|
167
|
-
private lifecycleAction?:
|
|
197
|
+
private lifecycleAction?: LifecycleAction;
|
|
198
|
+
// #183/#184 per-segment cursor: how many of the current session's normalized events
|
|
199
|
+
// have already been folded into an observation, and the key (transcript path / Codex
|
|
200
|
+
// session) that count belongs to. A boundary captures only events since the last one,
|
|
201
|
+
// so each datapoint is one work chunk between context resets; a key change or a shrink
|
|
202
|
+
// (transcript rotated, Codex buffer trimmed) resets the cursor.
|
|
203
|
+
private insightsObserved = 0;
|
|
204
|
+
private insightsCursorKey = "";
|
|
205
|
+
// Coalesces concurrent pre-session-destroy runs (e.g. the shutdown bus command and the
|
|
206
|
+
// SessionEnd hook both fire for the same teardown) so the cursor isn't raced.
|
|
207
|
+
private preDestroyPromise?: Promise<void>;
|
|
168
208
|
private readonly unexpectedExitTimes: number[] = [];
|
|
169
209
|
private readonly pendingMessages = new Map<number, Message>();
|
|
170
210
|
private readonly activeTaskClaims = new Map<number, ActiveTaskClaim>();
|
|
@@ -293,7 +333,7 @@ export class AgentRunner {
|
|
|
293
333
|
onReplyObligations: () => Promise.resolve(this.obligationCache.get()),
|
|
294
334
|
onSessionTurn: (input) => this.publishSessionTurn(input),
|
|
295
335
|
onUserPrompt: (input) => this.handleUserPrompt(input),
|
|
296
|
-
|
|
336
|
+
onSessionBoundary: (input) => this.handleSessionBoundary(input),
|
|
297
337
|
onHookFatal: (report) => this.reportHookFatal(report),
|
|
298
338
|
});
|
|
299
339
|
this.startMcpProxy();
|
|
@@ -595,17 +635,22 @@ export class AgentRunner {
|
|
|
595
635
|
if (type !== "agent.shutdown" && type !== "agent.restart" && type !== "agent.reconnect" && type !== "agent.kill" && type !== "agent.compact" && type !== "agent.clearContext" && type !== "agent.injectContext" && type !== "agent.permissionDecision" && type !== "agent.interrupt" && type !== "prompt.inject") return;
|
|
596
636
|
|
|
597
637
|
const exitAfterCommand = type === "agent.shutdown" || type === "agent.kill";
|
|
598
|
-
if (exitAfterCommand)
|
|
599
|
-
this.exitCommandInProgress = true;
|
|
600
|
-
this.lifecycleAction = type === "agent.kill" ? "killing" : "shutting-down";
|
|
601
|
-
} else if (type === "agent.restart") {
|
|
602
|
-
this.lifecycleAction = "restarting";
|
|
603
|
-
}
|
|
638
|
+
if (exitAfterCommand) this.exitCommandInProgress = true;
|
|
604
639
|
this.claims.startClaim("command", commandId);
|
|
605
|
-
this.publishStatus();
|
|
606
640
|
try {
|
|
607
641
|
await this.updateCommand(commandId, "accepted");
|
|
608
642
|
await this.updateCommand(commandId, "running");
|
|
643
|
+
// Pre-session-destroy seam (#183): for destructive transitions, run end-of-session
|
|
644
|
+
// work (Insights capture, #184) BEFORE the invasive operation, surfaced as a
|
|
645
|
+
// non-addressable "finalizing" state so the agent isn't mistaken for merely busy.
|
|
646
|
+
const destroyReason = boundaryReasonForCommand(type);
|
|
647
|
+
if (destroyReason) await this.runPreSessionDestroy(destroyReason);
|
|
648
|
+
// Move from the transient finalizing window to the executing teardown state (or drop
|
|
649
|
+
// it entirely for compact/clear, which complete promptly once capture is done).
|
|
650
|
+
if (exitAfterCommand) this.lifecycleAction = type === "agent.kill" ? "killing" : "shutting-down";
|
|
651
|
+
else if (type === "agent.restart") this.lifecycleAction = "restarting";
|
|
652
|
+
else this.lifecycleAction = undefined;
|
|
653
|
+
this.publishStatus();
|
|
609
654
|
let providerResult: Record<string, unknown> | void = undefined;
|
|
610
655
|
if (type === "agent.restart") await this.restartProvider();
|
|
611
656
|
else if (type === "agent.reconnect") this.publishStatus();
|
|
@@ -1206,26 +1251,71 @@ export class AgentRunner {
|
|
|
1206
1251
|
if (input.transcriptPath) this.startReasoningTail(input.transcriptPath);
|
|
1207
1252
|
}
|
|
1208
1253
|
|
|
1209
|
-
//
|
|
1210
|
-
//
|
|
1211
|
-
//
|
|
1212
|
-
//
|
|
1213
|
-
|
|
1214
|
-
|
|
1215
|
-
|
|
1216
|
-
|
|
1217
|
-
|
|
1218
|
-
|
|
1219
|
-
|
|
1220
|
-
|
|
1221
|
-
|
|
1222
|
-
|
|
1223
|
-
|
|
1254
|
+
// Entry point for session boundaries reported by a provider lifecycle hook (Claude
// PreCompact / SessionEnd, relayed through the control server). The raw provider reason
// is normalized to a SessionDestroyReason and handed to the same pre-destroy seam the bus
// commands use. Only `compact` and `clear` pass through unchanged (the session continues
// afterwards); every other value — including a missing reason — is treated as a real
// termination and mapped to `shutdown`.
private async handleSessionBoundary(input: { reason?: string; transcriptPath?: string }): Promise<void> {
  const { reason: rawReason, transcriptPath } = input;
  let normalized: SessionDestroyReason = "shutdown";
  if (rawReason === "compact" || rawReason === "clear") {
    normalized = rawReason;
  }
  await this.runPreSessionDestroy(normalized, { transcriptPath });
}
|
|
1264
|
+
|
|
1265
|
+
// The pre-session-destroy seam (#183): the single place end-of-session work runs before
// an invasive transition (compact/clear/restart/shutdown/kill).
//
// - Best-effort: a capture failure is logged via sessionLog and never propagates.
// - Time-boxed: the capture is raced against PRE_DESTROY_TIMEOUT_MS so it cannot hang
//   teardown. The time-box timer is cancelled as soon as the race settles, so a fast
//   capture does not leave a pending timer behind that keeps the event loop alive for
//   the rest of the timeout window.
// - Coalescing: while a run is in flight, later callers get the same promise back (a
//   shutdown bus command and the SessionEnd hook can both fire). The first caller's
//   `reason` and `opts` are the ones used; later arguments are ignored.
//
// During the window the agent is published non-addressable (see publishFinalizing) so
// the operator sees "wrapping up", not "busy".
private runPreSessionDestroy(reason: SessionDestroyReason, opts?: { transcriptPath?: string }): Promise<void> {
  if (this.preDestroyPromise) return this.preDestroyPromise;
  const run = (async () => {
    this.publishFinalizing(reason);
    // Replaced with a real canceller once the time-box timer has been armed.
    let cancelTimeBox = (): void => {};
    try {
      const capture = this.captureContextRatio(reason, opts);
      const timeBox = new Promise<void>((resolve) => {
        const handle = setTimeout(resolve, PRE_DESTROY_TIMEOUT_MS);
        cancelTimeBox = () => clearTimeout(handle);
      });
      await Promise.race([capture, timeBox]);
    } catch (error) {
      this.sessionLog(`insights: pre-destroy capture failed: ${errMessage(error)}`);
    } finally {
      // Whether the capture won, lost, or threw, the timer has no further purpose.
      cancelTimeBox();
    }
  })();
  this.preDestroyPromise = run;
  // Re-open the seam once this run settles so the next boundary triggers a fresh capture.
  void run.finally(() => { this.preDestroyPromise = undefined; });
  return run;
}
|
|
1287
|
+
|
|
1288
|
+
// Announce the transient pre-destroy window. Records `finalizing-<reason>` as the current
// lifecycleAction and pushes a status that is still "busy" (not offline) but has
// ready:false. The intent is that the agent drops out of isAgentOnline fan-out targeting
// without appearing offline, and that the dashboard can render it as "wrapping up" with
// the composer disabled. The status push is fire-and-forget.
private publishFinalizing(reason: SessionDestroyReason): void {
  this.lifecycleAction = `finalizing-${reason}`;
  const meta = { lifecycleAction: this.lifecycleAction, lifecycleActionAt: Date.now() };
  void this.bus.statusAsync({ agentStatus: "busy", ready: false, meta });
}
|
|
1296
|
+
|
|
1297
|
+
// Compute the #184 context-gathering ratio for the segment since the last boundary and
|
|
1298
|
+
// queue it (durable outbox, #196). Provider-agnostic: the adapter normalizes its session
|
|
1299
|
+
// into the shared SessionEvent stream; the math + classifier live in session-insights.ts.
|
|
1300
|
+
// Per-segment via a runner-side cursor, so each datapoint is one work chunk between
|
|
1301
|
+
// context resets. Mechanical, model-free → zero agent tokens, un-gameable.
|
|
1302
|
+
private async captureContextRatio(reason: SessionDestroyReason, opts?: { transcriptPath?: string }): Promise<void> {
|
|
1303
|
+
const adapter = this.options.adapter;
|
|
1304
|
+
if (!adapter.collectSessionEvents || !this.process) return;
|
|
1305
|
+
const transcriptPath = opts?.transcriptPath ?? this.lastTranscriptPath;
|
|
1306
|
+
const events = await adapter.collectSessionEvents(this.process, { transcriptPath });
|
|
1307
|
+
if (!events) return;
|
|
1308
|
+
// Reset the cursor when the underlying log changed identity (transcript rotated on
|
|
1309
|
+
// resume) or shrank (Codex buffer trimmed) — otherwise the slice would be wrong.
|
|
1310
|
+
const key = transcriptPath ?? `session:${this.providerSessionId}`;
|
|
1311
|
+
if (key !== this.insightsCursorKey || events.length < this.insightsObserved) {
|
|
1312
|
+
this.insightsCursorKey = key;
|
|
1313
|
+
this.insightsObserved = 0;
|
|
1224
1314
|
}
|
|
1225
|
-
const
|
|
1226
|
-
|
|
1227
|
-
|
|
1228
|
-
|
|
1315
|
+
const segment = events.slice(this.insightsObserved);
|
|
1316
|
+
this.insightsObserved = events.length;
|
|
1317
|
+
const analysis = computeContextRatio(segment);
|
|
1318
|
+
if (!analysis) return; // no tool calls this segment = nothing substantive to measure
|
|
1229
1319
|
this.outbox.enqueue({
|
|
1230
1320
|
kind: "insight",
|
|
1231
1321
|
payload: {
|
|
@@ -1233,12 +1323,12 @@ export class AgentRunner {
|
|
|
1233
1323
|
project: this.options.cwd,
|
|
1234
1324
|
agentId: this.agentId,
|
|
1235
1325
|
signal: "context_ratio",
|
|
1236
|
-
value: { ...analysis.metric,
|
|
1326
|
+
value: { ...analysis.metric, endReason: reason },
|
|
1237
1327
|
outcome: { ...analysis.outcome },
|
|
1238
1328
|
source: "server",
|
|
1239
1329
|
},
|
|
1240
1330
|
});
|
|
1241
|
-
this.sessionLog(`insights: context_ratio ${analysis.metric.ratio.toFixed(2)} (${analysis.metric.gatheringCalls}/${analysis.metric.totalToolCalls} gathering) queued`);
|
|
1331
|
+
this.sessionLog(`insights: context_ratio ${analysis.metric.ratio.toFixed(2)} (${analysis.metric.gatheringCalls}/${analysis.metric.totalToolCalls} gathering, ${reason}) queued`);
|
|
1242
1332
|
}
|
|
1243
1333
|
|
|
1244
1334
|
// Route a provider-emitted session event (Codex app-server) into the chat mirror.
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
// Provider-agnostic core for the #184 context-gathering signal (epic #183).
|
|
2
|
+
//
|
|
3
|
+
// The transcript *format* is provider-specific (Claude JSONL, Codex app-server items,
|
|
4
|
+
// future providers), so each adapter normalizes its session into the same `SessionEvent`
|
|
5
|
+
// stream via `collectSessionEvents`. Everything downstream — the gathering/action
|
|
6
|
+
// classifier and the ratio math — lives here once and is shared, so a tool reclassified
|
|
7
|
+
// for one provider is reclassified for all, and a new provider only implements the
|
|
8
|
+
// normalization.
|
|
9
|
+
//
|
|
10
|
+
// The classifier is model-free and runs in the runner, so it costs zero agent tokens and
|
|
11
|
+
// the agent can't game it.
|
|
12
|
+
|
|
13
|
+
/**
 * One entry of the normalized, provider-independent session stream.
 *
 * Events are ordered and the order matters: `leadingGather` counts the run of gathering
 * tool calls that precede the first action.
 */
export type SessionEvent =
  /** A tool invocation; gathering-vs-action is decided by `isGatheringTool(name)`. */
  | { type: "tool"; name: string }
  /** A failed tool result (paired outcome proxy — failures/workarounds the agent hit). */
  | { type: "tool_error" }
  /** A real user prompt (paired outcome proxy — more back-and-forth ~ clarification/correction). */
  | { type: "user_prompt" }
  /** A substantive assistant turn (one that produced text or a tool call). */
  | { type: "turn" };
|
|
24
|
+
|
|
25
|
+
// Built-in tools that only acquire context and change nothing. Every tool that matches
// neither this set nor GATHERING_NAME is counted as an action (mutation, execution, or a
// delegation/direction decision). Bash is deliberately an action because it executes — a
// conservative v0 choice that misclassifies `cat`/`ls` run through Bash; refine later if
// the data warrants it.
const GATHERING_TOOLS = new Set([
  "Read",
  "Grep",
  "Glob",
  "LS",
  "NotebookRead",
  "WebFetch",
  "WebSearch",
]);

// Name-shape heuristic for MCP / custom tools: a read-like verb at the start of the name
// or right after a `.`, `_` or `-` separator (e.g. mcp__callmux__searxng_web_search).
// Case-insensitive, and only the start of the word is anchored.
const GATHERING_NAME = /(?:^|[._-])(read|get|list|search|grep|glob|find|fetch|query|browse|view|show|cat|status|inspect|lookup|symbols|snippet)/i;

/**
 * Classify a tool name as context-gathering (`true`) or action (`false`).
 *
 * @param name Tool name as reported by the provider.
 * @returns `true` for an exact match in the built-in set or a name-shape match.
 */
export function isGatheringTool(name: string): boolean {
  return GATHERING_TOOLS.has(name) || GATHERING_NAME.test(name);
}
|
|
39
|
+
|
|
40
|
+
/** Tool-call counts and the derived gathering ratio for one analysed event stream. */
export interface ContextRatioMetric {
  /** Headline metric: gatheringCalls / totalToolCalls over the analysed events. */
  ratio: number;
  /** Tool calls classified as context-gathering. */
  gatheringCalls: number;
  /** Tool calls classified as actions (everything that is not gathering). */
  actionCalls: number;
  /** gatheringCalls + actionCalls. */
  totalToolCalls: number;
  /** Gathering calls seen before the first action — the "read N files before moving" signal. */
  leadingGather: number;
  /** Substantive assistant turns (turns that produced text or a tool call). */
  turns: number;
}
|
|
51
|
+
|
|
52
|
+
/** Outcome proxies reported alongside the ratio for the same event stream. */
export interface SessionOutcomeProxy {
  /** Count of real user prompts — more back-and-forth ~ more clarification/correction. */
  userPrompts: number;
  /** Count of failed tool results (tool_result blocks flagged is_error) — failures/workarounds the agent hit. */
  toolErrors: number;
}
|
|
58
|
+
|
|
59
|
+
/** Result of `computeContextRatio`: the ratio metric plus its paired outcome proxies. */
export interface SessionAnalysis {
  /** Tool-call counts and the gathering ratio. */
  metric: ContextRatioMetric;
  /** User-prompt and tool-error counts for the same events. */
  outcome: SessionOutcomeProxy;
}
|
|
63
|
+
|
|
64
|
+
/**
 * Fold a normalized event stream into the context-gathering ratio and its paired
 * outcome proxies.
 *
 * Callers are expected to pass only the events since the last capture boundary
 * (compact/clear/restart/shutdown), so each result describes a single work chunk.
 *
 * @param events Ordered session events; order determines `leadingGather`.
 * @returns The analysis, or `null` when the stream contains no tool calls — trivial
 *   segments carry nothing to learn from and should not pollute the baselines.
 */
export function computeContextRatio(events: SessionEvent[]): SessionAnalysis | null {
  const outcome = { userPrompts: 0, toolErrors: 0 };
  let gathering = 0;
  let actions = 0;
  let leading = 0;
  let turnCount = 0;

  for (const ev of events) {
    if (ev.type === "tool") {
      if (!isGatheringTool(ev.name)) {
        actions += 1;
        continue;
      }
      gathering += 1;
      // Only gathering calls that precede the first action count as "leading".
      if (actions === 0) leading += 1;
    } else if (ev.type === "turn") {
      turnCount += 1;
    } else if (ev.type === "tool_error") {
      outcome.toolErrors += 1;
    } else if (ev.type === "user_prompt") {
      outcome.userPrompts += 1;
    }
  }

  const total = gathering + actions;
  if (total === 0) return null;

  return {
    metric: {
      ratio: gathering / total,
      gatheringCalls: gathering,
      actionCalls: actions,
      totalToolCalls: total,
      leadingGather: leading,
      turns: turnCount,
    },
    outcome,
  };
}
|