@inixiative/agent-session 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +73 -0
- package/package.json +24 -0
- package/src/claude-code-session.ts +779 -0
- package/src/codex-session.ts +947 -0
- package/src/harness-session.ts +155 -0
- package/src/index.ts +21 -0
|
@@ -0,0 +1,947 @@
|
|
|
1
|
+
// ---------------------------------------------------------------------------
|
|
2
|
+
// CodexSession — long-lived OpenAI Codex CLI process with full event capture
|
|
3
|
+
// ---------------------------------------------------------------------------
|
|
4
|
+
//
|
|
5
|
+
// Two implementations of the SAME HarnessSession interface, mirroring
|
|
6
|
+
// ClaudeCodeSession. Both keep one persistent `codex` process alive and stream
|
|
7
|
+
// turns over a JSON-RPC channel on stdin/stdout, classifying every event into
|
|
8
|
+
// the shared SessionEvent taxonomy and resolving a turn on completion.
|
|
9
|
+
//
|
|
10
|
+
// CodexMcpSession (default) — `codex mcp-server` (stdio MCP JSON-RPC)
|
|
11
|
+
// CodexAppServerSession (experimental) — `codex app-server` (WebSocket JSON-RPC)
|
|
12
|
+
//
|
|
13
|
+
// Both DISABLE codex's own approvals + sandbox (the equivalent of
|
|
14
|
+
// `--dangerously-bypass-approvals-and-sandbox`) so OUR container is the only
|
|
15
|
+
// jail — identical intent to ClaudeCodeSession's `bypassPermissions`.
|
|
16
|
+
//
|
|
17
|
+
// Architecture (mirrors claude-code-session.ts):
|
|
18
|
+
// start() → spawn one process, start background stdout reader, MCP handshake
|
|
19
|
+
// send() → JSON-RPC tools/call (codex / codex-reply), resolve on completion
|
|
20
|
+
// fork() → new (unstarted) session resuming from the captured threadId
|
|
21
|
+
// kill() → close stdin, kill process
|
|
22
|
+
//
|
|
23
|
+
// codex's native "session ID" is the threadId returned by the `codex` tool's
|
|
24
|
+
// structuredContent — captured into externalSessionId and used for multi-turn
|
|
25
|
+
// (`codex-reply`) and fork.
|
|
26
|
+
//
|
|
27
|
+
// Verified against codex 0.140.
|
|
28
|
+
// ---------------------------------------------------------------------------
|
|
29
|
+
|
|
30
|
+
import type {
|
|
31
|
+
BeforeSendHook,
|
|
32
|
+
HarnessSession,
|
|
33
|
+
SessionEvent,
|
|
34
|
+
SessionEventHandler,
|
|
35
|
+
SessionResult,
|
|
36
|
+
SessionArtifact,
|
|
37
|
+
} from "./harness-session";
|
|
38
|
+
|
|
39
|
+
// Re-export types so importers can stay on one path (mirrors claude-code-session).
|
|
40
|
+
export type {
|
|
41
|
+
SessionEvent,
|
|
42
|
+
SessionEventKind,
|
|
43
|
+
SessionResult,
|
|
44
|
+
SessionArtifact,
|
|
45
|
+
} from "./harness-session";
|
|
46
|
+
|
|
47
|
+
// ---------------------------------------------------------------------------
|
|
48
|
+
// Shared subprocess shape (mirrors ClaudeCodeSession.PipedSubprocess)
|
|
49
|
+
// ---------------------------------------------------------------------------
|
|
50
|
+
|
|
51
|
+
/**
 * Minimal view of a child process spawned with stdin, stdout and stderr all
 * piped. The result of `Bun.spawn` is cast to this shape, and it is also the
 * shape that test doubles implement.
 */
export interface PipedSubprocess {
  /** Writable side of the child's stdin. */
  stdin: {
    write(data: string): void;
    flush(): void;
    end(): void;
  };
  /** Raw bytes written by the child to stdout. */
  stdout: ReadableStream<Uint8Array>;
  /** Raw bytes written by the child to stderr. */
  stderr: ReadableStream<Uint8Array>;
  /** Settles with the exit code once the child has terminated. */
  exited: Promise<number>;
  /** Terminate the child process. */
  kill(): void;
}
|
|
59
|
+
|
|
60
|
+
/**
 * Process spawner signature. Receives the full argv (binary first) and the
 * spawn options, and returns the piped child process.
 */
export type CodexSpawn = (
  cmd: string[],
  opts: {
    cwd: string;
    env: Record<string, string | undefined>;
  },
) => PipedSubprocess;
|
|
64
|
+
|
|
65
|
+
// ---------------------------------------------------------------------------
|
|
66
|
+
// Configuration (mirrors ClaudeCodeSessionConfig)
|
|
67
|
+
// ---------------------------------------------------------------------------
|
|
68
|
+
|
|
69
|
+
export interface CodexSessionConfig {
  /**
   * Path to the codex CLI binary. Defaults to "codex". The constructor rejects
   * values containing characters other than letters, digits, `_`, `.`, `/`,
   * `\` and `-`.
   */
  bin?: string;

  /** Model name. Defaults to "gpt-5.5". */
  model?: string;

  /**
   * Reasoning-effort level (`model_reasoning_effort`): one of
   * minimal|low|medium|high|xhigh. Any other non-empty value makes the
   * constructor throw. When omitted, no effort override is passed to codex.
   */
  effort?: string;

  /** Working directory for the session. Defaults to `process.cwd()`. */
  cwd?: string;

  /** Default per-send timeout in ms. Defaults to 600000 (10 min). */
  timeout?: number;

  /**
   * Base context to pre-load. Codex has no `--append-system-prompt`, so the
   * text is prepended to the prompt of the first turn sent by this session
   * (and is carried over to forked sessions). It persists logically for the
   * session via the continued thread.
   */
  baseContext?: string;

  /**
   * codex's native thread ID (the value the `codex` tool returns). When set,
   * the first send() continues that thread via `codex-reply` — used for fork
   * and crash recovery. Also set by a SessionAdapter resuming a mapped Foundry
   * thread.
   */
  externalSessionId?: string;

  /**
   * Override for the process spawner. Defaults to Bun.spawn. Tests inject a
   * fake subprocess that emulates the codex JSON-RPC protocol; the
   * docker-spawn helper wraps the CLI in `docker run`.
   */
  spawn?: CodexSpawn;
}
|
|
102
|
+
|
|
103
|
+
// ---------------------------------------------------------------------------
|
|
104
|
+
// Internal turn queue entry (identical shape to ClaudeCodeSession)
|
|
105
|
+
// ---------------------------------------------------------------------------
|
|
106
|
+
|
|
107
|
+
/** One pending turn: the prompt to send plus the settle callbacks of its send() promise. */
interface QueuedTurn {
  /** Final prompt text (after before-send hooks and base-context injection). */
  message: string;
  /** Per-turn timeout in ms; a value of 0 or less disables the timer. */
  timeout: number;
  /** Settles the caller's promise with the turn result. */
  resolve: (result: SessionResult) => void;
  /** Fails the caller's promise. */
  reject: (error: Error) => void;
}
|
|
113
|
+
|
|
114
|
+
// Accepted values for codex's `model_reasoning_effort`, ordered lowest to highest.
const VALID_EFFORTS = [
  "minimal",
  "low",
  "medium",
  "high",
  "xhigh",
];
|
|
116
|
+
|
|
117
|
+
// ---------------------------------------------------------------------------
|
|
118
|
+
// Base class — shared queue, event log, classification helpers, lifecycle
|
|
119
|
+
// ---------------------------------------------------------------------------
|
|
120
|
+
//
|
|
121
|
+
// CodexMcpSession and CodexAppServerSession differ only in the wire protocol
|
|
122
|
+
// (how start() handshakes, how a turn is sent, and how a raw message maps to
|
|
123
|
+
// SessionEvents). Everything else — the turn queue, token accounting, the
|
|
124
|
+
// stdout read loop, event emission, fork, artifact — is shared here, exactly as
|
|
125
|
+
// ClaudeCodeSession structures it.
|
|
126
|
+
|
|
127
|
+
abstract class BaseCodexSession implements HarnessSession {
|
|
128
|
+
// -- Config --
|
|
129
|
+
protected _bin: string;
|
|
130
|
+
protected _model: string;
|
|
131
|
+
protected _effort?: string;
|
|
132
|
+
protected _cwd: string;
|
|
133
|
+
protected _defaultTimeout: number;
|
|
134
|
+
protected _baseContext?: string;
|
|
135
|
+
protected _spawn?: CodexSpawn;
|
|
136
|
+
|
|
137
|
+
// -- Process --
|
|
138
|
+
protected _proc: PipedSubprocess | null = null;
|
|
139
|
+
protected _stderr = "";
|
|
140
|
+
|
|
141
|
+
// -- Session state --
|
|
142
|
+
protected _externalSessionId?: string;
|
|
143
|
+
protected _alive = false;
|
|
144
|
+
protected _eventLog: SessionEvent[] = [];
|
|
145
|
+
protected _handlers: SessionEventHandler[] = [];
|
|
146
|
+
protected _beforeSendHooks: BeforeSendHook[] = [];
|
|
147
|
+
protected _turns = 0;
|
|
148
|
+
protected _totalTokens = { input: 0, output: 0 };
|
|
149
|
+
protected _startedAt: number;
|
|
150
|
+
/** Whether baseContext has been prepended onto a sent turn yet. */
|
|
151
|
+
protected _injectedBaseContext = false;
|
|
152
|
+
/** Whether this session was created via fork() (continues a thread). */
|
|
153
|
+
protected _forking = false;
|
|
154
|
+
|
|
155
|
+
// -- JSON-RPC --
|
|
156
|
+
/** Monotonic JSON-RPC request id. */
|
|
157
|
+
protected _rpcId = 0;
|
|
158
|
+
/** Pending JSON-RPC responses, keyed by request id. */
|
|
159
|
+
protected _pending = new Map<
|
|
160
|
+
number,
|
|
161
|
+
{ resolve: (result: unknown) => void; reject: (e: Error) => void }
|
|
162
|
+
>();
|
|
163
|
+
|
|
164
|
+
// -- Turn queue (identical to ClaudeCodeSession) --
|
|
165
|
+
protected _queue: QueuedTurn[] = [];
|
|
166
|
+
protected _inflight: QueuedTurn | null = null;
|
|
167
|
+
protected _turnEvents: SessionEvent[] = [];
|
|
168
|
+
protected _resultText = "";
|
|
169
|
+
protected _turnTimer: ReturnType<typeof setTimeout> | null = null;
|
|
170
|
+
|
|
171
|
+
/**
 * Validate the configuration and record it. No process is spawned here.
 *
 * @throws Error if `bin` contains characters outside the allowed path set, or
 *   if `effort` is set to a value not listed in VALID_EFFORTS.
 */
constructor(config?: CodexSessionConfig) {
  const cfg: CodexSessionConfig = config ?? {};

  // Validate everything before touching instance state.
  const bin = cfg.bin ?? "codex";
  const binLooksSafe = /^[a-zA-Z0-9_.\/\\-]+$/.test(bin);
  if (!binLooksSafe) {
    throw new Error(`Invalid codex CLI binary path: "${bin}"`);
  }

  const effort = cfg.effort;
  if (effort && !VALID_EFFORTS.includes(effort)) {
    throw new Error(
      `Invalid codex effort "${effort}". Valid: ${VALID_EFFORTS.join(", ")}`,
    );
  }

  this._bin = bin;
  this._model = cfg.model ?? "gpt-5.5";
  this._effort = effort;
  this._cwd = cfg.cwd ?? process.cwd();
  this._defaultTimeout = cfg.timeout ?? 600_000;
  this._baseContext = cfg.baseContext;
  this._externalSessionId = cfg.externalSessionId;
  this._spawn = cfg.spawn;
  this._startedAt = Date.now();
}
|
|
191
|
+
|
|
192
|
+
// ---------------------------------------------------------------------------
|
|
193
|
+
// Accessors (identical to ClaudeCodeSession)
|
|
194
|
+
// ---------------------------------------------------------------------------
|
|
195
|
+
|
|
196
|
+
/** True between a successful spawn in start() and kill() / process exit. */
get alive(): boolean {
  return this._alive;
}

/** codex thread ID, once configured or captured from a turn. */
get externalSessionId(): string | undefined {
  return this._externalSessionId;
}

/** Every event emitted by this session so far (live view of the log). */
get events(): readonly SessionEvent[] {
  return this._eventLog;
}

/** Number of turns that have resolved successfully. */
get turns(): number {
  return this._turns;
}

/** Snapshot copy of the accumulated token counters. */
get totalTokens(): Readonly<{ input: number; output: number }> {
  const { input, output } = this._totalTokens;
  return { input, output };
}
|
|
203
|
+
|
|
204
|
+
// ---------------------------------------------------------------------------
|
|
205
|
+
// Event subscription (identical to ClaudeCodeSession)
|
|
206
|
+
// ---------------------------------------------------------------------------
|
|
207
|
+
|
|
208
|
+
/**
 * Subscribe to every event this session emits.
 *
 * @returns A function that removes this registration; calling it again once
 *   the handler is no longer registered is a no-op.
 */
onEvent(handler: SessionEventHandler): () => void {
  this._handlers.push(handler);

  const unsubscribe = (): void => {
    const at = this._handlers.indexOf(handler);
    if (at === -1) return;
    this._handlers.splice(at, 1);
  };
  return unsubscribe;
}
|
|
215
|
+
|
|
216
|
+
/**
 * Register a hook that send() runs on the outgoing message. Hooks run in
 * registration order, each receiving the previous hook's output.
 *
 * @returns A function that removes this registration; calling it again once
 *   the hook is no longer registered is a no-op.
 */
onBeforeSend(hook: BeforeSendHook): () => void {
  this._beforeSendHooks.push(hook);

  const unsubscribe = (): void => {
    const at = this._beforeSendHooks.indexOf(hook);
    if (at === -1) return;
    this._beforeSendHooks.splice(at, 1);
  };
  return unsubscribe;
}
|
|
223
|
+
|
|
224
|
+
/**
 * Mid-turn push. Nothing is written to the codex process: like Claude Code's
 * stream-json stdin, codex's tools/call offers no out-of-band channel while a
 * turn is running. Instead a "push_ignored" error event is emitted so callers
 * can observe the attempt; the payload is attached to the event as `raw`.
 */
async push(payload: { kind: string; text: string }): Promise<void> {
  const notice: SessionEvent = {
    kind: "error",
    timestamp: Date.now(),
    text: `push_ignored: kind=${payload.kind} — codex turn has no OOB channel`,
    raw: payload,
  };
  this._emit(notice);
}
|
|
238
|
+
|
|
239
|
+
// ---------------------------------------------------------------------------
|
|
240
|
+
// interrupt / kill / artifact (identical to ClaudeCodeSession)
|
|
241
|
+
// ---------------------------------------------------------------------------
|
|
242
|
+
|
|
243
|
+
/**
 * Abandon the in-flight turn, if there is one: its send() promise is rejected
 * with "Turn interrupted". Nothing is sent to the codex process; a response
 * that arrives later for the abandoned turn is ignored by the dispatcher.
 *
 * The next queued turn (if any) is then dispatched. Without that step, turns
 * queued behind the interrupted one would never be started and their send()
 * promises would never settle.
 */
interrupt(): void {
  if (!this._inflight) return;
  this._rejectInflight(new Error("Turn interrupted"));
  // The in-flight slot is free again — hand it to the next waiter.
  this._processNextTurn();
}
|
|
247
|
+
|
|
248
|
+
/**
 * Tear the session down. No-op when no process is attached.
 *
 * Rejects the in-flight turn, every queued turn and every pending JSON-RPC
 * request with "Session killed", closes stdin, kills the process and detaches
 * it. A `session_end` event is emitted only if the session was still alive:
 * when the process already exited on its own, the exit handler installed by
 * start() has emitted `session_end`, and emitting it again would put a
 * duplicate in the event log.
 */
kill(): void {
  const proc = this._proc;
  if (!proc) return;

  const wasAlive = this._alive;
  this._alive = false;

  if (this._turnTimer) {
    clearTimeout(this._turnTimer);
    this._turnTimer = null;
  }

  this._rejectInflight(new Error("Session killed"));
  this._rejectQueue(new Error("Session killed"));
  for (const waiter of this._pending.values()) {
    waiter.reject(new Error("Session killed"));
  }
  this._pending.clear();

  // Both calls are best-effort: the process may already be gone.
  try { proc.stdin.end(); } catch { /* already closed */ }
  try { proc.kill(); } catch { /* already dead */ }
  this._proc = null;

  if (wasAlive) {
    this._emit({ kind: "session_end", timestamp: Date.now() });
  }
}
|
|
268
|
+
|
|
269
|
+
/**
 * Build a summary of the session so far: a copy of the event log, turn and
 * token totals, and counts of tool_use / tool_result / error events.
 * `endedAt` is left undefined while the session is alive; otherwise it is the
 * time of this call.
 */
artifact(): SessionArtifact {
  // Tally the three event kinds in one pass over the log.
  let toolCalls = 0;
  let toolResults = 0;
  let errors = 0;
  for (const entry of this._eventLog) {
    if (entry.kind === "tool_use") toolCalls++;
    else if (entry.kind === "tool_result") toolResults++;
    else if (entry.kind === "error") errors++;
  }

  const endedAt = this._alive ? undefined : Date.now();
  return {
    externalSessionId: this._externalSessionId,
    events: [...this._eventLog],
    startedAt: this._startedAt,
    endedAt,
    turns: this._turns,
    totalTokens: { ...this._totalTokens },
    toolCalls,
    toolResults,
    errors,
  };
}
|
|
282
|
+
|
|
283
|
+
// ---------------------------------------------------------------------------
|
|
284
|
+
// start() — spawn the persistent process + handshake (subclass-specific)
|
|
285
|
+
// ---------------------------------------------------------------------------
|
|
286
|
+
|
|
287
|
+
/**
 * Spawn the persistent codex process, start the background stdout/stderr
 * readers, install the exit handler and run the subclass handshake.
 *
 * The process is spawned through the injected spawner when one was configured,
 * otherwise through Bun.spawn with all three stdio streams piped. The child
 * inherits the current environment plus `DISABLE_AUTOUPDATER=1`.
 *
 * If the handshake fails while the process is still running, the session is
 * killed before the error is rethrown, so the child is not leaked and start()
 * can be called again.
 *
 * @throws Error "Session already started" if a process is already attached;
 *   otherwise whatever the spawner or the handshake throws.
 */
async start(): Promise<void> {
  if (this._proc) throw new Error("Session already started");

  const argv = [this._bin, ...this._buildSpawnArgs()];
  const env: Record<string, string | undefined> = {
    ...process.env,
    DISABLE_AUTOUPDATER: "1",
  };

  const proc: PipedSubprocess = this._spawn
    ? this._spawn(argv, { cwd: this._cwd, env })
    : (Bun.spawn(argv, {
        cwd: this._cwd,
        stdin: "pipe",
        stdout: "pipe",
        stderr: "pipe",
        env,
      }) as unknown as PipedSubprocess);
  this._proc = proc;

  this._alive = true;
  this._emit({ kind: "session_start", timestamp: Date.now() });

  // Background readers; both handle their own errors.
  void this._readStdout();
  void this._readStderr();

  proc.exited.then((code) => {
    // Ignore exits we caused via kill(), and exits of a process that is no
    // longer the attached one (killed, then the session was started again) —
    // otherwise a late exit of the old process would tear down the new one.
    if (!this._alive || this._proc !== proc) return;
    this._alive = false;

    const stderrText = this._stderr.trim();
    const errMsg = stderrText
      ? `Process exited (code ${code}): ${stderrText.slice(0, 500)}`
      : `Process exited with code ${code}`;
    this._rejectInflight(new Error(errMsg));
    this._rejectQueue(new Error("Session ended"));
    for (const waiter of this._pending.values()) {
      waiter.reject(new Error("Session ended"));
    }
    this._pending.clear();
    this._emit({ kind: "session_end", timestamp: Date.now() });
  });

  try {
    await this._handshake();
  } catch (err) {
    // Do not leave a half-initialised child running behind a failed start().
    if (this._alive && this._proc === proc) this.kill();
    throw err;
  }
}
|
|
329
|
+
|
|
330
|
+
// ---------------------------------------------------------------------------
|
|
331
|
+
// send() — queue a turn, resolve on completion (identical control flow)
|
|
332
|
+
// ---------------------------------------------------------------------------
|
|
333
|
+
|
|
334
|
+
/**
 * Send one turn and resolve with its result once codex completes it.
 *
 * If no process is attached but a thread ID is known, the session is started
 * first. The message is passed through the before-send hooks in order; the
 * base context, if configured, is prepended to the first message sent (codex
 * has no system-prompt flag). The turn is dispatched immediately when nothing
 * is in flight, otherwise it is queued.
 *
 * @param message Prompt text for this turn.
 * @param opts.timeout Per-turn timeout in ms; defaults to the session timeout.
 * @throws Error "Session not started — call start() first" when no process is
 *   attached, or "Session ended" when the session is no longer alive —
 *   including when it ended while the before-send hooks were running.
 */
async send(
  message: string,
  opts?: { timeout?: number },
): Promise<SessionResult> {
  if (!this._proc && this._externalSessionId) {
    await this.start();
  }
  if (!this._proc) throw new Error("Session not started — call start() first");
  if (!this._alive) throw new Error("Session ended");

  const timeout = opts?.timeout ?? this._defaultTimeout;

  let transformed = message;
  for (const hook of this._beforeSendHooks) {
    transformed = await hook(transformed);
  }

  // Hooks are awaited, so the process may have exited or been killed in the
  // meantime. Re-check: dispatching now would write to a dead process, and the
  // exit handler has already run, so nothing would ever reject the request.
  if (!this._proc || !this._alive) throw new Error("Session ended");

  // Prepend baseContext onto the first turn (codex has no system-prompt flag).
  if (this._baseContext && !this._injectedBaseContext) {
    transformed = `${this._baseContext}\n\n${transformed}`;
    this._injectedBaseContext = true;
  }

  return new Promise<SessionResult>((resolve, reject) => {
    const turn: QueuedTurn = { message: transformed, timeout, resolve, reject };
    if (this._inflight) {
      this._queue.push(turn);
    } else {
      this._dispatchTurn(turn);
    }
  });
}
|
|
366
|
+
|
|
367
|
+
// ---------------------------------------------------------------------------
|
|
368
|
+
// Private — turn dispatch + queue (mirrors ClaudeCodeSession)
|
|
369
|
+
// ---------------------------------------------------------------------------
|
|
370
|
+
|
|
371
|
+
/**
 * Make `turn` the in-flight turn, reset the per-turn buffers, and send it via
 * the subclass's wire protocol. When the send promise settles, the turn is
 * resolved or rejected — unless it is no longer the in-flight turn (it was
 * interrupted or timed out), in which case the late outcome is ignored.
 *
 * A positive `turn.timeout` arms a timer that rejects the turn. The timeout
 * path then dispatches the next queued turn, exactly like the error path;
 * otherwise turns queued behind a timed-out turn would never be started.
 */
private _dispatchTurn(turn: QueuedTurn): void {
  this._inflight = turn;
  this._turnEvents = [];
  this._resultText = "";

  this._sendTurn(turn.message)
    .then((tokens) => {
      if (this._inflight !== turn) return; // interrupted / timed out
      if (tokens) {
        this._totalTokens.input += tokens.input;
        this._totalTokens.output += tokens.output;
      }
      this._resolveTurn(tokens);
    })
    .catch((err: Error) => {
      if (this._inflight !== turn) return;
      this._rejectInflight(err);
      this._processNextTurn();
    });

  if (turn.timeout > 0) {
    this._turnTimer = setTimeout(() => {
      this._turnTimer = null;
      this._rejectInflight(new Error(`Turn timed out after ${turn.timeout}ms`));
      // Free slot: keep the queue moving.
      this._processNextTurn();
    }, turn.timeout);
  }
}
|
|
400
|
+
|
|
401
|
+
/**
 * Complete the in-flight turn successfully: stop its timer, count it, resolve
 * its promise with the collected text and events, then start the next queued
 * turn. No-op when nothing is in flight.
 *
 * @param tokens Usage for this turn; when absent, the first streamed event of
 *   the turn that carries tokens is used instead.
 */
private _resolveTurn(tokens?: { input: number; output: number }): void {
  const turn = this._inflight;
  if (!turn) return;

  if (this._turnTimer) {
    clearTimeout(this._turnTimer);
    this._turnTimer = null;
  }

  this._turns++;

  const outcome: SessionResult = {
    content: this._resultText,
    events: [...this._turnEvents],
    tokens: tokens ?? this._turnEvents.find((e) => e.tokens)?.tokens,
    externalSessionId: this._externalSessionId,
  };
  turn.resolve(outcome);
  this._inflight = null;

  this._processNextTurn();
}
|
|
421
|
+
|
|
422
|
+
/** Dispatch the oldest queued turn, if there is one and the session is alive. */
private _processNextTurn(): void {
  if (this._queue.length === 0 || !this._alive) return;
  const upcoming = this._queue.shift()!;
  this._dispatchTurn(upcoming);
}
|
|
428
|
+
|
|
429
|
+
/**
 * Fail the in-flight turn with `err`, stop its timer and clear the in-flight
 * slot. No-op when nothing is in flight. Does not start the next queued turn.
 */
protected _rejectInflight(err: Error): void {
  const turn = this._inflight;
  if (!turn) return;

  if (this._turnTimer) {
    clearTimeout(this._turnTimer);
    this._turnTimer = null;
  }

  turn.reject(err);
  this._inflight = null;
}
|
|
438
|
+
|
|
439
|
+
/** Fail every queued (not yet dispatched) turn with `err` and empty the queue. */
protected _rejectQueue(err: Error): void {
  const abandoned = this._queue;
  this._queue = [];
  abandoned.forEach((turn) => turn.reject(err));
}
|
|
443
|
+
|
|
444
|
+
// ---------------------------------------------------------------------------
|
|
445
|
+
// Private — stdout/stderr readers (mirrors ClaudeCodeSession)
|
|
446
|
+
// ---------------------------------------------------------------------------
|
|
447
|
+
|
|
448
|
+
/**
 * Background loop over the process's stdout. Decodes the byte stream, splits
 * it on "\n" and feeds every non-blank line to _processLine; a trailing
 * partial line is kept until more data (or end of stream) arrives. Any error
 * raised while reading or processing rejects the in-flight turn and ends the
 * loop.
 */
private async _readStdout(): Promise<void> {
  const stream = this._proc!.stdout.getReader();
  const utf8 = new TextDecoder();
  let pending = "";

  try {
    for (;;) {
      const chunk = await stream.read();
      if (chunk.done) break;
      pending += utf8.decode(chunk.value, { stream: true });

      // Consume every complete line; whatever follows the last "\n" stays buffered.
      let newlineAt = pending.indexOf("\n");
      while (newlineAt !== -1) {
        const line = pending.slice(0, newlineAt);
        pending = pending.slice(newlineAt + 1);
        if (line.trim()) this._processLine(line);
        newlineAt = pending.indexOf("\n");
      }
    }

    // End of stream: flush an unterminated final line.
    if (pending.trim()) this._processLine(pending);
  } catch (err) {
    this._rejectInflight(err as Error);
  }
}
|
|
469
|
+
|
|
470
|
+
/**
 * Background loop that appends everything the process writes to stderr onto
 * `_stderr` (used for the exit error message). Read errors are ignored.
 */
private async _readStderr(): Promise<void> {
  const stream = this._proc!.stderr.getReader();
  const utf8 = new TextDecoder();
  try {
    for (;;) {
      const chunk = await stream.read();
      if (chunk.done) return;
      this._stderr += utf8.decode(chunk.value, { stream: true });
    }
  } catch { /* ignore */ }
}
|
|
481
|
+
|
|
482
|
+
// ---------------------------------------------------------------------------
|
|
483
|
+
// Private — JSON-RPC line processor (shared envelope; payload is per-protocol)
|
|
484
|
+
// ---------------------------------------------------------------------------
|
|
485
|
+
|
|
486
|
+
/**
 * Handle one line of stdout.
 *
 * Lines that are not valid JSON, or whose JSON value is not an object (for
 * example `null` or a bare number), are ignored. Without the object check a
 * `null` line would throw on property access, which would abort the whole
 * stdout reader and reject the in-flight turn.
 *
 * A message with a numeric `id` and a `result`/`error` member that matches a
 * pending request settles that request. Everything else is treated as a
 * notification: it is classified by the subclass, and each resulting event is
 * emitted, appended to the current turn's events, used to update the turn's
 * result text (last non-empty `text` of a "result"/"text" event wins) and
 * added to the token totals.
 */
private _processLine(line: string): void {
  let parsed: unknown;
  try {
    parsed = JSON.parse(line);
  } catch {
    return;
  }
  if (typeof parsed !== "object" || parsed === null) return;
  const raw = parsed as Record<string, unknown>;

  // JSON-RPC response to one of our requests (has matching `id` + result/error).
  if (typeof raw.id === "number" && ("result" in raw || "error" in raw)) {
    const pending = this._pending.get(raw.id);
    if (pending) {
      this._pending.delete(raw.id);
      if ("error" in raw && raw.error) {
        const e = raw.error as Record<string, unknown>;
        pending.reject(new Error((e.message as string) ?? JSON.stringify(e)));
      } else {
        pending.resolve(raw.result);
      }
      return;
    }
  }

  // Otherwise it's a notification (streamed event) — classify it.
  for (const event of this._classify(raw)) {
    this._emit(event);
    this._turnEvents.push(event);
    if ((event.kind === "result" || event.kind === "text") && event.text) {
      this._resultText = event.text;
    }
    if (event.tokens) {
      this._totalTokens.input += event.tokens.input;
      this._totalTokens.output += event.tokens.output;
    }
  }
}
|
|
524
|
+
|
|
525
|
+
// ---------------------------------------------------------------------------
|
|
526
|
+
// Private — emit (identical to ClaudeCodeSession)
|
|
527
|
+
// ---------------------------------------------------------------------------
|
|
528
|
+
|
|
529
|
+
/**
 * Append `event` to the session log and deliver it to every subscriber.
 *
 * Delivery iterates over a snapshot of the handler list: a handler may
 * unsubscribe (itself or another handler) while the event is being delivered,
 * and iterating the live array while it is spliced would silently skip the
 * handler that follows the removed one. A handler that throws is logged with
 * console.warn and does not prevent delivery to the remaining handlers.
 */
protected _emit(event: SessionEvent): void {
  this._eventLog.push(event);
  for (const handler of [...this._handlers]) {
    try {
      handler(event);
    } catch (err) {
      // Handlers may throw anything, not only Error instances.
      const detail = err instanceof Error ? err.message : String(err);
      console.warn(`[${this.constructor.name}] handler error:`, detail);
    }
  }
}
|
|
542
|
+
|
|
543
|
+
// ---------------------------------------------------------------------------
|
|
544
|
+
// Private — JSON-RPC request helper (resolves on the matching response)
|
|
545
|
+
// ---------------------------------------------------------------------------
|
|
546
|
+
|
|
547
|
+
/**
 * Send a JSON-RPC 2.0 request as one newline-terminated line on the process's
 * stdin. The returned promise settles when _processLine sees the response
 * carrying the same id, or rejects immediately if writing fails.
 */
protected _rpcRequest(method: string, params?: unknown): Promise<unknown> {
  const id = ++this._rpcId;
  const frame = `${JSON.stringify({ jsonrpc: "2.0", id, method, params })}\n`;

  return new Promise<unknown>((resolve, reject) => {
    // Register before writing so the response cannot arrive unobserved.
    this._pending.set(id, { resolve, reject });
    try {
      const stdin = this._proc!.stdin;
      stdin.write(frame);
      stdin.flush();
    } catch (err) {
      this._pending.delete(id);
      reject(err as Error);
    }
  });
}
|
|
561
|
+
|
|
562
|
+
/**
 * Send a JSON-RPC 2.0 notification (no id, so no response is awaited) as one
 * newline-terminated line on the process's stdin. Write errors propagate.
 */
protected _rpcNotify(method: string, params?: unknown): void {
  const frame = `${JSON.stringify({ jsonrpc: "2.0", method, params })}\n`;
  const stdin = this._proc!.stdin;
  stdin.write(frame);
  stdin.flush();
}
|
|
567
|
+
|
|
568
|
+
// ---------------------------------------------------------------------------
|
|
569
|
+
// Subclass hooks — the only protocol-specific surface
|
|
570
|
+
// ---------------------------------------------------------------------------
|
|
571
|
+
|
|
572
|
+
/** CLI args for the persistent process. */
|
|
573
|
+
protected abstract _buildSpawnArgs(): string[];
|
|
574
|
+
/** Protocol handshake after spawn (initialize / initialized / etc.). */
|
|
575
|
+
protected abstract _handshake(): Promise<void>;
|
|
576
|
+
/** Send one turn; resolve with token usage when the turn completes. */
|
|
577
|
+
protected abstract _sendTurn(
|
|
578
|
+
message: string,
|
|
579
|
+
): Promise<{ input: number; output: number } | undefined>;
|
|
580
|
+
/** Map a streamed notification to SessionEvents. */
|
|
581
|
+
protected abstract _classify(msg: Record<string, unknown>): SessionEvent[];
|
|
582
|
+
|
|
583
|
+
/**
 * Create a new, unstarted session of the same concrete class (taken from
 * `this.constructor`) that continues this session's codex thread. The fork
 * copies this session's binary, model, effort, timeout and spawner; `cwd` and
 * `baseContext` can be overridden through `opts`.
 *
 * @throws Error when no thread ID has been captured yet.
 */
fork(opts?: { cwd?: string; baseContext?: string }): HarnessSession {
  const threadId = this._externalSessionId;
  if (!threadId) {
    throw new Error(
      "Cannot fork — no thread ID yet (send at least one message first)",
    );
  }

  type SessionCtor = new (c?: CodexSessionConfig) => BaseCodexSession;
  const SameClass = this.constructor as SessionCtor;

  const childConfig: CodexSessionConfig = {
    bin: this._bin,
    model: this._model,
    effort: this._effort,
    cwd: opts?.cwd ?? this._cwd,
    timeout: this._defaultTimeout,
    baseContext: opts?.baseContext ?? this._baseContext,
    externalSessionId: threadId,
    spawn: this._spawn,
  };

  const child = new SameClass(childConfig);
  child._forking = true;
  return child;
}
|
|
604
|
+
}
|
|
605
|
+
|
|
606
|
+
// ---------------------------------------------------------------------------
|
|
607
|
+
// CodexMcpSession (DEFAULT) — `codex mcp-server` over stdio MCP JSON-RPC
|
|
608
|
+
// ---------------------------------------------------------------------------
|
|
609
|
+
//
|
|
610
|
+
// Handshake: initialize → notifications/initialized → tools/list.
|
|
611
|
+
// A turn is an MCP `tools/call`:
|
|
612
|
+
// - first turn → tool "codex" (params: prompt, model, cwd, ...)
|
|
613
|
+
// - subsequent turns → tool "codex-reply" (params: prompt, threadId)
|
|
614
|
+
// During the call, codex streams `codex/event` notifications (agent message,
|
|
615
|
+
// reasoning, command execution, token usage) — mapped to SessionEvents. The
|
|
616
|
+
// threadId comes back in the call result's structuredContent (captured for
|
|
617
|
+
// multi-turn + fork).
|
|
618
|
+
|
|
619
|
+
export class CodexMcpSession extends BaseCodexSession {
|
|
620
|
+
/**
 * CLI arguments for `codex mcp-server`. Each config override is passed as a
 * `-c key="value"` pair. Codex's own sandbox and approval prompts are turned
 * off (the equivalent of --dangerously-bypass-approvals-and-sandbox) so that
 * OUR container is the jail; `model_reasoning_effort` is added only when an
 * effort level was configured.
 */
protected _buildSpawnArgs(): string[] {
  const overrides = [
    `sandbox_mode="danger-full-access"`,
    `approval_policy="never"`,
  ];
  if (this._effort) {
    overrides.push(`model_reasoning_effort="${this._effort}"`);
  }
  return ["mcp-server", ...overrides.flatMap((kv) => ["-c", kv])];
}
|
|
634
|
+
|
|
635
|
+
/**
 * MCP handshake: `initialize` request, `notifications/initialized`
 * notification, then a `tools/list` request. The tools/list response is
 * awaited but not inspected here.
 */
protected async _handshake(): Promise<void> {
  const initParams = {
    protocolVersion: "2025-06-18",
    capabilities: {},
    clientInfo: { name: "inixiative-bench", version: "0.1.0" },
  };
  await this._rpcRequest("initialize", initParams);

  this._rpcNotify("notifications/initialized");

  // tools/list confirms the `codex` + `codex-reply` tools are present.
  await this._rpcRequest("tools/list", {});
}
|
|
645
|
+
|
|
646
|
+
/**
 * Run one turn as an MCP `tools/call`.
 *
 * The first turn of a thread calls the `codex` tool with the prompt, model,
 * cwd and the sandbox/approval overrides; once a thread ID is known, turns
 * call `codex-reply` with the prompt and that thread ID.
 *
 * From the call result this captures the thread ID (only if none is set yet)
 * and the final text (structuredContent.content, else the joined text blocks
 * of the tool result).
 *
 * A result flagged `isError: true` is a failed tool call per the MCP tools
 * specification, so it rejects the turn with the tool's text instead of
 * resolving it as a successful answer. The thread ID is captured first so the
 * thread can still be continued afterwards.
 *
 * @returns Token usage from structuredContent.usage, but only when no streamed
 *   event of this turn carried tokens (those are already counted by the base
 *   class); otherwise undefined.
 * @throws Error when the JSON-RPC request fails or the tool result is flagged
 *   as an error.
 */
protected async _sendTurn(
  message: string,
): Promise<{ input: number; output: number } | undefined> {
  const isReply = this._externalSessionId !== undefined;
  const name = isReply ? "codex-reply" : "codex";
  const args: Record<string, unknown> = isReply
    ? { prompt: message, threadId: this._externalSessionId }
    : {
        prompt: message,
        model: this._model,
        cwd: this._cwd,
        // Belt-and-suspenders: also disable per-call (matches spawn `-c` flags).
        sandbox: "danger-full-access",
        "approval-policy": "never",
        ...(this._effort
          ? { config: { model_reasoning_effort: this._effort } }
          : {}),
      };

  const result = (await this._rpcRequest("tools/call", {
    name,
    arguments: args,
  })) as Record<string, unknown> | undefined;

  // Capture the threadId for multi-turn (codex-reply) + fork.
  const structured = result?.structuredContent as
    | Record<string, unknown>
    | undefined;
  const threadId = structured?.threadId as string | undefined;
  if (threadId && !this._externalSessionId) {
    this._externalSessionId = threadId;
  }

  // A tool-level failure must not be reported as a successful turn.
  if (result?.isError === true) {
    throw new Error(
      this._extractToolText(result.content) ??
        "codex tool call reported an error",
    );
  }

  // Final text: prefer structuredContent.content, else the tool result content.
  const finalText =
    (structured?.content as string | undefined) ??
    this._extractToolText(result?.content);
  if (finalText) this._resultText = finalText;

  // Streamed token events were already added to the totals by the base class;
  // report the call result's usage only when none was seen, to avoid counting
  // the same turn twice.
  const sawStreamedTokens = this._turnEvents.some((e) => e.tokens);
  const usage = structured?.usage as Record<string, number> | undefined;
  if (usage && !sawStreamedTokens) {
    return {
      input: (usage.input_tokens ?? usage.inputTokens ?? 0) as number,
      output: (usage.output_tokens ?? usage.outputTokens ?? 0) as number,
    };
  }
  return undefined;
}
|
|
699
|
+
|
|
700
|
+
/**
 * Join the `text` of every `{ type: "text", text: string }` block in a tool
 * result's content array with newlines.
 *
 * @returns The joined text, or undefined when `content` is not an array or
 *   holds no text blocks.
 */
private _extractToolText(content: unknown): string | undefined {
  if (!Array.isArray(content)) return undefined;

  const texts = content
    .map((block) => block as Record<string, unknown>)
    .filter((b) => b.type === "text" && typeof b.text === "string")
    .map((b) => b.text as string);

  return texts.length > 0 ? texts.join("\n") : undefined;
}
|
|
709
|
+
|
|
710
|
+
/**
 * Translates one incoming JSON-RPC message into SessionEvents.
 *
 * Only `codex/event` notifications are considered. The tagged event is read
 * from `params.msg`, falling back to `params` itself, and handed to
 * `classifyCodexEvent` with "type" as the discriminant field.
 *
 * @param msg Parsed JSON-RPC message.
 * @returns The classified events; empty for any other method or when the
 *   notification carries no payload.
 */
protected _classify(msg: Record<string, unknown>): SessionEvent[] {
  const isCodexEvent = msg.method === "codex/event";
  if (!isCodexEvent) return [];

  const envelope = msg.params as Record<string, unknown> | undefined;
  const payload = (envelope?.msg ?? envelope) as
    | Record<string, unknown>
    | undefined;

  return payload ? classifyCodexEvent(payload, "type") : [];
}
|
|
719
|
+
}
|
|
720
|
+
|
|
721
|
+
// ---------------------------------------------------------------------------
|
|
722
|
+
// CodexAppServerSession (EXPERIMENTAL) — `codex app-server` over WebSocket
|
|
723
|
+
// ---------------------------------------------------------------------------
|
|
724
|
+
//
|
|
725
|
+
// EXPERIMENTAL: the app-server protocol is newer and less battle-tested than
|
|
726
|
+
// mcp-server. Prefer CodexMcpSession unless you specifically need app-server.
|
|
727
|
+
//
|
|
728
|
+
// `codex app-server --listen ws://127.0.0.1:<port>` — localhost needs no auth
|
|
729
|
+
// token. JSON-RPC 2.0 with slash-delimited methods:
|
|
730
|
+
// initialize → initialized → thread/start (returns thread.id) → turn/start
|
|
731
|
+
// Consume turn/started, item/started, item/completed, turn/completed
|
|
732
|
+
// notifications (ThreadItem types agentMessage / reasoning / commandExecution;
|
|
733
|
+
// ThreadTokenUsage). Mapped to SessionEvents.
|
|
734
|
+
//
|
|
735
|
+
// We connect over the spawned process's stdin/stdout JSON-RPC for parity with
|
|
736
|
+
// the rest of the harness (the WebSocket listen address is for external
|
|
737
|
+
// clients; the stdio channel carries the same JSON-RPC frames). The base
|
|
738
|
+
// class's read/write loop is reused unchanged.
|
|
739
|
+
|
|
740
|
+
/**
 * EXPERIMENTAL session backed by `codex app-server`.
 *
 * Turn lifecycle as implemented in this class:
 *   1. `_handshake()` sends `initialize`, then the `initialized` notification.
 *   2. `_sendTurn()` creates a thread with `thread/start` on the first turn
 *      (when no external session id is known yet), issues `turn/start`, and
 *      waits for the `turn/completed` notification.
 *   3. `_classify()` maps notifications to SessionEvents and resolves the
 *      pending turn when `turn/completed` arrives.
 */
export class CodexAppServerSession extends BaseCodexSession {
  /**
   * Settlers for the in-flight turn. `resolve` is invoked from `_classify`
   * when `turn/completed` arrives; the slot is cleared once the turn settles
   * or when `turn/start` itself fails.
   */
  private _turnDone?: {
    resolve: (t: { input: number; output: number } | undefined) => void;
    reject: (e: Error) => void;
  };

  /** CLI arguments used to spawn the app-server process. */
  protected _buildSpawnArgs(): string[] {
    // app-server on localhost needs no auth token. Approvals/sandbox are set
    // per-thread in thread/start (approvalPolicy / sandbox below).
    return ["app-server", "--listen", "ws://127.0.0.1:0"];
  }

  /** Sends the `initialize` request followed by the `initialized` notification. */
  protected async _handshake(): Promise<void> {
    await this._rpcRequest("initialize", {
      protocolVersion: "2025-06-18",
      capabilities: {},
      clientInfo: { name: "inixiative-bench", version: "0.1.0" },
    });
    this._rpcNotify("initialized");
  }

  /**
   * Sends one user message as a turn and waits for it to complete.
   *
   * @param message User message for this turn.
   * @returns Token usage reported on `turn/completed`, or `undefined` when
   *   that notification carries none.
   * @throws Error when `thread/start` yields no usable thread id; failures of
   *   the underlying RPC requests propagate unchanged.
   */
  protected async _sendTurn(
    message: string,
  ): Promise<{ input: number; output: number } | undefined> {
    // Start (or reuse) a thread. Disable codex's approvals + sandbox so OUR
    // container is the only jail.
    if (!this._externalSessionId) {
      const started = (await this._rpcRequest("thread/start", {
        model: this._model,
        cwd: this._cwd,
        approvalPolicy: "never",
        sandbox: "danger-full-access",
        ...(this._effort ? { modelReasoningEffort: this._effort } : {}),
      })) as Record<string, unknown> | undefined;
      const thread = started?.thread as Record<string, unknown> | undefined;
      const id = thread?.id ?? started?.threadId;
      // Fail loudly here rather than sending turn/start with an undefined
      // threadId, which can only produce a less descriptive failure later.
      if (typeof id !== "string" || id.length === 0) {
        throw new Error(
          "codex app-server: thread/start did not return a thread id",
        );
      }
      this._externalSessionId = id;
    }

    // Register the settlers BEFORE issuing turn/start so a turn/completed
    // that is processed while the request is still pending is not missed.
    const completed = new Promise<{ input: number; output: number } | undefined>(
      (resolve, reject) => {
        this._turnDone = { resolve, reject };
      },
    );
    const pending = this._turnDone;

    // turn/start streams turn/started, item/*, turn/completed back as
    // notifications, consumed in _classify; turn/completed resolves the turn.
    // NOTE(review): `input` is sent as a bare string — confirm against the
    // app-server schema whether it expects a list of input items instead.
    try {
      await this._rpcRequest("turn/start", {
        threadId: this._externalSessionId,
        input: message,
      });
    } catch (err) {
      // The turn never started: drop our settlers so a stale entry cannot be
      // resolved by an unrelated notification later.
      if (this._turnDone === pending) this._turnDone = undefined;
      throw err;
    }

    return completed;
  }

  /**
   * Translates one incoming JSON-RPC notification into SessionEvents.
   *
   * - `turn/completed` resolves the pending turn with the reported usage and
   *   yields a single `result` event.
   * - `item/started` / `item/completed` are mapped by `classifyCodexEvent`.
   * - Everything else (including messages without a `method`) yields nothing.
   *
   * @param msg Parsed JSON-RPC message.
   * @returns The classified events, possibly empty.
   */
  protected _classify(msg: Record<string, unknown>): SessionEvent[] {
    const method = msg.method as string | undefined;
    if (!method) return [];
    const params = (msg.params ?? {}) as Record<string, unknown>;

    if (method === "turn/completed") {
      const usage = (params.usage ?? params.tokenUsage) as
        | Record<string, number>
        | undefined;
      const tokens = usage
        ? {
            input: usage.inputTokens ?? usage.input_tokens ?? 0,
            output: usage.outputTokens ?? usage.output_tokens ?? 0,
          }
        : undefined;
      this._turnDone?.resolve(tokens);
      this._turnDone = undefined;
      // Note: no `tokens` on this event — _sendTurn returns them to _dispatchTurn,
      // which does the accounting once. Putting tokens here too would double-count.
      return [{ kind: "result", timestamp: Date.now(), text: this._resultText, raw: msg }];
    }

    if (method === "item/completed" || method === "item/started") {
      // Both notifications go through the same classifier, so an item that is
      // classifiable in both states produces events for each notification.
      const item = (params.item ?? params) as Record<string, unknown>;
      return classifyCodexEvent(item, "type");
    }

    // turn/started and other lifecycle notifications carry no turn content.
    return [];
  }
}
|
|
827
|
+
|
|
828
|
+
// ---------------------------------------------------------------------------
|
|
829
|
+
// Shared event mapping — codex item/event → SessionEvent
|
|
830
|
+
// ---------------------------------------------------------------------------
|
|
831
|
+
//
|
|
832
|
+
// Both protocols carry the same ThreadItem / event shapes (agent message,
|
|
833
|
+
// reasoning, command execution, token usage). `tag` is the discriminant field
|
|
834
|
+
// ("type" for both mcp `codex/event.msg.type` and app-server `item.type`).
|
|
835
|
+
//
|
|
836
|
+
// agent_message / agentMessage → text
|
|
837
|
+
// reasoning / agent_reasoning → thinking
|
|
838
|
+
// command_execution / commandExecution → tool_use (+ tool_result when done)
|
|
839
|
+
// token_count / usage → tokens (attached to a result event)
|
|
840
|
+
|
|
841
|
+
/**
 * Coerces an event field to display text: strings pass through, arrays are
 * reduced to their string elements joined with "\n", anything else is dropped.
 */
function codexEventText(value: unknown): string | undefined {
  if (typeof value === "string") return value;
  if (Array.isArray(value)) {
    const parts = value.filter(
      (part): part is string => typeof part === "string",
    );
    return parts.length > 0 ? parts.join("\n") : undefined;
  }
  return undefined;
}

/** Returns `value` when it is a finite number, otherwise `undefined`. */
function codexEventNumber(value: unknown): number | undefined {
  return typeof value === "number" && Number.isFinite(value) ? value : undefined;
}

/**
 * Maps one codex event / thread item onto zero or more SessionEvents.
 *
 * @param ev  The event or item object.
 * @param tag Name of the discriminant field on `ev` (compared
 *   case-insensitively against the known type names).
 * @returns The resulting events; empty for unrecognised types and for
 *   recognised types that carry no usable payload.
 */
function classifyCodexEvent(
  ev: Record<string, unknown>,
  tag: string,
): SessionEvent[] {
  const ts = Date.now();
  const type = String(ev[tag] ?? "").toLowerCase();
  const events: SessionEvent[] = [];

  // Agent message → text.
  if (type === "agent_message" || type === "agentmessage" || type === "agent_message_delta") {
    const text = codexEventText(ev.message ?? ev.text ?? ev.delta);
    if (text) events.push({ kind: "text", timestamp: ts, text, raw: ev });
    return events;
  }

  // Reasoning → thinking. `summary` may be a list of strings; it is joined
  // rather than leaked through as a non-string `text`.
  if (
    type === "reasoning" ||
    type === "agent_reasoning" ||
    type === "agentreasoning" ||
    type === "agent_reasoning_delta"
  ) {
    const text = codexEventText(ev.text ?? ev.reasoning ?? ev.delta ?? ev.summary);
    if (text) events.push({ kind: "thinking", timestamp: ts, text, raw: ev });
    return events;
  }

  // Command execution → tool_use, plus tool_result if output is present.
  // Every variant listed here (including the *_end one) emits a tool_use.
  if (
    type === "command_execution" ||
    type === "commandexecution" ||
    type === "exec_command_begin" ||
    type === "exec_command_end"
  ) {
    const command = ev.command ?? ev.cmd;
    const cmdStr = Array.isArray(command)
      ? command.join(" ")
      : typeof command === "string"
        ? command
        : undefined;
    events.push({
      kind: "tool_use",
      timestamp: ts,
      toolName: "shell",
      toolInput: cmdStr ? { command: cmdStr } : ev,
      raw: ev,
    });

    // Accept both snake_case and camelCase spellings, and treat null the same
    // as "absent": a null exit code must not be reported as a failed command.
    const rawOutput =
      ev.output ?? ev.stdout ?? ev.aggregated_output ?? ev.aggregatedOutput;
    const output = typeof rawOutput === "string" ? rawOutput : undefined;
    const exitCode = codexEventNumber(ev.exit_code ?? ev.exitCode);
    if (output !== undefined || exitCode !== undefined) {
      events.push({
        kind: "tool_result",
        timestamp: ts,
        toolOutput: output ?? "",
        toolError: exitCode !== undefined && exitCode !== 0,
        raw: ev,
      });
    }
    return events;
  }

  // Token usage → carried on a text-less `result` event so accounting picks it up.
  // NOTE(review): counts are only read directly from `info` / `usage` / the
  // event itself; confirm whether current codex payloads nest them deeper.
  if (
    type === "token_count" ||
    type === "token_usage" ||
    type === "usage" ||
    type === "tokenusage"
  ) {
    const u = (ev.info ?? ev.usage ?? ev) as Record<string, unknown>;
    const input =
      codexEventNumber(u.input_tokens ?? u.inputTokens ?? u.total_input_tokens) ?? 0;
    const output =
      codexEventNumber(u.output_tokens ?? u.outputTokens ?? u.total_output_tokens) ?? 0;
    if (input || output) {
      events.push({
        kind: "result",
        timestamp: ts,
        tokens: { input, output },
        raw: ev,
      });
    }
    return events;
  }

  // Errors. Non-string details (e.g. an error object) are serialised so that
  // `text` is always a string.
  if (type === "error" || type === "stream_error") {
    const detail = ev.message ?? ev.error;
    events.push({
      kind: "error",
      timestamp: ts,
      text: typeof detail === "string" ? detail : JSON.stringify(detail ?? ev),
      raw: ev,
    });
    return events;
  }

  // Unrecognised event types produce no SessionEvent and therefore stay out
  // of turn content.
  return events;
}
|
|
938
|
+
|
|
939
|
+
// ---------------------------------------------------------------------------
|
|
940
|
+
// Default export selection
|
|
941
|
+
// ---------------------------------------------------------------------------
|
|
942
|
+
|
|
943
|
+
/**
|
|
944
|
+
* The default CodexSession is the MCP variant (stdio mcp-server). Import
|
|
945
|
+
* CodexAppServerSession explicitly for the experimental app-server variant.
|
|
946
|
+
*/
|
|
947
|
+
export const CodexSession = CodexMcpSession;
/**
 * Type-side companion of the `CodexSession` value alias. A `const` alias of a
 * class exports only the constructor value; this alias lets `CodexSession`
 * also be used in type positions (e.g. `let s: CodexSession`).
 */
export type CodexSession = CodexMcpSession;
|