@posthog/agent 2.3.312 → 2.3.316

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@posthog/agent",
3
- "version": "2.3.312",
3
+ "version": "2.3.316",
4
4
  "repository": "https://github.com/PostHog/code",
5
5
  "description": "TypeScript agent framework wrapping Claude Agent SDK with Git-based task execution for PostHog",
6
6
  "exports": {
@@ -0,0 +1,289 @@
1
+ import { Readable, Writable } from "node:stream";
2
+ import type { AgentSideConnection, McpServer } from "@agentclientprotocol/sdk";
3
+ import { beforeEach, describe, expect, it, vi } from "vitest";
4
+ import { POSTHOG_METHODS } from "../../acp-extensions";
5
+
6
/**
 * Shape of the object handed back by the mocked `ClientSideConnection`
 * constructor: every connection method the tests touch is a vitest mock.
 */
type MockCodexConnection = Record<
  | "initialize"
  | "newSession"
  | "loadSession"
  | "setSessionMode"
  | "listSessions"
  | "prompt"
  | "setSessionConfigOption"
  | "cancel",
  ReturnType<typeof vi.fn>
>;
16
+
17
/**
 * Stand-in for the handle produced by the mocked `spawnCodexProcess`:
 * a fake pid, in-memory stdio streams, and a spy-able `kill`.
 */
interface SpawnHandle {
  process: { pid: number };
  stdin: Writable;
  stdout: Readable;
  kill: ReturnType<typeof vi.fn>;
}
23
+
24
+ const hoisted = vi.hoisted(() => {
25
+ // Everything the mock factories depend on must live here — vi.mock()
26
+ // invocations are hoisted above any other top-level code.
27
+ const createdConnections: MockCodexConnection[] = [];
28
+ const spawnedProcesses: SpawnHandle[] = [];
29
+
30
+ const makeConnection = (): MockCodexConnection => ({
31
+ initialize: vi.fn().mockResolvedValue({
32
+ protocolVersion: 1,
33
+ agentCapabilities: {},
34
+ }),
35
+ newSession: vi.fn(),
36
+ loadSession: vi.fn().mockResolvedValue({
37
+ modes: { currentModeId: "auto", availableModes: [] },
38
+ configOptions: [],
39
+ }),
40
+ setSessionMode: vi.fn().mockResolvedValue({}),
41
+ listSessions: vi.fn(),
42
+ prompt: vi.fn(),
43
+ setSessionConfigOption: vi.fn(),
44
+ cancel: vi.fn().mockResolvedValue(undefined),
45
+ });
46
+
47
+ const clientSideConnectionCtor = vi.fn(() => {
48
+ const conn = makeConnection();
49
+ createdConnections.push(conn);
50
+ return conn;
51
+ });
52
+
53
+ const spawnCodexProcessMock = vi.fn(() => {
54
+ const handle: SpawnHandle = {
55
+ process: { pid: 1000 + spawnedProcesses.length },
56
+ stdin: new Writable({
57
+ write(_chunk, _encoding, callback) {
58
+ callback();
59
+ },
60
+ }),
61
+ stdout: new Readable({ read() {} }),
62
+ kill: vi.fn(),
63
+ };
64
+ spawnedProcesses.push(handle);
65
+ return handle;
66
+ });
67
+
68
+ return {
69
+ createdConnections,
70
+ spawnedProcesses,
71
+ clientSideConnectionCtor,
72
+ spawnCodexProcessMock,
73
+ };
74
+ });
75
+
76
+ const createdConnections = hoisted.createdConnections;
77
+ const spawnedProcesses = hoisted.spawnedProcesses;
78
+ const clientSideConnectionCtor = hoisted.clientSideConnectionCtor;
79
+
80
+ vi.mock("@agentclientprotocol/sdk", async () => {
81
+ const actual = await vi.importActual("@agentclientprotocol/sdk");
82
+ return {
83
+ ...actual,
84
+ ClientSideConnection: hoisted.clientSideConnectionCtor,
85
+ ndJsonStream: vi.fn(() => ({}) as object),
86
+ };
87
+ });
88
+
89
+ vi.mock("./spawn", () => ({
90
+ spawnCodexProcess: hoisted.spawnCodexProcessMock,
91
+ }));
92
+
93
+ vi.mock("./settings", () => ({
94
+ CodexSettingsManager: vi.fn().mockImplementation((cwd: string) => ({
95
+ initialize: vi.fn().mockResolvedValue(undefined),
96
+ dispose: vi.fn(),
97
+ getCwd: () => cwd,
98
+ setCwd: vi.fn(),
99
+ getSettings: () => ({ mcpServerNames: [] }),
100
+ })),
101
+ }));
102
+
103
+ import { CodexAcpAgent } from "./codex-agent";
104
+
105
/**
 * View of the `CodexAcpAgent` internals that the tests read or overwrite
 * after casting the agent through `unknown`.
 */
interface PrivateAgent {
  sessionId: string;
  session: {
    abortController: AbortController;
    settingsManager: { dispose: ReturnType<typeof vi.fn> };
    notificationHistory: unknown[];
    promptRunning: boolean;
  };
  sessionState: {
    sessionId: string;
    cwd: string;
    accumulatedUsage: Record<
      "inputTokens" | "outputTokens" | "cachedReadTokens" | "cachedWriteTokens",
      number
    >;
    configOptions: unknown[];
    taskRunId?: string;
  };
  codexProcess: SpawnHandle;
  codexConnection: MockCodexConnection;
  lastInitRequest?: { protocolVersion: number };
}
129
+
130
+ function makeAgent(): CodexAcpAgent {
131
+ const client = {
132
+ extNotification: vi.fn().mockResolvedValue(undefined),
133
+ } as unknown as AgentSideConnection;
134
+ return new CodexAcpAgent(client, {
135
+ codexProcessOptions: { cwd: "/tmp/repo" },
136
+ });
137
+ }
138
+
139
+ function primeSession(
140
+ agent: CodexAcpAgent,
141
+ sessionId: string,
142
+ ): {
143
+ oldProcess: SpawnHandle;
144
+ oldConnection: MockCodexConnection;
145
+ priv: PrivateAgent;
146
+ } {
147
+ const priv = agent as unknown as PrivateAgent;
148
+ priv.sessionId = sessionId;
149
+ priv.sessionState = {
150
+ sessionId,
151
+ cwd: "/tmp/repo",
152
+ accumulatedUsage: {
153
+ inputTokens: 42,
154
+ outputTokens: 17,
155
+ cachedReadTokens: 0,
156
+ cachedWriteTokens: 0,
157
+ },
158
+ configOptions: [{ id: "opt", value: "x" }],
159
+ taskRunId: "run-1",
160
+ };
161
+ priv.session.notificationHistory = [{ foo: "bar" }];
162
+ priv.lastInitRequest = { protocolVersion: 1 };
163
+ return {
164
+ oldProcess: priv.codexProcess,
165
+ oldConnection: priv.codexConnection,
166
+ priv,
167
+ };
168
+ }
169
+
170
+ describe("CodexAcpAgent.extMethod refresh_session", () => {
171
+ beforeEach(() => {
172
+ spawnedProcesses.length = 0;
173
+ createdConnections.length = 0;
174
+ clientSideConnectionCtor.mockClear();
175
+ });
176
+
177
+ it("returns methodNotFound for unknown extension methods", async () => {
178
+ const agent = makeAgent();
179
+ await expect(agent.extMethod("_posthog/nope", {})).rejects.toThrow(
180
+ /Method not found/i,
181
+ );
182
+ });
183
+
184
+ it("rejects when mcpServers is missing", async () => {
185
+ const agent = makeAgent();
186
+ await expect(
187
+ agent.extMethod(POSTHOG_METHODS.REFRESH_SESSION, {}),
188
+ ).rejects.toThrow(/at least one refreshable field/);
189
+ });
190
+
191
+ it("rejects when mcpServers is not an array", async () => {
192
+ const agent = makeAgent();
193
+ await expect(
194
+ agent.extMethod(POSTHOG_METHODS.REFRESH_SESSION, {
195
+ mcpServers: "nope" as unknown,
196
+ }),
197
+ ).rejects.toThrow(/mcpServers must be an array/);
198
+ });
199
+
200
+ it("rejects refresh while a prompt is in flight", async () => {
201
+ const agent = makeAgent();
202
+ const { priv } = primeSession(agent, "s-1");
203
+ priv.session.promptRunning = true;
204
+
205
+ await expect(
206
+ agent.extMethod(POSTHOG_METHODS.REFRESH_SESSION, {
207
+ mcpServers: [
208
+ { name: "posthog", type: "http", url: "https://new", headers: [] },
209
+ ],
210
+ }),
211
+ ).rejects.toThrow(/prompt turn is in flight/);
212
+ });
213
+
214
+ it("respawns the subprocess, re-initializes, and rehydrates with new MCP servers", async () => {
215
+ const agent = makeAgent();
216
+ const { oldProcess, oldConnection, priv } = primeSession(agent, "s-2");
217
+ const oldAbortController = priv.session.abortController;
218
+ const oldSettingsManager = priv.session.settingsManager;
219
+
220
+ const mcpServers: McpServer[] = [
221
+ {
222
+ name: "posthog",
223
+ type: "http",
224
+ url: "https://fresh",
225
+ headers: [{ name: "x-foo", value: "bar" }],
226
+ },
227
+ ];
228
+
229
+ const result = await agent.extMethod(POSTHOG_METHODS.REFRESH_SESSION, {
230
+ mcpServers,
231
+ });
232
+
233
+ expect(result).toEqual({ refreshed: true });
234
+
235
+ // Old subprocess torn down, old connection cancelled.
236
+ expect(oldConnection.cancel).toHaveBeenCalledWith({ sessionId: "s-2" });
237
+ expect(oldProcess.kill).toHaveBeenCalledTimes(1);
238
+ expect(oldAbortController.signal.aborted).toBe(true);
239
+ expect(oldSettingsManager.dispose).toHaveBeenCalledTimes(1);
240
+
241
+ // A fresh subprocess was spawned and a new ClientSideConnection wired up.
242
+ expect(spawnedProcesses).toHaveLength(2);
243
+ expect(createdConnections).toHaveLength(2);
244
+ const newConnection = createdConnections[1];
245
+ if (!newConnection) throw new Error("expected a second connection");
246
+
247
+ // ACP handshake replayed against the new subprocess.
248
+ expect(newConnection.initialize).toHaveBeenCalledWith({
249
+ protocolVersion: 1,
250
+ });
251
+ expect(newConnection.loadSession).toHaveBeenCalledWith({
252
+ sessionId: "s-2",
253
+ cwd: "/tmp/repo",
254
+ mcpServers,
255
+ });
256
+
257
+ // References swapped to the new instances.
258
+ expect(priv.codexProcess).toBe(spawnedProcesses[1]);
259
+ expect(priv.codexConnection).toBe(newConnection);
260
+ expect(priv.session.abortController).not.toBe(oldAbortController);
261
+ expect(priv.session.settingsManager).not.toBe(oldSettingsManager);
262
+
263
+ // Session-level state preserved across refresh.
264
+ expect(priv.sessionState.accumulatedUsage.inputTokens).toBe(42);
265
+ expect(priv.sessionState.accumulatedUsage.outputTokens).toBe(17);
266
+ expect(priv.sessionState.configOptions).toEqual([
267
+ { id: "opt", value: "x" },
268
+ ]);
269
+ expect(priv.sessionState.taskRunId).toBe("run-1");
270
+ expect(priv.session.notificationHistory).toEqual([{ foo: "bar" }]);
271
+ });
272
+
273
+ it("does not fail refresh when cancel() throws on the stale connection", async () => {
274
+ const agent = makeAgent();
275
+ const { oldConnection } = primeSession(agent, "s-3");
276
+ oldConnection.cancel.mockRejectedValueOnce(new Error("already dead"));
277
+
278
+ await expect(
279
+ agent.extMethod(POSTHOG_METHODS.REFRESH_SESSION, {
280
+ mcpServers: [
281
+ { name: "posthog", type: "http", url: "https://x", headers: [] },
282
+ ],
283
+ }),
284
+ ).resolves.toEqual({ refreshed: true });
285
+
286
+ expect(spawnedProcesses).toHaveLength(2);
287
+ expect(createdConnections[1]?.loadSession).toHaveBeenCalled();
288
+ });
289
+ });
@@ -21,11 +21,13 @@ import {
21
21
  type ListSessionsResponse,
22
22
  type LoadSessionRequest,
23
23
  type LoadSessionResponse,
24
+ type McpServer,
24
25
  type NewSessionRequest,
25
26
  type NewSessionResponse,
26
27
  ndJsonStream,
27
28
  type PromptRequest,
28
29
  type PromptResponse,
30
+ RequestError,
29
31
  type ResumeSessionRequest,
30
32
  type ResumeSessionResponse,
31
33
  type SetSessionConfigOptionRequest,
@@ -34,7 +36,11 @@ import {
34
36
  type SetSessionModeResponse,
35
37
  } from "@agentclientprotocol/sdk";
36
38
  import packageJson from "../../../package.json" with { type: "json" };
37
- import { POSTHOG_NOTIFICATIONS } from "../../acp-extensions";
39
+ import {
40
+ isMethod,
41
+ POSTHOG_METHODS,
42
+ POSTHOG_NOTIFICATIONS,
43
+ } from "../../acp-extensions";
38
44
  import {
39
45
  type CodeExecutionMode,
40
46
  type CodexNativeMode,
@@ -84,6 +90,7 @@ export interface CodexAcpAgentOptions {
84
90
 
85
91
  type CodexSession = BaseSession & {
86
92
  settingsManager: CodexSettingsManager;
93
+ promptRunning: boolean;
87
94
  };
88
95
 
89
96
  function toCodexPermissionMode(mode?: string): PermissionMode {
@@ -156,6 +163,11 @@ export class CodexAcpAgent extends BaseAcpAgent {
156
163
  * single-owner.
157
164
  */
158
165
  private promptMutex: Promise<unknown> = Promise.resolve();
166
+ private readonly codexProcessOptions: CodexProcessOptions;
167
+ private readonly processCallbacks?: ProcessSpawnedCallback;
168
+ // Snapshot of the initialize() request so refreshSession can replay the
169
+ // same handshake against a respawned codex-acp subprocess.
170
+ private lastInitRequest?: InitializeRequest;
159
171
 
160
172
  constructor(client: AgentSideConnection, options: CodexAcpAgentOptions) {
161
173
  super(client);
@@ -166,6 +178,9 @@ export class CodexAcpAgent extends BaseAcpAgent {
166
178
  const cwd = options.codexProcessOptions.cwd ?? process.cwd();
167
179
  const settingsManager = new CodexSettingsManager(cwd);
168
180
 
181
+ this.codexProcessOptions = options.codexProcessOptions;
182
+ this.processCallbacks = options.processCallbacks;
183
+
169
184
  // Spawn the codex-acp subprocess
170
185
  this.codexProcess = spawnCodexProcess({
171
186
  ...options.codexProcessOptions,
@@ -185,6 +200,7 @@ export class CodexAcpAgent extends BaseAcpAgent {
185
200
  settingsManager,
186
201
  notificationHistory: [],
187
202
  cancelled: false,
203
+ promptRunning: false,
188
204
  };
189
205
 
190
206
  this.sessionState = createSessionState("", cwd);
@@ -203,6 +219,9 @@ export class CodexAcpAgent extends BaseAcpAgent {
203
219
  // Initialize settings
204
220
  await this.session.settingsManager.initialize();
205
221
 
222
+ // Snapshot the handshake so refreshSession can replay it after respawn.
223
+ this.lastInitRequest = request;
224
+
206
225
  // Forward to codex-acp
207
226
  const response = await this.codexConnection.initialize(request);
208
227
 
@@ -427,9 +446,13 @@ export class CodexAcpAgent extends BaseAcpAgent {
427
446
  // injected PR context is not rendered as a user message.
428
447
  await this.broadcastUserMessage(params);
429
448
 
430
- const response = await this.codexConnection.prompt(
431
- prependPrContext(params),
432
- );
449
+ this.session.promptRunning = true;
450
+ let response: PromptResponse;
451
+ try {
452
+ response = await this.codexConnection.prompt(prependPrContext(params));
453
+ } finally {
454
+ this.session.promptRunning = false;
455
+ }
433
456
 
434
457
  // Usage is already accumulated via sessionUpdate notifications in
435
458
  // codex-client.ts. Do NOT also add response.usage here or tokens
@@ -491,6 +514,125 @@ export class CodexAcpAgent extends BaseAcpAgent {
491
514
  }
492
515
  }
493
516
 
517
+ /**
518
+ * Refresh the session between turns. Currently the only refreshable field
519
+ * is `mcpServers`. Unlike Claude (where we rebuild an in-process Query with
520
+ * `resume`), Codex runs as a `codex-acp` subprocess whose MCP set is bound
521
+ * at `newSession`/`loadSession` time and whose user-local MCPs are disabled
522
+ * via spawn-time `-c mcp_servers.<name>.enabled=false` CLI args. To
523
+ * guarantee the caller-supplied set fully wins, we respawn the subprocess
524
+ * and rehydrate the session via `loadSession` — codex-acp persists sessions
525
+ * to disk, so conversation history is preserved.
526
+ *
527
+ * This is an `extMethod` (request/response), not `extNotification`, so the
528
+ * caller can await completion before sending the next prompt.
529
+ *
530
+ * Caller contract: only call REFRESH_SESSION between turns (no prompt in flight).
531
+ */
532
+ async extMethod(
533
+ method: string,
534
+ params: Record<string, unknown>,
535
+ ): Promise<Record<string, unknown>> {
536
+ if (!isMethod(method, POSTHOG_METHODS.REFRESH_SESSION)) {
537
+ throw RequestError.methodNotFound(method);
538
+ }
539
+
540
+ // Trust boundary: refresh is only safe when the caller is trusted infra
541
+ // (e.g. the sandbox agent-server). Do not route this method from
542
+ // untrusted clients — mcpServers contents are forwarded verbatim to
543
+ // codex-acp with no URL/command validation.
544
+ if (params.mcpServers === undefined) {
545
+ throw new RequestError(
546
+ -32602,
547
+ "refresh_session requires at least one refreshable field (e.g. mcpServers)",
548
+ );
549
+ }
550
+ if (!Array.isArray(params.mcpServers)) {
551
+ throw new RequestError(
552
+ -32602,
553
+ "refresh_session: mcpServers must be an array",
554
+ );
555
+ }
556
+
557
+ await this.refreshSession(params.mcpServers as McpServer[]);
558
+ return { refreshed: true };
559
+ }
560
+
561
+ private async refreshSession(mcpServers: McpServer[]): Promise<void> {
562
+ const prev = this.session;
563
+ if (prev.promptRunning) {
564
+ throw new RequestError(
565
+ -32002,
566
+ "Cannot refresh session while a prompt turn is in flight",
567
+ );
568
+ }
569
+
570
+ this.logger.info("Refreshing Codex session with fresh MCP servers", {
571
+ serverCount: mcpServers.length,
572
+ sessionId: this.sessionId,
573
+ });
574
+
575
+ // Abort FIRST so any stuck in-flight ACP request unblocks — otherwise
576
+ // cancel() can deadlock waiting on a codex-acp call that never returns.
577
+ prev.abortController.abort();
578
+ try {
579
+ await this.codexConnection.cancel({ sessionId: this.sessionId });
580
+ } catch (err) {
581
+ this.logger.warn("cancel() during refresh failed (non-fatal)", {
582
+ error: err,
583
+ });
584
+ }
585
+ this.codexProcess.kill();
586
+
587
+ // Respawn with the same options and a fresh settings manager rooted at
588
+ // the current cwd (so the `mcp_servers.<name>.enabled=false` args are
589
+ // regenerated from the latest ~/.codex/config.toml).
590
+ const cwd = prev.settingsManager.getCwd();
591
+ const newSettingsManager = new CodexSettingsManager(cwd);
592
+ await newSettingsManager.initialize();
593
+
594
+ const newProcess = spawnCodexProcess({
595
+ ...this.codexProcessOptions,
596
+ cwd,
597
+ settings: newSettingsManager.getSettings(),
598
+ logger: this.logger,
599
+ processCallbacks: this.processCallbacks,
600
+ });
601
+
602
+ const codexReadable = nodeReadableToWebReadable(newProcess.stdout);
603
+ const codexWritable = nodeWritableToWebWritable(newProcess.stdin);
604
+ const codexStream = ndJsonStream(codexWritable, codexReadable);
605
+
606
+ const newAbortController = new AbortController();
607
+ const newConnection = new ClientSideConnection(
608
+ (_agent) =>
609
+ createCodexClient(this.client, this.logger, this.sessionState),
610
+ codexStream,
611
+ );
612
+
613
+ // Re-run ACP init on the new subprocess, then rehydrate the session with
614
+ // the new MCP set. loadSession is codex-acp's equivalent of Claude's
615
+ // `resume` — conversation history is restored from disk.
616
+ const initRequest: InitializeRequest = this.lastInitRequest ?? {
617
+ protocolVersion: 1,
618
+ };
619
+ await newConnection.initialize(initRequest);
620
+ await newConnection.loadSession({
621
+ sessionId: this.sessionId,
622
+ cwd: this.sessionState.cwd,
623
+ mcpServers,
624
+ });
625
+
626
+ // Swap everything at once so closeSession/prompt/cancel target the new
627
+ // subprocess going forward. Preserve sessionState (accumulatedUsage,
628
+ // taskRunId, configOptions) untouched.
629
+ this.codexProcess = newProcess;
630
+ this.codexConnection = newConnection;
631
+ prev.settingsManager.dispose();
632
+ prev.settingsManager = newSettingsManager;
633
+ prev.abortController = newAbortController;
634
+ }
635
+
494
636
  async setSessionMode(
495
637
  params: SetSessionModeRequest,
496
638
  ): Promise<SetSessionModeResponse> {