@posthog/agent 2.3.520 → 2.3.524
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adapters/codex/structured-output-mcp-server.d.ts +2 -0
- package/dist/adapters/codex/structured-output-mcp-server.js +54 -0
- package/dist/adapters/codex/structured-output-mcp-server.js.map +1 -0
- package/dist/agent.js +126 -20
- package/dist/agent.js.map +1 -1
- package/dist/posthog-api.js +2 -2
- package/dist/posthog-api.js.map +1 -1
- package/dist/server/agent-server.js +126 -20
- package/dist/server/agent-server.js.map +1 -1
- package/dist/server/bin.cjs +149 -42
- package/dist/server/bin.cjs.map +1 -1
- package/package.json +4 -4
- package/src/adapters/acp-connection.ts +1 -0
- package/src/adapters/codex/codex-agent.test.ts +134 -1
- package/src/adapters/codex/codex-agent.ts +122 -17
- package/src/adapters/codex/codex-client.test.ts +178 -0
- package/src/adapters/codex/codex-client.ts +68 -0
- package/src/adapters/codex/structured-output-constants.ts +9 -0
- package/src/adapters/codex/structured-output-mcp-server.ts +72 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@posthog/agent",
|
|
3
|
-
"version": "2.3.520",
|
|
3
|
+
"version": "2.3.524",
|
|
4
4
|
"repository": "https://github.com/PostHog/code",
|
|
5
5
|
"description": "TypeScript agent framework wrapping Claude Agent SDK with Git-based task execution for PostHog",
|
|
6
6
|
"exports": {
|
|
@@ -103,12 +103,11 @@
|
|
|
103
103
|
"typescript": "^5.5.0",
|
|
104
104
|
"vitest": "^2.1.8",
|
|
105
105
|
"@posthog/git": "1.0.0",
|
|
106
|
-
"@posthog/
|
|
107
|
-
"@posthog/
|
|
106
|
+
"@posthog/shared": "1.0.0",
|
|
107
|
+
"@posthog/enricher": "1.0.0"
|
|
108
108
|
},
|
|
109
109
|
"dependencies": {
|
|
110
110
|
"@agentclientprotocol/sdk": "0.19.0",
|
|
111
|
-
"ajv": "^8.17.1",
|
|
112
111
|
"@anthropic-ai/claude-agent-sdk": "0.2.112",
|
|
113
112
|
"@anthropic-ai/sdk": "0.89.0",
|
|
114
113
|
"@hono/node-server": "^1.19.9",
|
|
@@ -122,6 +121,7 @@
|
|
|
122
121
|
"hono": "^4.11.7",
|
|
123
122
|
"jsonwebtoken": "^9.0.2",
|
|
124
123
|
"minimatch": "^10.0.3",
|
|
124
|
+
"@modelcontextprotocol/sdk": "1.29.0",
|
|
125
125
|
"tar": "^7.5.0",
|
|
126
126
|
"uuid": "13.0.0",
|
|
127
127
|
"yoga-wasm-web": "^0.3.3",
|
|
@@ -205,6 +205,7 @@ function createCodexConnection(config: AcpConnectionConfig): AcpConnection {
|
|
|
205
205
|
codexProcessOptions: config.codexOptions ?? {},
|
|
206
206
|
processCallbacks: config.processCallbacks,
|
|
207
207
|
posthogApiConfig: resolveEnricherApiConfig(config),
|
|
208
|
+
onStructuredOutput: config.onStructuredOutput,
|
|
208
209
|
});
|
|
209
210
|
return agent;
|
|
210
211
|
}, agentStream);
|
|
@@ -53,6 +53,11 @@ vi.mock("./settings", () => ({
|
|
|
53
53
|
})),
|
|
54
54
|
}));
|
|
55
55
|
|
|
56
|
+
vi.mock("node:fs", async (importActual) => {
|
|
57
|
+
const actual = await importActual<typeof import("node:fs")>();
|
|
58
|
+
return { ...actual, existsSync: vi.fn(actual.existsSync) };
|
|
59
|
+
});
|
|
60
|
+
|
|
56
61
|
import { CodexAcpAgent } from "./codex-agent";
|
|
57
62
|
|
|
58
63
|
describe("CodexAcpAgent", () => {
|
|
@@ -60,7 +65,12 @@ describe("CodexAcpAgent", () => {
|
|
|
60
65
|
vi.clearAllMocks();
|
|
61
66
|
});
|
|
62
67
|
|
|
63
|
-
function createAgent(
|
|
68
|
+
function createAgent(
|
|
69
|
+
overrides: Partial<AgentSideConnection> = {},
|
|
70
|
+
agentOptions?: {
|
|
71
|
+
onStructuredOutput?: (output: Record<string, unknown>) => Promise<void>;
|
|
72
|
+
},
|
|
73
|
+
): {
|
|
64
74
|
agent: CodexAcpAgent;
|
|
65
75
|
client: AgentSideConnection & {
|
|
66
76
|
extNotification: ReturnType<typeof vi.fn>;
|
|
@@ -80,6 +90,7 @@ describe("CodexAcpAgent", () => {
|
|
|
80
90
|
codexProcessOptions: {
|
|
81
91
|
cwd: process.cwd(),
|
|
82
92
|
},
|
|
93
|
+
onStructuredOutput: agentOptions?.onStructuredOutput,
|
|
83
94
|
});
|
|
84
95
|
return { agent, client };
|
|
85
96
|
}
|
|
@@ -295,6 +306,128 @@ describe("CodexAcpAgent", () => {
|
|
|
295
306
|
).resolves.toEqual({ stopReason: "end_turn" });
|
|
296
307
|
});
|
|
297
308
|
|
|
309
|
+
describe("structured output injection", () => {
|
|
310
|
+
const schema = {
|
|
311
|
+
type: "object",
|
|
312
|
+
properties: { answer: { type: "string" } },
|
|
313
|
+
required: ["answer"],
|
|
314
|
+
} as const;
|
|
315
|
+
|
|
316
|
+
beforeEach(async () => {
|
|
317
|
+
// The resolver checks existsSync to find the compiled MCP script.
|
|
318
|
+
// In unit tests the dist asset isn't on the walk-up path, so we
|
|
319
|
+
// make the first candidate succeed. Nothing in this test actually
|
|
320
|
+
// spawns the script — the agent only forwards the path to codex-acp.
|
|
321
|
+
const fs = await import("node:fs");
|
|
322
|
+
vi.mocked(fs.existsSync).mockReturnValue(true);
|
|
323
|
+
});
|
|
324
|
+
|
|
325
|
+
it("injects the create_output MCP server and system-prompt note when jsonSchema and callback are present", async () => {
|
|
326
|
+
const { agent } = createAgent({}, { onStructuredOutput: vi.fn() });
|
|
327
|
+
mockCodexConnection.newSession.mockResolvedValue({
|
|
328
|
+
sessionId: "session-1",
|
|
329
|
+
modes: { currentModeId: "auto", availableModes: [] },
|
|
330
|
+
configOptions: [],
|
|
331
|
+
} satisfies Partial<NewSessionResponse>);
|
|
332
|
+
|
|
333
|
+
await agent.newSession({
|
|
334
|
+
cwd: process.cwd(),
|
|
335
|
+
mcpServers: [{ name: "existing", command: "echo", args: [], env: [] }],
|
|
336
|
+
_meta: { jsonSchema: schema, systemPrompt: "be terse." },
|
|
337
|
+
} as never);
|
|
338
|
+
|
|
339
|
+
const forwarded = mockCodexConnection.newSession.mock.calls[0][0] as {
|
|
340
|
+
mcpServers: Array<{ name: string; command: string; env: unknown }>;
|
|
341
|
+
_meta: { systemPrompt: string };
|
|
342
|
+
};
|
|
343
|
+
|
|
344
|
+
// Existing MCP server is preserved; ours is appended.
|
|
345
|
+
expect(forwarded.mcpServers).toHaveLength(2);
|
|
346
|
+
expect(forwarded.mcpServers[0].name).toBe("existing");
|
|
347
|
+
expect(forwarded.mcpServers[1].name).toBe("posthog_output");
|
|
348
|
+
expect(forwarded.mcpServers[1].command).toBe(process.execPath);
|
|
349
|
+
|
|
350
|
+
// The schema is forwarded base64-encoded so codex-acp doesn't have
|
|
351
|
+
// to escape it through a shell.
|
|
352
|
+
const envEntry = (
|
|
353
|
+
forwarded.mcpServers[1].env as Array<{ name: string; value: string }>
|
|
354
|
+
).find((e) => e.name === "POSTHOG_OUTPUT_SCHEMA");
|
|
355
|
+
expect(envEntry).toBeDefined();
|
|
356
|
+
const decoded = JSON.parse(
|
|
357
|
+
Buffer.from(envEntry?.value ?? "", "base64").toString("utf-8"),
|
|
358
|
+
);
|
|
359
|
+
expect(decoded).toEqual(schema);
|
|
360
|
+
|
|
361
|
+
// Existing systemPrompt is preserved with the structured-output
|
|
362
|
+
// instruction appended (not overwritten).
|
|
363
|
+
expect(forwarded._meta.systemPrompt.startsWith("be terse.")).toBe(true);
|
|
364
|
+
expect(forwarded._meta.systemPrompt).toContain("create_output");
|
|
365
|
+
});
|
|
366
|
+
|
|
367
|
+
it("is a no-op when jsonSchema is absent", async () => {
|
|
368
|
+
const { agent } = createAgent({}, { onStructuredOutput: vi.fn() });
|
|
369
|
+
mockCodexConnection.newSession.mockResolvedValue({
|
|
370
|
+
sessionId: "session-1",
|
|
371
|
+
modes: { currentModeId: "auto", availableModes: [] },
|
|
372
|
+
configOptions: [],
|
|
373
|
+
} satisfies Partial<NewSessionResponse>);
|
|
374
|
+
|
|
375
|
+
await agent.newSession({
|
|
376
|
+
cwd: process.cwd(),
|
|
377
|
+
mcpServers: [],
|
|
378
|
+
} as never);
|
|
379
|
+
|
|
380
|
+
const forwarded = mockCodexConnection.newSession.mock.calls[0][0] as {
|
|
381
|
+
mcpServers: unknown[];
|
|
382
|
+
_meta?: { systemPrompt?: string };
|
|
383
|
+
};
|
|
384
|
+
expect(forwarded.mcpServers).toEqual([]);
|
|
385
|
+
expect(forwarded._meta?.systemPrompt).toBeUndefined();
|
|
386
|
+
});
|
|
387
|
+
|
|
388
|
+
it("is a no-op when onStructuredOutput callback is not wired", async () => {
|
|
389
|
+
const { agent } = createAgent();
|
|
390
|
+
mockCodexConnection.newSession.mockResolvedValue({
|
|
391
|
+
sessionId: "session-1",
|
|
392
|
+
modes: { currentModeId: "auto", availableModes: [] },
|
|
393
|
+
configOptions: [],
|
|
394
|
+
} satisfies Partial<NewSessionResponse>);
|
|
395
|
+
|
|
396
|
+
await agent.newSession({
|
|
397
|
+
cwd: process.cwd(),
|
|
398
|
+
mcpServers: [],
|
|
399
|
+
_meta: { jsonSchema: schema },
|
|
400
|
+
} as never);
|
|
401
|
+
|
|
402
|
+
const forwarded = mockCodexConnection.newSession.mock.calls[0][0] as {
|
|
403
|
+
mcpServers: unknown[];
|
|
404
|
+
};
|
|
405
|
+
expect(forwarded.mcpServers).toEqual([]);
|
|
406
|
+
});
|
|
407
|
+
|
|
408
|
+
it("also injects on loadSession", async () => {
|
|
409
|
+
const { agent } = createAgent({}, { onStructuredOutput: vi.fn() });
|
|
410
|
+
mockCodexConnection.loadSession.mockResolvedValue({
|
|
411
|
+
modes: { currentModeId: "auto", availableModes: [] },
|
|
412
|
+
configOptions: [],
|
|
413
|
+
} satisfies Partial<LoadSessionResponse>);
|
|
414
|
+
|
|
415
|
+
await agent.loadSession({
|
|
416
|
+
sessionId: "session-1",
|
|
417
|
+
cwd: process.cwd(),
|
|
418
|
+
mcpServers: [],
|
|
419
|
+
_meta: { jsonSchema: schema },
|
|
420
|
+
} as never);
|
|
421
|
+
|
|
422
|
+
const forwarded = mockCodexConnection.loadSession.mock.calls[0][0] as {
|
|
423
|
+
mcpServers: Array<{ name: string }>;
|
|
424
|
+
};
|
|
425
|
+
expect(forwarded.mcpServers.map((s) => s.name)).toContain(
|
|
426
|
+
"posthog_output",
|
|
427
|
+
);
|
|
428
|
+
});
|
|
429
|
+
});
|
|
430
|
+
|
|
298
431
|
it("broadcasts user prompt as user_message_chunk before delegating to codex-acp", async () => {
|
|
299
432
|
const { agent, client } = createAgent();
|
|
300
433
|
// Seed an active session so prompt() has the state it expects.
|
|
@@ -9,6 +9,8 @@
|
|
|
9
9
|
* - System prompt injection
|
|
10
10
|
*/
|
|
11
11
|
|
|
12
|
+
import { existsSync } from "node:fs";
|
|
13
|
+
import { resolve as resolvePath } from "node:path";
|
|
12
14
|
import {
|
|
13
15
|
type AgentSideConnection,
|
|
14
16
|
type AuthenticateRequest,
|
|
@@ -22,6 +24,7 @@ import {
|
|
|
22
24
|
type LoadSessionRequest,
|
|
23
25
|
type LoadSessionResponse,
|
|
24
26
|
type McpServer,
|
|
27
|
+
type McpServerStdio,
|
|
25
28
|
type NewSessionRequest,
|
|
26
29
|
type NewSessionResponse,
|
|
27
30
|
ndJsonStream,
|
|
@@ -72,6 +75,15 @@ import {
|
|
|
72
75
|
type CodexProcessOptions,
|
|
73
76
|
spawnCodexProcess,
|
|
74
77
|
} from "./spawn";
|
|
78
|
+
import {
|
|
79
|
+
STRUCTURED_OUTPUT_MCP_NAME,
|
|
80
|
+
STRUCTURED_OUTPUT_TOOL_NAME,
|
|
81
|
+
} from "./structured-output-constants";
|
|
82
|
+
|
|
83
|
+
export {
|
|
84
|
+
STRUCTURED_OUTPUT_MCP_NAME,
|
|
85
|
+
STRUCTURED_OUTPUT_TOOL_NAME,
|
|
86
|
+
} from "./structured-output-constants";
|
|
75
87
|
|
|
76
88
|
interface NewSessionMeta {
|
|
77
89
|
taskRunId?: string;
|
|
@@ -86,12 +98,14 @@ interface NewSessionMeta {
|
|
|
86
98
|
additionalRoots?: string[];
|
|
87
99
|
disableBuiltInTools?: boolean;
|
|
88
100
|
allowedDomains?: string[];
|
|
101
|
+
jsonSchema?: Record<string, unknown> | null;
|
|
89
102
|
}
|
|
90
103
|
|
|
91
104
|
export interface CodexAcpAgentOptions {
|
|
92
105
|
codexProcessOptions: CodexProcessOptions;
|
|
93
106
|
processCallbacks?: ProcessSpawnedCallback;
|
|
94
107
|
posthogApiConfig?: PostHogAPIConfig;
|
|
108
|
+
onStructuredOutput?: (output: Record<string, unknown>) => Promise<void>;
|
|
95
109
|
}
|
|
96
110
|
|
|
97
111
|
type CodexSession = BaseSession & {
|
|
@@ -153,6 +167,46 @@ function getCurrentPermissionMode(
|
|
|
153
167
|
return toCodexPermissionMode(fallbackMode);
|
|
154
168
|
}
|
|
155
169
|
|
|
170
|
+
const STRUCTURED_OUTPUT_INSTRUCTIONS = `\n\nWhen you have completed the task, call the \`${STRUCTURED_OUTPUT_TOOL_NAME}\` tool with the final structured result. The tool's input schema matches the required output format for this task. Do not describe the result in a plain message — submitting it via the tool is required for the task to be considered complete.`;
|
|
171
|
+
|
|
172
|
+
/**
|
|
173
|
+
* Builds the stdio MCP server config that exposes the `create_output` tool.
|
|
174
|
+
* The child process validates tool input against the JSON schema with AJV.
|
|
175
|
+
* We pass the schema as a base64-encoded env var to avoid shell escaping.
|
|
176
|
+
*
|
|
177
|
+
* Path resolves relative to the compiled adapter location. When bundled into
|
|
178
|
+
* different entry points (dist/agent.js, dist/server/bin.cjs, dist/server/
|
|
179
|
+
* harness/bin.js, etc), `import.meta.dirname` sits at different depths. Walk
|
|
180
|
+
* up until we find the script so each bundle locates the shared dist asset.
|
|
181
|
+
*/
|
|
182
|
+
function resolveStructuredOutputMcpScript(): string {
|
|
183
|
+
const rel = "adapters/codex/structured-output-mcp-server.js";
|
|
184
|
+
let dir = import.meta.dirname ?? __dirname;
|
|
185
|
+
for (let i = 0; i < 5; i++) {
|
|
186
|
+
const candidate = resolvePath(dir, rel);
|
|
187
|
+
if (existsSync(candidate)) return candidate;
|
|
188
|
+
dir = resolvePath(dir, "..");
|
|
189
|
+
}
|
|
190
|
+
throw new Error(
|
|
191
|
+
`Could not locate ${rel} relative to ${import.meta.dirname ?? __dirname}.`,
|
|
192
|
+
);
|
|
193
|
+
}
|
|
194
|
+
|
|
195
|
+
function buildStructuredOutputMcpServer(
|
|
196
|
+
jsonSchema: Record<string, unknown>,
|
|
197
|
+
): McpServerStdio {
|
|
198
|
+
const scriptPath = resolveStructuredOutputMcpScript();
|
|
199
|
+
const schemaBase64 = Buffer.from(JSON.stringify(jsonSchema)).toString(
|
|
200
|
+
"base64",
|
|
201
|
+
);
|
|
202
|
+
return {
|
|
203
|
+
name: STRUCTURED_OUTPUT_MCP_NAME,
|
|
204
|
+
command: process.execPath,
|
|
205
|
+
args: [scriptPath],
|
|
206
|
+
env: [{ name: "POSTHOG_OUTPUT_SCHEMA", value: schemaBase64 }],
|
|
207
|
+
};
|
|
208
|
+
}
|
|
209
|
+
|
|
156
210
|
export class CodexAcpAgent extends BaseAcpAgent {
|
|
157
211
|
readonly adapterName = "codex";
|
|
158
212
|
declare session: CodexSession;
|
|
@@ -172,6 +226,9 @@ export class CodexAcpAgent extends BaseAcpAgent {
|
|
|
172
226
|
private promptMutex: Promise<unknown> = Promise.resolve();
|
|
173
227
|
private readonly codexProcessOptions: CodexProcessOptions;
|
|
174
228
|
private readonly processCallbacks?: ProcessSpawnedCallback;
|
|
229
|
+
private readonly onStructuredOutput?: (
|
|
230
|
+
output: Record<string, unknown>,
|
|
231
|
+
) => Promise<void>;
|
|
175
232
|
// Snapshot of the initialize() request so refreshSession can replay the
|
|
176
233
|
// same handshake against a respawned codex-acp subprocess.
|
|
177
234
|
private lastInitRequest?: InitializeRequest;
|
|
@@ -188,6 +245,7 @@ export class CodexAcpAgent extends BaseAcpAgent {
|
|
|
188
245
|
|
|
189
246
|
this.codexProcessOptions = options.codexProcessOptions;
|
|
190
247
|
this.processCallbacks = options.processCallbacks;
|
|
248
|
+
this.onStructuredOutput = options.onStructuredOutput;
|
|
191
249
|
|
|
192
250
|
// Spawn the codex-acp subprocess
|
|
193
251
|
this.codexProcess = spawnCodexProcess({
|
|
@@ -222,6 +280,7 @@ export class CodexAcpAgent extends BaseAcpAgent {
|
|
|
222
280
|
(_agent) =>
|
|
223
281
|
createCodexClient(this.client, this.logger, this.sessionState, {
|
|
224
282
|
enrichmentDeps: this.enrichment?.deps,
|
|
283
|
+
onStructuredOutput: this.onStructuredOutput,
|
|
225
284
|
}),
|
|
226
285
|
codexStream,
|
|
227
286
|
);
|
|
@@ -265,7 +324,8 @@ export class CodexAcpAgent extends BaseAcpAgent {
|
|
|
265
324
|
const meta = params._meta as NewSessionMeta | undefined;
|
|
266
325
|
const requestedPermissionMode = toCodexPermissionMode(meta?.permissionMode);
|
|
267
326
|
|
|
268
|
-
const response = await this.codexConnection.newSession(params);
|
|
327
|
+
const injectedParams = this.applyStructuredOutput(params, meta);
|
|
328
|
+
const response = await this.codexConnection.newSession(injectedParams);
|
|
269
329
|
response.configOptions = normalizeCodexConfigOptions(
|
|
270
330
|
response.configOptions,
|
|
271
331
|
);
|
|
@@ -305,11 +365,12 @@ export class CodexAcpAgent extends BaseAcpAgent {
|
|
|
305
365
|
}
|
|
306
366
|
|
|
307
367
|
async loadSession(params: LoadSessionRequest): Promise<LoadSessionResponse> {
|
|
308
|
-
const response = await this.codexConnection.loadSession(params);
|
|
368
|
+
const meta = params._meta as NewSessionMeta | undefined;
|
|
369
|
+
const injectedParams = this.applyStructuredOutput(params, meta);
|
|
370
|
+
const response = await this.codexConnection.loadSession(injectedParams);
|
|
309
371
|
response.configOptions = normalizeCodexConfigOptions(
|
|
310
372
|
response.configOptions,
|
|
311
373
|
);
|
|
312
|
-
const meta = params._meta as NewSessionMeta | undefined;
|
|
313
374
|
const currentPermissionMode = getCurrentPermissionMode(
|
|
314
375
|
response.modes?.currentModeId,
|
|
315
376
|
meta?.permissionMode,
|
|
@@ -342,17 +403,22 @@ export class CodexAcpAgent extends BaseAcpAgent {
|
|
|
342
403
|
async unstable_resumeSession(
|
|
343
404
|
params: ResumeSessionRequest,
|
|
344
405
|
): Promise<ResumeSessionResponse> {
|
|
406
|
+
const meta = params._meta as NewSessionMeta | undefined;
|
|
407
|
+
const injectedParams = this.applyStructuredOutput(
|
|
408
|
+
{
|
|
409
|
+
sessionId: params.sessionId,
|
|
410
|
+
cwd: params.cwd,
|
|
411
|
+
mcpServers: params.mcpServers ?? [],
|
|
412
|
+
_meta: params._meta,
|
|
413
|
+
},
|
|
414
|
+
meta,
|
|
415
|
+
);
|
|
416
|
+
|
|
345
417
|
// codex-acp doesn't support resume natively, use loadSession instead
|
|
346
|
-
const loadResponse = await this.codexConnection.loadSession({
|
|
347
|
-
sessionId: params.sessionId,
|
|
348
|
-
cwd: params.cwd,
|
|
349
|
-
mcpServers: params.mcpServers ?? [],
|
|
350
|
-
});
|
|
418
|
+
const loadResponse = await this.codexConnection.loadSession(injectedParams);
|
|
351
419
|
loadResponse.configOptions = normalizeCodexConfigOptions(
|
|
352
420
|
loadResponse.configOptions,
|
|
353
421
|
);
|
|
354
|
-
|
|
355
|
-
const meta = params._meta as NewSessionMeta | undefined;
|
|
356
422
|
const currentPermissionMode = getCurrentPermissionMode(
|
|
357
423
|
loadResponse.modes?.currentModeId,
|
|
358
424
|
meta?.permissionMode,
|
|
@@ -384,17 +450,22 @@ export class CodexAcpAgent extends BaseAcpAgent {
|
|
|
384
450
|
async unstable_forkSession(
|
|
385
451
|
params: ForkSessionRequest,
|
|
386
452
|
): Promise<ForkSessionResponse> {
|
|
453
|
+
const meta = params._meta as NewSessionMeta | undefined;
|
|
454
|
+
const injectedParams = this.applyStructuredOutput(
|
|
455
|
+
{
|
|
456
|
+
cwd: params.cwd,
|
|
457
|
+
mcpServers: params.mcpServers ?? [],
|
|
458
|
+
_meta: params._meta,
|
|
459
|
+
},
|
|
460
|
+
meta,
|
|
461
|
+
);
|
|
462
|
+
|
|
387
463
|
// Create a new session via codex-acp (fork isn't natively supported)
|
|
388
|
-
const newResponse = await this.codexConnection.newSession({
|
|
389
|
-
cwd: params.cwd,
|
|
390
|
-
mcpServers: params.mcpServers ?? [],
|
|
391
|
-
_meta: params._meta,
|
|
392
|
-
});
|
|
464
|
+
const newResponse = await this.codexConnection.newSession(injectedParams);
|
|
393
465
|
newResponse.configOptions = normalizeCodexConfigOptions(
|
|
394
466
|
newResponse.configOptions,
|
|
395
467
|
);
|
|
396
468
|
|
|
397
|
-
const meta = params._meta as NewSessionMeta | undefined;
|
|
398
469
|
const requestedPermissionMode = toCodexPermissionMode(meta?.permissionMode);
|
|
399
470
|
this.sessionState = createSessionState(newResponse.sessionId, params.cwd, {
|
|
400
471
|
taskRunId: meta?.taskRunId,
|
|
@@ -414,6 +485,38 @@ export class CodexAcpAgent extends BaseAcpAgent {
|
|
|
414
485
|
return newResponse;
|
|
415
486
|
}
|
|
416
487
|
|
|
488
|
+
/**
|
|
489
|
+
* When the caller wires up `onStructuredOutput` and provides a JSON schema
|
|
490
|
+
* via `_meta.jsonSchema`, inject the stdio MCP server that exposes
|
|
491
|
+
* `create_output` and append instructions telling the model to use it.
|
|
492
|
+
*
|
|
493
|
+
* Codex has no native equivalent of Claude's `outputFormat`, so we lean on
|
|
494
|
+
* MCP tool-calling to get validated structured output back.
|
|
495
|
+
*/
|
|
496
|
+
private applyStructuredOutput<
|
|
497
|
+
T extends { mcpServers?: McpServer[]; _meta?: unknown },
|
|
498
|
+
>(request: T, meta: NewSessionMeta | undefined): T {
|
|
499
|
+
if (!meta?.jsonSchema || !this.onStructuredOutput) {
|
|
500
|
+
return request;
|
|
501
|
+
}
|
|
502
|
+
|
|
503
|
+
const mcpServer = buildStructuredOutputMcpServer(meta.jsonSchema);
|
|
504
|
+
const existingMeta = (request._meta ?? {}) as Record<string, unknown>;
|
|
505
|
+
const existingSystemPrompt =
|
|
506
|
+
typeof existingMeta.systemPrompt === "string"
|
|
507
|
+
? existingMeta.systemPrompt
|
|
508
|
+
: "";
|
|
509
|
+
|
|
510
|
+
return {
|
|
511
|
+
...request,
|
|
512
|
+
mcpServers: [...(request.mcpServers ?? []), mcpServer],
|
|
513
|
+
_meta: {
|
|
514
|
+
...existingMeta,
|
|
515
|
+
systemPrompt: existingSystemPrompt + STRUCTURED_OUTPUT_INSTRUCTIONS,
|
|
516
|
+
},
|
|
517
|
+
};
|
|
518
|
+
}
|
|
519
|
+
|
|
417
520
|
private async applyInitialPermissionMode(
|
|
418
521
|
sessionId: string,
|
|
419
522
|
permissionMode?: string,
|
|
@@ -630,7 +733,9 @@ export class CodexAcpAgent extends BaseAcpAgent {
|
|
|
630
733
|
const newAbortController = new AbortController();
|
|
631
734
|
const newConnection = new ClientSideConnection(
|
|
632
735
|
(_agent) =>
|
|
633
|
-
createCodexClient(this.client, this.logger, this.sessionState),
|
|
736
|
+
createCodexClient(this.client, this.logger, this.sessionState, {
|
|
737
|
+
onStructuredOutput: this.onStructuredOutput,
|
|
738
|
+
}),
|
|
634
739
|
codexStream,
|
|
635
740
|
);
|
|
636
741
|
|
|
@@ -2,6 +2,7 @@ import type {
|
|
|
2
2
|
AgentSideConnection,
|
|
3
3
|
ReadTextFileRequest,
|
|
4
4
|
ReadTextFileResponse,
|
|
5
|
+
SessionNotification,
|
|
5
6
|
} from "@agentclientprotocol/sdk";
|
|
6
7
|
import { describe, expect, test, vi } from "vitest";
|
|
7
8
|
import type { FileEnrichmentDeps } from "../../enrichment/file-enricher";
|
|
@@ -110,3 +111,180 @@ describe("createCodexClient readTextFile", () => {
|
|
|
110
111
|
expect(upstream.readTextFile).toHaveBeenCalledWith(params);
|
|
111
112
|
});
|
|
112
113
|
});
|
|
114
|
+
|
|
115
|
+
describe("createCodexClient onStructuredOutput", () => {
|
|
116
|
+
const logger = new Logger({ debug: false, prefix: "[test]" });
|
|
117
|
+
const sessionState = createSessionState("sess", "/tmp");
|
|
118
|
+
|
|
119
|
+
function makeUpstream(): AgentSideConnection {
|
|
120
|
+
return {
|
|
121
|
+
sessionUpdate: vi.fn(async () => {}),
|
|
122
|
+
requestPermission: vi.fn(),
|
|
123
|
+
readTextFile: vi.fn(),
|
|
124
|
+
writeTextFile: vi.fn(),
|
|
125
|
+
createTerminal: vi.fn(),
|
|
126
|
+
terminalOutput: vi.fn(),
|
|
127
|
+
releaseTerminal: vi.fn(),
|
|
128
|
+
waitForTerminalExit: vi.fn(),
|
|
129
|
+
killTerminal: vi.fn(),
|
|
130
|
+
extMethod: vi.fn(),
|
|
131
|
+
extNotification: vi.fn(),
|
|
132
|
+
} as unknown as AgentSideConnection;
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
function notification(update: Record<string, unknown>): SessionNotification {
|
|
136
|
+
return {
|
|
137
|
+
sessionId: "sess",
|
|
138
|
+
update,
|
|
139
|
+
} as unknown as SessionNotification;
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
test("fires once when create_output completes after rawInput arrived", async () => {
|
|
143
|
+
const onStructuredOutput = vi.fn(async () => {});
|
|
144
|
+
const upstream = makeUpstream();
|
|
145
|
+
const client = createCodexClient(upstream, logger, sessionState, {
|
|
146
|
+
onStructuredOutput,
|
|
147
|
+
});
|
|
148
|
+
|
|
149
|
+
await client.sessionUpdate?.(
|
|
150
|
+
notification({
|
|
151
|
+
sessionUpdate: "tool_call",
|
|
152
|
+
toolCallId: "tc-1",
|
|
153
|
+
title: "create_output",
|
|
154
|
+
status: "in_progress",
|
|
155
|
+
rawInput: { result: "ok", count: 5 },
|
|
156
|
+
}),
|
|
157
|
+
);
|
|
158
|
+
expect(onStructuredOutput).not.toHaveBeenCalled();
|
|
159
|
+
|
|
160
|
+
await client.sessionUpdate?.(
|
|
161
|
+
notification({
|
|
162
|
+
sessionUpdate: "tool_call_update",
|
|
163
|
+
toolCallId: "tc-1",
|
|
164
|
+
title: "create_output",
|
|
165
|
+
status: "completed",
|
|
166
|
+
}),
|
|
167
|
+
);
|
|
168
|
+
|
|
169
|
+
expect(onStructuredOutput).toHaveBeenCalledTimes(1);
|
|
170
|
+
expect(onStructuredOutput).toHaveBeenCalledWith({ result: "ok", count: 5 });
|
|
171
|
+
});
|
|
172
|
+
|
|
173
|
+
test("matches mcp__-prefixed tool titles", async () => {
|
|
174
|
+
const onStructuredOutput = vi.fn(async () => {});
|
|
175
|
+
const upstream = makeUpstream();
|
|
176
|
+
const client = createCodexClient(upstream, logger, sessionState, {
|
|
177
|
+
onStructuredOutput,
|
|
178
|
+
});
|
|
179
|
+
|
|
180
|
+
await client.sessionUpdate?.(
|
|
181
|
+
notification({
|
|
182
|
+
sessionUpdate: "tool_call",
|
|
183
|
+
toolCallId: "tc-1",
|
|
184
|
+
title: "mcp__posthog_output__create_output",
|
|
185
|
+
status: "completed",
|
|
186
|
+
rawInput: { ok: true },
|
|
187
|
+
}),
|
|
188
|
+
);
|
|
189
|
+
|
|
190
|
+
expect(onStructuredOutput).toHaveBeenCalledWith({ ok: true });
|
|
191
|
+
});
|
|
192
|
+
|
|
193
|
+
test("ignores tool calls that aren't create_output", async () => {
|
|
194
|
+
const onStructuredOutput = vi.fn(async () => {});
|
|
195
|
+
const upstream = makeUpstream();
|
|
196
|
+
const client = createCodexClient(upstream, logger, sessionState, {
|
|
197
|
+
onStructuredOutput,
|
|
198
|
+
});
|
|
199
|
+
|
|
200
|
+
await client.sessionUpdate?.(
|
|
201
|
+
notification({
|
|
202
|
+
sessionUpdate: "tool_call",
|
|
203
|
+
toolCallId: "tc-1",
|
|
204
|
+
title: "Read",
|
|
205
|
+
status: "completed",
|
|
206
|
+
rawInput: { path: "/tmp/x" },
|
|
207
|
+
}),
|
|
208
|
+
);
|
|
209
|
+
|
|
210
|
+
expect(onStructuredOutput).not.toHaveBeenCalled();
|
|
211
|
+
});
|
|
212
|
+
|
|
213
|
+
test("does not fire when rawInput never arrived", async () => {
|
|
214
|
+
const onStructuredOutput = vi.fn(async () => {});
|
|
215
|
+
const upstream = makeUpstream();
|
|
216
|
+
const client = createCodexClient(upstream, logger, sessionState, {
|
|
217
|
+
onStructuredOutput,
|
|
218
|
+
});
|
|
219
|
+
|
|
220
|
+
await client.sessionUpdate?.(
|
|
221
|
+
notification({
|
|
222
|
+
sessionUpdate: "tool_call",
|
|
223
|
+
toolCallId: "tc-1",
|
|
224
|
+
title: "create_output",
|
|
225
|
+
status: "completed",
|
|
226
|
+
}),
|
|
227
|
+
);
|
|
228
|
+
|
|
229
|
+
expect(onStructuredOutput).not.toHaveBeenCalled();
|
|
230
|
+
});
|
|
231
|
+
|
|
232
|
+
test("does not fire twice if completed is re-emitted for the same tool call", async () => {
|
|
233
|
+
const onStructuredOutput = vi.fn(async () => {});
|
|
234
|
+
const upstream = makeUpstream();
|
|
235
|
+
const client = createCodexClient(upstream, logger, sessionState, {
|
|
236
|
+
onStructuredOutput,
|
|
237
|
+
});
|
|
238
|
+
|
|
239
|
+
const completed = notification({
|
|
240
|
+
sessionUpdate: "tool_call",
|
|
241
|
+
toolCallId: "tc-1",
|
|
242
|
+
title: "create_output",
|
|
243
|
+
status: "completed",
|
|
244
|
+
rawInput: { final: 1 },
|
|
245
|
+
});
|
|
246
|
+
|
|
247
|
+
await client.sessionUpdate?.(completed);
|
|
248
|
+
await client.sessionUpdate?.(completed);
|
|
249
|
+
|
|
250
|
+
expect(onStructuredOutput).toHaveBeenCalledTimes(1);
|
|
251
|
+
});
|
|
252
|
+
|
|
253
|
+
test("forwards the notification upstream regardless of structured-output handling", async () => {
|
|
254
|
+
const onStructuredOutput = vi.fn(async () => {});
|
|
255
|
+
const upstream = makeUpstream();
|
|
256
|
+
const client = createCodexClient(upstream, logger, sessionState, {
|
|
257
|
+
onStructuredOutput,
|
|
258
|
+
});
|
|
259
|
+
|
|
260
|
+
const note = notification({
|
|
261
|
+
sessionUpdate: "tool_call",
|
|
262
|
+
toolCallId: "tc-1",
|
|
263
|
+
title: "create_output",
|
|
264
|
+
status: "completed",
|
|
265
|
+
rawInput: { final: 1 },
|
|
266
|
+
});
|
|
267
|
+
await client.sessionUpdate?.(note);
|
|
268
|
+
|
|
269
|
+
expect(upstream.sessionUpdate).toHaveBeenCalledWith(note);
|
|
270
|
+
});
|
|
271
|
+
|
|
272
|
+
test("does nothing when the callback is not wired", async () => {
|
|
273
|
+
const upstream = makeUpstream();
|
|
274
|
+
const client = createCodexClient(upstream, logger, sessionState);
|
|
275
|
+
|
|
276
|
+
// No onStructuredOutput configured — must not throw and must still
|
|
277
|
+
// forward upstream.
|
|
278
|
+
await client.sessionUpdate?.(
|
|
279
|
+
notification({
|
|
280
|
+
sessionUpdate: "tool_call",
|
|
281
|
+
toolCallId: "tc-1",
|
|
282
|
+
title: "create_output",
|
|
283
|
+
status: "completed",
|
|
284
|
+
rawInput: { x: 1 },
|
|
285
|
+
}),
|
|
286
|
+
);
|
|
287
|
+
|
|
288
|
+
expect(upstream.sessionUpdate).toHaveBeenCalledTimes(1);
|
|
289
|
+
});
|
|
290
|
+
});
|