imprint-mcp 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97)
  1. package/CHANGELOG.md +168 -0
  2. package/LICENSE +21 -0
  3. package/README.md +322 -0
  4. package/examples/discoverandgo/README.md +57 -0
  5. package/examples/discoverandgo/book_discoverandgo_museum_pass/cron.json +8 -0
  6. package/examples/discoverandgo/book_discoverandgo_museum_pass/index.ts +89 -0
  7. package/examples/discoverandgo/book_discoverandgo_museum_pass/workflow.json +39 -0
  8. package/examples/echo/README.md +37 -0
  9. package/examples/echo/echo_test/index.ts +31 -0
  10. package/examples/google-flights/search_google_flights/index.ts +101 -0
  11. package/examples/google-flights/search_google_flights/parser.test.ts +140 -0
  12. package/examples/google-flights/search_google_flights/parser.ts +189 -0
  13. package/examples/google-flights/search_google_flights/playbook.yaml +130 -0
  14. package/examples/google-flights/search_google_flights/workflow.json +48 -0
  15. package/examples/google-hotels/search_google_hotels/index.ts +194 -0
  16. package/examples/google-hotels/search_google_hotels/parser.test.ts +168 -0
  17. package/examples/google-hotels/search_google_hotels/parser.ts +330 -0
  18. package/examples/google-hotels/search_google_hotels/playbook.yaml +125 -0
  19. package/examples/google-hotels/search_google_hotels/workflow.json +111 -0
  20. package/examples/namecheap-domains/search_namecheap_domains/index.ts +144 -0
  21. package/examples/namecheap-domains/search_namecheap_domains/parser.ts +380 -0
  22. package/examples/namecheap-domains/search_namecheap_domains/playbook.yaml +50 -0
  23. package/examples/namecheap-domains/search_namecheap_domains/request-transform.ts +136 -0
  24. package/examples/namecheap-domains/search_namecheap_domains/workflow.json +97 -0
  25. package/examples/southwest/README.md +81 -0
  26. package/examples/southwest/search_southwest_flights/backends.json +23 -0
  27. package/examples/southwest/search_southwest_flights/cron.json +19 -0
  28. package/examples/southwest/search_southwest_flights/index.ts +110 -0
  29. package/examples/southwest/search_southwest_flights/playbook.yaml +46 -0
  30. package/examples/southwest/search_southwest_flights/workflow.json +54 -0
  31. package/package.json +78 -0
  32. package/prompts/compile-agent.md +580 -0
  33. package/prompts/intent-detection.md +198 -0
  34. package/prompts/playbook-compilation.md +279 -0
  35. package/prompts/request-triage.md +74 -0
  36. package/prompts/tool-candidate-detection.md +104 -0
  37. package/src/cli.ts +1287 -0
  38. package/src/imprint/agent.ts +468 -0
  39. package/src/imprint/app-api-hosts.ts +53 -0
  40. package/src/imprint/backend-ladder.ts +568 -0
  41. package/src/imprint/check.ts +136 -0
  42. package/src/imprint/chromium.ts +211 -0
  43. package/src/imprint/claude-cli-compile.ts +640 -0
  44. package/src/imprint/cli-credential.ts +394 -0
  45. package/src/imprint/codex-cli-compile.ts +712 -0
  46. package/src/imprint/compile-agent-types.ts +40 -0
  47. package/src/imprint/compile-agent.ts +404 -0
  48. package/src/imprint/compile-tools.ts +1389 -0
  49. package/src/imprint/compile.ts +720 -0
  50. package/src/imprint/cookie-jar.ts +246 -0
  51. package/src/imprint/credential-bundle.ts +195 -0
  52. package/src/imprint/credential-extract.ts +290 -0
  53. package/src/imprint/credential-store.ts +707 -0
  54. package/src/imprint/cron.ts +312 -0
  55. package/src/imprint/doctor.ts +223 -0
  56. package/src/imprint/emit.ts +154 -0
  57. package/src/imprint/etld.ts +134 -0
  58. package/src/imprint/freeform-redact.ts +216 -0
  59. package/src/imprint/inject-listener.ts +137 -0
  60. package/src/imprint/install.ts +795 -0
  61. package/src/imprint/integrations.ts +385 -0
  62. package/src/imprint/is-compiled.ts +2 -0
  63. package/src/imprint/json-path.ts +100 -0
  64. package/src/imprint/llm.ts +998 -0
  65. package/src/imprint/load-json.ts +54 -0
  66. package/src/imprint/log.ts +33 -0
  67. package/src/imprint/login.ts +166 -0
  68. package/src/imprint/mcp-compile-server.ts +282 -0
  69. package/src/imprint/mcp-maintenance.ts +1790 -0
  70. package/src/imprint/mcp-server.ts +350 -0
  71. package/src/imprint/multi-progress.ts +69 -0
  72. package/src/imprint/notify.ts +155 -0
  73. package/src/imprint/paths.ts +64 -0
  74. package/src/imprint/playbook-parser.ts +21 -0
  75. package/src/imprint/playbook-runner.ts +465 -0
  76. package/src/imprint/probe-backends.ts +251 -0
  77. package/src/imprint/progress.ts +28 -0
  78. package/src/imprint/record.ts +470 -0
  79. package/src/imprint/redact.ts +550 -0
  80. package/src/imprint/replay-capture.ts +387 -0
  81. package/src/imprint/request-context.ts +66 -0
  82. package/src/imprint/runtime-link.ts +73 -0
  83. package/src/imprint/runtime.ts +942 -0
  84. package/src/imprint/sensitive-keys.ts +156 -0
  85. package/src/imprint/session-diff.ts +409 -0
  86. package/src/imprint/session-merge.ts +198 -0
  87. package/src/imprint/session-writer.ts +149 -0
  88. package/src/imprint/sites.ts +27 -0
  89. package/src/imprint/stealth-fetch.ts +434 -0
  90. package/src/imprint/teach-state.ts +235 -0
  91. package/src/imprint/teach.ts +2120 -0
  92. package/src/imprint/tool-candidates.ts +423 -0
  93. package/src/imprint/tool-loader.ts +186 -0
  94. package/src/imprint/tool-selection.ts +70 -0
  95. package/src/imprint/tracing.ts +508 -0
  96. package/src/imprint/types.ts +472 -0
  97. package/src/imprint/version.ts +21 -0
@@ -0,0 +1,712 @@
1
+ /**
2
+ * compile-agent driver for codex-cli.
3
+ *
4
+ * Codex CLI can run non-interactively with JSONL progress and stdio MCP
5
+ * servers. This mirrors the claude-cli compile path: expose the compile tools
6
+ * through the existing MCP server, let Codex drive the agent loop, and accept
7
+ * success only after the MCP done() tool writes the verified sentinel.
8
+ */
9
+
10
+ import { type ChildProcess, spawn } from 'node:child_process';
11
+ import { existsSync, mkdirSync, readFileSync, unlinkSync, writeFileSync } from 'node:fs';
12
+ import { isAbsolute as pathIsAbsolute, join as pathJoin } from 'node:path';
13
+ import { type Span, context as otelContext } from '@opentelemetry/api';
14
+ import type { CompileAgentProgress, CompileAgentResult } from './compile-agent-types.ts';
15
+ import { preferredAgentModel } from './llm.ts';
16
+ import { createLog } from './log.ts';
17
+ import { COMPILE_SENTINELS } from './mcp-compile-server.ts';
18
+ import type { SharedCompileContext, ToolCandidate } from './tool-candidates.ts';
19
+ import {
20
+ endTraceSpan,
21
+ llmSpanAttributes,
22
+ resolveTraceTokenCount,
23
+ setSpanAttributes,
24
+ startTraceSpan,
25
+ traceJsonInputOutputAttributes,
26
+ traceLlmIoEnabled,
27
+ traceLlmMessages,
28
+ traceToolIoEnabled,
29
+ traced,
30
+ } from './tracing.ts';
31
+ import type { Session } from './types.ts';
32
+
33
// Module-scoped logger; every message from this driver is tagged 'compile-codex-cli'.
const log = createLog('compile-codex-cli');

// Two directory levels above this module's directory; used as codex's working
// directory and as the base for resolving relative session paths.
const REPO_ROOT = pathJoin(import.meta.dir, '..', '..');

// CLI entry script that is launched (via the current runtime executable) as the
// stdio MCP compile server.
const CLI_PATH = pathJoin(REPO_ROOT, 'src', 'cli.ts');

// Name under which the compile MCP server is registered in the codex config overrides.
const MCP_SERVER_NAME = 'imprint-compile';

// Reported to progress listeners as `maxVerificationCycles`.
const MAX_VERIFICATION_CYCLES = 5;
39
+
40
/** Inputs for one codex-cli driven compile run. */
interface CompileViaCodexCliOptions {
  /** Recorded session being compiled; its `site` is attached to the trace span. */
  session: Session;
  /** Directory that receives artifacts, sentinels and the conversation log. */
  absoluteToolDir: string;
  /** Session file path; a relative path is resolved against the repository root. */
  sessionPath: string;
  /** File whose contents are embedded in the prompt as system instructions. */
  systemPromptPath: string;
  /** Absolute wall-clock deadline, compared against `Date.now()` (epoch milliseconds). */
  deadlineMs: number;
  /** Run start timestamp (epoch milliseconds); used to compute elapsed/duration values. */
  startTime: number;
  /** Optional listener notified when a turn starts and when tool items start/complete. */
  onProgress?: (p: CompileAgentProgress) => void;
  /** When falsy, `parser.test.ts` is deleted after a verified-successful run. */
  keepTest?: boolean;
  /** Selected candidate; forwarded to the MCP server and included in the prompt. */
  candidate?: ToolCandidate;
  /** Shared compile context; forwarded to the MCP server and included in the prompt. */
  sharedContext?: SharedCompileContext;
}
52
+
53
/**
 * Payload of an `item.started` / `item.completed` event. Every field is
 * optional; this driver probes several alternative field names for the tool
 * name, input and output because it does not rely on one fixed item shape.
 */
interface CodexJsonEventItem {
  id?: string;
  type?: string;
  text?: string;
  content?: unknown;
  name?: string;
  tool_name?: string;
  tool?: string;
  server?: string;
  command?: string[];
  arguments?: unknown;
  args?: unknown;
  input?: unknown;
  result?: unknown;
  output?: unknown;
  error?: unknown;
  status?: string;
  is_error?: boolean;
}

/** Token usage block; this driver reads it from `turn.completed` events. */
interface CodexJsonEventUsage {
  input_tokens?: number;
  output_tokens?: number;
  cached_input_tokens?: number;
  reasoning_output_tokens?: number;
}

/**
 * One parsed JSONL line from codex stdout. The event types handled in this
 * file are `thread.started`, `turn.started`, `item.started`, `item.completed`,
 * `turn.completed`, `error` and `turn.failed`.
 */
interface CodexJsonEvent {
  type: string;
  thread_id?: string;
  item?: CodexJsonEventItem;
  usage?: CodexJsonEventUsage;
  message?: string;
  error?: { message?: string };
}
84
+
85
/**
 * Public entry point: runs a compile via codex-cli inside a
 * `compile.codex_cli_agent` AGENT trace span and records the outcome on it.
 *
 * @param opts Session, tool directory, prompt path and deadline for this run.
 * @returns The result produced by the codex-driven compile.
 */
export async function compileViaCodexCli(
  opts: CompileViaCodexCliOptions,
): Promise<CompileAgentResult> {
  const spanAttributes = {
    'imprint.site': opts.session.site,
    'imprint.tool_name': opts.candidate?.toolName,
    'imprint.session_path': opts.sessionPath,
    'imprint.tool_dir': opts.absoluteToolDir,
    'imprint.model': preferredAgentModel('codex-cli'),
  };

  return await traced('compile.codex_cli_agent', 'AGENT', spanAttributes, async (span) => {
    const compileResult = await compileViaCodexCliImpl(opts, span);

    // Mirror the headline result fields onto the span.
    setSpanAttributes(span, {
      'imprint.compile.outcome': compileResult.outcome,
      'imprint.compile.success': compileResult.success,
      'imprint.compile.turns': compileResult.turns,
      'imprint.compile.duration_ms': compileResult.durationMs,
      'imprint.compile.input_tokens': compileResult.inputTokens,
      'imprint.compile.output_tokens': compileResult.outputTokens,
      'imprint.compile.conversation_log': compileResult.conversationLogPath,
    });

    return compileResult;
  });
}
113
+
114
/**
 * Prepares the tool directory, builds the prompt and codex arguments, spawns
 * `codex exec`, feeds it the prompt over stdin and hands the child to
 * `driveJsonl`. Token attributes are recorded on `traceSpan` before and after
 * the run.
 *
 * @param opts Compile options for this run.
 * @param traceSpan Optional span that receives LLM/token attributes.
 * @returns The compile result; setup failures are returned as `outcome: 'error'`.
 */
async function compileViaCodexCliImpl(
  opts: CompileViaCodexCliOptions,
  traceSpan?: Span,
): Promise<CompileAgentResult> {
  mkdirSync(opts.absoluteToolDir, { recursive: true });

  // Remove sentinels left by a previous run. A stale sentinel that cannot be
  // removed would later be read as THIS run's result, so that case is an error
  // rather than a silently ignored failure.
  for (const name of [COMPILE_SENTINELS.done, COMPILE_SENTINELS.giveUp]) {
    const sentinelPath = pathJoin(opts.absoluteToolDir, name);
    if (!existsSync(sentinelPath)) continue;
    try {
      unlinkSync(sentinelPath);
    } catch (err) {
      if (existsSync(sentinelPath)) {
        return finalErrorResult(opts, `failed to remove stale sentinel ${name}: ${errMsg(err)}`);
      }
    }
  }

  // The MCP compile server is launched with the same runtime executable that
  // is running this process.
  const bunPath = process.execPath;
  const sessionPathAbs = pathIsAbsolute(opts.sessionPath)
    ? opts.sessionPath
    : pathJoin(REPO_ROOT, opts.sessionPath);
  const mcpArgs = [
    'run',
    CLI_PATH,
    '__mcp-compile-server',
    '--session-path',
    sessionPathAbs,
    '--tool-dir',
    opts.absoluteToolDir,
    ...(opts.candidate ? ['--candidate-json', JSON.stringify(opts.candidate)] : []),
    ...(opts.sharedContext ? ['--shared-context-json', JSON.stringify(opts.sharedContext)] : []),
  ];

  let systemPrompt: string;
  try {
    systemPrompt = readFileSync(opts.systemPromptPath, 'utf8');
  } catch (err) {
    return finalErrorResult(opts, `failed to read system prompt: ${errMsg(err)}`);
  }

  const initialPrompt = `<system_instructions>
${systemPrompt}
</system_instructions>

A new compile task is starting.

Session path: ${sessionPathAbs}
Tool directory: ${opts.absoluteToolDir}
You will write artifacts into the tool directory.
${formatCandidateContext(opts.candidate, opts.sharedContext)}

Use the imprint-compile MCP tools to inspect the session, write artifacts, run tests, and call done(). Begin by calling read_session_summary, then proceed per the system instructions.`;

  const model = preferredAgentModel('codex-cli');
  const initialTokenCount = resolveTraceTokenCount(null, initialPrompt);
  const captureLlmIo = traceLlmIoEnabled();
  setSpanAttributes(traceSpan, {
    ...llmSpanAttributes({
      provider: 'codex-cli',
      model,
      inputTokens: initialTokenCount.tokens,
      tokenCountsEstimated: true,
      inputTokenSource: initialTokenCount.source,
      inputMessages: captureLlmIo
        ? traceLlmMessages([{ role: 'user', content: initialPrompt }])
        : undefined,
      inputValue: captureLlmIo ? initialPrompt : undefined,
      invocationParameters: {
        command: 'codex exec',
        json: true,
        sandbox: 'workspace-write',
        tool_timeout_sec: 300,
      },
    }),
    'imprint.compile.initial_prompt_chars': initialPrompt.length,
  });

  // The trailing '-' is the prompt argument; the prompt itself is written to stdin below.
  const args = [
    '-a',
    'never',
    'exec',
    '--json',
    '--ephemeral',
    '--ignore-user-config',
    '--ignore-rules',
    '--skip-git-repo-check',
    '-C',
    REPO_ROOT,
    '-s',
    'workspace-write',
    '-m',
    model,
    '-c',
    `mcp_servers.${MCP_SERVER_NAME}.command=${JSON.stringify(bunPath)}`,
    '-c',
    `mcp_servers.${MCP_SERVER_NAME}.args=${JSON.stringify(mcpArgs)}`,
    '-c',
    `mcp_servers.${MCP_SERVER_NAME}.default_tools_approval_mode=${JSON.stringify('approve')}`,
    '-c',
    `mcp_servers.${MCP_SERVER_NAME}.tool_timeout_sec=300`,
    '-c',
    'shell_environment_policy.inherit=all',
    '-',
  ];

  log(`spawning codex (mcp-server=${MCP_SERVER_NAME})`);

  let child: ChildProcess;
  try {
    child = spawn('codex', args, {
      cwd: REPO_ROOT,
      env: process.env,
      stdio: ['pipe', 'pipe', 'pipe'],
    });
  } catch (err) {
    return finalErrorResult(opts, `failed to spawn codex-cli: ${errMsg(err)}`);
  }

  // Write failures on the pipe (e.g. EPIPE when codex exits before reading the
  // prompt) are emitted asynchronously as 'error' events rather than thrown.
  // Without a listener they would surface as an unhandled stream error; the
  // child's own exit/error path in driveJsonl reports the actual failure.
  child.stdin?.on('error', (err) => {
    log(`codex stdin error: ${errMsg(err)}`);
  });

  try {
    child.stdin?.end(initialPrompt);
  } catch (err) {
    try {
      child.kill('SIGTERM');
    } catch {
      // already gone
    }
    return finalErrorResult(opts, `failed to send prompt to codex-cli: ${errMsg(err)}`);
  }

  const result = await driveJsonl(child, opts, traceSpan);

  // Prefer the usage reported by codex; otherwise fall back to whatever
  // resolveTraceTokenCount derives from the prompt / final message text.
  const hasActualUsage = result.inputTokens > 0 || result.outputTokens > 0;
  const inputTokenCount = resolveTraceTokenCount(
    hasActualUsage ? result.inputTokens : null,
    initialPrompt,
  );
  const outputTokenCount = resolveTraceTokenCount(
    hasActualUsage ? result.outputTokens : null,
    result.message,
  );
  setSpanAttributes(traceSpan, {
    ...llmSpanAttributes({
      provider: 'codex-cli',
      model,
      inputTokens: inputTokenCount.tokens,
      outputTokens: outputTokenCount.tokens,
      tokenCountsEstimated:
        inputTokenCount.source === 'estimated' || outputTokenCount.source === 'estimated',
      inputTokenSource: inputTokenCount.source,
      outputTokenSource: outputTokenCount.source,
    }),
    'imprint.compile.message': result.message,
  });
  return result;
}
267
+
268
/**
 * Consumes the JSONL event stream of a running codex child process until it
 * exits, then derives the compile result from the sentinel files in the tool
 * directory (falling back to deadline / exit-code based outcomes).
 *
 * While the child runs it:
 *  - records every parsed event into the conversation log,
 *  - maintains turn / tool trace spans and cumulative token counts,
 *  - terminates the child once a done/give_up sentinel appears or the
 *    wall-clock deadline passes.
 *
 * @param child The spawned codex process (stdout/stderr piped).
 * @param opts Compile options; `deadlineMs` and `startTime` are epoch milliseconds.
 * @param traceSpan Optional parent span that receives thread/message attributes.
 * @returns The compile result; success requires a done sentinel whose
 *   `verification` field equals `'passed'`.
 */
async function driveJsonl(
  child: ChildProcess,
  opts: CompileViaCodexCliOptions,
  traceSpan?: Span,
): Promise<CompileAgentResult> {
  // Capture OTel context so child-process event handlers can parent spans
  // under the current compile.codex_cli_agent span. Bun's event emitters
  // don't propagate AsyncLocalStorage, so without this the agent.turn.*
  // spans appear as orphaned root traces in Phoenix.
  const parentCtx = otelContext.active();

  const conversationLog: unknown[] = [];
  let inputTokens = 0;
  let outputTokens = 0;
  let turn = 0;
  let lastErrorMessage = '';
  let stderrBuf = '';
  let agentMessageCount = 0;
  const toolSpans = new Map<string, Span>();
  let currentTurnSpan: Span | null = null;

  const budgetMs = Math.max(0, opts.deadlineMs - Date.now());
  const fireProgress = (phase: 'thinking' | 'tool', toolName?: string): void => {
    opts.onProgress?.({
      turn,
      phase,
      toolName,
      elapsedMs: Date.now() - opts.startTime,
      budgetMs,
      inputTokens,
      outputTokens,
      verificationCycle: 1,
      maxVerificationCycles: MAX_VERIFICATION_CYCLES,
    });
  };

  const doneSentinel = pathJoin(opts.absoluteToolDir, COMPILE_SENTINELS.done);
  const giveUpSentinel = pathJoin(opts.absoluteToolDir, COMPILE_SENTINELS.giveUp);

  // Once either sentinel exists the agent's work is finished; stop codex
  // instead of waiting for it to wind down on its own.
  const sentinelTimer = setInterval(() => {
    if (!existsSync(doneSentinel) && !existsSync(giveUpSentinel)) return;
    try {
      child.kill('SIGTERM');
    } catch {
      // already gone
    }
  }, 500);

  let killTimer: ReturnType<typeof setTimeout> | undefined;
  const deadlineTimer = setTimeout(
    () => {
      log('wall-clock deadline exceeded, terminating codex');
      try {
        child.kill('SIGTERM');
        killTimer = setTimeout(() => {
          // `child.killed` only says a signal was sent (it is already true
          // after the SIGTERM above), so it cannot be used to decide whether
          // the process is still alive. exitCode/signalCode stay null until
          // the process has actually terminated.
          if (child.exitCode === null && child.signalCode === null) {
            try {
              child.kill('SIGKILL');
            } catch {
              // already gone
            }
          }
        }, 5000);
      } catch {
        // already gone
      }
    },
    Math.max(0, opts.deadlineMs - Date.now()),
  );

  let stdoutBuf = '';
  // Decode at the stream level so a multi-byte UTF-8 character split across
  // two chunks is not corrupted by per-chunk decoding.
  child.stdout?.setEncoding('utf8');
  child.stdout?.on('data', (chunk: Buffer | string) => {
    otelContext.with(parentCtx, () => {
      stdoutBuf += typeof chunk === 'string' ? chunk : chunk.toString('utf8');
      while (true) {
        const nl = stdoutBuf.indexOf('\n');
        if (nl < 0) break;
        const line = stdoutBuf.slice(0, nl).trim();
        stdoutBuf = stdoutBuf.slice(nl + 1);
        if (!line) continue;

        let evt: CodexJsonEvent;
        try {
          evt = JSON.parse(line) as CodexJsonEvent;
        } catch (err) {
          log(`unparseable jsonl line: ${errMsg(err)}`);
          continue;
        }

        conversationLog.push(evt);

        if (evt.type === 'thread.started') {
          log(`thread_id=${evt.thread_id ?? '(none)'}`);
          setSpanAttributes(traceSpan, { 'codex.thread_id': evt.thread_id });
          continue;
        }

        if (evt.type === 'turn.started') {
          // A new turn implicitly closes a previous one that never reported completion.
          if (currentTurnSpan) endTraceSpan(currentTurnSpan);
          turn++;
          currentTurnSpan = startTraceSpan(`agent.turn.${turn}`, 'CHAIN', {
            'imprint.agent.turn': turn,
            'imprint.agent.cumulative_input_tokens': inputTokens,
            'imprint.agent.cumulative_output_tokens': outputTokens,
          });
          fireProgress('thinking');
          continue;
        }

        if ((evt.type === 'item.started' || evt.type === 'item.completed') && evt.item) {
          const agentMessage = codexAgentMessageText(evt.item);
          if (agentMessage && evt.type === 'item.completed') {
            agentMessageCount++;
            setSpanAttributes(traceSpan, {
              'imprint.codex.agent_messages': agentMessageCount,
              'imprint.codex.last_agent_message_chars': agentMessage.length,
              ...(traceLlmIoEnabled()
                ? llmSpanAttributes({
                    provider: 'codex-cli',
                    model: preferredAgentModel('codex-cli'),
                    outputMessages: traceLlmMessages([
                      { role: 'assistant', content: agentMessage },
                    ]),
                    outputValue: agentMessage,
                  })
                : {}),
            });
            continue;
          }
          const toolName = codexToolName(evt.item);
          if (toolName) {
            traceCodexToolEvent(toolSpans, evt.type, evt.item, toolName);
            fireProgress(evt.type === 'item.started' ? 'tool' : 'thinking', toolName);
          }
          continue;
        }

        if (evt.type === 'turn.completed') {
          const turnInput = evt.usage?.input_tokens ?? 0;
          const turnOutput = evt.usage?.output_tokens ?? 0;
          inputTokens += turnInput;
          outputTokens += turnOutput;
          if (currentTurnSpan) {
            setSpanAttributes(currentTurnSpan, {
              'imprint.agent.turn_input_tokens': turnInput,
              'imprint.agent.turn_output_tokens': turnOutput,
            });
            endTraceSpan(currentTurnSpan);
            currentTurnSpan = null;
          }
          continue;
        }

        if (evt.type === 'error' || evt.type === 'turn.failed') {
          lastErrorMessage = evt.message ?? evt.error?.message ?? JSON.stringify(evt);
        }
      }
    });
  });

  child.stderr?.on('data', (chunk: Buffer) => {
    const s = chunk.toString('utf8');
    stderrBuf += s;
    log(`[codex stderr] ${s.trim()}`);
  });

  const exitCode: number = await new Promise((resolve) => {
    child.once('exit', (code) => resolve(code ?? -1));
    child.once('error', (err) => {
      // Spawn failures (e.g. the codex binary is not installed) arrive here,
      // not as a synchronous throw from spawn(); keep the reason so the final
      // message says more than "exited with code -1".
      if (!lastErrorMessage) lastErrorMessage = errMsg(err);
      resolve(-1);
    });
  });
  clearInterval(sentinelTimer);
  clearTimeout(deadlineTimer);
  clearTimeout(killTimer);
  if (currentTurnSpan) endTraceSpan(currentTurnSpan);
  for (const span of toolSpans.values()) endTraceSpan(span);
  toolSpans.clear();

  if (stdoutBuf.trim()) {
    log(`unflushed stdout tail (${stdoutBuf.length} bytes) discarded`);
  }

  const conversationLogPath = pathJoin(opts.absoluteToolDir, '.compile-log.json');
  try {
    writeFileSync(conversationLogPath, JSON.stringify(conversationLog, null, 2), 'utf8');
  } catch (err) {
    log(`failed to persist conversation log: ${errMsg(err)}`);
  }

  const workflowPath = pathJoin(opts.absoluteToolDir, 'workflow.json');
  const parserPath = pathJoin(opts.absoluteToolDir, 'parser.ts');
  const parserTestPath = pathJoin(opts.absoluteToolDir, 'parser.test.ts');

  // Read the done sentinel once; the payload decides both the parser-test
  // cleanup and the final outcome.
  const donePayload = existsSync(doneSentinel)
    ? readCompileSentinelPayload(doneSentinel, 'done')
    : undefined;
  const verifiedOk = donePayload?.verification === 'passed';

  // Must happen before baseResult is built so `parserTestPath` reflects the deletion.
  if (verifiedOk && !opts.keepTest && existsSync(parserTestPath)) {
    try {
      unlinkSync(parserTestPath);
    } catch {
      // best effort
    }
  }

  const baseResult: Pick<
    CompileAgentResult,
    | 'workflowPath'
    | 'parserPath'
    | 'parserTestPath'
    | 'conversationLogPath'
    | 'turns'
    | 'durationMs'
    | 'inputTokens'
    | 'outputTokens'
    | 'cacheReadInputTokens'
    | 'cacheCreationInputTokens'
  > = {
    workflowPath: existsSync(workflowPath) ? workflowPath : undefined,
    parserPath: existsSync(parserPath) ? parserPath : undefined,
    parserTestPath: existsSync(parserTestPath) ? parserTestPath : undefined,
    conversationLogPath,
    turns: turn,
    durationMs: Date.now() - opts.startTime,
    inputTokens,
    outputTokens,
    cacheReadInputTokens: 0,
    cacheCreationInputTokens: 0,
  };

  if (donePayload) {
    if (verifiedOk) {
      return {
        success: true,
        outcome: 'done',
        message: typeof donePayload.summary === 'string' ? donePayload.summary : 'Task completed',
        ...baseResult,
      };
    }
    const failures = Array.isArray(donePayload.failures) ? donePayload.failures : [];
    return {
      success: false,
      outcome: 'error',
      message: `Verification failed after ${donePayload.cycles ?? '?'} cycles. Final failures:\n${failures.join('\n')}`,
      ...baseResult,
    };
  }

  if (existsSync(giveUpSentinel)) {
    const payload = readCompileSentinelPayload(giveUpSentinel, 'give_up');
    return {
      success: false,
      outcome: 'give_up',
      message: `Agent gave up: ${payload.reason ?? 'unknown reason'}\n${payload.what_was_tried ?? ''}`,
      ...baseResult,
    };
  }

  if (Date.now() > opts.deadlineMs) {
    return {
      success: false,
      outcome: 'timeout',
      message: `codex-cli exceeded the ${Math.round((opts.deadlineMs - opts.startTime) / 60000)} minute deadline before completing.`,
      ...baseResult,
    };
  }

  if (exitCode === 0) {
    return {
      success: false,
      outcome: 'soft_cap',
      message: 'codex-cli exited without calling done() or give_up(). It may have stopped early.',
      ...baseResult,
    };
  }

  const errorTail = lastErrorMessage || stderrBuf.trim().slice(-500);
  return {
    success: false,
    outcome: 'error',
    message: `codex-cli exited with code ${exitCode}${errorTail ? `\n${errorTail}` : ''}`,
    ...baseResult,
  };
}

/**
 * Reads a sentinel file and returns its JSON object payload. An empty file,
 * unreadable file, invalid JSON or a non-object JSON value (e.g. `null`) all
 * yield an empty object so callers can read fields without further guards.
 */
function readCompileSentinelPayload(path: string, label: string): Record<string, unknown> {
  try {
    const raw = readFileSync(path, 'utf8').trim();
    if (!raw) return {};
    const parsed: unknown = JSON.parse(raw);
    if (isRecord(parsed)) return parsed;
    log(`failed to parse ${label} sentinel: payload is not a JSON object`);
  } catch (err) {
    log(`failed to parse ${label} sentinel: ${errMsg(err)}`);
  }
  return {};
}
566
+
567
/**
 * Opens a TOOL span on `item.started` and closes it on any other event type
 * (callers pass `item.completed`). Open spans are tracked in `spans`, keyed by
 * the item id.
 *
 * When an item carries no `id`, a synthetic `<toolName>:<n>` key is used. The
 * completion side then pairs with the oldest open synthetic key for the same
 * tool instead of recomputing a key from the (by then different) map size,
 * which could never match the key chosen at start.
 *
 * @param spans Open tool spans, mutated in place.
 * @param eventType The event's `type` field.
 * @param item The event's item payload.
 * @param toolName Tool name as resolved by the caller.
 */
function traceCodexToolEvent(
  spans: Map<string, Span>,
  eventType: string,
  item: NonNullable<CodexJsonEvent['item']>,
  toolName: string,
): void {
  const captureIo = traceToolIoEnabled();
  const syntheticPrefix = `${toolName}:`;

  if (eventType === 'item.started') {
    let id = item.id;
    if (id === undefined) {
      // Start from the map size (the original scheme) but skip keys that are
      // still open so a new span never overwrites a live one.
      let n = spans.size;
      while (spans.has(`${syntheticPrefix}${n}`)) n++;
      id = `${syntheticPrefix}${n}`;
    }
    const input = captureIo ? codexToolInput(item) : undefined;
    const span = startTraceSpan(`mcp.${toolName}`, 'TOOL', {
      'mcp.server': item.server ?? MCP_SERVER_NAME,
      'mcp.tool_name': toolName,
      'codex.item_id': id,
      'codex.item_type': item.type,
      ...(input !== undefined
        ? traceJsonInputOutputAttributes('input', input, `mcp.${toolName}.input`)
        : {}),
    });
    if (span) spans.set(id, span);
    return;
  }

  let id = item.id;
  if (id === undefined) {
    // Map iteration follows insertion order, so the first match is the oldest
    // open synthetic span for this tool.
    for (const key of spans.keys()) {
      if (key.startsWith(syntheticPrefix) && /^\d+$/.test(key.slice(syntheticPrefix.length))) {
        id = key;
        break;
      }
    }
    id ??= `${syntheticPrefix}${spans.size}`;
  }

  const output = captureIo ? codexToolOutput(item) : undefined;
  const completionAttributes = {
    'codex.item_status': item.status,
    ...(output !== undefined
      ? traceJsonInputOutputAttributes('output', output, `mcp.${toolName}.output`)
      : {}),
  };
  const toolError = codexToolError(item);
  const span = spans.get(id);
  if (!span) {
    // No matching start was seen: emit a self-contained span so the
    // completion is still visible in the trace.
    const completedSpan = startTraceSpan(`mcp.${toolName}`, 'TOOL', {
      'mcp.server': item.server ?? MCP_SERVER_NAME,
      'mcp.tool_name': toolName,
      'codex.item_id': id,
      'codex.item_type': item.type,
      'codex.event': 'completed_without_start',
      ...completionAttributes,
    });
    endTraceSpan(completedSpan, toolError);
    return;
  }
  setSpanAttributes(span, completionAttributes);
  endTraceSpan(span, toolError);
  spans.delete(id);
}
612
+
613
/**
 * Extracts the text of an `agent_message` item.
 *
 * Looks at `text` first, then a string `content`, then an array `content`
 * whose string blocks and `{ text: string }` blocks are concatenated.
 *
 * @returns The message text, or `undefined` for other item types, for an
 *   unrecognised content shape, or when an array yields no text at all.
 */
function codexAgentMessageText(item: NonNullable<CodexJsonEvent['item']>): string | undefined {
  if (item.type !== 'agent_message') return undefined;

  const { text, content } = item;
  if (typeof text === 'string') return text;
  if (typeof content === 'string') return content;
  if (!Array.isArray(content)) return undefined;

  let combined = '';
  for (const block of content) {
    if (typeof block === 'string') {
      combined += block;
    } else if (isRecord(block) && typeof block.text === 'string') {
      combined += block.text;
    }
  }
  return combined || undefined;
}
630
+
631
/**
 * Resolves the tool name of a non-message item from `name`, `tool_name` or
 * `tool` (first one present), with the `mcp__<server>__` marker for this
 * module's MCP server removed.
 *
 * @returns The tool name, or `undefined` for agent messages and items without a name.
 */
function codexToolName(item: NonNullable<CodexJsonEvent['item']>): string | undefined {
  const itemType = item.type ?? '';
  if (itemType === 'agent_message') return undefined;

  const rawName = item.name ?? item.tool_name ?? item.tool;
  if (!rawName) return undefined;

  const serverMarker = `mcp__${MCP_SERVER_NAME}__`;
  return rawName.replace(serverMarker, '');
}
638
+
639
/**
 * Picks the tool input from an item: the first non-nullish of `arguments`,
 * `args`, `input`; otherwise `{ command }` when a command is present.
 *
 * @returns The input value, or `undefined` when the item carries none.
 */
function codexToolInput(item: NonNullable<CodexJsonEvent['item']>): unknown {
  for (const candidate of [item.arguments, item.args, item.input]) {
    if (candidate !== undefined && candidate !== null) return candidate;
  }
  if (item.command) return { command: item.command };
  return undefined;
}
647
+
648
/**
 * Picks the tool output from an item: the first non-nullish of `result`,
 * `output`, `content`, `error`; otherwise `{ status }` when a status is present.
 *
 * @returns The output value, or `undefined` when the item carries none.
 */
function codexToolOutput(item: NonNullable<CodexJsonEvent['item']>): unknown {
  for (const candidate of [item.result, item.output, item.content, item.error]) {
    if (candidate !== undefined && candidate !== null) return candidate;
  }
  if (item.status) return { status: item.status };
  return undefined;
}
657
+
658
/**
 * Builds an `Error` for a failed tool item. An item counts as failed when
 * `is_error` is truthy or `status` is `'error'` or `'failed'`.
 *
 * @returns An `Error` whose message is the item's string error, its
 *   JSON-serialised error, or `"<tool> failed"` when no error is attached;
 *   `undefined` when the item did not fail.
 */
function codexToolError(item: NonNullable<CodexJsonEvent['item']>): Error | undefined {
  const failed = item.is_error || item.status === 'error' || item.status === 'failed';
  if (!failed) return undefined;

  let message: string;
  if (item.error === undefined) {
    message = `${codexToolName(item) ?? 'tool'} failed`;
  } else if (typeof item.error === 'string') {
    message = item.error;
  } else {
    message = JSON.stringify(item.error);
  }
  return new Error(message);
}
668
+
669
/** Type guard: true for any non-null object that is not an array. */
function isRecord(value: unknown): value is Record<string, unknown> {
  if (value === null || Array.isArray(value)) return false;
  return typeof value === 'object';
}
672
+
673
/**
 * Builds an `outcome: 'error'` result for failures that happen before codex
 * produced any events, and records the message in `.compile-log.json` inside
 * the tool directory.
 *
 * Persisting the log is best effort: neither a failure to create the
 * directory nor a failure to write the file may prevent the error result from
 * being returned, so both are caught and logged.
 *
 * @param opts Compile options (tool directory and start time are used).
 * @param message Human-readable failure description.
 * @returns A result with zero turns and zero token counts.
 */
function finalErrorResult(opts: CompileViaCodexCliOptions, message: string): CompileAgentResult {
  const conversationLogPath = pathJoin(opts.absoluteToolDir, '.compile-log.json');
  try {
    mkdirSync(opts.absoluteToolDir, { recursive: true });
    writeFileSync(conversationLogPath, JSON.stringify({ error: message }, null, 2), 'utf8');
  } catch (err) {
    log(`failed to persist conversation log: ${errMsg(err)}`);
  }
  return {
    success: false,
    outcome: 'error',
    message,
    conversationLogPath,
    turns: 0,
    durationMs: Date.now() - opts.startTime,
    inputTokens: 0,
    outputTokens: 0,
    cacheReadInputTokens: 0,
    cacheCreationInputTokens: 0,
  };
}
694
+
695
/**
 * Renders the prompt section describing the selected candidate and the shared
 * compile context as pretty-printed JSON.
 *
 * @returns An empty string when neither value is provided; otherwise a block
 *   that starts with a newline, with `(none)` standing in for a missing value.
 */
function formatCandidateContext(
  candidate: ToolCandidate | undefined,
  sharedContext: SharedCompileContext | undefined,
): string {
  if (!candidate && !sharedContext) return '';

  const candidateJson = candidate ? JSON.stringify(candidate, null, 2) : '(none)';
  const sharedContextJson = sharedContext ? JSON.stringify(sharedContext, null, 2) : '(none)';

  return [
    '',
    'Selected candidate context:',
    candidateJson,
    '',
    'Shared compile context:',
    sharedContextJson,
    '',
    'Compile only the selected candidate. Do not create tools for other actions in the recording.',
  ].join('\n');
}
709
+
710
/** Returns `err.message` for `Error` instances and `String(err)` for anything else. */
function errMsg(err: unknown): string {
  if (err instanceof Error) return err.message;
  return String(err);
}