imprint-mcp 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97) hide show
  1. package/CHANGELOG.md +168 -0
  2. package/LICENSE +21 -0
  3. package/README.md +322 -0
  4. package/examples/discoverandgo/README.md +57 -0
  5. package/examples/discoverandgo/book_discoverandgo_museum_pass/cron.json +8 -0
  6. package/examples/discoverandgo/book_discoverandgo_museum_pass/index.ts +89 -0
  7. package/examples/discoverandgo/book_discoverandgo_museum_pass/workflow.json +39 -0
  8. package/examples/echo/README.md +37 -0
  9. package/examples/echo/echo_test/index.ts +31 -0
  10. package/examples/google-flights/search_google_flights/index.ts +101 -0
  11. package/examples/google-flights/search_google_flights/parser.test.ts +140 -0
  12. package/examples/google-flights/search_google_flights/parser.ts +189 -0
  13. package/examples/google-flights/search_google_flights/playbook.yaml +130 -0
  14. package/examples/google-flights/search_google_flights/workflow.json +48 -0
  15. package/examples/google-hotels/search_google_hotels/index.ts +194 -0
  16. package/examples/google-hotels/search_google_hotels/parser.test.ts +168 -0
  17. package/examples/google-hotels/search_google_hotels/parser.ts +330 -0
  18. package/examples/google-hotels/search_google_hotels/playbook.yaml +125 -0
  19. package/examples/google-hotels/search_google_hotels/workflow.json +111 -0
  20. package/examples/namecheap-domains/search_namecheap_domains/index.ts +144 -0
  21. package/examples/namecheap-domains/search_namecheap_domains/parser.ts +380 -0
  22. package/examples/namecheap-domains/search_namecheap_domains/playbook.yaml +50 -0
  23. package/examples/namecheap-domains/search_namecheap_domains/request-transform.ts +136 -0
  24. package/examples/namecheap-domains/search_namecheap_domains/workflow.json +97 -0
  25. package/examples/southwest/README.md +81 -0
  26. package/examples/southwest/search_southwest_flights/backends.json +23 -0
  27. package/examples/southwest/search_southwest_flights/cron.json +19 -0
  28. package/examples/southwest/search_southwest_flights/index.ts +110 -0
  29. package/examples/southwest/search_southwest_flights/playbook.yaml +46 -0
  30. package/examples/southwest/search_southwest_flights/workflow.json +54 -0
  31. package/package.json +78 -0
  32. package/prompts/compile-agent.md +580 -0
  33. package/prompts/intent-detection.md +198 -0
  34. package/prompts/playbook-compilation.md +279 -0
  35. package/prompts/request-triage.md +74 -0
  36. package/prompts/tool-candidate-detection.md +104 -0
  37. package/src/cli.ts +1287 -0
  38. package/src/imprint/agent.ts +468 -0
  39. package/src/imprint/app-api-hosts.ts +53 -0
  40. package/src/imprint/backend-ladder.ts +568 -0
  41. package/src/imprint/check.ts +136 -0
  42. package/src/imprint/chromium.ts +211 -0
  43. package/src/imprint/claude-cli-compile.ts +640 -0
  44. package/src/imprint/cli-credential.ts +394 -0
  45. package/src/imprint/codex-cli-compile.ts +712 -0
  46. package/src/imprint/compile-agent-types.ts +40 -0
  47. package/src/imprint/compile-agent.ts +404 -0
  48. package/src/imprint/compile-tools.ts +1389 -0
  49. package/src/imprint/compile.ts +720 -0
  50. package/src/imprint/cookie-jar.ts +246 -0
  51. package/src/imprint/credential-bundle.ts +195 -0
  52. package/src/imprint/credential-extract.ts +290 -0
  53. package/src/imprint/credential-store.ts +707 -0
  54. package/src/imprint/cron.ts +312 -0
  55. package/src/imprint/doctor.ts +223 -0
  56. package/src/imprint/emit.ts +154 -0
  57. package/src/imprint/etld.ts +134 -0
  58. package/src/imprint/freeform-redact.ts +216 -0
  59. package/src/imprint/inject-listener.ts +137 -0
  60. package/src/imprint/install.ts +795 -0
  61. package/src/imprint/integrations.ts +385 -0
  62. package/src/imprint/is-compiled.ts +2 -0
  63. package/src/imprint/json-path.ts +100 -0
  64. package/src/imprint/llm.ts +998 -0
  65. package/src/imprint/load-json.ts +54 -0
  66. package/src/imprint/log.ts +33 -0
  67. package/src/imprint/login.ts +166 -0
  68. package/src/imprint/mcp-compile-server.ts +282 -0
  69. package/src/imprint/mcp-maintenance.ts +1790 -0
  70. package/src/imprint/mcp-server.ts +350 -0
  71. package/src/imprint/multi-progress.ts +69 -0
  72. package/src/imprint/notify.ts +155 -0
  73. package/src/imprint/paths.ts +64 -0
  74. package/src/imprint/playbook-parser.ts +21 -0
  75. package/src/imprint/playbook-runner.ts +465 -0
  76. package/src/imprint/probe-backends.ts +251 -0
  77. package/src/imprint/progress.ts +28 -0
  78. package/src/imprint/record.ts +470 -0
  79. package/src/imprint/redact.ts +550 -0
  80. package/src/imprint/replay-capture.ts +387 -0
  81. package/src/imprint/request-context.ts +66 -0
  82. package/src/imprint/runtime-link.ts +73 -0
  83. package/src/imprint/runtime.ts +942 -0
  84. package/src/imprint/sensitive-keys.ts +156 -0
  85. package/src/imprint/session-diff.ts +409 -0
  86. package/src/imprint/session-merge.ts +198 -0
  87. package/src/imprint/session-writer.ts +149 -0
  88. package/src/imprint/sites.ts +27 -0
  89. package/src/imprint/stealth-fetch.ts +434 -0
  90. package/src/imprint/teach-state.ts +235 -0
  91. package/src/imprint/teach.ts +2120 -0
  92. package/src/imprint/tool-candidates.ts +423 -0
  93. package/src/imprint/tool-loader.ts +186 -0
  94. package/src/imprint/tool-selection.ts +70 -0
  95. package/src/imprint/tracing.ts +508 -0
  96. package/src/imprint/types.ts +472 -0
  97. package/src/imprint/version.ts +21 -0
@@ -0,0 +1,640 @@
1
+ /**
2
+ * compile-agent driver for claude-cli.
3
+ *
4
+ * claude-cli doesn't implement messageWithTools (its CLI surface only does
5
+ * single-turn text completion), so we can't drive it turn-by-turn the way
6
+ * runAgentLoop drives anthropic-api. Instead we shell out to
7
+ * `claude -p` with imprint's compile tools registered as a stdio MCP server
8
+ * and let claude-cli's own internal agent loop drive the work.
9
+ *
10
+ * Key design points:
11
+ *
12
+ * - **Subscription auth**: we deliberately do NOT pass `--bare`. Without bare
13
+ * mode claude-cli reads OAuth from the keychain, so a Pro/Max subscriber
14
+ * spends subscription tokens, not API credit.
15
+ *
16
+ * - **Tool dispatch happens in the MCP server**, not here. See
17
+ * mcp-compile-server.ts. The `done` tool there runs externalVerification
18
+ * inline; on failure it returns the failure list as the tool_result and the
19
+ * model keeps iterating in the same conversation. On success it writes a
20
+ * sentinel file that we read once claude-cli has exited (no polling).
21
+ *
22
+ * - **Progress reporting**: stream-json events from claude-cli are translated
23
+ * into CompileAgentProgress events for the existing onProgress callback,
24
+ * so the spinner UX in teach.ts is unchanged.
25
+ */
26
+
27
+ import { type ChildProcess, spawn } from 'node:child_process';
28
+ import { existsSync, mkdirSync, readFileSync, unlinkSync, writeFileSync } from 'node:fs';
29
+ import { join as pathJoin } from 'node:path';
30
+ import { type Span, context as otelContext } from '@opentelemetry/api';
31
+ import type { OnDeadlineReached } from './agent.ts';
32
+ import type { CompileAgentProgress, CompileAgentResult } from './compile-agent-types.ts';
33
+ import { preferredAgentModel } from './llm.ts';
34
+ import { createLog } from './log.ts';
35
+ import { COMPILE_SENTINELS } from './mcp-compile-server.ts';
36
+ import type { SharedCompileContext, ToolCandidate } from './tool-candidates.ts';
37
+ import {
38
+ endTraceSpan,
39
+ llmSpanAttributes,
40
+ setSpanAttributes,
41
+ startTraceSpan,
42
+ traceJsonInputOutputAttributes,
43
+ traceLlmIoEnabled,
44
+ traced,
45
+ } from './tracing.ts';
46
+ import type { Session } from './types.ts';
47
+
48
/** Debug logger scoped to this driver. */
const log = createLog('compile-claude-cli');

/** Two directories above this module's directory. */
const REPO_ROOT = pathJoin(import.meta.dir, '..', '..');

/** Entry script that is re-invoked (via the bun runner) as the stdio MCP compile server. */
const CLI_PATH = pathJoin(REPO_ROOT, 'src', 'cli.ts');

/** Name under which the compile tools are registered; it also forms the `mcp__<name>__` tool prefix. */
const MCP_SERVER_NAME = 'imprint-compile';

/** Reported to progress listeners as `maxVerificationCycles`. */
const MAX_VERIFICATION_CYCLES = 5;
54
+
55
/** Inputs for one claude-cli driven compile run. */
interface CompileViaClaudeCliOptions {
  /** Recorded session being compiled; its `site` is attached to the trace span. */
  session: Session;
  /** Directory that receives the compiled artifacts, the sentinels and `.compile-log.json`. */
  absoluteToolDir: string;
  /** Session file path; a path not starting with `/` is resolved against the repo root. */
  sessionPath: string;
  /** File handed to claude via `--system-prompt-file`. */
  systemPromptPath: string;
  /** Wall-clock deadline, compared against `Date.now()`. */
  deadlineMs: number;
  /** `Date.now()`-based start of the run; used for elapsed/duration figures. */
  startTime: number;
  /** Optional listener for per-turn / per-tool progress updates. */
  onProgress?: (progress: CompileAgentProgress) => void;
  /** Called when wall-clock deadline is reached; return ms to extend or null to time out. */
  onDeadlineReached?: OnDeadlineReached;
  /**
   * Retain parser.test.ts after successful verification. Mirrors the
   * in-process loop's `keepTest`.
   */
  keepTest?: boolean;
  /** Selected tool candidate; forwarded to the MCP server and embedded in the initial prompt. */
  candidate?: ToolCandidate;
  /** Shared compile context; forwarded to the MCP server and embedded in the initial prompt. */
  sharedContext?: SharedCompileContext;
}
71
+
72
/** One entry of an assistant/user message's `content` array. */
type StreamJsonContentBlock =
  | { type: 'text'; text: string }
  | { type: 'tool_use'; name: string; input?: unknown }
  | { type: 'tool_result'; tool_use_id: string; content: unknown; is_error?: boolean };

/** Token usage carried on the terminal `result` event. */
interface StreamJsonResultUsage {
  input_tokens?: number;
  output_tokens?: number;
  cache_read_input_tokens?: number;
  cache_creation_input_tokens?: number;
}

/**
 * Loose shape of one newline-delimited stream-json event as this driver reads it.
 * Every field except `type` is optional because each event kind carries only
 * some of them.
 */
interface StreamJsonEvent {
  type: string;
  subtype?: string;
  session_id?: string;

  /** Assistant/user message envelope. */
  message?: {
    content?: Array<StreamJsonContentBlock>;
    usage?: { input_tokens?: number; output_tokens?: number };
    stop_reason?: string;
  };

  // Result envelope (terminal event).
  result?: string;
  is_error?: boolean;
  duration_ms?: number;
  num_turns?: number;
  total_cost_usd?: number;
  usage?: StreamJsonResultUsage;

  /** Partial-message stream events. */
  event?: { delta?: { type?: string; text?: string } };
}
101
+
102
/**
 * Public entry point: runs the claude-cli compile inside a
 * `compile.claude_cli_agent` trace span and records the outcome, turn count,
 * duration and token totals on that span before returning the result.
 *
 * @param opts - Compile inputs; see {@link CompileViaClaudeCliOptions}.
 * @returns The result produced by the underlying compile run, unchanged.
 */
export async function compileViaClaudeCli(
  opts: CompileViaClaudeCliOptions,
): Promise<CompileAgentResult> {
  const provider = 'claude-cli';
  const initialAttributes = {
    'imprint.site': opts.session.site,
    'imprint.tool_dir': opts.absoluteToolDir,
    'imprint.provider': provider,
    'imprint.model': preferredAgentModel(provider),
  };

  return await traced('compile.claude_cli_agent', 'AGENT', initialAttributes, async (span) => {
    const compiled = await compileViaClaudeCliImpl(opts);

    const llmAttributes = llmSpanAttributes({
      provider,
      model: preferredAgentModel(provider),
      inputTokens: compiled.inputTokens,
      outputTokens: compiled.outputTokens,
      cacheReadTokens: compiled.cacheReadInputTokens,
      cacheWriteTokens: compiled.cacheCreationInputTokens,
    });

    setSpanAttributes(span, {
      'imprint.compile.outcome': compiled.outcome,
      'imprint.compile.turns': compiled.turns,
      'imprint.compile.duration_ms': compiled.durationMs,
      'imprint.compile.input_tokens': compiled.inputTokens,
      'imprint.compile.output_tokens': compiled.outputTokens,
      'imprint.compile.cache_read_input_tokens': compiled.cacheReadInputTokens,
      'imprint.compile.cache_creation_input_tokens': compiled.cacheCreationInputTokens,
      ...llmAttributes,
    });

    return compiled;
  });
}
137
+
138
/**
 * Prepare the tool directory, assemble the `claude --print` command line, spawn
 * it, and hand the child process to `driveStreamJson`.
 *
 * @param opts - Compile inputs; see `CompileViaClaudeCliOptions`.
 * @returns The classified compile result. Returns an `error` result (instead of
 *   starting a run) when a stale sentinel cannot be removed or when `spawn`
 *   throws synchronously.
 */
async function compileViaClaudeCliImpl(
  opts: CompileViaClaudeCliOptions,
): Promise<CompileAgentResult> {
  // Bound the run. softTurnCap=100 in the in-process loop × up to 5
  // verification cycles = 500 hard ceiling there. Verification is now
  // in-tool so we pick a single bound that comfortably exceeds typical runs
  // (~5-15 turns per the system prompt) plus retry budget.
  const MAX_TURNS = 200;

  // Every tool exposed by our MCP server; each is pre-approved below.
  const COMPILE_TOOL_NAMES = [
    'read_session_summary',
    'read_request',
    'read_response_body',
    'search_response_body',
    'read_file',
    'write_file',
    'run_bash',
    'run_tests',
    'done',
    'give_up',
  ];

  // Ensure tool dir exists and clear any prior sentinels — a stale
  // sentinel from a previous run would short-circuit our success detection.
  mkdirSync(opts.absoluteToolDir, { recursive: true });
  for (const name of [COMPILE_SENTINELS.done, COMPILE_SENTINELS.giveUp]) {
    const sentinelPath = pathJoin(opts.absoluteToolDir, name);
    try {
      // Remove, not truncate — existsSync() is what gates success/give-up
      // detection later.
      unlinkSync(sentinelPath);
    } catch (err) {
      // The common case is "there was nothing to remove".
      if (!existsSync(sentinelPath)) continue;
      // The file is still there: running anyway would let the previous run's
      // verdict be reported as this run's outcome, so refuse to start.
      return finalErrorResult(
        opts,
        `failed to remove stale sentinel ${sentinelPath}: ${errMsg(err)}`,
      );
    }
  }

  // Build the inline MCP config. The MCP server is the same imprint binary
  // re-invoked with the hidden __mcp-compile-server verb. Use the bun runner
  // the parent was launched with so the child runs in the same TS toolchain.
  const bunPath = process.execPath;
  // NOTE(review): the '/' test only recognises POSIX-style absolute paths —
  // confirm whether Windows-style paths need to be supported here.
  const sessionPathAbs = opts.sessionPath.startsWith('/')
    ? opts.sessionPath
    : pathJoin(REPO_ROOT, opts.sessionPath);

  const mcpServerArgs = [
    'run',
    CLI_PATH,
    '__mcp-compile-server',
    '--session-path',
    sessionPathAbs,
    '--tool-dir',
    opts.absoluteToolDir,
    ...(opts.candidate ? ['--candidate-json', JSON.stringify(opts.candidate)] : []),
    ...(opts.sharedContext ? ['--shared-context-json', JSON.stringify(opts.sharedContext)] : []),
  ];
  const mcpConfig = {
    mcpServers: {
      [MCP_SERVER_NAME]: { command: bunPath, args: mcpServerArgs },
    },
  };

  const initialPrompt = `A new compile task is starting.

Session path: ${sessionPathAbs}
Tool directory: ${opts.absoluteToolDir}
You will write artifacts into the tool directory.
${formatCandidateContext(opts.candidate, opts.sharedContext)}

Begin by calling read_session_summary to orient yourself, then proceed per the system prompt.`;

  // Pre-approve every tool from our MCP server so no permission prompt
  // fires in non-interactive print mode.
  const allowedToolArgs = COMPILE_TOOL_NAMES.flatMap((tool) => [
    '--allowedTools',
    `mcp__${MCP_SERVER_NAME}__${tool}`,
  ]);

  const args = [
    '--print',
    '--output-format',
    'stream-json',
    '--verbose',
    '--strict-mcp-config',
    '--mcp-config',
    JSON.stringify(mcpConfig),
    '--system-prompt-file',
    opts.systemPromptPath,
    // Disable the built-in tool set so claude only uses our MCP tools.
    '--tools',
    '',
    ...allowedToolArgs,
    '--max-turns',
    String(MAX_TURNS),
    '--permission-mode',
    'bypassPermissions',
    '--no-session-persistence',
    '--disable-slash-commands',
    '--model',
    preferredAgentModel('claude-cli'),
    initialPrompt,
  ];

  log(`spawning claude (max-turns=${MAX_TURNS}, mcp-server=${MCP_SERVER_NAME})`);

  let child: ChildProcess;
  try {
    child = spawn('claude', args, {
      cwd: REPO_ROOT,
      env: process.env,
      stdio: ['ignore', 'pipe', 'pipe'],
    });
  } catch (err) {
    // Only synchronous failures land here; asynchronous spawn failures are
    // delivered through the child's 'error' event.
    return finalErrorResult(opts, `failed to spawn claude-cli: ${errMsg(err)}`);
  }

  return await driveStreamJson(child, opts);
}
258
+
259
/**
 * Consume a spawned `claude` process to completion and classify the outcome.
 *
 * While the child runs, its newline-delimited stream-json stdout is parsed into
 * progress callbacks, per-turn trace spans and token totals, and a wall-clock
 * deadline is enforced (optionally extended through `opts.onDeadlineReached`).
 * Once the child has exited, the event log is written to `.compile-log.json`
 * and the done / give_up sentinels in the tool directory decide the result.
 *
 * @param child - Process spawned with piped stdout/stderr.
 * @param opts - Compile inputs; see `CompileViaClaudeCliOptions`.
 * @returns A result whose `outcome` is `done`, `timeout`, `give_up`,
 *   `soft_cap` or `error`.
 */
async function driveStreamJson(
  child: ChildProcess,
  opts: CompileViaClaudeCliOptions,
): Promise<CompileAgentResult> {
  // How long the child gets to honour SIGTERM before we escalate to SIGKILL.
  const SIGKILL_GRACE_MS = 5000;
  // How long we wait after 'exit' for stdio to close before giving up on it.
  const STDIO_DRAIN_GRACE_MS = 2000;

  // Capture OTel context so child-process event handlers can parent spans
  // under the current compile.claude_cli_agent span. Bun's event emitters
  // don't propagate AsyncLocalStorage, so without this the agent.turn.*
  // spans appear as orphaned root traces in Phoenix.
  const parentCtx = otelContext.active();

  const conversationLog: unknown[] = [];
  const captureLlmIo = traceLlmIoEnabled();
  let inputTokens = 0;
  let outputTokens = 0;
  let cacheReadInputTokens = 0;
  let cacheCreationInputTokens = 0;
  let turn = 0;
  let lastErrorEvent: StreamJsonEvent | null = null;
  let processError: string | null = null;
  let stderrBuf = '';
  let currentTurnSpan: Span | null = null;
  let turnInputTokens = 0;
  let turnOutputTokens = 0;

  const budgetMs = Math.max(0, opts.deadlineMs - Date.now());
  const fireProgress = (phase: 'thinking' | 'tool', toolName?: string): void => {
    opts.onProgress?.({
      turn,
      phase,
      toolName,
      elapsedMs: Date.now() - opts.startTime,
      budgetMs,
      inputTokens,
      outputTokens,
      verificationCycle: 1,
      maxVerificationCycles: MAX_VERIFICATION_CYCLES,
    });
  };

  // Stamp the open turn span with the tokens of the turn it covers, then end it.
  const closeTurnSpan = (): void => {
    if (!currentTurnSpan) return;
    setSpanAttributes(currentTurnSpan, {
      'imprint.agent.turn_input_tokens': turnInputTokens,
      'imprint.agent.turn_output_tokens': turnOutputTokens,
    });
    endTraceSpan(currentTurnSpan);
    currentTurnSpan = null;
  };

  // Wall-clock guard: if we hit the deadline, ask the user or kill the child.
  let currentDeadlineMs = opts.deadlineMs;
  let childExited = false;
  let killedForDeadline = false;
  let sigkillTimer: ReturnType<typeof setTimeout> | undefined;

  const hasExited = (): boolean =>
    childExited || child.exitCode !== null || child.signalCode !== null;

  const killChild = (): void => {
    log('wall-clock deadline exceeded, terminating claude');
    killedForDeadline = true;
    try {
      child.kill('SIGTERM');
      sigkillTimer = setTimeout(() => {
        // `child.killed` only says a signal was *sent* (true right after the
        // SIGTERM above), so gate the escalation on actual exit state.
        if (hasExited()) return;
        try {
          child.kill('SIGKILL');
        } catch {
          // already gone
        }
      }, SIGKILL_GRACE_MS);
    } catch {
      // already gone
    }
  };

  const scheduleDeadlineCheck = (): ReturnType<typeof setTimeout> => {
    const remaining = Math.max(0, currentDeadlineMs - Date.now());
    return setTimeout(async () => {
      if (childExited) return;
      if (opts.onDeadlineReached) {
        let extensionMs: number | null | undefined;
        try {
          extensionMs = await opts.onDeadlineReached();
        } catch (err) {
          // A failing callback must not become an unhandled rejection; treat
          // it as "no extension granted".
          log(`onDeadlineReached failed: ${errMsg(err)}`);
        }
        if (childExited) return;
        if (extensionMs != null && extensionMs > 0) {
          currentDeadlineMs += extensionMs;
          deadlineTimer = scheduleDeadlineCheck();
          return;
        }
      }
      killChild();
    }, remaining);
  };

  let deadlineTimer = scheduleDeadlineCheck();

  // Handle one stream-json line (one event).
  const handleLine = (rawLine: string): void => {
    const line = rawLine.trim();
    if (!line) return;

    let parsed: unknown;
    try {
      parsed = JSON.parse(line);
    } catch (err) {
      log(`unparseable stream-json line: ${errMsg(err)}`);
      return;
    }
    if (typeof parsed !== 'object' || parsed === null) {
      log('ignoring non-object stream-json line');
      return;
    }
    const evt = parsed as StreamJsonEvent;
    conversationLog.push(evt);

    if (evt.type === 'result') {
      // The terminal event carries run totals: they replace the running sums
      // and must not be added to the per-turn counters.
      if (evt.usage) {
        inputTokens = evt.usage.input_tokens ?? inputTokens;
        outputTokens = evt.usage.output_tokens ?? outputTokens;
        cacheReadInputTokens = evt.usage.cache_read_input_tokens ?? cacheReadInputTokens;
        cacheCreationInputTokens =
          evt.usage.cache_creation_input_tokens ?? cacheCreationInputTokens;
      }
      if (evt.is_error) {
        lastErrorEvent = evt;
      }
      return;
    }

    const content = evt.message?.content;
    const startsTurn = evt.type === 'assistant' && Array.isArray(content);
    if (startsTurn) {
      // Close the previous turn BEFORE counting this event's usage, so each
      // span reports its own tokens rather than the next turn's.
      closeTurnSpan();
      turn++;
      turnInputTokens = 0;
      turnOutputTokens = 0;
    }

    // Token accounting from any event that carries usage.
    const evtInputTokens =
      (evt.usage?.input_tokens ?? 0) + (evt.message?.usage?.input_tokens ?? 0);
    const evtOutputTokens =
      (evt.usage?.output_tokens ?? 0) + (evt.message?.usage?.output_tokens ?? 0);
    inputTokens += evtInputTokens;
    outputTokens += evtOutputTokens;
    turnInputTokens += evtInputTokens;
    turnOutputTokens += evtOutputTokens;

    if (evt.type === 'system' && evt.subtype === 'init') {
      log(`session_id=${evt.session_id ?? '(none)'}`);
      return;
    }

    if (evt.type === 'assistant' && Array.isArray(content)) {
      currentTurnSpan = startTraceSpan(`agent.turn.${turn}`, 'CHAIN', {
        'imprint.agent.turn': turn,
        'imprint.agent.cumulative_input_tokens': inputTokens,
        'imprint.agent.cumulative_output_tokens': outputTokens,
      });
      if (currentTurnSpan && captureLlmIo) {
        setSpanAttributes(currentTurnSpan, traceJsonInputOutputAttributes('output', content));
      }
      fireProgress('thinking');
      for (const block of content) {
        if (block?.type === 'tool_use') {
          const fullName = block.name ?? '(unknown)';
          // Strip mcp__<server>__ prefix for human-readable progress.
          const short = fullName.replace(`mcp__${MCP_SERVER_NAME}__`, '');
          fireProgress('tool', short);
        }
      }
      return;
    }

    if (evt.type === 'user' && Array.isArray(content)) {
      if (currentTurnSpan && captureLlmIo) {
        setSpanAttributes(currentTurnSpan, traceJsonInputOutputAttributes('input', content));
      }
      return;
    }

    if (evt.type === 'system' && evt.subtype === 'api_retry') {
      log(`api_retry: ${(evt as { error?: string }).error ?? '(unknown)'}`);
    }
  };

  // Stdout: newline-delimited stream-json events. With `flush` the trailing,
  // newline-less remainder is processed as a final line as well.
  let stdoutBuf = '';
  const consumeStdout = (text: string, flush = false): void => {
    otelContext.with(parentCtx, () => {
      const pieces = (stdoutBuf + text).split('\n');
      stdoutBuf = pieces.pop() ?? '';
      for (const piece of pieces) handleLine(piece);
      if (flush && stdoutBuf) {
        const tail = stdoutBuf;
        stdoutBuf = '';
        handleLine(tail);
      }
    });
  };

  // Decode at the stream level so a multi-byte UTF-8 character split across
  // two chunks is reassembled instead of being turned into U+FFFD.
  child.stdout?.setEncoding('utf8');
  child.stdout?.on('data', (chunk: string | Buffer) => {
    consumeStdout(typeof chunk === 'string' ? chunk : chunk.toString('utf8'));
  });

  child.stderr?.setEncoding('utf8');
  child.stderr?.on('data', (chunk: string | Buffer) => {
    const s = typeof chunk === 'string' ? chunk : chunk.toString('utf8');
    stderrBuf += s;
    // Forward to our debug log only — don't pollute the user's console.
    log(`[claude stderr] ${s.trim()}`);
  });

  // Wait for the child to exit on its own. Sentinel detection happens after.
  // 'exit' can fire while stdio still holds unread data, so prefer 'close';
  // the grace timer keeps us from waiting indefinitely if the pipes never
  // close after the process has exited.
  const exitCode = await new Promise<number>((resolve) => {
    let settled = false;
    let drainTimer: ReturnType<typeof setTimeout> | undefined;
    const settle = (code: number): void => {
      if (settled) return;
      settled = true;
      childExited = true;
      if (drainTimer) clearTimeout(drainTimer);
      resolve(code);
    };
    child.once('exit', (code) => {
      if (settled) return;
      childExited = true; // stop the deadline guard from acting on a dead process
      drainTimer = setTimeout(() => settle(code ?? -1), STDIO_DRAIN_GRACE_MS);
    });
    child.once('close', (code) => settle(code ?? -1));
    // `on`, not `once`: a later 'error' with no listener attached would throw.
    child.on('error', (err) => {
      processError = errMsg(err);
      settle(-1);
    });
  });
  clearTimeout(deadlineTimer);
  if (sigkillTimer && (child.exitCode !== null || child.signalCode !== null)) {
    clearTimeout(sigkillTimer);
  }

  // Drain any remaining buffered output, then close the final turn span.
  consumeStdout('', true);
  closeTurnSpan();

  // Persist conversation log for post-mortem.
  const conversationLogPath = pathJoin(opts.absoluteToolDir, '.compile-log.json');
  try {
    writeFileSync(conversationLogPath, JSON.stringify(conversationLog, null, 2), 'utf8');
  } catch (err) {
    log(`failed to persist conversation log: ${errMsg(err)}`);
  }

  // Read a sentinel once: null when absent, {} when empty or unparseable.
  const readSentinel = <T extends object>(path: string, label: string): Partial<T> | null => {
    if (!existsSync(path)) return null;
    try {
      const raw = readFileSync(path, 'utf8').trim();
      if (!raw) return {};
      const value: unknown = JSON.parse(raw);
      return typeof value === 'object' && value !== null ? (value as Partial<T>) : {};
    } catch (err) {
      log(`failed to parse ${label} sentinel: ${errMsg(err)}`);
      return {};
    }
  };

  // Inspect sentinels to determine outcome.
  const workflowPath = pathJoin(opts.absoluteToolDir, 'workflow.json');
  const parserPath = pathJoin(opts.absoluteToolDir, 'parser.ts');
  const parserTestPath = pathJoin(opts.absoluteToolDir, 'parser.test.ts');
  const donePayload = readSentinel<{
    summary: string;
    verification: string;
    cycles: number;
    failures: string[];
  }>(pathJoin(opts.absoluteToolDir, COMPILE_SENTINELS.done), 'done');
  const giveUpPayload = readSentinel<{ reason: string; what_was_tried: string }>(
    pathJoin(opts.absoluteToolDir, COMPILE_SENTINELS.giveUp),
    'give_up',
  );

  // Determine success up-front so we can clean up the ephemeral parser.test.ts
  // before constructing baseResult (which captures parserTestPath via existsSync).
  const verifiedOk = donePayload?.verification === 'passed';
  if (verifiedOk && !opts.keepTest && existsSync(parserTestPath)) {
    try {
      unlinkSync(parserTestPath);
    } catch {
      // best effort
    }
  }

  const baseResult: Pick<
    CompileAgentResult,
    | 'workflowPath'
    | 'parserPath'
    | 'parserTestPath'
    | 'conversationLogPath'
    | 'turns'
    | 'durationMs'
    | 'inputTokens'
    | 'outputTokens'
    | 'cacheReadInputTokens'
    | 'cacheCreationInputTokens'
  > = {
    workflowPath: existsSync(workflowPath) ? workflowPath : undefined,
    parserPath: existsSync(parserPath) ? parserPath : undefined,
    parserTestPath: existsSync(parserTestPath) ? parserTestPath : undefined,
    conversationLogPath,
    turns: turn,
    durationMs: Date.now() - opts.startTime,
    inputTokens,
    outputTokens,
    cacheReadInputTokens,
    cacheCreationInputTokens,
  };

  // Wall-clock deadline exceeded?
  const pastDeadline = killedForDeadline || Date.now() > currentDeadlineMs;
  if (pastDeadline && donePayload === null && giveUpPayload === null) {
    return {
      success: false,
      outcome: 'timeout',
      message: `claude-cli exceeded the ${Math.round((currentDeadlineMs - opts.startTime) / 60000)} minute deadline before completing.`,
      ...baseResult,
    };
  }

  if (donePayload !== null) {
    if (verifiedOk) {
      return {
        success: true,
        outcome: 'done',
        message: donePayload.summary ?? 'Task completed',
        ...baseResult,
      };
    }
    const failures = Array.isArray(donePayload.failures) ? donePayload.failures : [];
    return {
      success: false,
      outcome: 'error',
      message: `Verification failed after ${donePayload.cycles ?? '?'} cycles. Final failures:\n${failures.join('\n')}`,
      ...baseResult,
    };
  }

  if (giveUpPayload !== null) {
    return {
      success: false,
      outcome: 'give_up',
      message: `Agent gave up: ${giveUpPayload.reason ?? 'unknown reason'}\n${giveUpPayload.what_was_tried ?? ''}`,
      ...baseResult,
    };
  }

  // No sentinel and clean exit — claude likely hit max-turns or stopped
  // without ever calling done/give_up.
  if (exitCode === 0) {
    return {
      success: false,
      outcome: 'soft_cap',
      message:
        'claude-cli exited without calling done() or give_up(). It may have hit --max-turns or stopped early.',
      ...baseResult,
    };
  }

  // Any other exit → error. The casts undo control-flow narrowing: both
  // variables are only ever assigned inside callbacks.
  const errorTail =
    (lastErrorEvent as StreamJsonEvent | null)?.result ??
    (processError as string | null) ??
    stderrBuf.trim().slice(-500);
  return {
    success: false,
    outcome: 'error',
    message: `claude-cli exited with code ${exitCode}${errorTail ? `\n${errorTail}` : ''}`,
    ...baseResult,
  };
}
600
+
601
/**
 * Build the result for a run that failed before any agent turn happened.
 * The message is also written to `.compile-log.json` in the tool directory
 * (best effort) so the returned `conversationLogPath` has something behind it.
 *
 * @param opts - Compile inputs; only `absoluteToolDir` and `startTime` are read.
 * @param message - Human-readable failure description.
 * @returns An `error` result with zero turns and zero token counts.
 */
function finalErrorResult(opts: CompileViaClaudeCliOptions, message: string): CompileAgentResult {
  const { absoluteToolDir, startTime } = opts;

  mkdirSync(absoluteToolDir, { recursive: true });
  const conversationLogPath = pathJoin(absoluteToolDir, '.compile-log.json');
  const serialized = JSON.stringify({ error: message }, null, 2);
  try {
    writeFileSync(conversationLogPath, serialized, 'utf8');
  } catch {
    // best effort
  }

  const noUsage = {
    inputTokens: 0,
    outputTokens: 0,
    cacheReadInputTokens: 0,
    cacheCreationInputTokens: 0,
  };
  return {
    success: false,
    outcome: 'error',
    message,
    conversationLogPath,
    turns: 0,
    durationMs: Date.now() - startTime,
    ...noUsage,
  };
}
622
+
623
/**
 * Render the optional candidate / shared-context section of the initial prompt.
 *
 * @param candidate - Selected tool candidate, if any.
 * @param sharedContext - Shared compile context, if any.
 * @returns An empty string when both arguments are missing; otherwise a block
 *   that starts with a newline, shows each value as pretty-printed JSON (or
 *   `(none)`), and ends with the single-candidate instruction.
 */
function formatCandidateContext(
  candidate: ToolCandidate | undefined,
  sharedContext: SharedCompileContext | undefined,
): string {
  if (candidate || sharedContext) {
    const render = (value: unknown): string =>
      value ? JSON.stringify(value, null, 2) : '(none)';

    const sections = [
      '',
      'Selected candidate context:',
      render(candidate),
      '',
      'Shared compile context:',
      render(sharedContext),
      '',
      'Compile only the selected candidate. Do not create tools for other actions in the recording.',
    ];
    return sections.join('\n');
  }
  return '';
}
637
+
638
/**
 * Turn a caught value into a printable message.
 *
 * @param err - Anything that was thrown.
 * @returns `err.message` for `Error` instances, `String(err)` for everything else.
 */
function errMsg(err: unknown): string {
  if (err instanceof Error) {
    return err.message;
  }
  return String(err);
}