imprint-mcp 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +168 -0
- package/LICENSE +21 -0
- package/README.md +322 -0
- package/examples/discoverandgo/README.md +57 -0
- package/examples/discoverandgo/book_discoverandgo_museum_pass/cron.json +8 -0
- package/examples/discoverandgo/book_discoverandgo_museum_pass/index.ts +89 -0
- package/examples/discoverandgo/book_discoverandgo_museum_pass/workflow.json +39 -0
- package/examples/echo/README.md +37 -0
- package/examples/echo/echo_test/index.ts +31 -0
- package/examples/google-flights/search_google_flights/index.ts +101 -0
- package/examples/google-flights/search_google_flights/parser.test.ts +140 -0
- package/examples/google-flights/search_google_flights/parser.ts +189 -0
- package/examples/google-flights/search_google_flights/playbook.yaml +130 -0
- package/examples/google-flights/search_google_flights/workflow.json +48 -0
- package/examples/google-hotels/search_google_hotels/index.ts +194 -0
- package/examples/google-hotels/search_google_hotels/parser.test.ts +168 -0
- package/examples/google-hotels/search_google_hotels/parser.ts +330 -0
- package/examples/google-hotels/search_google_hotels/playbook.yaml +125 -0
- package/examples/google-hotels/search_google_hotels/workflow.json +111 -0
- package/examples/namecheap-domains/search_namecheap_domains/index.ts +144 -0
- package/examples/namecheap-domains/search_namecheap_domains/parser.ts +380 -0
- package/examples/namecheap-domains/search_namecheap_domains/playbook.yaml +50 -0
- package/examples/namecheap-domains/search_namecheap_domains/request-transform.ts +136 -0
- package/examples/namecheap-domains/search_namecheap_domains/workflow.json +97 -0
- package/examples/southwest/README.md +81 -0
- package/examples/southwest/search_southwest_flights/backends.json +23 -0
- package/examples/southwest/search_southwest_flights/cron.json +19 -0
- package/examples/southwest/search_southwest_flights/index.ts +110 -0
- package/examples/southwest/search_southwest_flights/playbook.yaml +46 -0
- package/examples/southwest/search_southwest_flights/workflow.json +54 -0
- package/package.json +78 -0
- package/prompts/compile-agent.md +580 -0
- package/prompts/intent-detection.md +198 -0
- package/prompts/playbook-compilation.md +279 -0
- package/prompts/request-triage.md +74 -0
- package/prompts/tool-candidate-detection.md +104 -0
- package/src/cli.ts +1287 -0
- package/src/imprint/agent.ts +468 -0
- package/src/imprint/app-api-hosts.ts +53 -0
- package/src/imprint/backend-ladder.ts +568 -0
- package/src/imprint/check.ts +136 -0
- package/src/imprint/chromium.ts +211 -0
- package/src/imprint/claude-cli-compile.ts +640 -0
- package/src/imprint/cli-credential.ts +394 -0
- package/src/imprint/codex-cli-compile.ts +712 -0
- package/src/imprint/compile-agent-types.ts +40 -0
- package/src/imprint/compile-agent.ts +404 -0
- package/src/imprint/compile-tools.ts +1389 -0
- package/src/imprint/compile.ts +720 -0
- package/src/imprint/cookie-jar.ts +246 -0
- package/src/imprint/credential-bundle.ts +195 -0
- package/src/imprint/credential-extract.ts +290 -0
- package/src/imprint/credential-store.ts +707 -0
- package/src/imprint/cron.ts +312 -0
- package/src/imprint/doctor.ts +223 -0
- package/src/imprint/emit.ts +154 -0
- package/src/imprint/etld.ts +134 -0
- package/src/imprint/freeform-redact.ts +216 -0
- package/src/imprint/inject-listener.ts +137 -0
- package/src/imprint/install.ts +795 -0
- package/src/imprint/integrations.ts +385 -0
- package/src/imprint/is-compiled.ts +2 -0
- package/src/imprint/json-path.ts +100 -0
- package/src/imprint/llm.ts +998 -0
- package/src/imprint/load-json.ts +54 -0
- package/src/imprint/log.ts +33 -0
- package/src/imprint/login.ts +166 -0
- package/src/imprint/mcp-compile-server.ts +282 -0
- package/src/imprint/mcp-maintenance.ts +1790 -0
- package/src/imprint/mcp-server.ts +350 -0
- package/src/imprint/multi-progress.ts +69 -0
- package/src/imprint/notify.ts +155 -0
- package/src/imprint/paths.ts +64 -0
- package/src/imprint/playbook-parser.ts +21 -0
- package/src/imprint/playbook-runner.ts +465 -0
- package/src/imprint/probe-backends.ts +251 -0
- package/src/imprint/progress.ts +28 -0
- package/src/imprint/record.ts +470 -0
- package/src/imprint/redact.ts +550 -0
- package/src/imprint/replay-capture.ts +387 -0
- package/src/imprint/request-context.ts +66 -0
- package/src/imprint/runtime-link.ts +73 -0
- package/src/imprint/runtime.ts +942 -0
- package/src/imprint/sensitive-keys.ts +156 -0
- package/src/imprint/session-diff.ts +409 -0
- package/src/imprint/session-merge.ts +198 -0
- package/src/imprint/session-writer.ts +149 -0
- package/src/imprint/sites.ts +27 -0
- package/src/imprint/stealth-fetch.ts +434 -0
- package/src/imprint/teach-state.ts +235 -0
- package/src/imprint/teach.ts +2120 -0
- package/src/imprint/tool-candidates.ts +423 -0
- package/src/imprint/tool-loader.ts +186 -0
- package/src/imprint/tool-selection.ts +70 -0
- package/src/imprint/tracing.ts +508 -0
- package/src/imprint/types.ts +472 -0
- package/src/imprint/version.ts +21 -0
|
@@ -0,0 +1,640 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* compile-agent driver for claude-cli.
|
|
3
|
+
*
|
|
4
|
+
* claude-cli doesn't implement messageWithTools (its CLI surface only does
|
|
5
|
+
* single-turn text completion), so we can't drive it turn-by-turn the way
|
|
6
|
+
* runAgentLoop drives anthropic-api. Instead we shell out to
|
|
7
|
+
* `claude -p` with imprint's compile tools registered as a stdio MCP server
|
|
8
|
+
* and let claude-cli's own internal agent loop drive the work.
|
|
9
|
+
*
|
|
10
|
+
* Key design points:
|
|
11
|
+
*
|
|
12
|
+
* - **Subscription auth**: we deliberately do NOT pass `--bare`. Without bare
|
|
13
|
+
* mode claude-cli reads OAuth from the keychain, so a Pro/Max subscriber
|
|
14
|
+
* spends subscription tokens, not API credit.
|
|
15
|
+
*
|
|
16
|
+
* - **Tool dispatch happens in the MCP server**, not here. See
|
|
17
|
+
* mcp-compile-server.ts. The `done` tool there runs externalVerification
|
|
18
|
+
* inline; on failure it returns the failure list as the tool_result and the
|
|
19
|
+
* model keeps iterating in the same conversation. On success it writes a
|
|
20
|
+
* sentinel file we poll for.
|
|
21
|
+
*
|
|
22
|
+
* - **Progress reporting**: stream-json events from claude-cli are translated
|
|
23
|
+
* into CompileAgentProgress events for the existing onProgress callback,
|
|
24
|
+
* so the spinner UX in teach.ts is unchanged.
|
|
25
|
+
*/
|
|
26
|
+
|
|
27
|
+
import { type ChildProcess, spawn } from 'node:child_process';
import { existsSync, mkdirSync, readFileSync, unlinkSync, writeFileSync } from 'node:fs';
import { isAbsolute, join as pathJoin } from 'node:path';
import { type Span, context as otelContext } from '@opentelemetry/api';
import type { OnDeadlineReached } from './agent.ts';
import type { CompileAgentProgress, CompileAgentResult } from './compile-agent-types.ts';
import { preferredAgentModel } from './llm.ts';
import { createLog } from './log.ts';
import { COMPILE_SENTINELS } from './mcp-compile-server.ts';
import type { SharedCompileContext, ToolCandidate } from './tool-candidates.ts';
import {
  endTraceSpan,
  llmSpanAttributes,
  setSpanAttributes,
  startTraceSpan,
  traceJsonInputOutputAttributes,
  traceLlmIoEnabled,
  traced,
} from './tracing.ts';
import type { Session } from './types.ts';
|
|
47
|
+
|
|
48
|
+
/** Debug logger for this driver; all diagnostics below go through it. */
const log = createLog('compile-claude-cli');

/** Two directory levels above the directory containing this module. */
const REPO_ROOT = pathJoin(import.meta.dir, '../..');

/** CLI entry point that is re-invoked as the stdio MCP server. */
const CLI_PATH = pathJoin(REPO_ROOT, 'src/cli.ts');

/** Name the MCP server is registered under; also the `mcp__<name>__` tool prefix. */
const MCP_SERVER_NAME = 'imprint-compile';

/** Reported to progress listeners as `maxVerificationCycles`. */
const MAX_VERIFICATION_CYCLES = 5;
|
|
54
|
+
|
|
55
|
+
/** Inputs for a single claude-cli driven compile run. */
interface CompileViaClaudeCliOptions {
  /** Recorded session being compiled; its `site` is attached to the trace span. */
  session: Session;
  /** Directory that receives the generated artifacts, sentinels and the conversation log. */
  absoluteToolDir: string;
  /** Path to the session file handed to the MCP server; relative paths are resolved against the repo root. */
  sessionPath: string;
  /** File passed to claude via `--system-prompt-file`. */
  systemPromptPath: string;
  /** Wall-clock deadline, compared against `Date.now()`. */
  deadlineMs: number;
  /** Timestamp (same clock as `Date.now()`) used to compute elapsed time and duration. */
  startTime: number;
  /** Receives a progress event for every assistant turn and tool call. */
  onProgress?: (progress: CompileAgentProgress) => void;
  /** Invoked when the wall-clock deadline is hit; yields ms to extend by, or null to time out. */
  onDeadlineReached?: OnDeadlineReached;
  /**
   * Keep parser.test.ts after a successful verification. Mirrors the
   * in-process loop's `keepTest`.
   */
  keepTest?: boolean;
  /** Candidate to compile; forwarded to the MCP server and the initial prompt. */
  candidate?: ToolCandidate;
  /** Shared compile context; forwarded to the MCP server and the initial prompt. */
  sharedContext?: SharedCompileContext;
}
|
|
71
|
+
|
|
72
|
+
/** One entry of an assistant/user message's `content` array. */
type StreamJsonContentBlock =
  | { type: 'text'; text: string }
  | { type: 'tool_use'; name: string; input?: unknown }
  | { type: 'tool_result'; tool_use_id: string; content: unknown; is_error?: boolean };

/** Message envelope carried by `assistant` and `user` events. */
interface StreamJsonMessage {
  content?: Array<StreamJsonContentBlock>;
  usage?: { input_tokens?: number; output_tokens?: number };
  stop_reason?: string;
}

/** Token totals carried by the terminal `result` event. */
interface StreamJsonResultUsage {
  input_tokens?: number;
  output_tokens?: number;
  cache_read_input_tokens?: number;
  cache_creation_input_tokens?: number;
}

/**
 * A single newline-delimited event from `claude --output-format stream-json`.
 * Only the fields this driver reads are declared; every field except `type`
 * is optional because which ones are present depends on the event kind.
 */
interface StreamJsonEvent {
  type: string;
  subtype?: string;
  session_id?: string;

  // --- assistant/user message envelope ---
  message?: StreamJsonMessage;

  // --- result envelope (terminal event) ---
  result?: string;
  is_error?: boolean;
  duration_ms?: number;
  num_turns?: number;
  total_cost_usd?: number;
  usage?: StreamJsonResultUsage;

  // --- partial-message stream events ---
  event?: { delta?: { type?: string; text?: string } };
}
|
|
101
|
+
|
|
102
|
+
/**
 * Public entry point: runs the claude-cli compile inside a
 * `compile.claude_cli_agent` trace span and records the outcome, turn count,
 * duration and token usage on that span before returning the result.
 */
export async function compileViaClaudeCli(
  opts: CompileViaClaudeCliOptions,
): Promise<CompileAgentResult> {
  const spanAttributes = {
    'imprint.site': opts.session.site,
    'imprint.tool_dir': opts.absoluteToolDir,
    'imprint.provider': 'claude-cli',
    'imprint.model': preferredAgentModel('claude-cli'),
  };

  return await traced('compile.claude_cli_agent', 'AGENT', spanAttributes, async (span) => {
    const compiled = await compileViaClaudeCliImpl(opts);

    // Generic LLM attributes derived from the same totals as the imprint.* ones.
    const llmAttributes = llmSpanAttributes({
      provider: 'claude-cli',
      model: preferredAgentModel('claude-cli'),
      inputTokens: compiled.inputTokens,
      outputTokens: compiled.outputTokens,
      cacheReadTokens: compiled.cacheReadInputTokens,
      cacheWriteTokens: compiled.cacheCreationInputTokens,
    });

    setSpanAttributes(span, {
      'imprint.compile.outcome': compiled.outcome,
      'imprint.compile.turns': compiled.turns,
      'imprint.compile.duration_ms': compiled.durationMs,
      'imprint.compile.input_tokens': compiled.inputTokens,
      'imprint.compile.output_tokens': compiled.outputTokens,
      'imprint.compile.cache_read_input_tokens': compiled.cacheReadInputTokens,
      'imprint.compile.cache_creation_input_tokens': compiled.cacheCreationInputTokens,
      ...llmAttributes,
    });

    return compiled;
  });
}
|
|
137
|
+
|
|
138
|
+
/**
 * Prepares the tool directory, builds the `claude` command line (inline MCP
 * config, pre-approved tools, turn bound, initial prompt), spawns the CLI and
 * hands the child process to `driveStreamJson`, whose result is returned.
 *
 * @param opts Compile options; `absoluteToolDir` is created if missing.
 * @returns The compile result, or an error result if `spawn` throws synchronously.
 */
async function compileViaClaudeCliImpl(
  opts: CompileViaClaudeCliOptions,
): Promise<CompileAgentResult> {
  // Bound the run. softTurnCap=100 in the in-process loop × up to 5
  // verification cycles = 500 hard ceiling there. Verification is now
  // in-tool so we pick a single bound that comfortably exceeds typical runs
  // (~5-15 turns per the system prompt) plus retry budget.
  const maxTurns = 200;

  // Tools exposed by the MCP server that claude is pre-approved to call.
  const compileToolNames = [
    'read_session_summary',
    'read_request',
    'read_response_body',
    'search_response_body',
    'read_file',
    'write_file',
    'run_bash',
    'run_tests',
    'done',
    'give_up',
  ];

  // Ensure tool dir exists and clear any prior sentinels — a stale
  // sentinel from a previous run would short-circuit our success detection.
  mkdirSync(opts.absoluteToolDir, { recursive: true });
  for (const name of [COMPILE_SENTINELS.done, COMPILE_SENTINELS.giveUp]) {
    const sentinelPath = pathJoin(opts.absoluteToolDir, name);
    if (!existsSync(sentinelPath)) continue;
    try {
      // remove, not truncate — existsSync() is what gates success/give-up detection later
      unlinkSync(sentinelPath);
    } catch (err) {
      // Still best effort, but leave a trace: a sentinel that survives here
      // can be mistaken for this run's outcome.
      log(`could not remove stale sentinel ${sentinelPath}: ${errMsg(err)}`);
    }
  }

  // Build the inline MCP config. The MCP server is the same imprint binary
  // re-invoked with the hidden __mcp-compile-server verb. Use the bun runner
  // the parent was launched with so the child runs in the same TS toolchain.
  const bunPath = process.execPath;
  // isAbsolute() also recognises drive-letter/UNC paths, unlike a leading-'/' test.
  const sessionPathAbs = isAbsolute(opts.sessionPath)
    ? opts.sessionPath
    : pathJoin(REPO_ROOT, opts.sessionPath);
  const mcpConfig = {
    mcpServers: {
      [MCP_SERVER_NAME]: {
        command: bunPath,
        args: [
          'run',
          CLI_PATH,
          '__mcp-compile-server',
          '--session-path',
          sessionPathAbs,
          '--tool-dir',
          opts.absoluteToolDir,
          ...(opts.candidate ? ['--candidate-json', JSON.stringify(opts.candidate)] : []),
          ...(opts.sharedContext
            ? ['--shared-context-json', JSON.stringify(opts.sharedContext)]
            : []),
        ],
      },
    },
  };

  const initialPrompt = `A new compile task is starting.

Session path: ${sessionPathAbs}
Tool directory: ${opts.absoluteToolDir}
You will write artifacts into the tool directory.
${formatCandidateContext(opts.candidate, opts.sharedContext)}

Begin by calling read_session_summary to orient yourself, then proceed per the system prompt.`;

  const args = [
    '--print',
    '--output-format',
    'stream-json',
    '--verbose',
    '--strict-mcp-config',
    '--mcp-config',
    JSON.stringify(mcpConfig),
    '--system-prompt-file',
    opts.systemPromptPath,
    // Disable the built-in tool set so claude only uses our MCP tools.
    '--tools',
    '',
    // Pre-approve every tool from our MCP server so no permission prompt
    // fires in non-interactive print mode.
    ...compileToolNames.flatMap((tool) => [
      '--allowedTools',
      `mcp__${MCP_SERVER_NAME}__${tool}`,
    ]),
    '--max-turns',
    String(maxTurns),
    '--permission-mode',
    'bypassPermissions',
    '--no-session-persistence',
    '--disable-slash-commands',
    '--model',
    preferredAgentModel('claude-cli'),
    initialPrompt,
  ];

  log(`spawning claude (max-turns=${maxTurns}, mcp-server=${MCP_SERVER_NAME})`);

  let child: ChildProcess;
  try {
    child = spawn('claude', args, {
      cwd: REPO_ROOT,
      env: process.env,
      stdio: ['ignore', 'pipe', 'pipe'],
    });
  } catch (err) {
    // Only synchronous spawn failures land here; failures reported through
    // the child's 'error' event are handled by driveStreamJson.
    return finalErrorResult(opts, `failed to spawn claude-cli: ${errMsg(err)}`);
  }

  return await driveStreamJson(child, opts);
}
|
|
258
|
+
|
|
259
|
+
/** Fields read from the `done` sentinel file. */
interface DoneSentinelPayload {
  summary?: string;
  verification?: string;
  cycles?: number;
  failures?: string[];
}

/** Fields read from the `give_up` sentinel file. */
interface GiveUpSentinelPayload {
  reason?: string;
  what_was_tried?: string;
}

/**
 * Reads a sentinel file and returns its JSON object. An empty file, invalid
 * JSON, or a JSON value that is not an object all yield `{}` (the latter two
 * are logged), so callers can read optional fields without further guards.
 */
function readSentinelPayload(sentinelPath: string, label: string): object {
  try {
    const raw = readFileSync(sentinelPath, 'utf8').trim();
    if (!raw) return {};
    const parsed: unknown = JSON.parse(raw);
    if (parsed !== null && typeof parsed === 'object') return parsed;
    log(`failed to parse ${label} sentinel: payload is not a JSON object`);
  } catch (err) {
    log(`failed to parse ${label} sentinel: ${errMsg(err)}`);
  }
  return {};
}

/**
 * Consumes the stream-json output of a spawned `claude` process until it
 * ends, then derives the compile outcome from the sentinel files in the tool
 * directory.
 *
 * While the child runs, each stdout line is parsed as a JSON event: assistant
 * events open a new `agent.turn.N` span and fire progress callbacks, user
 * events are attached to the current span as input, and the `result` event
 * supplies the final token totals. A wall-clock timer terminates the child
 * when the deadline passes, unless `onDeadlineReached` grants an extension.
 *
 * @param child The spawned claude process (stdout/stderr piped).
 * @param opts  Compile options (tool dir, deadline, callbacks).
 * @returns A result whose `outcome` is `done`, `give_up`, `timeout`,
 *          `soft_cap` (clean exit without a sentinel) or `error`.
 */
async function driveStreamJson(
  child: ChildProcess,
  opts: CompileViaClaudeCliOptions,
): Promise<CompileAgentResult> {
  // Delay between SIGTERM and the SIGKILL follow-up.
  const killEscalationMs = 5000;
  // How long to wait for stdio to close after 'exit' before giving up on
  // trailing output (bounded so a lingering pipe holder cannot hang us).
  const stdioDrainGraceMs = 2000;

  // Capture OTel context so child-process event handlers can parent spans
  // under the current compile.claude_cli_agent span. Bun's event emitters
  // don't propagate AsyncLocalStorage, so without this the agent.turn.*
  // spans appear as orphaned root traces in Phoenix.
  const parentCtx = otelContext.active();

  const conversationLog: unknown[] = [];
  const captureLlmIo = traceLlmIoEnabled();
  let inputTokens = 0;
  let outputTokens = 0;
  let cacheReadInputTokens = 0;
  let cacheCreationInputTokens = 0;
  let turn = 0;
  let lastErrorEvent: StreamJsonEvent | null = null;
  let stderrBuf = '';
  let processErrorMessage = '';
  let currentTurnSpan: Span | null = null;
  let turnInputTokens = 0;
  let turnOutputTokens = 0;

  const budgetMs = Math.max(0, opts.deadlineMs - Date.now());
  const fireProgress = (phase: 'thinking' | 'tool', toolName?: string): void => {
    opts.onProgress?.({
      turn,
      phase,
      toolName,
      elapsedMs: Date.now() - opts.startTime,
      budgetMs,
      inputTokens,
      outputTokens,
      verificationCycle: 1,
      maxVerificationCycles: MAX_VERIFICATION_CYCLES,
    });
  };

  // Stamps the per-turn token totals on the open turn span and ends it.
  const closeTurnSpan = (): void => {
    if (!currentTurnSpan) return;
    setSpanAttributes(currentTurnSpan, {
      'imprint.agent.turn_input_tokens': turnInputTokens,
      'imprint.agent.turn_output_tokens': turnOutputTokens,
    });
    endTraceSpan(currentTurnSpan);
    currentTurnSpan = null;
  };

  // Handles one complete, non-empty stream-json line.
  const handleLine = (line: string): void => {
    let evt: StreamJsonEvent;
    try {
      const parsed: unknown = JSON.parse(line);
      if (parsed === null || typeof parsed !== 'object') {
        log('unparseable stream-json line: not a JSON object');
        return;
      }
      evt = parsed as StreamJsonEvent;
    } catch (err) {
      log(`unparseable stream-json line: ${errMsg(err)}`);
      return;
    }

    conversationLog.push(evt);

    // Token accounting from any event that carries usage.
    const evtInputTokens =
      (evt.usage?.input_tokens ?? 0) + (evt.message?.usage?.input_tokens ?? 0);
    const evtOutputTokens =
      (evt.usage?.output_tokens ?? 0) + (evt.message?.usage?.output_tokens ?? 0);
    inputTokens += evtInputTokens;
    outputTokens += evtOutputTokens;

    if (evt.type === 'system' && evt.subtype === 'init') {
      log(`session_id=${evt.session_id ?? '(none)'}`);
      return;
    }

    const content = evt.message?.content;

    if (evt.type === 'assistant' && Array.isArray(content)) {
      // Close the previous turn with ITS totals before counting this event,
      // so each span reports the usage of its own turn.
      closeTurnSpan();
      turn++;
      turnInputTokens = evtInputTokens;
      turnOutputTokens = evtOutputTokens;
      currentTurnSpan = startTraceSpan(`agent.turn.${turn}`, 'CHAIN', {
        'imprint.agent.turn': turn,
        'imprint.agent.cumulative_input_tokens': inputTokens,
        'imprint.agent.cumulative_output_tokens': outputTokens,
      });
      if (currentTurnSpan && captureLlmIo) {
        setSpanAttributes(currentTurnSpan, traceJsonInputOutputAttributes('output', content));
      }
      fireProgress('thinking');
      for (const block of content) {
        if (block && (block as { type?: string }).type === 'tool_use') {
          const fullName = (block as { name?: string }).name ?? '(unknown)';
          // Strip mcp__<server>__ prefix for human-readable progress.
          const short = fullName.replace(`mcp__${MCP_SERVER_NAME}__`, '');
          fireProgress('tool', short);
        }
      }
      return;
    }

    if (evt.type === 'result') {
      // The result event's usage replaces the running totals; it is not
      // attributed to any single turn.
      if (evt.usage) {
        inputTokens = evt.usage.input_tokens ?? inputTokens;
        outputTokens = evt.usage.output_tokens ?? outputTokens;
        cacheReadInputTokens = evt.usage.cache_read_input_tokens ?? cacheReadInputTokens;
        cacheCreationInputTokens =
          evt.usage.cache_creation_input_tokens ?? cacheCreationInputTokens;
      }
      if (evt.is_error) {
        lastErrorEvent = evt;
      }
      return;
    }

    // Any other usage-bearing event belongs to the turn in progress.
    turnInputTokens += evtInputTokens;
    turnOutputTokens += evtOutputTokens;

    if (evt.type === 'user' && Array.isArray(content)) {
      if (currentTurnSpan && captureLlmIo) {
        setSpanAttributes(currentTurnSpan, traceJsonInputOutputAttributes('input', content));
      }
      return;
    }

    if (evt.type === 'system' && evt.subtype === 'api_retry') {
      log(`api_retry: ${(evt as { error?: string }).error ?? '(unknown)'}`);
    }
  };

  // Wall-clock guard: if we hit the deadline, ask the user or kill the child.
  let currentDeadlineMs = opts.deadlineMs;
  let childExited = false;
  let deadlineKilled = false;
  let killTimer: ReturnType<typeof setTimeout> | undefined;

  const killChild = (): void => {
    log('wall-clock deadline exceeded, terminating claude');
    deadlineKilled = true;
    try {
      child.kill('SIGTERM');
    } catch {
      // already gone
      return;
    }
    killTimer = setTimeout(() => {
      // `child.killed` only says a signal was sent, so it is already true
      // after SIGTERM; check whether the process has actually ended instead.
      if (childExited || child.exitCode !== null || child.signalCode !== null) return;
      try {
        child.kill('SIGKILL');
      } catch {
        // already gone
      }
    }, killEscalationMs);
  };

  // Asks the caller for more time; a throwing callback counts as "no extension".
  const askForExtension = async () => {
    try {
      return (await opts.onDeadlineReached?.()) ?? null;
    } catch (err) {
      log(`onDeadlineReached failed: ${errMsg(err)}`);
      return null;
    }
  };

  const scheduleDeadlineCheck = (): ReturnType<typeof setTimeout> => {
    const remaining = Math.max(0, currentDeadlineMs - Date.now());
    return setTimeout(async () => {
      if (childExited) return;
      if (opts.onDeadlineReached) {
        const extensionMs = await askForExtension();
        if (childExited) return;
        if (extensionMs != null && extensionMs > 0) {
          currentDeadlineMs += extensionMs;
          deadlineTimer = scheduleDeadlineCheck();
          return;
        }
      }
      killChild();
    }, remaining);
  };

  let deadlineTimer = scheduleDeadlineCheck();

  // Stdout: newline-delimited stream-json events. Decoding at the stream
  // level keeps multi-byte characters intact across chunk boundaries.
  let stdoutBuf = '';
  child.stdout?.setEncoding('utf8');
  child.stdout?.on('data', (chunk: string | Buffer) => {
    otelContext.with(parentCtx, () => {
      const text = typeof chunk === 'string' ? chunk : chunk.toString('utf8');
      const lines = (stdoutBuf + text).split('\n');
      // The last piece has no terminating newline yet; keep it for the next chunk.
      stdoutBuf = lines.pop() ?? '';
      for (const rawLine of lines) {
        const line = rawLine.trim();
        if (line) handleLine(line);
      }
    });
  });

  child.stderr?.setEncoding('utf8');
  child.stderr?.on('data', (chunk: string | Buffer) => {
    const s = typeof chunk === 'string' ? chunk : chunk.toString('utf8');
    stderrBuf += s;
    // Forward to our debug log only — don't pollute the user's console.
    log(`[claude stderr] ${s.trim()}`);
  });

  // Wait for the child to finish. 'exit' can fire while stdout data is still
  // buffered, so prefer 'close' (stdio fully drained) and fall back to a short
  // grace period after 'exit'. Sentinel detection happens after.
  const exitCode = await new Promise<number>((resolve) => {
    let code = -1;
    let drainTimer: ReturnType<typeof setTimeout> | undefined;
    const settle = (): void => {
      clearTimeout(drainTimer);
      resolve(code);
    };
    child.once('exit', (exitStatus) => {
      childExited = true;
      code = exitStatus ?? -1;
      drainTimer = setTimeout(settle, stdioDrainGraceMs);
    });
    child.once('close', (closeStatus) => {
      if (closeStatus != null) code = closeStatus;
      settle();
    });
    child.once('error', (err) => {
      // e.g. the `claude` binary is not on PATH; keep the reason for the result message.
      processErrorMessage = errMsg(err);
      settle();
    });
  });
  childExited = true;
  clearTimeout(deadlineTimer);
  clearTimeout(killTimer);
  // Nothing arriving after this point may open new spans or mutate totals.
  child.stdout?.removeAllListeners('data');
  child.stderr?.removeAllListeners('data');

  // Drain any remaining buffered output: a final event without a trailing
  // newline is still a complete event.
  const stdoutTail = stdoutBuf.trim();
  stdoutBuf = '';
  if (stdoutTail) {
    otelContext.with(parentCtx, () => handleLine(stdoutTail));
  }
  closeTurnSpan();

  // Persist conversation log for post-mortem.
  const conversationLogPath = pathJoin(opts.absoluteToolDir, '.compile-log.json');
  try {
    writeFileSync(conversationLogPath, JSON.stringify(conversationLog, null, 2), 'utf8');
  } catch (err) {
    log(`failed to persist conversation log: ${errMsg(err)}`);
  }

  // Inspect sentinels to determine outcome.
  const doneSentinel = pathJoin(opts.absoluteToolDir, COMPILE_SENTINELS.done);
  const giveUpSentinel = pathJoin(opts.absoluteToolDir, COMPILE_SENTINELS.giveUp);
  const workflowPath = pathJoin(opts.absoluteToolDir, 'workflow.json');
  const parserPath = pathJoin(opts.absoluteToolDir, 'parser.ts');
  const parserTestPath = pathJoin(opts.absoluteToolDir, 'parser.test.ts');

  const hasDoneSentinel = existsSync(doneSentinel);
  const hasGiveUpSentinel = existsSync(giveUpSentinel);
  const donePayload = (
    hasDoneSentinel ? readSentinelPayload(doneSentinel, 'done') : {}
  ) as DoneSentinelPayload;

  // Determine success up-front so we can clean up the ephemeral parser.test.ts
  // before constructing baseResult (which captures parserTestPath via existsSync).
  const verifiedOk = donePayload.verification === 'passed';
  if (verifiedOk && !opts.keepTest && existsSync(parserTestPath)) {
    try {
      unlinkSync(parserTestPath);
    } catch {
      // best effort
    }
  }

  const baseResult: Pick<
    CompileAgentResult,
    | 'workflowPath'
    | 'parserPath'
    | 'parserTestPath'
    | 'conversationLogPath'
    | 'turns'
    | 'durationMs'
    | 'inputTokens'
    | 'outputTokens'
    | 'cacheReadInputTokens'
    | 'cacheCreationInputTokens'
  > = {
    workflowPath: existsSync(workflowPath) ? workflowPath : undefined,
    parserPath: existsSync(parserPath) ? parserPath : undefined,
    parserTestPath: existsSync(parserTestPath) ? parserTestPath : undefined,
    conversationLogPath,
    turns: turn,
    durationMs: Date.now() - opts.startTime,
    inputTokens,
    outputTokens,
    cacheReadInputTokens,
    cacheCreationInputTokens,
  };

  // Wall-clock deadline exceeded?
  const pastDeadline = deadlineKilled || Date.now() > currentDeadlineMs;
  if (pastDeadline && !hasDoneSentinel && !hasGiveUpSentinel) {
    return {
      success: false,
      outcome: 'timeout',
      message: `claude-cli exceeded the ${Math.round((currentDeadlineMs - opts.startTime) / 60000)} minute deadline before completing.`,
      ...baseResult,
    };
  }

  if (hasDoneSentinel) {
    if (verifiedOk) {
      return {
        success: true,
        outcome: 'done',
        message: donePayload.summary ?? 'Task completed',
        ...baseResult,
      };
    }
    const failures = Array.isArray(donePayload.failures) ? donePayload.failures.join('\n') : '';
    return {
      success: false,
      outcome: 'error',
      message: `Verification failed after ${donePayload.cycles ?? '?'} cycles. Final failures:\n${failures}`,
      ...baseResult,
    };
  }

  if (hasGiveUpSentinel) {
    const giveUpPayload = readSentinelPayload(giveUpSentinel, 'give_up') as GiveUpSentinelPayload;
    return {
      success: false,
      outcome: 'give_up',
      message: `Agent gave up: ${giveUpPayload.reason ?? 'unknown reason'}\n${giveUpPayload.what_was_tried ?? ''}`,
      ...baseResult,
    };
  }

  // No sentinel and clean exit — claude likely hit max-turns or stopped
  // without ever calling done/give_up.
  if (exitCode === 0) {
    return {
      success: false,
      outcome: 'soft_cap',
      message:
        'claude-cli exited without calling done() or give_up(). It may have hit --max-turns or stopped early.',
      ...baseResult,
    };
  }

  // Any other exit → error. Prefer the CLI's own error result, then a
  // process-level error (e.g. spawn failure), then the tail of stderr.
  const errorTail =
    (lastErrorEvent as StreamJsonEvent | null)?.result ??
    (processErrorMessage || stderrBuf.trim().slice(-500));
  return {
    success: false,
    outcome: 'error',
    message: `claude-cli exited with code ${exitCode}${errorTail ? `\n${errorTail}` : ''}`,
    ...baseResult,
  };
}
|
|
600
|
+
|
|
601
|
+
/**
 * Builds the result for a run that failed before any turn happened.
 * Makes sure the tool directory exists, writes `{ error: message }` to
 * `.compile-log.json` there (write failures are ignored), and returns an
 * `error` outcome with zeroed turn and token counters.
 */
function finalErrorResult(opts: CompileViaClaudeCliOptions, message: string): CompileAgentResult {
  const toolDir = opts.absoluteToolDir;
  mkdirSync(toolDir, { recursive: true });

  const conversationLogPath = pathJoin(toolDir, '.compile-log.json');
  const logBody = JSON.stringify({ error: message }, null, 2);
  try {
    writeFileSync(conversationLogPath, logBody, 'utf8');
  } catch {
    // best effort
  }

  const durationMs = Date.now() - opts.startTime;
  return {
    success: false,
    outcome: 'error',
    message,
    conversationLogPath,
    turns: 0,
    durationMs,
    inputTokens: 0,
    outputTokens: 0,
    cacheReadInputTokens: 0,
    cacheCreationInputTokens: 0,
  };
}
|
|
622
|
+
|
|
623
|
+
/**
 * Renders the candidate and shared-context sections of the initial prompt.
 * Returns an empty string when neither value is supplied; otherwise each
 * section shows the value as 2-space-indented JSON, or `(none)` if absent.
 */
function formatCandidateContext(
  candidate: ToolCandidate | undefined,
  sharedContext: SharedCompileContext | undefined,
): string {
  if (!(candidate || sharedContext)) return '';

  const render = (value: unknown): string =>
    value ? JSON.stringify(value, null, 2) : '(none)';

  // The leading empty entry reproduces the newline the block starts with.
  return [
    '',
    'Selected candidate context:',
    render(candidate),
    '',
    'Shared compile context:',
    render(sharedContext),
    '',
    'Compile only the selected candidate. Do not create tools for other actions in the recording.',
  ].join('\n');
}
|
|
637
|
+
|
|
638
|
+
/** Extracts a printable message from a caught value: `Error#message`, else `String(value)`. */
function errMsg(err: unknown): string {
  if (err instanceof Error) {
    return err.message;
  }
  return String(err);
}
|