imprint-mcp 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +165 -201
- package/examples/discoverandgo/README.md +1 -1
- package/examples/echo/README.md +1 -1
- package/examples/google-flights/README.md +28 -0
- package/examples/google-flights/_shared/batchexecute.ts +63 -0
- package/examples/google-flights/_shared/flights_request.ts +95 -0
- package/examples/google-flights/_shared/package.json +9 -0
- package/examples/google-flights/get_flight_booking_details/index.ts +159 -0
- package/examples/google-flights/get_flight_booking_details/package.json +9 -0
- package/examples/google-flights/get_flight_booking_details/parser.ts +182 -0
- package/examples/google-flights/get_flight_booking_details/playbook.yaml +138 -0
- package/examples/google-flights/get_flight_booking_details/request-transform.ts +86 -0
- package/examples/google-flights/get_flight_booking_details/workflow.json +98 -0
- package/examples/google-flights/get_flight_calendar_prices/index.ts +131 -0
- package/examples/google-flights/get_flight_calendar_prices/package.json +9 -0
- package/examples/google-flights/get_flight_calendar_prices/parser.ts +86 -0
- package/examples/google-flights/get_flight_calendar_prices/playbook.yaml +97 -0
- package/examples/google-flights/get_flight_calendar_prices/request-transform.ts +31 -0
- package/examples/google-flights/get_flight_calendar_prices/workflow.json +76 -0
- package/examples/google-flights/lookup_airport/index.ts +101 -0
- package/examples/google-flights/lookup_airport/package.json +9 -0
- package/examples/google-flights/lookup_airport/parser.ts +66 -0
- package/examples/google-flights/lookup_airport/playbook.yaml +47 -0
- package/examples/google-flights/lookup_airport/request-transform.ts +20 -0
- package/examples/google-flights/lookup_airport/workflow.json +57 -0
- package/examples/google-flights/search_flights/index.ts +219 -0
- package/examples/google-flights/search_flights/package.json +9 -0
- package/examples/google-flights/search_flights/parser.ts +169 -0
- package/examples/google-flights/search_flights/playbook.yaml +184 -0
- package/examples/google-flights/search_flights/request-transform.ts +119 -0
- package/examples/google-flights/search_flights/workflow.json +143 -0
- package/examples/google-hotels/README.md +29 -0
- package/examples/google-hotels/_shared/batchexecute.ts +73 -0
- package/examples/google-hotels/_shared/freq.ts +158 -0
- package/examples/google-hotels/_shared/package.json +9 -0
- package/examples/google-hotels/autocomplete_hotel_location/index.ts +80 -0
- package/examples/google-hotels/autocomplete_hotel_location/package.json +9 -0
- package/examples/google-hotels/autocomplete_hotel_location/parser.ts +71 -0
- package/examples/google-hotels/autocomplete_hotel_location/playbook.yaml +36 -0
- package/examples/google-hotels/autocomplete_hotel_location/request-transform.ts +37 -0
- package/examples/google-hotels/autocomplete_hotel_location/workflow.json +36 -0
- package/examples/google-hotels/get_hotel_booking_options/index.ts +143 -0
- package/examples/google-hotels/get_hotel_booking_options/package.json +9 -0
- package/examples/google-hotels/get_hotel_booking_options/parser.ts +271 -0
- package/examples/google-hotels/get_hotel_booking_options/playbook.yaml +154 -0
- package/examples/google-hotels/get_hotel_booking_options/request-transform.ts +154 -0
- package/examples/google-hotels/get_hotel_booking_options/workflow.json +84 -0
- package/examples/google-hotels/get_hotel_reviews/index.ts +81 -0
- package/examples/google-hotels/get_hotel_reviews/package.json +9 -0
- package/examples/google-hotels/get_hotel_reviews/parser.ts +128 -0
- package/examples/google-hotels/get_hotel_reviews/playbook.yaml +64 -0
- package/examples/google-hotels/get_hotel_reviews/request-transform.ts +42 -0
- package/examples/google-hotels/get_hotel_reviews/workflow.json +37 -0
- package/examples/google-hotels/search_hotels/index.ts +207 -0
- package/examples/google-hotels/search_hotels/package.json +9 -0
- package/examples/google-hotels/search_hotels/parser.ts +260 -0
- package/examples/google-hotels/search_hotels/playbook.yaml +87 -0
- package/examples/google-hotels/search_hotels/request-transform.ts +197 -0
- package/examples/google-hotels/search_hotels/workflow.json +127 -0
- package/package.json +3 -2
- package/prompts/audit-agent.md +71 -0
- package/prompts/build-planning.md +74 -0
- package/prompts/compile-agent.md +131 -27
- package/prompts/prereq-builder.md +64 -0
- package/prompts/prereq-planner.md +34 -0
- package/prompts/tool-planning.md +39 -0
- package/src/cli.ts +109 -2
- package/src/imprint/agent.ts +5 -0
- package/src/imprint/audit.ts +996 -0
- package/src/imprint/backend-ladder.ts +1214 -184
- package/src/imprint/build-plan.ts +1051 -0
- package/src/imprint/cdp-browser-fetch.ts +589 -0
- package/src/imprint/cdp-jar-cache.ts +320 -0
- package/src/imprint/chromium.ts +135 -0
- package/src/imprint/claude-cli-compile.ts +125 -25
- package/src/imprint/codex-cli-compile.ts +26 -23
- package/src/imprint/compile-agent-types.ts +38 -0
- package/src/imprint/compile-agent.ts +63 -25
- package/src/imprint/compile-tools.ts +1656 -64
- package/src/imprint/compile.ts +13 -1
- package/src/imprint/concurrency.ts +87 -0
- package/src/imprint/cron.ts +1 -0
- package/src/imprint/doctor.ts +39 -0
- package/src/imprint/freeform-redact.ts +5 -4
- package/src/imprint/integrations.ts +2 -2
- package/src/imprint/llm.ts +56 -8
- package/src/imprint/mcp-compile-server.ts +43 -10
- package/src/imprint/mcp-maintenance.ts +9 -101
- package/src/imprint/mcp-server.ts +73 -7
- package/src/imprint/multi-progress.ts +7 -2
- package/src/imprint/param-grounding.ts +367 -0
- package/src/imprint/paths.ts +29 -0
- package/src/imprint/playbook-runner.ts +101 -40
- package/src/imprint/prereq-builder.ts +651 -0
- package/src/imprint/probe-backends.ts +6 -3
- package/src/imprint/record.ts +10 -1
- package/src/imprint/redact.ts +30 -2
- package/src/imprint/replay-capture.ts +19 -18
- package/src/imprint/runtime.ts +19 -10
- package/src/imprint/session-diff.ts +79 -2
- package/src/imprint/session-merge.ts +9 -5
- package/src/imprint/stealth-chromium.ts +81 -0
- package/src/imprint/stealth-fetch.ts +309 -29
- package/src/imprint/stealth-token-cache.ts +88 -0
- package/src/imprint/teach-plan.ts +251 -0
- package/src/imprint/teach-state.ts +10 -0
- package/src/imprint/teach.ts +456 -142
- package/src/imprint/tool-candidates.ts +72 -14
- package/src/imprint/tool-plan.ts +313 -0
- package/src/imprint/tracing.ts +135 -6
- package/src/imprint/types.ts +61 -3
- package/examples/google-flights/search_google_flights/index.ts +0 -101
- package/examples/google-flights/search_google_flights/parser.test.ts +0 -140
- package/examples/google-flights/search_google_flights/parser.ts +0 -189
- package/examples/google-flights/search_google_flights/playbook.yaml +0 -130
- package/examples/google-flights/search_google_flights/workflow.json +0 -48
- package/examples/google-hotels/search_google_hotels/index.ts +0 -194
- package/examples/google-hotels/search_google_hotels/parser.test.ts +0 -168
- package/examples/google-hotels/search_google_hotels/parser.ts +0 -330
- package/examples/google-hotels/search_google_hotels/playbook.yaml +0 -125
- package/examples/google-hotels/search_google_hotels/workflow.json +0 -111
- package/examples/namecheap-domains/search_namecheap_domains/index.ts +0 -144
- package/examples/namecheap-domains/search_namecheap_domains/parser.ts +0 -380
- package/examples/namecheap-domains/search_namecheap_domains/playbook.yaml +0 -50
- package/examples/namecheap-domains/search_namecheap_domains/request-transform.ts +0 -136
- package/examples/namecheap-domains/search_namecheap_domains/workflow.json +0 -97
|
@@ -29,7 +29,9 @@ import { existsSync, mkdirSync, readFileSync, unlinkSync, writeFileSync } from '
|
|
|
29
29
|
import { join as pathJoin } from 'node:path';
|
|
30
30
|
import { type Span, context as otelContext } from '@opentelemetry/api';
|
|
31
31
|
import type { OnDeadlineReached } from './agent.ts';
|
|
32
|
+
import { type SharedModuleManifestEntry, resolvePlanSliceFromFile } from './build-plan.ts';
|
|
32
33
|
import type { CompileAgentProgress, CompileAgentResult } from './compile-agent-types.ts';
|
|
34
|
+
import { formatCandidateContext, formatToolPlan } from './compile-agent-types.ts';
|
|
33
35
|
import { preferredAgentModel } from './llm.ts';
|
|
34
36
|
import { createLog } from './log.ts';
|
|
35
37
|
import { COMPILE_SENTINELS } from './mcp-compile-server.ts';
|
|
@@ -39,6 +41,7 @@ import {
|
|
|
39
41
|
llmSpanAttributes,
|
|
40
42
|
setSpanAttributes,
|
|
41
43
|
startTraceSpan,
|
|
44
|
+
totalPromptTokens,
|
|
42
45
|
traceJsonInputOutputAttributes,
|
|
43
46
|
traceLlmIoEnabled,
|
|
44
47
|
traced,
|
|
@@ -52,6 +55,35 @@ const CLI_PATH = pathJoin(REPO_ROOT, 'src', 'cli.ts');
|
|
|
52
55
|
const MCP_SERVER_NAME = 'imprint-compile';
|
|
53
56
|
const MAX_VERIFICATION_CYCLES = 5;
|
|
54
57
|
|
|
58
|
+
/**
|
|
59
|
+
* Thinking effort for the compile agent. Deliberately `high`, not `max`:
|
|
60
|
+
* empirically, max-effort thinking generates a large volume of reasoning tokens
|
|
61
|
+
* on reverse-engineering tasks, which measurably raises the model's usage-policy
|
|
62
|
+
* safety-filter false-positive rate. `high` keeps strong reasoning with far
|
|
63
|
+
* fewer spurious refusals. Passed as an explicit `--effort` flag so it overrides
|
|
64
|
+
* any CLAUDE_EFFORT inherited from the environment.
|
|
65
|
+
*/
|
|
66
|
+
const COMPILE_EFFORT_LEVEL = 'high';
|
|
67
|
+
|
|
68
|
+
/**
|
|
69
|
+
* Signature of Claude Code's usage-policy safety refusal (surfaced in the
|
|
70
|
+
* terminal result event / our error message). The block is a transient,
|
|
71
|
+
* probabilistic false positive on legitimate compiles, so we retry a fresh
|
|
72
|
+
* session a few times before surfacing it as a hard failure.
|
|
73
|
+
*/
|
|
74
|
+
const USAGE_POLICY_REFUSAL =
|
|
75
|
+
/unable to respond to this request|appears to violate our Usage Policy/i;
|
|
76
|
+
|
|
77
|
+
/** Total attempts (1 initial + retries) when a usage-policy refusal is hit. */
|
|
78
|
+
const MAX_USAGE_POLICY_ATTEMPTS = 3;
|
|
79
|
+
|
|
80
|
+
/**
 * Computes how long to wait before the next attempt after a usage-policy
 * refusal: an exponentially growing base delay plus random jitter.
 *
 * Spacing matters: bursts of near-identical requests raise the safety-filter
 * trip rate.
 *
 * @param attempt - 1-based number of the attempt that was just refused.
 * @returns Delay in milliseconds. The base is 5000 * 2^(attempt - 1)
 *   (5s, 10s, ...), and the jitter adds a whole number of milliseconds that is
 *   strictly less than half of that base.
 */
function usagePolicyBackoffMs(attempt: number): number {
  const baseDelayMs = 5000 * 2 ** (attempt - 1);
  const jitterMs = Math.floor(Math.random() * baseDelayMs * 0.5);
  return baseDelayMs + jitterMs;
}
|
|
86
|
+
|
|
55
87
|
interface CompileViaClaudeCliOptions {
|
|
56
88
|
session: Session;
|
|
57
89
|
absoluteToolDir: string;
|
|
@@ -67,6 +99,12 @@ interface CompileViaClaudeCliOptions {
|
|
|
67
99
|
keepTest?: boolean;
|
|
68
100
|
candidate?: ToolCandidate;
|
|
69
101
|
sharedContext?: SharedCompileContext;
|
|
102
|
+
/** Absolute path to the multi-tool build plan sidecar (.build-plan.json). */
|
|
103
|
+
buildPlanPath?: string;
|
|
104
|
+
/** Shared-module build manifest for this site (verified flags). */
|
|
105
|
+
sharedModules?: SharedModuleManifestEntry[];
|
|
106
|
+
/** Per-tool implementation plan injected into the agent's initial message. */
|
|
107
|
+
toolPlan?: string;
|
|
70
108
|
}
|
|
71
109
|
|
|
72
110
|
interface StreamJsonEvent {
|
|
@@ -124,7 +162,14 @@ export async function compileViaClaudeCli(
|
|
|
124
162
|
...llmSpanAttributes({
|
|
125
163
|
provider: 'claude-cli',
|
|
126
164
|
model: preferredAgentModel('claude-cli'),
|
|
127
|
-
|
|
165
|
+
// TOTAL prompt (uncached + cache); the cache split is passed separately
|
|
166
|
+
// for cost. `result.inputTokens` alone is the uncached delta (often a
|
|
167
|
+
// few hundred), which would mislabel `llm.token_count.prompt`.
|
|
168
|
+
inputTokens: totalPromptTokens(
|
|
169
|
+
result.inputTokens,
|
|
170
|
+
result.cacheReadInputTokens,
|
|
171
|
+
result.cacheCreationInputTokens,
|
|
172
|
+
),
|
|
128
173
|
outputTokens: result.outputTokens,
|
|
129
174
|
cacheReadTokens: result.cacheReadInputTokens,
|
|
130
175
|
cacheWriteTokens: result.cacheCreationInputTokens,
|
|
@@ -135,9 +180,41 @@ export async function compileViaClaudeCli(
|
|
|
135
180
|
);
|
|
136
181
|
}
|
|
137
182
|
|
|
183
|
+
/**
 * Drives the compile, retrying a fresh claude-cli session when an attempt is
 * blocked by the usage-policy safety filter. The block is a flaky false positive
 * (see USAGE_POLICY_REFUSAL); a re-roll almost always succeeds. All other
 * outcomes (success, give_up, verification failure, timeout) return immediately.
 *
 * A result counts as a refusal when it is unsuccessful and its message matches
 * USAGE_POLICY_REFUSAL. Between refused attempts the function waits for
 * usagePolicyBackoffMs(attempt) milliseconds.
 *
 * @param opts - Compile options, forwarded unchanged to every attempt.
 * @returns The first non-refusal result, or, once MAX_USAGE_POLICY_ATTEMPTS
 *   attempts have all been refused, the last refusal with an explanatory note
 *   appended to its message.
 */
async function compileViaClaudeCliImpl(
  opts: CompileViaClaudeCliOptions,
): Promise<CompileAgentResult> {
  for (let attempt = 1; ; attempt++) {
    const result = await runClaudeCliAttempt(opts);
    const isRefusal = !result.success && USAGE_POLICY_REFUSAL.test(result.message ?? '');
    if (!isRefusal) return result;

    if (attempt >= MAX_USAGE_POLICY_ATTEMPTS) {
      // Every attempt was blocked. Annotate the final error so the operator
      // knows it was the (flaky) safety filter, not their recording or
      // workflow. Handling this here, where `result` is known to be defined,
      // avoids carrying a possibly-undefined "last result" out of the loop.
      return {
        ...result,
        message: `${result.message}\n\nBlocked by the model's usage-policy safety filter on all ${MAX_USAGE_POLICY_ATTEMPTS} attempts. This is typically a transient false positive on reverse-engineering compiles — re-run this tool, or compile it with a different provider (e.g. codex-cli).`,
      };
    }

    const backoffMs = usagePolicyBackoffMs(attempt);
    log(
      `usage-policy refusal on attempt ${attempt}/${MAX_USAGE_POLICY_ATTEMPTS}; ` +
        `retrying a fresh session in ${Math.round(backoffMs / 1000)}s`,
    );
    await new Promise<void>((resolve) => {
      setTimeout(resolve, backoffMs);
    });
  }
}
|
|
216
|
+
|
|
217
|
+
async function runClaudeCliAttempt(opts: CompileViaClaudeCliOptions): Promise<CompileAgentResult> {
|
|
141
218
|
// Ensure tool dir exists and clear any prior sentinels — a stale
|
|
142
219
|
// sentinel from a previous run would short-circuit our success detection.
|
|
143
220
|
mkdirSync(opts.absoluteToolDir, { recursive: true });
|
|
@@ -175,17 +252,27 @@ async function compileViaClaudeCliImpl(
|
|
|
175
252
|
...(opts.sharedContext
|
|
176
253
|
? ['--shared-context-json', JSON.stringify(opts.sharedContext)]
|
|
177
254
|
: []),
|
|
255
|
+
...(opts.buildPlanPath ? ['--build-plan-path', opts.buildPlanPath] : []),
|
|
256
|
+
...(opts.sharedModules
|
|
257
|
+
? ['--shared-modules-json', JSON.stringify(opts.sharedModules)]
|
|
258
|
+
: []),
|
|
178
259
|
],
|
|
179
260
|
},
|
|
180
261
|
},
|
|
181
262
|
};
|
|
182
263
|
|
|
264
|
+
const { assignedSharedModules } = resolvePlanSliceFromFile(
|
|
265
|
+
opts.buildPlanPath,
|
|
266
|
+
opts.candidate?.toolName,
|
|
267
|
+
opts.sharedModules,
|
|
268
|
+
);
|
|
183
269
|
const initialPrompt = `A new compile task is starting.
|
|
184
270
|
|
|
185
271
|
Session path: ${sessionPathAbs}
|
|
186
272
|
Tool directory: ${opts.absoluteToolDir}
|
|
187
273
|
You will write artifacts into the tool directory.
|
|
188
|
-
${formatCandidateContext(opts.candidate, opts.sharedContext)}
|
|
274
|
+
${formatCandidateContext(opts.candidate, opts.sharedContext, assignedSharedModules)}
|
|
275
|
+
${formatToolPlan(opts.toolPlan)}
|
|
189
276
|
|
|
190
277
|
Begin by calling read_session_summary to orient yourself, then proceed per the system prompt.`;
|
|
191
278
|
|
|
@@ -199,6 +286,8 @@ Begin by calling read_session_summary to orient yourself, then proceed per the s
|
|
|
199
286
|
JSON.stringify(mcpConfig),
|
|
200
287
|
'--system-prompt-file',
|
|
201
288
|
opts.systemPromptPath,
|
|
289
|
+
'--append-system-prompt',
|
|
290
|
+
`Today's date is ${new Date().toISOString().slice(0, 10)}.`,
|
|
202
291
|
// Disable the built-in tool set so claude only uses our MCP tools.
|
|
203
292
|
'--tools',
|
|
204
293
|
'',
|
|
@@ -221,6 +310,8 @@ Begin by calling read_session_summary to orient yourself, then proceed per the s
|
|
|
221
310
|
'--allowedTools',
|
|
222
311
|
`mcp__${MCP_SERVER_NAME}__run_tests`,
|
|
223
312
|
'--allowedTools',
|
|
313
|
+
`mcp__${MCP_SERVER_NAME}__read_build_plan`,
|
|
314
|
+
'--allowedTools',
|
|
224
315
|
`mcp__${MCP_SERVER_NAME}__done`,
|
|
225
316
|
'--allowedTools',
|
|
226
317
|
`mcp__${MCP_SERVER_NAME}__give_up`,
|
|
@@ -234,6 +325,9 @@ Begin by calling read_session_summary to orient yourself, then proceed per the s
|
|
|
234
325
|
'bypassPermissions',
|
|
235
326
|
'--no-session-persistence',
|
|
236
327
|
'--disable-slash-commands',
|
|
328
|
+
// Cap thinking effort below `max` to reduce usage-policy false positives.
|
|
329
|
+
'--effort',
|
|
330
|
+
COMPILE_EFFORT_LEVEL,
|
|
237
331
|
'--model',
|
|
238
332
|
preferredAgentModel('claude-cli'),
|
|
239
333
|
initialPrompt,
|
|
@@ -245,7 +339,26 @@ Begin by calling read_session_summary to orient yourself, then proceed per the s
|
|
|
245
339
|
try {
|
|
246
340
|
child = spawn('claude', args, {
|
|
247
341
|
cwd: REPO_ROOT,
|
|
248
|
-
|
|
342
|
+
// Claude CLI's default MCP_TOOL_TIMEOUT is 60s. The compile MCP
|
|
343
|
+
// server's `done` tool runs external verification inline — bun test
|
|
344
|
+
// (up to 60s × 3 retries for the integration suite + 120s for the
|
|
345
|
+
// parser suite) plus typechecking. On bot-protected sites where the
|
|
346
|
+
// integration test escalates fetch → fetch-bootstrap → stealth-fetch
|
|
347
|
+
// for every assertion, a single bun test pass can run 30s × 3
|
|
348
|
+
// rungs × N tests = 10-15 min before the outer wrapper kills it,
|
|
349
|
+
// and 3 retries push the total well past 30 min. A 10-min cap was
|
|
350
|
+
// not enough — set 30 min so the worst-case verification can
|
|
351
|
+
// actually complete and the agent receives the failure feedback
|
|
352
|
+
// (and ships with `liveVerified: false` via the waiver path)
|
|
353
|
+
// rather than getting `-32000: Connection closed` mid-call and
|
|
354
|
+
// wasting the rest of its turn budget. Honor user-set env so an
|
|
355
|
+
// operator on a fast network can tighten without editing source.
|
|
356
|
+
// Connection-startup timeout stays at 60s for cold Playwright boot.
|
|
357
|
+
env: {
|
|
358
|
+
...process.env,
|
|
359
|
+
MCP_TOOL_TIMEOUT: process.env.MCP_TOOL_TIMEOUT ?? '1800000',
|
|
360
|
+
MCP_TIMEOUT: process.env.MCP_TIMEOUT ?? '60000',
|
|
361
|
+
},
|
|
249
362
|
stdio: ['ignore', 'pipe', 'pipe'],
|
|
250
363
|
});
|
|
251
364
|
} catch (err) {
|
|
@@ -267,6 +380,12 @@ async function driveStreamJson(
|
|
|
267
380
|
const parentCtx = otelContext.active();
|
|
268
381
|
|
|
269
382
|
const conversationLog: unknown[] = [];
|
|
383
|
+
const conversationLogPath = pathJoin(opts.absoluteToolDir, '.compile-log.json');
|
|
384
|
+
// Tracks whether a flush failure has already been reported, so repeated
// flushes against an unwritable path do not spam the log.
let flushFailureLogged = false;
/**
 * Best-effort persistence of the conversation captured so far: serializes the
 * whole `conversationLog` array as pretty-printed JSON to
 * `conversationLogPath`, overwriting any previous contents. It is invoked
 * repeatedly during the run, so a failed write never throws; the first
 * failure is logged and later ones are ignored.
 */
const flushLog = (): void => {
  try {
    writeFileSync(conversationLogPath, JSON.stringify(conversationLog, null, 2), 'utf8');
  } catch (err) {
    if (!flushFailureLogged) {
      flushFailureLogged = true;
      log(`failed to persist conversation log: ${errMsg(err)}`);
    }
  }
};
|
|
270
389
|
const captureLlmIo = traceLlmIoEnabled();
|
|
271
390
|
let inputTokens = 0;
|
|
272
391
|
let outputTokens = 0;
|
|
@@ -376,6 +495,7 @@ async function driveStreamJson(
|
|
|
376
495
|
});
|
|
377
496
|
endTraceSpan(currentTurnSpan);
|
|
378
497
|
}
|
|
498
|
+
flushLog();
|
|
379
499
|
turn++;
|
|
380
500
|
turnInputTokens = 0;
|
|
381
501
|
turnOutputTokens = 0;
|
|
@@ -460,13 +580,8 @@ async function driveStreamJson(
|
|
|
460
580
|
log(`unflushed stdout tail (${stdoutBuf.length} bytes) discarded`);
|
|
461
581
|
}
|
|
462
582
|
|
|
463
|
-
//
|
|
464
|
-
|
|
465
|
-
try {
|
|
466
|
-
writeFileSync(conversationLogPath, JSON.stringify(conversationLog, null, 2), 'utf8');
|
|
467
|
-
} catch (err) {
|
|
468
|
-
log(`failed to persist conversation log: ${errMsg(err)}`);
|
|
469
|
-
}
|
|
583
|
+
// Final flush of the complete conversation log.
|
|
584
|
+
flushLog();
|
|
470
585
|
|
|
471
586
|
// Inspect sentinels to determine outcome.
|
|
472
587
|
const doneSentinel = pathJoin(opts.absoluteToolDir, COMPILE_SENTINELS.done);
|
|
@@ -620,21 +735,6 @@ function finalErrorResult(opts: CompileViaClaudeCliOptions, message: string): Co
|
|
|
620
735
|
};
|
|
621
736
|
}
|
|
622
737
|
|
|
623
|
-
function formatCandidateContext(
|
|
624
|
-
candidate: ToolCandidate | undefined,
|
|
625
|
-
sharedContext: SharedCompileContext | undefined,
|
|
626
|
-
): string {
|
|
627
|
-
if (!candidate && !sharedContext) return '';
|
|
628
|
-
return `
|
|
629
|
-
Selected candidate context:
|
|
630
|
-
${candidate ? JSON.stringify(candidate, null, 2) : '(none)'}
|
|
631
|
-
|
|
632
|
-
Shared compile context:
|
|
633
|
-
${sharedContext ? JSON.stringify(sharedContext, null, 2) : '(none)'}
|
|
634
|
-
|
|
635
|
-
Compile only the selected candidate. Do not create tools for other actions in the recording.`;
|
|
636
|
-
}
|
|
637
|
-
|
|
638
738
|
function errMsg(err: unknown): string {
|
|
639
739
|
return err instanceof Error ? err.message : String(err);
|
|
640
740
|
}
|
|
@@ -11,7 +11,9 @@ import { type ChildProcess, spawn } from 'node:child_process';
|
|
|
11
11
|
import { existsSync, mkdirSync, readFileSync, unlinkSync, writeFileSync } from 'node:fs';
|
|
12
12
|
import { isAbsolute as pathIsAbsolute, join as pathJoin } from 'node:path';
|
|
13
13
|
import { type Span, context as otelContext } from '@opentelemetry/api';
|
|
14
|
+
import { type SharedModuleManifestEntry, resolvePlanSliceFromFile } from './build-plan.ts';
|
|
14
15
|
import type { CompileAgentProgress, CompileAgentResult } from './compile-agent-types.ts';
|
|
16
|
+
import { formatCandidateContext, formatToolPlan } from './compile-agent-types.ts';
|
|
15
17
|
import { preferredAgentModel } from './llm.ts';
|
|
16
18
|
import { createLog } from './log.ts';
|
|
17
19
|
import { COMPILE_SENTINELS } from './mcp-compile-server.ts';
|
|
@@ -48,6 +50,12 @@ interface CompileViaCodexCliOptions {
|
|
|
48
50
|
keepTest?: boolean;
|
|
49
51
|
candidate?: ToolCandidate;
|
|
50
52
|
sharedContext?: SharedCompileContext;
|
|
53
|
+
/** Absolute path to the multi-tool build plan sidecar (.build-plan.json). */
|
|
54
|
+
buildPlanPath?: string;
|
|
55
|
+
/** Shared-module build manifest for this site (verified flags). */
|
|
56
|
+
sharedModules?: SharedModuleManifestEntry[];
|
|
57
|
+
/** Per-tool implementation plan injected into the agent's initial message. */
|
|
58
|
+
toolPlan?: string;
|
|
51
59
|
}
|
|
52
60
|
|
|
53
61
|
interface CodexJsonEvent {
|
|
@@ -141,15 +149,22 @@ async function compileViaCodexCliImpl(
|
|
|
141
149
|
opts.absoluteToolDir,
|
|
142
150
|
...(opts.candidate ? ['--candidate-json', JSON.stringify(opts.candidate)] : []),
|
|
143
151
|
...(opts.sharedContext ? ['--shared-context-json', JSON.stringify(opts.sharedContext)] : []),
|
|
152
|
+
...(opts.buildPlanPath ? ['--build-plan-path', opts.buildPlanPath] : []),
|
|
153
|
+
...(opts.sharedModules ? ['--shared-modules-json', JSON.stringify(opts.sharedModules)] : []),
|
|
144
154
|
];
|
|
145
155
|
|
|
146
156
|
let systemPrompt: string;
|
|
147
157
|
try {
|
|
148
|
-
systemPrompt = readFileSync(opts.systemPromptPath, 'utf8')
|
|
158
|
+
systemPrompt = `${readFileSync(opts.systemPromptPath, 'utf8')}\n\nToday's date is ${new Date().toISOString().slice(0, 10)}.`;
|
|
149
159
|
} catch (err) {
|
|
150
160
|
return finalErrorResult(opts, `failed to read system prompt: ${errMsg(err)}`);
|
|
151
161
|
}
|
|
152
162
|
|
|
163
|
+
const { assignedSharedModules } = resolvePlanSliceFromFile(
|
|
164
|
+
opts.buildPlanPath,
|
|
165
|
+
opts.candidate?.toolName,
|
|
166
|
+
opts.sharedModules,
|
|
167
|
+
);
|
|
153
168
|
const initialPrompt = `<system_instructions>
|
|
154
169
|
${systemPrompt}
|
|
155
170
|
</system_instructions>
|
|
@@ -159,7 +174,8 @@ A new compile task is starting.
|
|
|
159
174
|
Session path: ${sessionPathAbs}
|
|
160
175
|
Tool directory: ${opts.absoluteToolDir}
|
|
161
176
|
You will write artifacts into the tool directory.
|
|
162
|
-
${formatCandidateContext(opts.candidate, opts.sharedContext)}
|
|
177
|
+
${formatCandidateContext(opts.candidate, opts.sharedContext, assignedSharedModules)}
|
|
178
|
+
${formatToolPlan(opts.toolPlan)}
|
|
163
179
|
|
|
164
180
|
Use the imprint-compile MCP tools to inspect the session, write artifacts, run tests, and call done(). Begin by calling read_session_summary, then proceed per the system instructions.`;
|
|
165
181
|
|
|
@@ -277,6 +293,12 @@ async function driveJsonl(
|
|
|
277
293
|
const parentCtx = otelContext.active();
|
|
278
294
|
|
|
279
295
|
const conversationLog: unknown[] = [];
|
|
296
|
+
const conversationLogPath = pathJoin(opts.absoluteToolDir, '.compile-log.json');
|
|
297
|
+
// Tracks whether a flush failure has already been reported, so repeated
// flushes against an unwritable path do not spam the log.
let flushFailureLogged = false;
/**
 * Best-effort persistence of the conversation captured so far: serializes the
 * whole `conversationLog` array as pretty-printed JSON to
 * `conversationLogPath`, overwriting any previous contents. It is invoked
 * repeatedly during the run, so a failed write never throws; the first
 * failure is logged and later ones are ignored.
 */
const flushLog = (): void => {
  try {
    writeFileSync(conversationLogPath, JSON.stringify(conversationLog, null, 2), 'utf8');
  } catch (err) {
    if (!flushFailureLogged) {
      flushFailureLogged = true;
      log(`failed to persist conversation log: ${errMsg(err)}`);
    }
  }
};
|
|
280
302
|
let inputTokens = 0;
|
|
281
303
|
let outputTokens = 0;
|
|
282
304
|
let turn = 0;
|
|
@@ -357,6 +379,7 @@ async function driveJsonl(
|
|
|
357
379
|
|
|
358
380
|
if (evt.type === 'turn.started') {
|
|
359
381
|
if (currentTurnSpan) endTraceSpan(currentTurnSpan);
|
|
382
|
+
flushLog();
|
|
360
383
|
turn++;
|
|
361
384
|
currentTurnSpan = startTraceSpan(`agent.turn.${turn}`, 'CHAIN', {
|
|
362
385
|
'imprint.agent.turn': turn,
|
|
@@ -438,12 +461,7 @@ async function driveJsonl(
|
|
|
438
461
|
log(`unflushed stdout tail (${stdoutBuf.length} bytes) discarded`);
|
|
439
462
|
}
|
|
440
463
|
|
|
441
|
-
|
|
442
|
-
try {
|
|
443
|
-
writeFileSync(conversationLogPath, JSON.stringify(conversationLog, null, 2), 'utf8');
|
|
444
|
-
} catch (err) {
|
|
445
|
-
log(`failed to persist conversation log: ${errMsg(err)}`);
|
|
446
|
-
}
|
|
464
|
+
flushLog();
|
|
447
465
|
|
|
448
466
|
const workflowPath = pathJoin(opts.absoluteToolDir, 'workflow.json');
|
|
449
467
|
const parserPath = pathJoin(opts.absoluteToolDir, 'parser.ts');
|
|
@@ -692,21 +710,6 @@ function finalErrorResult(opts: CompileViaCodexCliOptions, message: string): Com
|
|
|
692
710
|
};
|
|
693
711
|
}
|
|
694
712
|
|
|
695
|
-
function formatCandidateContext(
|
|
696
|
-
candidate: ToolCandidate | undefined,
|
|
697
|
-
sharedContext: SharedCompileContext | undefined,
|
|
698
|
-
): string {
|
|
699
|
-
if (!candidate && !sharedContext) return '';
|
|
700
|
-
return `
|
|
701
|
-
Selected candidate context:
|
|
702
|
-
${candidate ? JSON.stringify(candidate, null, 2) : '(none)'}
|
|
703
|
-
|
|
704
|
-
Shared compile context:
|
|
705
|
-
${sharedContext ? JSON.stringify(sharedContext, null, 2) : '(none)'}
|
|
706
|
-
|
|
707
|
-
Compile only the selected candidate. Do not create tools for other actions in the recording.`;
|
|
708
|
-
}
|
|
709
|
-
|
|
710
713
|
function errMsg(err: unknown): string {
|
|
711
714
|
return err instanceof Error ? err.message : String(err);
|
|
712
715
|
}
|
|
@@ -7,6 +7,44 @@
|
|
|
7
7
|
*/
|
|
8
8
|
|
|
9
9
|
import type { AgentProgress } from './agent.ts';
|
|
10
|
+
import { type AssignedSharedModule, describeAssignedModules } from './build-plan.ts';
|
|
11
|
+
import type { SharedCompileContext, ToolCandidate } from './tool-candidates.ts';
|
|
12
|
+
|
|
13
|
+
/**
 * Renders a per-tool implementation plan (param→field mapping, request
 * construction, response parsing, shared-module imports, edge cases) as a
 * section of the compile agent's initial message.
 *
 * Shared verbatim by the in-process loop and both CLI drivers. Generic: the
 * fixed preamble carries no site-specific content; the plan itself is derived
 * per-tool from the recording.
 *
 * @param toolPlan - Plan text, or `undefined` when no plan is available.
 * @returns An empty string when the plan is missing or blank after trimming;
 *   otherwise two newlines, the fixed preamble, a blank line, and the trimmed
 *   plan.
 */
export function formatToolPlan(toolPlan: string | undefined): string {
  const trimmedPlan = (toolPlan ?? '').trim();
  if (trimmedPlan.length === 0) {
    return '';
  }

  const preamble =
    'IMPLEMENTATION PLAN — a planning pass analyzed the recording for THIS tool and produced the plan below. Follow it. It maps each parameter to its recorded field, specifies how to construct the request(s) and parse the response, and names the shared modules to import. Deviate only where the recorded data plainly contradicts the plan; if you do, note the correction in a brief code comment.';

  return `\n\n${preamble}\n\n${trimmedPlan}`;
}
|
|
27
|
+
|
|
28
|
+
/**
 * Renders the selected candidate and the shared compile context (plus any
 * assigned shared modules) as a section of the compile agent's initial
 * message. Shared verbatim by the in-process loop and both CLI drivers.
 *
 * @param candidate - Selected tool candidate; rendered as indented JSON, or
 *   `(none)` when absent.
 * @param sharedContext - Shared compile context; rendered as indented JSON, or
 *   `(none)` when absent.
 * @param assignedSharedModules - When provided, the text returned by
 *   `describeAssignedModules` is appended directly after the closing
 *   instruction.
 * @returns An empty string when both `candidate` and `sharedContext` are
 *   absent; otherwise the formatted section, starting with a newline.
 */
export function formatCandidateContext(
  candidate: ToolCandidate | undefined,
  sharedContext: SharedCompileContext | undefined,
  assignedSharedModules?: AssignedSharedModule[],
): string {
  if (!(candidate || sharedContext)) {
    return '';
  }

  const candidateJson = candidate ? JSON.stringify(candidate, null, 2) : '(none)';
  const sharedContextJson = sharedContext ? JSON.stringify(sharedContext, null, 2) : '(none)';
  const modulesSection = assignedSharedModules
    ? describeAssignedModules(assignedSharedModules)
    : '';

  return `\nSelected candidate context:\n${candidateJson}\n\nShared compile context:\n${sharedContextJson}\n\nCompile only the selected candidate. Do not create tools for other actions in the recording.${modulesSection}`;
}
|
|
10
48
|
|
|
11
49
|
export interface CompileAgentProgress extends AgentProgress {
|
|
12
50
|
/** 1-based verification cycle. Cycle 1 is the initial agent run. Subsequent cycles
|
|
@@ -16,10 +16,17 @@ import {
|
|
|
16
16
|
giveUpTool,
|
|
17
17
|
runAgentLoop,
|
|
18
18
|
} from './agent.ts';
|
|
19
|
+
import { type SharedModuleManifestEntry, resolvePlanSliceFromFile } from './build-plan.ts';
|
|
19
20
|
import { compileViaClaudeCli } from './claude-cli-compile.ts';
|
|
20
21
|
import { compileViaCodexCli } from './codex-cli-compile.ts';
|
|
21
22
|
import type { CompileAgentProgress, CompileAgentResult } from './compile-agent-types.ts';
|
|
22
|
-
import {
|
|
23
|
+
import { formatCandidateContext, formatToolPlan } from './compile-agent-types.ts';
|
|
24
|
+
import {
|
|
25
|
+
applyLiveVerification,
|
|
26
|
+
applyParamVerification,
|
|
27
|
+
buildCompileTools,
|
|
28
|
+
externalVerification,
|
|
29
|
+
} from './compile-tools.ts';
|
|
23
30
|
import { type Replacement, extractCredentials } from './credential-extract.ts';
|
|
24
31
|
import {
|
|
25
32
|
type LLMOptions,
|
|
@@ -84,12 +91,28 @@ interface CompileAgentOptions {
|
|
|
84
91
|
classifications?: ClassifiedValue[];
|
|
85
92
|
/** Credential values extracted during teach, passed to integration tests via env var. */
|
|
86
93
|
teachCredentials?: { site: string; values: Record<string, string> };
|
|
94
|
+
/** Absolute path to the multi-tool build plan sidecar (.build-plan.json). */
|
|
95
|
+
buildPlanPath?: string;
|
|
96
|
+
/** Shared-module build manifest for this site (verified flags). */
|
|
97
|
+
sharedModules?: SharedModuleManifestEntry[];
|
|
87
98
|
/** Called when wall-clock deadline is reached; return ms to extend or null to time out. */
|
|
88
99
|
onDeadlineReached?: OnDeadlineReached;
|
|
100
|
+
/** Per-tool implementation plan (param→field mapping, request construction,
|
|
101
|
+
* response parsing, shared-module imports). Injected into the agent's initial
|
|
102
|
+
* message so the compile follows it. Generic — not tied to any site. */
|
|
103
|
+
toolPlan?: string;
|
|
89
104
|
}
|
|
90
105
|
|
|
91
106
|
export async function compileAgent(opts: CompileAgentOptions): Promise<CompileAgentResult> {
|
|
92
107
|
const startTime = Date.now();
|
|
108
|
+
// Resolve the shared modules + token contracts the plan assigned this tool, so
|
|
109
|
+
// the in-process verifier can assert modules are imported and require a chained
|
|
110
|
+
// test for each producer-sourced token param.
|
|
111
|
+
const { assignedSharedModules, tokenParams, emittedTokens } = resolvePlanSliceFromFile(
|
|
112
|
+
opts.buildPlanPath,
|
|
113
|
+
opts.candidate?.toolName,
|
|
114
|
+
opts.sharedModules,
|
|
115
|
+
);
|
|
93
116
|
|
|
94
117
|
// 1. Load + validate the session
|
|
95
118
|
let session: Session = loadJsonFile(
|
|
@@ -169,7 +192,7 @@ export async function compileAgent(opts: CompileAgentOptions): Promise<CompileAg
|
|
|
169
192
|
`System prompt not found at ${systemPromptPath}\n→ this is an Imprint installation problem; please file an issue at https://github.com/ashaychangwani/imprint/issues with the steps you ran.`,
|
|
170
193
|
);
|
|
171
194
|
}
|
|
172
|
-
const systemPrompt = readFileSync(systemPromptPath, 'utf8');
|
|
195
|
+
const systemPrompt = `${readFileSync(systemPromptPath, 'utf8')}\n\nToday's date is ${new Date().toISOString().slice(0, 10)}.`;
|
|
173
196
|
|
|
174
197
|
// 5. Build the toolset (shared with the MCP server used by the claude-cli path)
|
|
175
198
|
const sessionPathAbs = opts.sessionPath.startsWith('/')
|
|
@@ -181,6 +204,8 @@ export async function compileAgent(opts: CompileAgentOptions): Promise<CompileAg
|
|
|
181
204
|
sharedContext: opts.sharedContext,
|
|
182
205
|
classifications: opts.classifications,
|
|
183
206
|
teachCredentials: opts.teachCredentials,
|
|
207
|
+
buildPlanPath: opts.buildPlanPath,
|
|
208
|
+
sharedModules: opts.sharedModules,
|
|
184
209
|
}),
|
|
185
210
|
doneTool(),
|
|
186
211
|
giveUpTool(),
|
|
@@ -192,7 +217,8 @@ export async function compileAgent(opts: CompileAgentOptions): Promise<CompileAg
|
|
|
192
217
|
Session path: ${sessionPathAbs}
|
|
193
218
|
Tool directory: ${absoluteToolDir}
|
|
194
219
|
You will write artifacts into the tool directory.
|
|
195
|
-
${formatCandidateContext(opts.candidate, opts.sharedContext)}
|
|
220
|
+
${formatCandidateContext(opts.candidate, opts.sharedContext, assignedSharedModules)}
|
|
221
|
+
${formatToolPlan(opts.toolPlan)}
|
|
196
222
|
|
|
197
223
|
Begin by calling read_session_summary to orient yourself, then proceed per the system prompt.`;
|
|
198
224
|
|
|
@@ -221,6 +247,9 @@ Begin by calling read_session_summary to orient yourself, then proceed per the s
|
|
|
221
247
|
keepTest: opts.keepTest,
|
|
222
248
|
candidate: opts.candidate,
|
|
223
249
|
sharedContext: opts.sharedContext,
|
|
250
|
+
buildPlanPath: opts.buildPlanPath,
|
|
251
|
+
sharedModules: opts.sharedModules,
|
|
252
|
+
toolPlan: opts.toolPlan,
|
|
224
253
|
});
|
|
225
254
|
}
|
|
226
255
|
if (resolvedProvider.name === 'codex-cli') {
|
|
@@ -235,6 +264,9 @@ Begin by calling read_session_summary to orient yourself, then proceed per the s
|
|
|
235
264
|
keepTest: opts.keepTest,
|
|
236
265
|
candidate: opts.candidate,
|
|
237
266
|
sharedContext: opts.sharedContext,
|
|
267
|
+
buildPlanPath: opts.buildPlanPath,
|
|
268
|
+
sharedModules: opts.sharedModules,
|
|
269
|
+
toolPlan: opts.toolPlan,
|
|
238
270
|
});
|
|
239
271
|
}
|
|
240
272
|
if (!isToolUseProvider(resolvedProvider)) {
|
|
@@ -249,6 +281,9 @@ Begin by calling read_session_summary to orient yourself, then proceed per the s
|
|
|
249
281
|
}
|
|
250
282
|
|
|
251
283
|
// 9. Run the agent loop with verification sub-loop
|
|
284
|
+
mkdirSync(absoluteToolDir, { recursive: true });
|
|
285
|
+
const conversationLogPath = pathJoin(absoluteToolDir, '.compile-log.json');
|
|
286
|
+
|
|
252
287
|
let totalTurns = 0;
|
|
253
288
|
let totalInputTokens = 0;
|
|
254
289
|
let totalOutputTokens = 0;
|
|
@@ -283,6 +318,10 @@ Begin by calling read_session_summary to orient yourself, then proceed per the s
|
|
|
283
318
|
deadlineMs,
|
|
284
319
|
llm: provider,
|
|
285
320
|
onProgress: wrappedOnProgress,
|
|
321
|
+
onConversationUpdate: (currentCycleLog) => {
|
|
322
|
+
const fullLog = [...conversationLog, ...currentCycleLog];
|
|
323
|
+
writeFileSync(conversationLogPath, JSON.stringify(fullLog, null, 2), 'utf8');
|
|
324
|
+
},
|
|
286
325
|
onDeadlineReached: opts.onDeadlineReached,
|
|
287
326
|
});
|
|
288
327
|
|
|
@@ -300,7 +339,7 @@ Begin by calling read_session_summary to orient yourself, then proceed per the s
|
|
|
300
339
|
}
|
|
301
340
|
|
|
302
341
|
// Perform external verification
|
|
303
|
-
const { failures, warnings } = await externalVerification(
|
|
342
|
+
const { failures, warnings, paramVerification, liveVerification } = await externalVerification(
|
|
304
343
|
absoluteToolDir,
|
|
305
344
|
session,
|
|
306
345
|
sessionPathAbs,
|
|
@@ -308,6 +347,13 @@ Begin by calling read_session_summary to orient yourself, then proceed per the s
|
|
|
308
347
|
expectedToolName: opts.candidate?.toolName,
|
|
309
348
|
likelyParams: opts.candidate?.likelyParams,
|
|
310
349
|
candidateRequestSeqs: opts.candidate?.requestSeqs,
|
|
350
|
+
// Widen Fix B's variation pool to the dependency requests (e.g. a
|
|
351
|
+
// bootstrap GET) so a session token that varies only across dependency
|
|
352
|
+
// seqs and is then frozen as a literal in the tool's request is caught.
|
|
353
|
+
dependencyRequestSeqs: opts.candidate?.dependencySeqs,
|
|
354
|
+
assignedSharedModules,
|
|
355
|
+
tokenParams,
|
|
356
|
+
emittedTokens,
|
|
311
357
|
},
|
|
312
358
|
);
|
|
313
359
|
|
|
@@ -316,10 +362,19 @@ Begin by calling read_session_summary to orient yourself, then proceed per the s
|
|
|
316
362
|
}
|
|
317
363
|
|
|
318
364
|
if (failures.length === 0) {
|
|
319
|
-
// Success (possibly with warnings)
|
|
365
|
+
// Success (possibly with warnings). Persist per-parameter verified flags
|
|
366
|
+
// and the live-verification stamp into workflow.json so downstream
|
|
367
|
+
// (audit, teach summary) can see which tools shipped without a passing
|
|
368
|
+
// live call.
|
|
369
|
+
applyLiveVerification(absoluteToolDir, liveVerification);
|
|
370
|
+
const paramWarnings = applyParamVerification(absoluteToolDir, paramVerification);
|
|
371
|
+
const allWarnings = [...warnings, ...paramWarnings];
|
|
372
|
+
if (paramWarnings.length > 0) {
|
|
373
|
+
log(`parameter verification:\n${paramWarnings.join('\n')}`);
|
|
374
|
+
}
|
|
320
375
|
message = result.doneSummary ?? 'Task completed';
|
|
321
|
-
if (warnings.length > 0) {
|
|
322
|
-
message += `\n\nWarnings:\n${warnings.join('\n')}`;
|
|
376
|
+
if (allWarnings.length > 0) {
|
|
377
|
+
message += `\n\nWarnings:\n${allWarnings.join('\n')}`;
|
|
323
378
|
}
|
|
324
379
|
if (!opts.keepTest) {
|
|
325
380
|
for (const f of ['parser.test.ts', 'integration.test.ts']) {
|
|
@@ -345,9 +400,7 @@ ${failures.map((f) => `- ${f}`).join('\n')}
|
|
|
345
400
|
Resume your work. Read the files you wrote (workflow.json, parser.ts, parser.test.ts), fix the issues, re-run tests, and call done again when fixed.`;
|
|
346
401
|
}
|
|
347
402
|
|
|
348
|
-
// 10.
|
|
349
|
-
mkdirSync(absoluteToolDir, { recursive: true });
|
|
350
|
-
const conversationLogPath = pathJoin(absoluteToolDir, '.compile-log.json');
|
|
403
|
+
// 10. Final flush of the complete conversation log
|
|
351
404
|
writeFileSync(conversationLogPath, JSON.stringify(conversationLog, null, 2), 'utf8');
|
|
352
405
|
|
|
353
406
|
// 11. Return the result
|
|
@@ -387,18 +440,3 @@ function buildMessageFromOutcome(result: AgentResult): string {
|
|
|
387
440
|
return 'Unknown outcome';
|
|
388
441
|
}
|
|
389
442
|
}
|
|
390
|
-
|
|
391
|
-
function formatCandidateContext(
|
|
392
|
-
candidate: ToolCandidate | undefined,
|
|
393
|
-
sharedContext: SharedCompileContext | undefined,
|
|
394
|
-
): string {
|
|
395
|
-
if (!candidate && !sharedContext) return '';
|
|
396
|
-
return `
|
|
397
|
-
Selected candidate context:
|
|
398
|
-
${candidate ? JSON.stringify(candidate, null, 2) : '(none)'}
|
|
399
|
-
|
|
400
|
-
Shared compile context:
|
|
401
|
-
${sharedContext ? JSON.stringify(sharedContext, null, 2) : '(none)'}
|
|
402
|
-
|
|
403
|
-
Compile only the selected candidate. Do not create tools for other actions in the recording.`;
|
|
404
|
-
}
|