imprint-mcp 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129)
  1. package/README.md +165 -201
  2. package/examples/discoverandgo/README.md +1 -1
  3. package/examples/echo/README.md +1 -1
  4. package/examples/google-flights/README.md +28 -0
  5. package/examples/google-flights/_shared/batchexecute.ts +63 -0
  6. package/examples/google-flights/_shared/flights_request.ts +95 -0
  7. package/examples/google-flights/_shared/package.json +9 -0
  8. package/examples/google-flights/get_flight_booking_details/index.ts +159 -0
  9. package/examples/google-flights/get_flight_booking_details/package.json +9 -0
  10. package/examples/google-flights/get_flight_booking_details/parser.ts +182 -0
  11. package/examples/google-flights/get_flight_booking_details/playbook.yaml +138 -0
  12. package/examples/google-flights/get_flight_booking_details/request-transform.ts +86 -0
  13. package/examples/google-flights/get_flight_booking_details/workflow.json +98 -0
  14. package/examples/google-flights/get_flight_calendar_prices/index.ts +131 -0
  15. package/examples/google-flights/get_flight_calendar_prices/package.json +9 -0
  16. package/examples/google-flights/get_flight_calendar_prices/parser.ts +86 -0
  17. package/examples/google-flights/get_flight_calendar_prices/playbook.yaml +97 -0
  18. package/examples/google-flights/get_flight_calendar_prices/request-transform.ts +31 -0
  19. package/examples/google-flights/get_flight_calendar_prices/workflow.json +76 -0
  20. package/examples/google-flights/lookup_airport/index.ts +101 -0
  21. package/examples/google-flights/lookup_airport/package.json +9 -0
  22. package/examples/google-flights/lookup_airport/parser.ts +66 -0
  23. package/examples/google-flights/lookup_airport/playbook.yaml +47 -0
  24. package/examples/google-flights/lookup_airport/request-transform.ts +20 -0
  25. package/examples/google-flights/lookup_airport/workflow.json +57 -0
  26. package/examples/google-flights/search_flights/index.ts +219 -0
  27. package/examples/google-flights/search_flights/package.json +9 -0
  28. package/examples/google-flights/search_flights/parser.ts +169 -0
  29. package/examples/google-flights/search_flights/playbook.yaml +184 -0
  30. package/examples/google-flights/search_flights/request-transform.ts +119 -0
  31. package/examples/google-flights/search_flights/workflow.json +143 -0
  32. package/examples/google-hotels/README.md +29 -0
  33. package/examples/google-hotels/_shared/batchexecute.ts +73 -0
  34. package/examples/google-hotels/_shared/freq.ts +158 -0
  35. package/examples/google-hotels/_shared/package.json +9 -0
  36. package/examples/google-hotels/autocomplete_hotel_location/index.ts +80 -0
  37. package/examples/google-hotels/autocomplete_hotel_location/package.json +9 -0
  38. package/examples/google-hotels/autocomplete_hotel_location/parser.ts +71 -0
  39. package/examples/google-hotels/autocomplete_hotel_location/playbook.yaml +36 -0
  40. package/examples/google-hotels/autocomplete_hotel_location/request-transform.ts +37 -0
  41. package/examples/google-hotels/autocomplete_hotel_location/workflow.json +36 -0
  42. package/examples/google-hotels/get_hotel_booking_options/index.ts +143 -0
  43. package/examples/google-hotels/get_hotel_booking_options/package.json +9 -0
  44. package/examples/google-hotels/get_hotel_booking_options/parser.ts +271 -0
  45. package/examples/google-hotels/get_hotel_booking_options/playbook.yaml +154 -0
  46. package/examples/google-hotels/get_hotel_booking_options/request-transform.ts +154 -0
  47. package/examples/google-hotels/get_hotel_booking_options/workflow.json +84 -0
  48. package/examples/google-hotels/get_hotel_reviews/index.ts +81 -0
  49. package/examples/google-hotels/get_hotel_reviews/package.json +9 -0
  50. package/examples/google-hotels/get_hotel_reviews/parser.ts +128 -0
  51. package/examples/google-hotels/get_hotel_reviews/playbook.yaml +64 -0
  52. package/examples/google-hotels/get_hotel_reviews/request-transform.ts +42 -0
  53. package/examples/google-hotels/get_hotel_reviews/workflow.json +37 -0
  54. package/examples/google-hotels/search_hotels/index.ts +207 -0
  55. package/examples/google-hotels/search_hotels/package.json +9 -0
  56. package/examples/google-hotels/search_hotels/parser.ts +260 -0
  57. package/examples/google-hotels/search_hotels/playbook.yaml +87 -0
  58. package/examples/google-hotels/search_hotels/request-transform.ts +197 -0
  59. package/examples/google-hotels/search_hotels/workflow.json +127 -0
  60. package/package.json +3 -2
  61. package/prompts/audit-agent.md +71 -0
  62. package/prompts/build-planning.md +74 -0
  63. package/prompts/compile-agent.md +132 -28
  64. package/prompts/prereq-builder.md +64 -0
  65. package/prompts/prereq-planner.md +34 -0
  66. package/prompts/tool-planning.md +39 -0
  67. package/src/cli.ts +111 -4
  68. package/src/imprint/agent.ts +5 -0
  69. package/src/imprint/audit.ts +996 -0
  70. package/src/imprint/backend-ladder.ts +1214 -184
  71. package/src/imprint/build-plan.ts +1051 -0
  72. package/src/imprint/cdp-browser-fetch.ts +589 -0
  73. package/src/imprint/cdp-jar-cache.ts +320 -0
  74. package/src/imprint/chromium.ts +135 -0
  75. package/src/imprint/claude-cli-compile.ts +125 -25
  76. package/src/imprint/codex-cli-compile.ts +26 -23
  77. package/src/imprint/compile-agent-types.ts +38 -0
  78. package/src/imprint/compile-agent.ts +65 -27
  79. package/src/imprint/compile-tools.ts +1656 -64
  80. package/src/imprint/compile.ts +14 -2
  81. package/src/imprint/concurrency.ts +87 -0
  82. package/src/imprint/credential-extract.ts +174 -25
  83. package/src/imprint/cron.ts +1 -0
  84. package/src/imprint/doctor.ts +39 -0
  85. package/src/imprint/emit.ts +85 -0
  86. package/src/imprint/freeform-redact.ts +5 -4
  87. package/src/imprint/integrations.ts +2 -2
  88. package/src/imprint/llm.ts +56 -8
  89. package/src/imprint/mcp-compile-server.ts +43 -10
  90. package/src/imprint/mcp-maintenance.ts +9 -101
  91. package/src/imprint/mcp-server.ts +73 -7
  92. package/src/imprint/multi-progress.ts +7 -2
  93. package/src/imprint/param-grounding.ts +367 -0
  94. package/src/imprint/paths.ts +29 -0
  95. package/src/imprint/playbook-runner.ts +101 -40
  96. package/src/imprint/prereq-builder.ts +651 -0
  97. package/src/imprint/probe-backends.ts +6 -3
  98. package/src/imprint/record.ts +10 -1
  99. package/src/imprint/redact.ts +30 -2
  100. package/src/imprint/replay-capture.ts +19 -18
  101. package/src/imprint/runtime.ts +19 -10
  102. package/src/imprint/sensitive-keys.ts +141 -7
  103. package/src/imprint/session-diff.ts +79 -2
  104. package/src/imprint/session-merge.ts +9 -5
  105. package/src/imprint/stealth-chromium.ts +81 -0
  106. package/src/imprint/stealth-fetch.ts +309 -29
  107. package/src/imprint/stealth-token-cache.ts +88 -0
  108. package/src/imprint/teach-plan.ts +251 -0
  109. package/src/imprint/teach-state.ts +17 -0
  110. package/src/imprint/teach.ts +582 -147
  111. package/src/imprint/tool-candidates.ts +72 -14
  112. package/src/imprint/tool-plan.ts +313 -0
  113. package/src/imprint/tracing.ts +135 -6
  114. package/src/imprint/types.ts +61 -3
  115. package/examples/google-flights/search_google_flights/index.ts +0 -101
  116. package/examples/google-flights/search_google_flights/parser.test.ts +0 -140
  117. package/examples/google-flights/search_google_flights/parser.ts +0 -189
  118. package/examples/google-flights/search_google_flights/playbook.yaml +0 -130
  119. package/examples/google-flights/search_google_flights/workflow.json +0 -48
  120. package/examples/google-hotels/search_google_hotels/index.ts +0 -194
  121. package/examples/google-hotels/search_google_hotels/parser.test.ts +0 -168
  122. package/examples/google-hotels/search_google_hotels/parser.ts +0 -330
  123. package/examples/google-hotels/search_google_hotels/playbook.yaml +0 -125
  124. package/examples/google-hotels/search_google_hotels/workflow.json +0 -111
  125. package/examples/namecheap-domains/search_namecheap_domains/index.ts +0 -144
  126. package/examples/namecheap-domains/search_namecheap_domains/parser.ts +0 -380
  127. package/examples/namecheap-domains/search_namecheap_domains/playbook.yaml +0 -50
  128. package/examples/namecheap-domains/search_namecheap_domains/request-transform.ts +0 -136
  129. package/examples/namecheap-domains/search_namecheap_domains/workflow.json +0 -97
@@ -29,7 +29,9 @@ import { existsSync, mkdirSync, readFileSync, unlinkSync, writeFileSync } from '
29
29
  import { join as pathJoin } from 'node:path';
30
30
  import { type Span, context as otelContext } from '@opentelemetry/api';
31
31
  import type { OnDeadlineReached } from './agent.ts';
32
+ import { type SharedModuleManifestEntry, resolvePlanSliceFromFile } from './build-plan.ts';
32
33
  import type { CompileAgentProgress, CompileAgentResult } from './compile-agent-types.ts';
34
+ import { formatCandidateContext, formatToolPlan } from './compile-agent-types.ts';
33
35
  import { preferredAgentModel } from './llm.ts';
34
36
  import { createLog } from './log.ts';
35
37
  import { COMPILE_SENTINELS } from './mcp-compile-server.ts';
@@ -39,6 +41,7 @@ import {
39
41
  llmSpanAttributes,
40
42
  setSpanAttributes,
41
43
  startTraceSpan,
44
+ totalPromptTokens,
42
45
  traceJsonInputOutputAttributes,
43
46
  traceLlmIoEnabled,
44
47
  traced,
@@ -52,6 +55,35 @@ const CLI_PATH = pathJoin(REPO_ROOT, 'src', 'cli.ts');
52
55
  const MCP_SERVER_NAME = 'imprint-compile';
53
56
  const MAX_VERIFICATION_CYCLES = 5;
54
57
 
58
+ /**
59
+ * Thinking effort for the compile agent. Deliberately `high`, not `max`:
60
+ * empirically, max-effort thinking generates a large volume of reasoning tokens
61
+ * on reverse-engineering tasks, which measurably raises the model's usage-policy
62
+ * safety-filter false-positive rate. `high` keeps strong reasoning with far
63
+ * fewer spurious refusals. Passed as an explicit `--effort` flag so it overrides
64
+ * any CLAUDE_EFFORT inherited from the environment.
65
+ */
66
+ const COMPILE_EFFORT_LEVEL = 'high';
67
+
68
+ /**
69
+ * Signature of Claude Code's usage-policy safety refusal (surfaced in the
70
+ * terminal result event / our error message). The block is a transient,
71
+ * probabilistic false positive on legitimate compiles, so we retry a fresh
72
+ * session a few times before surfacing it as a hard failure.
73
+ */
74
+ const USAGE_POLICY_REFUSAL =
75
+ /unable to respond to this request|appears to violate our Usage Policy/i;
76
+
77
+ /** Total attempts (1 initial + retries) when a usage-policy refusal is hit. */
78
+ const MAX_USAGE_POLICY_ATTEMPTS = 3;
79
+
80
+ /** Exponential backoff with jitter between refusal retries. Spacing matters:
81
+ * bursts of near-identical requests raise the safety-filter trip rate.
+ *
+ * @param attempt 1-based number of the attempt that was just refused.
+ * @returns Delay in milliseconds: a base of 5000 * 2^(attempt - 1), plus a
+ * random whole-number jitter that is always less than half of that base. */
82
+ function usagePolicyBackoffMs(attempt: number): number {
83
+ const base = 5000 * 2 ** (attempt - 1); // 5s, 10s, ...
84
+ return base + Math.floor(Math.random() * base * 0.5);
85
+ }
86
+
55
87
  interface CompileViaClaudeCliOptions {
56
88
  session: Session;
57
89
  absoluteToolDir: string;
@@ -67,6 +99,12 @@ interface CompileViaClaudeCliOptions {
67
99
  keepTest?: boolean;
68
100
  candidate?: ToolCandidate;
69
101
  sharedContext?: SharedCompileContext;
102
+ /** Absolute path to the multi-tool build plan sidecar (.build-plan.json). */
103
+ buildPlanPath?: string;
104
+ /** Shared-module build manifest for this site (verified flags). */
105
+ sharedModules?: SharedModuleManifestEntry[];
106
+ /** Per-tool implementation plan injected into the agent's initial message. */
107
+ toolPlan?: string;
70
108
  }
71
109
 
72
110
  interface StreamJsonEvent {
@@ -124,7 +162,14 @@ export async function compileViaClaudeCli(
124
162
  ...llmSpanAttributes({
125
163
  provider: 'claude-cli',
126
164
  model: preferredAgentModel('claude-cli'),
127
- inputTokens: result.inputTokens,
165
+ // TOTAL prompt (uncached + cache); the cache split is passed separately
166
+ // for cost. `result.inputTokens` alone is the uncached delta (often a
167
+ // few hundred), which would mislabel `llm.token_count.prompt`.
168
+ inputTokens: totalPromptTokens(
169
+ result.inputTokens,
170
+ result.cacheReadInputTokens,
171
+ result.cacheCreationInputTokens,
172
+ ),
128
173
  outputTokens: result.outputTokens,
129
174
  cacheReadTokens: result.cacheReadInputTokens,
130
175
  cacheWriteTokens: result.cacheCreationInputTokens,
@@ -135,9 +180,41 @@ export async function compileViaClaudeCli(
135
180
  );
136
181
  }
137
182
 
183
+ /**
184
+ * Drives the compile, retrying a fresh claude-cli session when an attempt is
185
+ * blocked by the usage-policy safety filter. The block is a flaky false positive
186
+ * (see USAGE_POLICY_REFUSAL); a re-roll almost always succeeds. All other
187
+ * outcomes (success, give_up, verification failure, timeout) return immediately.
188
+ *
+ * An attempt counts as a refusal only when `result.success` is falsy AND
+ * `result.message` matches USAGE_POLICY_REFUSAL. At most
+ * MAX_USAGE_POLICY_ATTEMPTS attempts are made, with a jittered backoff sleep
+ * between attempts (none after the last one).
+ *
+ * @param opts Compile options; the same object is passed unchanged to every attempt.
+ * @returns The first non-refusal result, or — when every attempt was refused —
+ * the last refusal result with an explanatory paragraph appended to `message`.
+ *
+ * NOTE(review): whether a wall-clock budget carried in `opts` is shared across
+ * attempts or restarts per attempt is not visible from this function — confirm
+ * in runClaudeCliAttempt.
+ */
138
189
  async function compileViaClaudeCliImpl(
139
190
  opts: CompileViaClaudeCliOptions,
140
191
  ): Promise<CompileAgentResult> {
192
+ let lastResult: CompileAgentResult | undefined;
193
+ for (let attempt = 1; attempt <= MAX_USAGE_POLICY_ATTEMPTS; attempt++) {
194
+ const result = await runClaudeCliAttempt(opts);
195
+ const isRefusal = !result.success && USAGE_POLICY_REFUSAL.test(result.message ?? '');
196
+ if (!isRefusal) return result; // success or any non-refusal failure is surfaced as-is
197
+ lastResult = result;
198
+ if (attempt < MAX_USAGE_POLICY_ATTEMPTS) { // back off only when another attempt will follow
199
+ const backoffMs = usagePolicyBackoffMs(attempt);
200
+ log(
201
+ `usage-policy refusal on attempt ${attempt}/${MAX_USAGE_POLICY_ATTEMPTS}; ` +
202
+ `retrying a fresh session in ${Math.round(backoffMs / 1000)}s`,
203
+ );
204
+ await new Promise((resolve) => setTimeout(resolve, backoffMs));
205
+ }
206
+ }
207
+
208
+ // Every attempt was blocked. Annotate the final error so the operator knows
209
+ // it was the (flaky) safety filter, not their recording or workflow.
210
+ const exhausted = lastResult as CompileAgentResult; // reached only after a refusal assigned lastResult (requires MAX_USAGE_POLICY_ATTEMPTS >= 1)
211
+ return {
212
+ ...exhausted,
213
+ message: `${exhausted.message}\n\nBlocked by the model's usage-policy safety filter on all ${MAX_USAGE_POLICY_ATTEMPTS} attempts. This is typically a transient false positive on reverse-engineering compiles — re-run this tool, or compile it with a different provider (e.g. codex-cli).`,
214
+ };
215
+ }
216
+
217
+ async function runClaudeCliAttempt(opts: CompileViaClaudeCliOptions): Promise<CompileAgentResult> {
141
218
  // Ensure tool dir exists and clear any prior sentinels — a stale
142
219
  // sentinel from a previous run would short-circuit our success detection.
143
220
  mkdirSync(opts.absoluteToolDir, { recursive: true });
@@ -175,17 +252,27 @@ async function compileViaClaudeCliImpl(
175
252
  ...(opts.sharedContext
176
253
  ? ['--shared-context-json', JSON.stringify(opts.sharedContext)]
177
254
  : []),
255
+ ...(opts.buildPlanPath ? ['--build-plan-path', opts.buildPlanPath] : []),
256
+ ...(opts.sharedModules
257
+ ? ['--shared-modules-json', JSON.stringify(opts.sharedModules)]
258
+ : []),
178
259
  ],
179
260
  },
180
261
  },
181
262
  };
182
263
 
264
+ const { assignedSharedModules } = resolvePlanSliceFromFile(
265
+ opts.buildPlanPath,
266
+ opts.candidate?.toolName,
267
+ opts.sharedModules,
268
+ );
183
269
  const initialPrompt = `A new compile task is starting.
184
270
 
185
271
  Session path: ${sessionPathAbs}
186
272
  Tool directory: ${opts.absoluteToolDir}
187
273
  You will write artifacts into the tool directory.
188
- ${formatCandidateContext(opts.candidate, opts.sharedContext)}
274
+ ${formatCandidateContext(opts.candidate, opts.sharedContext, assignedSharedModules)}
275
+ ${formatToolPlan(opts.toolPlan)}
189
276
 
190
277
  Begin by calling read_session_summary to orient yourself, then proceed per the system prompt.`;
191
278
 
@@ -199,6 +286,8 @@ Begin by calling read_session_summary to orient yourself, then proceed per the s
199
286
  JSON.stringify(mcpConfig),
200
287
  '--system-prompt-file',
201
288
  opts.systemPromptPath,
289
+ '--append-system-prompt',
290
+ `Today's date is ${new Date().toISOString().slice(0, 10)}.`,
202
291
  // Disable the built-in tool set so claude only uses our MCP tools.
203
292
  '--tools',
204
293
  '',
@@ -221,6 +310,8 @@ Begin by calling read_session_summary to orient yourself, then proceed per the s
221
310
  '--allowedTools',
222
311
  `mcp__${MCP_SERVER_NAME}__run_tests`,
223
312
  '--allowedTools',
313
+ `mcp__${MCP_SERVER_NAME}__read_build_plan`,
314
+ '--allowedTools',
224
315
  `mcp__${MCP_SERVER_NAME}__done`,
225
316
  '--allowedTools',
226
317
  `mcp__${MCP_SERVER_NAME}__give_up`,
@@ -234,6 +325,9 @@ Begin by calling read_session_summary to orient yourself, then proceed per the s
234
325
  'bypassPermissions',
235
326
  '--no-session-persistence',
236
327
  '--disable-slash-commands',
328
+ // Cap thinking effort below `max` to reduce usage-policy false positives.
329
+ '--effort',
330
+ COMPILE_EFFORT_LEVEL,
237
331
  '--model',
238
332
  preferredAgentModel('claude-cli'),
239
333
  initialPrompt,
@@ -245,7 +339,26 @@ Begin by calling read_session_summary to orient yourself, then proceed per the s
245
339
  try {
246
340
  child = spawn('claude', args, {
247
341
  cwd: REPO_ROOT,
248
- env: process.env,
342
+ // Claude CLI's default MCP_TOOL_TIMEOUT is 60s. The compile MCP
343
+ // server's `done` tool runs external verification inline — bun test
344
+ // (up to 60s × 3 retries for the integration suite + 120s for the
345
+ // parser suite) plus typechecking. On bot-protected sites where the
346
+ // integration test escalates fetch → fetch-bootstrap → stealth-fetch
347
+ // for every assertion, a single bun test pass can run 30s × 3
348
+ // rungs × N tests = 10-15 min before the outer wrapper kills it,
349
+ // and 3 retries push the total well past 30 min. A 10-min cap was
350
+ // not enough — set 30 min so the worst-case verification can
351
+ // actually complete and the agent receives the failure feedback
352
+ // (and ships with `liveVerified: false` via the waiver path)
353
+ // rather than getting `-32000: Connection closed` mid-call and
354
+ // wasting the rest of its turn budget. Honor user-set env so an
355
+ // operator on a fast network can tighten without editing source.
356
+ // Connection-startup timeout stays at 60s for cold Playwright boot.
357
+ env: {
358
+ ...process.env,
359
+ MCP_TOOL_TIMEOUT: process.env.MCP_TOOL_TIMEOUT ?? '1800000',
360
+ MCP_TIMEOUT: process.env.MCP_TIMEOUT ?? '60000',
361
+ },
249
362
  stdio: ['ignore', 'pipe', 'pipe'],
250
363
  });
251
364
  } catch (err) {
@@ -267,6 +380,12 @@ async function driveStreamJson(
267
380
  const parentCtx = otelContext.active();
268
381
 
269
382
  const conversationLog: unknown[] = [];
383
+ const conversationLogPath = pathJoin(opts.absoluteToolDir, '.compile-log.json');
384
+ const flushLog = (): void => { // synchronously rewrites the whole conversation log at conversationLogPath
385
+ try {
386
+ writeFileSync(conversationLogPath, JSON.stringify(conversationLog, null, 2), 'utf8');
387
+ } catch {} // NOTE(review): write errors are swallowed silently; the end-of-run write this replaces logged "failed to persist conversation log" — confirm the silence is intended for the final flush too
388
+ };
270
389
  const captureLlmIo = traceLlmIoEnabled();
271
390
  let inputTokens = 0;
272
391
  let outputTokens = 0;
@@ -376,6 +495,7 @@ async function driveStreamJson(
376
495
  });
377
496
  endTraceSpan(currentTurnSpan);
378
497
  }
498
+ flushLog();
379
499
  turn++;
380
500
  turnInputTokens = 0;
381
501
  turnOutputTokens = 0;
@@ -460,13 +580,8 @@ async function driveStreamJson(
460
580
  log(`unflushed stdout tail (${stdoutBuf.length} bytes) discarded`);
461
581
  }
462
582
 
463
- // Persist conversation log for post-mortem.
464
- const conversationLogPath = pathJoin(opts.absoluteToolDir, '.compile-log.json');
465
- try {
466
- writeFileSync(conversationLogPath, JSON.stringify(conversationLog, null, 2), 'utf8');
467
- } catch (err) {
468
- log(`failed to persist conversation log: ${errMsg(err)}`);
469
- }
583
+ // Final flush of the complete conversation log.
584
+ flushLog();
470
585
 
471
586
  // Inspect sentinels to determine outcome.
472
587
  const doneSentinel = pathJoin(opts.absoluteToolDir, COMPILE_SENTINELS.done);
@@ -620,21 +735,6 @@ function finalErrorResult(opts: CompileViaClaudeCliOptions, message: string): Co
620
735
  };
621
736
  }
622
737
 
623
- function formatCandidateContext(
624
- candidate: ToolCandidate | undefined,
625
- sharedContext: SharedCompileContext | undefined,
626
- ): string {
627
- if (!candidate && !sharedContext) return '';
628
- return `
629
- Selected candidate context:
630
- ${candidate ? JSON.stringify(candidate, null, 2) : '(none)'}
631
-
632
- Shared compile context:
633
- ${sharedContext ? JSON.stringify(sharedContext, null, 2) : '(none)'}
634
-
635
- Compile only the selected candidate. Do not create tools for other actions in the recording.`;
636
- }
637
-
638
738
  /** Turn a caught value into text: `.message` for Error instances, `String(err)` for anything else. */
  function errMsg(err: unknown): string {
639
739
  return err instanceof Error ? err.message : String(err);
640
740
  }
@@ -11,7 +11,9 @@ import { type ChildProcess, spawn } from 'node:child_process';
11
11
  import { existsSync, mkdirSync, readFileSync, unlinkSync, writeFileSync } from 'node:fs';
12
12
  import { isAbsolute as pathIsAbsolute, join as pathJoin } from 'node:path';
13
13
  import { type Span, context as otelContext } from '@opentelemetry/api';
14
+ import { type SharedModuleManifestEntry, resolvePlanSliceFromFile } from './build-plan.ts';
14
15
  import type { CompileAgentProgress, CompileAgentResult } from './compile-agent-types.ts';
16
+ import { formatCandidateContext, formatToolPlan } from './compile-agent-types.ts';
15
17
  import { preferredAgentModel } from './llm.ts';
16
18
  import { createLog } from './log.ts';
17
19
  import { COMPILE_SENTINELS } from './mcp-compile-server.ts';
@@ -48,6 +50,12 @@ interface CompileViaCodexCliOptions {
48
50
  keepTest?: boolean;
49
51
  candidate?: ToolCandidate;
50
52
  sharedContext?: SharedCompileContext;
53
+ /** Absolute path to the multi-tool build plan sidecar (.build-plan.json). */
54
+ buildPlanPath?: string;
55
+ /** Shared-module build manifest for this site (verified flags). */
56
+ sharedModules?: SharedModuleManifestEntry[];
57
+ /** Per-tool implementation plan injected into the agent's initial message. */
58
+ toolPlan?: string;
51
59
  }
52
60
 
53
61
  interface CodexJsonEvent {
@@ -141,15 +149,22 @@ async function compileViaCodexCliImpl(
141
149
  opts.absoluteToolDir,
142
150
  ...(opts.candidate ? ['--candidate-json', JSON.stringify(opts.candidate)] : []),
143
151
  ...(opts.sharedContext ? ['--shared-context-json', JSON.stringify(opts.sharedContext)] : []),
152
+ ...(opts.buildPlanPath ? ['--build-plan-path', opts.buildPlanPath] : []),
153
+ ...(opts.sharedModules ? ['--shared-modules-json', JSON.stringify(opts.sharedModules)] : []),
144
154
  ];
145
155
 
146
156
  let systemPrompt: string;
147
157
  try {
148
- systemPrompt = readFileSync(opts.systemPromptPath, 'utf8');
158
+ systemPrompt = `${readFileSync(opts.systemPromptPath, 'utf8')}\n\nToday's date is ${new Date().toISOString().slice(0, 10)}.`;
149
159
  } catch (err) {
150
160
  return finalErrorResult(opts, `failed to read system prompt: ${errMsg(err)}`);
151
161
  }
152
162
 
163
+ const { assignedSharedModules } = resolvePlanSliceFromFile(
164
+ opts.buildPlanPath,
165
+ opts.candidate?.toolName,
166
+ opts.sharedModules,
167
+ );
153
168
  const initialPrompt = `<system_instructions>
154
169
  ${systemPrompt}
155
170
  </system_instructions>
@@ -159,7 +174,8 @@ A new compile task is starting.
159
174
  Session path: ${sessionPathAbs}
160
175
  Tool directory: ${opts.absoluteToolDir}
161
176
  You will write artifacts into the tool directory.
162
- ${formatCandidateContext(opts.candidate, opts.sharedContext)}
177
+ ${formatCandidateContext(opts.candidate, opts.sharedContext, assignedSharedModules)}
178
+ ${formatToolPlan(opts.toolPlan)}
163
179
 
164
180
  Use the imprint-compile MCP tools to inspect the session, write artifacts, run tests, and call done(). Begin by calling read_session_summary, then proceed per the system instructions.`;
165
181
 
@@ -277,6 +293,12 @@ async function driveJsonl(
277
293
  const parentCtx = otelContext.active();
278
294
 
279
295
  const conversationLog: unknown[] = [];
296
+ const conversationLogPath = pathJoin(opts.absoluteToolDir, '.compile-log.json');
297
+ const flushLog = (): void => { // synchronously rewrites the whole conversation log at conversationLogPath
298
+ try {
299
+ writeFileSync(conversationLogPath, JSON.stringify(conversationLog, null, 2), 'utf8');
300
+ } catch {} // NOTE(review): write errors are swallowed silently; the end-of-run write this replaces logged "failed to persist conversation log" — confirm the silence is intended for the final flush too
301
+ };
280
302
  let inputTokens = 0;
281
303
  let outputTokens = 0;
282
304
  let turn = 0;
@@ -357,6 +379,7 @@ async function driveJsonl(
357
379
 
358
380
  if (evt.type === 'turn.started') {
359
381
  if (currentTurnSpan) endTraceSpan(currentTurnSpan);
382
+ flushLog();
360
383
  turn++;
361
384
  currentTurnSpan = startTraceSpan(`agent.turn.${turn}`, 'CHAIN', {
362
385
  'imprint.agent.turn': turn,
@@ -438,12 +461,7 @@ async function driveJsonl(
438
461
  log(`unflushed stdout tail (${stdoutBuf.length} bytes) discarded`);
439
462
  }
440
463
 
441
- const conversationLogPath = pathJoin(opts.absoluteToolDir, '.compile-log.json');
442
- try {
443
- writeFileSync(conversationLogPath, JSON.stringify(conversationLog, null, 2), 'utf8');
444
- } catch (err) {
445
- log(`failed to persist conversation log: ${errMsg(err)}`);
446
- }
464
+ flushLog();
447
465
 
448
466
  const workflowPath = pathJoin(opts.absoluteToolDir, 'workflow.json');
449
467
  const parserPath = pathJoin(opts.absoluteToolDir, 'parser.ts');
@@ -692,21 +710,6 @@ function finalErrorResult(opts: CompileViaCodexCliOptions, message: string): Com
692
710
  };
693
711
  }
694
712
 
695
- function formatCandidateContext(
696
- candidate: ToolCandidate | undefined,
697
- sharedContext: SharedCompileContext | undefined,
698
- ): string {
699
- if (!candidate && !sharedContext) return '';
700
- return `
701
- Selected candidate context:
702
- ${candidate ? JSON.stringify(candidate, null, 2) : '(none)'}
703
-
704
- Shared compile context:
705
- ${sharedContext ? JSON.stringify(sharedContext, null, 2) : '(none)'}
706
-
707
- Compile only the selected candidate. Do not create tools for other actions in the recording.`;
708
- }
709
-
710
713
  /** Turn a caught value into text: `.message` for Error instances, `String(err)` for anything else. */
  function errMsg(err: unknown): string {
711
714
  return err instanceof Error ? err.message : String(err);
712
715
  }
@@ -7,6 +7,44 @@
7
7
  */
8
8
 
9
9
  import type { AgentProgress } from './agent.ts';
10
+ import { type AssignedSharedModule, describeAssignedModules } from './build-plan.ts';
11
+ import type { SharedCompileContext, ToolCandidate } from './tool-candidates.ts';
12
+
13
+ /** Render a per-tool implementation plan (param→field mapping, request
14
+ * construction, response parsing, shared-module imports, edge cases) into an
15
+ * initial-message section the compile agent must follow. Shared verbatim by the
16
+ * in-process loop and both CLI drivers. Generic — carries no site-specific
17
+ * content; the plan itself is derived per-tool from the recording.
+ *
+ * @param toolPlan Plan text; surrounding whitespace is trimmed before use.
+ * @returns The "IMPLEMENTATION PLAN" preamble followed by the trimmed plan, or
+ * an empty string when `toolPlan` is undefined, empty, or whitespace-only. */
18
+ export function formatToolPlan(toolPlan: string | undefined): string {
19
+ const plan = toolPlan?.trim();
20
+ if (!plan) return ''; // no plan supplied: contribute nothing to the initial message
21
+ return `
22
+
23
+ IMPLEMENTATION PLAN — a planning pass analyzed the recording for THIS tool and produced the plan below. Follow it. It maps each parameter to its recorded field, specifies how to construct the request(s) and parse the response, and names the shared modules to import. Deviate only where the recorded data plainly contradicts the plan; if you do, note the correction in a brief code comment.
24
+
25
+ ${plan}`;
26
+ }
27
+
28
+ /** Render the selected candidate + shared compile context (and any assigned
29
+ * shared modules) into the compile agent's initial message. Shared verbatim by
30
+ * the in-process loop and both CLI drivers.
+ *
+ * @param candidate Selected tool candidate; rendered as 2-space-indented JSON,
+ * or the literal "(none)" when undefined.
+ * @param sharedContext Shared compile context; rendered the same way as `candidate`.
+ * @param assignedSharedModules Optional modules assigned to this tool; when
+ * provided, the text from describeAssignedModules is appended directly after
+ * the closing "Compile only the selected candidate…" instruction.
+ * @returns The rendered section, or an empty string when both `candidate` and
+ * `sharedContext` are missing.
+ *
+ * NOTE(review): the early return ignores `assignedSharedModules`, so assigned
+ * modules are not described when neither `candidate` nor `sharedContext` is
+ * given — confirm that combination cannot occur or is intended. */
31
+ export function formatCandidateContext(
32
+ candidate: ToolCandidate | undefined,
33
+ sharedContext: SharedCompileContext | undefined,
34
+ assignedSharedModules?: AssignedSharedModule[],
35
+ ): string {
36
+ if (!candidate && !sharedContext) return ''; // nothing to describe: emit no section at all
37
+ return `
38
+ Selected candidate context:
39
+ ${candidate ? JSON.stringify(candidate, null, 2) : '(none)'}
40
+
41
+ Shared compile context:
42
+ ${sharedContext ? JSON.stringify(sharedContext, null, 2) : '(none)'}
43
+
44
+ Compile only the selected candidate. Do not create tools for other actions in the recording.${
45
+ assignedSharedModules ? describeAssignedModules(assignedSharedModules) : ''
46
+ }`;
47
+ }
10
48
 
11
49
  export interface CompileAgentProgress extends AgentProgress {
12
50
  /** 1-based verification cycle. Cycle 1 is the initial agent run. Subsequent cycles
@@ -16,10 +16,17 @@ import {
16
16
  giveUpTool,
17
17
  runAgentLoop,
18
18
  } from './agent.ts';
19
+ import { type SharedModuleManifestEntry, resolvePlanSliceFromFile } from './build-plan.ts';
19
20
  import { compileViaClaudeCli } from './claude-cli-compile.ts';
20
21
  import { compileViaCodexCli } from './codex-cli-compile.ts';
21
22
  import type { CompileAgentProgress, CompileAgentResult } from './compile-agent-types.ts';
22
- import { buildCompileTools, externalVerification } from './compile-tools.ts';
23
+ import { formatCandidateContext, formatToolPlan } from './compile-agent-types.ts';
24
+ import {
25
+ applyLiveVerification,
26
+ applyParamVerification,
27
+ buildCompileTools,
28
+ externalVerification,
29
+ } from './compile-tools.ts';
23
30
  import { type Replacement, extractCredentials } from './credential-extract.ts';
24
31
  import {
25
32
  type LLMOptions,
@@ -53,7 +60,7 @@ export function resolveCompileAgentModel(provider: ProviderName): string {
53
60
  interface CompileAgentOptions {
54
61
  /** Path to the recorded session JSON (absolute or relative). */
55
62
  sessionPath: string;
56
- /** Hard wall-clock budget. Default 10 minutes. */
63
+ /** Hard wall-clock budget. Default 20 minutes. */
57
64
  maxDurationMs?: number;
58
65
  /** Override LLM config (region, model, project). */
59
66
  llmConfig?: LLMOptions;
@@ -84,12 +91,28 @@ interface CompileAgentOptions {
84
91
  classifications?: ClassifiedValue[];
85
92
  /** Credential values extracted during teach, passed to integration tests via env var. */
86
93
  teachCredentials?: { site: string; values: Record<string, string> };
94
+ /** Absolute path to the multi-tool build plan sidecar (.build-plan.json). */
95
+ buildPlanPath?: string;
96
+ /** Shared-module build manifest for this site (verified flags). */
97
+ sharedModules?: SharedModuleManifestEntry[];
87
98
  /** Called when wall-clock deadline is reached; return ms to extend or null to time out. */
88
99
  onDeadlineReached?: OnDeadlineReached;
100
+ /** Per-tool implementation plan (param→field mapping, request construction,
101
+ * response parsing, shared-module imports). Injected into the agent's initial
102
+ * message so the compile follows it. Generic — not tied to any site. */
103
+ toolPlan?: string;
89
104
  }
90
105
 
91
106
  export async function compileAgent(opts: CompileAgentOptions): Promise<CompileAgentResult> {
92
107
  const startTime = Date.now();
108
+ // Resolve the shared modules + token contracts the plan assigned this tool, so
109
+ // the in-process verifier can assert modules are imported and require a chained
110
+ // test for each producer-sourced token param.
111
+ const { assignedSharedModules, tokenParams, emittedTokens } = resolvePlanSliceFromFile(
112
+ opts.buildPlanPath,
113
+ opts.candidate?.toolName,
114
+ opts.sharedModules,
115
+ );
93
116
 
94
117
  // 1. Load + validate the session
95
118
  let session: Session = loadJsonFile(
@@ -169,7 +192,7 @@ export async function compileAgent(opts: CompileAgentOptions): Promise<CompileAg
169
192
  `System prompt not found at ${systemPromptPath}\n→ this is an Imprint installation problem; please file an issue at https://github.com/ashaychangwani/imprint/issues with the steps you ran.`,
170
193
  );
171
194
  }
172
- const systemPrompt = readFileSync(systemPromptPath, 'utf8');
195
+ const systemPrompt = `${readFileSync(systemPromptPath, 'utf8')}\n\nToday's date is ${new Date().toISOString().slice(0, 10)}.`;
173
196
 
174
197
  // 5. Build the toolset (shared with the MCP server used by the claude-cli path)
175
198
  const sessionPathAbs = opts.sessionPath.startsWith('/')
@@ -181,6 +204,8 @@ export async function compileAgent(opts: CompileAgentOptions): Promise<CompileAg
181
204
  sharedContext: opts.sharedContext,
182
205
  classifications: opts.classifications,
183
206
  teachCredentials: opts.teachCredentials,
207
+ buildPlanPath: opts.buildPlanPath,
208
+ sharedModules: opts.sharedModules,
184
209
  }),
185
210
  doneTool(),
186
211
  giveUpTool(),
@@ -192,12 +217,13 @@ export async function compileAgent(opts: CompileAgentOptions): Promise<CompileAg
192
217
  Session path: ${sessionPathAbs}
193
218
  Tool directory: ${absoluteToolDir}
194
219
  You will write artifacts into the tool directory.
195
- ${formatCandidateContext(opts.candidate, opts.sharedContext)}
220
+ ${formatCandidateContext(opts.candidate, opts.sharedContext, assignedSharedModules)}
221
+ ${formatToolPlan(opts.toolPlan)}
196
222
 
197
223
  Begin by calling read_session_summary to orient yourself, then proceed per the system prompt.`;
198
224
 
199
225
  // 7. Compute deadline
200
- const deadlineMs = Date.now() + (opts.maxDurationMs ?? 10 * 60 * 1000);
226
+ const deadlineMs = Date.now() + (opts.maxDurationMs ?? 20 * 60 * 1000);
201
227
 
202
228
  // 8. Instantiate provider (or use injected one for testing).
203
229
  // CLI providers take a different path: they don't implement Anthropic
@@ -221,6 +247,9 @@ Begin by calling read_session_summary to orient yourself, then proceed per the s
221
247
  keepTest: opts.keepTest,
222
248
  candidate: opts.candidate,
223
249
  sharedContext: opts.sharedContext,
250
+ buildPlanPath: opts.buildPlanPath,
251
+ sharedModules: opts.sharedModules,
252
+ toolPlan: opts.toolPlan,
224
253
  });
225
254
  }
226
255
  if (resolvedProvider.name === 'codex-cli') {
@@ -235,6 +264,9 @@ Begin by calling read_session_summary to orient yourself, then proceed per the s
235
264
  keepTest: opts.keepTest,
236
265
  candidate: opts.candidate,
237
266
  sharedContext: opts.sharedContext,
267
+ buildPlanPath: opts.buildPlanPath,
268
+ sharedModules: opts.sharedModules,
269
+ toolPlan: opts.toolPlan,
238
270
  });
239
271
  }
240
272
  if (!isToolUseProvider(resolvedProvider)) {
@@ -249,6 +281,9 @@ Begin by calling read_session_summary to orient yourself, then proceed per the s
249
281
  }
250
282
 
251
283
  // 9. Run the agent loop with verification sub-loop
284
+ mkdirSync(absoluteToolDir, { recursive: true });
285
+ const conversationLogPath = pathJoin(absoluteToolDir, '.compile-log.json');
286
+
252
287
  let totalTurns = 0;
253
288
  let totalInputTokens = 0;
254
289
  let totalOutputTokens = 0;
@@ -283,6 +318,10 @@ Begin by calling read_session_summary to orient yourself, then proceed per the s
283
318
  deadlineMs,
284
319
  llm: provider,
285
320
  onProgress: wrappedOnProgress,
321
+ onConversationUpdate: (currentCycleLog) => {
322
+ const fullLog = [...conversationLog, ...currentCycleLog];
323
+ writeFileSync(conversationLogPath, JSON.stringify(fullLog, null, 2), 'utf8');
324
+ },
286
325
  onDeadlineReached: opts.onDeadlineReached,
287
326
  });
288
327
 
@@ -300,7 +339,7 @@ Begin by calling read_session_summary to orient yourself, then proceed per the s
300
339
  }
301
340
 
302
341
  // Perform external verification
303
- const { failures, warnings } = await externalVerification(
342
+ const { failures, warnings, paramVerification, liveVerification } = await externalVerification(
304
343
  absoluteToolDir,
305
344
  session,
306
345
  sessionPathAbs,
@@ -308,6 +347,13 @@ Begin by calling read_session_summary to orient yourself, then proceed per the s
308
347
  expectedToolName: opts.candidate?.toolName,
309
348
  likelyParams: opts.candidate?.likelyParams,
310
349
  candidateRequestSeqs: opts.candidate?.requestSeqs,
350
+ // Widen Fix B's variation pool to the dependency requests (e.g. a
351
+ // bootstrap GET) so a session token that varies only across dependency
352
+ // seqs and is then frozen as a literal in the tool's request is caught.
353
+ dependencyRequestSeqs: opts.candidate?.dependencySeqs,
354
+ assignedSharedModules,
355
+ tokenParams,
356
+ emittedTokens,
311
357
  },
312
358
  );
313
359
 
@@ -316,10 +362,19 @@ Begin by calling read_session_summary to orient yourself, then proceed per the s
316
362
  }
317
363
 
318
364
  if (failures.length === 0) {
319
- // Success (possibly with warnings)
365
+ // Success (possibly with warnings). Persist per-parameter verified flags
366
+ // and the live-verification stamp into workflow.json so downstream
367
+ // (audit, teach summary) can see which tools shipped without a passing
368
+ // live call.
369
+ applyLiveVerification(absoluteToolDir, liveVerification);
370
+ const paramWarnings = applyParamVerification(absoluteToolDir, paramVerification);
371
+ const allWarnings = [...warnings, ...paramWarnings];
372
+ if (paramWarnings.length > 0) {
373
+ log(`parameter verification:\n${paramWarnings.join('\n')}`);
374
+ }
320
375
  message = result.doneSummary ?? 'Task completed';
321
- if (warnings.length > 0) {
322
- message += `\n\nWarnings:\n${warnings.join('\n')}`;
376
+ if (allWarnings.length > 0) {
377
+ message += `\n\nWarnings:\n${allWarnings.join('\n')}`;
323
378
  }
324
379
  if (!opts.keepTest) {
325
380
  for (const f of ['parser.test.ts', 'integration.test.ts']) {
@@ -345,9 +400,7 @@ ${failures.map((f) => `- ${f}`).join('\n')}
345
400
  Resume your work. Read the files you wrote (workflow.json, parser.ts, parser.test.ts), fix the issues, re-run tests, and call done again when fixed.`;
346
401
  }
347
402
 
348
- // 10. Persist conversation log
349
- mkdirSync(absoluteToolDir, { recursive: true });
350
- const conversationLogPath = pathJoin(absoluteToolDir, '.compile-log.json');
403
+ // 10. Final flush of the complete conversation log
351
404
  writeFileSync(conversationLogPath, JSON.stringify(conversationLog, null, 2), 'utf8');
352
405
 
353
406
  // 11. Return the result
@@ -387,18 +440,3 @@ function buildMessageFromOutcome(result: AgentResult): string {
387
440
  return 'Unknown outcome';
388
441
  }
389
442
  }
390
-
391
- function formatCandidateContext(
392
- candidate: ToolCandidate | undefined,
393
- sharedContext: SharedCompileContext | undefined,
394
- ): string {
395
- if (!candidate && !sharedContext) return '';
396
- return `
397
- Selected candidate context:
398
- ${candidate ? JSON.stringify(candidate, null, 2) : '(none)'}
399
-
400
- Shared compile context:
401
- ${sharedContext ? JSON.stringify(sharedContext, null, 2) : '(none)'}
402
-
403
- Compile only the selected candidate. Do not create tools for other actions in the recording.`;
404
- }