npm - imprint-mcp - Versions diffs - 0.2.0 → 0.3.0 - Mend

imprint-mcp 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (129)

package/README.md +165 -201
package/examples/discoverandgo/README.md +1 -1
package/examples/echo/README.md +1 -1
package/examples/google-flights/README.md +28 -0
package/examples/google-flights/_shared/batchexecute.ts +63 -0
package/examples/google-flights/_shared/flights_request.ts +95 -0
package/examples/google-flights/_shared/package.json +9 -0
package/examples/google-flights/get_flight_booking_details/index.ts +159 -0
package/examples/google-flights/get_flight_booking_details/package.json +9 -0
package/examples/google-flights/get_flight_booking_details/parser.ts +182 -0
package/examples/google-flights/get_flight_booking_details/playbook.yaml +138 -0
package/examples/google-flights/get_flight_booking_details/request-transform.ts +86 -0
package/examples/google-flights/get_flight_booking_details/workflow.json +98 -0
package/examples/google-flights/get_flight_calendar_prices/index.ts +131 -0
package/examples/google-flights/get_flight_calendar_prices/package.json +9 -0
package/examples/google-flights/get_flight_calendar_prices/parser.ts +86 -0
package/examples/google-flights/get_flight_calendar_prices/playbook.yaml +97 -0
package/examples/google-flights/get_flight_calendar_prices/request-transform.ts +31 -0
package/examples/google-flights/get_flight_calendar_prices/workflow.json +76 -0
package/examples/google-flights/lookup_airport/index.ts +101 -0
package/examples/google-flights/lookup_airport/package.json +9 -0
package/examples/google-flights/lookup_airport/parser.ts +66 -0
package/examples/google-flights/lookup_airport/playbook.yaml +47 -0
package/examples/google-flights/lookup_airport/request-transform.ts +20 -0
package/examples/google-flights/lookup_airport/workflow.json +57 -0
package/examples/google-flights/search_flights/index.ts +219 -0
package/examples/google-flights/search_flights/package.json +9 -0
package/examples/google-flights/search_flights/parser.ts +169 -0
package/examples/google-flights/search_flights/playbook.yaml +184 -0
package/examples/google-flights/search_flights/request-transform.ts +119 -0
package/examples/google-flights/search_flights/workflow.json +143 -0
package/examples/google-hotels/README.md +29 -0
package/examples/google-hotels/_shared/batchexecute.ts +73 -0
package/examples/google-hotels/_shared/freq.ts +158 -0
package/examples/google-hotels/_shared/package.json +9 -0
package/examples/google-hotels/autocomplete_hotel_location/index.ts +80 -0
package/examples/google-hotels/autocomplete_hotel_location/package.json +9 -0
package/examples/google-hotels/autocomplete_hotel_location/parser.ts +71 -0
package/examples/google-hotels/autocomplete_hotel_location/playbook.yaml +36 -0
package/examples/google-hotels/autocomplete_hotel_location/request-transform.ts +37 -0
package/examples/google-hotels/autocomplete_hotel_location/workflow.json +36 -0
package/examples/google-hotels/get_hotel_booking_options/index.ts +143 -0
package/examples/google-hotels/get_hotel_booking_options/package.json +9 -0
package/examples/google-hotels/get_hotel_booking_options/parser.ts +271 -0
package/examples/google-hotels/get_hotel_booking_options/playbook.yaml +154 -0
package/examples/google-hotels/get_hotel_booking_options/request-transform.ts +154 -0
package/examples/google-hotels/get_hotel_booking_options/workflow.json +84 -0
package/examples/google-hotels/get_hotel_reviews/index.ts +81 -0
package/examples/google-hotels/get_hotel_reviews/package.json +9 -0
package/examples/google-hotels/get_hotel_reviews/parser.ts +128 -0
package/examples/google-hotels/get_hotel_reviews/playbook.yaml +64 -0
package/examples/google-hotels/get_hotel_reviews/request-transform.ts +42 -0
package/examples/google-hotels/get_hotel_reviews/workflow.json +37 -0
package/examples/google-hotels/search_hotels/index.ts +207 -0
package/examples/google-hotels/search_hotels/package.json +9 -0
package/examples/google-hotels/search_hotels/parser.ts +260 -0
package/examples/google-hotels/search_hotels/playbook.yaml +87 -0
package/examples/google-hotels/search_hotels/request-transform.ts +197 -0
package/examples/google-hotels/search_hotels/workflow.json +127 -0
package/package.json +3 -2
package/prompts/audit-agent.md +71 -0
package/prompts/build-planning.md +74 -0
package/prompts/compile-agent.md +132 -28
package/prompts/prereq-builder.md +64 -0
package/prompts/prereq-planner.md +34 -0
package/prompts/tool-planning.md +39 -0
package/src/cli.ts +111 -4
package/src/imprint/agent.ts +5 -0
package/src/imprint/audit.ts +996 -0
package/src/imprint/backend-ladder.ts +1214 -184
package/src/imprint/build-plan.ts +1051 -0
package/src/imprint/cdp-browser-fetch.ts +589 -0
package/src/imprint/cdp-jar-cache.ts +320 -0
package/src/imprint/chromium.ts +135 -0
package/src/imprint/claude-cli-compile.ts +125 -25
package/src/imprint/codex-cli-compile.ts +26 -23
package/src/imprint/compile-agent-types.ts +38 -0
package/src/imprint/compile-agent.ts +65 -27
package/src/imprint/compile-tools.ts +1656 -64
package/src/imprint/compile.ts +14 -2
package/src/imprint/concurrency.ts +87 -0
package/src/imprint/credential-extract.ts +174 -25
package/src/imprint/cron.ts +1 -0
package/src/imprint/doctor.ts +39 -0
package/src/imprint/emit.ts +85 -0
package/src/imprint/freeform-redact.ts +5 -4
package/src/imprint/integrations.ts +2 -2
package/src/imprint/llm.ts +56 -8
package/src/imprint/mcp-compile-server.ts +43 -10
package/src/imprint/mcp-maintenance.ts +9 -101
package/src/imprint/mcp-server.ts +73 -7
package/src/imprint/multi-progress.ts +7 -2
package/src/imprint/param-grounding.ts +367 -0
package/src/imprint/paths.ts +29 -0
package/src/imprint/playbook-runner.ts +101 -40
package/src/imprint/prereq-builder.ts +651 -0
package/src/imprint/probe-backends.ts +6 -3
package/src/imprint/record.ts +10 -1
package/src/imprint/redact.ts +30 -2
package/src/imprint/replay-capture.ts +19 -18
package/src/imprint/runtime.ts +19 -10
package/src/imprint/sensitive-keys.ts +141 -7
package/src/imprint/session-diff.ts +79 -2
package/src/imprint/session-merge.ts +9 -5
package/src/imprint/stealth-chromium.ts +81 -0
package/src/imprint/stealth-fetch.ts +309 -29
package/src/imprint/stealth-token-cache.ts +88 -0
package/src/imprint/teach-plan.ts +251 -0
package/src/imprint/teach-state.ts +17 -0
package/src/imprint/teach.ts +582 -147
package/src/imprint/tool-candidates.ts +72 -14
package/src/imprint/tool-plan.ts +313 -0
package/src/imprint/tracing.ts +135 -6
package/src/imprint/types.ts +61 -3
package/examples/google-flights/search_google_flights/index.ts +0 -101
package/examples/google-flights/search_google_flights/parser.test.ts +0 -140
package/examples/google-flights/search_google_flights/parser.ts +0 -189
package/examples/google-flights/search_google_flights/playbook.yaml +0 -130
package/examples/google-flights/search_google_flights/workflow.json +0 -48
package/examples/google-hotels/search_google_hotels/index.ts +0 -194
package/examples/google-hotels/search_google_hotels/parser.test.ts +0 -168
package/examples/google-hotels/search_google_hotels/parser.ts +0 -330
package/examples/google-hotels/search_google_hotels/playbook.yaml +0 -125
package/examples/google-hotels/search_google_hotels/workflow.json +0 -111
package/examples/namecheap-domains/search_namecheap_domains/index.ts +0 -144
package/examples/namecheap-domains/search_namecheap_domains/parser.ts +0 -380
package/examples/namecheap-domains/search_namecheap_domains/playbook.yaml +0 -50
package/examples/namecheap-domains/search_namecheap_domains/request-transform.ts +0 -136
package/examples/namecheap-domains/search_namecheap_domains/workflow.json +0 -97

package/src/imprint/claude-cli-compile.ts CHANGED Viewed

@@ -29,7 +29,9 @@ import { existsSync, mkdirSync, readFileSync, unlinkSync, writeFileSync } from '
 import { join as pathJoin } from 'node:path';
 import { type Span, context as otelContext } from '@opentelemetry/api';
 import type { OnDeadlineReached } from './agent.ts';
+import { type SharedModuleManifestEntry, resolvePlanSliceFromFile } from './build-plan.ts';
 import type { CompileAgentProgress, CompileAgentResult } from './compile-agent-types.ts';
+import { formatCandidateContext, formatToolPlan } from './compile-agent-types.ts';
 import { preferredAgentModel } from './llm.ts';
 import { createLog } from './log.ts';
 import { COMPILE_SENTINELS } from './mcp-compile-server.ts';
@@ -39,6 +41,7 @@ import {
   llmSpanAttributes,
   setSpanAttributes,
   startTraceSpan,
+  totalPromptTokens,
   traceJsonInputOutputAttributes,
   traceLlmIoEnabled,
   traced,
@@ -52,6 +55,35 @@ const CLI_PATH = pathJoin(REPO_ROOT, 'src', 'cli.ts');
 const MCP_SERVER_NAME = 'imprint-compile';
 const MAX_VERIFICATION_CYCLES = 5;
+/**
+ * Thinking effort for the compile agent. Deliberately `high`, not `max`:
+ * empirically, max-effort thinking generates a large volume of reasoning tokens
+ * on reverse-engineering tasks, which measurably raises the model's usage-policy
+ * safety-filter false-positive rate. `high` keeps strong reasoning with far
+ * fewer spurious refusals. Passed as an explicit `--effort` flag so it overrides
+ * any CLAUDE_EFFORT inherited from the environment.
+ */
+const COMPILE_EFFORT_LEVEL = 'high';
+/**
+ * Signature of Claude Code's usage-policy safety refusal (surfaced in the
+ * terminal result event / our error message). The block is a transient,
+ * probabilistic false positive on legitimate compiles, so we retry a fresh
+ * session a few times before surfacing it as a hard failure.
+ *
+ * The pattern is case-insensitive and matches either phrase anywhere in the
+ * tested text.
+ */
+const USAGE_POLICY_REFUSAL =
+  /unable to respond to this request|appears to violate our Usage Policy/i;
+/** Total attempts (1 initial + retries) when a usage-policy refusal is hit.
+ *  With the value 3 that is one initial attempt plus at most two retries. */
+const MAX_USAGE_POLICY_ATTEMPTS = 3;
+/** Exponential backoff with jitter between refusal retries. Spacing matters:
+ *  bursts of near-identical requests raise the safety-filter trip rate.
+ *
+ *  @param attempt 1-based number of the attempt that was just refused.
+ *  @returns Delay in milliseconds: a base of 5000 * 2^(attempt - 1) plus a
+ *    whole-millisecond random jitter that is always less than half the base. */
+function usagePolicyBackoffMs(attempt: number): number {
+  const base = 5000 * 2 ** (attempt - 1); // base delay: 5s after attempt 1, 10s after attempt 2, doubling thereafter
+  return base + Math.floor(Math.random() * base * 0.5);
+}
 interface CompileViaClaudeCliOptions {
   session: Session;
   absoluteToolDir: string;
@@ -67,6 +99,12 @@ interface CompileViaClaudeCliOptions {
   keepTest?: boolean;
   candidate?: ToolCandidate;
   sharedContext?: SharedCompileContext;
+  /** Absolute path to the multi-tool build plan sidecar (.build-plan.json). */
+  buildPlanPath?: string;
+  /** Shared-module build manifest for this site (verified flags). */
+  sharedModules?: SharedModuleManifestEntry[];
+  /** Per-tool implementation plan injected into the agent's initial message. */
+  toolPlan?: string;
 }
 interface StreamJsonEvent {
@@ -124,7 +162,14 @@ export async function compileViaClaudeCli(
         ...llmSpanAttributes({
           provider: 'claude-cli',
           model: preferredAgentModel('claude-cli'),
-          inputTokens: result.inputTokens,
+          // TOTAL prompt (uncached + cache); the cache split is passed separately
+          // for cost. `result.inputTokens` alone is the uncached delta (often a
+          // few hundred), which would mislabel `llm.token_count.prompt`.
+          inputTokens: totalPromptTokens(
+            result.inputTokens,
+            result.cacheReadInputTokens,
+            result.cacheCreationInputTokens,
+          ),
           outputTokens: result.outputTokens,
           cacheReadTokens: result.cacheReadInputTokens,
           cacheWriteTokens: result.cacheCreationInputTokens,
@@ -135,9 +180,41 @@ export async function compileViaClaudeCli(
   );
 }
+/**
+ * Drives the compile, retrying a fresh claude-cli session when an attempt is
+ * blocked by the usage-policy safety filter. The block is a flaky false positive
+ * (see USAGE_POLICY_REFUSAL); a re-roll almost always succeeds. All other
+ * outcomes (success, give_up, verification failure, timeout) return immediately.
+ *
+ * An attempt counts as a refusal only when it is unsuccessful AND its message
+ * matches USAGE_POLICY_REFUSAL. The backoff sleep runs between attempts, never
+ * after the final one.
+ *
+ * @param opts Compile options, passed unchanged to every attempt.
+ * @returns The first non-refusal result; or, when all
+ *   MAX_USAGE_POLICY_ATTEMPTS attempts are refused, the last refusal result
+ *   with an explanatory note appended to its `message`.
+ */
 async function compileViaClaudeCliImpl(
   opts: CompileViaClaudeCliOptions,
 ): Promise<CompileAgentResult> {
+  let lastResult: CompileAgentResult | undefined;
+  for (let attempt = 1; attempt <= MAX_USAGE_POLICY_ATTEMPTS; attempt++) {
+    const result = await runClaudeCliAttempt(opts);
+    const isRefusal = !result.success && USAGE_POLICY_REFUSAL.test(result.message ?? '');
+    if (!isRefusal) return result;
+    lastResult = result;
+    if (attempt < MAX_USAGE_POLICY_ATTEMPTS) {
+      const backoffMs = usagePolicyBackoffMs(attempt);
+      log(
+        `usage-policy refusal on attempt ${attempt}/${MAX_USAGE_POLICY_ATTEMPTS}; ` +
+          `retrying a fresh session in ${Math.round(backoffMs / 1000)}s`,
+      );
+      await new Promise((resolve) => setTimeout(resolve, backoffMs));
+    }
+  }
+  // Every attempt was blocked. Annotate the final error so the operator knows
+  // it was the (flaky) safety filter, not their recording or workflow.
+  const exhausted = lastResult as CompileAgentResult;
+  return {
+    ...exhausted,
+    message: `${exhausted.message}\n\nBlocked by the model's usage-policy safety filter on all ${MAX_USAGE_POLICY_ATTEMPTS} attempts. This is typically a transient false positive on reverse-engineering compiles — re-run this tool, or compile it with a different provider (e.g. codex-cli).`,
+  };
+}
+async function runClaudeCliAttempt(opts: CompileViaClaudeCliOptions): Promise<CompileAgentResult> {
   // Ensure tool dir exists and clear any prior sentinels — a stale
   // sentinel from a previous run would short-circuit our success detection.
   mkdirSync(opts.absoluteToolDir, { recursive: true });
@@ -175,17 +252,27 @@ async function compileViaClaudeCliImpl(
           ...(opts.sharedContext
             ? ['--shared-context-json', JSON.stringify(opts.sharedContext)]
             : []),
+          ...(opts.buildPlanPath ? ['--build-plan-path', opts.buildPlanPath] : []),
+          ...(opts.sharedModules
+            ? ['--shared-modules-json', JSON.stringify(opts.sharedModules)]
+            : []),
         ],
       },
     },
   };
+  const { assignedSharedModules } = resolvePlanSliceFromFile(
+    opts.buildPlanPath,
+    opts.candidate?.toolName,
+    opts.sharedModules,
+  );
   const initialPrompt = `A new compile task is starting.
 Session path: ${sessionPathAbs}
 Tool directory: ${opts.absoluteToolDir}
 You will write artifacts into the tool directory.
-${formatCandidateContext(opts.candidate, opts.sharedContext)}
+${formatCandidateContext(opts.candidate, opts.sharedContext, assignedSharedModules)}
+${formatToolPlan(opts.toolPlan)}
 Begin by calling read_session_summary to orient yourself, then proceed per the system prompt.`;
@@ -199,6 +286,8 @@ Begin by calling read_session_summary to orient yourself, then proceed per the s
     JSON.stringify(mcpConfig),
     '--system-prompt-file',
     opts.systemPromptPath,
+    '--append-system-prompt',
+    `Today's date is ${new Date().toISOString().slice(0, 10)}.`,
     // Disable the built-in tool set so claude only uses our MCP tools.
     '--tools',
     '',
@@ -221,6 +310,8 @@ Begin by calling read_session_summary to orient yourself, then proceed per the s
     '--allowedTools',
     `mcp__${MCP_SERVER_NAME}__run_tests`,
     '--allowedTools',
+    `mcp__${MCP_SERVER_NAME}__read_build_plan`,
+    '--allowedTools',
     `mcp__${MCP_SERVER_NAME}__done`,
     '--allowedTools',
     `mcp__${MCP_SERVER_NAME}__give_up`,
@@ -234,6 +325,9 @@ Begin by calling read_session_summary to orient yourself, then proceed per the s
     'bypassPermissions',
     '--no-session-persistence',
     '--disable-slash-commands',
+    // Cap thinking effort below `max` to reduce usage-policy false positives.
+    '--effort',
+    COMPILE_EFFORT_LEVEL,
     '--model',
     preferredAgentModel('claude-cli'),
     initialPrompt,
@@ -245,7 +339,26 @@ Begin by calling read_session_summary to orient yourself, then proceed per the s
   try {
     child = spawn('claude', args, {
       cwd: REPO_ROOT,
-      env: process.env,
+      // Claude CLI's default MCP_TOOL_TIMEOUT is 60s. The compile MCP
+      // server's `done` tool runs external verification inline — bun test
+      // (up to 60s × 3 retries for the integration suite + 120s for the
+      // parser suite) plus typechecking. On bot-protected sites where the
+      // integration test escalates fetch → fetch-bootstrap → stealth-fetch
+      // for every assertion, a single bun test pass can run 30s × 3
+      // rungs × N tests = 10-15 min before the outer wrapper kills it,
+      // and 3 retries push the total well past 30 min. A 10-min cap was
+      // not enough — set 30 min so the worst-case verification can
+      // actually complete and the agent receives the failure feedback
+      // (and ships with `liveVerified: false` via the waiver path)
+      // rather than getting `-32000: Connection closed` mid-call and
+      // wasting the rest of its turn budget. Honor user-set env so an
+      // operator on a fast network can tighten without editing source.
+      // Connection-startup timeout stays at 60s for cold Playwright boot.
+      env: {
+        ...process.env,
+        MCP_TOOL_TIMEOUT: process.env.MCP_TOOL_TIMEOUT ?? '1800000',
+        MCP_TIMEOUT: process.env.MCP_TIMEOUT ?? '60000',
+      },
       stdio: ['ignore', 'pipe', 'pipe'],
     });
   } catch (err) {
@@ -267,6 +380,12 @@ async function driveStreamJson(
   const parentCtx = otelContext.active();
   const conversationLog: unknown[] = [];
+  const conversationLogPath = pathJoin(opts.absoluteToolDir, '.compile-log.json');
+  // Best-effort snapshot of the conversation so far, rewritten in full on each
+  // call. A write failure must never abort the compile, but it is logged so a
+  // missing or stale .compile-log.json is explainable during a post-mortem.
+  const flushLog = (): void => {
+    try {
+      writeFileSync(conversationLogPath, JSON.stringify(conversationLog, null, 2), 'utf8');
+    } catch (err) {
+      log(`failed to persist conversation log: ${errMsg(err)}`);
+    }
+  };
   const captureLlmIo = traceLlmIoEnabled();
   let inputTokens = 0;
   let outputTokens = 0;
@@ -376,6 +495,7 @@ async function driveStreamJson(
             });
             endTraceSpan(currentTurnSpan);
           }
+          flushLog();
           turn++;
           turnInputTokens = 0;
           turnOutputTokens = 0;
@@ -460,13 +580,8 @@ async function driveStreamJson(
     log(`unflushed stdout tail (${stdoutBuf.length} bytes) discarded`);
   }
-  // Persist conversation log for post-mortem.
-  const conversationLogPath = pathJoin(opts.absoluteToolDir, '.compile-log.json');
-  try {
-    writeFileSync(conversationLogPath, JSON.stringify(conversationLog, null, 2), 'utf8');
-  } catch (err) {
-    log(`failed to persist conversation log: ${errMsg(err)}`);
-  }
+  // Final flush of the complete conversation log.
+  flushLog();
   // Inspect sentinels to determine outcome.
   const doneSentinel = pathJoin(opts.absoluteToolDir, COMPILE_SENTINELS.done);
@@ -620,21 +735,6 @@ function finalErrorResult(opts: CompileViaClaudeCliOptions, message: string): Co
   };
 }
-function formatCandidateContext(
-  candidate: ToolCandidate | undefined,
-  sharedContext: SharedCompileContext | undefined,
-): string {
-  if (!candidate && !sharedContext) return '';
-  return `
-Selected candidate context:
-${candidate ? JSON.stringify(candidate, null, 2) : '(none)'}
-Shared compile context:
-${sharedContext ? JSON.stringify(sharedContext, null, 2) : '(none)'}
-Compile only the selected candidate. Do not create tools for other actions in the recording.`;
-}
 function errMsg(err: unknown): string {
   return err instanceof Error ? err.message : String(err);
 }

package/src/imprint/codex-cli-compile.ts CHANGED Viewed

@@ -11,7 +11,9 @@ import { type ChildProcess, spawn } from 'node:child_process';
 import { existsSync, mkdirSync, readFileSync, unlinkSync, writeFileSync } from 'node:fs';
 import { isAbsolute as pathIsAbsolute, join as pathJoin } from 'node:path';
 import { type Span, context as otelContext } from '@opentelemetry/api';
+import { type SharedModuleManifestEntry, resolvePlanSliceFromFile } from './build-plan.ts';
 import type { CompileAgentProgress, CompileAgentResult } from './compile-agent-types.ts';
+import { formatCandidateContext, formatToolPlan } from './compile-agent-types.ts';
 import { preferredAgentModel } from './llm.ts';
 import { createLog } from './log.ts';
 import { COMPILE_SENTINELS } from './mcp-compile-server.ts';
@@ -48,6 +50,12 @@ interface CompileViaCodexCliOptions {
   keepTest?: boolean;
   candidate?: ToolCandidate;
   sharedContext?: SharedCompileContext;
+  /** Absolute path to the multi-tool build plan sidecar (.build-plan.json). */
+  buildPlanPath?: string;
+  /** Shared-module build manifest for this site (verified flags). */
+  sharedModules?: SharedModuleManifestEntry[];
+  /** Per-tool implementation plan injected into the agent's initial message. */
+  toolPlan?: string;
 }
 interface CodexJsonEvent {
@@ -141,15 +149,22 @@ async function compileViaCodexCliImpl(
     opts.absoluteToolDir,
     ...(opts.candidate ? ['--candidate-json', JSON.stringify(opts.candidate)] : []),
     ...(opts.sharedContext ? ['--shared-context-json', JSON.stringify(opts.sharedContext)] : []),
+    ...(opts.buildPlanPath ? ['--build-plan-path', opts.buildPlanPath] : []),
+    ...(opts.sharedModules ? ['--shared-modules-json', JSON.stringify(opts.sharedModules)] : []),
   ];
   let systemPrompt: string;
   try {
-    systemPrompt = readFileSync(opts.systemPromptPath, 'utf8');
+    systemPrompt = `${readFileSync(opts.systemPromptPath, 'utf8')}\n\nToday's date is ${new Date().toISOString().slice(0, 10)}.`;
   } catch (err) {
     return finalErrorResult(opts, `failed to read system prompt: ${errMsg(err)}`);
   }
+  const { assignedSharedModules } = resolvePlanSliceFromFile(
+    opts.buildPlanPath,
+    opts.candidate?.toolName,
+    opts.sharedModules,
+  );
   const initialPrompt = `<system_instructions>
 ${systemPrompt}
 </system_instructions>
@@ -159,7 +174,8 @@ A new compile task is starting.
 Session path: ${sessionPathAbs}
 Tool directory: ${opts.absoluteToolDir}
 You will write artifacts into the tool directory.
-${formatCandidateContext(opts.candidate, opts.sharedContext)}
+${formatCandidateContext(opts.candidate, opts.sharedContext, assignedSharedModules)}
+${formatToolPlan(opts.toolPlan)}
 Use the imprint-compile MCP tools to inspect the session, write artifacts, run tests, and call done(). Begin by calling read_session_summary, then proceed per the system instructions.`;
@@ -277,6 +293,12 @@ async function driveJsonl(
   const parentCtx = otelContext.active();
   const conversationLog: unknown[] = [];
+  const conversationLogPath = pathJoin(opts.absoluteToolDir, '.compile-log.json');
+  // Best-effort snapshot of the conversation so far, rewritten in full on each
+  // call. A write failure must never abort the compile, but it is logged so a
+  // missing or stale .compile-log.json is explainable during a post-mortem.
+  const flushLog = (): void => {
+    try {
+      writeFileSync(conversationLogPath, JSON.stringify(conversationLog, null, 2), 'utf8');
+    } catch (err) {
+      log(`failed to persist conversation log: ${errMsg(err)}`);
+    }
+  };
   let inputTokens = 0;
   let outputTokens = 0;
   let turn = 0;
@@ -357,6 +379,7 @@ async function driveJsonl(
         if (evt.type === 'turn.started') {
           if (currentTurnSpan) endTraceSpan(currentTurnSpan);
+          flushLog();
           turn++;
           currentTurnSpan = startTraceSpan(`agent.turn.${turn}`, 'CHAIN', {
             'imprint.agent.turn': turn,
@@ -438,12 +461,7 @@ async function driveJsonl(
     log(`unflushed stdout tail (${stdoutBuf.length} bytes) discarded`);
   }
-  const conversationLogPath = pathJoin(opts.absoluteToolDir, '.compile-log.json');
-  try {
-    writeFileSync(conversationLogPath, JSON.stringify(conversationLog, null, 2), 'utf8');
-  } catch (err) {
-    log(`failed to persist conversation log: ${errMsg(err)}`);
-  }
+  flushLog();
   const workflowPath = pathJoin(opts.absoluteToolDir, 'workflow.json');
   const parserPath = pathJoin(opts.absoluteToolDir, 'parser.ts');
@@ -692,21 +710,6 @@ function finalErrorResult(opts: CompileViaCodexCliOptions, message: string): Com
   };
 }
-function formatCandidateContext(
-  candidate: ToolCandidate | undefined,
-  sharedContext: SharedCompileContext | undefined,
-): string {
-  if (!candidate && !sharedContext) return '';
-  return `
-Selected candidate context:
-${candidate ? JSON.stringify(candidate, null, 2) : '(none)'}
-Shared compile context:
-${sharedContext ? JSON.stringify(sharedContext, null, 2) : '(none)'}
-Compile only the selected candidate. Do not create tools for other actions in the recording.`;
-}
 function errMsg(err: unknown): string {
   return err instanceof Error ? err.message : String(err);
 }

package/src/imprint/compile-agent-types.ts CHANGED Viewed

@@ -7,6 +7,44 @@
  */
 import type { AgentProgress } from './agent.ts';
+import { type AssignedSharedModule, describeAssignedModules } from './build-plan.ts';
+import type { SharedCompileContext, ToolCandidate } from './tool-candidates.ts';
+/** Render a per-tool implementation plan (param→field mapping, request
+ *  construction, response parsing, shared-module imports, edge cases) into an
+ *  initial-message section the compile agent must follow. Shared verbatim by the
+ *  in-process loop and both CLI drivers. Generic — carries no site-specific
+ *  content; the plan itself is derived per-tool from the recording.
+ *
+ *  @param toolPlan Plan text; surrounding whitespace is trimmed before use.
+ *  @returns An empty string when `toolPlan` is undefined, empty, or only
+ *    whitespace. Otherwise a section that starts with a newline, followed by
+ *    the "IMPLEMENTATION PLAN" instruction paragraph and then the trimmed plan. */
+export function formatToolPlan(toolPlan: string | undefined): string {
+  const plan = toolPlan?.trim();
+  if (!plan) return '';
+  return `
+IMPLEMENTATION PLAN — a planning pass analyzed the recording for THIS tool and produced the plan below. Follow it. It maps each parameter to its recorded field, specifies how to construct the request(s) and parse the response, and names the shared modules to import. Deviate only where the recorded data plainly contradicts the plan; if you do, note the correction in a brief code comment.
+${plan}`;
+}
+/** Render the selected candidate + shared compile context (and any assigned
+ *  shared modules) into the compile agent's initial message. Shared verbatim by
+ *  the in-process loop and both CLI drivers.
+ *
+ *  @param candidate Selected tool candidate; pretty-printed as JSON, or shown
+ *    as `(none)` when undefined.
+ *  @param sharedContext Shared compile context; pretty-printed as JSON, or
+ *    shown as `(none)` when undefined.
+ *  @param assignedSharedModules Optional shared modules assigned to this tool.
+ *    When provided (an empty array still counts, since the check is on
+ *    truthiness), the text returned by `describeAssignedModules` is appended.
+ *  @returns An empty string when both `candidate` and `sharedContext` are
+ *    undefined — in that case `assignedSharedModules` is ignored. Otherwise
+ *    the rendered section, starting with a newline. */
+export function formatCandidateContext(
+  candidate: ToolCandidate | undefined,
+  sharedContext: SharedCompileContext | undefined,
+  assignedSharedModules?: AssignedSharedModule[],
+): string {
+  if (!candidate && !sharedContext) return '';
+  return `
+Selected candidate context:
+${candidate ? JSON.stringify(candidate, null, 2) : '(none)'}
+Shared compile context:
+${sharedContext ? JSON.stringify(sharedContext, null, 2) : '(none)'}
+Compile only the selected candidate. Do not create tools for other actions in the recording.${
+    assignedSharedModules ? describeAssignedModules(assignedSharedModules) : ''
+  }`;
+}
 export interface CompileAgentProgress extends AgentProgress {
   /** 1-based verification cycle. Cycle 1 is the initial agent run. Subsequent cycles

package/src/imprint/compile-agent.ts CHANGED Viewed

@@ -16,10 +16,17 @@ import {
   giveUpTool,
   runAgentLoop,
 } from './agent.ts';
+import { type SharedModuleManifestEntry, resolvePlanSliceFromFile } from './build-plan.ts';
 import { compileViaClaudeCli } from './claude-cli-compile.ts';
 import { compileViaCodexCli } from './codex-cli-compile.ts';
 import type { CompileAgentProgress, CompileAgentResult } from './compile-agent-types.ts';
-import { buildCompileTools, externalVerification } from './compile-tools.ts';
+import { formatCandidateContext, formatToolPlan } from './compile-agent-types.ts';
+import {
+  applyLiveVerification,
+  applyParamVerification,
+  buildCompileTools,
+  externalVerification,
+} from './compile-tools.ts';
 import { type Replacement, extractCredentials } from './credential-extract.ts';
 import {
   type LLMOptions,
@@ -53,7 +60,7 @@ export function resolveCompileAgentModel(provider: ProviderName): string {
 interface CompileAgentOptions {
   /** Path to the recorded session JSON (absolute or relative). */
   sessionPath: string;
-  /** Hard wall-clock budget. Default 10 minutes. */
+  /** Hard wall-clock budget. Default 20 minutes. */
   maxDurationMs?: number;
   /** Override LLM config (region, model, project). */
   llmConfig?: LLMOptions;
@@ -84,12 +91,28 @@ interface CompileAgentOptions {
   classifications?: ClassifiedValue[];
   /** Credential values extracted during teach, passed to integration tests via env var. */
   teachCredentials?: { site: string; values: Record<string, string> };
+  /** Absolute path to the multi-tool build plan sidecar (.build-plan.json). */
+  buildPlanPath?: string;
+  /** Shared-module build manifest for this site (verified flags). */
+  sharedModules?: SharedModuleManifestEntry[];
   /** Called when wall-clock deadline is reached; return ms to extend or null to time out. */
   onDeadlineReached?: OnDeadlineReached;
+  /** Per-tool implementation plan (param→field mapping, request construction,
+   *  response parsing, shared-module imports). Injected into the agent's initial
+   *  message so the compile follows it. Generic — not tied to any site. */
+  toolPlan?: string;
 }
 export async function compileAgent(opts: CompileAgentOptions): Promise<CompileAgentResult> {
   const startTime = Date.now();
+  // Resolve the shared modules + token contracts the plan assigned this tool, so
+  // the in-process verifier can assert modules are imported and require a chained
+  // test for each producer-sourced token param.
+  const { assignedSharedModules, tokenParams, emittedTokens } = resolvePlanSliceFromFile(
+    opts.buildPlanPath,
+    opts.candidate?.toolName,
+    opts.sharedModules,
+  );
   // 1. Load + validate the session
   let session: Session = loadJsonFile(
@@ -169,7 +192,7 @@ export async function compileAgent(opts: CompileAgentOptions): Promise<CompileAg
       `System prompt not found at ${systemPromptPath}\n→ this is an Imprint installation problem; please file an issue at https://github.com/ashaychangwani/imprint/issues with the steps you ran.`,
     );
   }
-  const systemPrompt = readFileSync(systemPromptPath, 'utf8');
+  const systemPrompt = `${readFileSync(systemPromptPath, 'utf8')}\n\nToday's date is ${new Date().toISOString().slice(0, 10)}.`;
   // 5. Build the toolset (shared with the MCP server used by the claude-cli path)
   const sessionPathAbs = opts.sessionPath.startsWith('/')
@@ -181,6 +204,8 @@ export async function compileAgent(opts: CompileAgentOptions): Promise<CompileAg
       sharedContext: opts.sharedContext,
       classifications: opts.classifications,
       teachCredentials: opts.teachCredentials,
+      buildPlanPath: opts.buildPlanPath,
+      sharedModules: opts.sharedModules,
     }),
     doneTool(),
     giveUpTool(),
@@ -192,12 +217,13 @@ export async function compileAgent(opts: CompileAgentOptions): Promise<CompileAg
 Session path: ${sessionPathAbs}
 Tool directory: ${absoluteToolDir}
 You will write artifacts into the tool directory.
-${formatCandidateContext(opts.candidate, opts.sharedContext)}
+${formatCandidateContext(opts.candidate, opts.sharedContext, assignedSharedModules)}
+${formatToolPlan(opts.toolPlan)}
 Begin by calling read_session_summary to orient yourself, then proceed per the system prompt.`;
   // 7. Compute deadline
-  const deadlineMs = Date.now() + (opts.maxDurationMs ?? 10 * 60 * 1000);
+  const deadlineMs = Date.now() + (opts.maxDurationMs ?? 20 * 60 * 1000);
   // 8. Instantiate provider (or use injected one for testing).
   //    CLI providers take a different path: they don't implement Anthropic
@@ -221,6 +247,9 @@ Begin by calling read_session_summary to orient yourself, then proceed per the s
         keepTest: opts.keepTest,
         candidate: opts.candidate,
         sharedContext: opts.sharedContext,
+        buildPlanPath: opts.buildPlanPath,
+        sharedModules: opts.sharedModules,
+        toolPlan: opts.toolPlan,
       });
     }
     if (resolvedProvider.name === 'codex-cli') {
@@ -235,6 +264,9 @@ Begin by calling read_session_summary to orient yourself, then proceed per the s
         keepTest: opts.keepTest,
         candidate: opts.candidate,
         sharedContext: opts.sharedContext,
+        buildPlanPath: opts.buildPlanPath,
+        sharedModules: opts.sharedModules,
+        toolPlan: opts.toolPlan,
       });
     }
     if (!isToolUseProvider(resolvedProvider)) {
@@ -249,6 +281,9 @@ Begin by calling read_session_summary to orient yourself, then proceed per the s
   }
   // 9. Run the agent loop with verification sub-loop
+  mkdirSync(absoluteToolDir, { recursive: true });
+  const conversationLogPath = pathJoin(absoluteToolDir, '.compile-log.json');
   let totalTurns = 0;
   let totalInputTokens = 0;
   let totalOutputTokens = 0;
@@ -283,6 +318,10 @@ Begin by calling read_session_summary to orient yourself, then proceed per the s
       deadlineMs,
       llm: provider,
       onProgress: wrappedOnProgress,
+      onConversationUpdate: (currentCycleLog) => {
+        const fullLog = [...conversationLog, ...currentCycleLog];
+        writeFileSync(conversationLogPath, JSON.stringify(fullLog, null, 2), 'utf8');
+      },
       onDeadlineReached: opts.onDeadlineReached,
     });
@@ -300,7 +339,7 @@ Begin by calling read_session_summary to orient yourself, then proceed per the s
     }
     // Perform external verification
-    const { failures, warnings } = await externalVerification(
+    const { failures, warnings, paramVerification, liveVerification } = await externalVerification(
       absoluteToolDir,
       session,
       sessionPathAbs,
@@ -308,6 +347,13 @@ Begin by calling read_session_summary to orient yourself, then proceed per the s
         expectedToolName: opts.candidate?.toolName,
         likelyParams: opts.candidate?.likelyParams,
         candidateRequestSeqs: opts.candidate?.requestSeqs,
+        // Widen Fix B's variation pool to the dependency requests (e.g. a
+        // bootstrap GET) so a session token that varies only across dependency
+        // seqs and is then frozen as a literal in the tool's request is caught.
+        dependencyRequestSeqs: opts.candidate?.dependencySeqs,
+        assignedSharedModules,
+        tokenParams,
+        emittedTokens,
       },
     );
@@ -316,10 +362,19 @@ Begin by calling read_session_summary to orient yourself, then proceed per the s
     }
     if (failures.length === 0) {
-      // Success (possibly with warnings)
+      // Success (possibly with warnings). Persist per-parameter verified flags
+      // and the live-verification stamp into workflow.json so downstream
+      // (audit, teach summary) can see which tools shipped without a passing
+      // live call.
+      applyLiveVerification(absoluteToolDir, liveVerification);
+      const paramWarnings = applyParamVerification(absoluteToolDir, paramVerification);
+      const allWarnings = [...warnings, ...paramWarnings];
+      if (paramWarnings.length > 0) {
+        log(`parameter verification:\n${paramWarnings.join('\n')}`);
+      }
       message = result.doneSummary ?? 'Task completed';
-      if (warnings.length > 0) {
-        message += `\n\nWarnings:\n${warnings.join('\n')}`;
+      if (allWarnings.length > 0) {
+        message += `\n\nWarnings:\n${allWarnings.join('\n')}`;
       }
       if (!opts.keepTest) {
         for (const f of ['parser.test.ts', 'integration.test.ts']) {
@@ -345,9 +400,7 @@ ${failures.map((f) => `- ${f}`).join('\n')}
 Resume your work. Read the files you wrote (workflow.json, parser.ts, parser.test.ts), fix the issues, re-run tests, and call done again when fixed.`;
   }
-  // 10. Persist conversation log
-  mkdirSync(absoluteToolDir, { recursive: true });
-  const conversationLogPath = pathJoin(absoluteToolDir, '.compile-log.json');
+  // 10. Final flush of the complete conversation log
   writeFileSync(conversationLogPath, JSON.stringify(conversationLog, null, 2), 'utf8');
   // 11. Return the result
@@ -387,18 +440,3 @@ function buildMessageFromOutcome(result: AgentResult): string {
       return 'Unknown outcome';
   }
 }
-function formatCandidateContext(
-  candidate: ToolCandidate | undefined,
-  sharedContext: SharedCompileContext | undefined,
-): string {
-  if (!candidate && !sharedContext) return '';
-  return `
-Selected candidate context:
-${candidate ? JSON.stringify(candidate, null, 2) : '(none)'}
-Shared compile context:
-${sharedContext ? JSON.stringify(sharedContext, null, 2) : '(none)'}
-Compile only the selected candidate. Do not create tools for other actions in the recording.`;
-}