imprint-mcp 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126)
  1. package/README.md +165 -201
  2. package/examples/discoverandgo/README.md +1 -1
  3. package/examples/echo/README.md +1 -1
  4. package/examples/google-flights/README.md +28 -0
  5. package/examples/google-flights/_shared/batchexecute.ts +63 -0
  6. package/examples/google-flights/_shared/flights_request.ts +95 -0
  7. package/examples/google-flights/_shared/package.json +9 -0
  8. package/examples/google-flights/get_flight_booking_details/index.ts +159 -0
  9. package/examples/google-flights/get_flight_booking_details/package.json +9 -0
  10. package/examples/google-flights/get_flight_booking_details/parser.ts +182 -0
  11. package/examples/google-flights/get_flight_booking_details/playbook.yaml +138 -0
  12. package/examples/google-flights/get_flight_booking_details/request-transform.ts +86 -0
  13. package/examples/google-flights/get_flight_booking_details/workflow.json +98 -0
  14. package/examples/google-flights/get_flight_calendar_prices/index.ts +131 -0
  15. package/examples/google-flights/get_flight_calendar_prices/package.json +9 -0
  16. package/examples/google-flights/get_flight_calendar_prices/parser.ts +86 -0
  17. package/examples/google-flights/get_flight_calendar_prices/playbook.yaml +97 -0
  18. package/examples/google-flights/get_flight_calendar_prices/request-transform.ts +31 -0
  19. package/examples/google-flights/get_flight_calendar_prices/workflow.json +76 -0
  20. package/examples/google-flights/lookup_airport/index.ts +101 -0
  21. package/examples/google-flights/lookup_airport/package.json +9 -0
  22. package/examples/google-flights/lookup_airport/parser.ts +66 -0
  23. package/examples/google-flights/lookup_airport/playbook.yaml +47 -0
  24. package/examples/google-flights/lookup_airport/request-transform.ts +20 -0
  25. package/examples/google-flights/lookup_airport/workflow.json +57 -0
  26. package/examples/google-flights/search_flights/index.ts +219 -0
  27. package/examples/google-flights/search_flights/package.json +9 -0
  28. package/examples/google-flights/search_flights/parser.ts +169 -0
  29. package/examples/google-flights/search_flights/playbook.yaml +184 -0
  30. package/examples/google-flights/search_flights/request-transform.ts +119 -0
  31. package/examples/google-flights/search_flights/workflow.json +143 -0
  32. package/examples/google-hotels/README.md +29 -0
  33. package/examples/google-hotels/_shared/batchexecute.ts +73 -0
  34. package/examples/google-hotels/_shared/freq.ts +158 -0
  35. package/examples/google-hotels/_shared/package.json +9 -0
  36. package/examples/google-hotels/autocomplete_hotel_location/index.ts +80 -0
  37. package/examples/google-hotels/autocomplete_hotel_location/package.json +9 -0
  38. package/examples/google-hotels/autocomplete_hotel_location/parser.ts +71 -0
  39. package/examples/google-hotels/autocomplete_hotel_location/playbook.yaml +36 -0
  40. package/examples/google-hotels/autocomplete_hotel_location/request-transform.ts +37 -0
  41. package/examples/google-hotels/autocomplete_hotel_location/workflow.json +36 -0
  42. package/examples/google-hotels/get_hotel_booking_options/index.ts +143 -0
  43. package/examples/google-hotels/get_hotel_booking_options/package.json +9 -0
  44. package/examples/google-hotels/get_hotel_booking_options/parser.ts +271 -0
  45. package/examples/google-hotels/get_hotel_booking_options/playbook.yaml +154 -0
  46. package/examples/google-hotels/get_hotel_booking_options/request-transform.ts +154 -0
  47. package/examples/google-hotels/get_hotel_booking_options/workflow.json +84 -0
  48. package/examples/google-hotels/get_hotel_reviews/index.ts +81 -0
  49. package/examples/google-hotels/get_hotel_reviews/package.json +9 -0
  50. package/examples/google-hotels/get_hotel_reviews/parser.ts +128 -0
  51. package/examples/google-hotels/get_hotel_reviews/playbook.yaml +64 -0
  52. package/examples/google-hotels/get_hotel_reviews/request-transform.ts +42 -0
  53. package/examples/google-hotels/get_hotel_reviews/workflow.json +37 -0
  54. package/examples/google-hotels/search_hotels/index.ts +207 -0
  55. package/examples/google-hotels/search_hotels/package.json +9 -0
  56. package/examples/google-hotels/search_hotels/parser.ts +260 -0
  57. package/examples/google-hotels/search_hotels/playbook.yaml +87 -0
  58. package/examples/google-hotels/search_hotels/request-transform.ts +197 -0
  59. package/examples/google-hotels/search_hotels/workflow.json +127 -0
  60. package/package.json +3 -2
  61. package/prompts/audit-agent.md +71 -0
  62. package/prompts/build-planning.md +74 -0
  63. package/prompts/compile-agent.md +131 -27
  64. package/prompts/prereq-builder.md +64 -0
  65. package/prompts/prereq-planner.md +34 -0
  66. package/prompts/tool-planning.md +39 -0
  67. package/src/cli.ts +109 -2
  68. package/src/imprint/agent.ts +5 -0
  69. package/src/imprint/audit.ts +996 -0
  70. package/src/imprint/backend-ladder.ts +1214 -184
  71. package/src/imprint/build-plan.ts +1051 -0
  72. package/src/imprint/cdp-browser-fetch.ts +589 -0
  73. package/src/imprint/cdp-jar-cache.ts +320 -0
  74. package/src/imprint/chromium.ts +135 -0
  75. package/src/imprint/claude-cli-compile.ts +125 -25
  76. package/src/imprint/codex-cli-compile.ts +26 -23
  77. package/src/imprint/compile-agent-types.ts +38 -0
  78. package/src/imprint/compile-agent.ts +63 -25
  79. package/src/imprint/compile-tools.ts +1656 -64
  80. package/src/imprint/compile.ts +13 -1
  81. package/src/imprint/concurrency.ts +87 -0
  82. package/src/imprint/cron.ts +1 -0
  83. package/src/imprint/doctor.ts +39 -0
  84. package/src/imprint/freeform-redact.ts +5 -4
  85. package/src/imprint/integrations.ts +2 -2
  86. package/src/imprint/llm.ts +56 -8
  87. package/src/imprint/mcp-compile-server.ts +43 -10
  88. package/src/imprint/mcp-maintenance.ts +9 -101
  89. package/src/imprint/mcp-server.ts +73 -7
  90. package/src/imprint/multi-progress.ts +7 -2
  91. package/src/imprint/param-grounding.ts +367 -0
  92. package/src/imprint/paths.ts +29 -0
  93. package/src/imprint/playbook-runner.ts +101 -40
  94. package/src/imprint/prereq-builder.ts +651 -0
  95. package/src/imprint/probe-backends.ts +6 -3
  96. package/src/imprint/record.ts +10 -1
  97. package/src/imprint/redact.ts +30 -2
  98. package/src/imprint/replay-capture.ts +19 -18
  99. package/src/imprint/runtime.ts +19 -10
  100. package/src/imprint/session-diff.ts +79 -2
  101. package/src/imprint/session-merge.ts +9 -5
  102. package/src/imprint/stealth-chromium.ts +81 -0
  103. package/src/imprint/stealth-fetch.ts +309 -29
  104. package/src/imprint/stealth-token-cache.ts +88 -0
  105. package/src/imprint/teach-plan.ts +251 -0
  106. package/src/imprint/teach-state.ts +10 -0
  107. package/src/imprint/teach.ts +456 -142
  108. package/src/imprint/tool-candidates.ts +72 -14
  109. package/src/imprint/tool-plan.ts +313 -0
  110. package/src/imprint/tracing.ts +135 -6
  111. package/src/imprint/types.ts +61 -3
  112. package/examples/google-flights/search_google_flights/index.ts +0 -101
  113. package/examples/google-flights/search_google_flights/parser.test.ts +0 -140
  114. package/examples/google-flights/search_google_flights/parser.ts +0 -189
  115. package/examples/google-flights/search_google_flights/playbook.yaml +0 -130
  116. package/examples/google-flights/search_google_flights/workflow.json +0 -48
  117. package/examples/google-hotels/search_google_hotels/index.ts +0 -194
  118. package/examples/google-hotels/search_google_hotels/parser.test.ts +0 -168
  119. package/examples/google-hotels/search_google_hotels/parser.ts +0 -330
  120. package/examples/google-hotels/search_google_hotels/playbook.yaml +0 -125
  121. package/examples/google-hotels/search_google_hotels/workflow.json +0 -111
  122. package/examples/namecheap-domains/search_namecheap_domains/index.ts +0 -144
  123. package/examples/namecheap-domains/search_namecheap_domains/parser.ts +0 -380
  124. package/examples/namecheap-domains/search_namecheap_domains/playbook.yaml +0 -50
  125. package/examples/namecheap-domains/search_namecheap_domains/request-transform.ts +0 -136
  126. package/examples/namecheap-domains/search_namecheap_domains/workflow.json +0 -97
@@ -19,6 +19,7 @@ import {
19
19
  import { dirname, join as pathJoin } from 'node:path';
20
20
  import type { OnDeadlineReached } from './agent.ts';
21
21
  import { inferAppApiHosts } from './app-api-hosts.ts';
22
+ import type { SharedModuleManifestEntry } from './build-plan.ts';
22
23
  import { type CompileAgentProgress, compileAgent } from './compile-agent.ts';
23
24
  import { isSameRegistrableDomain, registrableDomain } from './etld.ts';
24
25
  import { type LLMOptions, extractJsonArray, resolveProvider } from './llm.ts';
@@ -82,6 +83,14 @@ interface GenerateOptions extends CompileOptions {
82
83
  classifications?: ClassifiedValue[];
83
84
  /** Credential values extracted during teach, passed to integration tests via env var. */
84
85
  teachCredentials?: { site: string; values: Record<string, string> };
86
+ /** Absolute path to the multi-tool build plan sidecar (.build-plan.json). */
87
+ buildPlanPath?: string;
88
+ /** Shared-module build manifest for this site (verified flags). */
89
+ sharedModules?: SharedModuleManifestEntry[];
90
+ /** Per-tool implementation plan (param→field mapping, request construction,
91
+ * response parsing, shared-module imports). Injected into the agent's initial
92
+ * message so the compile follows it. */
93
+ toolPlan?: string;
85
94
  }
86
95
 
87
96
  interface GenerateResult {
@@ -122,6 +131,9 @@ export async function generate(opts: GenerateOptions): Promise<GenerateResult> {
122
131
  sharedContext: opts.sharedContext,
123
132
  classifications: opts.classifications,
124
133
  teachCredentials: opts.teachCredentials,
134
+ buildPlanPath: opts.buildPlanPath,
135
+ sharedModules: opts.sharedModules,
136
+ toolPlan: opts.toolPlan,
125
137
  });
126
138
 
127
139
  setSpanAttributes(span, {
@@ -274,7 +286,7 @@ const TRIAGE_RESOURCE_TYPES = new Set(['XHR', 'Fetch', 'Document']);
274
286
  const HEADER_TRUNCATE_LIMIT = 200;
275
287
  // Per-request body cap for triage. Triage only needs enough body to distinguish
276
288
  // data-bearing POSTs (search/booking) from telemetry; full bodies on a busy
277
- // site can total >1MB and blow the 200K-token cap on `claude-opus-4-7`.
289
+ // site can total >1MB and blow the 200K-token cap on `claude-opus-4-8`.
278
290
  const TRIAGE_BODY_LIMIT = 500;
279
291
 
280
292
  export interface TriageResult {
@@ -0,0 +1,87 @@
1
+ /**
2
+ * Bounded-concurrency fan-out helpers shared across the teach pipeline.
3
+ *
4
+ * Lives in its own module (rather than teach.ts) so leaf modules like
5
+ * teach-plan.ts can reuse it without importing teach.ts, which would create an
6
+ * import cycle (teach.ts → teach-plan.ts → teach.ts). teach.ts re-exports both
7
+ * for backwards compatibility with existing callers + tests.
8
+ */
9
+
10
/**
 * Run `fn` over `items` with at most `concurrency` calls in flight, preserving
 * input order in the result.
 *
 * @param items - Inputs to process. Every element is passed to `fn`, including
 *   elements whose value is `undefined`.
 * @param concurrency - Maximum number of simultaneous `fn` calls. Values below
 *   1 (and NaN) are treated as 1 so the work always makes progress; fractional
 *   values are floored.
 * @param fn - Async mapper invoked once per item.
 * @returns Results in the same order as `items`.
 * @throws The first error encountered, after in-flight work settles. Items that
 *   had not started when the error occurred are never started. Use
 *   mapLimitSettled when you need per-item success/failure.
 */
export async function mapLimit<T, R>(
  items: T[],
  concurrency: number,
  fn: (item: T) => Promise<R>,
): Promise<R[]> {
  const results = new Array<R>(items.length);
  // Clamp so a zero/negative/NaN limit cannot silently spawn zero workers and
  // hand back an array of holes.
  const limit = Number.isNaN(concurrency) ? 1 : Math.max(1, Math.floor(concurrency));
  let next = 0;
  // Separate flag: a rejection value of `undefined` must still count as a failure.
  let failed = false;
  let firstError: unknown;

  const worker = async (): Promise<void> => {
    while (next < items.length && !failed) {
      const index = next++;
      try {
        // `index < items.length` is guaranteed by the loop condition, so the
        // element exists even when its value is `undefined`.
        results[index] = await fn(items[index] as T);
      } catch (err) {
        if (!failed) {
          failed = true;
          firstError = err;
        }
      }
    }
  };

  // Workers swallow their own errors, so this only waits for in-flight work.
  await Promise.all(Array.from({ length: Math.min(limit, items.length) }, worker));
  if (failed) throw firstError;
  return results;
}
37
+
38
/** Tagged per-item outcome produced by mapLimitSettled. */
type SettledResult<R> = { ok: true; value: R } | { ok: false; error: unknown };

/**
 * Like mapLimit, but never throws: each item resolves to a tagged
 * success/failure entry, preserving input order.
 *
 * @param items - Inputs to process. Every element is passed to `fn`, including
 *   elements whose value is `undefined`, so the result never contains holes.
 * @param concurrency - Maximum number of simultaneous `fn` calls. Values below
 *   1 (and NaN) are treated as 1; fractional values are floored.
 * @param fn - Async mapper invoked once per item.
 * @returns One `{ ok: true, value }` or `{ ok: false, error }` entry per item,
 *   in input order.
 */
export async function mapLimitSettled<T, R>(
  items: T[],
  concurrency: number,
  fn: (item: T) => Promise<R>,
): Promise<SettledResult<R>[]> {
  const results = new Array<SettledResult<R>>(items.length);
  // Clamp so a zero/negative/NaN limit cannot silently spawn zero workers and
  // leave every slot empty.
  const limit = Number.isNaN(concurrency) ? 1 : Math.max(1, Math.floor(concurrency));
  let next = 0;

  const worker = async (): Promise<void> => {
    while (next < items.length) {
      const index = next++;
      try {
        // `index < items.length` is guaranteed by the loop condition, so the
        // element exists even when its value is `undefined`.
        results[index] = { ok: true, value: await fn(items[index] as T) };
      } catch (err) {
        results[index] = { ok: false, error: err };
      }
    }
  };

  // Workers never reject: every failure is recorded in `results`.
  await Promise.all(Array.from({ length: Math.min(limit, items.length) }, worker));
  return results;
}
64
+
65
/**
 * Error thrown by withTimeout when the deadline elapses before the work settles.
 * A distinct class lets callers tell a timeout apart from a genuine failure.
 */
export class TimeoutError extends Error {
  /**
   * @param label - Human-readable name of the work that timed out.
   * @param ms - The timeout that was exceeded, in milliseconds (reported in the
   *   message rounded to whole seconds).
   */
  constructor(label: string, ms: number) {
    super(`${label} exceeded ${Math.round(ms / 1000)}s timeout`);
    this.name = 'TimeoutError';
  }
}

/**
 * Race a promise against a timeout. The underlying work (e.g. a CLI child) is
 * NOT cancelled — the caller just stops awaiting it and decides how to degrade.
 *
 * @param work - The promise to await.
 * @param ms - Deadline in milliseconds. Values above the runtime's maximum
 *   timer delay (2^31 - 1 ms, about 24.8 days), including `Infinity`, are
 *   clamped to that maximum instead of firing almost immediately.
 * @param label - Name used in the TimeoutError message.
 * @returns The resolved value of `work` if it settles first.
 * @throws TimeoutError on timeout; otherwise whatever `work` rejects with.
 */
export async function withTimeout<T>(work: Promise<T>, ms: number, label: string): Promise<T> {
  // setTimeout resets any delay above 2^31 - 1 ms to 1 ms, which would turn a
  // very long (or infinite) deadline into an instant timeout.
  const delay = Math.min(ms, 2147483647);
  let timer: ReturnType<typeof setTimeout> | undefined;
  const timeout = new Promise<never>((_, reject) => {
    timer = setTimeout(() => reject(new TimeoutError(label, ms)), delay);
  });
  try {
    return await Promise.race([work, timeout]);
  } finally {
    // Always clear the timer so a settled race does not keep the process alive.
    if (timer !== undefined) clearTimeout(timer);
  }
}
@@ -242,6 +242,7 @@ async function runCronImpl(opts: RunCronOptions): Promise<void> {
242
242
  if (
243
243
  ladder.includes('fetch') ||
244
244
  ladder.includes('fetch-bootstrap') ||
245
+ ladder.includes('cdp-replay') ||
245
246
  ladder.includes('stealth-fetch')
246
247
  ) {
247
248
  const validator = buildZodValidator(tool.workflow.parameters);
@@ -1,6 +1,7 @@
1
1
  /** `imprint doctor` — check that the environment can actually run imprint.
2
2
  * Reports pass/fail per prerequisite plus a one-line fix when failed. */
3
3
 
4
+ import { spawnSync } from 'node:child_process';
4
5
  import { existsSync, readFileSync, readdirSync } from 'node:fs';
5
6
  import { homedir } from 'node:os';
6
7
  import { join as pathJoin } from 'node:path';
@@ -20,6 +21,7 @@ export function doctor(): CheckResult[] {
20
21
  checkBun(),
21
22
  checkChromium(),
22
23
  checkPlaywrightChromium(),
24
+ checkVirtualDisplay(),
23
25
  checkLLMProvider(),
24
26
  checkPushOptional(),
25
27
  checkClaudeCode(),
@@ -87,6 +89,43 @@ function checkPlaywrightChromium(): CheckResult {
87
89
  };
88
90
  }
89
91
 
92
/** Report whether an `Xvfb` executable is resolvable via `sh -c 'command -v Xvfb'`.
 *  Any failure to run the probe is treated as "not present". */
function hasXvfbBinary(): boolean {
  try {
    const probe = spawnSync('sh', ['-c', 'command -v Xvfb'], { stdio: 'ignore' });
    return probe.status === 0;
  } catch {
    return false;
  }
}

/** Advisory check for a display usable by a headed replay.
 *
 *  The trusted-browser replay (playbook rung's cdp-browser transport) runs Chrome
 *  headless by default and needs no display; the `HeadlessChrome` UA token is
 *  stripped so anti-bot services don't edge-block it. A display is only a
 *  fallback for GPU-less Linux hosts, where headless WebGL reports SwiftShader
 *  and the replay has to run headed under Xvfb (launchChromium auto-starts it
 *  when a headed launch finds no `$DISPLAY`). macOS/Windows need nothing.
 *
 *  Every branch returns `ok: true`; only the final "no display, no Xvfb" branch
 *  carries a `fix` hint. */
function checkVirtualDisplay(): CheckResult {
  const name = 'Display (headed replay)';
  const platform = process.platform;

  // Non-Linux platforms have a native window server.
  if (platform !== 'linux') {
    const detail = `${platform}: native window server (no Xvfb needed)`;
    return { name, ok: true, detail };
  }

  // An existing display wins over probing for Xvfb.
  const display = process.env.DISPLAY;
  if (display) {
    return { name, ok: true, detail: `$DISPLAY=${display}` };
  }

  if (!hasXvfbBinary()) {
    // advisory — default replay is headless; Xvfb is only a GPU-less fallback
    return {
      name,
      ok: true,
      detail:
        'Linux, no $DISPLAY and no Xvfb — default replay is headless (fine); install Xvfb only if a GPU-less host gets bot-flagged',
      fix: 'GPU-less host bot-flagged? install the headed-replay fallback: apt-get install xvfb (or export DISPLAY=:0)',
    };
  }

  return {
    name,
    ok: true,
    detail: 'no $DISPLAY; Xvfb present — headed-replay fallback available for GPU-less hosts',
  };
}
128
+
90
129
  function checkLLMProvider(): CheckResult {
91
130
  const statuses = getProviderStatuses();
92
131
  const detected = statuses.filter((s) => s.detected);
@@ -73,10 +73,11 @@ const FREEFORM_POLICIES: PolicyName[] = [
73
73
  Policies.PGP_PRIVATE_KEY,
74
74
  Policies.PASSWORD_ASSIGNMENT,
75
75
  Policies.ENVIRONMENT_VARIABLE_SECRET,
76
- Policies.GENERIC_PASSWORD,
77
- Policies.GENERIC_TOKEN,
78
- Policies.GENERIC_CREDENTIAL,
79
- Policies.GENERIC_SECRET,
76
+ // NOTE: the GENERIC_* catch-alls (GENERIC_PASSWORD/TOKEN/CREDENTIAL/SECRET) are
77
+ // intentionally omitted — they match on value shape alone and fire on benign
78
+ // data (e.g. `id=1234567890`), corrupting/over-redacting structured payloads.
79
+ // Real secrets are still covered by the keyword-anchored and specific policies
80
+ // above and below (PASSWORD_ASSIGNMENT, OAUTH_*, private keys, cloud tokens, PII).
80
81
  Policies.OAUTH_CLIENT_SECRET,
81
82
  Policies.OAUTH_REFRESH_TOKEN,
82
83
  Policies.OAUTH_ACCESS_TOKEN,
@@ -87,7 +87,7 @@ export function generatePasteSnippet(opts: {
87
87
 
88
88
  switch (platform) {
89
89
  case 'claude-code':
90
- return `Add the ${toolName} tool: run \`${shellCmd}\` to register ${descLower}. Parameters: ${paramList}. The backend ladder handles browser/API state and bot detection automatically (fetch → gated fetch-bootstrap → stealth-fetch → playbook).`;
90
+ return `Add the ${toolName} tool: run \`${shellCmd}\` to register ${descLower}. Parameters: ${paramList}. The backend ladder handles browser/API state and bot detection automatically (fetch → gated fetch-bootstrap → cdp-replay → stealth-fetch → playbook).`;
91
91
 
92
92
  case 'codex':
93
93
  return `Add the ${toolName} tool: run \`${shellCmd}\` to register ${descLower}. Parameters: ${paramList}.`;
@@ -352,7 +352,7 @@ ${yamlStringify(p, { lineWidth: 0 }).trim()}
352
352
  // Backend ladder explanation.
353
353
  const backendBlock = `## Backend Ladder
354
354
 
355
- The MCP server automatically escalates from fetch API replay to gated fetch-bootstrap when browser-minted state is declared, then stealth-fetch for bot-defense state, then playbook for full DOM replay.
355
+ The MCP server automatically escalates from fetch API replay to gated fetch-bootstrap when browser-minted state is declared, then cdp-replay (API requests run inside a live trusted Chrome so a protected POST refreshes its anti-bot token between calls), then stealth-fetch for bot-defense state, then playbook for full DOM replay.
356
356
  Bot detection is handled transparently.`;
357
357
 
358
358
  // Scheduling block (optional).
@@ -6,6 +6,7 @@ import {
6
6
  llmSpanAttributes,
7
7
  resolveTraceTokenCount,
8
8
  setSpanAttributes,
9
+ totalPromptTokens,
9
10
  traceLlmIoEnabled,
10
11
  traceLlmMessages,
11
12
  traced,
@@ -17,6 +18,16 @@ interface AnalyzeResult {
17
18
  text: string;
18
19
  inputTokens: number | null;
19
20
  outputTokens: number | null;
21
+ /**
22
+ * Prompt-cache token counts, when the provider reports them. `inputTokens` is
23
+ * the *uncached* input only (the Anthropic/CLI `usage.input_tokens`); the bulk
24
+ * of a cache-hit call lives here. Threaded through so `llm.analyze` cost is
25
+ * cache-aware (cache reads bill at 0.1×, writes at 1.25×) instead of charging
26
+ * the whole prompt at the full input rate. Null/undefined for providers that
27
+ * don't expose usage (codex-cli, cursor-cli).
28
+ */
29
+ cacheReadInputTokens?: number | null;
30
+ cacheCreationInputTokens?: number | null;
20
31
  durationMs: number;
21
32
  stopReason: string | null;
22
33
  }
@@ -129,6 +140,8 @@ class AnthropicApiProvider implements LLMProvider {
129
140
  text,
130
141
  inputTokens: response.usage.input_tokens,
131
142
  outputTokens: response.usage.output_tokens,
143
+ cacheReadInputTokens: response.usage.cache_read_input_tokens ?? null,
144
+ cacheCreationInputTokens: response.usage.cache_creation_input_tokens ?? null,
132
145
  durationMs: Date.now() - t0,
133
146
  stopReason: response.stop_reason ?? null,
134
147
  };
@@ -256,7 +269,15 @@ class ClaudeCliProvider implements LLMProvider {
256
269
  );
257
270
  }
258
271
 
259
- let parsed: { result?: string; usage?: { input_tokens?: number; output_tokens?: number } };
272
+ let parsed: {
273
+ result?: string;
274
+ usage?: {
275
+ input_tokens?: number;
276
+ output_tokens?: number;
277
+ cache_read_input_tokens?: number;
278
+ cache_creation_input_tokens?: number;
279
+ };
280
+ };
260
281
  try {
261
282
  parsed = JSON.parse(stdout);
262
283
  } catch (parseErr) {
@@ -273,6 +294,8 @@ class ClaudeCliProvider implements LLMProvider {
273
294
  text: parsed.result,
274
295
  inputTokens: parsed.usage?.input_tokens ?? null,
275
296
  outputTokens: parsed.usage?.output_tokens ?? null,
297
+ cacheReadInputTokens: parsed.usage?.cache_read_input_tokens ?? null,
298
+ cacheCreationInputTokens: parsed.usage?.cache_creation_input_tokens ?? null,
276
299
  durationMs: Date.now() - t0,
277
300
  stopReason: null,
278
301
  };
@@ -437,7 +460,20 @@ async function traceAnalyze(
437
460
  },
438
461
  async (span) => {
439
462
  const result = await fn();
440
- const inputTokens = resolveTraceTokenCount(result.inputTokens, details?.inputText);
463
+ // Providers report `inputTokens` as the *uncached* input only; the cached
464
+ // portion lives in the cache fields. `llmCostAttributes` expects the TOTAL
465
+ // prompt tokens (it derives uncached = total − cacheRead − cacheWrite), so
466
+ // sum them here. A real total is also large enough to clear the
467
+ // resolveTraceTokenCount sanity check, so cache-hit calls stop falling back
468
+ // to the chars/4 estimate.
469
+ const cacheReadTokens = result.cacheReadInputTokens ?? undefined;
470
+ const cacheWriteTokens = result.cacheCreationInputTokens ?? undefined;
471
+ const totalInputTokens = totalPromptTokens(
472
+ result.inputTokens,
473
+ cacheReadTokens,
474
+ cacheWriteTokens,
475
+ );
476
+ const inputTokens = resolveTraceTokenCount(totalInputTokens, details?.inputText);
441
477
  const outputTokens = resolveTraceTokenCount(result.outputTokens, result.text);
442
478
  setSpanAttributes(span, {
443
479
  ...llmSpanAttributes({
@@ -445,6 +481,8 @@ async function traceAnalyze(
445
481
  model,
446
482
  inputTokens: inputTokens.tokens,
447
483
  outputTokens: outputTokens.tokens,
484
+ cacheReadTokens,
485
+ cacheWriteTokens,
448
486
  tokenCountsEstimated:
449
487
  inputTokens.source === 'estimated' || outputTokens.source === 'estimated',
450
488
  inputTokenSource: inputTokens.source,
@@ -512,12 +550,20 @@ async function traceMessageWithTools(
512
550
  return `[${b.type}]`;
513
551
  })
514
552
  .join('\n');
553
+ const cacheReadTokens = response.usage.cache_read_input_tokens ?? undefined;
554
+ const cacheWriteTokens = response.usage.cache_creation_input_tokens ?? undefined;
515
555
  setSpanAttributes(span, {
516
556
  ...llmSpanAttributes({
517
557
  provider,
518
558
  model,
519
- inputTokens: response.usage.input_tokens,
559
+ inputTokens: totalPromptTokens(
560
+ response.usage.input_tokens,
561
+ cacheReadTokens,
562
+ cacheWriteTokens,
563
+ ),
520
564
  outputTokens: response.usage.output_tokens,
565
+ cacheReadTokens,
566
+ cacheWriteTokens,
521
567
  stopReason: response.stop_reason,
522
568
  outputMessages: captureIo
523
569
  ? traceLlmMessages([{ role: 'assistant', content: outputText }])
@@ -824,7 +870,7 @@ export function detectTeachProvider(): ProviderName {
824
870
  }
825
871
 
826
872
  function createProvider(name: ProviderName, opts: LLMOptions = {}): LLMProvider {
827
- const model = opts.model ?? process.env.ANTHROPIC_MODEL ?? 'claude-opus-4-7';
873
+ const model = opts.model ?? process.env.ANTHROPIC_MODEL ?? 'claude-opus-4-8';
828
874
  const temperature = opts.temperature ?? 0;
829
875
  const maxTokens = opts.maxTokens ?? 8192;
830
876
 
@@ -865,11 +911,11 @@ export function preferredAgentModel(provider: ProviderName): string {
865
911
  switch (provider) {
866
912
  case 'anthropic-api':
867
913
  case 'claude-cli':
868
- return 'claude-opus-4-7';
914
+ return 'claude-opus-4-8';
869
915
  case 'codex-cli':
870
916
  return 'gpt-5.5';
871
917
  case 'cursor-cli':
872
- return 'claude-opus-4-7'; // best-effort; cursor passes through
918
+ return 'claude-opus-4-8'; // best-effort; cursor passes through
873
919
  }
874
920
  }
875
921
 
@@ -883,7 +929,8 @@ export function availableModelsForProvider(provider: ProviderName): ModelOption[
883
929
  case 'anthropic-api':
884
930
  case 'claude-cli':
885
931
  return [
886
- { model: 'claude-opus-4-7', isDefault: true },
932
+ { model: 'claude-opus-4-8', isDefault: true },
933
+ { model: 'claude-opus-4-7', isDefault: false },
887
934
  { model: 'claude-sonnet-4-6', isDefault: false },
888
935
  { model: 'claude-haiku-4-5', isDefault: false },
889
936
  { model: 'claude-opus-4-6', isDefault: false },
@@ -908,7 +955,8 @@ export function availableModelsForProvider(provider: ProviderName): ModelOption[
908
955
  ];
909
956
  case 'cursor-cli':
910
957
  return [
911
- { model: 'claude-opus-4-7', isDefault: true },
958
+ { model: 'claude-opus-4-8', isDefault: true },
959
+ { model: 'claude-opus-4-7', isDefault: false },
912
960
  { model: 'claude-sonnet-4-6', isDefault: false },
913
961
  { model: 'claude-haiku-4-5', isDefault: false },
914
962
  { model: 'gpt-5.5', isDefault: false },
@@ -24,7 +24,13 @@ import {
24
24
  ListToolsRequestSchema,
25
25
  type Tool,
26
26
  } from '@modelcontextprotocol/sdk/types.js';
27
- import { buildCompileTools, externalVerification } from './compile-tools.ts';
27
+ import { type SharedModuleManifestEntry, resolvePlanSliceFromFile } from './build-plan.ts';
28
+ import {
29
+ applyLiveVerification,
30
+ applyParamVerification,
31
+ buildCompileTools,
32
+ externalVerification,
33
+ } from './compile-tools.ts';
28
34
  import { loadJsonFile } from './load-json.ts';
29
35
  import { createLog } from './log.ts';
30
36
  import { redactSession } from './redact.ts';
@@ -43,6 +49,10 @@ interface RunCompileMcpServerOptions {
43
49
  maxVerificationCycles?: number;
44
50
  candidate?: ToolCandidate;
45
51
  sharedContext?: SharedCompileContext;
52
+ /** Absolute path to the multi-tool build plan sidecar (.build-plan.json). */
53
+ buildPlanPath?: string;
54
+ /** Shared-module build manifest for this site (verified flags). */
55
+ sharedModules?: SharedModuleManifestEntry[];
46
56
  }
47
57
 
48
58
  const DONE_SENTINEL = '.compile-done.json';
@@ -67,12 +77,24 @@ export async function runCompileMcpServer(opts: RunCompileMcpServerOptions): Pro
67
77
  session = redactSession(session).session;
68
78
  }
69
79
 
70
- // Build the 8 read/write tools (same as the in-process loop).
80
+ // Build the read/write tools (same as the in-process loop). When a build
81
+ // plan is present, buildCompileTools also exposes read_build_plan.
71
82
  const compileTools = buildCompileTools(session, opts.toolDir, opts.sessionPath, {
72
83
  candidate: opts.candidate,
73
84
  sharedContext: opts.sharedContext,
85
+ buildPlanPath: opts.buildPlanPath,
86
+ sharedModules: opts.sharedModules,
74
87
  });
75
88
 
89
+ // Resolve the shared modules + producer→consumer token contracts the plan
90
+ // assigned this tool, so verification can assert modules are imported and
91
+ // require a chained test for each producer-sourced token param.
92
+ const { assignedSharedModules, tokenParams, emittedTokens } = resolvePlanSliceFromFile(
93
+ opts.buildPlanPath,
94
+ opts.candidate?.toolName,
95
+ opts.sharedModules,
96
+ );
97
+
76
98
  // The custom done/give_up tools live alongside in MCP space.
77
99
  const doneTool: Tool = {
78
100
  name: 'done',
@@ -136,25 +158,36 @@ export async function runCompileMcpServer(opts: RunCompileMcpServerOptions): Pro
136
158
  if (name === 'done') {
137
159
  const summary = (args as { summary?: string }).summary ?? 'Task completed';
138
160
  log(`done() called: ${summary}`);
139
- const { failures, warnings } = await externalVerification(
140
- opts.toolDir,
141
- session,
142
- opts.sessionPath,
143
- {
161
+ const { failures, warnings, paramVerification, liveVerification } =
162
+ await externalVerification(opts.toolDir, session, opts.sessionPath, {
144
163
  expectedToolName: opts.candidate?.toolName,
145
164
  likelyParams: opts.candidate?.likelyParams,
146
165
  candidateRequestSeqs: opts.candidate?.requestSeqs,
147
- },
148
- );
166
+ // Widen Fix B's variation pool to dependency requests so a token that
167
+ // varies only across them and is frozen as a literal in the tool's
168
+ // request is caught (the cross-request session-token leak case).
169
+ dependencyRequestSeqs: opts.candidate?.dependencySeqs,
170
+ assignedSharedModules,
171
+ tokenParams,
172
+ emittedTokens,
173
+ });
149
174
  if (warnings.length > 0) {
150
175
  log(`verification warnings (non-blocking):\n${warnings.join('\n')}`);
151
176
  }
152
177
  if (failures.length === 0) {
178
+ // Persist per-parameter verified flags + the live-verification stamp
179
+ // onto workflow.json. Audit and teach read the stamp.
180
+ applyLiveVerification(opts.toolDir, liveVerification);
181
+ const paramWarnings = applyParamVerification(opts.toolDir, paramVerification);
182
+ if (paramWarnings.length > 0) {
183
+ log(`parameter verification:\n${paramWarnings.join('\n')}`);
184
+ }
185
+ const allWarnings = [...warnings, ...paramWarnings];
153
186
  const sentinel = pathJoin(opts.toolDir, DONE_SENTINEL);
154
187
  writeFileSync(
155
188
  sentinel,
156
189
  JSON.stringify(
157
- { summary, verification: 'passed', warnings, timestamp: Date.now() },
190
+ { summary, verification: 'passed', warnings: allWarnings, timestamp: Date.now() },
158
191
  null,
159
192
  2,
160
193
  ),