imprint-mcp 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126) hide show
  1. package/README.md +165 -201
  2. package/examples/discoverandgo/README.md +1 -1
  3. package/examples/echo/README.md +1 -1
  4. package/examples/google-flights/README.md +28 -0
  5. package/examples/google-flights/_shared/batchexecute.ts +63 -0
  6. package/examples/google-flights/_shared/flights_request.ts +95 -0
  7. package/examples/google-flights/_shared/package.json +9 -0
  8. package/examples/google-flights/get_flight_booking_details/index.ts +159 -0
  9. package/examples/google-flights/get_flight_booking_details/package.json +9 -0
  10. package/examples/google-flights/get_flight_booking_details/parser.ts +182 -0
  11. package/examples/google-flights/get_flight_booking_details/playbook.yaml +138 -0
  12. package/examples/google-flights/get_flight_booking_details/request-transform.ts +86 -0
  13. package/examples/google-flights/get_flight_booking_details/workflow.json +98 -0
  14. package/examples/google-flights/get_flight_calendar_prices/index.ts +131 -0
  15. package/examples/google-flights/get_flight_calendar_prices/package.json +9 -0
  16. package/examples/google-flights/get_flight_calendar_prices/parser.ts +86 -0
  17. package/examples/google-flights/get_flight_calendar_prices/playbook.yaml +97 -0
  18. package/examples/google-flights/get_flight_calendar_prices/request-transform.ts +31 -0
  19. package/examples/google-flights/get_flight_calendar_prices/workflow.json +76 -0
  20. package/examples/google-flights/lookup_airport/index.ts +101 -0
  21. package/examples/google-flights/lookup_airport/package.json +9 -0
  22. package/examples/google-flights/lookup_airport/parser.ts +66 -0
  23. package/examples/google-flights/lookup_airport/playbook.yaml +47 -0
  24. package/examples/google-flights/lookup_airport/request-transform.ts +20 -0
  25. package/examples/google-flights/lookup_airport/workflow.json +57 -0
  26. package/examples/google-flights/search_flights/index.ts +219 -0
  27. package/examples/google-flights/search_flights/package.json +9 -0
  28. package/examples/google-flights/search_flights/parser.ts +169 -0
  29. package/examples/google-flights/search_flights/playbook.yaml +184 -0
  30. package/examples/google-flights/search_flights/request-transform.ts +119 -0
  31. package/examples/google-flights/search_flights/workflow.json +143 -0
  32. package/examples/google-hotels/README.md +29 -0
  33. package/examples/google-hotels/_shared/batchexecute.ts +73 -0
  34. package/examples/google-hotels/_shared/freq.ts +158 -0
  35. package/examples/google-hotels/_shared/package.json +9 -0
  36. package/examples/google-hotels/autocomplete_hotel_location/index.ts +80 -0
  37. package/examples/google-hotels/autocomplete_hotel_location/package.json +9 -0
  38. package/examples/google-hotels/autocomplete_hotel_location/parser.ts +71 -0
  39. package/examples/google-hotels/autocomplete_hotel_location/playbook.yaml +36 -0
  40. package/examples/google-hotels/autocomplete_hotel_location/request-transform.ts +37 -0
  41. package/examples/google-hotels/autocomplete_hotel_location/workflow.json +36 -0
  42. package/examples/google-hotels/get_hotel_booking_options/index.ts +143 -0
  43. package/examples/google-hotels/get_hotel_booking_options/package.json +9 -0
  44. package/examples/google-hotels/get_hotel_booking_options/parser.ts +271 -0
  45. package/examples/google-hotels/get_hotel_booking_options/playbook.yaml +154 -0
  46. package/examples/google-hotels/get_hotel_booking_options/request-transform.ts +154 -0
  47. package/examples/google-hotels/get_hotel_booking_options/workflow.json +84 -0
  48. package/examples/google-hotels/get_hotel_reviews/index.ts +81 -0
  49. package/examples/google-hotels/get_hotel_reviews/package.json +9 -0
  50. package/examples/google-hotels/get_hotel_reviews/parser.ts +128 -0
  51. package/examples/google-hotels/get_hotel_reviews/playbook.yaml +64 -0
  52. package/examples/google-hotels/get_hotel_reviews/request-transform.ts +42 -0
  53. package/examples/google-hotels/get_hotel_reviews/workflow.json +37 -0
  54. package/examples/google-hotels/search_hotels/index.ts +207 -0
  55. package/examples/google-hotels/search_hotels/package.json +9 -0
  56. package/examples/google-hotels/search_hotels/parser.ts +260 -0
  57. package/examples/google-hotels/search_hotels/playbook.yaml +87 -0
  58. package/examples/google-hotels/search_hotels/request-transform.ts +197 -0
  59. package/examples/google-hotels/search_hotels/workflow.json +127 -0
  60. package/package.json +3 -2
  61. package/prompts/audit-agent.md +71 -0
  62. package/prompts/build-planning.md +74 -0
  63. package/prompts/compile-agent.md +131 -27
  64. package/prompts/prereq-builder.md +64 -0
  65. package/prompts/prereq-planner.md +34 -0
  66. package/prompts/tool-planning.md +39 -0
  67. package/src/cli.ts +109 -2
  68. package/src/imprint/agent.ts +5 -0
  69. package/src/imprint/audit.ts +996 -0
  70. package/src/imprint/backend-ladder.ts +1214 -184
  71. package/src/imprint/build-plan.ts +1051 -0
  72. package/src/imprint/cdp-browser-fetch.ts +589 -0
  73. package/src/imprint/cdp-jar-cache.ts +320 -0
  74. package/src/imprint/chromium.ts +135 -0
  75. package/src/imprint/claude-cli-compile.ts +125 -25
  76. package/src/imprint/codex-cli-compile.ts +26 -23
  77. package/src/imprint/compile-agent-types.ts +38 -0
  78. package/src/imprint/compile-agent.ts +63 -25
  79. package/src/imprint/compile-tools.ts +1656 -64
  80. package/src/imprint/compile.ts +13 -1
  81. package/src/imprint/concurrency.ts +87 -0
  82. package/src/imprint/cron.ts +1 -0
  83. package/src/imprint/doctor.ts +39 -0
  84. package/src/imprint/freeform-redact.ts +5 -4
  85. package/src/imprint/integrations.ts +2 -2
  86. package/src/imprint/llm.ts +56 -8
  87. package/src/imprint/mcp-compile-server.ts +43 -10
  88. package/src/imprint/mcp-maintenance.ts +9 -101
  89. package/src/imprint/mcp-server.ts +73 -7
  90. package/src/imprint/multi-progress.ts +7 -2
  91. package/src/imprint/param-grounding.ts +367 -0
  92. package/src/imprint/paths.ts +29 -0
  93. package/src/imprint/playbook-runner.ts +101 -40
  94. package/src/imprint/prereq-builder.ts +651 -0
  95. package/src/imprint/probe-backends.ts +6 -3
  96. package/src/imprint/record.ts +10 -1
  97. package/src/imprint/redact.ts +30 -2
  98. package/src/imprint/replay-capture.ts +19 -18
  99. package/src/imprint/runtime.ts +19 -10
  100. package/src/imprint/session-diff.ts +79 -2
  101. package/src/imprint/session-merge.ts +9 -5
  102. package/src/imprint/stealth-chromium.ts +81 -0
  103. package/src/imprint/stealth-fetch.ts +309 -29
  104. package/src/imprint/stealth-token-cache.ts +88 -0
  105. package/src/imprint/teach-plan.ts +251 -0
  106. package/src/imprint/teach-state.ts +10 -0
  107. package/src/imprint/teach.ts +456 -142
  108. package/src/imprint/tool-candidates.ts +72 -14
  109. package/src/imprint/tool-plan.ts +313 -0
  110. package/src/imprint/tracing.ts +135 -6
  111. package/src/imprint/types.ts +61 -3
  112. package/examples/google-flights/search_google_flights/index.ts +0 -101
  113. package/examples/google-flights/search_google_flights/parser.test.ts +0 -140
  114. package/examples/google-flights/search_google_flights/parser.ts +0 -189
  115. package/examples/google-flights/search_google_flights/playbook.yaml +0 -130
  116. package/examples/google-flights/search_google_flights/workflow.json +0 -48
  117. package/examples/google-hotels/search_google_hotels/index.ts +0 -194
  118. package/examples/google-hotels/search_google_hotels/parser.test.ts +0 -168
  119. package/examples/google-hotels/search_google_hotels/parser.ts +0 -330
  120. package/examples/google-hotels/search_google_hotels/playbook.yaml +0 -125
  121. package/examples/google-hotels/search_google_hotels/workflow.json +0 -111
  122. package/examples/namecheap-domains/search_namecheap_domains/index.ts +0 -144
  123. package/examples/namecheap-domains/search_namecheap_domains/parser.ts +0 -380
  124. package/examples/namecheap-domains/search_namecheap_domains/playbook.yaml +0 -50
  125. package/examples/namecheap-domains/search_namecheap_domains/request-transform.ts +0 -136
  126. package/examples/namecheap-domains/search_namecheap_domains/workflow.json +0 -97
@@ -0,0 +1,651 @@
1
+ /**
2
+ * Prereq builder for multi-tool `imprint teach`.
3
+ *
4
+ * For each shared module the planner declared (build-plan.ts), this writes
5
+ * `~/.imprint/<site>/_shared/<name>.ts` + a sibling test and verifies it before
6
+ * the per-tool compile fan-out. It runs as a single-shot `llm.analyze` →
7
+ * verify → feedback loop (the same shape compilePlaybook uses), so it works
8
+ * uniformly on every provider (claude-cli, codex-cli, anthropic-api) without a
9
+ * dedicated MCP server. `verifySharedModule` is the deterministic anti-cheat
10
+ * gate: the module must export what the plan declared, its test must pass with
11
+ * non-trivial assertions, it must typecheck, and a kind-specific ground-truth
12
+ * anchor must reproduce the recorded behavior.
13
+ */
14
+
15
+ import { copyFileSync, existsSync, mkdirSync, readFileSync, rmSync, writeFileSync } from 'node:fs';
16
+ import { basename, dirname as pathDirname, join as pathJoin } from 'node:path';
17
+ import type { SharedModuleSpec } from './build-plan.ts';
18
+ import {
19
+ countExpectCalls,
20
+ hasTrivialAssertion,
21
+ runCommand,
22
+ typecheckArtifacts,
23
+ } from './compile-tools.ts';
24
+ import { type LLMOptions, extractJsonObject, resolveProvider } from './llm.ts';
25
+ import { createLog } from './log.ts';
26
+ import { looksLikeRpcEnvelope } from './redact.ts';
27
+ import { setSpanAttributes, traced } from './tracing.ts';
28
+ import type { CapturedRequest, Session } from './types.ts';
29
+
30
// Directory containing the prompt Markdown files read by this module
// (`prereq-builder.md`, `prereq-planner.md`), two levels above this file's directory.
const PROMPTS_DIR = pathJoin(import.meta.dir, '..', '..', 'prompts');
// Name of the environment variable that carries the session path into the
// `bun test` run of a shared module's test.
const SESSION_PATH_ENV = 'IMPRINT_SESSION_PATH';
// Number of build → verify cycles used when the caller passes no `maxCycles`.
const DEFAULT_MAX_CYCLES = 5;
// Maximum length of a recorded request/response body placed in the LLM payload
// before it is truncated.
const SOURCE_BODY_LIMIT = 12_000;
// Minimum length of a query-param value for the request-transform anchor to
// treat it as a candidate signing param.
const MIN_SIGNING_VALUE_LEN = 8;
const log = createLog('prereq-builder');

// Incremented on every `importModuleFresh` call so temp-copy filenames stay
// unique within this process.
let verifyCopyCounter = 0;
38
+
39
/**
 * Import a freshly-written module, defeating bun's stale `.ts` import cache.
 *
 * Bun keys its transpiled-module cache by file PATH and ignores the `?t=`
 * query cache-buster for local `.ts` files, so within the long-lived teach
 * process a re-import after the compile agent edits a module returns the
 * STALE first-loaded version. That silently breaks per-cycle verification: a
 * request-transform the agent fixed across cycles still looks like it never
 * exported `transform`, so a valid signing module fails all cycles and gets
 * pruned — forcing every tool to re-implement it. (The `bun test` step is
 * immune because it runs in a fresh subprocess; that's why the test could
 * pass while the in-process anchor wrongly failed.)
 *
 * Copying to a unique sibling filename in the SAME directory forces a fresh
 * load while keeping the module's relative imports to sibling shared modules
 * resolvable. The leading dot + explicit cleanup keep the temp copy out of
 * test/typecheck globs.
 *
 * @param modulePath Path of the module file to load.
 * @returns The namespace object of the freshly loaded copy.
 * @throws Whatever `copyFileSync` or the dynamic import throws; once the copy
 *   exists it is removed whether or not the import succeeds.
 */
export async function importModuleFresh(modulePath: string): Promise<Record<string, unknown>> {
  // Function-scope import of a Node builtin so this block needs no new
  // top-of-file dependency.
  const { pathToFileURL } = await import('node:url');
  const uniq = pathJoin(
    pathDirname(modulePath),
    `.verify-${Date.now()}-${process.pid}-${verifyCopyCounter++}.ts`,
  );
  copyFileSync(modulePath, uniq);
  try {
    // Build the specifier with pathToFileURL rather than `file://${uniq}`:
    // it resolves relative paths and percent-encodes characters such as
    // spaces, `#`, `?` and `%` that would otherwise corrupt the URL.
    return (await import(pathToFileURL(uniq).href)) as Record<string, unknown>;
  } finally {
    rmSync(uniq, { force: true });
  }
}
67
+
68
// Loosely-typed callable used for functions taken off dynamically-imported
// modules, whose real signatures are not known at compile time.
// biome-ignore lint/suspicious/noExplicitAny: dynamically-imported user code
type AnyFn = (...args: any[]) => unknown;
70
+
71
/** Outcome of one `buildSharedModule` run. */
interface BuildSharedModuleResult {
  /** The module spec this result belongs to. */
  module: SharedModuleSpec;
  /** True when some cycle passed verification with zero failures. */
  ok: boolean;
  /** Failures from the last cycle; empty when `ok` is true. */
  failures: string[];
  /** Warnings from the most recent verification run. */
  warnings: string[];
  /** Number of build cycles that were run. */
  cycles: number;
  /** Elapsed time of the build in milliseconds. */
  durationMs: number;
}
79
+
80
/**
 * Build one shared module with a plan → generate → verify → feedback loop.
 *
 * An optional planning pass produces a Markdown plan that is saved next to the
 * module as `<name>.plan.md` and attached to the builder payload. Each cycle
 * then asks the LLM for a `{module, test}` JSON object, writes the files into
 * `sharedDir`, and runs `verifySharedModule`. A cycle's failures are passed to
 * the next cycle as `previousFailures`. The loop ends at the first cycle with
 * no failures, or after `maxCycles` cycles (default `DEFAULT_MAX_CYCLES`).
 *
 * @param opts Build inputs; see the inline field comments.
 * @returns Whether the module verified, the last failures/warnings, the number
 *   of cycles run and the elapsed time in milliseconds.
 * @throws If the `prereq-builder.md` prompt is missing. Errors thrown by
 *   `llm.analyze` inside the build loop, or by file writes, are not caught here.
 */
export async function buildSharedModule(opts: {
  site: string;
  module: SharedModuleSpec;
  session: Session;
  /** Redacted session path — set as IMPRINT_SESSION_PATH when running the test. */
  sessionPath: string;
  sharedDir: string;
  /** Already-built modules this one may depend on (for import context). */
  builtModules?: SharedModuleSpec[];
  llmConfig?: LLMOptions;
  maxCycles?: number;
  onProgress?: (msg: string) => void;
}): Promise<BuildSharedModuleResult> {
  return await traced(
    'teach.build_shared_module',
    'AGENT',
    {
      'imprint.site': opts.session.site,
      'imprint.shared_module': opts.module.path,
      'imprint.shared_module_kind': opts.module.kind,
      'imprint.provider': opts.llmConfig?.provider ?? 'auto',
    },
    async (span) => {
      const start = Date.now();
      const promptPath = pathJoin(PROMPTS_DIR, 'prereq-builder.md');
      if (!existsSync(promptPath)) {
        throw new Error(
          `Prereq-builder prompt not found at ${promptPath}\n→ this is an Imprint installation problem.`,
        );
      }
      const systemPrompt = readFileSync(promptPath, 'utf8');
      const llm = resolveProvider(opts.llmConfig ?? {});
      const maxCycles = opts.maxCycles ?? DEFAULT_MAX_CYCLES;

      // Plan phase (plan-first): one analysis pass that yields a Markdown
      // implementation plan the cycle loop then follows. It grounds the data
      // shape + strict-typing decisions before any code is written, so retries
      // fix mechanics rather than re-deriving structure. Best-effort — a missing
      // prompt or any LLM error degrades to implementing without a plan.
      const plan = await planSharedModule(
        opts.module,
        opts.session,
        opts.builtModules ?? [],
        llm,
        opts.onProgress,
      );
      if (plan) {
        // The plan is written before the first writeSharedFiles call (the only
        // other place that creates sharedDir), so make sure the directory
        // exists; otherwise a fresh site fails here with ENOENT.
        mkdirSync(opts.sharedDir, { recursive: true });
        const planFile = `${basename(opts.module.path).replace(/\.ts$/, '')}.plan.md`;
        writeFileSync(pathJoin(opts.sharedDir, planFile), plan, 'utf8');
      }
      setSpanAttributes(span, { 'imprint.shared_module.planned': plan != null });
      const basePayload = buildPrereqPayload(
        opts.module,
        opts.session,
        opts.builtModules ?? [],
        plan,
      );

      let lastFailures: string[] = ['builder produced no output'];
      let lastWarnings: string[] = [];
      let cycle = 0;
      for (cycle = 1; cycle <= maxCycles; cycle++) {
        opts.onProgress?.(
          cycle === 1
            ? `${opts.module.path}: cycle ${cycle}/${maxCycles}`
            : `${opts.module.path}: cycle ${cycle}/${maxCycles} (retrying after: ${summarizeFailures(lastFailures)})`,
        );
        // From the second cycle on, feed the previous cycle's failures back.
        const payload =
          cycle === 1 ? basePayload : { ...basePayload, previousFailures: lastFailures };
        const result = await llm.analyze(systemPrompt, payload);
        const objectText = extractJsonObject(result.text);
        if (!objectText) {
          lastFailures = ['builder did not return a JSON object with {module, test}'];
          continue;
        }
        let parsed: { module?: unknown; test?: unknown };
        try {
          parsed = JSON.parse(objectText);
        } catch (err) {
          lastFailures = [
            `builder returned invalid JSON: ${err instanceof Error ? err.message : String(err)}`,
          ];
          continue;
        }
        if (typeof parsed.module !== 'string' || parsed.module.trim().length === 0) {
          lastFailures = ['builder response missing a non-empty "module" string'];
          continue;
        }

        writeSharedFiles(
          opts.sharedDir,
          opts.module,
          parsed.module,
          typeof parsed.test === 'string' ? parsed.test : undefined,
        );

        const { failures, warnings } = await verifySharedModule(
          opts.sharedDir,
          opts.module,
          opts.session,
          opts.sessionPath,
        );
        lastFailures = failures;
        lastWarnings = warnings;
        if (failures.length === 0) {
          setSpanAttributes(span, {
            'imprint.shared_module.cycles': cycle,
            'imprint.shared_module.ok': true,
          });
          log(`built ${opts.module.path} in ${cycle} cycle(s)`);
          return {
            module: opts.module,
            ok: true,
            failures: [],
            warnings,
            cycles: cycle,
            durationMs: Date.now() - start,
          };
        }
        log(
          `verify failed for ${opts.module.path} (cycle ${cycle}/${maxCycles}): ${summarizeFailures(failures)}`,
        );
      }

      // The loop variable ends one past the last cycle that ran; report the
      // same count on the span and in the result.
      const cyclesRun = cycle - 1;
      setSpanAttributes(span, {
        'imprint.shared_module.cycles': cyclesRun,
        'imprint.shared_module.ok': false,
      });
      return {
        module: opts.module,
        ok: false,
        failures: lastFailures,
        warnings: lastWarnings,
        cycles: cyclesRun,
        durationMs: Date.now() - start,
      };
    },
  );
}
219
+
220
/**
 * Planning pass (plan-first). Makes a single analysis call and returns the
 * Markdown implementation plan that the build loop then follows.
 *
 * Returns `undefined` (meaning: implement without a plan) when the module kind
 * is `types`, when IMPRINT_NO_PREREQ_PLAN disables planning, when the planner
 * prompt file is missing, when the LLM returns an empty plan, or when the LLM
 * call throws.
 */
async function planSharedModule(
  module: SharedModuleSpec,
  session: Session,
  builtModules: SharedModuleSpec[],
  llm: ReturnType<typeof resolveProvider>,
  onProgress?: (msg: string) => void,
): Promise<string | undefined> {
  const planningSkipped = module.kind === 'types' || prereqPlanDisabled();
  if (planningSkipped) return undefined;

  const plannerPromptPath = pathJoin(PROMPTS_DIR, 'prereq-planner.md');
  if (!existsSync(plannerPromptPath)) return undefined;

  onProgress?.(`${module.path}: planning`);
  const plannerPrompt = readFileSync(plannerPromptPath, 'utf8');
  const request = buildPrereqPayload(module, session, builtModules);

  try {
    const { text } = await llm.analyze(plannerPrompt, request);
    const plan = stripCodeFences(text).trim();
    if (plan.length > 0) {
      log(`planned ${module.path} (${plan.length} chars)`);
      return plan;
    }
    return undefined;
  } catch (err) {
    // Best-effort: a failed planning call must not abort the build.
    const reason = err instanceof Error ? err.message : String(err);
    log(`planning failed for ${module.path} (${reason}) — implementing without a plan`);
    return undefined;
  }
}
250
+
251
/**
 * Whether the IMPRINT_NO_PREREQ_PLAN environment variable turns planning off.
 * Unset or empty means "not disabled"; the values `0`, `false`, `no` and `off`
 * (case-insensitive) also mean "not disabled"; any other value disables it.
 */
function prereqPlanDisabled(): boolean {
  const raw = process.env.IMPRINT_NO_PREREQ_PLAN;
  if (!raw) return false;
  switch (raw.toLowerCase()) {
    case '0':
    case 'false':
    case 'no':
    case 'off':
      return false;
    default:
      return true;
  }
}
255
+
256
/**
 * Unwrap a response whose entire body is a single Markdown code fence; leave
 * inline fences (snippets within the plan) untouched.
 *
 * The opening fence may carry any info string (`markdown`, `c++`, a trailing
 * space, …) and the text may use LF or CRLF line endings. A fence with no
 * content unwraps to the empty string.
 *
 * @param text Raw LLM response text.
 * @returns The fence's inner text when the trimmed response starts with an
 *   opening fence line and ends with a closing fence; otherwise the trimmed
 *   text unchanged.
 */
function stripCodeFences(text: string): string {
  const t = text.trim();
  // Opening fence line (info string may not contain a backtick), an optional
  // body followed by a line break, then the closing fence at end of input.
  const m = /^```[^\n`]*\r?\n(?:([\s\S]*?)\r?\n)?```$/.exec(t);
  if (!m) return t;
  return m[1] ?? '';
}
263
+
264
/**
 * Reduce the verifier's failure list (which can run to several KB) to a short,
 * human-scannable list of WHICH gate(s) failed. It is used in the per-cycle
 * progress line and the "verify failed" log, so a slow build shows its blocker
 * (typecheck vs test vs anchor) rather than a bare "verify failed". The full
 * failure text is still what the build loop passes on as `previousFailures`.
 * The gate labels come from `classifyFailure`, which must stay in sync with
 * the failure strings produced by verifySharedModule and the build loop.
 *
 * @param failures Raw failure messages.
 * @returns Distinct gate labels in first-seen order joined by `, `, or
 *   `unknown` when there are none.
 */
export function summarizeFailures(failures: string[]): string {
  const labels: string[] = [];
  for (const failure of failures) {
    const label = classifyFailure(failure);
    if (!labels.includes(label)) labels.push(label);
  }
  const summary = labels.join(', ');
  return summary || 'unknown';
}
276
+
277
/**
 * Map one failure message to the name of the gate that produced it by looking
 * for marker substrings. Rules are tried in order and the first hit wins;
 * a message matching no rule is labelled `verification`.
 */
function classifyFailure(f: string): string {
  const mentions = (...needles: string[]): boolean =>
    needles.some((needle) => f.includes(needle));

  // Ordered: earlier entries take precedence over later ones.
  const gates: ReadonlyArray<readonly [string, () => boolean]> = [
    ['typecheck', () => mentions('failed typecheck')],
    ['missing export', () => mentions('does not export')],
    ['import error', () => mentions('import failed')],
    [
      'test',
      () =>
        /\bbun test\b.*exited/.test(f) ||
        mentions('expect() calls', 'trivial tautological', 'needs a test proving'),
    ],
    ['signing anchor', () => mentions('(request-transform)')],
    ['parser anchor', () => mentions('(parser-helper)')],
    ['malformed builder output', () => mentions('JSON object', 'invalid JSON', '"module" string')],
  ];

  for (const [label, hit] of gates) {
    if (hit()) return label;
  }
  return 'verification';
}
296
+
297
+ // ─── Verification (anti-cheat gate) ─────────────────────────────────────────
298
+
299
/** Outcome of `verifySharedModule`. */
interface VerifySharedModuleResult {
  /** Messages for gates that failed; an empty list means the module passed. */
  failures: string[];
  /** Non-blocking messages reported by the kind-specific anchors. */
  warnings: string[];
}
303
+
304
/**
 * Deterministic verification gate for one shared module in `sharedDir`.
 *
 * Gates, in order:
 *  1. the module imports and exposes every runtime symbol named in the plan's
 *     `exportSignatures`;
 *  2. a sibling `<name>.test.ts` exists, has at least 3 `expect()` calls, has
 *     no trivial assertions, and passes under `bun test`;
 *  3. the module and its declared dependency files typecheck;
 *  4. the kind-specific anchor runs (only if the import in gate 1 succeeded).
 * Gates 1, 2 and 4 are skipped for modules that export only types.
 *
 * @param sharedDir Directory holding the module and its test.
 * @param module Plan entry describing the module.
 * @param session Recorded session used by the anchors.
 * @param sessionPath Session path exposed to the test run via IMPRINT_SESSION_PATH.
 * @returns Collected failures and warnings; no failures means the module passed.
 */
export async function verifySharedModule(
  sharedDir: string,
  module: SharedModuleSpec,
  session: Session,
  sessionPath: string,
): Promise<VerifySharedModuleResult> {
  const verdict: VerifySharedModuleResult = { failures: [], warnings: [] };
  const { failures, warnings } = verdict;

  const moduleFile = basename(module.path);
  const testFile = `${moduleFile.replace(/\.ts$/, '')}.test.ts`;
  const moduleAbs = pathJoin(sharedDir, moduleFile);
  const testAbs = pathJoin(sharedDir, testFile);

  if (!existsSync(moduleAbs)) {
    failures.push(`${module.path} was not written`);
    return verdict;
  }

  const hasRuntimeSurface = !isTypesOnlyModule(readFileSync(moduleAbs, 'utf8'));
  let imported = true;

  if (hasRuntimeSurface) {
    // Gate 1: runtime import + exported-symbol checks.
    try {
      const loaded = await importModuleFresh(moduleAbs);
      for (const signature of module.exportSignatures) {
        if (isTypeSignature(signature)) continue;
        const symbol = exportedSymbolName(signature);
        if (!symbol || symbol in loaded) continue;
        failures.push(`${module.path} does not export "${symbol}" (declared in exportSignatures)`);
      }
    } catch (err) {
      imported = false;
      const reason = err instanceof Error ? err.message : String(err);
      failures.push(`${module.path} import failed: ${reason}`);
    }

    // Gate 2: test presence, quality and run.
    if (existsSync(testAbs)) {
      const testSource = readFileSync(testAbs, 'utf8');
      const expectCalls = countExpectCalls(testSource);
      if (expectCalls < 3) {
        failures.push(`${testFile} has only ${expectCalls} expect() calls; need ≥3`);
      }
      if (hasTrivialAssertion(testSource)) {
        failures.push(
          `${testFile} contains trivial tautological assertions — tests must reference real recorded values`,
        );
      }
      const run = await runCommand(`bun test ${testFile}`, sharedDir, 120000, {
        [SESSION_PATH_ENV]: sessionPath,
      });
      const { stdout, stderr, exitCode } = JSON.parse(run.result) as {
        stdout: string;
        stderr: string;
        exitCode: number;
      };
      if (exitCode !== 0) {
        failures.push(
          `bun test ${testFile} exited ${exitCode}\nstdout:\n${stdout}\nstderr:\n${stderr}`,
        );
      }
    } else {
      failures.push(
        `${testFile} was not written — a shared module needs a test proving its behavior against recorded data`,
      );
    }
  }

  // Gate 3: typecheck the module together with its declared dependency files.
  const dependencyFiles = module.dependsOn.map((dep) => basename(dep));
  const tc = await typecheckArtifacts(sharedDir, [moduleFile, ...dependencyFiles]);
  if (tc.exitCode !== 0 || tc.timedOut) {
    failures.push(
      `${module.path} failed typecheck (exit ${tc.exitCode}${tc.timedOut ? ', timed out' : ''})\nstdout:\n${tc.stdout}\nstderr:\n${tc.stderr}`,
    );
  }

  // Gate 4: kind-specific ground-truth anchor, only when the import succeeded.
  if (hasRuntimeSurface && imported) {
    if (module.kind === 'request-transform') {
      const anchor = await anchorRequestTransform(moduleAbs, module, session);
      if (anchor.failure) failures.push(anchor.failure);
      if (anchor.warning) warnings.push(anchor.warning);
    } else if (module.kind === 'parser-helper') {
      const parserWarning = await anchorParserHelper(moduleAbs, module, session);
      if (parserWarning) warnings.push(parserWarning);
    }
  }

  return verdict;
}
401
+
402
/**
 * Pick the recorded request seqs an anchor should check: the module's own
 * `sourceSeqs` when it declared any, otherwise the seq of every recorded request.
 */
function resolveSeqs(module: SharedModuleSpec, session: Session): number[] {
  if (module.sourceSeqs.length > 0) return module.sourceSeqs;
  return session.requests.map((request) => request.seq);
}
407
+
408
/**
 * Re-sign recorded URLs and check the module reproduces a signing param.
 *
 * For each candidate request, every query param whose value is at least
 * `MIN_SIGNING_VALUE_LEN` characters long is removed in turn and the module's
 * `transform` is asked to rebuild the URL. The transform may be synchronous or
 * return a promise; it may return a URL string or an object with a `url` string.
 *
 * Outcomes:
 *  - `{}` when some regenerated param equals its recorded value, when no
 *    request had a candidate param, or when the module cannot be imported
 *    (the caller has already recorded that failure);
 *  - `failure` when `transform` is not a function, or when every attempt
 *    threw / rejected / returned no URL string;
 *  - `warning` when the transform ran but reproduced no recorded value (the
 *    signer may legitimately fold in a per-call nonce that can't match).
 *
 * @param modulePath Path of the written module file.
 * @param module Plan entry for the module (path and `sourceSeqs`).
 * @param session Recorded session providing the ground-truth requests.
 */
async function anchorRequestTransform(
  modulePath: string,
  module: SharedModuleSpec,
  session: Session,
): Promise<{ failure?: string; warning?: string }> {
  let transform: unknown;
  try {
    const mod = await importModuleFresh(modulePath);
    transform = mod.transform;
  } catch {
    return {}; // import failure already recorded by the caller
  }
  if (typeof transform !== 'function') {
    return {
      failure: `${module.path} (request-transform) must export a transform(method, url, responses, params?) function`,
    };
  }

  const seqs = resolveSeqs(module, session);
  let attempted = false;
  let anyCallSucceeded = false;
  for (const seq of seqs) {
    const req = session.requests.find((r) => r.seq === seq);
    if (!req) continue;
    let recorded: URL;
    try {
      recorded = new URL(req.url);
    } catch {
      continue; // unparseable recorded URL — nothing to anchor against
    }
    const highEntropy = [...recorded.searchParams.entries()].filter(
      ([, v]) => v.length >= MIN_SIGNING_VALUE_LEN,
    );
    if (highEntropy.length === 0) continue;
    attempted = true;

    for (const [key, recordedValue] of highEntropy) {
      const stripped = new URL(req.url);
      stripped.searchParams.delete(key);
      let out: unknown;
      try {
        // Awaiting is a no-op for a synchronous transform. For an async one it
        // unwraps the result and routes a rejection into this catch instead of
        // leaving an unhandled promise rejection.
        out = await (transform as AnyFn)(req.method, stripped.toString(), []);
      } catch {
        continue;
      }
      const outUrl = typeof out === 'string' ? out : (out as { url?: unknown })?.url;
      if (typeof outUrl !== 'string') continue;
      anyCallSucceeded = true;
      let regenerated: string | null;
      try {
        regenerated = new URL(outUrl).searchParams.get(key);
      } catch {
        regenerated = null;
      }
      if (regenerated === recordedValue) return {}; // anchor satisfied
    }
  }

  if (attempted && !anyCallSucceeded) {
    return {
      failure: `${module.path} (request-transform) threw or returned no URL string when re-signing every recorded URL. It must reproduce the site's signing logic (sourceSeqs ${module.sourceSeqs.join(', ') || 'none'}).`,
    };
  }
  if (attempted) {
    return {
      warning: `${module.path} (request-transform) ran but did not reproduce any recorded high-entropy query param. If signing folds in a per-call nonce this is expected; otherwise verify the algorithm against the recorded .js (sourceSeqs ${module.sourceSeqs.join(', ') || 'none'}).`,
    };
  }
  return {};
}
482
+
483
/**
 * Warning-only: confirm a parser-helper produces non-empty output on a
 * recorded response body. Helpers with non-body signatures legitimately throw
 * here, so this never fails the build — the per-tool integration tests that
 * import the helper are the hard gate.
 *
 * Each runtime export named in `exportSignatures` is called with the first
 * non-empty recorded body (parsed as JSON when possible, otherwise the raw
 * string). Helpers may be synchronous or return a promise.
 *
 * @param modulePath Path of the written module file.
 * @param module Plan entry for the module.
 * @param session Recorded session providing the response bodies.
 * @returns A warning message, or `null` when there is nothing to report (the
 *   import failed, no callable export or body was found, or some export
 *   returned non-empty output).
 */
async function anchorParserHelper(
  modulePath: string,
  module: SharedModuleSpec,
  session: Session,
): Promise<string | null> {
  let mod: Record<string, unknown>;
  try {
    mod = await importModuleFresh(modulePath);
  } catch {
    return null;
  }
  const fns = module.exportSignatures
    .filter((s) => !isTypeSignature(s))
    .map((s) => exportedSymbolName(s))
    .filter((n): n is string => n != null)
    .map((n) => mod[n])
    .filter((f): f is AnyFn => typeof f === 'function');
  if (fns.length === 0) return null;

  const seqs = resolveSeqs(module, session);

  // Fixture-sanity gate (defense-in-depth): if every recorded source body is
  // neither valid JSON nor a recognized RPC envelope, the ground truth itself is
  // unusable — surface that as a distinct, actionable message instead of letting
  // the builder burn cycles "fixing" code that is actually fine.
  const candidateBodies = seqs
    .map((seq) => session.requests.find((r) => r.seq === seq)?.response?.body)
    .filter((b): b is string => typeof b === 'string' && b.length > 0);
  if (
    candidateBodies.length > 0 &&
    candidateBodies.every((b) => !isJsonParseable(b) && !looksLikeRpcEnvelope(b))
  ) {
    return `${module.path} (parser-helper): the recorded response body for sourceSeqs ${module.sourceSeqs.join(', ') || 'none'} is not valid JSON nor a recognized RPC envelope — the fixture appears corrupted, not a code error. Re-record the session or inspect the raw body before iterating.`;
  }

  // Use the first non-empty recorded body, parsed as JSON when it parses.
  let body: unknown;
  for (const seq of seqs) {
    const raw = session.requests.find((r) => r.seq === seq)?.response?.body;
    if (!raw) continue;
    try {
      body = JSON.parse(raw);
    } catch {
      body = raw;
    }
    break;
  }
  if (body === undefined) return null;

  for (const fn of fns) {
    try {
      // Await so an async helper is judged on its resolved value (a pending
      // Promise has no own keys and would count as "empty") and so a
      // rejection lands in this catch rather than going unhandled.
      const out = await fn(body);
      if (!isEmptyResult(out)) return null; // at least one export produced data
    } catch {
      // helper may take different args — ignore
    }
  }
  return `${module.path} (parser-helper): no export returned non-empty output when applied to a recorded response body. Verify it parses the captured data (sourceSeqs ${module.sourceSeqs.join(', ') || 'none'}).`;
}
546
+
547
+ // ─── Payload + file writing ─────────────────────────────────────────────────
548
+
549
/**
 * Assemble the JSON payload sent to the planner and builder prompts.
 *
 * It contains the site and URL, the module's plan entry, the import path and
 * export signatures of each already-built module this one depends on, and the
 * recorded requests named by the module's `sourceSeqs` (bodies truncated to
 * `SOURCE_BODY_LIMIT`). `implementationPlan` is added only when non-empty.
 */
function buildPrereqPayload(
  module: SharedModuleSpec,
  session: Session,
  builtModules: SharedModuleSpec[],
  implementationPlan?: string,
): Record<string, unknown> {
  // Recorded requests the module was derived from; unknown seqs are skipped.
  const sources: Array<Record<string, unknown>> = [];
  for (const seq of module.sourceSeqs) {
    const req: CapturedRequest | undefined = session.requests.find((r) => r.seq === seq);
    if (req == null) continue;
    sources.push({
      seq: req.seq,
      method: req.method,
      url: req.url,
      requestHeaders: req.headers,
      requestBody: truncate(req.body, SOURCE_BODY_LIMIT),
      status: req.response?.status,
      mimeType: req.response?.mimeType,
      responseBody: truncate(req.response?.body, SOURCE_BODY_LIMIT),
    });
  }

  // Only built modules that this module declares in `dependsOn`.
  const availableDependencies: Array<{ importPath: string; exportSignatures: string[] }> = [];
  for (const built of builtModules) {
    if (!module.dependsOn.includes(built.path)) continue;
    availableDependencies.push({
      importPath: `./${basename(built.path)}`,
      exportSignatures: built.exportSignatures,
    });
  }

  const { path, kind, purpose, exportSignatures, spec, dependsOn } = module;
  return {
    site: session.site,
    url: session.url,
    module: { path, kind, purpose, exportSignatures, spec, dependsOn },
    availableDependencies,
    sources,
    ...(implementationPlan ? { implementationPlan } : {}),
  };
}
593
+
594
/**
 * Write the generated module into `sharedDir` (creating the directory when
 * needed) and, when a non-blank test source is given, its sibling
 * `<name>.test.ts`. An existing test file is left untouched when no test
 * source is supplied.
 */
function writeSharedFiles(
  sharedDir: string,
  module: SharedModuleSpec,
  moduleSrc: string,
  testSrc: string | undefined,
): void {
  mkdirSync(sharedDir, { recursive: true });

  const moduleFileName = basename(module.path);
  writeFileSync(pathJoin(sharedDir, moduleFileName), moduleSrc, 'utf8');

  if (!testSrc || testSrc.trim().length === 0) return;
  const testFileName = moduleFileName.replace(/\.ts$/, '.test.ts');
  writeFileSync(pathJoin(sharedDir, testFileName), testSrc, 'utf8');
}
607
+
608
+ // ─── Source-analysis helpers ────────────────────────────────────────────────
609
+
610
/**
 * Extract the declared identifier from an export signature such as
 * `export async function foo(...)` or `export interface Bar`.
 *
 * @returns The identifier, or `null` when the text holds no recognised
 *   `export <keyword> <name>` declaration.
 */
function exportedSymbolName(sig: string): string | null {
  const declaration =
    /export\s+(?:async\s+)?(?:function|const|let|var|class|type|interface|enum)\s+([A-Za-z0-9_$]+)/;
  const hit = sig.match(declaration);
  if (hit === null) return null;
  return hit[1] ?? null;
}
616
+
617
/** True when the signature text contains an `export type` or `export interface` declaration. */
function isTypeSignature(sig: string): boolean {
  const typeExport = /export\s+(?:type|interface)\b/;
  return typeExport.test(sig);
}
620
+
621
/**
 * True when the source contains a `type`/`interface` export and no runtime
 * export (`function`, `const`, `let`, `var`, `class`, `enum`, `default`) —
 * i.e. there is no runtime surface to test or import-check. A source with no
 * exports at all is not considered types-only.
 */
function isTypesOnlyModule(src: string): boolean {
  const runtimeExport = /export\s+(?:async\s+)?(?:function|const|let|var|class|enum|default)\b/;
  const typeExport = /export\s+(?:type|interface)\b/;
  return !runtimeExport.test(src) && typeExport.test(src);
}
629
+
630
/** True when `JSON.parse` accepts the string without throwing. */
function isJsonParseable(s: string): boolean {
  let parseable = true;
  try {
    JSON.parse(s);
  } catch {
    parseable = false;
  }
  return parseable;
}
638
+
639
/**
 * True for values that carry no data: `null`, `undefined`, the empty string,
 * an empty array, or an object with no own enumerable keys. Every other value
 * (including `0` and `false`) counts as non-empty.
 */
function isEmptyResult(value: unknown): boolean {
  if (value === null || value === undefined || value === '') return true;
  if (Array.isArray(value)) return value.length === 0;
  return typeof value === 'object' ? Object.keys(value as object).length === 0 : false;
}
646
+
647
/**
 * Cap a string at `limit` characters. A missing or empty string yields
 * `undefined`; a string within the limit is returned unchanged; a longer one
 * is cut at `limit` and suffixed with a marker stating its original length.
 */
function truncate(s: string | undefined, limit: number): string | undefined {
  if (!s) return undefined;
  const originalLength = s.length;
  return originalLength > limit
    ? `${s.slice(0, limit)}…(truncated, original length ${originalLength})`
    : s;
}
@@ -63,14 +63,17 @@ export async function probeBackends(opts: ProbeBackendsOptions): Promise<ProbeBa
63
63
 
64
64
  const params = resolveParams(tool, opts.paramOverrides);
65
65
 
66
- log(`probing fetch / fetch-bootstrap / stealth-fetch / playbook for ${tool.workflow.toolName}…`);
66
+ log(`probing backends for ${tool.workflow.toolName}…`);
67
67
  log(` params: ${JSON.stringify(params)}`);
68
68
 
69
69
  // Try every backend (single-rung ladders) — operators want the full
70
- // matrix, not just the first that worked.
70
+ // matrix, not just the first that worked. cdp-replay is included so it
71
+ // lands in preferredOrder when it works — without it, runtime always
72
+ // falls through fetch-bootstrap (~30-60s) before reaching the spliced-in
73
+ // cdp-replay rung, wasting time on every call.
71
74
  const stealthCache = new Map<string, StealthFetch>();
72
75
  const allBackends: ConcreteBackend[] = workflowNeedsBootstrap(tool.workflow)
73
- ? ['fetch', 'fetch-bootstrap', 'stealth-fetch', 'playbook']
76
+ ? ['fetch', 'fetch-bootstrap', 'cdp-replay', 'stealth-fetch', 'playbook']
74
77
  : ['fetch', 'stealth-fetch', 'playbook'];
75
78
  const results: BackendsCache['results'] = {};
76
79
  const working: ConcreteBackend[] = [];
@@ -187,7 +187,16 @@ export async function record(opts: RecordOptions): Promise<RecordResult> {
187
187
  const body = bodyResp.base64Encoded
188
188
  ? Buffer.from(bodyResp.body, 'base64').toString('utf8')
189
189
  : bodyResp.body;
190
- const MAX = 256 * 1024;
190
+ // Body cap for the on-disk session. Server-rendered HTML pages on
191
+ // travel/booking sites routinely run 250-500KB (Costco's rental-car
192
+ // results page is ~262KB). The previous 256KB cap silently chopped
193
+ // such pages and the compile agent saw the `[…truncated…]` marker
194
+ // as a hard data-quality block (even when only a few bytes were
195
+ // lost, leaving plenty of structure to parse). 2MB covers the
196
+ // ~99th percentile of full-page renders without bloating most
197
+ // sessions — `Network.getResponseBody` still streams to memory,
198
+ // so very large bodies remain capped to protect process memory.
199
+ const MAX = 2 * 1024 * 1024;
191
200
  const truncated = body.length > MAX ? `${body.slice(0, MAX)}\n[…truncated…]` : body;
192
201
  writer.requestBody(captured.seq, truncated);
193
202
  } catch (err) {