imprint-mcp 0.4.6 → 0.4.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. package/README.md +4 -4
  2. package/examples/google-flights/README.md +2 -0
  3. package/examples/google-flights/_shared/flights_request.ts +10 -4
  4. package/examples/google-flights/get_flight_booking_details/index.ts +5 -2
  5. package/examples/google-flights/get_flight_booking_details/parser.ts +8 -0
  6. package/examples/google-flights/get_flight_booking_details/workflow.json +5 -2
  7. package/examples/google-flights/get_flight_calendar_prices/index.ts +5 -2
  8. package/examples/google-flights/get_flight_calendar_prices/parser.ts +8 -4
  9. package/examples/google-flights/get_flight_calendar_prices/workflow.json +5 -2
  10. package/examples/google-flights/lookup_airport/index.ts +3 -0
  11. package/examples/google-flights/lookup_airport/parser.ts +8 -1
  12. package/examples/google-flights/lookup_airport/workflow.json +3 -0
  13. package/examples/google-flights/search_flights/index.ts +63 -8
  14. package/examples/google-flights/search_flights/parser.ts +10 -0
  15. package/examples/google-flights/search_flights/request-transform.ts +45 -0
  16. package/examples/google-flights/search_flights/workflow.json +63 -8
  17. package/package.json +1 -1
  18. package/prompts/build-planning.md +1 -1
  19. package/prompts/compile-agent.md +5 -3
  20. package/prompts/prereq-builder.md +2 -1
  21. package/src/imprint/backend-ladder.ts +436 -43
  22. package/src/imprint/cdp-browser-fetch.ts +176 -6
  23. package/src/imprint/cdp-jar-cache.ts +105 -10
  24. package/src/imprint/compile-tools.ts +2 -2
  25. package/src/imprint/mcp-server.ts +152 -65
  26. package/src/imprint/probe-backends.ts +41 -10
  27. package/src/imprint/runtime.ts +24 -12
  28. package/src/imprint/stealth-fetch.ts +71 -0
  29. package/src/imprint/stealth-token-cache.ts +38 -1
  30. package/src/imprint/types.ts +45 -0
@@ -87,15 +87,82 @@ export function buildJsonSchema(parameters: WorkflowParameter[]): Tool['inputSch
87
87
  };
88
88
  }
89
89
 
90
/**
 * Tells whether a preferred backend order rules out the bootstrap splice.
 *
 * @param preferredOrder - Backend order preferred for a tool, if any.
 * @returns `true` only when a non-empty order is supplied and it does not
 *   contain `'fetch-bootstrap'`; a missing or empty order yields `false`.
 */
export function shouldSkipBootstrapSplice(preferredOrder?: ConcreteBackend[]): boolean {
  // No preference recorded at all: nothing to skip.
  if (!preferredOrder || preferredOrder.length === 0) return false;
  return !preferredOrder.includes('fetch-bootstrap');
}
93
+
94
/**
 * Returns a copy of `ladder` with extra fallback rungs inserted according to
 * the preferred backend order. The input array is never mutated.
 *
 * - When `'fetch-bootstrap'` is preferred and the ladder has no `'cdp-replay'`,
 *   `'cdp-replay'` is inserted directly after the ladder's `'fetch-bootstrap'`.
 * - When `'fetch-bootstrap'` or `'cdp-replay'` is preferred and the ladder has
 *   no `'stealth-fetch'`, `'stealth-fetch'` is inserted directly after
 *   `'cdp-replay'`, or after `'fetch-bootstrap'` if `'cdp-replay'` is absent.
 *
 * An insertion is skipped when its anchor rung is not present in the ladder.
 *
 * @param ladder - Backend ladder to extend.
 * @param preferredOrder - Backend order preferred for the tool, if any.
 * @returns A new array containing the (possibly extended) ladder.
 */
export function withPreferredFallbacks(
  ladder: ConcreteBackend[],
  preferredOrder?: ConcreteBackend[],
): ConcreteBackend[] {
  const extended = ladder.slice();
  const prefersBootstrap = preferredOrder?.includes('fetch-bootstrap') ?? false;
  const prefersCdp = preferredOrder?.includes('cdp-replay') ?? false;

  if (prefersBootstrap && !extended.includes('cdp-replay')) {
    const bootstrapAt = extended.indexOf('fetch-bootstrap');
    if (bootstrapAt >= 0) extended.splice(bootstrapAt + 1, 0, 'cdp-replay');
  }

  if ((prefersBootstrap || prefersCdp) && !extended.includes('stealth-fetch')) {
    // Anchor on cdp-replay when present (it may have just been inserted),
    // otherwise fall back to fetch-bootstrap.
    let anchorAt = extended.indexOf('cdp-replay');
    if (anchorAt < 0) anchorAt = extended.indexOf('fetch-bootstrap');
    if (anchorAt >= 0) extended.splice(anchorAt + 1, 0, 'stealth-fetch');
  }

  return extended;
}
114
+
115
/**
 * Applies a workflow's execution hints to a backend ladder.
 *
 * When `skipPlaybookFallback` is set and the ladder has more than one rung,
 * every `'playbook'` rung is removed and a new array is returned. In all other
 * cases the ladder is returned as-is (same array reference), so a ladder with
 * a single rung is never filtered.
 *
 * @param ladder - Backend ladder to filter.
 * @param execution - Optional execution hints from the workflow.
 * @returns The ladder to execute.
 */
export function applyExecutionFallbacks(
  ladder: ConcreteBackend[],
  execution?: { skipPlaybookFallback?: boolean },
): ConcreteBackend[] {
  const dropPlaybook = Boolean(execution?.skipPlaybookFallback) && ladder.length > 1;
  if (dropPlaybook) {
    return ladder.filter((rung) => rung !== 'playbook');
  }
  return ladder;
}
122
+
123
/**
 * Collects, per site, the largest `minCallSpacingMs` declared by any of the
 * site's tools.
 *
 * Missing values count as 0 and negative values are clamped to 0. A site only
 * gets an entry when some tool declares a spacing greater than 0.
 *
 * @param tools - Tools with their site and workflow execution hints.
 * @returns Map from site to the largest positive spacing in milliseconds.
 */
export function buildSiteSpacingMap(
  tools: Array<{ site: string; workflow: { execution?: { minCallSpacingMs?: number } } }>,
): Map<string, number> {
  const spacingBySite = new Map<string, number>();
  tools.forEach(({ site, workflow }) => {
    const declared = workflow.execution?.minCallSpacingMs ?? 0;
    const clamped = Math.max(0, declared);
    const known = spacingBySite.get(site) ?? 0;
    if (clamped > known) spacingBySite.set(site, clamped);
  });
  return spacingBySite;
}
133
+
90
134
  const log = createLog('mcp');
91
135
 
92
136
  export async function runSerializedBySite<T>(
93
137
  queues: Map<string, Promise<void>>,
94
138
  site: string,
95
139
  task: () => Promise<T>,
140
+ opts: {
141
+ minCallSpacingMs?: number;
142
+ lastFinishedAt?: Map<string, number>;
143
+ now?: () => number;
144
+ sleep?: (ms: number) => Promise<void>;
145
+ } = {},
96
146
  ): Promise<T> {
97
147
  const previous = queues.get(site) ?? Promise.resolve();
98
- const run = previous.catch(() => undefined).then(task);
148
+ const run = previous
149
+ .catch(() => undefined)
150
+ .then(async () => {
151
+ const spacingMs = Math.max(0, opts.minCallSpacingMs ?? 0);
152
+ const lastFinishedAt = opts.lastFinishedAt;
153
+ const now = opts.now ?? Date.now;
154
+ const sleep = opts.sleep ?? ((ms: number) => new Promise<void>((r) => setTimeout(r, ms)));
155
+ if (spacingMs > 0 && lastFinishedAt) {
156
+ const elapsed = now() - (lastFinishedAt.get(site) ?? 0);
157
+ const waitMs = spacingMs - elapsed;
158
+ if (waitMs > 0) await sleep(waitMs);
159
+ }
160
+ try {
161
+ return await task();
162
+ } finally {
163
+ lastFinishedAt?.set(site, now());
164
+ }
165
+ });
99
166
  const tail = run.then(
100
167
  () => undefined,
101
168
  () => undefined,
@@ -119,7 +186,7 @@ function buildServer(
119
186
  {
120
187
  capabilities: { tools: {} },
121
188
  instructions:
122
- 'Imprint runs deterministic workflows captured from real browser sessions. Tools prefer fetch API replay, may use gated fetch-bootstrap only for declared browser-minted state, then cdp-replay (API requests run inside a live trusted Chrome so a protected POST refreshes its anti-bot token between calls) for multi-step state-changing flows, then stealth-fetch for bot-defense state, and playbook only for full DOM interaction. Error codes: AUTH_EXPIRED (401, run `imprint login <site>`); STATE_MISSING (required cookie/state was unavailable or ambiguous); FORBIDDEN (403); RATE_LIMITED (429, back off); BAD_RESPONSE (other 4xx/5xx); NETWORK (fetch failed); UNKNOWN (everything else).',
189
+ 'Imprint runs deterministic workflows captured from real browser sessions. Tools prefer fetch API replay, front-load cdp-replay when a workflow needs reusable live-browser request state, may use gated fetch-bootstrap for one-shot browser-minted state, then stealth-fetch for bot-defense state, and playbook only for full DOM interaction. Error codes: AUTH_EXPIRED (401, run `imprint login <site>`); STATE_MISSING (required cookie/state was unavailable or ambiguous); FORBIDDEN (403); RATE_LIMITED (429, back off); BAD_RESPONSE (other 4xx/5xx); NETWORK (fetch failed); UNKNOWN (everything else).',
123
190
  },
124
191
  );
125
192
 
@@ -150,6 +217,8 @@ function buildServer(
150
217
  // make Google Flights return fast empty result sets. Keep same-site execution
151
218
  // sequential while allowing unrelated sites to proceed independently.
152
219
  const siteExecutionQueues = new Map<string, Promise<void>>();
220
+ const siteLastFinishedAt = new Map<string, number>();
221
+ const siteMinCallSpacingMs = buildSiteSpacingMap(tools);
153
222
 
154
223
  server.setRequestHandler(ListToolsRequestSchema, async () => ({
155
224
  tools: tools.map((t) => ({
@@ -187,75 +256,93 @@ function buildServer(
187
256
  >;
188
257
 
189
258
  try {
190
- return await runSerializedBySite(siteExecutionQueues, tool.site, async () => {
191
- // Audit-only pacing: when the audit harness sets IMPRINT_AUDIT_PACING_MS,
192
- // sleep before each actual workflow execution so same-site queued calls
193
- // stay spaced out instead of all waiting concurrently before the queue.
194
- // Unset in production -> no delay.
195
- const pacingMs = Number(process.env.IMPRINT_AUDIT_PACING_MS);
196
- if (Number.isFinite(pacingMs) && pacingMs > 0) {
197
- await new Promise((r) => setTimeout(r, pacingMs));
198
- }
259
+ return await runSerializedBySite(
260
+ siteExecutionQueues,
261
+ tool.site,
262
+ async () => {
263
+ // Audit-only pacing: when the audit harness sets IMPRINT_AUDIT_PACING_MS,
264
+ // sleep before each actual workflow execution so same-site queued calls
265
+ // stay spaced out instead of all waiting concurrently before the queue.
266
+ // Unset in production -> no delay.
267
+ const pacingMs = Number(process.env.IMPRINT_AUDIT_PACING_MS);
268
+ if (Number.isFinite(pacingMs) && pacingMs > 0) {
269
+ await new Promise((r) => setTimeout(r, pacingMs));
270
+ }
199
271
 
200
- const ladder = resolveLadder('auto', tool.preferredOrder);
201
- const { result, usedBackend, attempts } = await runWithLadder(
202
- ladder,
203
- tool,
204
- args,
205
- assetRoot,
206
- stealthCache,
207
- { cdpPool, winnerCache, skipBootstrapSplice: Boolean(tool.preferredOrder?.length) },
208
- );
209
- // Reset the idle timer for this site's pooled Chrome.
210
- if (result.ok && usedBackend === 'cdp-replay' && cdpPool.has(tool.site)) {
211
- const prev = cdpIdleTimers.get(tool.site);
212
- if (prev) clearTimeout(prev);
213
- const timer = setTimeout(() => {
214
- const cf = cdpPool.get(tool.site);
215
- if (cf) {
216
- log(`closing idle CDP session for ${tool.site}`);
217
- cf.close().catch(() => {});
218
- cdpPool.delete(tool.site);
219
- cdpIdleTimers.delete(tool.site);
220
- // Drop this site's winner memo too: a memoized cdp-replay would now
221
- // point at a closed Chrome and re-pay the cold relaunch.
222
- for (const key of winnerCache.keys()) {
223
- if (key.startsWith(`${tool.site}:`)) winnerCache.delete(key);
224
- }
225
- }
226
- }, CDP_IDLE_TIMEOUT_MS);
227
- timer.unref();
228
- cdpIdleTimers.set(tool.site, timer);
229
- }
230
- if (!result.ok) {
231
- const text = formatToolError(result);
232
- return {
233
- isError: true,
234
- content: [{ type: 'text', text: `${text}\n(backend: ${usedBackend})` }],
235
- };
236
- }
237
- try {
238
- const cache = persistRuntimeBackendsCache({
272
+ const ladder = withPreferredFallbacks(
273
+ resolveLadder('auto', tool.preferredOrder),
274
+ tool.preferredOrder,
275
+ );
276
+ const executionLadder = applyExecutionFallbacks(ladder, tool.workflow.execution);
277
+ const { result, usedBackend, attempts } = await runWithLadder(
278
+ executionLadder,
239
279
  tool,
280
+ args,
240
281
  assetRoot,
241
- usedBackend,
242
- attempts,
243
- });
244
- if (cache) {
245
- tool.preferredOrder = cache.preferredOrder;
282
+ stealthCache,
283
+ {
284
+ cdpPool,
285
+ winnerCache,
286
+ skipBootstrapSplice: shouldSkipBootstrapSplice(tool.preferredOrder),
287
+ },
288
+ );
289
+ // Reset the idle timer for this site's pooled Chrome. The pool may be
290
+ // retained even when a CDP-backed workflow response failed; that keeps
291
+ // later calls warm, but still needs an idle reap.
292
+ if (cdpPool.has(tool.site)) {
293
+ const prev = cdpIdleTimers.get(tool.site);
294
+ if (prev) clearTimeout(prev);
295
+ const timer = setTimeout(() => {
296
+ const cf = cdpPool.get(tool.site);
297
+ if (cf) {
298
+ log(`closing idle CDP session for ${tool.site}`);
299
+ cf.close().catch(() => {});
300
+ cdpPool.delete(tool.site);
301
+ cdpIdleTimers.delete(tool.site);
302
+ // Drop this site's winner memo too: a memoized cdp-replay would now
303
+ // point at a closed Chrome and re-pay the cold relaunch.
304
+ for (const key of winnerCache.keys()) {
305
+ if (key.startsWith(`${tool.site}:`)) winnerCache.delete(key);
306
+ }
307
+ }
308
+ }, CDP_IDLE_TIMEOUT_MS);
309
+ timer.unref();
310
+ cdpIdleTimers.set(tool.site, timer);
311
+ }
312
+ if (!result.ok) {
313
+ const text = formatToolError(result);
314
+ return {
315
+ isError: true,
316
+ content: [{ type: 'text', text: `${text}\n(backend: ${usedBackend})` }],
317
+ };
318
+ }
319
+ try {
320
+ const cache = persistRuntimeBackendsCache({
321
+ tool,
322
+ assetRoot,
323
+ usedBackend,
324
+ attempts,
325
+ });
326
+ if (cache) {
327
+ tool.preferredOrder = cache.preferredOrder;
328
+ log(
329
+ ` learned backend order for ${tool.workflow.toolName}: ${cache.preferredOrder.join(' → ')}`,
330
+ );
331
+ }
332
+ } catch (err) {
246
333
  log(
247
- ` learned backend order for ${tool.workflow.toolName}: ${cache.preferredOrder.join(' ')}`,
334
+ ` warning: could not persist backend order for ${tool.workflow.toolName}: ${err instanceof Error ? err.message : String(err)}`,
248
335
  );
249
336
  }
250
- } catch (err) {
251
- log(
252
- ` warning: could not persist backend order for ${tool.workflow.toolName}: ${err instanceof Error ? err.message : String(err)}`,
253
- );
254
- }
255
- const text =
256
- typeof result.data === 'string' ? result.data : JSON.stringify(result.data, null, 2);
257
- return { content: [{ type: 'text', text: `${text}\n\n(backend: ${usedBackend})` }] };
258
- });
337
+ const text =
338
+ typeof result.data === 'string' ? result.data : JSON.stringify(result.data, null, 2);
339
+ return { content: [{ type: 'text', text: `${text}\n\n(backend: ${usedBackend})` }] };
340
+ },
341
+ {
342
+ minCallSpacingMs: siteMinCallSpacingMs.get(tool.site),
343
+ lastFinishedAt: siteLastFinishedAt,
344
+ },
345
+ );
259
346
  } catch (err) {
260
347
  const msg = err instanceof Error ? err.message : String(err);
261
348
  return { isError: true, content: [{ type: 'text', text: `[INTERNAL] ${msg}` }] };
@@ -340,14 +340,37 @@ function workflowHash(workflow: ResolvedTool['workflow']): string {
340
340
 
341
341
  function capabilityHash(workflow: ResolvedTool['workflow']): string {
342
342
  const caps = {
343
- bootstrap: Boolean(workflow.bootstrap),
344
- captures: workflow.requests.flatMap((r) =>
345
- (r.captures ?? []).map((c) => `${c.source}:${c.name}:${c.capability}`),
346
- ),
343
+ requestTransformModule: workflow.requestTransformModule ?? null,
344
+ bootstrap: workflow.bootstrap
345
+ ? {
346
+ url: workflow.bootstrap.url,
347
+ captures: workflow.bootstrap.captures ?? [],
348
+ }
349
+ : null,
350
+ requests: workflow.requests.map((r) => ({
351
+ method: r.method.toUpperCase(),
352
+ effect: r.effect ?? null,
353
+ stateRefs: stateRefsInWorkflowRequest(r),
354
+ captures: r.captures ?? [],
355
+ })),
347
356
  };
348
357
  return createHash('sha256').update(JSON.stringify(caps)).digest('hex');
349
358
  }
350
359
 
360
/**
 * Lists the state variables a workflow request refers to.
 *
 * The request URL, body and every header value are searched for
 * `${state.<name>}` placeholders, where `<name>` consists of letters, digits
 * and underscores.
 *
 * @param request - A single request entry of a workflow.
 * @returns The distinct placeholder names, sorted.
 */
function stateRefsInWorkflowRequest(
  request: ResolvedTool['workflow']['requests'][number],
): string[] {
  const placeholder = /\$\{state\.([A-Za-z0-9_]+)\}/g;
  const sources = [request.url, request.body, ...Object.values(request.headers ?? {})];
  const names = new Set<string>();
  for (const source of sources) {
    if (!source) continue;
    for (const hit of source.matchAll(placeholder)) {
      const name = hit[1];
      if (name) names.add(name);
    }
  }
  return Array.from(names).sort();
}
373
+
351
374
  /** Read backends.json with status information. Runtime can still fall back to
352
375
  * the default ladder, while status commands can explain why a cache was not
353
376
  * usable. */
@@ -367,8 +390,11 @@ export function loadBackendsCacheStatus(
367
390
  if (parsed.schemaVersion && parsed.schemaVersion >= 2 && parsed.workflowHash) {
368
391
  const workflowPath = pathResolve(toolDir, 'workflow.json');
369
392
  if (existsSync(workflowPath)) {
370
- const currentHash = workflowHashSync(readFileSync(workflowPath, 'utf8'));
371
- if (currentHash !== parsed.workflowHash) {
393
+ const current = workflowCacheHashesSync(readFileSync(workflowPath, 'utf8'));
394
+ if (
395
+ current.workflowHash !== parsed.workflowHash &&
396
+ (!parsed.capabilityHash || current.capabilityHash !== parsed.capabilityHash)
397
+ ) {
372
398
  const reason = 'workflow hash changed';
373
399
  if (opts.warn !== false) {
374
400
  process.stderr.write(
@@ -496,10 +522,15 @@ export function persistRuntimeBackendsCache(opts: {
496
522
  return cache;
497
523
  }
498
524
 
499
- function workflowHashSync(workflowJson: string): string {
500
- return createHash('sha256')
501
- .update(JSON.stringify(WorkflowSchema.parse(JSON.parse(workflowJson))))
502
- .digest('hex');
525
+ function workflowCacheHashesSync(workflowJson: string): {
526
+ workflowHash: string;
527
+ capabilityHash: string;
528
+ } {
529
+ const workflow = WorkflowSchema.parse(JSON.parse(workflowJson));
530
+ return {
531
+ workflowHash: workflowHash(workflow),
532
+ capabilityHash: capabilityHash(workflow),
533
+ };
503
534
  }
504
535
 
505
536
  function backendsCacheRemediation(site: string, toolName?: string): string {
@@ -133,18 +133,6 @@ export async function executeWorkflow<T = unknown>(opts: ExecuteOptions): Promis
133
133
  }
134
134
  }
135
135
 
136
- // rawResponses feeds parser modules and the final return shape. responseSlots
137
- // keeps legacy request.extract aliases without replacing raw parser input.
138
- const responseSlots: ResponseSlot[] = [];
139
- const state: Record<string, unknown> = { ...(opts.initialState ?? {}) };
140
-
141
- // Per-execution mutable jar. Never shared across MCP/cron calls.
142
- const cookieJar = new RuntimeCookieJar(credentials.cookies);
143
- const liveCredentials: CredentialStore = { ...credentials, cookies: cookieJar.toJSON() };
144
- const stateCapabilities = collectStateCapabilities(opts.workflow);
145
- const dependencyPreflight = preflightStateDependencies(opts.workflow, state, stateCapabilities);
146
- if (!dependencyPreflight.ok) return dependencyPreflight.result;
147
-
148
136
  type TransformResult = string | { url: string; body?: string; headers?: Record<string, string> };
149
137
  let requestTransform:
150
138
  | ((
@@ -152,6 +140,7 @@ export async function executeWorkflow<T = unknown>(opts: ExecuteOptions): Promis
152
140
  url: string,
153
141
  responses: unknown[],
154
142
  params?: Record<string, string | number | boolean>,
143
+ state?: Record<string, unknown>,
155
144
  ) => TransformResult)
156
145
  | null = null;
157
146
  if (opts.workflow.requestTransformModule && opts.workflowPath) {
@@ -161,12 +150,34 @@ export async function executeWorkflow<T = unknown>(opts: ExecuteOptions): Promis
161
150
  opts.workflow.requestTransformModule,
162
151
  );
163
152
  const mod = await import(transformPath);
153
+ if (typeof mod.prepareParams === 'function') {
154
+ const prepared = await mod.prepareParams(params);
155
+ if (prepared && typeof prepared === 'object') {
156
+ for (const [k, v] of Object.entries(prepared)) {
157
+ if (typeof v === 'string' || typeof v === 'number' || typeof v === 'boolean') {
158
+ params[k] = v;
159
+ }
160
+ }
161
+ }
162
+ }
164
163
  if (typeof mod.transform === 'function') requestTransform = mod.transform;
165
164
  } catch {
166
165
  // Non-fatal — proceed without transform.
167
166
  }
168
167
  }
169
168
 
169
+ // rawResponses feeds parser modules and the final return shape. responseSlots
170
+ // keeps legacy request.extract aliases without replacing raw parser input.
171
+ const responseSlots: ResponseSlot[] = [];
172
+ const state: Record<string, unknown> = { ...(opts.initialState ?? {}) };
173
+
174
+ // Per-execution mutable jar. Never shared across MCP/cron calls.
175
+ const cookieJar = new RuntimeCookieJar(credentials.cookies);
176
+ const liveCredentials: CredentialStore = { ...credentials, cookies: cookieJar.toJSON() };
177
+ const stateCapabilities = collectStateCapabilities(opts.workflow);
178
+ const dependencyPreflight = preflightStateDependencies(opts.workflow, state, stateCapabilities);
179
+ if (!dependencyPreflight.ok) return dependencyPreflight.result;
180
+
170
181
  for (let i = 0; i < opts.workflow.requests.length; i++) {
171
182
  const req = opts.workflow.requests[i];
172
183
  if (!req) continue;
@@ -190,6 +201,7 @@ export async function executeWorkflow<T = unknown>(opts: ExecuteOptions): Promis
190
201
  subbed.url,
191
202
  responseSlots.map((s) => s.raw),
192
203
  params,
204
+ state,
193
205
  );
194
206
  if (typeof transformResult === 'string') {
195
207
  subbed.url = transformResult;
@@ -74,6 +74,20 @@ export interface TokenCache {
74
74
  /** Lower-cased response headers of the bootstrap navigation, so callers can
75
75
  * satisfy `response_header` bootstrap captures. Optional. */
76
76
  bootstrapResponseHeaders?: Record<string, string>;
77
+ /** Browser-generated requests observed while the bootstrap page loaded. Lets
78
+ * workflows capture replay headers minted by page JavaScript for later XHRs. */
79
+ observedRequests?: Array<{
80
+ method: string;
81
+ url: string;
82
+ headers: Record<string, string>;
83
+ body?: string;
84
+ resourceType?: string;
85
+ response?: {
86
+ status: number;
87
+ headers: Record<string, string>;
88
+ body?: string;
89
+ };
90
+ }>;
77
91
  /** The bootstrap browser's actual `navigator.userAgent`, captured live. Reused
78
92
  * for the post-bootstrap fetches so the wire UA matches the binary that minted
79
93
  * the cookies (and its client hints below). Absent if capture failed or on
@@ -509,6 +523,42 @@ export async function bootstrapStealthToken(args: BootstrapArgs): Promise<TokenC
509
523
  });
510
524
 
511
525
  const page = await context.newPage();
526
+ const observedRequests: NonNullable<TokenCache['observedRequests']> = [];
527
+ const observedByRequest = new Map<
528
+ unknown,
529
+ NonNullable<TokenCache['observedRequests']>[number]
530
+ >();
531
+ const pendingResponseCaptures = new Set<Promise<void>>();
532
+ page.on('request', (request) => {
533
+ const entry: NonNullable<TokenCache['observedRequests']>[number] = {
534
+ method: request.method(),
535
+ url: request.url(),
536
+ headers: request.headers(),
537
+ ...(request.postData() !== null ? { body: request.postData() as string } : {}),
538
+ resourceType: request.resourceType(),
539
+ };
540
+ observedRequests.push(entry);
541
+ observedByRequest.set(request, entry);
542
+ if (observedRequests.length > 100) observedRequests.shift();
543
+ });
544
+ page.on('response', (response) => {
545
+ const entry = observedByRequest.get(response.request());
546
+ if (!entry || !shouldCaptureObservedBody(entry)) return;
547
+ const pending = (async () => {
548
+ try {
549
+ const headers = await response.allHeaders();
550
+ entry.response = {
551
+ status: response.status(),
552
+ headers,
553
+ body: await response.text(),
554
+ };
555
+ } catch {
556
+ // best-effort — response reuse simply won't match without a body
557
+ }
558
+ })();
559
+ pendingResponseCaptures.add(pending);
560
+ void pending.finally(() => pendingResponseCaptures.delete(pending));
561
+ });
512
562
  // Patch navigator.webdriver ONLY on the vanilla-Playwright fallback. When the
513
563
  // stealth plugin is active it already removes the property natively (a real
514
564
  // Chrome lacks it); stacking our Object.defineProperty on top leaves a
@@ -662,6 +712,7 @@ export async function bootstrapStealthToken(args: BootstrapArgs): Promise<TokenC
662
712
  );
663
713
 
664
714
  await page.waitForTimeout(300);
715
+ await settlePendingResponseCaptures(pendingResponseCaptures, 2_000);
665
716
 
666
717
  // Capture cookies scoped to the recording's registrable domain
667
718
  // (eTLD+1). Naive `.split('.').slice(-2)` was wrong for multi-part
@@ -686,6 +737,7 @@ export async function bootstrapStealthToken(args: BootstrapArgs): Promise<TokenC
686
737
  bootstrappedAt: Date.now(),
687
738
  bootstrapHtml,
688
739
  bootstrapResponseHeaders,
740
+ observedRequests,
689
741
  userAgent: capturedUserAgent,
690
742
  clientHints,
691
743
  };
@@ -694,6 +746,25 @@ export async function bootstrapStealthToken(args: BootstrapArgs): Promise<TokenC
694
746
  }
695
747
  }
696
748
 
749
/**
 * Decides whether the response of an observed request should be captured.
 *
 * Requests whose resource type is `xhr` or `fetch` (compared
 * case-insensitively) qualify, as do requests with a missing or empty
 * resource type. Every other resource type is rejected.
 *
 * @param entry - An observed request entry.
 * @returns `true` when the response should be captured.
 */
function shouldCaptureObservedBody(
  entry: NonNullable<TokenCache['observedRequests']>[number],
): boolean {
  const kind = entry.resourceType?.toLowerCase();
  return !kind || kind === 'xhr' || kind === 'fetch';
}
756
+
757
/**
 * Waits until every response capture that is currently in flight has settled,
 * or until `timeoutMs` has elapsed, whichever happens first.
 *
 * Only the captures present in `pending` at call time are awaited; rejected
 * captures count as settled and never make this function throw.
 *
 * @param pending - In-flight capture promises.
 * @param timeoutMs - Upper bound on the wait, in milliseconds.
 * @returns A promise that resolves once the wait is over.
 */
async function settlePendingResponseCaptures(
  pending: Set<Promise<void>>,
  timeoutMs: number,
): Promise<void> {
  if (pending.size === 0) return;
  let timer: ReturnType<typeof setTimeout> | undefined;
  const deadline = new Promise<void>((resolve) => {
    timer = setTimeout(resolve, timeoutMs);
  });
  try {
    await Promise.race([Promise.allSettled([...pending]), deadline]);
  } finally {
    // Cancel the deadline once the race is decided. Otherwise a live timer
    // lingers for up to `timeoutMs` after the captures have already settled,
    // keeping the event loop busy for no reason.
    clearTimeout(timer);
  }
}
767
+
697
768
  async function defaultUnderlyingFetch(
698
769
  url: string,
699
770
  init: FetchInit,
@@ -57,6 +57,19 @@ export function loadCachedToken(siteDir: string, maxAgeSeconds: number): TokenCa
57
57
  cookies: raw.cookies,
58
58
  sensorHeaders: raw.sensorHeaders,
59
59
  bootstrappedAt: raw.bootstrappedAt,
60
+ ...(typeof raw.bootstrapHtml === 'string' ? { bootstrapHtml: raw.bootstrapHtml } : {}),
61
+ ...(raw.bootstrapResponseHeaders &&
62
+ typeof raw.bootstrapResponseHeaders === 'object' &&
63
+ !Array.isArray(raw.bootstrapResponseHeaders)
64
+ ? { bootstrapResponseHeaders: raw.bootstrapResponseHeaders as Record<string, string> }
65
+ : {}),
66
+ ...(Array.isArray(raw.observedRequests)
67
+ ? { observedRequests: stripDurableObservedResponseBodies(raw.observedRequests) }
68
+ : {}),
69
+ ...(typeof raw.userAgent === 'string' ? { userAgent: raw.userAgent } : {}),
70
+ ...(raw.clientHints && typeof raw.clientHints === 'object' && !Array.isArray(raw.clientHints)
71
+ ? { clientHints: raw.clientHints as Record<string, string> }
72
+ : {}),
60
73
  };
61
74
  } catch {
62
75
  return null;
@@ -69,7 +82,16 @@ export function saveCachedToken(siteDir: string, token: TokenCache): void {
69
82
  mkdirSync(siteDir, { recursive: true });
70
83
  const p = tokenPath(siteDir);
71
84
  const tmp = `${p}.${process.pid}.tmp`;
72
- writeFileSync(tmp, `${JSON.stringify(token)}\n`, 'utf8');
85
+ writeFileSync(
86
+ tmp,
87
+ `${JSON.stringify({
88
+ ...token,
89
+ ...(token.observedRequests
90
+ ? { observedRequests: stripDurableObservedResponseBodies(token.observedRequests) }
91
+ : {}),
92
+ })}\n`,
93
+ 'utf8',
94
+ );
73
95
  renameSync(tmp, p);
74
96
  } catch (err) {
75
97
  log(
@@ -78,6 +100,21 @@ export function saveCachedToken(siteDir: string, token: TokenCache): void {
78
100
  }
79
101
  }
80
102
 
103
/**
 * Produces a list of observed requests whose captured responses carry no body.
 *
 * Entries that have a response with a body are copied, keeping only the
 * response's `status` and `headers`. Entries without a response, or whose
 * response has no body, are passed through unchanged (same object reference).
 * The input array and its entries are not mutated.
 *
 * @param observedRequests - Observed requests, possibly with response bodies.
 * @returns A new array with response bodies removed.
 */
function stripDurableObservedResponseBodies(
  observedRequests: NonNullable<TokenCache['observedRequests']>,
): NonNullable<TokenCache['observedRequests']> {
  return observedRequests.map((observed) => {
    const { response } = observed;
    if (response && response.body !== undefined) {
      const { status, headers } = response;
      return { ...observed, response: { status, headers } };
    }
    return observed;
  });
}
117
+
81
118
  /** Remove a cached token (best-effort) — call when a site's teach run ends. */
82
119
  export function clearCachedToken(siteDir: string): void {
83
120
  try {
@@ -221,6 +221,42 @@ const BootstrapCaptureSchema = z.discriminatedUnion('source', [
221
221
  header: z.string(),
222
222
  mode: z.enum(['first', 'last', 'all']).optional().default('last'),
223
223
  }),
224
+ /** Read a header from a browser-generated request observed during bootstrap
225
+ * navigation. This is for replay tokens produced by page JavaScript for an
226
+ * XHR/fetch request, where neither HTML nor response headers contain the
227
+ * value. `urlPattern` is a JavaScript regular expression tested against the
228
+ * observed request URL. */
229
+ CaptureCommonSchema.extend({
230
+ source: z.literal('request_header'),
231
+ header: z.string(),
232
+ method: z.string().optional(),
233
+ urlPattern: z.string().optional(),
234
+ mode: z.enum(['first', 'last', 'all']).optional().default('last'),
235
+ }),
236
+ /** Read a value from a browser-generated request URL observed during
237
+ * bootstrap navigation. This is for per-page request ids or URL tokens that
238
+ * are generated alongside browser XHR/fetch calls. `urlPattern` selects the
239
+ * observed request; `pattern` extracts the value from that request URL. */
240
+ CaptureCommonSchema.extend({
241
+ source: z.literal('request_url_regex'),
242
+ pattern: z.string(),
243
+ group: z.number().int().nonnegative().optional().default(1),
244
+ method: z.string().optional(),
245
+ urlPattern: z.string().optional(),
246
+ mode: z.enum(['first', 'last', 'all']).optional().default('last'),
247
+ }),
248
+ /** Read a value from a browser-generated request body observed during
249
+ * bootstrap navigation. This is for page-minted replay tokens embedded in
250
+ * POST bodies (for example form-encoded RPC envelopes) where neither the URL
251
+ * nor request headers carry the value. */
252
+ CaptureCommonSchema.extend({
253
+ source: z.literal('request_body_regex'),
254
+ pattern: z.string(),
255
+ group: z.number().int().nonnegative().optional().default(1),
256
+ method: z.string().optional(),
257
+ urlPattern: z.string().optional(),
258
+ mode: z.enum(['first', 'last', 'all']).optional().default('last'),
259
+ }),
224
260
  ]);
225
261
  export type BootstrapCapture = z.infer<typeof BootstrapCaptureSchema>;
226
262
 
@@ -295,6 +331,15 @@ export const WorkflowSchema = z.object({
295
331
  exhaustedBackends: z.array(z.string()),
296
332
  })
297
333
  .optional(),
334
+ /** Optional runtime hints for provider-specific replay constraints. */
335
+ execution: z
336
+ .object({
337
+ /** Minimum end-to-start spacing between MCP calls for the same site. */
338
+ minCallSpacingMs: z.number().int().nonnegative().optional(),
339
+ /** Do not enter the DOM playbook rung after API/browser-backed rungs fail. */
340
+ skipPlaybookFallback: z.boolean().optional(),
341
+ })
342
+ .optional(),
298
343
  });
299
344
  export type Workflow = z.infer<typeof WorkflowSchema>;
300
345