imprint-mcp 0.4.6 → 0.4.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -4
- package/examples/google-flights/README.md +2 -0
- package/examples/google-flights/_shared/flights_request.ts +10 -4
- package/examples/google-flights/get_flight_booking_details/index.ts +5 -2
- package/examples/google-flights/get_flight_booking_details/parser.ts +8 -0
- package/examples/google-flights/get_flight_booking_details/workflow.json +5 -2
- package/examples/google-flights/get_flight_calendar_prices/index.ts +5 -2
- package/examples/google-flights/get_flight_calendar_prices/parser.ts +8 -4
- package/examples/google-flights/get_flight_calendar_prices/workflow.json +5 -2
- package/examples/google-flights/lookup_airport/index.ts +3 -0
- package/examples/google-flights/lookup_airport/parser.ts +8 -1
- package/examples/google-flights/lookup_airport/workflow.json +3 -0
- package/examples/google-flights/search_flights/index.ts +63 -8
- package/examples/google-flights/search_flights/parser.ts +10 -0
- package/examples/google-flights/search_flights/request-transform.ts +45 -0
- package/examples/google-flights/search_flights/workflow.json +63 -8
- package/package.json +1 -1
- package/prompts/build-planning.md +1 -1
- package/prompts/compile-agent.md +5 -3
- package/prompts/prereq-builder.md +2 -1
- package/src/imprint/backend-ladder.ts +436 -43
- package/src/imprint/cdp-browser-fetch.ts +176 -6
- package/src/imprint/cdp-jar-cache.ts +105 -10
- package/src/imprint/compile-tools.ts +2 -2
- package/src/imprint/mcp-server.ts +152 -65
- package/src/imprint/probe-backends.ts +41 -10
- package/src/imprint/runtime.ts +24 -12
- package/src/imprint/stealth-fetch.ts +71 -0
- package/src/imprint/stealth-token-cache.ts +38 -1
- package/src/imprint/types.ts +45 -0
|
@@ -87,15 +87,82 @@ export function buildJsonSchema(parameters: WorkflowParameter[]): Tool['inputSch
|
|
|
87
87
|
};
|
|
88
88
|
}
|
|
89
89
|
|
|
90
|
+
/**
 * Decide whether the fetch-bootstrap splice should be skipped for a tool.
 *
 * The splice is skipped only when a non-empty preferred backend order exists
 * and that order does not mention `fetch-bootstrap`. A missing or empty order
 * never suppresses the splice.
 *
 * @param preferredOrder Learned/preferred backend order for the tool, if any.
 * @returns `true` when a non-empty order omits `fetch-bootstrap`, else `false`.
 */
export function shouldSkipBootstrapSplice(preferredOrder?: ConcreteBackend[]): boolean {
  // No recorded preference: nothing to honour, so do not skip.
  if (!preferredOrder || preferredOrder.length === 0) return false;
  return !preferredOrder.includes('fetch-bootstrap');
}
|
|
93
|
+
|
|
94
|
+
/**
 * Return a copy of `ladder` with browser-backed fallback rungs inserted next
 * to the rungs a preferred order already relies on.
 *
 * - If the preferred order lists `fetch-bootstrap` and the ladder has no
 *   `cdp-replay`, `cdp-replay` is inserted right after `fetch-bootstrap`.
 * - If the preferred order lists `fetch-bootstrap` or `cdp-replay` and the
 *   ladder has no `stealth-fetch`, `stealth-fetch` is inserted right after
 *   `cdp-replay` (or after `fetch-bootstrap` when `cdp-replay` is absent).
 *
 * Nothing is inserted when the anchor rung is not present in the ladder.
 *
 * @param ladder Backend ladder to extend; it is never mutated.
 * @param preferredOrder Preferred backend order for the tool, if any.
 * @returns A new array containing the (possibly extended) ladder.
 */
export function withPreferredFallbacks(
  ladder: ConcreteBackend[],
  preferredOrder?: ConcreteBackend[],
): ConcreteBackend[] {
  const next = [...ladder];
  const prefersBootstrap = preferredOrder?.includes('fetch-bootstrap') ?? false;
  const prefersCdp = preferredOrder?.includes('cdp-replay') ?? false;

  if (prefersBootstrap && !next.includes('cdp-replay')) {
    const bootstrapIdx = next.indexOf('fetch-bootstrap');
    if (bootstrapIdx !== -1) next.splice(bootstrapIdx + 1, 0, 'cdp-replay');
  }

  if ((prefersBootstrap || prefersCdp) && !next.includes('stealth-fetch')) {
    // Prefer anchoring after cdp-replay (which may have just been inserted).
    const cdpIdx = next.indexOf('cdp-replay');
    const anchorIdx = cdpIdx !== -1 ? cdpIdx : next.indexOf('fetch-bootstrap');
    if (anchorIdx !== -1) next.splice(anchorIdx + 1, 0, 'stealth-fetch');
  }

  return next;
}
|
|
114
|
+
|
|
115
|
+
/**
 * Apply workflow execution hints to a backend ladder.
 *
 * When `execution.skipPlaybookFallback` is set, every `playbook` rung is
 * removed. The ladder is returned unchanged (same array) when the hint is not
 * set, when the ladder has at most one rung, or when removing `playbook`
 * would leave no rung at all — the result is never emptied by this function.
 *
 * @param ladder Backend ladder to filter; it is never mutated.
 * @param execution Optional execution hints from the workflow.
 * @returns The original ladder, or a new array without `playbook`.
 */
export function applyExecutionFallbacks(
  ladder: ConcreteBackend[],
  execution?: { skipPlaybookFallback?: boolean },
): ConcreteBackend[] {
  if (!execution?.skipPlaybookFallback || ladder.length <= 1) return ladder;
  const withoutPlaybook = ladder.filter((backend) => backend !== 'playbook');
  // A ladder made only of (duplicate) playbook rungs must not become empty.
  return withoutPlaybook.length > 0 ? withoutPlaybook : ladder;
}
|
|
122
|
+
|
|
123
|
+
/**
 * Build a per-site map of the largest `execution.minCallSpacingMs` declared
 * by any tool of that site.
 *
 * Sites whose tools declare no positive spacing get no entry. Non-finite
 * values (`NaN`, `Infinity`) are ignored: a non-finite delay cannot be
 * scheduled with a timer, so it must not become the site's spacing.
 *
 * @param tools Tools with their site name and workflow execution hints.
 * @returns Map from site name to the maximum positive spacing in milliseconds.
 */
export function buildSiteSpacingMap(
  tools: Array<{ site: string; workflow: { execution?: { minCallSpacingMs?: number } } }>,
): Map<string, number> {
  const out = new Map<string, number>();
  for (const tool of tools) {
    const configured = tool.workflow.execution?.minCallSpacingMs;
    if (typeof configured !== 'number' || !Number.isFinite(configured) || configured <= 0) {
      continue;
    }
    if (configured > (out.get(tool.site) ?? 0)) out.set(tool.site, configured);
  }
  return out;
}
|
|
133
|
+
|
|
90
134
|
const log = createLog('mcp');
|
|
91
135
|
|
|
92
136
|
export async function runSerializedBySite<T>(
|
|
93
137
|
queues: Map<string, Promise<void>>,
|
|
94
138
|
site: string,
|
|
95
139
|
task: () => Promise<T>,
|
|
140
|
+
opts: {
|
|
141
|
+
minCallSpacingMs?: number;
|
|
142
|
+
lastFinishedAt?: Map<string, number>;
|
|
143
|
+
now?: () => number;
|
|
144
|
+
sleep?: (ms: number) => Promise<void>;
|
|
145
|
+
} = {},
|
|
96
146
|
): Promise<T> {
|
|
97
147
|
const previous = queues.get(site) ?? Promise.resolve();
|
|
98
|
-
const run = previous
|
|
148
|
+
const run = previous
|
|
149
|
+
.catch(() => undefined)
|
|
150
|
+
.then(async () => {
|
|
151
|
+
const spacingMs = Math.max(0, opts.minCallSpacingMs ?? 0);
|
|
152
|
+
const lastFinishedAt = opts.lastFinishedAt;
|
|
153
|
+
const now = opts.now ?? Date.now;
|
|
154
|
+
const sleep = opts.sleep ?? ((ms: number) => new Promise<void>((r) => setTimeout(r, ms)));
|
|
155
|
+
if (spacingMs > 0 && lastFinishedAt) {
|
|
156
|
+
const elapsed = now() - (lastFinishedAt.get(site) ?? 0);
|
|
157
|
+
const waitMs = spacingMs - elapsed;
|
|
158
|
+
if (waitMs > 0) await sleep(waitMs);
|
|
159
|
+
}
|
|
160
|
+
try {
|
|
161
|
+
return await task();
|
|
162
|
+
} finally {
|
|
163
|
+
lastFinishedAt?.set(site, now());
|
|
164
|
+
}
|
|
165
|
+
});
|
|
99
166
|
const tail = run.then(
|
|
100
167
|
() => undefined,
|
|
101
168
|
() => undefined,
|
|
@@ -119,7 +186,7 @@ function buildServer(
|
|
|
119
186
|
{
|
|
120
187
|
capabilities: { tools: {} },
|
|
121
188
|
instructions:
|
|
122
|
-
'Imprint runs deterministic workflows captured from real browser sessions. Tools prefer fetch API replay,
|
|
189
|
+
'Imprint runs deterministic workflows captured from real browser sessions. Tools prefer fetch API replay, front-load cdp-replay when a workflow needs reusable live-browser request state, may use gated fetch-bootstrap for one-shot browser-minted state, then stealth-fetch for bot-defense state, and playbook only for full DOM interaction. Error codes: AUTH_EXPIRED (401, run `imprint login <site>`); STATE_MISSING (required cookie/state was unavailable or ambiguous); FORBIDDEN (403); RATE_LIMITED (429, back off); BAD_RESPONSE (other 4xx/5xx); NETWORK (fetch failed); UNKNOWN (everything else).',
|
|
123
190
|
},
|
|
124
191
|
);
|
|
125
192
|
|
|
@@ -150,6 +217,8 @@ function buildServer(
|
|
|
150
217
|
// make Google Flights return fast empty result sets. Keep same-site execution
|
|
151
218
|
// sequential while allowing unrelated sites to proceed independently.
|
|
152
219
|
const siteExecutionQueues = new Map<string, Promise<void>>();
|
|
220
|
+
const siteLastFinishedAt = new Map<string, number>();
|
|
221
|
+
const siteMinCallSpacingMs = buildSiteSpacingMap(tools);
|
|
153
222
|
|
|
154
223
|
server.setRequestHandler(ListToolsRequestSchema, async () => ({
|
|
155
224
|
tools: tools.map((t) => ({
|
|
@@ -187,75 +256,93 @@ function buildServer(
|
|
|
187
256
|
>;
|
|
188
257
|
|
|
189
258
|
try {
|
|
190
|
-
return await runSerializedBySite(
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
259
|
+
return await runSerializedBySite(
|
|
260
|
+
siteExecutionQueues,
|
|
261
|
+
tool.site,
|
|
262
|
+
async () => {
|
|
263
|
+
// Audit-only pacing: when the audit harness sets IMPRINT_AUDIT_PACING_MS,
|
|
264
|
+
// sleep before each actual workflow execution so same-site queued calls
|
|
265
|
+
// stay spaced out instead of all waiting concurrently before the queue.
|
|
266
|
+
// Unset in production -> no delay.
|
|
267
|
+
const pacingMs = Number(process.env.IMPRINT_AUDIT_PACING_MS);
|
|
268
|
+
if (Number.isFinite(pacingMs) && pacingMs > 0) {
|
|
269
|
+
await new Promise((r) => setTimeout(r, pacingMs));
|
|
270
|
+
}
|
|
199
271
|
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
{ cdpPool, winnerCache, skipBootstrapSplice: Boolean(tool.preferredOrder?.length) },
|
|
208
|
-
);
|
|
209
|
-
// Reset the idle timer for this site's pooled Chrome.
|
|
210
|
-
if (result.ok && usedBackend === 'cdp-replay' && cdpPool.has(tool.site)) {
|
|
211
|
-
const prev = cdpIdleTimers.get(tool.site);
|
|
212
|
-
if (prev) clearTimeout(prev);
|
|
213
|
-
const timer = setTimeout(() => {
|
|
214
|
-
const cf = cdpPool.get(tool.site);
|
|
215
|
-
if (cf) {
|
|
216
|
-
log(`closing idle CDP session for ${tool.site}`);
|
|
217
|
-
cf.close().catch(() => {});
|
|
218
|
-
cdpPool.delete(tool.site);
|
|
219
|
-
cdpIdleTimers.delete(tool.site);
|
|
220
|
-
// Drop this site's winner memo too: a memoized cdp-replay would now
|
|
221
|
-
// point at a closed Chrome and re-pay the cold relaunch.
|
|
222
|
-
for (const key of winnerCache.keys()) {
|
|
223
|
-
if (key.startsWith(`${tool.site}:`)) winnerCache.delete(key);
|
|
224
|
-
}
|
|
225
|
-
}
|
|
226
|
-
}, CDP_IDLE_TIMEOUT_MS);
|
|
227
|
-
timer.unref();
|
|
228
|
-
cdpIdleTimers.set(tool.site, timer);
|
|
229
|
-
}
|
|
230
|
-
if (!result.ok) {
|
|
231
|
-
const text = formatToolError(result);
|
|
232
|
-
return {
|
|
233
|
-
isError: true,
|
|
234
|
-
content: [{ type: 'text', text: `${text}\n(backend: ${usedBackend})` }],
|
|
235
|
-
};
|
|
236
|
-
}
|
|
237
|
-
try {
|
|
238
|
-
const cache = persistRuntimeBackendsCache({
|
|
272
|
+
const ladder = withPreferredFallbacks(
|
|
273
|
+
resolveLadder('auto', tool.preferredOrder),
|
|
274
|
+
tool.preferredOrder,
|
|
275
|
+
);
|
|
276
|
+
const executionLadder = applyExecutionFallbacks(ladder, tool.workflow.execution);
|
|
277
|
+
const { result, usedBackend, attempts } = await runWithLadder(
|
|
278
|
+
executionLadder,
|
|
239
279
|
tool,
|
|
280
|
+
args,
|
|
240
281
|
assetRoot,
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
282
|
+
stealthCache,
|
|
283
|
+
{
|
|
284
|
+
cdpPool,
|
|
285
|
+
winnerCache,
|
|
286
|
+
skipBootstrapSplice: shouldSkipBootstrapSplice(tool.preferredOrder),
|
|
287
|
+
},
|
|
288
|
+
);
|
|
289
|
+
// Reset the idle timer for this site's pooled Chrome. The pool may be
|
|
290
|
+
// retained even when a CDP-backed workflow response failed; that keeps
|
|
291
|
+
// later calls warm, but still needs an idle reap.
|
|
292
|
+
if (cdpPool.has(tool.site)) {
|
|
293
|
+
const prev = cdpIdleTimers.get(tool.site);
|
|
294
|
+
if (prev) clearTimeout(prev);
|
|
295
|
+
const timer = setTimeout(() => {
|
|
296
|
+
const cf = cdpPool.get(tool.site);
|
|
297
|
+
if (cf) {
|
|
298
|
+
log(`closing idle CDP session for ${tool.site}`);
|
|
299
|
+
cf.close().catch(() => {});
|
|
300
|
+
cdpPool.delete(tool.site);
|
|
301
|
+
cdpIdleTimers.delete(tool.site);
|
|
302
|
+
// Drop this site's winner memo too: a memoized cdp-replay would now
|
|
303
|
+
// point at a closed Chrome and re-pay the cold relaunch.
|
|
304
|
+
for (const key of winnerCache.keys()) {
|
|
305
|
+
if (key.startsWith(`${tool.site}:`)) winnerCache.delete(key);
|
|
306
|
+
}
|
|
307
|
+
}
|
|
308
|
+
}, CDP_IDLE_TIMEOUT_MS);
|
|
309
|
+
timer.unref();
|
|
310
|
+
cdpIdleTimers.set(tool.site, timer);
|
|
311
|
+
}
|
|
312
|
+
if (!result.ok) {
|
|
313
|
+
const text = formatToolError(result);
|
|
314
|
+
return {
|
|
315
|
+
isError: true,
|
|
316
|
+
content: [{ type: 'text', text: `${text}\n(backend: ${usedBackend})` }],
|
|
317
|
+
};
|
|
318
|
+
}
|
|
319
|
+
try {
|
|
320
|
+
const cache = persistRuntimeBackendsCache({
|
|
321
|
+
tool,
|
|
322
|
+
assetRoot,
|
|
323
|
+
usedBackend,
|
|
324
|
+
attempts,
|
|
325
|
+
});
|
|
326
|
+
if (cache) {
|
|
327
|
+
tool.preferredOrder = cache.preferredOrder;
|
|
328
|
+
log(
|
|
329
|
+
` learned backend order for ${tool.workflow.toolName}: ${cache.preferredOrder.join(' → ')}`,
|
|
330
|
+
);
|
|
331
|
+
}
|
|
332
|
+
} catch (err) {
|
|
246
333
|
log(
|
|
247
|
-
`
|
|
334
|
+
` warning: could not persist backend order for ${tool.workflow.toolName}: ${err instanceof Error ? err.message : String(err)}`,
|
|
248
335
|
);
|
|
249
336
|
}
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
337
|
+
const text =
|
|
338
|
+
typeof result.data === 'string' ? result.data : JSON.stringify(result.data, null, 2);
|
|
339
|
+
return { content: [{ type: 'text', text: `${text}\n\n(backend: ${usedBackend})` }] };
|
|
340
|
+
},
|
|
341
|
+
{
|
|
342
|
+
minCallSpacingMs: siteMinCallSpacingMs.get(tool.site),
|
|
343
|
+
lastFinishedAt: siteLastFinishedAt,
|
|
344
|
+
},
|
|
345
|
+
);
|
|
259
346
|
} catch (err) {
|
|
260
347
|
const msg = err instanceof Error ? err.message : String(err);
|
|
261
348
|
return { isError: true, content: [{ type: 'text', text: `[INTERNAL] ${msg}` }] };
|
|
@@ -340,14 +340,37 @@ function workflowHash(workflow: ResolvedTool['workflow']): string {
|
|
|
340
340
|
|
|
341
341
|
/**
 * Hash the parts of a workflow that describe what it needs from a backend.
 *
 * The digest covers the request-transform module name, the bootstrap URL and
 * captures, and — per request — the upper-cased method, the declared effect,
 * the `${state.*}` references, and the captures. Other workflow fields do not
 * influence the result.
 *
 * The object is serialized with `JSON.stringify`, so the key order below is
 * part of the hash input and must stay stable.
 *
 * @param workflow Parsed workflow to fingerprint.
 * @returns Hex-encoded SHA-256 digest of the capability summary.
 */
function capabilityHash(workflow: ResolvedTool['workflow']): string {
  const bootstrap = workflow.bootstrap
    ? {
        url: workflow.bootstrap.url,
        captures: workflow.bootstrap.captures ?? [],
      }
    : null;
  const caps = {
    requestTransformModule: workflow.requestTransformModule ?? null,
    bootstrap,
    requests: workflow.requests.map((r) => ({
      method: r.method.toUpperCase(),
      effect: r.effect ?? null,
      stateRefs: stateRefsInWorkflowRequest(r),
      captures: r.captures ?? [],
    })),
  };
  return createHash('sha256').update(JSON.stringify(caps)).digest('hex');
}
|
|
350
359
|
|
|
360
|
+
/**
 * Collect the names of all `${state.<name>}` placeholders used by a request.
 *
 * The request URL, body, and every header value are scanned. Names are limited
 * to `[A-Za-z0-9_]+`; each name is reported once.
 *
 * @param request Workflow request to scan.
 * @returns Unique state names in default (code-unit) sort order.
 */
function stateRefsInWorkflowRequest(
  request: ResolvedTool['workflow']['requests'][number],
): string[] {
  const refs = new Set<string>();
  const scan = (text: string | undefined): void => {
    if (!text) return;
    for (const match of text.matchAll(/\$\{state\.([A-Za-z0-9_]+)\}/g)) {
      const name = match[1];
      if (name) refs.add(name);
    }
  };
  scan(request.url);
  scan(request.body);
  for (const value of Object.values(request.headers ?? {})) scan(value);
  return [...refs].sort();
}
|
|
373
|
+
|
|
351
374
|
/** Read backends.json with status information. Runtime can still fall back to
|
|
352
375
|
* the default ladder, while status commands can explain why a cache was not
|
|
353
376
|
* usable. */
|
|
@@ -367,8 +390,11 @@ export function loadBackendsCacheStatus(
|
|
|
367
390
|
if (parsed.schemaVersion && parsed.schemaVersion >= 2 && parsed.workflowHash) {
|
|
368
391
|
const workflowPath = pathResolve(toolDir, 'workflow.json');
|
|
369
392
|
if (existsSync(workflowPath)) {
|
|
370
|
-
const
|
|
371
|
-
if (
|
|
393
|
+
const current = workflowCacheHashesSync(readFileSync(workflowPath, 'utf8'));
|
|
394
|
+
if (
|
|
395
|
+
current.workflowHash !== parsed.workflowHash &&
|
|
396
|
+
(!parsed.capabilityHash || current.capabilityHash !== parsed.capabilityHash)
|
|
397
|
+
) {
|
|
372
398
|
const reason = 'workflow hash changed';
|
|
373
399
|
if (opts.warn !== false) {
|
|
374
400
|
process.stderr.write(
|
|
@@ -496,10 +522,15 @@ export function persistRuntimeBackendsCache(opts: {
|
|
|
496
522
|
return cache;
|
|
497
523
|
}
|
|
498
524
|
|
|
499
|
-
function
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
525
|
+
/**
 * Compute both cache fingerprints for a workflow given its JSON text.
 *
 * The text is parsed with `JSON.parse` and validated through
 * `WorkflowSchema.parse` before hashing, so malformed JSON or a workflow that
 * fails schema validation throws instead of producing hashes.
 *
 * @param workflowJson Raw contents of a `workflow.json` file.
 * @returns The workflow hash and the capability hash of the parsed workflow.
 */
function workflowCacheHashesSync(workflowJson: string): {
  workflowHash: string;
  capabilityHash: string;
} {
  const parsed = WorkflowSchema.parse(JSON.parse(workflowJson));
  return {
    workflowHash: workflowHash(parsed),
    capabilityHash: capabilityHash(parsed),
  };
}
|
|
504
535
|
|
|
505
536
|
function backendsCacheRemediation(site: string, toolName?: string): string {
|
package/src/imprint/runtime.ts
CHANGED
|
@@ -133,18 +133,6 @@ export async function executeWorkflow<T = unknown>(opts: ExecuteOptions): Promis
|
|
|
133
133
|
}
|
|
134
134
|
}
|
|
135
135
|
|
|
136
|
-
// rawResponses feeds parser modules and the final return shape. responseSlots
|
|
137
|
-
// keeps legacy request.extract aliases without replacing raw parser input.
|
|
138
|
-
const responseSlots: ResponseSlot[] = [];
|
|
139
|
-
const state: Record<string, unknown> = { ...(opts.initialState ?? {}) };
|
|
140
|
-
|
|
141
|
-
// Per-execution mutable jar. Never shared across MCP/cron calls.
|
|
142
|
-
const cookieJar = new RuntimeCookieJar(credentials.cookies);
|
|
143
|
-
const liveCredentials: CredentialStore = { ...credentials, cookies: cookieJar.toJSON() };
|
|
144
|
-
const stateCapabilities = collectStateCapabilities(opts.workflow);
|
|
145
|
-
const dependencyPreflight = preflightStateDependencies(opts.workflow, state, stateCapabilities);
|
|
146
|
-
if (!dependencyPreflight.ok) return dependencyPreflight.result;
|
|
147
|
-
|
|
148
136
|
type TransformResult = string | { url: string; body?: string; headers?: Record<string, string> };
|
|
149
137
|
let requestTransform:
|
|
150
138
|
| ((
|
|
@@ -152,6 +140,7 @@ export async function executeWorkflow<T = unknown>(opts: ExecuteOptions): Promis
|
|
|
152
140
|
url: string,
|
|
153
141
|
responses: unknown[],
|
|
154
142
|
params?: Record<string, string | number | boolean>,
|
|
143
|
+
state?: Record<string, unknown>,
|
|
155
144
|
) => TransformResult)
|
|
156
145
|
| null = null;
|
|
157
146
|
if (opts.workflow.requestTransformModule && opts.workflowPath) {
|
|
@@ -161,12 +150,34 @@ export async function executeWorkflow<T = unknown>(opts: ExecuteOptions): Promis
|
|
|
161
150
|
opts.workflow.requestTransformModule,
|
|
162
151
|
);
|
|
163
152
|
const mod = await import(transformPath);
|
|
153
|
+
if (typeof mod.prepareParams === 'function') {
|
|
154
|
+
const prepared = await mod.prepareParams(params);
|
|
155
|
+
if (prepared && typeof prepared === 'object') {
|
|
156
|
+
for (const [k, v] of Object.entries(prepared)) {
|
|
157
|
+
if (typeof v === 'string' || typeof v === 'number' || typeof v === 'boolean') {
|
|
158
|
+
params[k] = v;
|
|
159
|
+
}
|
|
160
|
+
}
|
|
161
|
+
}
|
|
162
|
+
}
|
|
164
163
|
if (typeof mod.transform === 'function') requestTransform = mod.transform;
|
|
165
164
|
} catch {
|
|
166
165
|
// Non-fatal — proceed without transform.
|
|
167
166
|
}
|
|
168
167
|
}
|
|
169
168
|
|
|
169
|
+
// rawResponses feeds parser modules and the final return shape. responseSlots
|
|
170
|
+
// keeps legacy request.extract aliases without replacing raw parser input.
|
|
171
|
+
const responseSlots: ResponseSlot[] = [];
|
|
172
|
+
const state: Record<string, unknown> = { ...(opts.initialState ?? {}) };
|
|
173
|
+
|
|
174
|
+
// Per-execution mutable jar. Never shared across MCP/cron calls.
|
|
175
|
+
const cookieJar = new RuntimeCookieJar(credentials.cookies);
|
|
176
|
+
const liveCredentials: CredentialStore = { ...credentials, cookies: cookieJar.toJSON() };
|
|
177
|
+
const stateCapabilities = collectStateCapabilities(opts.workflow);
|
|
178
|
+
const dependencyPreflight = preflightStateDependencies(opts.workflow, state, stateCapabilities);
|
|
179
|
+
if (!dependencyPreflight.ok) return dependencyPreflight.result;
|
|
180
|
+
|
|
170
181
|
for (let i = 0; i < opts.workflow.requests.length; i++) {
|
|
171
182
|
const req = opts.workflow.requests[i];
|
|
172
183
|
if (!req) continue;
|
|
@@ -190,6 +201,7 @@ export async function executeWorkflow<T = unknown>(opts: ExecuteOptions): Promis
|
|
|
190
201
|
subbed.url,
|
|
191
202
|
responseSlots.map((s) => s.raw),
|
|
192
203
|
params,
|
|
204
|
+
state,
|
|
193
205
|
);
|
|
194
206
|
if (typeof transformResult === 'string') {
|
|
195
207
|
subbed.url = transformResult;
|
|
@@ -74,6 +74,20 @@ export interface TokenCache {
|
|
|
74
74
|
/** Lower-cased response headers of the bootstrap navigation, so callers can
|
|
75
75
|
* satisfy `response_header` bootstrap captures. Optional. */
|
|
76
76
|
bootstrapResponseHeaders?: Record<string, string>;
|
|
77
|
+
/** Browser-generated requests observed while the bootstrap page loaded. Lets
|
|
78
|
+
* workflows capture replay headers minted by page JavaScript for later XHRs. */
|
|
79
|
+
observedRequests?: Array<{
|
|
80
|
+
method: string;
|
|
81
|
+
url: string;
|
|
82
|
+
headers: Record<string, string>;
|
|
83
|
+
body?: string;
|
|
84
|
+
resourceType?: string;
|
|
85
|
+
response?: {
|
|
86
|
+
status: number;
|
|
87
|
+
headers: Record<string, string>;
|
|
88
|
+
body?: string;
|
|
89
|
+
};
|
|
90
|
+
}>;
|
|
77
91
|
/** The bootstrap browser's actual `navigator.userAgent`, captured live. Reused
|
|
78
92
|
* for the post-bootstrap fetches so the wire UA matches the binary that minted
|
|
79
93
|
* the cookies (and its client hints below). Absent if capture failed or on
|
|
@@ -509,6 +523,42 @@ export async function bootstrapStealthToken(args: BootstrapArgs): Promise<TokenC
|
|
|
509
523
|
});
|
|
510
524
|
|
|
511
525
|
const page = await context.newPage();
|
|
526
|
+
const observedRequests: NonNullable<TokenCache['observedRequests']> = [];
|
|
527
|
+
const observedByRequest = new Map<
|
|
528
|
+
unknown,
|
|
529
|
+
NonNullable<TokenCache['observedRequests']>[number]
|
|
530
|
+
>();
|
|
531
|
+
const pendingResponseCaptures = new Set<Promise<void>>();
|
|
532
|
+
page.on('request', (request) => {
|
|
533
|
+
const entry: NonNullable<TokenCache['observedRequests']>[number] = {
|
|
534
|
+
method: request.method(),
|
|
535
|
+
url: request.url(),
|
|
536
|
+
headers: request.headers(),
|
|
537
|
+
...(request.postData() !== null ? { body: request.postData() as string } : {}),
|
|
538
|
+
resourceType: request.resourceType(),
|
|
539
|
+
};
|
|
540
|
+
observedRequests.push(entry);
|
|
541
|
+
observedByRequest.set(request, entry);
|
|
542
|
+
if (observedRequests.length > 100) observedRequests.shift();
|
|
543
|
+
});
|
|
544
|
+
page.on('response', (response) => {
|
|
545
|
+
const entry = observedByRequest.get(response.request());
|
|
546
|
+
if (!entry || !shouldCaptureObservedBody(entry)) return;
|
|
547
|
+
const pending = (async () => {
|
|
548
|
+
try {
|
|
549
|
+
const headers = await response.allHeaders();
|
|
550
|
+
entry.response = {
|
|
551
|
+
status: response.status(),
|
|
552
|
+
headers,
|
|
553
|
+
body: await response.text(),
|
|
554
|
+
};
|
|
555
|
+
} catch {
|
|
556
|
+
// best-effort — response reuse simply won't match without a body
|
|
557
|
+
}
|
|
558
|
+
})();
|
|
559
|
+
pendingResponseCaptures.add(pending);
|
|
560
|
+
void pending.finally(() => pendingResponseCaptures.delete(pending));
|
|
561
|
+
});
|
|
512
562
|
// Patch navigator.webdriver ONLY on the vanilla-Playwright fallback. When the
|
|
513
563
|
// stealth plugin is active it already removes the property natively (a real
|
|
514
564
|
// Chrome lacks it); stacking our Object.defineProperty on top leaves a
|
|
@@ -662,6 +712,7 @@ export async function bootstrapStealthToken(args: BootstrapArgs): Promise<TokenC
|
|
|
662
712
|
);
|
|
663
713
|
|
|
664
714
|
await page.waitForTimeout(300);
|
|
715
|
+
await settlePendingResponseCaptures(pendingResponseCaptures, 2_000);
|
|
665
716
|
|
|
666
717
|
// Capture cookies scoped to the recording's registrable domain
|
|
667
718
|
// (eTLD+1). Naive `.split('.').slice(-2)` was wrong for multi-part
|
|
@@ -686,6 +737,7 @@ export async function bootstrapStealthToken(args: BootstrapArgs): Promise<TokenC
|
|
|
686
737
|
bootstrappedAt: Date.now(),
|
|
687
738
|
bootstrapHtml,
|
|
688
739
|
bootstrapResponseHeaders,
|
|
740
|
+
observedRequests,
|
|
689
741
|
userAgent: capturedUserAgent,
|
|
690
742
|
clientHints,
|
|
691
743
|
};
|
|
@@ -694,6 +746,25 @@ export async function bootstrapStealthToken(args: BootstrapArgs): Promise<TokenC
|
|
|
694
746
|
}
|
|
695
747
|
}
|
|
696
748
|
|
|
749
|
+
/**
 * Decide whether the response of an observed request should be captured.
 *
 * Only `xhr` and `fetch` resource types (compared case-insensitively) are
 * captured. An entry with a missing or empty resource type is also captured.
 *
 * @param entry Observed request recorded during bootstrap.
 * @returns `true` when the response for this entry should be recorded.
 */
function shouldCaptureObservedBody(
  entry: NonNullable<TokenCache['observedRequests']>[number],
): boolean {
  const type = entry.resourceType?.toLowerCase();
  return !type || type === 'xhr' || type === 'fetch';
}
|
|
756
|
+
|
|
757
|
+
/**
 * Wait for in-flight response captures to finish, up to a time limit.
 *
 * Takes a snapshot of `pending` and resolves as soon as every promise in the
 * snapshot has settled (fulfilled or rejected) or `timeoutMs` has elapsed,
 * whichever comes first. Promises added to the set after the call starts are
 * not awaited. Never rejects because of a failed capture.
 *
 * @param pending Set of capture promises currently in flight.
 * @param timeoutMs Maximum time to wait, in milliseconds.
 */
async function settlePendingResponseCaptures(
  pending: Set<Promise<void>>,
  timeoutMs: number,
): Promise<void> {
  if (pending.size === 0) return;
  let timer: ReturnType<typeof setTimeout> | undefined;
  const timeout = new Promise<void>((resolve) => {
    timer = setTimeout(resolve, timeoutMs);
  });
  try {
    await Promise.race([Promise.allSettled([...pending]), timeout]);
  } finally {
    // Cancel the timeout once the race is decided so it does not linger
    // (and keep the event loop alive) after the captures have settled.
    clearTimeout(timer);
  }
}
|
|
767
|
+
|
|
697
768
|
async function defaultUnderlyingFetch(
|
|
698
769
|
url: string,
|
|
699
770
|
init: FetchInit,
|
|
@@ -57,6 +57,19 @@ export function loadCachedToken(siteDir: string, maxAgeSeconds: number): TokenCa
|
|
|
57
57
|
cookies: raw.cookies,
|
|
58
58
|
sensorHeaders: raw.sensorHeaders,
|
|
59
59
|
bootstrappedAt: raw.bootstrappedAt,
|
|
60
|
+
...(typeof raw.bootstrapHtml === 'string' ? { bootstrapHtml: raw.bootstrapHtml } : {}),
|
|
61
|
+
...(raw.bootstrapResponseHeaders &&
|
|
62
|
+
typeof raw.bootstrapResponseHeaders === 'object' &&
|
|
63
|
+
!Array.isArray(raw.bootstrapResponseHeaders)
|
|
64
|
+
? { bootstrapResponseHeaders: raw.bootstrapResponseHeaders as Record<string, string> }
|
|
65
|
+
: {}),
|
|
66
|
+
...(Array.isArray(raw.observedRequests)
|
|
67
|
+
? { observedRequests: stripDurableObservedResponseBodies(raw.observedRequests) }
|
|
68
|
+
: {}),
|
|
69
|
+
...(typeof raw.userAgent === 'string' ? { userAgent: raw.userAgent } : {}),
|
|
70
|
+
...(raw.clientHints && typeof raw.clientHints === 'object' && !Array.isArray(raw.clientHints)
|
|
71
|
+
? { clientHints: raw.clientHints as Record<string, string> }
|
|
72
|
+
: {}),
|
|
60
73
|
};
|
|
61
74
|
} catch {
|
|
62
75
|
return null;
|
|
@@ -69,7 +82,16 @@ export function saveCachedToken(siteDir: string, token: TokenCache): void {
|
|
|
69
82
|
mkdirSync(siteDir, { recursive: true });
|
|
70
83
|
const p = tokenPath(siteDir);
|
|
71
84
|
const tmp = `${p}.${process.pid}.tmp`;
|
|
72
|
-
writeFileSync(
|
|
85
|
+
writeFileSync(
|
|
86
|
+
tmp,
|
|
87
|
+
`${JSON.stringify({
|
|
88
|
+
...token,
|
|
89
|
+
...(token.observedRequests
|
|
90
|
+
? { observedRequests: stripDurableObservedResponseBodies(token.observedRequests) }
|
|
91
|
+
: {}),
|
|
92
|
+
})}\n`,
|
|
93
|
+
'utf8',
|
|
94
|
+
);
|
|
73
95
|
renameSync(tmp, p);
|
|
74
96
|
} catch (err) {
|
|
75
97
|
log(
|
|
@@ -78,6 +100,21 @@ export function saveCachedToken(siteDir: string, token: TokenCache): void {
|
|
|
78
100
|
}
|
|
79
101
|
}
|
|
80
102
|
|
|
103
|
+
/**
 * Return observed requests with their captured response bodies removed.
 *
 * Entries without a response, or whose response has no body, are returned
 * as-is (same object). For the rest a copy is produced whose `response`
 * keeps only `status` and `headers`. The request's own `body` is untouched,
 * and the input array and its entries are never mutated.
 *
 * @param observedRequests Requests observed during bootstrap.
 * @returns A new array safe to persist without response bodies.
 */
function stripDurableObservedResponseBodies(
  observedRequests: NonNullable<TokenCache['observedRequests']>,
): NonNullable<TokenCache['observedRequests']> {
  return observedRequests.map((req) => {
    const response = req.response;
    if (!response || response.body === undefined) return req;
    return {
      ...req,
      response: {
        status: response.status,
        headers: response.headers,
      },
    };
  });
}
|
|
117
|
+
|
|
81
118
|
/** Remove a cached token (best-effort) — call when a site's teach run ends. */
|
|
82
119
|
export function clearCachedToken(siteDir: string): void {
|
|
83
120
|
try {
|
package/src/imprint/types.ts
CHANGED
|
@@ -221,6 +221,42 @@ const BootstrapCaptureSchema = z.discriminatedUnion('source', [
|
|
|
221
221
|
header: z.string(),
|
|
222
222
|
mode: z.enum(['first', 'last', 'all']).optional().default('last'),
|
|
223
223
|
}),
|
|
224
|
+
/** Read a header from a browser-generated request observed during bootstrap
|
|
225
|
+
* navigation. This is for replay tokens produced by page JavaScript for an
|
|
226
|
+
* XHR/fetch request, where neither HTML nor response headers contain the
|
|
227
|
+
* value. `urlPattern` is a JavaScript regular expression tested against the
|
|
228
|
+
* observed request URL. */
|
|
229
|
+
CaptureCommonSchema.extend({
|
|
230
|
+
source: z.literal('request_header'),
|
|
231
|
+
header: z.string(),
|
|
232
|
+
method: z.string().optional(),
|
|
233
|
+
urlPattern: z.string().optional(),
|
|
234
|
+
mode: z.enum(['first', 'last', 'all']).optional().default('last'),
|
|
235
|
+
}),
|
|
236
|
+
/** Read a value from a browser-generated request URL observed during
|
|
237
|
+
* bootstrap navigation. This is for per-page request ids or URL tokens that
|
|
238
|
+
* are generated alongside browser XHR/fetch calls. `urlPattern` selects the
|
|
239
|
+
* observed request; `pattern` extracts the value from that request URL. */
|
|
240
|
+
CaptureCommonSchema.extend({
|
|
241
|
+
source: z.literal('request_url_regex'),
|
|
242
|
+
pattern: z.string(),
|
|
243
|
+
group: z.number().int().nonnegative().optional().default(1),
|
|
244
|
+
method: z.string().optional(),
|
|
245
|
+
urlPattern: z.string().optional(),
|
|
246
|
+
mode: z.enum(['first', 'last', 'all']).optional().default('last'),
|
|
247
|
+
}),
|
|
248
|
+
/** Read a value from a browser-generated request body observed during
|
|
249
|
+
* bootstrap navigation. This is for page-minted replay tokens embedded in
|
|
250
|
+
* POST bodies (for example form-encoded RPC envelopes) where neither the URL
|
|
251
|
+
* nor request headers carry the value. */
|
|
252
|
+
CaptureCommonSchema.extend({
|
|
253
|
+
source: z.literal('request_body_regex'),
|
|
254
|
+
pattern: z.string(),
|
|
255
|
+
group: z.number().int().nonnegative().optional().default(1),
|
|
256
|
+
method: z.string().optional(),
|
|
257
|
+
urlPattern: z.string().optional(),
|
|
258
|
+
mode: z.enum(['first', 'last', 'all']).optional().default('last'),
|
|
259
|
+
}),
|
|
224
260
|
]);
|
|
225
261
|
export type BootstrapCapture = z.infer<typeof BootstrapCaptureSchema>;
|
|
226
262
|
|
|
@@ -295,6 +331,15 @@ export const WorkflowSchema = z.object({
|
|
|
295
331
|
exhaustedBackends: z.array(z.string()),
|
|
296
332
|
})
|
|
297
333
|
.optional(),
|
|
334
|
+
/** Optional runtime hints for provider-specific replay constraints. */
|
|
335
|
+
execution: z
|
|
336
|
+
.object({
|
|
337
|
+
/** Minimum end-to-start spacing between MCP calls for the same site. */
|
|
338
|
+
minCallSpacingMs: z.number().int().nonnegative().optional(),
|
|
339
|
+
/** Do not enter the DOM playbook rung after API/browser-backed rungs fail. */
|
|
340
|
+
skipPlaybookFallback: z.boolean().optional(),
|
|
341
|
+
})
|
|
342
|
+
.optional(),
|
|
298
343
|
});
|
|
299
344
|
export type Workflow = z.infer<typeof WorkflowSchema>;
|
|
300
345
|
|