imprint-mcp 0.4.7 → 0.4.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -4
- package/examples/google-flights/README.md +0 -2
- package/examples/google-flights/_shared/flights_request.ts +4 -10
- package/examples/google-flights/get_flight_booking_details/index.ts +2 -5
- package/examples/google-flights/get_flight_booking_details/parser.ts +0 -8
- package/examples/google-flights/get_flight_booking_details/workflow.json +2 -5
- package/examples/google-flights/get_flight_calendar_prices/index.ts +2 -5
- package/examples/google-flights/get_flight_calendar_prices/parser.ts +4 -8
- package/examples/google-flights/get_flight_calendar_prices/workflow.json +2 -5
- package/examples/google-flights/lookup_airport/index.ts +0 -3
- package/examples/google-flights/lookup_airport/parser.ts +1 -8
- package/examples/google-flights/lookup_airport/workflow.json +0 -3
- package/examples/google-flights/search_flights/index.ts +7 -62
- package/examples/google-flights/search_flights/request-transform.ts +0 -45
- package/examples/google-flights/search_flights/workflow.json +7 -62
- package/package.json +1 -1
- package/prompts/build-planning.md +1 -1
- package/prompts/compile-agent.md +3 -5
- package/prompts/prereq-builder.md +1 -2
- package/src/imprint/backend-ladder.ts +47 -436
- package/src/imprint/cdp-browser-fetch.ts +6 -176
- package/src/imprint/cdp-jar-cache.ts +10 -105
- package/src/imprint/compile-tools.ts +2 -2
- package/src/imprint/mcp-server.ts +65 -152
- package/src/imprint/probe-backends.ts +10 -41
- package/src/imprint/runtime.ts +12 -24
- package/src/imprint/stealth-fetch.ts +0 -71
- package/src/imprint/stealth-token-cache.ts +1 -38
- package/src/imprint/types.ts +0 -45
|
@@ -87,82 +87,15 @@ export function buildJsonSchema(parameters: WorkflowParameter[]): Tool['inputSch
|
|
|
87
87
|
};
|
|
88
88
|
}
|
|
89
89
|
|
|
90
|
-
export function shouldSkipBootstrapSplice(preferredOrder?: ConcreteBackend[]): boolean {
|
|
91
|
-
return Boolean(preferredOrder?.length && !preferredOrder.includes('fetch-bootstrap'));
|
|
92
|
-
}
|
|
93
|
-
|
|
94
|
-
export function withPreferredFallbacks(
|
|
95
|
-
ladder: ConcreteBackend[],
|
|
96
|
-
preferredOrder?: ConcreteBackend[],
|
|
97
|
-
): ConcreteBackend[] {
|
|
98
|
-
const next = [...ladder];
|
|
99
|
-
if (preferredOrder?.includes('fetch-bootstrap') && !next.includes('cdp-replay')) {
|
|
100
|
-
const idx = next.indexOf('fetch-bootstrap');
|
|
101
|
-
if (idx !== -1) next.splice(idx + 1, 0, 'cdp-replay');
|
|
102
|
-
}
|
|
103
|
-
if (
|
|
104
|
-
(preferredOrder?.includes('fetch-bootstrap') || preferredOrder?.includes('cdp-replay')) &&
|
|
105
|
-
!next.includes('stealth-fetch')
|
|
106
|
-
) {
|
|
107
|
-
const cdpIdx = next.indexOf('cdp-replay');
|
|
108
|
-
const fbIdx = next.indexOf('fetch-bootstrap');
|
|
109
|
-
const idx = cdpIdx !== -1 ? cdpIdx : fbIdx;
|
|
110
|
-
if (idx !== -1) next.splice(idx + 1, 0, 'stealth-fetch');
|
|
111
|
-
}
|
|
112
|
-
return next;
|
|
113
|
-
}
|
|
114
|
-
|
|
115
|
-
export function applyExecutionFallbacks(
|
|
116
|
-
ladder: ConcreteBackend[],
|
|
117
|
-
execution?: { skipPlaybookFallback?: boolean },
|
|
118
|
-
): ConcreteBackend[] {
|
|
119
|
-
if (!execution?.skipPlaybookFallback || ladder.length <= 1) return ladder;
|
|
120
|
-
return ladder.filter((backend) => backend !== 'playbook');
|
|
121
|
-
}
|
|
122
|
-
|
|
123
|
-
export function buildSiteSpacingMap(
|
|
124
|
-
tools: Array<{ site: string; workflow: { execution?: { minCallSpacingMs?: number } } }>,
|
|
125
|
-
): Map<string, number> {
|
|
126
|
-
const out = new Map<string, number>();
|
|
127
|
-
for (const tool of tools) {
|
|
128
|
-
const spacing = Math.max(0, tool.workflow.execution?.minCallSpacingMs ?? 0);
|
|
129
|
-
if (spacing > (out.get(tool.site) ?? 0)) out.set(tool.site, spacing);
|
|
130
|
-
}
|
|
131
|
-
return out;
|
|
132
|
-
}
|
|
133
|
-
|
|
134
90
|
const log = createLog('mcp');
|
|
135
91
|
|
|
136
92
|
export async function runSerializedBySite<T>(
|
|
137
93
|
queues: Map<string, Promise<void>>,
|
|
138
94
|
site: string,
|
|
139
95
|
task: () => Promise<T>,
|
|
140
|
-
opts: {
|
|
141
|
-
minCallSpacingMs?: number;
|
|
142
|
-
lastFinishedAt?: Map<string, number>;
|
|
143
|
-
now?: () => number;
|
|
144
|
-
sleep?: (ms: number) => Promise<void>;
|
|
145
|
-
} = {},
|
|
146
96
|
): Promise<T> {
|
|
147
97
|
const previous = queues.get(site) ?? Promise.resolve();
|
|
148
|
-
const run = previous
|
|
149
|
-
.catch(() => undefined)
|
|
150
|
-
.then(async () => {
|
|
151
|
-
const spacingMs = Math.max(0, opts.minCallSpacingMs ?? 0);
|
|
152
|
-
const lastFinishedAt = opts.lastFinishedAt;
|
|
153
|
-
const now = opts.now ?? Date.now;
|
|
154
|
-
const sleep = opts.sleep ?? ((ms: number) => new Promise<void>((r) => setTimeout(r, ms)));
|
|
155
|
-
if (spacingMs > 0 && lastFinishedAt) {
|
|
156
|
-
const elapsed = now() - (lastFinishedAt.get(site) ?? 0);
|
|
157
|
-
const waitMs = spacingMs - elapsed;
|
|
158
|
-
if (waitMs > 0) await sleep(waitMs);
|
|
159
|
-
}
|
|
160
|
-
try {
|
|
161
|
-
return await task();
|
|
162
|
-
} finally {
|
|
163
|
-
lastFinishedAt?.set(site, now());
|
|
164
|
-
}
|
|
165
|
-
});
|
|
98
|
+
const run = previous.catch(() => undefined).then(task);
|
|
166
99
|
const tail = run.then(
|
|
167
100
|
() => undefined,
|
|
168
101
|
() => undefined,
|
|
@@ -186,7 +119,7 @@ function buildServer(
|
|
|
186
119
|
{
|
|
187
120
|
capabilities: { tools: {} },
|
|
188
121
|
instructions:
|
|
189
|
-
'Imprint runs deterministic workflows captured from real browser sessions. Tools prefer fetch API replay,
|
|
122
|
+
'Imprint runs deterministic workflows captured from real browser sessions. Tools prefer fetch API replay, may use gated fetch-bootstrap only for declared browser-minted state, then cdp-replay (API requests run inside a live trusted Chrome so a protected POST refreshes its anti-bot token between calls) for multi-step state-changing flows, then stealth-fetch for bot-defense state, and playbook only for full DOM interaction. Error codes: AUTH_EXPIRED (401, run `imprint login <site>`); STATE_MISSING (required cookie/state was unavailable or ambiguous); FORBIDDEN (403); RATE_LIMITED (429, back off); BAD_RESPONSE (other 4xx/5xx); NETWORK (fetch failed); UNKNOWN (everything else).',
|
|
190
123
|
},
|
|
191
124
|
);
|
|
192
125
|
|
|
@@ -217,8 +150,6 @@ function buildServer(
|
|
|
217
150
|
// make Google Flights return fast empty result sets. Keep same-site execution
|
|
218
151
|
// sequential while allowing unrelated sites to proceed independently.
|
|
219
152
|
const siteExecutionQueues = new Map<string, Promise<void>>();
|
|
220
|
-
const siteLastFinishedAt = new Map<string, number>();
|
|
221
|
-
const siteMinCallSpacingMs = buildSiteSpacingMap(tools);
|
|
222
153
|
|
|
223
154
|
server.setRequestHandler(ListToolsRequestSchema, async () => ({
|
|
224
155
|
tools: tools.map((t) => ({
|
|
@@ -256,93 +187,75 @@ function buildServer(
|
|
|
256
187
|
>;
|
|
257
188
|
|
|
258
189
|
try {
|
|
259
|
-
return await runSerializedBySite(
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
if (Number.isFinite(pacingMs) && pacingMs > 0) {
|
|
269
|
-
await new Promise((r) => setTimeout(r, pacingMs));
|
|
270
|
-
}
|
|
190
|
+
return await runSerializedBySite(siteExecutionQueues, tool.site, async () => {
|
|
191
|
+
// Audit-only pacing: when the audit harness sets IMPRINT_AUDIT_PACING_MS,
|
|
192
|
+
// sleep before each actual workflow execution so same-site queued calls
|
|
193
|
+
// stay spaced out instead of all waiting concurrently before the queue.
|
|
194
|
+
// Unset in production -> no delay.
|
|
195
|
+
const pacingMs = Number(process.env.IMPRINT_AUDIT_PACING_MS);
|
|
196
|
+
if (Number.isFinite(pacingMs) && pacingMs > 0) {
|
|
197
|
+
await new Promise((r) => setTimeout(r, pacingMs));
|
|
198
|
+
}
|
|
271
199
|
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
const cf = cdpPool.get(tool.site);
|
|
297
|
-
if (cf) {
|
|
298
|
-
log(`closing idle CDP session for ${tool.site}`);
|
|
299
|
-
cf.close().catch(() => {});
|
|
300
|
-
cdpPool.delete(tool.site);
|
|
301
|
-
cdpIdleTimers.delete(tool.site);
|
|
302
|
-
// Drop this site's winner memo too: a memoized cdp-replay would now
|
|
303
|
-
// point at a closed Chrome and re-pay the cold relaunch.
|
|
304
|
-
for (const key of winnerCache.keys()) {
|
|
305
|
-
if (key.startsWith(`${tool.site}:`)) winnerCache.delete(key);
|
|
306
|
-
}
|
|
200
|
+
const ladder = resolveLadder('auto', tool.preferredOrder);
|
|
201
|
+
const { result, usedBackend, attempts } = await runWithLadder(
|
|
202
|
+
ladder,
|
|
203
|
+
tool,
|
|
204
|
+
args,
|
|
205
|
+
assetRoot,
|
|
206
|
+
stealthCache,
|
|
207
|
+
{ cdpPool, winnerCache, skipBootstrapSplice: Boolean(tool.preferredOrder?.length) },
|
|
208
|
+
);
|
|
209
|
+
// Reset the idle timer for this site's pooled Chrome.
|
|
210
|
+
if (result.ok && usedBackend === 'cdp-replay' && cdpPool.has(tool.site)) {
|
|
211
|
+
const prev = cdpIdleTimers.get(tool.site);
|
|
212
|
+
if (prev) clearTimeout(prev);
|
|
213
|
+
const timer = setTimeout(() => {
|
|
214
|
+
const cf = cdpPool.get(tool.site);
|
|
215
|
+
if (cf) {
|
|
216
|
+
log(`closing idle CDP session for ${tool.site}`);
|
|
217
|
+
cf.close().catch(() => {});
|
|
218
|
+
cdpPool.delete(tool.site);
|
|
219
|
+
cdpIdleTimers.delete(tool.site);
|
|
220
|
+
// Drop this site's winner memo too: a memoized cdp-replay would now
|
|
221
|
+
// point at a closed Chrome and re-pay the cold relaunch.
|
|
222
|
+
for (const key of winnerCache.keys()) {
|
|
223
|
+
if (key.startsWith(`${tool.site}:`)) winnerCache.delete(key);
|
|
307
224
|
}
|
|
308
|
-
}, CDP_IDLE_TIMEOUT_MS);
|
|
309
|
-
timer.unref();
|
|
310
|
-
cdpIdleTimers.set(tool.site, timer);
|
|
311
|
-
}
|
|
312
|
-
if (!result.ok) {
|
|
313
|
-
const text = formatToolError(result);
|
|
314
|
-
return {
|
|
315
|
-
isError: true,
|
|
316
|
-
content: [{ type: 'text', text: `${text}\n(backend: ${usedBackend})` }],
|
|
317
|
-
};
|
|
318
|
-
}
|
|
319
|
-
try {
|
|
320
|
-
const cache = persistRuntimeBackendsCache({
|
|
321
|
-
tool,
|
|
322
|
-
assetRoot,
|
|
323
|
-
usedBackend,
|
|
324
|
-
attempts,
|
|
325
|
-
});
|
|
326
|
-
if (cache) {
|
|
327
|
-
tool.preferredOrder = cache.preferredOrder;
|
|
328
|
-
log(
|
|
329
|
-
` learned backend order for ${tool.workflow.toolName}: ${cache.preferredOrder.join(' → ')}`,
|
|
330
|
-
);
|
|
331
225
|
}
|
|
332
|
-
}
|
|
226
|
+
}, CDP_IDLE_TIMEOUT_MS);
|
|
227
|
+
timer.unref();
|
|
228
|
+
cdpIdleTimers.set(tool.site, timer);
|
|
229
|
+
}
|
|
230
|
+
if (!result.ok) {
|
|
231
|
+
const text = formatToolError(result);
|
|
232
|
+
return {
|
|
233
|
+
isError: true,
|
|
234
|
+
content: [{ type: 'text', text: `${text}\n(backend: ${usedBackend})` }],
|
|
235
|
+
};
|
|
236
|
+
}
|
|
237
|
+
try {
|
|
238
|
+
const cache = persistRuntimeBackendsCache({
|
|
239
|
+
tool,
|
|
240
|
+
assetRoot,
|
|
241
|
+
usedBackend,
|
|
242
|
+
attempts,
|
|
243
|
+
});
|
|
244
|
+
if (cache) {
|
|
245
|
+
tool.preferredOrder = cache.preferredOrder;
|
|
333
246
|
log(
|
|
334
|
-
`
|
|
247
|
+
` learned backend order for ${tool.workflow.toolName}: ${cache.preferredOrder.join(' → ')}`,
|
|
335
248
|
);
|
|
336
249
|
}
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
}
|
|
345
|
-
);
|
|
250
|
+
} catch (err) {
|
|
251
|
+
log(
|
|
252
|
+
` warning: could not persist backend order for ${tool.workflow.toolName}: ${err instanceof Error ? err.message : String(err)}`,
|
|
253
|
+
);
|
|
254
|
+
}
|
|
255
|
+
const text =
|
|
256
|
+
typeof result.data === 'string' ? result.data : JSON.stringify(result.data, null, 2);
|
|
257
|
+
return { content: [{ type: 'text', text: `${text}\n\n(backend: ${usedBackend})` }] };
|
|
258
|
+
});
|
|
346
259
|
} catch (err) {
|
|
347
260
|
const msg = err instanceof Error ? err.message : String(err);
|
|
348
261
|
return { isError: true, content: [{ type: 'text', text: `[INTERNAL] ${msg}` }] };
|
|
@@ -340,37 +340,14 @@ function workflowHash(workflow: ResolvedTool['workflow']): string {
|
|
|
340
340
|
|
|
341
341
|
function capabilityHash(workflow: ResolvedTool['workflow']): string {
|
|
342
342
|
const caps = {
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
captures: workflow.bootstrap.captures ?? [],
|
|
348
|
-
}
|
|
349
|
-
: null,
|
|
350
|
-
requests: workflow.requests.map((r) => ({
|
|
351
|
-
method: r.method.toUpperCase(),
|
|
352
|
-
effect: r.effect ?? null,
|
|
353
|
-
stateRefs: stateRefsInWorkflowRequest(r),
|
|
354
|
-
captures: r.captures ?? [],
|
|
355
|
-
})),
|
|
343
|
+
bootstrap: Boolean(workflow.bootstrap),
|
|
344
|
+
captures: workflow.requests.flatMap((r) =>
|
|
345
|
+
(r.captures ?? []).map((c) => `${c.source}:${c.name}:${c.capability}`),
|
|
346
|
+
),
|
|
356
347
|
};
|
|
357
348
|
return createHash('sha256').update(JSON.stringify(caps)).digest('hex');
|
|
358
349
|
}
|
|
359
350
|
|
|
360
|
-
function stateRefsInWorkflowRequest(
|
|
361
|
-
request: ResolvedTool['workflow']['requests'][number],
|
|
362
|
-
): string[] {
|
|
363
|
-
const refs = new Set<string>();
|
|
364
|
-
const scan = (text: string | undefined): void => {
|
|
365
|
-
if (!text) return;
|
|
366
|
-
for (const match of text.matchAll(/\$\{state\.([A-Za-z0-9_]+)\}/g)) refs.add(match[1] ?? '');
|
|
367
|
-
};
|
|
368
|
-
scan(request.url);
|
|
369
|
-
scan(request.body);
|
|
370
|
-
for (const value of Object.values(request.headers ?? {})) scan(value);
|
|
371
|
-
return [...refs].filter(Boolean).sort();
|
|
372
|
-
}
|
|
373
|
-
|
|
374
351
|
/** Read backends.json with status information. Runtime can still fall back to
|
|
375
352
|
* the default ladder, while status commands can explain why a cache was not
|
|
376
353
|
* usable. */
|
|
@@ -390,11 +367,8 @@ export function loadBackendsCacheStatus(
|
|
|
390
367
|
if (parsed.schemaVersion && parsed.schemaVersion >= 2 && parsed.workflowHash) {
|
|
391
368
|
const workflowPath = pathResolve(toolDir, 'workflow.json');
|
|
392
369
|
if (existsSync(workflowPath)) {
|
|
393
|
-
const
|
|
394
|
-
if (
|
|
395
|
-
current.workflowHash !== parsed.workflowHash &&
|
|
396
|
-
(!parsed.capabilityHash || current.capabilityHash !== parsed.capabilityHash)
|
|
397
|
-
) {
|
|
370
|
+
const currentHash = workflowHashSync(readFileSync(workflowPath, 'utf8'));
|
|
371
|
+
if (currentHash !== parsed.workflowHash) {
|
|
398
372
|
const reason = 'workflow hash changed';
|
|
399
373
|
if (opts.warn !== false) {
|
|
400
374
|
process.stderr.write(
|
|
@@ -522,15 +496,10 @@ export function persistRuntimeBackendsCache(opts: {
|
|
|
522
496
|
return cache;
|
|
523
497
|
}
|
|
524
498
|
|
|
525
|
-
function
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
const workflow = WorkflowSchema.parse(JSON.parse(workflowJson));
|
|
530
|
-
return {
|
|
531
|
-
workflowHash: workflowHash(workflow),
|
|
532
|
-
capabilityHash: capabilityHash(workflow),
|
|
533
|
-
};
|
|
499
|
+
function workflowHashSync(workflowJson: string): string {
|
|
500
|
+
return createHash('sha256')
|
|
501
|
+
.update(JSON.stringify(WorkflowSchema.parse(JSON.parse(workflowJson))))
|
|
502
|
+
.digest('hex');
|
|
534
503
|
}
|
|
535
504
|
|
|
536
505
|
function backendsCacheRemediation(site: string, toolName?: string): string {
|
package/src/imprint/runtime.ts
CHANGED
|
@@ -133,6 +133,18 @@ export async function executeWorkflow<T = unknown>(opts: ExecuteOptions): Promis
|
|
|
133
133
|
}
|
|
134
134
|
}
|
|
135
135
|
|
|
136
|
+
// rawResponses feeds parser modules and the final return shape. responseSlots
|
|
137
|
+
// keeps legacy request.extract aliases without replacing raw parser input.
|
|
138
|
+
const responseSlots: ResponseSlot[] = [];
|
|
139
|
+
const state: Record<string, unknown> = { ...(opts.initialState ?? {}) };
|
|
140
|
+
|
|
141
|
+
// Per-execution mutable jar. Never shared across MCP/cron calls.
|
|
142
|
+
const cookieJar = new RuntimeCookieJar(credentials.cookies);
|
|
143
|
+
const liveCredentials: CredentialStore = { ...credentials, cookies: cookieJar.toJSON() };
|
|
144
|
+
const stateCapabilities = collectStateCapabilities(opts.workflow);
|
|
145
|
+
const dependencyPreflight = preflightStateDependencies(opts.workflow, state, stateCapabilities);
|
|
146
|
+
if (!dependencyPreflight.ok) return dependencyPreflight.result;
|
|
147
|
+
|
|
136
148
|
type TransformResult = string | { url: string; body?: string; headers?: Record<string, string> };
|
|
137
149
|
let requestTransform:
|
|
138
150
|
| ((
|
|
@@ -140,7 +152,6 @@ export async function executeWorkflow<T = unknown>(opts: ExecuteOptions): Promis
|
|
|
140
152
|
url: string,
|
|
141
153
|
responses: unknown[],
|
|
142
154
|
params?: Record<string, string | number | boolean>,
|
|
143
|
-
state?: Record<string, unknown>,
|
|
144
155
|
) => TransformResult)
|
|
145
156
|
| null = null;
|
|
146
157
|
if (opts.workflow.requestTransformModule && opts.workflowPath) {
|
|
@@ -150,34 +161,12 @@ export async function executeWorkflow<T = unknown>(opts: ExecuteOptions): Promis
|
|
|
150
161
|
opts.workflow.requestTransformModule,
|
|
151
162
|
);
|
|
152
163
|
const mod = await import(transformPath);
|
|
153
|
-
if (typeof mod.prepareParams === 'function') {
|
|
154
|
-
const prepared = await mod.prepareParams(params);
|
|
155
|
-
if (prepared && typeof prepared === 'object') {
|
|
156
|
-
for (const [k, v] of Object.entries(prepared)) {
|
|
157
|
-
if (typeof v === 'string' || typeof v === 'number' || typeof v === 'boolean') {
|
|
158
|
-
params[k] = v;
|
|
159
|
-
}
|
|
160
|
-
}
|
|
161
|
-
}
|
|
162
|
-
}
|
|
163
164
|
if (typeof mod.transform === 'function') requestTransform = mod.transform;
|
|
164
165
|
} catch {
|
|
165
166
|
// Non-fatal — proceed without transform.
|
|
166
167
|
}
|
|
167
168
|
}
|
|
168
169
|
|
|
169
|
-
// rawResponses feeds parser modules and the final return shape. responseSlots
|
|
170
|
-
// keeps legacy request.extract aliases without replacing raw parser input.
|
|
171
|
-
const responseSlots: ResponseSlot[] = [];
|
|
172
|
-
const state: Record<string, unknown> = { ...(opts.initialState ?? {}) };
|
|
173
|
-
|
|
174
|
-
// Per-execution mutable jar. Never shared across MCP/cron calls.
|
|
175
|
-
const cookieJar = new RuntimeCookieJar(credentials.cookies);
|
|
176
|
-
const liveCredentials: CredentialStore = { ...credentials, cookies: cookieJar.toJSON() };
|
|
177
|
-
const stateCapabilities = collectStateCapabilities(opts.workflow);
|
|
178
|
-
const dependencyPreflight = preflightStateDependencies(opts.workflow, state, stateCapabilities);
|
|
179
|
-
if (!dependencyPreflight.ok) return dependencyPreflight.result;
|
|
180
|
-
|
|
181
170
|
for (let i = 0; i < opts.workflow.requests.length; i++) {
|
|
182
171
|
const req = opts.workflow.requests[i];
|
|
183
172
|
if (!req) continue;
|
|
@@ -201,7 +190,6 @@ export async function executeWorkflow<T = unknown>(opts: ExecuteOptions): Promis
|
|
|
201
190
|
subbed.url,
|
|
202
191
|
responseSlots.map((s) => s.raw),
|
|
203
192
|
params,
|
|
204
|
-
state,
|
|
205
193
|
);
|
|
206
194
|
if (typeof transformResult === 'string') {
|
|
207
195
|
subbed.url = transformResult;
|
|
@@ -74,20 +74,6 @@ export interface TokenCache {
|
|
|
74
74
|
/** Lower-cased response headers of the bootstrap navigation, so callers can
|
|
75
75
|
* satisfy `response_header` bootstrap captures. Optional. */
|
|
76
76
|
bootstrapResponseHeaders?: Record<string, string>;
|
|
77
|
-
/** Browser-generated requests observed while the bootstrap page loaded. Lets
|
|
78
|
-
* workflows capture replay headers minted by page JavaScript for later XHRs. */
|
|
79
|
-
observedRequests?: Array<{
|
|
80
|
-
method: string;
|
|
81
|
-
url: string;
|
|
82
|
-
headers: Record<string, string>;
|
|
83
|
-
body?: string;
|
|
84
|
-
resourceType?: string;
|
|
85
|
-
response?: {
|
|
86
|
-
status: number;
|
|
87
|
-
headers: Record<string, string>;
|
|
88
|
-
body?: string;
|
|
89
|
-
};
|
|
90
|
-
}>;
|
|
91
77
|
/** The bootstrap browser's actual `navigator.userAgent`, captured live. Reused
|
|
92
78
|
* for the post-bootstrap fetches so the wire UA matches the binary that minted
|
|
93
79
|
* the cookies (and its client hints below). Absent if capture failed or on
|
|
@@ -523,42 +509,6 @@ export async function bootstrapStealthToken(args: BootstrapArgs): Promise<TokenC
|
|
|
523
509
|
});
|
|
524
510
|
|
|
525
511
|
const page = await context.newPage();
|
|
526
|
-
const observedRequests: NonNullable<TokenCache['observedRequests']> = [];
|
|
527
|
-
const observedByRequest = new Map<
|
|
528
|
-
unknown,
|
|
529
|
-
NonNullable<TokenCache['observedRequests']>[number]
|
|
530
|
-
>();
|
|
531
|
-
const pendingResponseCaptures = new Set<Promise<void>>();
|
|
532
|
-
page.on('request', (request) => {
|
|
533
|
-
const entry: NonNullable<TokenCache['observedRequests']>[number] = {
|
|
534
|
-
method: request.method(),
|
|
535
|
-
url: request.url(),
|
|
536
|
-
headers: request.headers(),
|
|
537
|
-
...(request.postData() !== null ? { body: request.postData() as string } : {}),
|
|
538
|
-
resourceType: request.resourceType(),
|
|
539
|
-
};
|
|
540
|
-
observedRequests.push(entry);
|
|
541
|
-
observedByRequest.set(request, entry);
|
|
542
|
-
if (observedRequests.length > 100) observedRequests.shift();
|
|
543
|
-
});
|
|
544
|
-
page.on('response', (response) => {
|
|
545
|
-
const entry = observedByRequest.get(response.request());
|
|
546
|
-
if (!entry || !shouldCaptureObservedBody(entry)) return;
|
|
547
|
-
const pending = (async () => {
|
|
548
|
-
try {
|
|
549
|
-
const headers = await response.allHeaders();
|
|
550
|
-
entry.response = {
|
|
551
|
-
status: response.status(),
|
|
552
|
-
headers,
|
|
553
|
-
body: await response.text(),
|
|
554
|
-
};
|
|
555
|
-
} catch {
|
|
556
|
-
// best-effort — response reuse simply won't match without a body
|
|
557
|
-
}
|
|
558
|
-
})();
|
|
559
|
-
pendingResponseCaptures.add(pending);
|
|
560
|
-
void pending.finally(() => pendingResponseCaptures.delete(pending));
|
|
561
|
-
});
|
|
562
512
|
// Patch navigator.webdriver ONLY on the vanilla-Playwright fallback. When the
|
|
563
513
|
// stealth plugin is active it already removes the property natively (a real
|
|
564
514
|
// Chrome lacks it); stacking our Object.defineProperty on top leaves a
|
|
@@ -712,7 +662,6 @@ export async function bootstrapStealthToken(args: BootstrapArgs): Promise<TokenC
|
|
|
712
662
|
);
|
|
713
663
|
|
|
714
664
|
await page.waitForTimeout(300);
|
|
715
|
-
await settlePendingResponseCaptures(pendingResponseCaptures, 2_000);
|
|
716
665
|
|
|
717
666
|
// Capture cookies scoped to the recording's registrable domain
|
|
718
667
|
// (eTLD+1). Naive `.split('.').slice(-2)` was wrong for multi-part
|
|
@@ -737,7 +686,6 @@ export async function bootstrapStealthToken(args: BootstrapArgs): Promise<TokenC
|
|
|
737
686
|
bootstrappedAt: Date.now(),
|
|
738
687
|
bootstrapHtml,
|
|
739
688
|
bootstrapResponseHeaders,
|
|
740
|
-
observedRequests,
|
|
741
689
|
userAgent: capturedUserAgent,
|
|
742
690
|
clientHints,
|
|
743
691
|
};
|
|
@@ -746,25 +694,6 @@ export async function bootstrapStealthToken(args: BootstrapArgs): Promise<TokenC
|
|
|
746
694
|
}
|
|
747
695
|
}
|
|
748
696
|
|
|
749
|
-
function shouldCaptureObservedBody(
|
|
750
|
-
entry: NonNullable<TokenCache['observedRequests']>[number],
|
|
751
|
-
): boolean {
|
|
752
|
-
const type = entry.resourceType?.toLowerCase();
|
|
753
|
-
if (type && type !== 'xhr' && type !== 'fetch') return false;
|
|
754
|
-
return true;
|
|
755
|
-
}
|
|
756
|
-
|
|
757
|
-
async function settlePendingResponseCaptures(
|
|
758
|
-
pending: Set<Promise<void>>,
|
|
759
|
-
timeoutMs: number,
|
|
760
|
-
): Promise<void> {
|
|
761
|
-
if (pending.size === 0) return;
|
|
762
|
-
await Promise.race([
|
|
763
|
-
Promise.allSettled([...pending]),
|
|
764
|
-
new Promise<void>((resolve) => setTimeout(resolve, timeoutMs)),
|
|
765
|
-
]);
|
|
766
|
-
}
|
|
767
|
-
|
|
768
697
|
async function defaultUnderlyingFetch(
|
|
769
698
|
url: string,
|
|
770
699
|
init: FetchInit,
|
|
@@ -57,19 +57,6 @@ export function loadCachedToken(siteDir: string, maxAgeSeconds: number): TokenCa
|
|
|
57
57
|
cookies: raw.cookies,
|
|
58
58
|
sensorHeaders: raw.sensorHeaders,
|
|
59
59
|
bootstrappedAt: raw.bootstrappedAt,
|
|
60
|
-
...(typeof raw.bootstrapHtml === 'string' ? { bootstrapHtml: raw.bootstrapHtml } : {}),
|
|
61
|
-
...(raw.bootstrapResponseHeaders &&
|
|
62
|
-
typeof raw.bootstrapResponseHeaders === 'object' &&
|
|
63
|
-
!Array.isArray(raw.bootstrapResponseHeaders)
|
|
64
|
-
? { bootstrapResponseHeaders: raw.bootstrapResponseHeaders as Record<string, string> }
|
|
65
|
-
: {}),
|
|
66
|
-
...(Array.isArray(raw.observedRequests)
|
|
67
|
-
? { observedRequests: stripDurableObservedResponseBodies(raw.observedRequests) }
|
|
68
|
-
: {}),
|
|
69
|
-
...(typeof raw.userAgent === 'string' ? { userAgent: raw.userAgent } : {}),
|
|
70
|
-
...(raw.clientHints && typeof raw.clientHints === 'object' && !Array.isArray(raw.clientHints)
|
|
71
|
-
? { clientHints: raw.clientHints as Record<string, string> }
|
|
72
|
-
: {}),
|
|
73
60
|
};
|
|
74
61
|
} catch {
|
|
75
62
|
return null;
|
|
@@ -82,16 +69,7 @@ export function saveCachedToken(siteDir: string, token: TokenCache): void {
|
|
|
82
69
|
mkdirSync(siteDir, { recursive: true });
|
|
83
70
|
const p = tokenPath(siteDir);
|
|
84
71
|
const tmp = `${p}.${process.pid}.tmp`;
|
|
85
|
-
writeFileSync(
|
|
86
|
-
tmp,
|
|
87
|
-
`${JSON.stringify({
|
|
88
|
-
...token,
|
|
89
|
-
...(token.observedRequests
|
|
90
|
-
? { observedRequests: stripDurableObservedResponseBodies(token.observedRequests) }
|
|
91
|
-
: {}),
|
|
92
|
-
})}\n`,
|
|
93
|
-
'utf8',
|
|
94
|
-
);
|
|
72
|
+
writeFileSync(tmp, `${JSON.stringify(token)}\n`, 'utf8');
|
|
95
73
|
renameSync(tmp, p);
|
|
96
74
|
} catch (err) {
|
|
97
75
|
log(
|
|
@@ -100,21 +78,6 @@ export function saveCachedToken(siteDir: string, token: TokenCache): void {
|
|
|
100
78
|
}
|
|
101
79
|
}
|
|
102
80
|
|
|
103
|
-
function stripDurableObservedResponseBodies(
|
|
104
|
-
observedRequests: NonNullable<TokenCache['observedRequests']>,
|
|
105
|
-
): NonNullable<TokenCache['observedRequests']> {
|
|
106
|
-
return observedRequests.map((req) => {
|
|
107
|
-
if (!req.response || req.response.body === undefined) return req;
|
|
108
|
-
return {
|
|
109
|
-
...req,
|
|
110
|
-
response: {
|
|
111
|
-
status: req.response.status,
|
|
112
|
-
headers: req.response.headers,
|
|
113
|
-
},
|
|
114
|
-
};
|
|
115
|
-
});
|
|
116
|
-
}
|
|
117
|
-
|
|
118
81
|
/** Remove a cached token (best-effort) — call when a site's teach run ends. */
|
|
119
82
|
export function clearCachedToken(siteDir: string): void {
|
|
120
83
|
try {
|
package/src/imprint/types.ts
CHANGED
|
@@ -221,42 +221,6 @@ const BootstrapCaptureSchema = z.discriminatedUnion('source', [
|
|
|
221
221
|
header: z.string(),
|
|
222
222
|
mode: z.enum(['first', 'last', 'all']).optional().default('last'),
|
|
223
223
|
}),
|
|
224
|
-
/** Read a header from a browser-generated request observed during bootstrap
|
|
225
|
-
* navigation. This is for replay tokens produced by page JavaScript for an
|
|
226
|
-
* XHR/fetch request, where neither HTML nor response headers contain the
|
|
227
|
-
* value. `urlPattern` is a JavaScript regular expression tested against the
|
|
228
|
-
* observed request URL. */
|
|
229
|
-
CaptureCommonSchema.extend({
|
|
230
|
-
source: z.literal('request_header'),
|
|
231
|
-
header: z.string(),
|
|
232
|
-
method: z.string().optional(),
|
|
233
|
-
urlPattern: z.string().optional(),
|
|
234
|
-
mode: z.enum(['first', 'last', 'all']).optional().default('last'),
|
|
235
|
-
}),
|
|
236
|
-
/** Read a value from a browser-generated request URL observed during
|
|
237
|
-
* bootstrap navigation. This is for per-page request ids or URL tokens that
|
|
238
|
-
* are generated alongside browser XHR/fetch calls. `urlPattern` selects the
|
|
239
|
-
* observed request; `pattern` extracts the value from that request URL. */
|
|
240
|
-
CaptureCommonSchema.extend({
|
|
241
|
-
source: z.literal('request_url_regex'),
|
|
242
|
-
pattern: z.string(),
|
|
243
|
-
group: z.number().int().nonnegative().optional().default(1),
|
|
244
|
-
method: z.string().optional(),
|
|
245
|
-
urlPattern: z.string().optional(),
|
|
246
|
-
mode: z.enum(['first', 'last', 'all']).optional().default('last'),
|
|
247
|
-
}),
|
|
248
|
-
/** Read a value from a browser-generated request body observed during
|
|
249
|
-
* bootstrap navigation. This is for page-minted replay tokens embedded in
|
|
250
|
-
* POST bodies (for example form-encoded RPC envelopes) where neither the URL
|
|
251
|
-
* nor request headers carry the value. */
|
|
252
|
-
CaptureCommonSchema.extend({
|
|
253
|
-
source: z.literal('request_body_regex'),
|
|
254
|
-
pattern: z.string(),
|
|
255
|
-
group: z.number().int().nonnegative().optional().default(1),
|
|
256
|
-
method: z.string().optional(),
|
|
257
|
-
urlPattern: z.string().optional(),
|
|
258
|
-
mode: z.enum(['first', 'last', 'all']).optional().default('last'),
|
|
259
|
-
}),
|
|
260
224
|
]);
|
|
261
225
|
export type BootstrapCapture = z.infer<typeof BootstrapCaptureSchema>;
|
|
262
226
|
|
|
@@ -331,15 +295,6 @@ export const WorkflowSchema = z.object({
|
|
|
331
295
|
exhaustedBackends: z.array(z.string()),
|
|
332
296
|
})
|
|
333
297
|
.optional(),
|
|
334
|
-
/** Optional runtime hints for provider-specific replay constraints. */
|
|
335
|
-
execution: z
|
|
336
|
-
.object({
|
|
337
|
-
/** Minimum end-to-start spacing between MCP calls for the same site. */
|
|
338
|
-
minCallSpacingMs: z.number().int().nonnegative().optional(),
|
|
339
|
-
/** Do not enter the DOM playbook rung after API/browser-backed rungs fail. */
|
|
340
|
-
skipPlaybookFallback: z.boolean().optional(),
|
|
341
|
-
})
|
|
342
|
-
.optional(),
|
|
343
298
|
});
|
|
344
299
|
export type Workflow = z.infer<typeof WorkflowSchema>;
|
|
345
300
|
|