imprint-mcp 0.4.2 → 0.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "imprint-mcp",
3
- "version": "0.4.2",
3
+ "version": "0.4.4",
4
4
  "description": "Teach an AI agent how to use any website. Once. Records a real browser session + narration; generates a deterministic MCP tool plus a DOM-replay playbook fallback.",
5
5
  "type": "module",
6
6
  "exports": {
@@ -75,6 +75,8 @@ interface LadderResult {
75
75
  const log = createLog('backend');
76
76
 
77
77
  const DEFAULT_LADDER: ConcreteBackend[] = ['fetch', 'stealth-fetch', 'playbook'];
78
+ const DEFAULT_PLAYBOOK_BACKEND_TIMEOUT_MS = 75_000;
79
+ const DEFAULT_PLAYBOOK_BACKEND_STEP_TIMEOUT_MS = 20_000;
78
80
 
79
81
  /** Process-scoped memo of the backend that last succeeded for a site on the
80
82
  * compile/test path (`runWorkflowWithLadder`). Lets the param-coverage suite
@@ -182,6 +184,22 @@ function sleepMs(ms: number): Promise<void> {
182
184
  return new Promise((r) => setTimeout(r, ms));
183
185
  }
184
186
 
187
+ function playbookBackendTimeoutMs(): number {
188
+ return positiveEnvMs('IMPRINT_PLAYBOOK_BACKEND_TIMEOUT_MS', DEFAULT_PLAYBOOK_BACKEND_TIMEOUT_MS);
189
+ }
190
+
191
+ function playbookBackendStepTimeoutMs(): number {
192
+ return positiveEnvMs(
193
+ 'IMPRINT_PLAYBOOK_BACKEND_STEP_TIMEOUT_MS',
194
+ DEFAULT_PLAYBOOK_BACKEND_STEP_TIMEOUT_MS,
195
+ );
196
+ }
197
+
198
+ function positiveEnvMs(name: string, fallback: number): number {
199
+ const raw = Number(process.env[name] ?? fallback);
200
+ return Number.isFinite(raw) && raw > 0 ? Math.floor(raw) : fallback;
201
+ }
202
+
185
203
  function withWorkflowDefaults(
186
204
  workflow: Workflow,
187
205
  params: Record<string, string | number | boolean>,
@@ -340,6 +358,8 @@ export async function runWithLadder(
340
358
  playbook: playbookPath(assetRoot, tool.site, tool.dir),
341
359
  params: paramsWithDefaults,
342
360
  site: tool.site,
361
+ stepTimeoutMs: playbookBackendStepTimeoutMs(),
362
+ maxDurationMs: playbookBackendTimeoutMs(),
343
363
  });
344
364
  break;
345
365
  }
@@ -1466,7 +1486,7 @@ export async function runWorkflowWithLadder(opts: {
1466
1486
  // A backend that finishes AFTER the probe returned (it lost the race but
1467
1487
  // is still cold-starting Chrome) pools its browser late — arm the idle
1468
1488
  // close so it's torn down rather than left lingering.
1469
- void inner.finally(() => armCompileCdpIdleClose());
1489
+ void inner.finally(() => armCompileCdpIdleClose()).catch(() => {});
1470
1490
  const r = await Promise.race([
1471
1491
  inner,
1472
1492
  sleepMs(PROBE_TIMEOUT_MS).then(
@@ -89,6 +89,24 @@ export function buildJsonSchema(parameters: WorkflowParameter[]): Tool['inputSch
89
89
 
90
90
  const log = createLog('mcp');
91
91
 
92
+ export async function runSerializedBySite<T>(
93
+ queues: Map<string, Promise<void>>,
94
+ site: string,
95
+ task: () => Promise<T>,
96
+ ): Promise<T> {
97
+ const previous = queues.get(site) ?? Promise.resolve();
98
+ const run = previous.catch(() => undefined).then(task);
99
+ const tail = run.then(
100
+ () => undefined,
101
+ () => undefined,
102
+ );
103
+ queues.set(site, tail);
104
+ tail.finally(() => {
105
+ if (queues.get(site) === tail) queues.delete(site);
106
+ });
107
+ return await run;
108
+ }
109
+
92
110
  /** Build the MCP Server with all discovered tools registered. */
93
111
  function buildServer(
94
112
  name: string,
@@ -127,6 +145,12 @@ function buildServer(
127
145
  // cdp-replay and re-pay the ~33s relaunch.
128
146
  const winnerCache = new Map<string, ConcreteBackend>();
129
147
 
148
+ // Browser-backed rungs share per-site state (CDP page/session, stealth token,
149
+ // winner memo, and backend cache). Parallel MCP calls can race that state and
150
+ // make Google Flights return fast empty result sets. Keep same-site execution
151
+ // sequential while allowing unrelated sites to proceed independently.
152
+ const siteExecutionQueues = new Map<string, Promise<void>>();
153
+
130
154
  server.setRequestHandler(ListToolsRequestSchema, async () => ({
131
155
  tools: tools.map((t) => ({
132
156
  name: t.workflow.toolName,
@@ -162,74 +186,76 @@ function buildServer(
162
186
  string | number | boolean
163
187
  >;
164
188
 
165
- // Audit-only pacing: when the audit harness sets IMPRINT_AUDIT_PACING_MS,
166
- // sleep before each tool call so the auditor's per-parameter differential
167
- // probing of bot-defended idempotent reads stays steady enough not to trip
168
- // the per-IP anti-bot defense. Unset in production → no delay.
169
- const pacingMs = Number(process.env.IMPRINT_AUDIT_PACING_MS);
170
- if (Number.isFinite(pacingMs) && pacingMs > 0) {
171
- await new Promise((r) => setTimeout(r, pacingMs));
172
- }
173
-
174
189
  try {
175
- const ladder = resolveLadder('auto', tool.preferredOrder);
176
- const { result, usedBackend, attempts } = await runWithLadder(
177
- ladder,
178
- tool,
179
- args,
180
- assetRoot,
181
- stealthCache,
182
- { cdpPool, winnerCache, skipBootstrapSplice: Boolean(tool.preferredOrder?.length) },
183
- );
184
- // Reset the idle timer for this site's pooled Chrome.
185
- if (result.ok && usedBackend === 'cdp-replay' && cdpPool.has(tool.site)) {
186
- const prev = cdpIdleTimers.get(tool.site);
187
- if (prev) clearTimeout(prev);
188
- const timer = setTimeout(() => {
189
- const cf = cdpPool.get(tool.site);
190
- if (cf) {
191
- log(`closing idle CDP session for ${tool.site}`);
192
- cf.close().catch(() => {});
193
- cdpPool.delete(tool.site);
194
- cdpIdleTimers.delete(tool.site);
195
- // Drop this site's winner memo too: a memoized cdp-replay would now
196
- // point at a closed Chrome and re-pay the cold relaunch.
197
- for (const key of winnerCache.keys()) {
198
- if (key.startsWith(`${tool.site}:`)) winnerCache.delete(key);
199
- }
200
- }
201
- }, CDP_IDLE_TIMEOUT_MS);
202
- timer.unref();
203
- cdpIdleTimers.set(tool.site, timer);
204
- }
205
- if (!result.ok) {
206
- const text = formatToolError(result);
207
- return {
208
- isError: true,
209
- content: [{ type: 'text', text: `${text}\n(backend: ${usedBackend})` }],
210
- };
211
- }
212
- try {
213
- const cache = persistRuntimeBackendsCache({
190
+ return await runSerializedBySite(siteExecutionQueues, tool.site, async () => {
191
+ // Audit-only pacing: when the audit harness sets IMPRINT_AUDIT_PACING_MS,
192
+ // sleep before each actual workflow execution so same-site queued calls
193
+ // stay spaced out instead of all waiting concurrently before the queue.
194
+ // Unset in production -> no delay.
195
+ const pacingMs = Number(process.env.IMPRINT_AUDIT_PACING_MS);
196
+ if (Number.isFinite(pacingMs) && pacingMs > 0) {
197
+ await new Promise((r) => setTimeout(r, pacingMs));
198
+ }
199
+
200
+ const ladder = resolveLadder('auto', tool.preferredOrder);
201
+ const { result, usedBackend, attempts } = await runWithLadder(
202
+ ladder,
214
203
  tool,
204
+ args,
215
205
  assetRoot,
216
- usedBackend,
217
- attempts,
218
- });
219
- if (cache) {
220
- tool.preferredOrder = cache.preferredOrder;
206
+ stealthCache,
207
+ { cdpPool, winnerCache, skipBootstrapSplice: Boolean(tool.preferredOrder?.length) },
208
+ );
209
+ // Reset the idle timer for this site's pooled Chrome.
210
+ if (result.ok && usedBackend === 'cdp-replay' && cdpPool.has(tool.site)) {
211
+ const prev = cdpIdleTimers.get(tool.site);
212
+ if (prev) clearTimeout(prev);
213
+ const timer = setTimeout(() => {
214
+ const cf = cdpPool.get(tool.site);
215
+ if (cf) {
216
+ log(`closing idle CDP session for ${tool.site}`);
217
+ cf.close().catch(() => {});
218
+ cdpPool.delete(tool.site);
219
+ cdpIdleTimers.delete(tool.site);
220
+ // Drop this site's winner memo too: a memoized cdp-replay would now
221
+ // point at a closed Chrome and re-pay the cold relaunch.
222
+ for (const key of winnerCache.keys()) {
223
+ if (key.startsWith(`${tool.site}:`)) winnerCache.delete(key);
224
+ }
225
+ }
226
+ }, CDP_IDLE_TIMEOUT_MS);
227
+ timer.unref();
228
+ cdpIdleTimers.set(tool.site, timer);
229
+ }
230
+ if (!result.ok) {
231
+ const text = formatToolError(result);
232
+ return {
233
+ isError: true,
234
+ content: [{ type: 'text', text: `${text}\n(backend: ${usedBackend})` }],
235
+ };
236
+ }
237
+ try {
238
+ const cache = persistRuntimeBackendsCache({
239
+ tool,
240
+ assetRoot,
241
+ usedBackend,
242
+ attempts,
243
+ });
244
+ if (cache) {
245
+ tool.preferredOrder = cache.preferredOrder;
246
+ log(
247
+ ` learned backend order for ${tool.workflow.toolName}: ${cache.preferredOrder.join(' → ')}`,
248
+ );
249
+ }
250
+ } catch (err) {
221
251
  log(
222
- ` learned backend order for ${tool.workflow.toolName}: ${cache.preferredOrder.join(' ')}`,
252
+ ` warning: could not persist backend order for ${tool.workflow.toolName}: ${err instanceof Error ? err.message : String(err)}`,
223
253
  );
224
254
  }
225
- } catch (err) {
226
- log(
227
- ` warning: could not persist backend order for ${tool.workflow.toolName}: ${err instanceof Error ? err.message : String(err)}`,
228
- );
229
- }
230
- const text =
231
- typeof result.data === 'string' ? result.data : JSON.stringify(result.data, null, 2);
232
- return { content: [{ type: 'text', text: `${text}\n\n(backend: ${usedBackend})` }] };
255
+ const text =
256
+ typeof result.data === 'string' ? result.data : JSON.stringify(result.data, null, 2);
257
+ return { content: [{ type: 'text', text: `${text}\n\n(backend: ${usedBackend})` }] };
258
+ });
233
259
  } catch (err) {
234
260
  const msg = err instanceof Error ? err.message : String(err);
235
261
  return { isError: true, content: [{ type: 'text', text: `[INTERNAL] ${msg}` }] };
@@ -30,6 +30,10 @@ interface RunPlaybookOptions {
30
30
  headed?: boolean;
31
31
  /** Per-step timeout in ms. Default 30000. */
32
32
  stepTimeoutMs?: number;
33
+ /** Whole-playbook timeout in ms. Default unbounded for direct playbook runs. */
34
+ maxDurationMs?: number;
35
+ /** Timeout for diagnostic screenshots in ms. Default 5000. */
36
+ screenshotTimeoutMs?: number;
33
37
  /** Screenshot after every step (not just on failure). */
34
38
  trace?: boolean;
35
39
  /** Inject a Playwright Page for tests. */
@@ -44,6 +48,8 @@ interface RunPlaybookOptions {
44
48
  }
45
49
 
46
50
  const log = createLog('playbook');
51
+ const DEFAULT_STEP_TIMEOUT_MS = 30000;
52
+ const DEFAULT_SCREENSHOT_TIMEOUT_MS = 5000;
47
53
 
48
54
  export async function runPlaybook(opts: RunPlaybookOptions): Promise<ToolResult> {
49
55
  let playbook: Playbook;
@@ -57,7 +63,10 @@ export async function runPlaybook(opts: RunPlaybookOptions): Promise<ToolResult>
57
63
  // Generous default — Akamai sensor JS, A/B loaders, lazy bundles all
58
64
  // need real time to settle. Tight timeouts make broken sites look
59
65
  // worse than they are.
60
- const stepTimeoutMs = opts.stepTimeoutMs ?? 30000;
66
+ const stepTimeoutMs = positiveMs(opts.stepTimeoutMs, DEFAULT_STEP_TIMEOUT_MS);
67
+ const screenshotTimeoutMs = positiveMs(opts.screenshotTimeoutMs, DEFAULT_SCREENSHOT_TIMEOUT_MS);
68
+ const deadlineAt =
69
+ opts.maxDurationMs !== undefined ? Date.now() + positiveMs(opts.maxDurationMs, 1) : null;
61
70
 
62
71
  let browser: Browser | undefined;
63
72
  let context: BrowserContext | undefined;
@@ -137,19 +146,42 @@ export async function runPlaybook(opts: RunPlaybookOptions): Promise<ToolResult>
137
146
 
138
147
  for (const [i, step] of playbook.steps.entries()) {
139
148
  lastStep = i + 1;
149
+ const budgetMs = budgetedTimeoutMs(
150
+ stepTimeoutMs,
151
+ deadlineAt,
152
+ `Playbook exceeded max duration before step ${lastStep}`,
153
+ );
140
154
  log(`step ${i + 1}/${playbook.steps.length}: ${step.action}`);
141
- await executeStep(page, step, params, stepTimeoutMs);
155
+ await withTimeout(
156
+ executeStep(page, step, params, budgetMs),
157
+ budgetMs,
158
+ `Playbook step ${lastStep}/${playbook.steps.length} (${step.action})`,
159
+ );
142
160
  if (opts.trace) {
143
- const traceShot = await screenshot(page, `${playbook.toolName}-trace`, lastStep);
161
+ const traceShot = await screenshot(
162
+ page,
163
+ `${playbook.toolName}-trace`,
164
+ lastStep,
165
+ screenshotTimeoutMs,
166
+ );
144
167
  log(` url=${page.url()}`);
145
168
  if (traceShot) log(` trace screenshot: ${traceShot}`);
146
169
  }
147
170
  }
148
- await Promise.allSettled(pendingBodyReads);
171
+ const bodyReadBudgetMs = budgetedTimeoutMs(
172
+ stepTimeoutMs,
173
+ deadlineAt,
174
+ 'Playbook exceeded max duration while reading captured responses',
175
+ );
176
+ await withTimeout(
177
+ Promise.allSettled(pendingBodyReads),
178
+ bodyReadBudgetMs,
179
+ 'Playbook captured-response drain',
180
+ );
149
181
  const data = await extractResult(page, playbook.result, captured);
150
182
  return { ok: true, data };
151
183
  } catch (err) {
152
- const screenshotPath = await screenshot(page, playbook.toolName, lastStep);
184
+ const screenshotPath = await screenshot(page, playbook.toolName, lastStep, screenshotTimeoutMs);
153
185
  const suffix = screenshotPath ? `\nscreenshot: ${screenshotPath}` : '';
154
186
  const errStr = errMsg(err);
155
187
  // Classify the failure mode honestly: a missing locator, a step
@@ -161,9 +193,10 @@ export async function runPlaybook(opts: RunPlaybookOptions): Promise<ToolResult>
161
193
  // bug, which over-attributes drift to defects. Map known
162
194
  // transient-shape errors to NETWORK so they count as `infra`
163
195
  // (re-runnable) rather than `tool_broken` (permanent defect).
164
- const isTransient = /No locator matched|Timeout \d+ms exceeded|forResponse|waiting for/i.test(
165
- errStr,
166
- );
196
+ const isTransient =
197
+ /No locator matched|Timeout \d+ms exceeded|timed out after|exceeded max duration|forResponse|waiting for/i.test(
198
+ errStr,
199
+ );
167
200
  return {
168
201
  ok: false,
169
202
  error: isTransient ? 'NETWORK' : 'BAD_RESPONSE',
@@ -177,19 +210,58 @@ export async function runPlaybook(opts: RunPlaybookOptions): Promise<ToolResult>
177
210
  }
178
211
  }
179
212
 
180
- async function screenshot(page: Page, toolName: string, stepNum: number): Promise<string | null> {
213
+ async function screenshot(
214
+ page: Page,
215
+ toolName: string,
216
+ stepNum: number,
217
+ timeoutMs: number,
218
+ ): Promise<string | null> {
181
219
  try {
182
220
  const { tmpdir } = await import('node:os');
183
221
  const { join } = await import('node:path');
184
222
  const ts = new Date().toISOString().replace(/[:.]/g, '-');
185
223
  const path = join(tmpdir(), `imprint-playbook-${toolName}-step${stepNum}-${ts}.png`);
186
- await page.screenshot({ path, fullPage: true });
224
+ await withTimeout(page.screenshot({ path, fullPage: true }), timeoutMs, 'Playbook screenshot');
187
225
  return path;
188
226
  } catch {
189
227
  return null;
190
228
  }
191
229
  }
192
230
 
231
+ function positiveMs(value: number | undefined, fallback: number): number {
232
+ if (value === undefined) return fallback;
233
+ return Number.isFinite(value) && value > 0 ? Math.floor(value) : fallback;
234
+ }
235
+
236
+ function budgetedTimeoutMs(
237
+ configuredMs: number,
238
+ deadlineAt: number | null,
239
+ errorMessage: string,
240
+ ): number {
241
+ if (deadlineAt === null) return configuredMs;
242
+ const remainingMs = deadlineAt - Date.now();
243
+ if (remainingMs <= 0) throw new Error(errorMessage);
244
+ return Math.max(1, Math.min(configuredMs, Math.floor(remainingMs)));
245
+ }
246
+
247
+ async function withTimeout<T>(promise: Promise<T>, timeoutMs: number, label: string): Promise<T> {
248
+ const boundedMs = positiveMs(timeoutMs, 1);
249
+ let timer: ReturnType<typeof setTimeout> | undefined;
250
+ try {
251
+ return await Promise.race([
252
+ promise,
253
+ new Promise<never>((_resolve, reject) => {
254
+ timer = setTimeout(
255
+ () => reject(new Error(`${label} timed out after ${boundedMs}ms`)),
256
+ boundedMs,
257
+ );
258
+ }),
259
+ ]);
260
+ } finally {
261
+ if (timer) clearTimeout(timer);
262
+ }
263
+ }
264
+
193
265
  async function loadPlaybook(input: string | Playbook): Promise<Playbook> {
194
266
  if (typeof input !== 'string') return input;
195
267
  if (!existsSync(input)) {
@@ -272,6 +272,27 @@ function backendResultTooSlow(result: BackendsCache['results'][string] | undefin
272
272
  return result?.outcome === 'ok' && result.tooSlow === true;
273
273
  }
274
274
 
275
+ function invalidPreferredOrderReason(cache: BackendsCache): string | null {
276
+ for (const backend of cache.preferredOrder) {
277
+ const result = cache.results[backend];
278
+ if (backend === 'playbook' && result?.outcome !== 'ok') {
279
+ return 'preferredOrder includes playbook without a successful playbook result';
280
+ }
281
+ if (result && result.outcome !== 'ok') {
282
+ return `preferredOrder includes ${backend} with ${result.outcome} result`;
283
+ }
284
+ }
285
+ return null;
286
+ }
287
+
288
+ function existingBackendUsable(
289
+ backend: ConcreteBackend,
290
+ result: BackendsCache['results'][string] | undefined,
291
+ ): boolean {
292
+ if (!result) return backend !== 'playbook';
293
+ return result.outcome === 'ok';
294
+ }
295
+
275
296
  async function probeWarmCdpReplay(
276
297
  tool: ResolvedTool,
277
298
  params: Record<string, string | number | boolean>,
@@ -358,6 +379,15 @@ export function loadBackendsCacheStatus(
358
379
  }
359
380
  }
360
381
  }
382
+ const invalidPreferredReason = invalidPreferredOrderReason(parsed);
383
+ if (invalidPreferredReason) {
384
+ if (opts.warn !== false) {
385
+ process.stderr.write(
386
+ `[imprint] backends.json at ${path} has unsafe preferred backends — ignoring (run \`${remediation}\` to regenerate): ${invalidPreferredReason}\n`,
387
+ );
388
+ }
389
+ return { status: 'invalid', path, reason: invalidPreferredReason, remediation };
390
+ }
361
391
  return { status: 'ok', path, cache: parsed };
362
392
  } catch (err) {
363
393
  const reason = err instanceof Error ? err.message : String(err);
@@ -438,17 +468,11 @@ export function persistRuntimeBackendsCache(opts: {
438
468
  const usedOkAttempt = observedOkAttempts.find((a) => a.backend === opts.usedBackend);
439
469
  const usedBackendTooSlow =
440
470
  usedOkAttempt !== undefined && usedOkAttempt.durationMs > preferredBackendMaxMs();
441
- const existingFast = existingPreferred.filter(
442
- (backend) => !backendResultTooSlow(results[backend]),
443
- );
444
- const existingSlow = existingPreferred.filter((backend) =>
445
- backendResultTooSlow(results[backend]),
471
+ const existingUsable = existingPreferred.filter((backend) =>
472
+ existingBackendUsable(backend, results[backend]),
446
473
  );
447
- const structuralFallbacks: ConcreteBackend[] = existsSync(
448
- pathResolve(opts.tool.dir, 'playbook.yaml'),
449
- )
450
- ? ['playbook']
451
- : [];
474
+ const existingFast = existingUsable.filter((backend) => !backendResultTooSlow(results[backend]));
475
+ const existingSlow = existingUsable.filter((backend) => backendResultTooSlow(results[backend]));
452
476
  const preferredOrder = uniqueBackends([
453
477
  ...(usedOkAttempt && !usedBackendTooSlow ? [opts.usedBackend] : []),
454
478
  ...existingFast,
@@ -456,7 +480,6 @@ export function persistRuntimeBackendsCache(opts: {
456
480
  ...existingSlow,
457
481
  ...slowObservedOk,
458
482
  ...(usedOkAttempt && usedBackendTooSlow ? [opts.usedBackend] : []),
459
- ...structuralFallbacks,
460
483
  ]);
461
484
  const cache: BackendsCache = {
462
485
  probedAt: new Date().toISOString(),