imprint-mcp 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97):
  1. package/CHANGELOG.md +168 -0
  2. package/LICENSE +21 -0
  3. package/README.md +322 -0
  4. package/examples/discoverandgo/README.md +57 -0
  5. package/examples/discoverandgo/book_discoverandgo_museum_pass/cron.json +8 -0
  6. package/examples/discoverandgo/book_discoverandgo_museum_pass/index.ts +89 -0
  7. package/examples/discoverandgo/book_discoverandgo_museum_pass/workflow.json +39 -0
  8. package/examples/echo/README.md +37 -0
  9. package/examples/echo/echo_test/index.ts +31 -0
  10. package/examples/google-flights/search_google_flights/index.ts +101 -0
  11. package/examples/google-flights/search_google_flights/parser.test.ts +140 -0
  12. package/examples/google-flights/search_google_flights/parser.ts +189 -0
  13. package/examples/google-flights/search_google_flights/playbook.yaml +130 -0
  14. package/examples/google-flights/search_google_flights/workflow.json +48 -0
  15. package/examples/google-hotels/search_google_hotels/index.ts +194 -0
  16. package/examples/google-hotels/search_google_hotels/parser.test.ts +168 -0
  17. package/examples/google-hotels/search_google_hotels/parser.ts +330 -0
  18. package/examples/google-hotels/search_google_hotels/playbook.yaml +125 -0
  19. package/examples/google-hotels/search_google_hotels/workflow.json +111 -0
  20. package/examples/namecheap-domains/search_namecheap_domains/index.ts +144 -0
  21. package/examples/namecheap-domains/search_namecheap_domains/parser.ts +380 -0
  22. package/examples/namecheap-domains/search_namecheap_domains/playbook.yaml +50 -0
  23. package/examples/namecheap-domains/search_namecheap_domains/request-transform.ts +136 -0
  24. package/examples/namecheap-domains/search_namecheap_domains/workflow.json +97 -0
  25. package/examples/southwest/README.md +81 -0
  26. package/examples/southwest/search_southwest_flights/backends.json +23 -0
  27. package/examples/southwest/search_southwest_flights/cron.json +19 -0
  28. package/examples/southwest/search_southwest_flights/index.ts +110 -0
  29. package/examples/southwest/search_southwest_flights/playbook.yaml +46 -0
  30. package/examples/southwest/search_southwest_flights/workflow.json +54 -0
  31. package/package.json +78 -0
  32. package/prompts/compile-agent.md +580 -0
  33. package/prompts/intent-detection.md +198 -0
  34. package/prompts/playbook-compilation.md +279 -0
  35. package/prompts/request-triage.md +74 -0
  36. package/prompts/tool-candidate-detection.md +104 -0
  37. package/src/cli.ts +1287 -0
  38. package/src/imprint/agent.ts +468 -0
  39. package/src/imprint/app-api-hosts.ts +53 -0
  40. package/src/imprint/backend-ladder.ts +568 -0
  41. package/src/imprint/check.ts +136 -0
  42. package/src/imprint/chromium.ts +211 -0
  43. package/src/imprint/claude-cli-compile.ts +640 -0
  44. package/src/imprint/cli-credential.ts +394 -0
  45. package/src/imprint/codex-cli-compile.ts +712 -0
  46. package/src/imprint/compile-agent-types.ts +40 -0
  47. package/src/imprint/compile-agent.ts +404 -0
  48. package/src/imprint/compile-tools.ts +1389 -0
  49. package/src/imprint/compile.ts +720 -0
  50. package/src/imprint/cookie-jar.ts +246 -0
  51. package/src/imprint/credential-bundle.ts +195 -0
  52. package/src/imprint/credential-extract.ts +290 -0
  53. package/src/imprint/credential-store.ts +707 -0
  54. package/src/imprint/cron.ts +312 -0
  55. package/src/imprint/doctor.ts +223 -0
  56. package/src/imprint/emit.ts +154 -0
  57. package/src/imprint/etld.ts +134 -0
  58. package/src/imprint/freeform-redact.ts +216 -0
  59. package/src/imprint/inject-listener.ts +137 -0
  60. package/src/imprint/install.ts +795 -0
  61. package/src/imprint/integrations.ts +385 -0
  62. package/src/imprint/is-compiled.ts +2 -0
  63. package/src/imprint/json-path.ts +100 -0
  64. package/src/imprint/llm.ts +998 -0
  65. package/src/imprint/load-json.ts +54 -0
  66. package/src/imprint/log.ts +33 -0
  67. package/src/imprint/login.ts +166 -0
  68. package/src/imprint/mcp-compile-server.ts +282 -0
  69. package/src/imprint/mcp-maintenance.ts +1790 -0
  70. package/src/imprint/mcp-server.ts +350 -0
  71. package/src/imprint/multi-progress.ts +69 -0
  72. package/src/imprint/notify.ts +155 -0
  73. package/src/imprint/paths.ts +64 -0
  74. package/src/imprint/playbook-parser.ts +21 -0
  75. package/src/imprint/playbook-runner.ts +465 -0
  76. package/src/imprint/probe-backends.ts +251 -0
  77. package/src/imprint/progress.ts +28 -0
  78. package/src/imprint/record.ts +470 -0
  79. package/src/imprint/redact.ts +550 -0
  80. package/src/imprint/replay-capture.ts +387 -0
  81. package/src/imprint/request-context.ts +66 -0
  82. package/src/imprint/runtime-link.ts +73 -0
  83. package/src/imprint/runtime.ts +942 -0
  84. package/src/imprint/sensitive-keys.ts +156 -0
  85. package/src/imprint/session-diff.ts +409 -0
  86. package/src/imprint/session-merge.ts +198 -0
  87. package/src/imprint/session-writer.ts +149 -0
  88. package/src/imprint/sites.ts +27 -0
  89. package/src/imprint/stealth-fetch.ts +434 -0
  90. package/src/imprint/teach-state.ts +235 -0
  91. package/src/imprint/teach.ts +2120 -0
  92. package/src/imprint/tool-candidates.ts +423 -0
  93. package/src/imprint/tool-loader.ts +186 -0
  94. package/src/imprint/tool-selection.ts +70 -0
  95. package/src/imprint/tracing.ts +508 -0
  96. package/src/imprint/types.ts +472 -0
  97. package/src/imprint/version.ts +21 -0
@@ -0,0 +1,568 @@
1
+ /**
2
+ * Walk a list of backends in order, escalating on FORBIDDEN and satisfiable
3
+ * STATE_MISSING; other errors return immediately. fetch-bootstrap is a gated
4
+ * API-replay adapter, not a default DOM fallback rung: auto only reaches it
5
+ * for workflows that declare bootstrap/captures or STATE_MISSING says browser
6
+ * bootstrap can satisfy the missing state.
7
+ */
8
+
9
+ import { existsSync } from 'node:fs';
10
+ import { resolve as pathResolve } from 'node:path';
11
+ import type { Page } from 'playwright';
12
+ import { RuntimeCookieJar } from './cookie-jar.ts';
13
+ import { createLog } from './log.ts';
14
+ import { runPlaybook } from './playbook-runner.ts';
15
+ import { type CredentialStore, loadCredentialStore, substituteString } from './runtime.ts';
16
+ import { type StealthFetch, createStealthFetch } from './stealth-fetch.ts';
17
+ import type { ResolvedTool } from './tool-loader.ts';
18
+ import type {
19
+ BootstrapCapture,
20
+ ConcreteBackend,
21
+ ReplayBackend,
22
+ StateCapability,
23
+ StateMissingItem,
24
+ ToolResult,
25
+ Workflow,
26
+ } from './types.ts';
27
+
28
/** What happened on a single rung of the ladder. */
interface LadderAttempt {
  /** The backend this entry describes. */
  backend: ConcreteBackend;
  /**
   * `ok` — the rung succeeded; `escalate` — it failed in a way a later rung
   * may fix; `failed` — it failed terminally; `unavailable` — it was skipped
   * because a prerequisite was missing.
   */
  outcome: 'ok' | 'escalate' | 'failed' | 'unavailable';
  /** Short human-readable explanation of the outcome. */
  detail: string;
  /** Wall-clock time spent on the rung, in milliseconds. */
  durationMs: number;
}

/** Outcome of walking a backend ladder. */
interface LadderResult {
  /** The tool result that is handed back to the caller. */
  result: ToolResult;
  /** The backend reported as having produced `result`. */
  usedBackend: ConcreteBackend;
  /** One entry per rung that was tried. */
  attempts: LadderAttempt[];
}

/** Logger for this module, created with the 'backend' label. */
const log = createLog('backend');

/** Rungs used for 'auto' when no probed order is cached. */
const DEFAULT_LADDER: ConcreteBackend[] = ['fetch', 'stealth-fetch', 'playbook'];
43
+
44
/**
 * Expand a replayBackend choice into a concrete ladder.
 *
 * 'auto' prefers the probed order (when one is cached and non-empty) and
 * otherwise falls back to the default ladder. Any explicit choice becomes a
 * single-rung ladder.
 *
 * The returned array is always a fresh copy, so a caller that mutates it
 * cannot corrupt the module-level default ladder or the cached order.
 *
 * @param backend - The configured replay backend, or 'auto'.
 * @param cachedPreferredOrder - Previously probed rung order, if any.
 * @returns The rungs to try, in order.
 */
export function resolveLadder(
  backend: ReplayBackend,
  cachedPreferredOrder?: ConcreteBackend[],
): ConcreteBackend[] {
  if (backend !== 'auto') return [backend];
  const source =
    cachedPreferredOrder && cachedPreferredOrder.length > 0
      ? cachedPreferredOrder
      : DEFAULT_LADDER;
  return [...source];
}
57
+
58
/**
 * Walk the ladder rung by rung and return the first conclusive result.
 *
 * - A successful rung returns immediately.
 * - FORBIDDEN escalates to the next rung.
 * - STATE_MISSING escalates only when a later rung can satisfy every required
 *   missing item; intermediate rungs are skipped.
 * - Any other error (including an exception thrown by the rung, reported as
 *   UNKNOWN) is returned immediately.
 * - The playbook rung is recorded as `unavailable` and skipped when no
 *   playbook.yaml exists.
 *
 * When every rung escalated or was unavailable, the last produced result is
 * returned and `usedBackend` names the rung that actually produced it (not
 * merely the last entry of the ladder, which may have been skipped).
 *
 * @param ladder - Rungs to try, in order; must be non-empty.
 * @param tool - The resolved tool (workflow, site, directory, tool function).
 * @param params - Parameters passed through to the tool or playbook.
 * @param assetRoot - Root directory used to locate playbook.yaml when the tool has no directory.
 * @param stealthCache - Per-site cache of stealth fetchers, shared across calls.
 * @returns The final result, the backend that produced it, and the per-rung attempt log.
 * @throws Error when `ladder` is empty.
 */
export async function runWithLadder(
  ladder: ConcreteBackend[],
  tool: ResolvedTool,
  params: Record<string, string | number | boolean>,
  assetRoot: string,
  stealthCache: Map<string, StealthFetch>,
): Promise<LadderResult> {
  if (ladder.length === 0) {
    throw new Error('runWithLadder: empty ladder');
  }

  const effectiveLadder = effectiveAutoLadder(ladder, tool.workflow);
  const attempts: LadderResult['attempts'] = [];
  let lastResult: ToolResult | null = null;
  // Backend that produced `lastResult`; differs from the ladder's last entry
  // whenever trailing rungs were skipped as unavailable.
  let lastBackend: ConcreteBackend | null = null;
  let skipUntilBackend: ConcreteBackend | null = null;

  for (const backend of effectiveLadder) {
    // After a STATE_MISSING escalation, fast-forward to the chosen rung.
    if (skipUntilBackend && backend !== skipUntilBackend) continue;
    if (skipUntilBackend === backend) skipUntilBackend = null;

    if (backend === 'playbook' && !existsSync(playbookPath(assetRoot, tool.site, tool.dir))) {
      attempts.push({
        backend,
        outcome: 'unavailable',
        detail: 'no playbook.yaml',
        durationMs: 0,
      });
      log(`${backend}: skipped (prerequisite missing)`);
      continue;
    }

    const t0 = Date.now();
    log(`trying ${backend}…`);
    let result: ToolResult;
    try {
      switch (backend) {
        case 'fetch':
          result = await tool.toolFn(params);
          break;
        case 'fetch-bootstrap':
          result = await runFetchBootstrap(tool, params);
          break;
        case 'stealth-fetch': {
          const sf = ensureStealthFetch(tool, stealthCache);
          result = await tool.toolFn(params, { fetchImpl: sf.fetchImpl });
          break;
        }
        case 'playbook':
          result = await runPlaybook({
            playbook: playbookPath(assetRoot, tool.site, tool.dir),
            params,
            site: tool.site,
          });
          break;
      }
    } catch (err) {
      // A throwing rung is reported as a terminal UNKNOWN failure.
      const msg = err instanceof Error ? err.message : String(err);
      result = { ok: false, error: 'UNKNOWN', message: `${backend} threw: ${msg}` };
    }
    const durationMs = Date.now() - t0;
    lastResult = result;
    lastBackend = backend;

    if (result.ok) {
      attempts.push({ backend, outcome: 'ok', detail: `succeeded in ${durationMs}ms`, durationMs });
      log(`${backend}: OK in ${durationMs}ms`);
      return { result, usedBackend: backend, attempts };
    }

    if (result.error === 'FORBIDDEN') {
      attempts.push({
        backend,
        outcome: 'escalate',
        detail: `${result.error}: ${result.message.slice(0, 120)}`,
        durationMs,
      });
      log(`${backend}: FORBIDDEN in ${durationMs}ms — escalating`);
      continue;
    }

    if (result.error === 'STATE_MISSING') {
      const next = nextStateMissingBackend(effectiveLadder, backend, result.missing ?? []);
      if (next) {
        attempts.push({
          backend,
          outcome: 'escalate',
          detail: `${result.error}: ${result.message.slice(0, 120)}`,
          durationMs,
        });
        log(`${backend}: STATE_MISSING in ${durationMs}ms — escalating to ${next}`);
        skipUntilBackend = next;
        continue;
      }
    }

    // Non-FORBIDDEN errors don't escalate — different backend can't fix
    // AUTH_EXPIRED, NETWORK, RATE_LIMITED.
    attempts.push({
      backend,
      outcome: 'failed',
      detail: `${result.error}: ${result.message.slice(0, 120)}`,
      durationMs,
    });
    log(`${backend}: ${result.error} in ${durationMs}ms — non-escalatable, returning`);
    return { result, usedBackend: backend, attempts };
  }

  // Every backend either escalated (FORBIDDEN / STATE_MISSING) or was unavailable.
  const ladderTail = effectiveLadder[effectiveLadder.length - 1] ?? 'fetch';
  if (!lastResult) {
    return {
      result: {
        ok: false,
        error: 'UNKNOWN',
        message: `Every backend in the ladder was unavailable: ${effectiveLadder.join(', ')}. For "auto" mode, ensure at least workflow.json exists; for the playbook rung, run \`imprint compile-playbook\` first.`,
      },
      usedBackend: ladderTail,
      attempts,
    };
  }
  const producedBy = lastBackend ?? ladderTail;
  log(`every backend escalated; returning last error from ${producedBy}`);
  return {
    result: lastResult,
    usedBackend: producedBy,
    attempts,
  };
}
186
+
187
/**
 * Insert the gated `fetch-bootstrap` rung right after `fetch` for multi-rung
 * ladders whose workflow needs browser bootstrap.
 *
 * The input ladder is returned untouched (same array) when it has at most one
 * rung, already contains `fetch-bootstrap`, has no `fetch` rung, or the
 * workflow does not need bootstrap. Otherwise a new array is returned.
 */
function effectiveAutoLadder(ladder: ConcreteBackend[], workflow: Workflow): ConcreteBackend[] {
  const isSingleRung = ladder.length <= 1;
  const alreadyHasBootstrap = ladder.includes('fetch-bootstrap');
  if (isSingleRung || alreadyHasBootstrap || !workflowNeedsBootstrap(workflow)) {
    return ladder;
  }

  const fetchPosition = ladder.indexOf('fetch');
  if (fetchPosition === -1) return ladder;

  return [
    ...ladder.slice(0, fetchPosition + 1),
    'fetch-bootstrap',
    ...ladder.slice(fetchPosition + 1),
  ];
}
196
+
197
/**
 * Report whether a workflow needs the fetch-bootstrap rung: either it declares
 * `bootstrap` metadata, or one of its request captures carries the
 * `browser_bootstrap` or `stealth_bootstrap` capability.
 */
function workflowNeedsBootstrap(workflow: Workflow): boolean {
  if (workflow.bootstrap) return true;
  for (const request of workflow.requests) {
    for (const capture of request.captures ?? []) {
      const { capability } = capture;
      if (capability === 'browser_bootstrap' || capability === 'stealth_bootstrap') {
        return true;
      }
    }
  }
  return false;
}
205
+
206
/**
 * Find the first rung after `backend` that can satisfy the missing state.
 *
 * @returns That rung, or null when `backend` is not in the ladder or no later
 *   rung qualifies.
 */
function nextStateMissingBackend(
  ladder: ConcreteBackend[],
  backend: ConcreteBackend,
  missing: StateMissingItem[],
): ConcreteBackend | null {
  const currentIdx = ladder.indexOf(backend);
  if (currentIdx < 0) return null;
  const candidate = ladder.find(
    (rung, position) => position > currentIdx && stateMissingSatisfiableBy(rung, missing),
  );
  return candidate ?? null;
}
218
+
219
/**
 * True when there is at least one required missing item and `backend` can
 * provide the capability of every required item. Items marked
 * `required: false` are ignored.
 */
function stateMissingSatisfiableBy(backend: ConcreteBackend, missing: StateMissingItem[]): boolean {
  let sawRequired = false;
  for (const item of missing) {
    if (item.required === false) continue;
    sawRequired = true;
    if (!capabilitySatisfiedBy(backend, item.capability)) return false;
  }
  return sawRequired;
}
224
+
225
/**
 * Capability matrix for STATE_MISSING escalation:
 * `fetch-bootstrap` provides `browser_bootstrap` and `stealth_bootstrap`;
 * `playbook` provides those plus `ordinary_http`; every other backend
 * provides nothing.
 */
function capabilitySatisfiedBy(backend: ConcreteBackend, capability: StateCapability): boolean {
  const isBootstrapCapability =
    capability === 'browser_bootstrap' || capability === 'stealth_bootstrap';
  switch (backend) {
    case 'fetch-bootstrap':
      return isBootstrapCapability;
    case 'playbook':
      return isBootstrapCapability || capability === 'ordinary_http';
    default:
      return false;
  }
}
239
+
240
/** Bootstrap metadata of a workflow that is known to declare it. */
type BootstrapSpec = NonNullable<Workflow['bootstrap']>;

/** Playwright browser-context type, derived from the already-imported `Page` type. */
type BootstrapContext = ReturnType<Page['context']>;

/**
 * Copy stored credentials into a fresh browser context: cookies via
 * `addCookies`, and localStorage records via an init script that only writes
 * records whose origin matches the page being loaded. sessionStorage records
 * are not replayed.
 */
async function seedBootstrapContext(
  context: BootstrapContext,
  credentials: CredentialStore,
  bootstrapUrl: string,
): Promise<void> {
  if (credentials.cookies.length > 0) {
    await context.addCookies(
      credentials.cookies.map((c) => ({
        name: c.name,
        value: c.value,
        // Host-only cookies are addressed by URL, domain cookies by domain.
        // NOTE(review): Playwright documents `url` and `domain`/`path` as
        // alternatives; `path` is still passed alongside `url` here — confirm
        // the installed Playwright version accepts that combination.
        domain: c.hostOnly ? undefined : c.domain,
        url: c.hostOnly ? cookieUrlFor(c, bootstrapUrl) : undefined,
        path: c.path,
        expires: c.expires,
        httpOnly: c.httpOnly,
        secure: c.secure,
        sameSite: sameSiteForPlaywright(c.sameSite),
      })),
    );
  }
  if ((credentials.storage ?? []).length > 0) {
    // This callback is serialized and executed inside the page, so it must
    // stay self-contained (no references to module scope).
    await context.addInitScript((records) => {
      const browserGlobal = globalThis as unknown as {
        location: { origin: string };
        localStorage: { setItem(key: string, value: string): void };
      };
      for (const record of records as Array<{
        origin: string;
        kind: 'localStorage' | 'sessionStorage';
        key: string;
        value: string;
      }>) {
        if (record.kind !== 'localStorage') continue;
        if (browserGlobal.location.origin !== record.origin) continue;
        browserGlobal.localStorage.setItem(record.key, record.value);
      }
    }, credentials.storage ?? []);
  }
}

/**
 * Evaluate every bootstrap capture against the loaded page and store non-empty
 * values in `initialState`. Cookie captures yield no value here and are
 * resolved later from the cookie jar.
 *
 * @returns A STATE_MISSING result for the first required capture that throws
 *   or yields no value, or null when all required captures succeeded.
 */
async function collectPageCaptures(
  bootstrap: BootstrapSpec,
  page: Page,
  initialState: Record<string, unknown>,
): Promise<ToolResult | null> {
  const html = await page.content();
  for (const capture of bootstrap.captures ?? []) {
    let value: unknown;
    try {
      value = await evaluateBootstrapCapture(capture, page, html);
    } catch (err) {
      if (capture.required === false) continue;
      return bootstrapCaptureMissingResult(
        capture,
        `Bootstrap capture "${capture.name}" (${capture.source}) failed: ${err instanceof Error ? err.message : String(err)}`,
        'producer_ran_value_absent',
      );
    }
    if (value !== undefined && value !== null && value !== '') {
      initialState[capture.name] = value;
    } else if (capture.required !== false && capture.source !== 'cookie') {
      return bootstrapCaptureMissingResult(
        capture,
        `Required bootstrap capture "${capture.name}" (${capture.source}) did not produce a value.`,
        'producer_ran_value_absent',
      );
    }
  }
  return null;
}

/**
 * Resolve cookie-sourced bootstrap captures from the merged cookie list and
 * store their values in `initialState`.
 *
 * @returns A STATE_MISSING result for the first required cookie capture that
 *   cannot be resolved, or null when all required cookie captures succeeded.
 */
function collectCookieCaptures(
  bootstrap: BootstrapSpec,
  cookies: CredentialStore['cookies'],
  bootstrapUrl: string,
  initialState: Record<string, unknown>,
): ToolResult | null {
  const jar = new RuntimeCookieJar(cookies);
  for (const capture of bootstrap.captures ?? []) {
    if (capture.source !== 'cookie') continue;
    const lookup = jar.lookup(capture.cookie, capture.url ?? bootstrapUrl, {
      url: capture.url,
      domain: capture.domain,
      path: capture.path,
      sameSite: capture.sameSite,
      allowHttpOnlyProjection: capture.allowHttpOnlyProjection,
    });
    if (lookup.ok) initialState[capture.name] = lookup.cookie.value;
    else if (capture.required !== false) {
      return bootstrapCaptureMissingResult(
        capture,
        lookup.reason === 'ambiguous'
          ? `Bootstrap cookie capture "${capture.name}" is ambiguous; add url/domain/path constraints.`
          : lookup.reason === 'httponly'
            ? `Bootstrap cookie capture "${capture.name}" targets HttpOnly cookie "${capture.cookie}" without allowHttpOnlyProjection.`
            : `Bootstrap cookie capture "${capture.name}" did not find cookie "${capture.cookie}".`,
        lookup.reason === 'ambiguous' ? 'ambiguous_cookie' : 'producer_ran_value_absent',
      );
    }
  }
  return null;
}

/**
 * fetch-bootstrap rung: load the workflow's bootstrap URL in headless
 * Chromium seeded with stored credentials, harvest the declared captures and
 * the browser's cookies, then run the tool with that state.
 *
 * @param tool - Resolved tool whose workflow should declare `bootstrap`.
 * @param params - Tool parameters; also substituted into the bootstrap URL.
 * @returns The tool's result on success. STATE_MISSING when bootstrap metadata
 *   or a required capture is missing, or when the browser step throws and the
 *   workflow declares required captures. NETWORK when the browser step throws
 *   and there are no required captures.
 */
async function runFetchBootstrap(
  tool: ResolvedTool,
  params: Record<string, string | number | boolean>,
): Promise<ToolResult> {
  const bootstrap = tool.workflow.bootstrap;
  if (!bootstrap) {
    return {
      ok: false,
      error: 'STATE_MISSING',
      message: 'fetch-bootstrap requires workflow.bootstrap metadata.',
      missing: [
        {
          name: 'workflow.bootstrap',
          source: 'workflow',
          capability: 'browser_bootstrap',
          required: true,
          failure: 'producer_unavailable',
          message: 'workflow.bootstrap is missing',
        },
      ],
      remediation: 'Regenerate or edit workflow.json with bootstrap metadata.',
    };
  }

  // Fall back to an empty credential store when none is saved for the site.
  const credentials = (await loadCredentialStore(tool.site)) ?? {
    site: tool.site,
    cookies: [],
    values: {},
    storage: [],
  };
  const bootstrapUrl = substituteString(bootstrap.url, params, credentials, []);
  const initialState: Record<string, unknown> = {};
  // Imported lazily so Playwright is only loaded when this rung actually runs.
  const { chromium } = await import('playwright');
  let browser: Awaited<ReturnType<typeof chromium.launch>> | undefined;
  try {
    browser = await chromium.launch({ headless: true });
    const context = await browser.newContext();
    await seedBootstrapContext(context, credentials, bootstrapUrl);

    const page = await context.newPage();
    // Images, media and fonts are aborted; every other request continues.
    await page.route('**/*', async (route) => {
      const type = route.request().resourceType();
      if (['image', 'media', 'font'].includes(type)) return route.abort();
      return route.continue();
    });
    await page.goto(bootstrapUrl, {
      waitUntil: bootstrap.waitUntil ?? 'domcontentloaded',
      timeout: bootstrap.timeoutMs ?? 30_000,
    });
    if (bootstrap.waitMs) await page.waitForTimeout(bootstrap.waitMs);

    const pageFailure = await collectPageCaptures(bootstrap, page, initialState);
    if (pageFailure) return pageFailure;

    // Stored cookies come first, followed by whatever the browser holds now.
    const cookies = await context.cookies();
    const bootstrappedCredentials: CredentialStore = {
      ...credentials,
      cookies: [
        ...credentials.cookies,
        ...cookies.map((c) => ({
          name: c.name,
          value: c.value,
          domain: c.domain,
          path: c.path,
          expires: c.expires,
          httpOnly: c.httpOnly,
          secure: c.secure,
          sameSite: c.sameSite,
          hostOnly: !c.domain.startsWith('.'),
        })),
      ],
    };

    const cookieFailure = collectCookieCaptures(
      bootstrap,
      bootstrappedCredentials.cookies,
      bootstrapUrl,
      initialState,
    );
    if (cookieFailure) return cookieFailure;

    return await tool.toolFn(params, {
      credentials: bootstrappedCredentials,
      initialState,
    });
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    const stateMissing = bootstrapFailureStateMissingResult(
      tool.workflow,
      `fetch-bootstrap could not produce required bootstrap state: ${reason}`,
    );
    if (stateMissing) return stateMissing;
    return {
      ok: false,
      error: 'NETWORK',
      message: `fetch-bootstrap failed: ${reason}`,
    };
  } finally {
    // Best-effort cleanup: a failing close must not mask the real result.
    await browser?.close().catch(() => {});
  }
}
405
+
406
/**
 * Build a STATE_MISSING result that lists every required bootstrap capture as
 * unavailable, for use when the bootstrap step as a whole failed.
 *
 * @returns The result, or null when the workflow declares no required
 *   bootstrap captures.
 */
function bootstrapFailureStateMissingResult(
  workflow: Workflow,
  message: string,
): ToolResult | null {
  const requiredCaptures: BootstrapCapture[] = [];
  for (const capture of workflow.bootstrap?.captures ?? []) {
    if (capture.required !== false) requiredCaptures.push(capture);
  }
  if (requiredCaptures.length === 0) return null;

  const missing = requiredCaptures.map((capture) =>
    bootstrapMissingItem(capture, message, 'producer_unavailable'),
  );
  const capabilities = requiredCaptures.map((capture) => capture.capability);
  return {
    ok: false,
    error: 'STATE_MISSING',
    message,
    missing,
    remediation: remediationForBootstrapCapabilities(capabilities),
  };
}
424
+
425
/**
 * Build a STATE_MISSING result describing one bootstrap capture that could
 * not be produced.
 */
function bootstrapCaptureMissingResult(
  capture: BootstrapCapture,
  message: string,
  failure: StateMissingItem['failure'],
): ToolResult {
  const item = bootstrapMissingItem(capture, message, failure);
  const remediation = remediationForBootstrapCapabilities([capture.capability]);
  return {
    ok: false,
    error: 'STATE_MISSING',
    message,
    missing: [item],
    remediation,
  };
}
438
+
439
/**
 * Describe a bootstrap capture as a missing-state item. The item is always
 * marked `required: true`; its source is mapped from the capture's source.
 */
function bootstrapMissingItem(
  capture: BootstrapCapture,
  message: string,
  failure: StateMissingItem['failure'],
): StateMissingItem {
  const { name, capability } = capture;
  const source = bootstrapCaptureSource(capture);
  return { name, source, capability, required: true, failure, message };
}
453
+
454
/**
 * Map a bootstrap capture's source onto the coarser missing-item source:
 * cookie captures stay `cookie`, local/session storage become `storage`, and
 * everything else is reported as `state`.
 */
function bootstrapCaptureSource(capture: BootstrapCapture): StateMissingItem['source'] {
  switch (capture.source) {
    case 'cookie':
      return 'cookie';
    case 'local_storage':
    case 'session_storage':
      return 'storage';
    default:
      return 'state';
  }
}
459
+
460
/**
 * Pick the remediation hint for a set of missing capabilities: the "auto"
 * ladder advice when any of them is `stealth_bootstrap`, otherwise the
 * fetch-bootstrap advice.
 */
function remediationForBootstrapCapabilities(capabilities: StateCapability[]): string {
  const needsStealth = capabilities.some((capability) => capability === 'stealth_bootstrap');
  if (needsStealth) {
    return 'Use replayBackend: "auto" so Imprint can try fetch-bootstrap and then the playbook fallback when API replay cannot mint bot-defense/browser state.';
  }
  return 'Run through fetch-bootstrap, or update workflow.bootstrap so Imprint can mint browser state before API replay.';
}
465
+
466
/**
 * Produce the raw value for one bootstrap capture.
 *
 * - `html_regex`: the requested group (default 1) of the first match in `html`.
 * - `dom_attribute` / `dom_text`: attribute or text of the first element
 *   matching the selector, waiting up to `timeoutMs` (default 5000 ms).
 * - `local_storage` / `session_storage`: the stored item, or null when the
 *   page's origin differs from the capture's origin.
 * - `cookie`: always undefined here.
 *
 * @returns The captured value; may be undefined or null when nothing matched.
 */
async function evaluateBootstrapCapture(
  capture: BootstrapCapture,
  page: Page,
  html: string,
): Promise<unknown> {
  if (capture.source === 'cookie') return undefined;

  if (capture.source === 'html_regex') {
    const found = new RegExp(capture.pattern).exec(html);
    return found?.[capture.group ?? 1];
  }

  if (capture.source === 'dom_attribute' || capture.source === 'dom_text') {
    const target = page.locator(capture.selector).first();
    const waitOptions = { timeout: capture.timeoutMs ?? 5000 };
    return capture.source === 'dom_attribute'
      ? await target.getAttribute(capture.attribute, waitOptions)
      : await target.textContent(waitOptions);
  }

  if (capture.source === 'local_storage' || capture.source === 'session_storage') {
    // The callback runs inside the page, so everything it needs is passed in
    // through the argument object.
    return await page.evaluate(
      ({ origin, key, area }) => {
        const browserGlobal = globalThis as unknown as {
          location: { origin: string };
          localStorage: { getItem(key: string): string | null };
          sessionStorage: { getItem(key: string): string | null };
        };
        if (browserGlobal.location.origin !== origin) return null;
        const store =
          area === 'local_storage' ? browserGlobal.localStorage : browserGlobal.sessionStorage;
        return store.getItem(key);
      },
      { origin: capture.origin, key: capture.key, area: capture.source },
    );
  }

  return undefined;
}
516
+
517
/**
 * Normalize a stored SameSite value (any letter case) to the capitalized form
 * Playwright expects. Missing, empty or unrecognized values map to undefined.
 */
function sameSiteForPlaywright(
  sameSite: string | undefined,
): 'Strict' | 'Lax' | 'None' | undefined {
  switch (sameSite?.toLowerCase()) {
    case 'strict':
      return 'Strict';
    case 'lax':
      return 'Lax';
    case 'none':
      return 'None';
    default:
      return undefined;
  }
}
527
+
528
/**
 * Build a URL that scopes a cookie to its own host: take `fallback`, swap in
 * the cookie's domain (leading dot removed) and force https for secure
 * cookies. When `fallback` is not a parseable URL, return the bare
 * `scheme://host/` instead.
 */
function cookieUrlFor(cookie: { domain: string; secure?: boolean }, fallback: string): string {
  const host = cookie.domain.replace(/^\./, '');
  try {
    const target = new URL(fallback);
    target.hostname = host;
    if (cookie.secure) target.protocol = 'https:';
    return target.toString();
  } catch {
    const scheme = cookie.secure ? 'https' : 'http';
    return `${scheme}://${host}/`;
  }
}
538
+
539
/**
 * Return the stealth fetcher for the tool's site, creating and caching it on
 * first use so the expensive bootstrap (~12s) is paid once per process.
 */
function ensureStealthFetch(tool: ResolvedTool, cache: Map<string, StealthFetch>): StealthFetch {
  let fetcher = cache.get(tool.site);
  if (!fetcher) {
    fetcher = createStealthFetch({ baseUrl: pickBaseUrl(tool) });
    cache.set(tool.site, fetcher);
  }
  return fetcher;
}
547
+
548
/**
 * Derive the stealth-fetch base URL from the first request URL's origin —
 * Akamai binds sensor tokens to that origin, and the origin is always literal
 * (substitutions only appear after the domain in well-formed workflows).
 *
 * The authority is cut at the first `/`, `?` or `#`, so a URL without a path
 * (e.g. `https://host?x=1`) still yields just `https://host`.
 *
 * @param tool - Resolved tool whose workflow supplies the request list.
 * @returns The `scheme://authority` prefix of the first request URL.
 * @throws Error when the workflow has no requests, or when the first request
 *   URL does not start with http:// or https://.
 */
function pickBaseUrl(tool: ResolvedTool): string {
  const firstRequest = tool.workflow.requests[0];
  if (!firstRequest) {
    throw new Error(
      `Workflow ${tool.workflow.toolName} has no requests — stealth-fetch needs at least one request URL.\n→ re-record the session; recording probably stopped before any XHR fired.`,
    );
  }
  const m = firstRequest.url.match(/^(https?:\/\/[^/?#]+)/);
  if (m?.[1]) return m[1];
  throw new Error(
    `Could not derive bootstrap origin from URL: ${firstRequest.url}\n→ check workflow.json — the first request URL must start with https://<domain>.`,
  );
}
564
+
565
/**
 * Locate playbook.yaml: inside the tool's own directory when one is given,
 * otherwise under `<assetRoot>/<site>/`.
 */
function playbookPath(assetRoot: string, site: string, toolDir?: string): string {
  const baseSegments = toolDir ? [toolDir] : [assetRoot, site];
  return pathResolve(...baseSegments, 'playbook.yaml');
}
@@ -0,0 +1,136 @@
1
+ /** `imprint check` — sanity-check a captured session.json or .jsonl
2
+ * for obvious gaps (no requests, no narration, no end markers). */
3
+
4
+ import { existsSync, readFileSync } from 'node:fs';
5
+ import { extname } from 'node:path';
6
+ import { assembleFromJsonl } from './session-writer.ts';
7
+ import { type Session, SessionSchema } from './types.ts';
8
+
9
/** Outcome of sanity-checking one captured session file. */
interface CheckResult {
  /** True only when no warnings were raised. */
  ok: boolean;
  /** Human-readable findings; empty when the capture looks complete. */
  warnings: string[];
  /** Multi-line overview of the session; empty when the file could not be loaded. */
  summary: string;
}
14
+
15
/**
 * Load a captured session (`.jsonl` stream or JSON document) and look for
 * obvious gaps: no requests, no narration, no user interaction, no mutating
 * request, missing cookie snapshots, mostly-failing responses, or a very
 * short recording.
 *
 * Never throws for a missing or unparseable file; those are reported as a
 * single warning with an empty summary.
 *
 * @param path - Path to the session file; `.jsonl` is assembled, anything else is parsed as JSON.
 * @returns `ok` (no warnings), the warnings, and a multi-line summary.
 */
export function checkSession(path: string): CheckResult {
  if (!existsSync(path)) {
    return { ok: false, warnings: [`File not found: ${path}`], summary: '' };
  }

  let session: Session;
  try {
    if (extname(path) === '.jsonl') {
      session = assembleFromJsonl(path);
    } else {
      const raw = JSON.parse(readFileSync(path, 'utf8'));
      session = SessionSchema.parse(raw);
    }
  } catch (err) {
    return {
      ok: false,
      warnings: [`Failed to parse: ${err instanceof Error ? err.message : String(err)}`],
      summary: '',
    };
  }

  const warnings: string[] = [];

  // Categorize requests.
  const xhr = session.requests.filter((r) => /xhr|fetch/i.test(r.resourceType));
  const docs = session.requests.filter((r) => r.resourceType === 'Document');
  // OPTIONS is excluded alongside GET/HEAD: a CORS preflight on its own does
  // not mean the mutating request it announces was ever sent.
  const posts = session.requests.filter(
    (r) => r.method !== 'GET' && r.method !== 'HEAD' && r.method !== 'OPTIONS',
  );
  const errors = session.requests.filter(
    (r) => r.response?.status !== undefined && r.response.status >= 400,
  );
  const successes = session.requests.filter(
    (r) => r.response?.status !== undefined && r.response.status >= 200 && r.response.status < 300,
  );

  // Categorize events.
  const navs = session.events.filter((e) => e.type === 'navigation');
  const clicks = session.events.filter((e) => e.type === 'click');
  const inputs = session.events.filter((e) => e.type === 'input' || e.type === 'change');
  const submits = session.events.filter((e) => e.type === 'submit');

  const cookies = session.cookieSnapshots ?? [];
  const startCookies = cookies.find((c) => c.label === 'start');
  const endCookies = cookies.find((c) => c.label === 'end');

  // Latest timestamp across requests and events. A loop is used instead of
  // spreading into Math.max so large sessions cannot exceed the engine's
  // argument-count limit.
  let lastEventTs = 0;
  for (const r of session.requests) {
    if (r.timestamp > lastEventTs) lastEventTs = r.timestamp;
  }
  for (const e of session.events) {
    if (e.timestamp > lastEventTs) lastEventTs = e.timestamp;
  }
  const durationS = (lastEventTs / 1000).toFixed(1);

  // Heuristic warnings.
  if (session.requests.length === 0) {
    warnings.push('No network requests captured. Recorder may have started after page load.');
  }
  if (session.narration.length === 0) {
    warnings.push('No narration captured. The LLM intent detection works best with narration.');
  }
  if (clicks.length === 0 && submits.length === 0) {
    warnings.push(
      'No clicks or form submits captured. Did the injector fail to load? (Check session for [IMPRINT] sentinel.)',
    );
  }
  if (posts.length === 0) {
    warnings.push(
      "No POST/PUT/DELETE requests captured. If this was a booking flow, the booking POST didn't fire — capture is likely incomplete.",
    );
  }
  if (!startCookies) {
    warnings.push('No start-of-session cookie snapshot. Auth state at recording start is unknown.');
  }
  if (!endCookies) {
    warnings.push(
      'No end-of-session cookie snapshot. The recorder may have crashed before clean shutdown.',
    );
  }
  if (errors.length > successes.length && errors.length > 3) {
    warnings.push(
      `More 4xx/5xx responses (${errors.length}) than 2xx (${successes.length}). Auth or anti-bot may be blocking the workflow.`,
    );
  }
  if (lastEventTs < 5000 && session.requests.length > 0) {
    warnings.push('Session is shorter than 5 seconds. Are you sure the workflow completed?');
  }

  const summary = [
    `site: ${session.site}`,
    `duration: ${durationS}s`,
    `requests: ${session.requests.length} (${docs.length} doc, ${xhr.length} xhr, ${posts.length} POST/PUT/DELETE)`,
    `responses: ${successes.length} 2xx, ${errors.length} 4xx/5xx`,
    `events: ${navs.length} nav, ${clicks.length} click, ${inputs.length} input, ${submits.length} submit`,
    `narration: ${session.narration.length} lines`,
    `cookies: ${startCookies ? `${startCookies.cookies.length} at start` : 'no start snapshot'}, ${
      endCookies ? `${endCookies.cookies.length} at end` : 'no end snapshot'
    }`,
  ].join('\n ');

  return {
    ok: warnings.length === 0,
    warnings,
    summary,
  };
}
118
+
119
/**
 * Print a check result to stdout: a header, the summary, then either a
 * success note with the suggested next command or the list of warnings.
 */
export function reportCheck(path: string, result: CheckResult): void {
  const output: string[] = [`[imprint] check ${path}`, '', ` ${result.summary}`, ''];

  const warningCount = result.warnings.length;
  if (warningCount === 0) {
    output.push(
      ' ✓ no warnings — capture looks complete',
      '',
      'next step:',
      ` imprint redact ${path} # scrub credentials before LLM analysis`,
    );
  } else {
    const plural = warningCount === 1 ? '' : 's';
    output.push(` ⚠ ${warningCount} warning${plural}:`);
    for (const warning of result.warnings) {
      output.push(` • ${warning}`);
    }
  }
  output.push('');

  // One console.log call per line, matching the line-by-line output format.
  for (const line of output) {
    console.log(line);
  }
}