imprint-mcp 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126) hide show
  1. package/README.md +165 -201
  2. package/examples/discoverandgo/README.md +1 -1
  3. package/examples/echo/README.md +1 -1
  4. package/examples/google-flights/README.md +28 -0
  5. package/examples/google-flights/_shared/batchexecute.ts +63 -0
  6. package/examples/google-flights/_shared/flights_request.ts +95 -0
  7. package/examples/google-flights/_shared/package.json +9 -0
  8. package/examples/google-flights/get_flight_booking_details/index.ts +159 -0
  9. package/examples/google-flights/get_flight_booking_details/package.json +9 -0
  10. package/examples/google-flights/get_flight_booking_details/parser.ts +182 -0
  11. package/examples/google-flights/get_flight_booking_details/playbook.yaml +138 -0
  12. package/examples/google-flights/get_flight_booking_details/request-transform.ts +86 -0
  13. package/examples/google-flights/get_flight_booking_details/workflow.json +98 -0
  14. package/examples/google-flights/get_flight_calendar_prices/index.ts +131 -0
  15. package/examples/google-flights/get_flight_calendar_prices/package.json +9 -0
  16. package/examples/google-flights/get_flight_calendar_prices/parser.ts +86 -0
  17. package/examples/google-flights/get_flight_calendar_prices/playbook.yaml +97 -0
  18. package/examples/google-flights/get_flight_calendar_prices/request-transform.ts +31 -0
  19. package/examples/google-flights/get_flight_calendar_prices/workflow.json +76 -0
  20. package/examples/google-flights/lookup_airport/index.ts +101 -0
  21. package/examples/google-flights/lookup_airport/package.json +9 -0
  22. package/examples/google-flights/lookup_airport/parser.ts +66 -0
  23. package/examples/google-flights/lookup_airport/playbook.yaml +47 -0
  24. package/examples/google-flights/lookup_airport/request-transform.ts +20 -0
  25. package/examples/google-flights/lookup_airport/workflow.json +57 -0
  26. package/examples/google-flights/search_flights/index.ts +219 -0
  27. package/examples/google-flights/search_flights/package.json +9 -0
  28. package/examples/google-flights/search_flights/parser.ts +169 -0
  29. package/examples/google-flights/search_flights/playbook.yaml +184 -0
  30. package/examples/google-flights/search_flights/request-transform.ts +119 -0
  31. package/examples/google-flights/search_flights/workflow.json +143 -0
  32. package/examples/google-hotels/README.md +29 -0
  33. package/examples/google-hotels/_shared/batchexecute.ts +73 -0
  34. package/examples/google-hotels/_shared/freq.ts +158 -0
  35. package/examples/google-hotels/_shared/package.json +9 -0
  36. package/examples/google-hotels/autocomplete_hotel_location/index.ts +80 -0
  37. package/examples/google-hotels/autocomplete_hotel_location/package.json +9 -0
  38. package/examples/google-hotels/autocomplete_hotel_location/parser.ts +71 -0
  39. package/examples/google-hotels/autocomplete_hotel_location/playbook.yaml +36 -0
  40. package/examples/google-hotels/autocomplete_hotel_location/request-transform.ts +37 -0
  41. package/examples/google-hotels/autocomplete_hotel_location/workflow.json +36 -0
  42. package/examples/google-hotels/get_hotel_booking_options/index.ts +143 -0
  43. package/examples/google-hotels/get_hotel_booking_options/package.json +9 -0
  44. package/examples/google-hotels/get_hotel_booking_options/parser.ts +271 -0
  45. package/examples/google-hotels/get_hotel_booking_options/playbook.yaml +154 -0
  46. package/examples/google-hotels/get_hotel_booking_options/request-transform.ts +154 -0
  47. package/examples/google-hotels/get_hotel_booking_options/workflow.json +84 -0
  48. package/examples/google-hotels/get_hotel_reviews/index.ts +81 -0
  49. package/examples/google-hotels/get_hotel_reviews/package.json +9 -0
  50. package/examples/google-hotels/get_hotel_reviews/parser.ts +128 -0
  51. package/examples/google-hotels/get_hotel_reviews/playbook.yaml +64 -0
  52. package/examples/google-hotels/get_hotel_reviews/request-transform.ts +42 -0
  53. package/examples/google-hotels/get_hotel_reviews/workflow.json +37 -0
  54. package/examples/google-hotels/search_hotels/index.ts +207 -0
  55. package/examples/google-hotels/search_hotels/package.json +9 -0
  56. package/examples/google-hotels/search_hotels/parser.ts +260 -0
  57. package/examples/google-hotels/search_hotels/playbook.yaml +87 -0
  58. package/examples/google-hotels/search_hotels/request-transform.ts +197 -0
  59. package/examples/google-hotels/search_hotels/workflow.json +127 -0
  60. package/package.json +3 -2
  61. package/prompts/audit-agent.md +71 -0
  62. package/prompts/build-planning.md +74 -0
  63. package/prompts/compile-agent.md +131 -27
  64. package/prompts/prereq-builder.md +64 -0
  65. package/prompts/prereq-planner.md +34 -0
  66. package/prompts/tool-planning.md +39 -0
  67. package/src/cli.ts +109 -2
  68. package/src/imprint/agent.ts +5 -0
  69. package/src/imprint/audit.ts +996 -0
  70. package/src/imprint/backend-ladder.ts +1214 -184
  71. package/src/imprint/build-plan.ts +1051 -0
  72. package/src/imprint/cdp-browser-fetch.ts +589 -0
  73. package/src/imprint/cdp-jar-cache.ts +320 -0
  74. package/src/imprint/chromium.ts +135 -0
  75. package/src/imprint/claude-cli-compile.ts +125 -25
  76. package/src/imprint/codex-cli-compile.ts +26 -23
  77. package/src/imprint/compile-agent-types.ts +38 -0
  78. package/src/imprint/compile-agent.ts +63 -25
  79. package/src/imprint/compile-tools.ts +1656 -64
  80. package/src/imprint/compile.ts +13 -1
  81. package/src/imprint/concurrency.ts +87 -0
  82. package/src/imprint/cron.ts +1 -0
  83. package/src/imprint/doctor.ts +39 -0
  84. package/src/imprint/freeform-redact.ts +5 -4
  85. package/src/imprint/integrations.ts +2 -2
  86. package/src/imprint/llm.ts +56 -8
  87. package/src/imprint/mcp-compile-server.ts +43 -10
  88. package/src/imprint/mcp-maintenance.ts +9 -101
  89. package/src/imprint/mcp-server.ts +73 -7
  90. package/src/imprint/multi-progress.ts +7 -2
  91. package/src/imprint/param-grounding.ts +367 -0
  92. package/src/imprint/paths.ts +29 -0
  93. package/src/imprint/playbook-runner.ts +101 -40
  94. package/src/imprint/prereq-builder.ts +651 -0
  95. package/src/imprint/probe-backends.ts +6 -3
  96. package/src/imprint/record.ts +10 -1
  97. package/src/imprint/redact.ts +30 -2
  98. package/src/imprint/replay-capture.ts +19 -18
  99. package/src/imprint/runtime.ts +19 -10
  100. package/src/imprint/session-diff.ts +79 -2
  101. package/src/imprint/session-merge.ts +9 -5
  102. package/src/imprint/stealth-chromium.ts +81 -0
  103. package/src/imprint/stealth-fetch.ts +309 -29
  104. package/src/imprint/stealth-token-cache.ts +88 -0
  105. package/src/imprint/teach-plan.ts +251 -0
  106. package/src/imprint/teach-state.ts +10 -0
  107. package/src/imprint/teach.ts +456 -142
  108. package/src/imprint/tool-candidates.ts +72 -14
  109. package/src/imprint/tool-plan.ts +313 -0
  110. package/src/imprint/tracing.ts +135 -6
  111. package/src/imprint/types.ts +61 -3
  112. package/examples/google-flights/search_google_flights/index.ts +0 -101
  113. package/examples/google-flights/search_google_flights/parser.test.ts +0 -140
  114. package/examples/google-flights/search_google_flights/parser.ts +0 -189
  115. package/examples/google-flights/search_google_flights/playbook.yaml +0 -130
  116. package/examples/google-flights/search_google_flights/workflow.json +0 -48
  117. package/examples/google-hotels/search_google_hotels/index.ts +0 -194
  118. package/examples/google-hotels/search_google_hotels/parser.test.ts +0 -168
  119. package/examples/google-hotels/search_google_hotels/parser.ts +0 -330
  120. package/examples/google-hotels/search_google_hotels/playbook.yaml +0 -125
  121. package/examples/google-hotels/search_google_hotels/workflow.json +0 -111
  122. package/examples/namecheap-domains/search_namecheap_domains/index.ts +0 -144
  123. package/examples/namecheap-domains/search_namecheap_domains/parser.ts +0 -380
  124. package/examples/namecheap-domains/search_namecheap_domains/playbook.yaml +0 -50
  125. package/examples/namecheap-domains/search_namecheap_domains/request-transform.ts +0 -136
  126. package/examples/namecheap-domains/search_namecheap_domains/workflow.json +0 -97
@@ -21,6 +21,22 @@ import type { CapturedRequest, Session } from './types.ts';
21
21
  const USER_INTERACTION_TYPES = new Set(['click', 'input', 'change', 'submit']);
22
22
  const MULTI_VALUE_HEADERS = new Set(['cookie', 'set-cookie']);
23
23
 
24
/**
 * Report whether `body` looks like a structured RPC envelope instead of flat
 * text or top-level JSON.
 *
 * Two shapes are recognised, both checked against the first 64 characters of
 * `body` with leading whitespace ignored:
 *   - an anti-XSSI guard prefix `)]}'` (which also covers `)]}',`), and
 *   - a length-prefixed frame: 1–9 digits, a newline, then `[`
 *     (e.g. `219006\n[`), as used by Google `batchexecute`.
 *
 * Such bodies carry doubly-encoded JSON as string payloads. Running the
 * flat-text freeform scanner over them injects `[REDACTED]` into bare numeric
 * IDs/coordinates inside the inner JSON and makes it unparseable, so callers
 * use this predicate to skip the freeform fallback. It only gates the
 * flat-text scan; key-based redaction of clean-JSON bodies is unaffected.
 *
 * @param body Raw request/response body text.
 * @returns `true` when the body starts with one of the envelope shapes above.
 */
export function looksLikeRpcEnvelope(body: string): boolean {
  // Only a short prefix is needed to recognise either envelope shape.
  const prefix = body.slice(0, 64).trimStart();

  // anti-XSSI guard: )]}' and )]}',
  const hasXssiGuard = prefix.startsWith(")]}'");

  // length-prefixed frame: 219006\n[
  const hasLengthPrefixedFrame = /^\d{1,9}\r?\n\[/.test(prefix);

  return hasXssiGuard || hasLengthPrefixedFrame;
}
39
+
24
40
  /**
25
41
  * Detect sensitive headers whose values are page-minted constants — baked
26
42
  * into the site's JavaScript, not per-user secrets. The recording starts
@@ -179,7 +195,12 @@ export function redactJsonBody(
179
195
  const visited = visit(inner, [...pathSoFar, k]);
180
196
  out[k] = JSON.stringify(visited);
181
197
  } catch {
182
- const r = freeform ? redactFreeformText(v) : { redacted: v, redactionsCount: 0 };
198
+ // Nested string that isn't parseable JSON: scan it as free text,
199
+ // unless it's a structured RPC envelope (flat-scanning corrupts it).
200
+ const r =
201
+ freeform && !looksLikeRpcEnvelope(v)
202
+ ? redactFreeformText(v)
203
+ : { redacted: v, redactionsCount: 0 };
183
204
  freeformCount += r.redactionsCount;
184
205
  out[k] = r.redacted;
185
206
  }
@@ -228,6 +249,9 @@ export function redactBody(
228
249
  } catch {
229
250
  const formR = redactFormBody(body, formPlaceholders, markerContext);
230
251
  if (formR.redactionsCount > 0 || formR.placeholdersInjected > 0 || !freeform) return formR;
252
+ // A structured RPC envelope (XSSI/length-prefixed) is not flat text —
253
+ // flat-scanning it would corrupt the doubly-encoded JSON payloads it carries.
254
+ if (looksLikeRpcEnvelope(body)) return formR;
231
255
  const freeformR = redactFreeformText(body);
232
256
  return {
233
257
  redacted: freeformR.redacted,
@@ -437,7 +461,11 @@ export function redactSession(
437
461
  response.mimeType,
438
462
  undefined,
439
463
  undefined,
440
- useFreeform,
464
+ // Responses are key-based only: never value-pattern (freeform) scan a
465
+ // server body. Keeps redaction focused on real secrets (post-login
466
+ // cookies + user-entered PII) and avoids corrupting structured RPC
467
+ // envelopes whose payloads are doubly-encoded JSON.
468
+ false,
441
469
  markerContext,
442
470
  );
443
471
  respBody = respBodyR.redacted;
@@ -15,6 +15,7 @@ import { join as pathJoin } from 'node:path';
15
15
  import type { Browser, BrowserContext, Locator, Page } from 'playwright';
16
16
  import { createLog } from './log.ts';
17
17
  import type { CapturedReplayRequest } from './session-diff.ts';
18
+ import { getStealthChromium, getStealthExecutablePath } from './stealth-chromium.ts';
18
19
  import type { CapturedEvent, Session } from './types.ts';
19
20
 
20
21
  const log = createLog('replay-capture');
@@ -62,25 +63,17 @@ export async function replayRawSession(opts: RawReplayOptions): Promise<ReplayCa
62
63
 
63
64
  let chromium: typeof import('playwright').chromium;
64
65
  try {
65
- const pwExtra = await import('playwright-extra');
66
- const stealthMod = await import('puppeteer-extra-plugin-stealth');
67
- const stealthFactory =
68
- (stealthMod as { default?: () => unknown }).default ??
69
- (stealthMod as unknown as () => unknown);
70
- pwExtra.chromium.use(stealthFactory() as never);
71
- chromium = pwExtra.chromium as unknown as typeof import('playwright').chromium;
72
- } catch {
73
- try {
74
- const pw = await import('playwright');
75
- chromium = pw.chromium;
76
- } catch (innerErr) {
77
- return { ok: false, requests: [], error: `Playwright not available: ${errMsg(innerErr)}` };
78
- }
66
+ chromium = await getStealthChromium();
67
+ } catch (innerErr) {
68
+ return { ok: false, requests: [], error: `Playwright not available: ${errMsg(innerErr)}` };
79
69
  }
80
70
 
81
71
  try {
82
72
  replayLog(`launching browser (headed=${!!opts.headed})`);
83
- browser = await chromium.launch({ headless: !opts.headed });
73
+ browser = await chromium.launch({
74
+ headless: !opts.headed,
75
+ executablePath: getStealthExecutablePath(),
76
+ });
84
77
  } catch (err) {
85
78
  replayLog(`browser launch failed: ${errMsg(err)}`);
86
79
  return { ok: false, requests: [], error: `Could not launch Chromium: ${errMsg(err)}` };
@@ -215,11 +208,19 @@ export async function replayRawSession(opts: RawReplayOptions): Promise<ReplayCa
215
208
  opts.onProgress?.(i + 1, replayableEvents.length, captured.length);
216
209
  }
217
210
 
218
- // Allow final network requests to settle
211
+ // Allow final network requests to settle, but never block forever: on a
212
+ // large recording a single hung response-body read can stall allSettled
213
+ // indefinitely (there is no outer timeout on the replay stage). Cap the
214
+ // wait and proceed with whatever bodies are ready — replay-diff is
215
+ // best-effort, so partial captures are acceptable.
216
+ const SETTLE_TIMEOUT_MS = 15_000;
219
217
  replayLog('waiting for networkidle...');
220
- await page.waitForLoadState('networkidle').catch(() => {});
218
+ await page.waitForLoadState('networkidle', { timeout: SETTLE_TIMEOUT_MS }).catch(() => {});
221
219
  await page.waitForTimeout(1000);
222
- await Promise.allSettled(pendingReads);
220
+ await Promise.race([
221
+ Promise.allSettled(pendingReads),
222
+ new Promise<void>((resolve) => setTimeout(resolve, SETTLE_TIMEOUT_MS)),
223
+ ]);
223
224
  captured.sort((a, b) => a.seq - b.seq);
224
225
 
225
226
  replayLog(`replay complete: captured ${captured.length} requests total`);
@@ -113,14 +113,23 @@ export async function executeWorkflow<T = unknown>(opts: ExecuteOptions): Promis
113
113
  (await loadCredentialStore(opts.workflow.site)) ??
114
114
  emptyStore(opts.workflow.site);
115
115
 
116
- // Validate required parameters are present.
116
+ // Validate required parameters are present and merge declared defaults
117
+ // into the working params map. Without the merge, `parameter.default` would
118
+ // be a presence-sentinel only — the substitution layer at
119
+ // `resolvePlaceholder` would still throw STATE_MISSING because it reads
120
+ // from this map directly. The schema declares `default` as a real value
121
+ // (string | number | boolean), so honor it.
122
+ const params: Record<string, string | number | boolean> = { ...opts.params };
117
123
  for (const p of opts.workflow.parameters) {
118
- if (!(p.name in opts.params) && p.default === undefined) {
119
- return {
120
- ok: false,
121
- error: 'UNKNOWN',
122
- message: `Missing required parameter: ${p.name} (${p.description})`,
123
- };
124
+ if (!(p.name in params)) {
125
+ if (p.default === undefined) {
126
+ return {
127
+ ok: false,
128
+ error: 'UNKNOWN',
129
+ message: `Missing required parameter: ${p.name} (${p.description})`,
130
+ };
131
+ }
132
+ params[p.name] = p.default;
124
133
  }
125
134
  }
126
135
 
@@ -163,7 +172,7 @@ export async function executeWorkflow<T = unknown>(opts: ExecuteOptions): Promis
163
172
  if (!req) continue;
164
173
 
165
174
  const subbedResult = substituteRequest(req, {
166
- params: opts.params,
175
+ params,
167
176
  credentials: liveCredentials,
168
177
  responseSlots,
169
178
  state,
@@ -180,7 +189,7 @@ export async function executeWorkflow<T = unknown>(opts: ExecuteOptions): Promis
180
189
  subbed.method,
181
190
  subbed.url,
182
191
  responseSlots.map((s) => s.raw),
183
- opts.params,
192
+ params,
184
193
  );
185
194
  if (typeof transformResult === 'string') {
186
195
  subbed.url = transformResult;
@@ -312,7 +321,7 @@ export async function executeWorkflow<T = unknown>(opts: ExecuteOptions): Promis
312
321
  };
313
322
  }
314
323
  finalData = mod.extract(finalData, {
315
- params: opts.params,
324
+ params,
316
325
  responses: responseSlots.map((s) => s.raw),
317
326
  });
318
327
  } catch (err) {
@@ -318,6 +318,17 @@ function suggestStateName(location: string): string {
318
318
  .toLowerCase();
319
319
  }
320
320
 
321
/**
 * Heuristic: does `v` look like an opaque token/id rather than human text, a
 * city name, or a date?
 *
 * Used to gate provenance-tagging of stable values so that an incidental
 * constant (a UI label, the echoed query) is not treated as a server-provided
 * token. Per the original note, this is shared with the build-plan token
 * detector.
 *
 * A value qualifies when it is at least 12 characters long, contains no
 * whitespace, is not a bare `YYYY-MM-DD` date, and contains at least one of
 * `:`, `|`, `_`, `-` or a digit.
 *
 * @param v Candidate value.
 * @returns `true` when the value looks like an opaque token.
 */
export function looksLikeToken(v: string): boolean {
  const tooShort = v.length < 12;
  const isFreeText = /\s/.test(v); // multi-word / free text
  // NOTE(review): a bare YYYY-MM-DD string is 10 characters, so the length
  // guard already rejects it; this check is kept as an explicit guard.
  const isBareDate = /^\d{4}-\d{2}-\d{2}$/.test(v); // dates
  if (tooShort || isFreeText || isBareDate) {
    return false;
  }

  const hasSeparator = /[:|_-]/.test(v);
  const hasDigit = /\d/.test(v);
  return hasSeparator || hasDigit;
}
331
+
321
332
  // ─── Main diff ──────────────────────────────────────────────────────────────
322
333
 
323
334
  export function diffTriagedSessions(
@@ -327,6 +338,12 @@ export function diffTriagedSessions(
327
338
  const pairs = alignRequests(original.requests, replay.requests);
328
339
  const pairedOrigSeqs = new Set(pairs.map((p) => p.originalSeq));
329
340
  const pairedReplaySeqs = new Set(pairs.map((p) => p.replaySeq));
341
+ // `searchPriorResponses` over the replay returns a producer in REPLAY-seq
342
+ // space, but `originalSeq` and every downstream consumer (capture hints,
343
+ // build-plan token detection, the planner) work in ORIGINAL-seq space — so a
344
+ // replay producer must be translated back via the alignment pairs.
345
+ const replayToOriginal = new Map(pairs.map((p) => [p.replaySeq, p.originalSeq]));
346
+ const toOriginalSeq = (replaySeq: number): number => replayToOriginal.get(replaySeq) ?? replaySeq;
330
347
 
331
348
  const classifications: ClassifiedValue[] = [];
332
349
 
@@ -347,17 +364,28 @@ export function diffTriagedSessions(
347
364
  if (v2Value === undefined) continue; // field only in run 1
348
365
 
349
366
  if (v1.value === v2Value) {
367
+ // Stable across runs. Normally a constant — but an OPAQUE stable value
368
+ // that also appears in a PRIOR response is a server-PROVIDED token (e.g.
369
+ // a per-entity id minted by a sibling search tool). The same-flow replay
370
+ // can't expose it by variance (same entity → same token), so recover its
371
+ // provenance from the original responses (already original-seq space).
372
+ // A cross-tool consumer then sources it as a param instead of hardcoding.
373
+ const provider = looksLikeToken(v1.value)
374
+ ? searchPriorResponses(v1.value, original.requests, pair.originalSeq)
375
+ : null;
350
376
  classifications.push({
351
377
  classification: 'constant',
352
378
  location: v1.location,
353
379
  originalSeq: pair.originalSeq,
354
380
  value1: v1.value,
355
381
  value2: v2Value,
382
+ ...(provider ? { producerSeq: provider.seq, producerPath: provider.path } : {}),
356
383
  });
357
384
  continue;
358
385
  }
359
386
 
360
- // Value differs — check if it came from a prior response in run 2
387
+ // Value differs — check if it came from a prior response in run 2,
388
+ // translating the replay producer back to original-seq space.
361
389
  const producer = searchPriorResponses(v2Value, replay.requests, pair.replaySeq);
362
390
 
363
391
  if (producer) {
@@ -368,7 +396,7 @@ export function diffTriagedSessions(
368
396
  originalSeq: pair.originalSeq,
369
397
  value1: v1.value,
370
398
  value2: v2Value,
371
- producerSeq: producer.seq,
399
+ producerSeq: toOriginalSeq(producer.seq),
372
400
  producerPath: producer.path,
373
401
  suggestedStateName: name || undefined,
374
402
  });
@@ -407,3 +435,52 @@ export function triageByAlignment(
407
435
  const aligned = alignRequests(run1TriagedRequests, run2AllRequests);
408
436
  return aligned.filter((pair) => pair.confidence >= 0.5).map((pair) => pair.replaySeq);
409
437
  }
438
+
439
/**
 * Severity order — a value seen varying in ANY pass outranks one seen constant.
 * server_derived (traceable to a response) wins over browser_minted.
 */
const CLASSIFICATION_RANK: Record<ValueClassification, number> = {
  constant: 0,
  browser_minted: 1,
  server_derived: 2,
};

/**
 * Merge `ClassifiedValue`s from several diff passes that all share the SAME
 * `original` recording (so `originalSeq` is a stable join key across passes).
 *
 * Each pass diffs the original recording against one other run — the automated
 * browser replay AND every other real recording of the site. Anti-bot edges
 * (Akamai, DataDome, …) often block the automated replay at the page level, so
 * the replay reproduces only a fraction of the recording's requests and their
 * functional values (GraphQL safelisting signatures, persisted-query hashes,
 * app keys) never get classified. Real recordings come from a trusted browser
 * and DO carry those requests, so diffing recordings against each other
 * recovers the missing signal.
 *
 * Merge rule per (originalSeq, location):
 *   - a value that VARIES in any pass is ephemeral — the strongest non-constant
 *     classification wins (server_derived > browser_minted), preserving its
 *     producer provenance;
 *   - a value constant in every pass that observed it is `constant`.
 * A value the replay never observed (because it was blocked) but that is
 * identical across time-separated recordings is therefore kept as `constant`,
 * not silently dropped.
 *
 * On equal rank the entry from the earliest pass is kept. Results are returned
 * in first-seen order of their (originalSeq, location) key.
 *
 * @param passes One array of classifications per diff pass.
 * @returns One classification per distinct (originalSeq, location) pair.
 */
export function mergeClassifications(passes: ClassifiedValue[][]): ClassifiedValue[] {
  const byKey = new Map<string, ClassifiedValue>();
  for (const pass of passes) {
    for (const cv of pass) {
      // The delimiter is required: without it (seq 1, location "2x") and
      // (seq 12, location "x") produce the same key and one entry is lost.
      // A stringified number never contains ':', so the first ':' always
      // separates the seq from the location unambiguously.
      const key = `${cv.originalSeq}:${cv.location}`;
      const prev = byKey.get(key);
      if (
        !prev ||
        CLASSIFICATION_RANK[cv.classification] > CLASSIFICATION_RANK[prev.classification]
      ) {
        byKey.set(key, cv);
      }
    }
  }
  return [...byKey.values()];
}
@@ -7,7 +7,7 @@
7
7
  * pipeline consumes unchanged.
8
8
  */
9
9
 
10
- import { existsSync, readFileSync, readdirSync, writeFileSync } from 'node:fs';
10
+ import { existsSync, mkdirSync, readFileSync, readdirSync, writeFileSync } from 'node:fs';
11
11
  import { join as pathJoin } from 'node:path';
12
12
  import { localSessionsDir } from './paths.ts';
13
13
  import { friendlySessionTimestamp } from './teach-state.ts';
@@ -34,10 +34,13 @@ interface SessionInfo {
34
34
  }
35
35
 
36
36
  export function listSiteSessions(site: string): SessionInfo[] {
37
- const sessDir = localSessionsDir(site);
38
- if (!existsSync(sessDir)) return [];
37
+ return listSessionsInDir(localSessionsDir(site));
38
+ }
39
+
40
+ export function listSessionsInDir(dir: string): SessionInfo[] {
41
+ if (!existsSync(dir)) return [];
39
42
 
40
- const files = readdirSync(sessDir).filter(
43
+ const files = readdirSync(dir).filter(
41
44
  (f) =>
42
45
  f.endsWith('.json') &&
43
46
  !f.includes('.redacted') &&
@@ -47,7 +50,7 @@ export function listSiteSessions(site: string): SessionInfo[] {
47
50
 
48
51
  const infos: SessionInfo[] = [];
49
52
  for (const filename of files) {
50
- const absPath = pathJoin(sessDir, filename);
53
+ const absPath = pathJoin(dir, filename);
51
54
  try {
52
55
  const raw = JSON.parse(readFileSync(absPath, 'utf8'));
53
56
  const session = SessionSchema.parse(raw);
@@ -190,6 +193,7 @@ export function mergeSessions(sessions: Session[]): Session {
190
193
 
191
194
  export function writeCombinedSession(site: string, combined: Session): string {
192
195
  const sessDir = localSessionsDir(site);
196
+ mkdirSync(sessDir, { recursive: true });
193
197
  const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
194
198
  const filename = `combined-${timestamp}.json`;
195
199
  const absPath = pathJoin(sessDir, filename);
@@ -0,0 +1,81 @@
1
+ import { findChromium } from './chromium.ts';
2
+
3
/**
 * Shared loader for Playwright's chromium with the stealth plugin applied.
 *
 * Per the authors' notes, the stealth plugin patches `navigator.webdriver`,
 * plugin enumeration, WebGL vendor strings, and other headless-Chrome
 * telltales that anti-bot services (Akamai, Cloudflare, PerimeterX) detect;
 * vanilla headless Playwright gets tarpitted or 403'd by these services while
 * the stealth-patched chromium loads the same pages in seconds.
 *
 * Resolution order:
 *   1. `playwright-extra` with `puppeteer-extra-plugin-stealth` registered
 *      on its `chromium`;
 *   2. if anything in step 1 throws (e.g. either package is not installed),
 *      vanilla `playwright` — this preserves the graceful-degrade behavior of
 *      the original duplicated loaders in playbook-runner, replay-capture, and
 *      backend-ladder.
 *
 * @returns The chromium browser type, stealth-patched when possible.
 * @throws If vanilla `playwright` cannot be imported either — callers
 *   translate the thrown error into their own result shape.
 */
export async function getStealthChromium(): Promise<typeof import('playwright').chromium> {
  try {
    const extra = await import('playwright-extra');
    const stealthModule = await import('puppeteer-extra-plugin-stealth');

    // The plugin factory is either the module's default export or, failing
    // that, the module object itself.
    const createStealthPlugin =
      (stealthModule as { default?: () => unknown }).default ??
      (stealthModule as unknown as () => unknown);

    extra.chromium.use(createStealthPlugin() as never);
    return extra.chromium as unknown as typeof import('playwright').chromium;
  } catch {
    // Any failure above degrades to vanilla Playwright; an import failure
    // here propagates to the caller.
    const { chromium } = await import('playwright');
    return chromium;
  }
}
34
+
35
/**
 * Probe whether the puppeteer-extra stealth plugin is installed, i.e. whether
 * getStealthChromium() WILL apply it rather than land on the
 * vanilla-Playwright fallback.
 *
 * Callers use this to avoid stacking a manual `navigator.webdriver` patch on
 * top of the plugin's: the stealth plugin removes the property the way a real
 * Chrome does (it simply lacks `webdriver`), whereas a redundant
 * `Object.defineProperty(navigator,'webdriver',{get:()=>false})` leaves a
 * non-native property descriptor that is ITSELF a fingerprinting tell. The
 * manual patch should therefore only run on the vanilla fallback, where it is
 * the only protection. Import resolution is cached, so probing is cheap.
 *
 * @returns `true` when both `playwright-extra` and
 *   `puppeteer-extra-plugin-stealth` import successfully, otherwise `false`.
 *   Never rejects.
 */
export async function isStealthPluginAvailable(): Promise<boolean> {
  // Import sequentially; a rejection from either import resolves to `false`.
  return import('playwright-extra')
    .then(() => import('puppeteer-extra-plugin-stealth'))
    .then(
      () => true,
      () => false,
    );
}
56
+
57
/**
 * Locate the Chromium binary to launch, via `findChromium()`.
 *
 * Per the authors' notes this is the same binary `imprint record` uses for the
 * user's recording session — Playwright's bundled "Google Chrome for Testing"
 * (full Chrome build), the system Chrome on macOS, or a Linux distro
 * Chrome/Chromium package, in that order of preference.
 *
 * Why this matters: by default Playwright's
 * `chromium.launch({ headless: true })` picks `chrome-headless-shell` — a
 * separate stripped-down binary that Akamai / Cloudflare / PerimeterX class
 * anti-bot services detect at the binary/TLS-fingerprint layer regardless of
 * how thoroughly the JS-level `navigator.webdriver` etc. are patched by the
 * stealth plugin. The recording browser uses the FULL Chrome binary and is
 * trusted; a replay browser on chrome-headless-shell looks like a bot. Using
 * the SAME binary for both eliminates that asymmetry.
 *
 * @returns The executable path, or `undefined` when `findChromium()` throws —
 *   callers should then let Playwright fall back to whatever default it finds.
 */
export function getStealthExecutablePath(): string | undefined {
  let executablePath: string | undefined;
  try {
    executablePath = findChromium();
  } catch {
    // No Chromium could be located; signal "use Playwright's default".
    executablePath = undefined;
  }
  return executablePath;
}