npm - imprint-mcp - Versions diffs - 0.2.0 → 0.3.0 - Mend

imprint-mcp 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (129) hide show

package/README.md +165 -201
package/examples/discoverandgo/README.md +1 -1
package/examples/echo/README.md +1 -1
package/examples/google-flights/README.md +28 -0
package/examples/google-flights/_shared/batchexecute.ts +63 -0
package/examples/google-flights/_shared/flights_request.ts +95 -0
package/examples/google-flights/_shared/package.json +9 -0
package/examples/google-flights/get_flight_booking_details/index.ts +159 -0
package/examples/google-flights/get_flight_booking_details/package.json +9 -0
package/examples/google-flights/get_flight_booking_details/parser.ts +182 -0
package/examples/google-flights/get_flight_booking_details/playbook.yaml +138 -0
package/examples/google-flights/get_flight_booking_details/request-transform.ts +86 -0
package/examples/google-flights/get_flight_booking_details/workflow.json +98 -0
package/examples/google-flights/get_flight_calendar_prices/index.ts +131 -0
package/examples/google-flights/get_flight_calendar_prices/package.json +9 -0
package/examples/google-flights/get_flight_calendar_prices/parser.ts +86 -0
package/examples/google-flights/get_flight_calendar_prices/playbook.yaml +97 -0
package/examples/google-flights/get_flight_calendar_prices/request-transform.ts +31 -0
package/examples/google-flights/get_flight_calendar_prices/workflow.json +76 -0
package/examples/google-flights/lookup_airport/index.ts +101 -0
package/examples/google-flights/lookup_airport/package.json +9 -0
package/examples/google-flights/lookup_airport/parser.ts +66 -0
package/examples/google-flights/lookup_airport/playbook.yaml +47 -0
package/examples/google-flights/lookup_airport/request-transform.ts +20 -0
package/examples/google-flights/lookup_airport/workflow.json +57 -0
package/examples/google-flights/search_flights/index.ts +219 -0
package/examples/google-flights/search_flights/package.json +9 -0
package/examples/google-flights/search_flights/parser.ts +169 -0
package/examples/google-flights/search_flights/playbook.yaml +184 -0
package/examples/google-flights/search_flights/request-transform.ts +119 -0
package/examples/google-flights/search_flights/workflow.json +143 -0
package/examples/google-hotels/README.md +29 -0
package/examples/google-hotels/_shared/batchexecute.ts +73 -0
package/examples/google-hotels/_shared/freq.ts +158 -0
package/examples/google-hotels/_shared/package.json +9 -0
package/examples/google-hotels/autocomplete_hotel_location/index.ts +80 -0
package/examples/google-hotels/autocomplete_hotel_location/package.json +9 -0
package/examples/google-hotels/autocomplete_hotel_location/parser.ts +71 -0
package/examples/google-hotels/autocomplete_hotel_location/playbook.yaml +36 -0
package/examples/google-hotels/autocomplete_hotel_location/request-transform.ts +37 -0
package/examples/google-hotels/autocomplete_hotel_location/workflow.json +36 -0
package/examples/google-hotels/get_hotel_booking_options/index.ts +143 -0
package/examples/google-hotels/get_hotel_booking_options/package.json +9 -0
package/examples/google-hotels/get_hotel_booking_options/parser.ts +271 -0
package/examples/google-hotels/get_hotel_booking_options/playbook.yaml +154 -0
package/examples/google-hotels/get_hotel_booking_options/request-transform.ts +154 -0
package/examples/google-hotels/get_hotel_booking_options/workflow.json +84 -0
package/examples/google-hotels/get_hotel_reviews/index.ts +81 -0
package/examples/google-hotels/get_hotel_reviews/package.json +9 -0
package/examples/google-hotels/get_hotel_reviews/parser.ts +128 -0
package/examples/google-hotels/get_hotel_reviews/playbook.yaml +64 -0
package/examples/google-hotels/get_hotel_reviews/request-transform.ts +42 -0
package/examples/google-hotels/get_hotel_reviews/workflow.json +37 -0
package/examples/google-hotels/search_hotels/index.ts +207 -0
package/examples/google-hotels/search_hotels/package.json +9 -0
package/examples/google-hotels/search_hotels/parser.ts +260 -0
package/examples/google-hotels/search_hotels/playbook.yaml +87 -0
package/examples/google-hotels/search_hotels/request-transform.ts +197 -0
package/examples/google-hotels/search_hotels/workflow.json +127 -0
package/package.json +3 -2
package/prompts/audit-agent.md +71 -0
package/prompts/build-planning.md +74 -0
package/prompts/compile-agent.md +132 -28
package/prompts/prereq-builder.md +64 -0
package/prompts/prereq-planner.md +34 -0
package/prompts/tool-planning.md +39 -0
package/src/cli.ts +111 -4
package/src/imprint/agent.ts +5 -0
package/src/imprint/audit.ts +996 -0
package/src/imprint/backend-ladder.ts +1214 -184
package/src/imprint/build-plan.ts +1051 -0
package/src/imprint/cdp-browser-fetch.ts +589 -0
package/src/imprint/cdp-jar-cache.ts +320 -0
package/src/imprint/chromium.ts +135 -0
package/src/imprint/claude-cli-compile.ts +125 -25
package/src/imprint/codex-cli-compile.ts +26 -23
package/src/imprint/compile-agent-types.ts +38 -0
package/src/imprint/compile-agent.ts +65 -27
package/src/imprint/compile-tools.ts +1656 -64
package/src/imprint/compile.ts +14 -2
package/src/imprint/concurrency.ts +87 -0
package/src/imprint/credential-extract.ts +174 -25
package/src/imprint/cron.ts +1 -0
package/src/imprint/doctor.ts +39 -0
package/src/imprint/emit.ts +85 -0
package/src/imprint/freeform-redact.ts +5 -4
package/src/imprint/integrations.ts +2 -2
package/src/imprint/llm.ts +56 -8
package/src/imprint/mcp-compile-server.ts +43 -10
package/src/imprint/mcp-maintenance.ts +9 -101
package/src/imprint/mcp-server.ts +73 -7
package/src/imprint/multi-progress.ts +7 -2
package/src/imprint/param-grounding.ts +367 -0
package/src/imprint/paths.ts +29 -0
package/src/imprint/playbook-runner.ts +101 -40
package/src/imprint/prereq-builder.ts +651 -0
package/src/imprint/probe-backends.ts +6 -3
package/src/imprint/record.ts +10 -1
package/src/imprint/redact.ts +30 -2
package/src/imprint/replay-capture.ts +19 -18
package/src/imprint/runtime.ts +19 -10
package/src/imprint/sensitive-keys.ts +141 -7
package/src/imprint/session-diff.ts +79 -2
package/src/imprint/session-merge.ts +9 -5
package/src/imprint/stealth-chromium.ts +81 -0
package/src/imprint/stealth-fetch.ts +309 -29
package/src/imprint/stealth-token-cache.ts +88 -0
package/src/imprint/teach-plan.ts +251 -0
package/src/imprint/teach-state.ts +17 -0
package/src/imprint/teach.ts +582 -147
package/src/imprint/tool-candidates.ts +72 -14
package/src/imprint/tool-plan.ts +313 -0
package/src/imprint/tracing.ts +135 -6
package/src/imprint/types.ts +61 -3
package/examples/google-flights/search_google_flights/index.ts +0 -101
package/examples/google-flights/search_google_flights/parser.test.ts +0 -140
package/examples/google-flights/search_google_flights/parser.ts +0 -189
package/examples/google-flights/search_google_flights/playbook.yaml +0 -130
package/examples/google-flights/search_google_flights/workflow.json +0 -48
package/examples/google-hotels/search_google_hotels/index.ts +0 -194
package/examples/google-hotels/search_google_hotels/parser.test.ts +0 -168
package/examples/google-hotels/search_google_hotels/parser.ts +0 -330
package/examples/google-hotels/search_google_hotels/playbook.yaml +0 -125
package/examples/google-hotels/search_google_hotels/workflow.json +0 -111
package/examples/namecheap-domains/search_namecheap_domains/index.ts +0 -144
package/examples/namecheap-domains/search_namecheap_domains/parser.ts +0 -380
package/examples/namecheap-domains/search_namecheap_domains/playbook.yaml +0 -50
package/examples/namecheap-domains/search_namecheap_domains/request-transform.ts +0 -136
package/examples/namecheap-domains/search_namecheap_domains/workflow.json +0 -97

package/src/imprint/redact.ts CHANGED Viewed

@@ -21,6 +21,22 @@ import type { CapturedRequest, Session } from './types.ts';
 const USER_INTERACTION_TYPES = new Set(['click', 'input', 'change', 'submit']);
 const MULTI_VALUE_HEADERS = new Set(['cookie', 'set-cookie']);
+/**
+ * Report whether `body` looks like a structured RPC envelope rather than flat
+ * text or top-level JSON.
+ *
+ * Two prefixes are recognised, checked against the first 64 characters of the
+ * body with leading whitespace ignored:
+ *   - the anti-XSSI guard `)]}'` (which also covers the `)]}',` variant);
+ *   - a length-prefixed frame: 1-9 digits, a newline (LF or CRLF), then `[`.
+ *
+ * Such envelopes (e.g. Google `batchexecute`) carry doubly-encoded JSON as
+ * string payloads. Running the flat-text freeform scanner over them injects
+ * `[REDACTED]` into bare numeric IDs/coordinates inside the inner JSON and
+ * makes it unparseable, so the freeform fallback must skip bodies for which
+ * this returns true. Key-based redaction of clean-JSON bodies is unaffected;
+ * this predicate only gates the flat-text scan.
+ *
+ * @param body Raw request/response body text.
+ * @returns `true` when the body starts with one of the envelope prefixes.
+ */
+export function looksLikeRpcEnvelope(body: string): boolean {
+  const prefix = body.slice(0, 64).trimStart();
+  const hasXssiGuard = prefix.startsWith(")]}'");
+  const hasLengthFrame = /^\d{1,9}\r?\n\[/.test(prefix); // e.g. `219006\n[`
+  return hasXssiGuard || hasLengthFrame;
+}
 /**
  * Detect sensitive headers whose values are page-minted constants — baked
  * into the site's JavaScript, not per-user secrets. The recording starts
@@ -179,7 +195,12 @@ export function redactJsonBody(
             const visited = visit(inner, [...pathSoFar, k]);
             out[k] = JSON.stringify(visited);
           } catch {
-            const r = freeform ? redactFreeformText(v) : { redacted: v, redactionsCount: 0 };
+            // Nested string that isn't parseable JSON: scan it as free text,
+            // unless it's a structured RPC envelope (flat-scanning corrupts it).
+            const r =
+              freeform && !looksLikeRpcEnvelope(v)
+                ? redactFreeformText(v)
+                : { redacted: v, redactionsCount: 0 };
             freeformCount += r.redactionsCount;
             out[k] = r.redacted;
           }
@@ -228,6 +249,9 @@ export function redactBody(
   } catch {
     const formR = redactFormBody(body, formPlaceholders, markerContext);
     if (formR.redactionsCount > 0 || formR.placeholdersInjected > 0 || !freeform) return formR;
+    // A structured RPC envelope (XSSI/length-prefixed) is not flat text —
+    // flat-scanning it would corrupt the doubly-encoded JSON payloads it carries.
+    if (looksLikeRpcEnvelope(body)) return formR;
     const freeformR = redactFreeformText(body);
     return {
       redacted: freeformR.redacted,
@@ -437,7 +461,11 @@ export function redactSession(
           response.mimeType,
           undefined,
           undefined,
-          useFreeform,
+          // Responses are key-based only: never value-pattern (freeform) scan a
+          // server body. Keeps redaction focused on real secrets (post-login
+          // cookies + user-entered PII) and avoids corrupting structured RPC
+          // envelopes whose payloads are doubly-encoded JSON.
+          false,
           markerContext,
         );
         respBody = respBodyR.redacted;

package/src/imprint/replay-capture.ts CHANGED Viewed

@@ -15,6 +15,7 @@ import { join as pathJoin } from 'node:path';
 import type { Browser, BrowserContext, Locator, Page } from 'playwright';
 import { createLog } from './log.ts';
 import type { CapturedReplayRequest } from './session-diff.ts';
+import { getStealthChromium, getStealthExecutablePath } from './stealth-chromium.ts';
 import type { CapturedEvent, Session } from './types.ts';
 const log = createLog('replay-capture');
@@ -62,25 +63,17 @@ export async function replayRawSession(opts: RawReplayOptions): Promise<ReplayCa
   let chromium: typeof import('playwright').chromium;
   try {
-    const pwExtra = await import('playwright-extra');
-    const stealthMod = await import('puppeteer-extra-plugin-stealth');
-    const stealthFactory =
-      (stealthMod as { default?: () => unknown }).default ??
-      (stealthMod as unknown as () => unknown);
-    pwExtra.chromium.use(stealthFactory() as never);
-    chromium = pwExtra.chromium as unknown as typeof import('playwright').chromium;
-  } catch {
-    try {
-      const pw = await import('playwright');
-      chromium = pw.chromium;
-    } catch (innerErr) {
-      return { ok: false, requests: [], error: `Playwright not available: ${errMsg(innerErr)}` };
-    }
+    chromium = await getStealthChromium();
+  } catch (innerErr) {
+    return { ok: false, requests: [], error: `Playwright not available: ${errMsg(innerErr)}` };
   }
   try {
     replayLog(`launching browser (headed=${!!opts.headed})`);
-    browser = await chromium.launch({ headless: !opts.headed });
+    browser = await chromium.launch({
+      headless: !opts.headed,
+      executablePath: getStealthExecutablePath(),
+    });
   } catch (err) {
     replayLog(`browser launch failed: ${errMsg(err)}`);
     return { ok: false, requests: [], error: `Could not launch Chromium: ${errMsg(err)}` };
@@ -215,11 +208,19 @@ export async function replayRawSession(opts: RawReplayOptions): Promise<ReplayCa
       opts.onProgress?.(i + 1, replayableEvents.length, captured.length);
     }
-    // Allow final network requests to settle
+    // Allow final network requests to settle, but never block forever: on a
+    // large recording a single hung response-body read can stall allSettled
+    // indefinitely (there is no outer timeout on the replay stage). Cap the
+    // wait and proceed with whatever bodies are ready — replay-diff is
+    // best-effort, so partial captures are acceptable.
+    const SETTLE_TIMEOUT_MS = 15_000;
     replayLog('waiting for networkidle...');
-    await page.waitForLoadState('networkidle').catch(() => {});
+    await page.waitForLoadState('networkidle', { timeout: SETTLE_TIMEOUT_MS }).catch(() => {});
     await page.waitForTimeout(1000);
-    await Promise.allSettled(pendingReads);
+    await Promise.race([
+      Promise.allSettled(pendingReads),
+      new Promise<void>((resolve) => setTimeout(resolve, SETTLE_TIMEOUT_MS)),
+    ]);
     captured.sort((a, b) => a.seq - b.seq);
     replayLog(`replay complete: captured ${captured.length} requests total`);

package/src/imprint/runtime.ts CHANGED Viewed

@@ -113,14 +113,23 @@ export async function executeWorkflow<T = unknown>(opts: ExecuteOptions): Promis
     (await loadCredentialStore(opts.workflow.site)) ??
     emptyStore(opts.workflow.site);
-  // Validate required parameters are present.
+  // Validate required parameters are present and merge declared defaults
+  // into the working params map. Without the merge, `parameter.default` would
+  // be a presence-sentinel only — the substitution layer at
+  // `resolvePlaceholder` would still throw STATE_MISSING because it reads
+  // from this map directly. The schema declares `default` as a real value
+  // (string | number | boolean), so honor it.
+  const params: Record<string, string | number | boolean> = { ...opts.params };
   for (const p of opts.workflow.parameters) {
-    if (!(p.name in opts.params) && p.default === undefined) {
-      return {
-        ok: false,
-        error: 'UNKNOWN',
-        message: `Missing required parameter: ${p.name} (${p.description})`,
-      };
+    if (!(p.name in params)) {
+      if (p.default === undefined) {
+        return {
+          ok: false,
+          error: 'UNKNOWN',
+          message: `Missing required parameter: ${p.name} (${p.description})`,
+        };
+      }
+      params[p.name] = p.default;
     }
   }
@@ -163,7 +172,7 @@ export async function executeWorkflow<T = unknown>(opts: ExecuteOptions): Promis
     if (!req) continue;
     const subbedResult = substituteRequest(req, {
-      params: opts.params,
+      params,
       credentials: liveCredentials,
       responseSlots,
       state,
@@ -180,7 +189,7 @@ export async function executeWorkflow<T = unknown>(opts: ExecuteOptions): Promis
           subbed.method,
           subbed.url,
           responseSlots.map((s) => s.raw),
-          opts.params,
+          params,
         );
         if (typeof transformResult === 'string') {
           subbed.url = transformResult;
@@ -312,7 +321,7 @@ export async function executeWorkflow<T = unknown>(opts: ExecuteOptions): Promis
         };
       }
       finalData = mod.extract(finalData, {
-        params: opts.params,
+        params,
         responses: responseSlots.map((s) => s.raw),
       });
     } catch (err) {

package/src/imprint/sensitive-keys.ts CHANGED Viewed

@@ -112,15 +112,126 @@ const SENSITIVE_KEYS = [
   'dob',
 ];
-const SENSITIVE_KEY_SET = new Set(SENSITIVE_KEYS.map((k) => k.toLowerCase()));
+// `normalizeKey` (exported further down as an alias of `_normalize`) lowercases
+// a key and strips `_`/`-`. Set lookups normalize the probe key first, so the
+// stored entries MUST be normalized the same way — otherwise a probe for
+// `j_password` (normalized to `jpassword`) would miss an entry stored verbatim
+// as `j_password`. The rule is declared here, ahead of the sets that need it,
+// rather than reordering the file around the exported name.
+const _normalize = (s: string): string => s.toLowerCase().replace(/[-_]/g, '');
+const SENSITIVE_KEY_SET = new Set(SENSITIVE_KEYS.map(_normalize));
 /** Subset of SENSITIVE_KEYS that specifically denote a credential (not PII).
  *  Used by credential-extract.ts when looking for the password half of a
- *  login form pair — we don't want to treat e.g. `dob` as a password. */
-const PASSWORD_LIKE_KEYS = new Set(
-  ['password', 'passwd', 'pwd', 'pin', 'patronpassword', 'patron_password'].map((k) =>
-    k.toLowerCase(),
-  ),
+ *  login form pair — we don't want to treat e.g. `dob` as a password.
+ *
+ *  Inclusion criterion: a key name that, when present in a request body
+ *  alongside a username-like partner, almost always means "this is the
+ *  password the user typed at login time." Be liberal here — false positives
+ *  cost the user one extra prompt confirmation; false negatives ship broken
+ *  tools. New additions should reference a real recorded site that broke
+ *  without them.
+ *
+ *  Sites observed needing each entry:
+ *    - password / passwd / pwd:                most modern APIs
+ *    - pin:                                    bank / utility login forms
+ *    - pass:                                   legacy PHP forms (e.g. SMF)
+ *    - secret:                                 OAuth ROPC payloads
+ *    - j_password:                             Java EE / Spring Security default form-login
+ *    - userpassword / loginpassword / accountpassword:
+ *                                              vendor SSO portals that namespace fields
+ *    - patronpassword / patron_password:       Discover & Go libraries (kept for back-compat)
+ */
+// Raw spellings, deliberately kept un-normalized: `passwordLikeTokens()` hands
+// this array back as-is to callers that match against raw body text.
+const PASSWORD_LIKE_ENTRIES = [
+  'password',
+  'passwd',
+  'pwd',
+  'pin',
+  'pass',
+  'secret',
+  'j_password',
+  'userpassword',
+  'loginpassword',
+  'accountpassword',
+  'patronpassword',
+  'patron_password',
+];
+// Normalized form (lowercased, `_`/`-` stripped) used for key-membership tests.
+// Spelling variants such as `patronpassword` / `patron_password` collapse to a
+// single member here.
+const PASSWORD_LIKE_KEYS = new Set(PASSWORD_LIKE_ENTRIES.map(_normalize));
+/** Subset of SENSITIVE_KEYS that specifically denote a username/email/login
+ *  identifier — the partner half of a username+password login pair.
+ *
+ *  Same inclusion criterion as PASSWORD_LIKE_KEYS: liberal coverage of real
+ *  recorded forms, narrow enough not to match arbitrary identifiers. This set
+ *  plays a different role from the PII entries (`email`, `phone`, etc.) in
+ *  SENSITIVE_KEYS: those get redacted as PII regardless, while only a key
+ *  listed here (which includes `email` itself) qualifies as the "username
+ *  partner" the credential extractor pairs with a password.
+ *
+ *  Entries are passed through `_normalize` before being stored, so the
+ *  underscore and non-underscore spellings below (e.g. `user_name` /
+ *  `username`) collapse to a single member; both are listed for readability.
+ *
+ *  Sites observed needing each entry:
+ *    - user / username / user_name / userid / user_id:
+ *                                              most APIs
+ *    - login / loginid / login_id / loginemail / login_email:
+ *                                              REST endpoints that name the form field after the action
+ *    - email / emailaddress / email_address:   email-as-username flows
+ *    - account / accountid / account_id:       enterprise SSO portals
+ *    - patron / patronnumber / patron_number / patronid / patron_id:
+ *                                              library systems (Discover & Go)
+ *    - j_username:                             Java EE / Spring Security default form-login
+ *    - signin / signinid / sign_in_id:         vendor SSO portals (Okta-style)
+ *    - usr / uid:                              legacy CGI / older PHP
+ *    - memberid / member_id / membername / member_name:
+ *                                              membership-driven sites (gyms, clubs)
+ *    - customerid / customer_id / customernumber / customer_number:
+ *                                              ecommerce account portals
+ *    - clientid / client_id / clientnumber / client_number:
+ *                                              B2B portals (CAUTION: also matches OAuth client_id;
+ *                                              credential-extract.ts gates on having a password
+ *                                              partner in the same parent, so OAuth token endpoints
+ *                                              that pass client_id without a password won't match)
+ */
+const USERNAME_LIKE_KEYS = new Set(
+  [
+    'user',
+    'username',
+    'user_name',
+    'userid',
+    'user_id',
+    'login',
+    'loginid',
+    'login_id',
+    'loginemail',
+    'login_email',
+    'email',
+    'emailaddress',
+    'email_address',
+    'account',
+    'accountid',
+    'account_id',
+    'patron',
+    'patronnumber',
+    'patron_number',
+    'patronid',
+    'patron_id',
+    'j_username',
+    'signin',
+    'signinid',
+    'sign_in_id',
+    'usr',
+    'uid',
+    'memberid',
+    'member_id',
+    'membername',
+    'member_name',
+    'customerid',
+    'customer_id',
+    'customernumber',
+    'customer_number',
+    'clientid',
+    'client_id',
+    'clientnumber',
+    'client_number',
+  ].map(_normalize),
 );
 const SENSITIVE_HEADERS = [
@@ -138,7 +249,7 @@ const SENSITIVE_HEADERS = [
 const SENSITIVE_HEADER_SET = new Set(SENSITIVE_HEADERS.map((h) => h.toLowerCase()));
-export const normalizeKey = (s: string): string => s.toLowerCase().replace(/[-_]/g, '');
+export const normalizeKey = _normalize;
 /** True if the key name suggests a sensitive value (auth, payment, PII). */
 export function isSensitiveKey(key: string): boolean {
@@ -151,6 +262,29 @@ export function isSensitiveCredentialKey(key: string): boolean {
   return PASSWORD_LIKE_KEYS.has(normalizeKey(key));
 }
+/** Report whether `key` names a username/email/login identifier — the partner
+ *  half of a login pair. The key is normalized (lowercased, `_`/`-` stripped)
+ *  before the lookup, so `User_Name`, `user-name` and `username` all match.
+ *  Intended for credential extraction and for the pre-emit guardrail that
+ *  flags workflows templating credentials as plain parameters. */
+export function isUsernameLikeKey(key: string): boolean {
+  const normalized = normalizeKey(key);
+  return USERNAME_LIKE_KEYS.has(normalized);
+}
+/** Report whether `key` names either half of a login pair (username or
+ *  password). The key is normalized once and checked against both sets.
+ *  Intended for the pre-emit guardrail and the post-redact pairing audit,
+ *  which only need to know "is this parameter name credential-shaped?" and
+ *  not which half it is. */
+export function isLoginFieldKey(key: string): boolean {
+  const normalized = normalizeKey(key);
+  if (PASSWORD_LIKE_KEYS.has(normalized)) return true;
+  return USERNAME_LIKE_KEYS.has(normalized);
+}
+/** Return the raw (pre-normalization) password-like key spellings, for callers
+ *  that substring-match against raw body text instead of looking up a parsed
+ *  key. The module's own array is returned, not a copy; the `readonly` return
+ *  type is the only thing discouraging mutation. */
+export function passwordLikeTokens(): readonly string[] {
+  const rawTokens: readonly string[] = PASSWORD_LIKE_ENTRIES;
+  return rawTokens;
+}
 export function isSensitiveHeader(header: string): boolean {
   return SENSITIVE_HEADER_SET.has(header.toLowerCase());
 }

package/src/imprint/session-diff.ts CHANGED Viewed

@@ -318,6 +318,17 @@ function suggestStateName(location: string): string {
     .toLowerCase();
 }
+/** Heuristic: does `v` look like an opaque token/id rather than human text, a
+ *  city name, or a date? Gates provenance-tagging of stable values so that an
+ *  incidental constant (a UI label, the echoed query) isn't treated as a
+ *  server-provided token. Shared with the build-plan token detector.
+ *
+ *  A value qualifies when it is at least 12 characters long, contains no
+ *  whitespace, is not a bare `YYYY-MM-DD` date, and contains a digit or one of
+ *  the separators `:`, `|`, `_`, `-`.
+ *
+ *  NOTE(review): a bare `YYYY-MM-DD` string is 10 characters, so the length
+ *  check already rejects it and the date test never decides the outcome;
+ *  longer date-times (e.g. ISO-8601 with a time part) are accepted as tokens.
+ *  Confirm whether that is intended. */
+export function looksLikeToken(v: string): boolean {
+  const tooShort = v.length < 12;
+  const hasWhitespace = /\s/.test(v); // multi-word / free text
+  const isBareDate = /^\d{4}-\d{2}-\d{2}$/.test(v);
+  if (tooShort || hasWhitespace || isBareDate) return false;
+  return /[:|_-]|\d/.test(v);
+}
 // ─── Main diff ──────────────────────────────────────────────────────────────
 export function diffTriagedSessions(
@@ -327,6 +338,12 @@ export function diffTriagedSessions(
   const pairs = alignRequests(original.requests, replay.requests);
   const pairedOrigSeqs = new Set(pairs.map((p) => p.originalSeq));
   const pairedReplaySeqs = new Set(pairs.map((p) => p.replaySeq));
+  // `searchPriorResponses` over the replay returns a producer in REPLAY-seq
+  // space, but `originalSeq` and every downstream consumer (capture hints,
+  // build-plan token detection, the planner) work in ORIGINAL-seq space — so a
+  // replay producer must be translated back via the alignment pairs.
+  const replayToOriginal = new Map(pairs.map((p) => [p.replaySeq, p.originalSeq]));
+  const toOriginalSeq = (replaySeq: number): number => replayToOriginal.get(replaySeq) ?? replaySeq;
   const classifications: ClassifiedValue[] = [];
@@ -347,17 +364,28 @@ export function diffTriagedSessions(
       if (v2Value === undefined) continue; // field only in run 1
       if (v1.value === v2Value) {
+        // Stable across runs. Normally a constant — but an OPAQUE stable value
+        // that also appears in a PRIOR response is a server-PROVIDED token (e.g.
+        // a per-entity id minted by a sibling search tool). The same-flow replay
+        // can't expose it by variance (same entity → same token), so recover its
+        // provenance from the original responses (already original-seq space).
+        // A cross-tool consumer then sources it as a param instead of hardcoding.
+        const provider = looksLikeToken(v1.value)
+          ? searchPriorResponses(v1.value, original.requests, pair.originalSeq)
+          : null;
         classifications.push({
           classification: 'constant',
           location: v1.location,
           originalSeq: pair.originalSeq,
           value1: v1.value,
           value2: v2Value,
+          ...(provider ? { producerSeq: provider.seq, producerPath: provider.path } : {}),
         });
         continue;
       }
-      // Value differs — check if it came from a prior response in run 2
+      // Value differs — check if it came from a prior response in run 2,
+      // translating the replay producer back to original-seq space.
       const producer = searchPriorResponses(v2Value, replay.requests, pair.replaySeq);
       if (producer) {
@@ -368,7 +396,7 @@ export function diffTriagedSessions(
           originalSeq: pair.originalSeq,
           value1: v1.value,
           value2: v2Value,
-          producerSeq: producer.seq,
+          producerSeq: toOriginalSeq(producer.seq),
           producerPath: producer.path,
           suggestedStateName: name || undefined,
         });
@@ -407,3 +435,52 @@ export function triageByAlignment(
   const aligned = alignRequests(run1TriagedRequests, run2AllRequests);
   return aligned.filter((pair) => pair.confidence >= 0.5).map((pair) => pair.replaySeq);
 }
+/**
+ * Severity order used when merging diff passes — a value seen varying in ANY
+ * pass outranks one seen constant, and `server_derived` (traceable to a
+ * response) outranks `browser_minted`.
+ */
+const CLASSIFICATION_RANK: Record<ValueClassification, number> = {
+  constant: 0,
+  browser_minted: 1,
+  server_derived: 2,
+};
+/**
+ * Merge `ClassifiedValue`s from several diff passes that all share the SAME
+ * `original` recording (so `originalSeq` is a stable join key across passes).
+ *
+ * Each pass diffs the original recording against one other run — the automated
+ * browser replay AND every other real recording of the site. Anti-bot edges
+ * (Akamai, DataDome, …) often block the automated replay at the page level, so
+ * the replay reproduces only a fraction of the recording's requests and their
+ * functional values (GraphQL safelisting signatures, persisted-query hashes,
+ * app keys) never get classified. Real recordings come from a trusted browser
+ * and DO carry those requests, so diffing recordings against each other
+ * recovers the missing signal.
+ *
+ * Merge rule per (originalSeq, location):
+ *   - a value that VARIES in any pass is ephemeral — the strongest non-constant
+ *     classification wins (server_derived > browser_minted), preserving its
+ *     producer provenance;
+ *   - a value constant in every pass that observed it is `constant`;
+ *   - on equal rank the entry from the EARLIEST pass is kept.
+ * A value the replay never observed (because it was blocked) but that is
+ * identical across time-separated recordings is therefore kept as `constant`,
+ * not silently dropped.
+ *
+ * @param passes One array of classified values per diff pass; may be empty.
+ * @returns One entry per distinct (originalSeq, location), ordered by the first
+ *   time each key was seen.
+ */
+export function mergeClassifications(passes: ClassifiedValue[][]): ClassifiedValue[] {
+  const byKey = new Map<string, ClassifiedValue>();
+  for (const pass of passes) {
+    for (const cv of pass) {
+      // The separator is required: without it (1, "2.id") and (12, ".id") both
+      // produce "12.id" and one of the two values is silently discarded. NUL is
+      // used because the stringified sequence number can never contain it.
+      const key = `${cv.originalSeq}\u0000${cv.location}`;
+      const prev = byKey.get(key);
+      if (
+        !prev ||
+        CLASSIFICATION_RANK[cv.classification] > CLASSIFICATION_RANK[prev.classification]
+      ) {
+        byKey.set(key, cv);
+      }
+    }
+  }
+  return [...byKey.values()];
+}

package/src/imprint/session-merge.ts CHANGED Viewed

@@ -7,7 +7,7 @@
  * pipeline consumes unchanged.
  */
-import { existsSync, readFileSync, readdirSync, writeFileSync } from 'node:fs';
+import { existsSync, mkdirSync, readFileSync, readdirSync, writeFileSync } from 'node:fs';
 import { join as pathJoin } from 'node:path';
 import { localSessionsDir } from './paths.ts';
 import { friendlySessionTimestamp } from './teach-state.ts';
@@ -34,10 +34,13 @@ interface SessionInfo {
 }
 export function listSiteSessions(site: string): SessionInfo[] {
-  const sessDir = localSessionsDir(site);
-  if (!existsSync(sessDir)) return [];
+  return listSessionsInDir(localSessionsDir(site));
+}
+export function listSessionsInDir(dir: string): SessionInfo[] {
+  if (!existsSync(dir)) return [];
-  const files = readdirSync(sessDir).filter(
+  const files = readdirSync(dir).filter(
     (f) =>
       f.endsWith('.json') &&
       !f.includes('.redacted') &&
@@ -47,7 +50,7 @@ export function listSiteSessions(site: string): SessionInfo[] {
   const infos: SessionInfo[] = [];
   for (const filename of files) {
-    const absPath = pathJoin(sessDir, filename);
+    const absPath = pathJoin(dir, filename);
     try {
       const raw = JSON.parse(readFileSync(absPath, 'utf8'));
       const session = SessionSchema.parse(raw);
@@ -190,6 +193,7 @@ export function mergeSessions(sessions: Session[]): Session {
 export function writeCombinedSession(site: string, combined: Session): string {
   const sessDir = localSessionsDir(site);
+  mkdirSync(sessDir, { recursive: true });
   const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
   const filename = `combined-${timestamp}.json`;
   const absPath = pathJoin(sessDir, filename);

package/src/imprint/stealth-chromium.ts ADDED Viewed

@@ -0,0 +1,81 @@
+import { findChromium } from './chromium.ts';
+/**
+ * Shared loader for Playwright's chromium with the stealth plugin applied.
+ *
+ * Stealth patches navigator.webdriver, plugin enumeration, WebGL vendor
+ * strings, and other headless-Chrome telltales that anti-bot services
+ * (Akamai, Cloudflare, PerimeterX) detect. Vanilla headless Playwright
+ * gets tarpitted or 403'd by these services; the stealth-patched chromium
+ * loads the same pages in seconds.
+ *
+ * Any failure on the stealth path — either package failing to import, or the
+ * plugin factory / `use()` call throwing — falls back to vanilla `playwright`
+ * (the graceful-degrade behavior of the original duplicated loaders in
+ * playbook-runner, replay-capture, and backend-ladder).
+ *
+ * NOTE(review): the plugin is registered via `use()` on every call, not once
+ * per process — confirm playwright-extra tolerates repeated registration.
+ *
+ * @returns The stealth-wrapped chromium launcher, or vanilla chromium on fallback.
+ * @throws If `playwright` itself cannot be imported — callers translate the
+ *   thrown error into their own result shape.
+ */
+export async function getStealthChromium(): Promise<typeof import('playwright').chromium> {
+  type Chromium = typeof import('playwright').chromium;
+  type StealthFactory = () => unknown;
+  try {
+    const extra = await import('playwright-extra');
+    const stealthModule = await import('puppeteer-extra-plugin-stealth');
+    // The factory is either the module's `default` export or the module itself.
+    const viaDefault = (stealthModule as { default?: StealthFactory }).default;
+    const createStealth = viaDefault ?? (stealthModule as unknown as StealthFactory);
+    extra.chromium.use(createStealth() as never);
+    return extra.chromium as unknown as Chromium;
+  } catch {
+    const { chromium } = await import('playwright');
+    return chromium;
+  }
+}
+/**
+ * Report whether both `playwright-extra` and `puppeteer-extra-plugin-stealth`
+ * can be imported, i.e. whether getStealthChromium() is expected to take the
+ * stealth path and not the vanilla-Playwright fallback. Only importability is
+ * probed; the plugin is not instantiated or registered here.
+ *
+ * Callers use this to avoid stacking a manual `navigator.webdriver` patch on top
+ * of the plugin's: the stealth plugin removes the property the way a real Chrome
+ * does (it simply lacks `webdriver`), whereas a redundant
+ * `Object.defineProperty(navigator,'webdriver',{get:()=>false})` leaves a
+ * non-native property descriptor that is ITSELF a fingerprinting tell. So the
+ * manual patch should only run on the vanilla fallback, where it's the only
+ * protection. Import resolution is cached, so probing here is cheap.
+ *
+ * @returns `true` when both imports succeed, `false` if either one throws.
+ */
+export async function isStealthPluginAvailable(): Promise<boolean> {
+  let available = true;
+  try {
+    await import('playwright-extra');
+    await import('puppeteer-extra-plugin-stealth');
+  } catch {
+    available = false;
+  }
+  return available;
+}
+/**
+ * Path to the Chromium binary located by `findChromium()` — intended to be the
+ * same binary `imprint record` uses for the user's recording session:
+ * Playwright's bundled "Google Chrome for Testing" (full Chrome build), the
+ * system Chrome on macOS, or a Linux distro Chrome/Chromium package, in that
+ * order of preference (the lookup order lives in ./chromium.ts).
+ *
+ * Why this matters: by default Playwright's `chromium.launch({ headless: true })`
+ * picks `chrome-headless-shell` — a separate stripped-down binary that
+ * Akamai / Cloudflare / PerimeterX class anti-bot services detect at the
+ * binary/TLS-fingerprint layer regardless of how thoroughly the JS-level
+ * `navigator.webdriver` etc. are patched by the stealth plugin. The
+ * recording browser uses the FULL Chrome binary and Akamai trusts it; the
+ * replay browser using chrome-headless-shell looks like a bot. Using the
+ * SAME binary for both eliminates the binary asymmetry.
+ *
+ * @returns The located executable path, or `undefined` when `findChromium()`
+ *   throws — callers should then let Playwright fall back to its own default.
+ */
+export function getStealthExecutablePath(): string | undefined {
+  let located: string | undefined;
+  try {
+    located = findChromium();
+  } catch {
+    located = undefined;
+  }
+  return located;
+}