npm - imprint-mcp - Versions diffs - 0.2.0 → 0.3.0 - Mend

imprint-mcp 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (129)

package/README.md +165 -201
package/examples/discoverandgo/README.md +1 -1
package/examples/echo/README.md +1 -1
package/examples/google-flights/README.md +28 -0
package/examples/google-flights/_shared/batchexecute.ts +63 -0
package/examples/google-flights/_shared/flights_request.ts +95 -0
package/examples/google-flights/_shared/package.json +9 -0
package/examples/google-flights/get_flight_booking_details/index.ts +159 -0
package/examples/google-flights/get_flight_booking_details/package.json +9 -0
package/examples/google-flights/get_flight_booking_details/parser.ts +182 -0
package/examples/google-flights/get_flight_booking_details/playbook.yaml +138 -0
package/examples/google-flights/get_flight_booking_details/request-transform.ts +86 -0
package/examples/google-flights/get_flight_booking_details/workflow.json +98 -0
package/examples/google-flights/get_flight_calendar_prices/index.ts +131 -0
package/examples/google-flights/get_flight_calendar_prices/package.json +9 -0
package/examples/google-flights/get_flight_calendar_prices/parser.ts +86 -0
package/examples/google-flights/get_flight_calendar_prices/playbook.yaml +97 -0
package/examples/google-flights/get_flight_calendar_prices/request-transform.ts +31 -0
package/examples/google-flights/get_flight_calendar_prices/workflow.json +76 -0
package/examples/google-flights/lookup_airport/index.ts +101 -0
package/examples/google-flights/lookup_airport/package.json +9 -0
package/examples/google-flights/lookup_airport/parser.ts +66 -0
package/examples/google-flights/lookup_airport/playbook.yaml +47 -0
package/examples/google-flights/lookup_airport/request-transform.ts +20 -0
package/examples/google-flights/lookup_airport/workflow.json +57 -0
package/examples/google-flights/search_flights/index.ts +219 -0
package/examples/google-flights/search_flights/package.json +9 -0
package/examples/google-flights/search_flights/parser.ts +169 -0
package/examples/google-flights/search_flights/playbook.yaml +184 -0
package/examples/google-flights/search_flights/request-transform.ts +119 -0
package/examples/google-flights/search_flights/workflow.json +143 -0
package/examples/google-hotels/README.md +29 -0
package/examples/google-hotels/_shared/batchexecute.ts +73 -0
package/examples/google-hotels/_shared/freq.ts +158 -0
package/examples/google-hotels/_shared/package.json +9 -0
package/examples/google-hotels/autocomplete_hotel_location/index.ts +80 -0
package/examples/google-hotels/autocomplete_hotel_location/package.json +9 -0
package/examples/google-hotels/autocomplete_hotel_location/parser.ts +71 -0
package/examples/google-hotels/autocomplete_hotel_location/playbook.yaml +36 -0
package/examples/google-hotels/autocomplete_hotel_location/request-transform.ts +37 -0
package/examples/google-hotels/autocomplete_hotel_location/workflow.json +36 -0
package/examples/google-hotels/get_hotel_booking_options/index.ts +143 -0
package/examples/google-hotels/get_hotel_booking_options/package.json +9 -0
package/examples/google-hotels/get_hotel_booking_options/parser.ts +271 -0
package/examples/google-hotels/get_hotel_booking_options/playbook.yaml +154 -0
package/examples/google-hotels/get_hotel_booking_options/request-transform.ts +154 -0
package/examples/google-hotels/get_hotel_booking_options/workflow.json +84 -0
package/examples/google-hotels/get_hotel_reviews/index.ts +81 -0
package/examples/google-hotels/get_hotel_reviews/package.json +9 -0
package/examples/google-hotels/get_hotel_reviews/parser.ts +128 -0
package/examples/google-hotels/get_hotel_reviews/playbook.yaml +64 -0
package/examples/google-hotels/get_hotel_reviews/request-transform.ts +42 -0
package/examples/google-hotels/get_hotel_reviews/workflow.json +37 -0
package/examples/google-hotels/search_hotels/index.ts +207 -0
package/examples/google-hotels/search_hotels/package.json +9 -0
package/examples/google-hotels/search_hotels/parser.ts +260 -0
package/examples/google-hotels/search_hotels/playbook.yaml +87 -0
package/examples/google-hotels/search_hotels/request-transform.ts +197 -0
package/examples/google-hotels/search_hotels/workflow.json +127 -0
package/package.json +3 -2
package/prompts/audit-agent.md +71 -0
package/prompts/build-planning.md +74 -0
package/prompts/compile-agent.md +132 -28
package/prompts/prereq-builder.md +64 -0
package/prompts/prereq-planner.md +34 -0
package/prompts/tool-planning.md +39 -0
package/src/cli.ts +111 -4
package/src/imprint/agent.ts +5 -0
package/src/imprint/audit.ts +996 -0
package/src/imprint/backend-ladder.ts +1214 -184
package/src/imprint/build-plan.ts +1051 -0
package/src/imprint/cdp-browser-fetch.ts +589 -0
package/src/imprint/cdp-jar-cache.ts +320 -0
package/src/imprint/chromium.ts +135 -0
package/src/imprint/claude-cli-compile.ts +125 -25
package/src/imprint/codex-cli-compile.ts +26 -23
package/src/imprint/compile-agent-types.ts +38 -0
package/src/imprint/compile-agent.ts +65 -27
package/src/imprint/compile-tools.ts +1656 -64
package/src/imprint/compile.ts +14 -2
package/src/imprint/concurrency.ts +87 -0
package/src/imprint/credential-extract.ts +174 -25
package/src/imprint/cron.ts +1 -0
package/src/imprint/doctor.ts +39 -0
package/src/imprint/emit.ts +85 -0
package/src/imprint/freeform-redact.ts +5 -4
package/src/imprint/integrations.ts +2 -2
package/src/imprint/llm.ts +56 -8
package/src/imprint/mcp-compile-server.ts +43 -10
package/src/imprint/mcp-maintenance.ts +9 -101
package/src/imprint/mcp-server.ts +73 -7
package/src/imprint/multi-progress.ts +7 -2
package/src/imprint/param-grounding.ts +367 -0
package/src/imprint/paths.ts +29 -0
package/src/imprint/playbook-runner.ts +101 -40
package/src/imprint/prereq-builder.ts +651 -0
package/src/imprint/probe-backends.ts +6 -3
package/src/imprint/record.ts +10 -1
package/src/imprint/redact.ts +30 -2
package/src/imprint/replay-capture.ts +19 -18
package/src/imprint/runtime.ts +19 -10
package/src/imprint/sensitive-keys.ts +141 -7
package/src/imprint/session-diff.ts +79 -2
package/src/imprint/session-merge.ts +9 -5
package/src/imprint/stealth-chromium.ts +81 -0
package/src/imprint/stealth-fetch.ts +309 -29
package/src/imprint/stealth-token-cache.ts +88 -0
package/src/imprint/teach-plan.ts +251 -0
package/src/imprint/teach-state.ts +17 -0
package/src/imprint/teach.ts +582 -147
package/src/imprint/tool-candidates.ts +72 -14
package/src/imprint/tool-plan.ts +313 -0
package/src/imprint/tracing.ts +135 -6
package/src/imprint/types.ts +61 -3
package/examples/google-flights/search_google_flights/index.ts +0 -101
package/examples/google-flights/search_google_flights/parser.test.ts +0 -140
package/examples/google-flights/search_google_flights/parser.ts +0 -189
package/examples/google-flights/search_google_flights/playbook.yaml +0 -130
package/examples/google-flights/search_google_flights/workflow.json +0 -48
package/examples/google-hotels/search_google_hotels/index.ts +0 -194
package/examples/google-hotels/search_google_hotels/parser.test.ts +0 -168
package/examples/google-hotels/search_google_hotels/parser.ts +0 -330
package/examples/google-hotels/search_google_hotels/playbook.yaml +0 -125
package/examples/google-hotels/search_google_hotels/workflow.json +0 -111
package/examples/namecheap-domains/search_namecheap_domains/index.ts +0 -144
package/examples/namecheap-domains/search_namecheap_domains/parser.ts +0 -380
package/examples/namecheap-domains/search_namecheap_domains/playbook.yaml +0 -50
package/examples/namecheap-domains/search_namecheap_domains/request-transform.ts +0 -136
package/examples/namecheap-domains/search_namecheap_domains/workflow.json +0 -97

package/src/imprint/compile.ts CHANGED Viewed

@@ -19,6 +19,7 @@ import {
 import { dirname, join as pathJoin } from 'node:path';
 import type { OnDeadlineReached } from './agent.ts';
 import { inferAppApiHosts } from './app-api-hosts.ts';
+import type { SharedModuleManifestEntry } from './build-plan.ts';
 import { type CompileAgentProgress, compileAgent } from './compile-agent.ts';
 import { isSameRegistrableDomain, registrableDomain } from './etld.ts';
 import { type LLMOptions, extractJsonArray, resolveProvider } from './llm.ts';
@@ -82,6 +83,14 @@ interface GenerateOptions extends CompileOptions {
   classifications?: ClassifiedValue[];
   /** Credential values extracted during teach, passed to integration tests via env var. */
   teachCredentials?: { site: string; values: Record<string, string> };
+  /** Absolute path to the multi-tool build plan sidecar (.build-plan.json). */
+  buildPlanPath?: string;
+  /** Shared-module build manifest for this site (verified flags). */
+  sharedModules?: SharedModuleManifestEntry[];
+  /** Per-tool implementation plan (param→field mapping, request construction,
+   *  response parsing, shared-module imports). Injected into the agent's initial
+   *  message so the compile follows it. */
+  toolPlan?: string;
 }
 interface GenerateResult {
@@ -122,6 +131,9 @@ export async function generate(opts: GenerateOptions): Promise<GenerateResult> {
         sharedContext: opts.sharedContext,
         classifications: opts.classifications,
         teachCredentials: opts.teachCredentials,
+        buildPlanPath: opts.buildPlanPath,
+        sharedModules: opts.sharedModules,
+        toolPlan: opts.toolPlan,
       });
       setSpanAttributes(span, {
@@ -145,7 +157,7 @@ export async function generate(opts: GenerateOptions): Promise<GenerateResult> {
         ];
         if (result.outcome === 'timeout') {
           lines.push(
-            'hint: increase the timeout with --timeout (teach) or --max-duration (generate)',
+            'hint: most complex tools take 10-15 minutes. increase the timeout with --timeout (teach) or --max-duration (generate)',
           );
         }
         throw new Error(lines.join('\n'));
@@ -274,7 +286,7 @@ const TRIAGE_RESOURCE_TYPES = new Set(['XHR', 'Fetch', 'Document']);
 const HEADER_TRUNCATE_LIMIT = 200;
 // Per-request body cap for triage. Triage only needs enough body to distinguish
 // data-bearing POSTs (search/booking) from telemetry; full bodies on a busy
-// site can total >1MB and blow the 200K-token cap on `claude-opus-4-7`.
+// site can total >1MB and blow the 200K-token cap on `claude-opus-4-8`.
 const TRIAGE_BODY_LIMIT = 500;
 export interface TriageResult {

package/src/imprint/concurrency.ts ADDED Viewed

@@ -0,0 +1,87 @@
+/**
+ * Bounded-concurrency fan-out helpers shared across the teach pipeline.
+ *
+ * Lives in its own module (rather than teach.ts) so leaf modules like
+ * teach-plan.ts can reuse it without importing teach.ts, which would create an
+ * import cycle (teach.ts → teach-plan.ts → teach.ts). teach.ts re-exports both
+ * for backwards compatibility with existing callers + tests.
+ */
+/** Run `fn` over `items` with at most `concurrency` in flight, preserving input
+ *  order in the result.
+ *
+ *  `concurrency` is clamped to a whole number of at least 1, so `0`, negative
+ *  values, fractions below 1, or `NaN` degrade to one-at-a-time execution
+ *  instead of silently returning an array of holes without ever calling `fn`.
+ *
+ *  Once any call rejects, no further items are started; in-flight calls are
+ *  allowed to settle and the first rejection is then rethrown (including a
+ *  rejection whose reason is `undefined`). Use mapLimitSettled when you need
+ *  per-item success/failure.
+ *
+ *  NOTE: entries that are `undefined` are skipped — `fn` is not called for them
+ *  and the matching result slot is left unset. */
+export async function mapLimit<T, R>(
+  items: T[],
+  concurrency: number,
+  fn: (item: T) => Promise<R>,
+): Promise<R[]> {
+  const results = new Array<R>(items.length);
+  // `Math.floor(NaN) || 1` is 1; Infinity passes through and is capped by
+  // items.length when the workers are created.
+  const limit = Math.max(1, Math.floor(concurrency) || 1);
+  let next = 0;
+  // Track failure with a flag rather than comparing the error to `undefined`,
+  // so a rejection with an `undefined` reason is not mistaken for success.
+  let failed = false;
+  let firstError: unknown;
+  const workers = Array.from({ length: Math.min(limit, items.length) }, async () => {
+    while (next < items.length && !failed) {
+      const index = next++;
+      const item = items[index];
+      if (item === undefined) continue;
+      try {
+        results[index] = await fn(item);
+      } catch (err) {
+        if (!failed) {
+          failed = true;
+          firstError = err;
+        }
+      }
+    }
+  });
+  await Promise.allSettled(workers);
+  if (failed) throw firstError;
+  return results;
+}
+type SettledResult<R> = { ok: true; value: R } | { ok: false; error: unknown };
+/** Like mapLimit, but never throws: each processed item resolves to a tagged
+ *  success/failure entry stored at its input index.
+ *
+ *  `concurrency` is clamped to a whole number of at least 1, so `0`, negative
+ *  values, fractions below 1, or `NaN` run the items one at a time instead of
+ *  returning an array of holes without ever calling `fn`.
+ *
+ *  NOTE: entries that are `undefined` are skipped — `fn` is not called for them
+ *  and the matching result slot is left unset. */
+export async function mapLimitSettled<T, R>(
+  items: T[],
+  concurrency: number,
+  fn: (item: T) => Promise<R>,
+): Promise<SettledResult<R>[]> {
+  const results = new Array<SettledResult<R>>(items.length);
+  // `Math.floor(NaN) || 1` is 1; Infinity passes through and is capped by
+  // items.length when the workers are created.
+  const limit = Math.max(1, Math.floor(concurrency) || 1);
+  let next = 0;
+  const workers = Array.from({ length: Math.min(limit, items.length) }, async () => {
+    while (next < items.length) {
+      const index = next++;
+      const item = items[index];
+      if (item === undefined) continue;
+      try {
+        results[index] = { ok: true, value: await fn(item) };
+      } catch (err) {
+        // A failure is recorded for this item only; the remaining items still run.
+        results[index] = { ok: false, error: err };
+      }
+    }
+  });
+  await Promise.allSettled(workers);
+  return results;
+}
+/** Raised by withTimeout when the deadline passes before the awaited work
+ *  settles. A dedicated class lets callers distinguish a timeout from an
+ *  ordinary failure. */
+export class TimeoutError extends Error {
+  constructor(label: string, ms: number) {
+    // The message reports the budget rounded to whole seconds.
+    const seconds = Math.round(ms / 1000);
+    super(`${label} exceeded ${seconds}s timeout`);
+    this.name = 'TimeoutError';
+  }
+}
+/** Await `work`, but give up after `ms` milliseconds.
+ *  On timeout the returned promise rejects with a TimeoutError built from
+ *  `label` and `ms`. The underlying work (e.g. a CLI child) is NOT cancelled —
+ *  the caller merely stops waiting and decides how to degrade. The timer is
+ *  cleared once the race settles, whichever side wins. */
+export async function withTimeout<T>(work: Promise<T>, ms: number, label: string): Promise<T> {
+  let handle: ReturnType<typeof setTimeout> | undefined;
+  const deadline = new Promise<never>((_resolve, reject) => {
+    handle = setTimeout(() => {
+      reject(new TimeoutError(label, ms));
+    }, ms);
+  });
+  return Promise.race([work, deadline]).finally(() => {
+    if (handle) clearTimeout(handle);
+  });
+}

package/src/imprint/credential-extract.ts CHANGED Viewed

@@ -12,14 +12,13 @@
  * value is visible and lets us confirm which form was the login form.
  */
-import { isSensitiveCredentialKey, normalizeKey } from './sensitive-keys.ts';
+import { isSensitiveCredentialKey, isUsernameLikeKey } from './sensitive-keys.ts';
 import type { CapturedEvent, CapturedRequest, Session } from './types.ts';
-/** Field-name patterns we'll treat as the username/email partner of a
- *  password field. Ordered by preference: emails first, then user-ish
- *  identifiers. */
-const USERNAME_KEY_RE =
-  /^(user(?:name|id)?|email(?:address)?|login(?:id)?|account|patron(?:number|id)?)$/i;
+/** Returns true when `key` looks like the username/email/login partner of a
+ *  password field. Delegates to `isUsernameLikeKey` from sensitive-keys.ts
+ *  (backed by `USERNAME_LIKE_KEYS`) so the dictionary stays in one place. */
+function isUsernameKey(key: string): boolean {
+  return isUsernameLikeKey(key);
+}
 /** Where, within a request, a redactable value lives. */
 export type ReplacementLocation =
@@ -58,6 +57,29 @@ interface ExtractionResult {
   replacements: Replacement[];
 }
+/** Parsers are tried in this order on every request that has a body. Each
+ *  one is side-effect-free and returns `null` when its input doesn't fit
+ *  its expected framing — so trying JSON first on a form body, or form on
+ *  a JSON body, is safe: only the parser that actually fits will produce a
+ *  finding. The first parser that returns a finding wins.
+ *
+ *  Dispatch is parser-driven, not Content-Type-driven, because real sites
+ *  routinely mislabel their bodies — the canonical example is the Nextep
+ *  cafe API (`Content-Type: text/plain` for JSON bodies). Letting the data
+ *  speak for itself prevents whole classes of silent extraction failures.
+ *
+ *  URL-query parsing is NOT part of this list: extractCredentials falls back
+ *  to findInUrlQuery when none of these parsers produced a finding, so it
+ *  also runs on requests without a body (e.g. GET-based logins that pass
+ *  credentials in the query string).
+ *
+ *  Multipart is checked before generic form-urlencoded because a multipart
+ *  body still contains `=` characters and would be parsed as a single
+ *  malformed form pair otherwise. */
+const BODY_PARSERS: Array<(r: CapturedRequest) => BodyFinding | null> = [
+  findInJsonBody,
+  findInJsonWrappedInForm,
+  findInMultipartBody,
+  findInFormBody,
+];
 /** Top-level entry point. */
 export function extractCredentials(session: Session): ExtractionResult {
   const findings: CredentialFinding[] = [];
@@ -65,13 +87,17 @@ export function extractCredentials(session: Session): ExtractionResult {
   const usernamesInDom = collectFormSubmitUsernames(session.events);
   for (const req of session.requests) {
-    if (!req.body) continue;
-    const ct = (req.headers['content-type'] ?? req.headers['Content-Type'] ?? '').toLowerCase();
-    const found = ct.includes('json')
-      ? findInJsonBody(req)
-      : ct.includes('urlencoded') || req.body.includes('=')
-        ? findInFormBody(req)
-        : null;
+    let found: BodyFinding | null = null;
+    if (req.body) {
+      for (const parse of BODY_PARSERS) {
+        found = parse(req);
+        if (found) break;
+      }
+    }
+    // Last-resort: credentials in the URL query string (rare but real for
+    // some legacy GET-based login endpoints). Tried after body parsers so
+    // body-based logins always win when both are present.
+    if (!found) found = findInUrlQuery(req);
     if (!found) continue;
     const confirmedByDom = usernamesInDom.has(found.usernameValue);
@@ -132,7 +158,7 @@ function findInFormBody(req: CapturedRequest): BodyFinding | null {
   // Second pass: find a username-like key.
   for (const { key, value } of pairs) {
-    if (USERNAME_KEY_RE.test(normalizeKey(key)) && value.length > 0) {
+    if (isUsernameKey(key) && value.length > 0) {
       usernameKey = key;
       usernameValue = value;
       break;
@@ -163,11 +189,7 @@ function findInJsonBody(req: CapturedRequest): BodyFinding | null {
   if (typeof pwdHit.value !== 'string' || pwdHit.value.length === 0) return null;
   // Look for a username-like key; prefer one in the same parent object.
-  const userHit = findFirstByPredicate(
-    parsed,
-    (k) => USERNAME_KEY_RE.test(normalizeKey(k)),
-    pwdHit.parent,
-  );
+  const userHit = findFirstByPredicate(parsed, isUsernameKey, pwdHit.parent);
   if (!userHit || typeof userHit.value !== 'string' || userHit.value.length === 0) return null;
   return {
@@ -178,6 +200,138 @@ function findInJsonBody(req: CapturedRequest): BodyFinding | null {
   };
 }
+/** Covers legacy framings in which a whole JSON document travels as the value
+ *  of one form-encoded field — `payload={"username":"…","password":"…"}`,
+ *  `data=…`, `request=…` and similar (seen in real PHP / ColdFusion apps).
+ *  The inner pairing is delegated to findInJsonBody through a synthetic copy
+ *  of the request, and both locations are then reported as `body-form` on the
+ *  wrapper key so the redactor knows to swap the whole inner JSON string back
+ *  in. Returns `null` when no wrapper field yields a finding. */
+function findInJsonWrappedInForm(req: CapturedRequest): BodyFinding | null {
+  const rawBody = req.body;
+  if (!rawBody) return null;
+  const fields = parseFormBody(rawBody);
+  if (fields.length === 0) return null;
+  const wrapperNames = new Set(['payload', 'data', 'request', 'json', 'body']);
+  for (const field of fields) {
+    const wrapperKey = field.key;
+    const candidate = field.value;
+    if (!wrapperNames.has(wrapperKey.toLowerCase())) continue;
+    const looksLikeJson = candidate.startsWith('{') || candidate.startsWith('[');
+    if (!looksLikeJson) continue;
+    // Re-run the JSON parser against a copy of the request whose body is the
+    // unwrapped JSON text.
+    const innerFinding = findInJsonBody({ ...req, body: candidate });
+    if (!innerFinding) continue;
+    // The redactor matches on `originalValue` regardless of `location`; the
+    // location is still rewritten in form-key terms so it stays semantically
+    // correct for future readers.
+    const wrapperLocation = { kind: 'body-form' as const, key: wrapperKey };
+    return {
+      ...innerFinding,
+      usernameLocation: { ...wrapperLocation },
+      passwordLocation: { ...wrapperLocation },
+    };
+  }
+  return null;
+}
+/** Parse a multipart/form-data body into {key, value} pairs and pair them like
+ *  the form-urlencoded path. Defensive: any malformed part is skipped.
+ *
+ *  The boundary is sniffed from the first line (`--<boundary>`) rather than
+ *  taken from the Content-Type header, because the whole point of this
+ *  module is to not trust Content-Type.
+ *
+ *  The field name is read from the `name="…"` parameter of the part headers.
+ *  The match is anchored to a parameter boundary so that `filename="…"` —
+ *  which contains the text `name="` — is never mistaken for the field name
+ *  when it appears before `name` in the header.
+ *
+ *  Returns `null` when the body is not multipart-shaped, no part yields a
+ *  name, or no username/password pair can be formed. */
+function findInMultipartBody(req: CapturedRequest): BodyFinding | null {
+  if (!req.body) return null;
+  const body = req.body;
+  // First line should be `--<boundary>`. If it doesn't start with `--` or
+  // there's no following newline, this isn't multipart.
+  const firstNewline = body.indexOf('\n');
+  if (firstNewline < 0) return null;
+  const firstLine = body.slice(0, firstNewline).trimEnd();
+  if (!firstLine.startsWith('--')) return null;
+  const boundary = firstLine.slice(2);
+  if (boundary.length === 0 || boundary.length > 200) return null;
+  // Split on the boundary; skip the prologue (empty before first boundary)
+  // and the epilogue (after closing `--<boundary>--`).
+  const sep = `--${boundary}`;
+  const parts = body.split(sep).slice(1);
+  const pairs: Array<{ key: string; value: string }> = [];
+  for (const partRaw of parts) {
+    const part = partRaw.startsWith('\r\n')
+      ? partRaw.slice(2)
+      : partRaw.startsWith('\n')
+        ? partRaw.slice(1)
+        : partRaw;
+    if (part.startsWith('--')) break; // closing boundary
+    // Headers and body are separated by a blank line (CRLF CRLF, or LF LF
+    // for bodies whose line endings were normalised).
+    const headerEnd = part.indexOf('\r\n\r\n');
+    const headerEnd2 = headerEnd >= 0 ? headerEnd : part.indexOf('\n\n');
+    if (headerEnd2 < 0) continue;
+    const sepLen = headerEnd >= 0 ? 4 : 2;
+    const headers = part.slice(0, headerEnd2);
+    let value = part.slice(headerEnd2 + sepLen);
+    // Strip the trailing CRLF that precedes the next boundary.
+    value = value.replace(/\r?\n$/, '');
+    // Require start-of-headers, `;`, or whitespace before `name=` so the
+    // `name="` inside `filename="…"` cannot match.
+    const nameMatch = headers.match(/(?:^|[;\s])name="([^"]*)"/i);
+    if (!nameMatch) continue;
+    const key = nameMatch[1] ?? '';
+    if (!key) continue;
+    pairs.push({ key, value });
+  }
+  if (pairs.length === 0) return null;
+  return pairFromKeyValuePairs(pairs, 'body-form');
+}
+/** Looks for credentials carried in the URL query string —
+ *  `GET /login?username=…&password=…`, or a POST with an empty body whose
+ *  credentials ride in the URL. Rare, but real for some legacy CGI endpoints.
+ *  Returns `null` when the URL cannot be parsed, has no query, or yields no
+ *  username/password pair. */
+function findInUrlQuery(req: CapturedRequest): BodyFinding | null {
+  let parsedUrl: URL;
+  try {
+    parsedUrl = new URL(req.url);
+  } catch {
+    return null;
+  }
+  // Drop the leading `?` so the query can go through the form-body parser.
+  const query = parsedUrl.search.replace(/^\?/, '');
+  if (query.length === 0) return null;
+  const fields = parseFormBody(query);
+  return fields.length === 0 ? null : pairFromKeyValuePairs(fields, 'body-form');
+}
+/** Shared pairing step for every parser that flattens its input into
+ *  key/value pairs (form, multipart, URL query). Picks the first non-empty
+ *  pair whose key is credential-sensitive as the password, then the first
+ *  non-empty pair whose key is username-like as the username. Returns `null`
+ *  unless both are found. `kind` is copied unchanged into both locations. */
+function pairFromKeyValuePairs(
+  pairs: Array<{ key: string; value: string }>,
+  kind: 'body-form',
+): BodyFinding | null {
+  const passwordPair = pairs.find(
+    (pair) => isSensitiveCredentialKey(pair.key) && pair.value.length > 0,
+  );
+  // Without a password there is nothing to pair, so the username scan is skipped.
+  if (passwordPair === undefined) return null;
+  const usernamePair = pairs.find((pair) => isUsernameKey(pair.key) && pair.value.length > 0);
+  if (usernamePair === undefined) return null;
+  return {
+    usernameValue: usernamePair.value,
+    passwordValue: passwordPair.value,
+    usernameLocation: { kind, key: usernamePair.key },
+    passwordLocation: { kind, key: passwordPair.key },
+  };
+}
 interface JsonHit {
   key: string;
   value: unknown;
@@ -238,12 +392,7 @@ function collectFormSubmitUsernames(events: CapturedEvent[]): Set<string> {
         fields?: Array<{ name?: string; type?: string; value?: string }>;
       };
       for (const f of detail.fields ?? []) {
-        if (
-          f.name &&
-          f.value &&
-          f.type !== 'password' &&
-          USERNAME_KEY_RE.test(normalizeKey(f.name))
-        ) {
+        if (f.name && f.value && f.type !== 'password' && isUsernameKey(f.name)) {
           out.add(f.value);
         }
       }

package/src/imprint/cron.ts CHANGED Viewed

@@ -242,6 +242,7 @@ async function runCronImpl(opts: RunCronOptions): Promise<void> {
   if (
     ladder.includes('fetch') ||
     ladder.includes('fetch-bootstrap') ||
+    ladder.includes('cdp-replay') ||
     ladder.includes('stealth-fetch')
   ) {
     const validator = buildZodValidator(tool.workflow.parameters);

package/src/imprint/doctor.ts CHANGED Viewed

@@ -1,6 +1,7 @@
 /** `imprint doctor` — check that the environment can actually run imprint.
  *  Reports pass/fail per prerequisite plus a one-line fix when failed. */
+import { spawnSync } from 'node:child_process';
 import { existsSync, readFileSync, readdirSync } from 'node:fs';
 import { homedir } from 'node:os';
 import { join as pathJoin } from 'node:path';
@@ -20,6 +21,7 @@ export function doctor(): CheckResult[] {
     checkBun(),
     checkChromium(),
     checkPlaywrightChromium(),
+    checkVirtualDisplay(),
     checkLLMProvider(),
     checkPushOptional(),
     checkClaudeCode(),
@@ -87,6 +89,43 @@ function checkPlaywrightChromium(): CheckResult {
   };
 }
+/** Reports whether `command -v Xvfb` succeeds in `sh` (exit status 0).
+ *  Any exception raised while spawning the probe is treated as "not found". */
+function hasXvfbBinary(): boolean {
+  let probe: ReturnType<typeof spawnSync>;
+  try {
+    probe = spawnSync('sh', ['-c', 'command -v Xvfb'], { stdio: 'ignore' });
+  } catch {
+    return false;
+  }
+  return probe.status === 0;
+}
+/** Advisory check for a display usable by the headed-replay fallback.
+ *
+ *  The trusted-browser replay (playbook rung's cdp-browser transport) runs
+ *  Chrome HEADLESS by default and needs NO display — the `HeadlessChrome` UA
+ *  token is stripped so anti-bot services don't edge-block it. A display only
+ *  matters as a fallback on a GPU-less Linux host, where headless WebGL reports
+ *  SwiftShader and the replay must run HEADED under Xvfb (launchChromium
+ *  auto-starts it when a headed launch finds no `$DISPLAY`). macOS/Windows need
+ *  nothing. Every branch reports `ok: true`; only the detail (and, in the last
+ *  case, a fix hint) differs. */
+function checkVirtualDisplay(): CheckResult {
+  const name = 'Display (headed replay)';
+  const { platform } = process;
+  if (platform !== 'linux') {
+    return { name, ok: true, detail: `${platform}: native window server (no Xvfb needed)` };
+  }
+  const displayEnv = process.env.DISPLAY;
+  if (displayEnv) {
+    return { name, ok: true, detail: `$DISPLAY=${displayEnv}` };
+  }
+  if (!hasXvfbBinary()) {
+    // Still a pass: default replay is headless, Xvfb is only a GPU-less fallback.
+    return {
+      name,
+      ok: true,
+      detail:
+        'Linux, no $DISPLAY and no Xvfb — default replay is headless (fine); install Xvfb only if a GPU-less host gets bot-flagged',
+      fix: 'GPU-less host bot-flagged? install the headed-replay fallback: apt-get install xvfb (or export DISPLAY=:0)',
+    };
+  }
+  return {
+    name,
+    ok: true,
+    detail: 'no $DISPLAY; Xvfb present — headed-replay fallback available for GPU-less hosts',
+  };
+}
 function checkLLMProvider(): CheckResult {
   const statuses = getProviderStatuses();
   const detected = statuses.filter((s) => s.detected);

package/src/imprint/emit.ts CHANGED Viewed

@@ -5,6 +5,7 @@ import { existsSync, mkdirSync, writeFileSync } from 'node:fs';
 import { basename, dirname, join as pathJoin, resolve as pathResolve } from 'node:path';
 import { loadJsonFile } from './load-json.ts';
 import { ensureImprintRuntimeLink } from './runtime-link.ts';
+import { isLoginFieldKey } from './sensitive-keys.ts';
 import { type Workflow, WorkflowSchema } from './types.ts';
 interface EmitOptions {
@@ -36,6 +37,8 @@ export function emit(opts: EmitOptions): EmitResult {
     'workflow.json',
   );
+  assertNoCredentialShapedParams(workflow);
   const outDir = opts.outDir ?? defaultOutDir(opts.workflowPath, workflow);
   mkdirSync(outDir, { recursive: true });
@@ -137,6 +140,88 @@ export { WORKFLOW };
 `;
 }
+/** Pre-emit guardrail: refuse to write a workflow whose parameters look
+ *  like login credentials (`password`, `userid`, `email`, etc., per the
+ *  shared dictionary in sensitive-keys.ts) but are templated as plain
+ *  `${param.X}` instead of credential-store references like
+ *  `${credential.X}`.
+ *
+ *  This catches the failure mode where upstream credential extraction
+ *  silently failed (e.g. unusual Content-Type, body framing the parser
+ *  didn't recognise, declined credential-save prompt), so the compile
+ *  agent had no credential anchor and chose to model the login fields as
+ *  ordinary callable parameters. The resulting MCP tool would advertise
+ *  `userid`/`password` as required inputs, forward whatever the caller
+ *  passed verbatim, and (most often) silently produce empty results when
+ *  the caller passed empty strings.
+ *
+ *  A parameter passes the check when any of these holds:
+ *    - its name isn't credential-shaped (`isLoginFieldKey` is false), OR
+ *    - no request references `${param.<name>}` in its URL, body, or header
+ *      values, OR
+ *    - some request (not necessarily the same one) references
+ *      `${credential.<name>}` with the SAME name. Only that exact same-name
+ *      reference counts; other `credential.*` references are not considered.
+ *
+ *  Throws an Error listing every offending parameter, the requests that use
+ *  it, and the remediation steps the user needs to take. */
+function assertNoCredentialShapedParams(workflow: Workflow): void {
+  const offenders: Array<{ name: string; matches: string[] }> = [];
+  for (const param of workflow.parameters) {
+    if (!isLoginFieldKey(param.name)) continue;
+    const paramRef = `\${param.${param.name}}`;
+    const credentialRef = `\${credential.${param.name}}`;
+    const requestsUsingParam: string[] = [];
+    let coveredByCredentialRef = false;
+    for (let i = 0; i < workflow.requests.length; i++) {
+      const req = workflow.requests[i];
+      if (!req) continue;
+      const haystack = `${req.url} ${req.body ?? ''} ${Object.values(req.headers).join(' ')}`;
+      if (haystack.includes(credentialRef)) {
+        coveredByCredentialRef = true;
+      }
+      if (haystack.includes(paramRef)) {
+        requestsUsingParam.push(`requests[${i}] (${req.method} ${req.url})`);
+      }
+    }
+    // Only flag when a request templates the param (URL, body, or header
+    // value) and no request carries the same-name credential reference. Using
+    // both `${param.X}` and `${credential.X}` is suspicious but not necessarily
+    // broken — leave it to the user. The dangerous case is `${param.X}` alone.
+    if (requestsUsingParam.length > 0 && !coveredByCredentialRef) {
+      offenders.push({ name: param.name, matches: requestsUsingParam });
+    }
+  }
+  if (offenders.length === 0) return;
+  const lines = [
+    `Workflow ${JSON.stringify(workflow.toolName)} declares ${offenders.length} credential-shaped parameter(s) that are templated as plain \`\${param.X}\` instead of \`\${credential.X}\`:`,
+    '',
+  ];
+  for (const o of offenders) {
+    lines.push(`  • parameter \`${o.name}\` — used in:`);
+    for (const m of o.matches) lines.push(`      - ${m}`);
+  }
+  lines.push(
+    '',
+    'Credentials MUST be pulled from the credential store via `${credential.<name>}`, never modelled as plain workflow parameters.',
+    "This usually means the redact stage failed to extract a username+password pair from the recorded login request — common causes include unusual Content-Type headers, multipart bodies, or login fields the extractor dictionary doesn't yet cover.",
+    '',
+    'To fix:',
+    `  1. Delete the redacted session: rm ${workflowToolHint(workflow)}/sessions/*.redacted.json (or the relevant one)`,
+    `  2. Re-run from the redact stage: imprint teach ${workflow.site} --from redact`,
+    '  3. Accept the "Save credentials for site to the credential manager?" prompt this time.',
+    '  4. Let teach continue through generate → compile-playbook → emit.',
+    '',
+    "If the prompt does NOT appear during step 3, the extractor still cannot pair this site's login fields — please file a bug attaching the (redacted!) session.",
+  );
+  throw new Error(lines.join('\n'));
+}
+/** Builds the human-readable site directory shown in the guardrail error
+ *  message. IMPRINT_HOME is not in scope here and is not needed —
+ *  `~/.imprint/<site>` is the convention. */
+function workflowToolHint(workflow: Workflow): string {
+  const { site } = workflow;
+  return `~/.imprint/${site}`;
+}
 function pascalCase(s: string): string {
   return s
     .split(/[_-]+/)

package/src/imprint/freeform-redact.ts CHANGED Viewed

@@ -73,10 +73,11 @@ const FREEFORM_POLICIES: PolicyName[] = [
   Policies.PGP_PRIVATE_KEY,
   Policies.PASSWORD_ASSIGNMENT,
   Policies.ENVIRONMENT_VARIABLE_SECRET,
-  Policies.GENERIC_PASSWORD,
-  Policies.GENERIC_TOKEN,
-  Policies.GENERIC_CREDENTIAL,
-  Policies.GENERIC_SECRET,
+  // NOTE: the GENERIC_* catch-alls (GENERIC_PASSWORD/TOKEN/CREDENTIAL/SECRET) are
+  // intentionally omitted — they match on value shape alone and fire on benign
+  // data (e.g. `id=1234567890`), corrupting/over-redacting structured payloads.
+  // Real secrets are still covered by the keyword-anchored and specific policies
+  // above and below (PASSWORD_ASSIGNMENT, OAUTH_*, private keys, cloud tokens, PII).
   Policies.OAUTH_CLIENT_SECRET,
   Policies.OAUTH_REFRESH_TOKEN,
   Policies.OAUTH_ACCESS_TOKEN,

package/src/imprint/integrations.ts CHANGED Viewed

@@ -87,7 +87,7 @@ export function generatePasteSnippet(opts: {
   switch (platform) {
     case 'claude-code':
-      return `Add the ${toolName} tool: run \`${shellCmd}\` to register ${descLower}. Parameters: ${paramList}. The backend ladder handles browser/API state and bot detection automatically (fetch → gated fetch-bootstrap → stealth-fetch → playbook).`;
+      return `Add the ${toolName} tool: run \`${shellCmd}\` to register ${descLower}. Parameters: ${paramList}. The backend ladder handles browser/API state and bot detection automatically (fetch → gated fetch-bootstrap → cdp-replay → stealth-fetch → playbook).`;
     case 'codex':
       return `Add the ${toolName} tool: run \`${shellCmd}\` to register ${descLower}. Parameters: ${paramList}.`;
@@ -352,7 +352,7 @@ ${yamlStringify(p, { lineWidth: 0 }).trim()}
   // Backend ladder explanation.
   const backendBlock = `## Backend Ladder
-The MCP server automatically escalates from fetch API replay to gated fetch-bootstrap when browser-minted state is declared, then stealth-fetch for bot-defense state, then playbook for full DOM replay.
+The MCP server automatically escalates from fetch API replay to gated fetch-bootstrap when browser-minted state is declared, then cdp-replay (API requests run inside a live trusted Chrome so a protected POST refreshes its anti-bot token between calls), then stealth-fetch for bot-defense state, then playbook for full DOM replay.
 Bot detection is handled transparently.`;
   // Scheduling block (optional).