imprint-mcp 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129) hide show
  1. package/README.md +165 -201
  2. package/examples/discoverandgo/README.md +1 -1
  3. package/examples/echo/README.md +1 -1
  4. package/examples/google-flights/README.md +28 -0
  5. package/examples/google-flights/_shared/batchexecute.ts +63 -0
  6. package/examples/google-flights/_shared/flights_request.ts +95 -0
  7. package/examples/google-flights/_shared/package.json +9 -0
  8. package/examples/google-flights/get_flight_booking_details/index.ts +159 -0
  9. package/examples/google-flights/get_flight_booking_details/package.json +9 -0
  10. package/examples/google-flights/get_flight_booking_details/parser.ts +182 -0
  11. package/examples/google-flights/get_flight_booking_details/playbook.yaml +138 -0
  12. package/examples/google-flights/get_flight_booking_details/request-transform.ts +86 -0
  13. package/examples/google-flights/get_flight_booking_details/workflow.json +98 -0
  14. package/examples/google-flights/get_flight_calendar_prices/index.ts +131 -0
  15. package/examples/google-flights/get_flight_calendar_prices/package.json +9 -0
  16. package/examples/google-flights/get_flight_calendar_prices/parser.ts +86 -0
  17. package/examples/google-flights/get_flight_calendar_prices/playbook.yaml +97 -0
  18. package/examples/google-flights/get_flight_calendar_prices/request-transform.ts +31 -0
  19. package/examples/google-flights/get_flight_calendar_prices/workflow.json +76 -0
  20. package/examples/google-flights/lookup_airport/index.ts +101 -0
  21. package/examples/google-flights/lookup_airport/package.json +9 -0
  22. package/examples/google-flights/lookup_airport/parser.ts +66 -0
  23. package/examples/google-flights/lookup_airport/playbook.yaml +47 -0
  24. package/examples/google-flights/lookup_airport/request-transform.ts +20 -0
  25. package/examples/google-flights/lookup_airport/workflow.json +57 -0
  26. package/examples/google-flights/search_flights/index.ts +219 -0
  27. package/examples/google-flights/search_flights/package.json +9 -0
  28. package/examples/google-flights/search_flights/parser.ts +169 -0
  29. package/examples/google-flights/search_flights/playbook.yaml +184 -0
  30. package/examples/google-flights/search_flights/request-transform.ts +119 -0
  31. package/examples/google-flights/search_flights/workflow.json +143 -0
  32. package/examples/google-hotels/README.md +29 -0
  33. package/examples/google-hotels/_shared/batchexecute.ts +73 -0
  34. package/examples/google-hotels/_shared/freq.ts +158 -0
  35. package/examples/google-hotels/_shared/package.json +9 -0
  36. package/examples/google-hotels/autocomplete_hotel_location/index.ts +80 -0
  37. package/examples/google-hotels/autocomplete_hotel_location/package.json +9 -0
  38. package/examples/google-hotels/autocomplete_hotel_location/parser.ts +71 -0
  39. package/examples/google-hotels/autocomplete_hotel_location/playbook.yaml +36 -0
  40. package/examples/google-hotels/autocomplete_hotel_location/request-transform.ts +37 -0
  41. package/examples/google-hotels/autocomplete_hotel_location/workflow.json +36 -0
  42. package/examples/google-hotels/get_hotel_booking_options/index.ts +143 -0
  43. package/examples/google-hotels/get_hotel_booking_options/package.json +9 -0
  44. package/examples/google-hotels/get_hotel_booking_options/parser.ts +271 -0
  45. package/examples/google-hotels/get_hotel_booking_options/playbook.yaml +154 -0
  46. package/examples/google-hotels/get_hotel_booking_options/request-transform.ts +154 -0
  47. package/examples/google-hotels/get_hotel_booking_options/workflow.json +84 -0
  48. package/examples/google-hotels/get_hotel_reviews/index.ts +81 -0
  49. package/examples/google-hotels/get_hotel_reviews/package.json +9 -0
  50. package/examples/google-hotels/get_hotel_reviews/parser.ts +128 -0
  51. package/examples/google-hotels/get_hotel_reviews/playbook.yaml +64 -0
  52. package/examples/google-hotels/get_hotel_reviews/request-transform.ts +42 -0
  53. package/examples/google-hotels/get_hotel_reviews/workflow.json +37 -0
  54. package/examples/google-hotels/search_hotels/index.ts +207 -0
  55. package/examples/google-hotels/search_hotels/package.json +9 -0
  56. package/examples/google-hotels/search_hotels/parser.ts +260 -0
  57. package/examples/google-hotels/search_hotels/playbook.yaml +87 -0
  58. package/examples/google-hotels/search_hotels/request-transform.ts +197 -0
  59. package/examples/google-hotels/search_hotels/workflow.json +127 -0
  60. package/package.json +3 -2
  61. package/prompts/audit-agent.md +71 -0
  62. package/prompts/build-planning.md +74 -0
  63. package/prompts/compile-agent.md +132 -28
  64. package/prompts/prereq-builder.md +64 -0
  65. package/prompts/prereq-planner.md +34 -0
  66. package/prompts/tool-planning.md +39 -0
  67. package/src/cli.ts +111 -4
  68. package/src/imprint/agent.ts +5 -0
  69. package/src/imprint/audit.ts +996 -0
  70. package/src/imprint/backend-ladder.ts +1214 -184
  71. package/src/imprint/build-plan.ts +1051 -0
  72. package/src/imprint/cdp-browser-fetch.ts +589 -0
  73. package/src/imprint/cdp-jar-cache.ts +320 -0
  74. package/src/imprint/chromium.ts +135 -0
  75. package/src/imprint/claude-cli-compile.ts +125 -25
  76. package/src/imprint/codex-cli-compile.ts +26 -23
  77. package/src/imprint/compile-agent-types.ts +38 -0
  78. package/src/imprint/compile-agent.ts +65 -27
  79. package/src/imprint/compile-tools.ts +1656 -64
  80. package/src/imprint/compile.ts +14 -2
  81. package/src/imprint/concurrency.ts +87 -0
  82. package/src/imprint/credential-extract.ts +174 -25
  83. package/src/imprint/cron.ts +1 -0
  84. package/src/imprint/doctor.ts +39 -0
  85. package/src/imprint/emit.ts +85 -0
  86. package/src/imprint/freeform-redact.ts +5 -4
  87. package/src/imprint/integrations.ts +2 -2
  88. package/src/imprint/llm.ts +56 -8
  89. package/src/imprint/mcp-compile-server.ts +43 -10
  90. package/src/imprint/mcp-maintenance.ts +9 -101
  91. package/src/imprint/mcp-server.ts +73 -7
  92. package/src/imprint/multi-progress.ts +7 -2
  93. package/src/imprint/param-grounding.ts +367 -0
  94. package/src/imprint/paths.ts +29 -0
  95. package/src/imprint/playbook-runner.ts +101 -40
  96. package/src/imprint/prereq-builder.ts +651 -0
  97. package/src/imprint/probe-backends.ts +6 -3
  98. package/src/imprint/record.ts +10 -1
  99. package/src/imprint/redact.ts +30 -2
  100. package/src/imprint/replay-capture.ts +19 -18
  101. package/src/imprint/runtime.ts +19 -10
  102. package/src/imprint/sensitive-keys.ts +141 -7
  103. package/src/imprint/session-diff.ts +79 -2
  104. package/src/imprint/session-merge.ts +9 -5
  105. package/src/imprint/stealth-chromium.ts +81 -0
  106. package/src/imprint/stealth-fetch.ts +309 -29
  107. package/src/imprint/stealth-token-cache.ts +88 -0
  108. package/src/imprint/teach-plan.ts +251 -0
  109. package/src/imprint/teach-state.ts +17 -0
  110. package/src/imprint/teach.ts +582 -147
  111. package/src/imprint/tool-candidates.ts +72 -14
  112. package/src/imprint/tool-plan.ts +313 -0
  113. package/src/imprint/tracing.ts +135 -6
  114. package/src/imprint/types.ts +61 -3
  115. package/examples/google-flights/search_google_flights/index.ts +0 -101
  116. package/examples/google-flights/search_google_flights/parser.test.ts +0 -140
  117. package/examples/google-flights/search_google_flights/parser.ts +0 -189
  118. package/examples/google-flights/search_google_flights/playbook.yaml +0 -130
  119. package/examples/google-flights/search_google_flights/workflow.json +0 -48
  120. package/examples/google-hotels/search_google_hotels/index.ts +0 -194
  121. package/examples/google-hotels/search_google_hotels/parser.test.ts +0 -168
  122. package/examples/google-hotels/search_google_hotels/parser.ts +0 -330
  123. package/examples/google-hotels/search_google_hotels/playbook.yaml +0 -125
  124. package/examples/google-hotels/search_google_hotels/workflow.json +0 -111
  125. package/examples/namecheap-domains/search_namecheap_domains/index.ts +0 -144
  126. package/examples/namecheap-domains/search_namecheap_domains/parser.ts +0 -380
  127. package/examples/namecheap-domains/search_namecheap_domains/playbook.yaml +0 -50
  128. package/examples/namecheap-domains/search_namecheap_domains/request-transform.ts +0 -136
  129. package/examples/namecheap-domains/search_namecheap_domains/workflow.json +0 -97
@@ -19,6 +19,7 @@ import {
19
19
  import { dirname, join as pathJoin } from 'node:path';
20
20
  import type { OnDeadlineReached } from './agent.ts';
21
21
  import { inferAppApiHosts } from './app-api-hosts.ts';
22
+ import type { SharedModuleManifestEntry } from './build-plan.ts';
22
23
  import { type CompileAgentProgress, compileAgent } from './compile-agent.ts';
23
24
  import { isSameRegistrableDomain, registrableDomain } from './etld.ts';
24
25
  import { type LLMOptions, extractJsonArray, resolveProvider } from './llm.ts';
@@ -82,6 +83,14 @@ interface GenerateOptions extends CompileOptions {
82
83
  classifications?: ClassifiedValue[];
83
84
  /** Credential values extracted during teach, passed to integration tests via env var. */
84
85
  teachCredentials?: { site: string; values: Record<string, string> };
86
+ /** Absolute path to the multi-tool build plan sidecar (.build-plan.json). */
87
+ buildPlanPath?: string;
88
+ /** Shared-module build manifest for this site (verified flags). */
89
+ sharedModules?: SharedModuleManifestEntry[];
90
+ /** Per-tool implementation plan (param→field mapping, request construction,
91
+ * response parsing, shared-module imports). Injected into the agent's initial
92
+ * message so the compile follows it. */
93
+ toolPlan?: string;
85
94
  }
86
95
 
87
96
  interface GenerateResult {
@@ -122,6 +131,9 @@ export async function generate(opts: GenerateOptions): Promise<GenerateResult> {
122
131
  sharedContext: opts.sharedContext,
123
132
  classifications: opts.classifications,
124
133
  teachCredentials: opts.teachCredentials,
134
+ buildPlanPath: opts.buildPlanPath,
135
+ sharedModules: opts.sharedModules,
136
+ toolPlan: opts.toolPlan,
125
137
  });
126
138
 
127
139
  setSpanAttributes(span, {
@@ -145,7 +157,7 @@ export async function generate(opts: GenerateOptions): Promise<GenerateResult> {
145
157
  ];
146
158
  if (result.outcome === 'timeout') {
147
159
  lines.push(
148
- 'hint: increase the timeout with --timeout (teach) or --max-duration (generate)',
160
+ 'hint: most complex tools take 10-15 minutes. increase the timeout with --timeout (teach) or --max-duration (generate)',
149
161
  );
150
162
  }
151
163
  throw new Error(lines.join('\n'));
@@ -274,7 +286,7 @@ const TRIAGE_RESOURCE_TYPES = new Set(['XHR', 'Fetch', 'Document']);
274
286
  const HEADER_TRUNCATE_LIMIT = 200;
275
287
  // Per-request body cap for triage. Triage only needs enough body to distinguish
276
288
  // data-bearing POSTs (search/booking) from telemetry; full bodies on a busy
277
- // site can total >1MB and blow the 200K-token cap on `claude-opus-4-7`.
289
+ // site can total >1MB and blow the 200K-token cap on `claude-opus-4-8`.
278
290
  const TRIAGE_BODY_LIMIT = 500;
279
291
 
280
292
  export interface TriageResult {
@@ -0,0 +1,87 @@
1
+ /**
2
+ * Bounded-concurrency fan-out helpers shared across the teach pipeline.
3
+ *
4
+ * Lives in its own module (rather than teach.ts) so leaf modules like
5
+ * teach-plan.ts can reuse it without importing teach.ts, which would create an
6
+ * import cycle (teach.ts → teach-plan.ts → teach.ts). teach.ts re-exports both
7
+ * for backwards compatibility with existing callers + tests.
8
+ */
9
+
10
/**
 * Run `fn` over `items` with at most `concurrency` calls in flight and return
 * the results in input order.
 *
 * Once any call fails, no further items are started; calls that are already in
 * flight are allowed to settle, and then the first recorded error is rethrown.
 * Use mapLimitSettled when you need per-item success/failure instead.
 *
 * @param items       Inputs to process. Every index is passed to `fn`,
 *                    including entries whose value is `undefined`.
 * @param concurrency Maximum number of simultaneous `fn` calls. Values below 1
 *                    (0, negatives, NaN) are treated as 1 so the work still
 *                    runs, serially, instead of being silently skipped.
 * @param fn          Async mapper invoked once per item.
 * @returns Results positioned at the same index as their input item.
 * @throws The first error (of any value, including `undefined`) raised by `fn`.
 */
export async function mapLimit<T, R>(
  items: T[],
  concurrency: number,
  fn: (item: T) => Promise<R>,
): Promise<R[]> {
  const results = new Array<R>(items.length);
  // A limit below 1 would create zero workers and hand back an array of holes
  // without ever calling `fn`; degrade to serial execution instead.
  const limit = concurrency >= 1 ? concurrency : 1;
  let next = 0;
  // Track failure with a flag rather than `firstError !== undefined`, so that a
  // rejection whose reason is literally `undefined` is still reported.
  let failed = false;
  let firstError: unknown;

  const worker = async (): Promise<void> => {
    while (next < items.length && !failed) {
      // Claiming the index is synchronous, so two workers never take the same slot.
      const index = next++;
      try {
        // `index` is always in bounds here; the cast only satisfies
        // noUncheckedIndexedAccess and keeps legitimate `undefined` items.
        results[index] = await fn(items[index] as T);
      } catch (err) {
        if (!failed) {
          failed = true;
          firstError = err;
        }
      }
    }
  };

  const workers = Array.from({ length: Math.min(limit, items.length) }, () => worker());
  // Workers swallow their own errors, so this only waits for in-flight work.
  await Promise.allSettled(workers);
  if (failed) throw firstError;
  return results;
}
37
+
38
/** Tagged outcome of one mapLimitSettled item: the mapped value on success, or
 * the thrown/rejected reason on failure. */
type SettledResult<R> = { ok: true; value: R } | { ok: false; error: unknown };

/**
 * Like mapLimit, but never throws: every item resolves to a tagged
 * success/failure entry, preserving input order. A failure in one item does not
 * stop the remaining items from being processed.
 *
 * @param items       Inputs to process. Every index is passed to `fn`,
 *                    including entries whose value is `undefined`.
 * @param concurrency Maximum number of simultaneous `fn` calls. Values below 1
 *                    (0, negatives, NaN) are treated as 1 so no item is left
 *                    without a result entry.
 * @param fn          Async mapper invoked once per item.
 * @returns One SettledResult per input item, at the same index.
 */
export async function mapLimitSettled<T, R>(
  items: T[],
  concurrency: number,
  fn: (item: T) => Promise<R>,
): Promise<SettledResult<R>[]> {
  const results = new Array<SettledResult<R>>(items.length);
  // A limit below 1 would create zero workers and return an array of holes,
  // breaking the "one entry per item" contract; degrade to serial instead.
  const limit = concurrency >= 1 ? concurrency : 1;
  let next = 0;

  const worker = async (): Promise<void> => {
    while (next < items.length) {
      const index = next++;
      try {
        // `index` is always in bounds; the cast only satisfies
        // noUncheckedIndexedAccess and keeps legitimate `undefined` items.
        results[index] = { ok: true, value: await fn(items[index] as T) };
      } catch (err) {
        results[index] = { ok: false, error: err };
      }
    }
  };

  const workers = Array.from({ length: Math.min(limit, items.length) }, () => worker());
  await Promise.allSettled(workers);
  return results;
}
64
+
65
/**
 * Raised by withTimeout when the deadline passes before the awaited work
 * settles. Having a dedicated class lets callers distinguish "took too long"
 * from a genuine failure of the work itself.
 *
 * The message reports the deadline rounded to whole seconds.
 */
export class TimeoutError extends Error {
  constructor(label: string, ms: number) {
    const seconds = Math.round(ms / 1000);
    super(`${label} exceeded ${seconds}s timeout`);
    this.name = 'TimeoutError';
  }
}
73
+
74
/**
 * Await `work`, but give up after `ms` milliseconds.
 *
 * The underlying work (e.g. a CLI child) is NOT cancelled when the deadline
 * fires — the caller simply stops waiting for it and decides how to degrade.
 * The timer is always cleared once the race is decided, whichever side wins.
 *
 * @param work  Promise to wait on.
 * @param ms    Deadline in milliseconds.
 * @param label Name used in the TimeoutError message.
 * @returns The value `work` resolved with.
 * @throws TimeoutError when the deadline elapses first; otherwise whatever
 *         `work` rejected with.
 */
export async function withTimeout<T>(work: Promise<T>, ms: number, label: string): Promise<T> {
  let handle: ReturnType<typeof setTimeout> | undefined;
  const deadline = new Promise<never>((_resolve, reject) => {
    handle = setTimeout(() => {
      reject(new TimeoutError(label, ms));
    }, ms);
  });
  return Promise.race([work, deadline]).finally(() => {
    if (handle) clearTimeout(handle);
  });
}
@@ -12,14 +12,13 @@
12
12
  * value is visible and lets us confirm which form was the login form.
13
13
  */
14
14
 
15
- import { isSensitiveCredentialKey, normalizeKey } from './sensitive-keys.ts';
15
+ import { isSensitiveCredentialKey, isUsernameLikeKey } from './sensitive-keys.ts';
16
16
  import type { CapturedEvent, CapturedRequest, Session } from './types.ts';
17
17
 
18
- /** Field-name patterns we'll treat as the username/email partner of a
19
- * password field. Ordered by preference: emails first, then user-ish
20
- * identifiers. */
21
- const USERNAME_KEY_RE =
22
- /^(user(?:name|id)?|email(?:address)?|login(?:id)?|account|patron(?:number|id)?)$/i;
18
+ /** Predicate: this key looks like the username/email/login partner of a
19
+ * password field. Backed by `USERNAME_LIKE_KEYS` in sensitive-keys.ts so
20
+ * the dictionary stays in one place. */
21
+ const isUsernameKey = (key: string): boolean => isUsernameLikeKey(key);
23
22
 
24
23
  /** Where, within a request, a redactable value lives. */
25
24
  export type ReplacementLocation =
@@ -58,6 +57,29 @@ interface ExtractionResult {
58
57
  replacements: Replacement[];
59
58
  }
60
59
 
60
+ /** Parsers are tried in this order on every request that has a body. Each
61
+ * one is side-effect-free and returns `null` when its input doesn't fit
62
+ * its expected framing — so trying JSON first on a form body, or form on
63
+ * a JSON body, is safe: only the parser that actually fits will produce a
64
+ * finding.
65
+ *
66
+ * Dispatch is parser-driven, not Content-Type-driven, because real sites
67
+ * routinely mislabel their bodies — the canonical example is the Nextep
68
+ * cafe API (`Content-Type: text/plain` for JSON bodies). Letting the data
69
+ * speak for itself prevents whole classes of silent extraction failures.
70
+ *
71
+ * URL-query parsing runs even on requests without a body (e.g. GET-based
72
+ * logins that pass credentials in the query string). Multipart is checked
73
+ * before generic form-urlencoded because a multipart body still contains
74
+ * `=` characters and would be parsed as a single malformed form pair
75
+ * otherwise. */
76
+ const BODY_PARSERS: Array<(r: CapturedRequest) => BodyFinding | null> = [
77
+ findInJsonBody,
78
+ findInJsonWrappedInForm,
79
+ findInMultipartBody,
80
+ findInFormBody,
81
+ ];
82
+
61
83
  /** Top-level entry point. */
62
84
  export function extractCredentials(session: Session): ExtractionResult {
63
85
  const findings: CredentialFinding[] = [];
@@ -65,13 +87,17 @@ export function extractCredentials(session: Session): ExtractionResult {
65
87
  const usernamesInDom = collectFormSubmitUsernames(session.events);
66
88
 
67
89
  for (const req of session.requests) {
68
- if (!req.body) continue;
69
- const ct = (req.headers['content-type'] ?? req.headers['Content-Type'] ?? '').toLowerCase();
70
- const found = ct.includes('json')
71
- ? findInJsonBody(req)
72
- : ct.includes('urlencoded') || req.body.includes('=')
73
- ? findInFormBody(req)
74
- : null;
90
+ let found: BodyFinding | null = null;
91
+ if (req.body) {
92
+ for (const parse of BODY_PARSERS) {
93
+ found = parse(req);
94
+ if (found) break;
95
+ }
96
+ }
97
+ // Last-resort: credentials in the URL query string (rare but real for
98
+ // some legacy GET-based login endpoints). Tried after body parsers so
99
+ // body-based logins always win when both are present.
100
+ if (!found) found = findInUrlQuery(req);
75
101
  if (!found) continue;
76
102
 
77
103
  const confirmedByDom = usernamesInDom.has(found.usernameValue);
@@ -132,7 +158,7 @@ function findInFormBody(req: CapturedRequest): BodyFinding | null {
132
158
 
133
159
  // Second pass: find a username-like key.
134
160
  for (const { key, value } of pairs) {
135
- if (USERNAME_KEY_RE.test(normalizeKey(key)) && value.length > 0) {
161
+ if (isUsernameKey(key) && value.length > 0) {
136
162
  usernameKey = key;
137
163
  usernameValue = value;
138
164
  break;
@@ -163,11 +189,7 @@ function findInJsonBody(req: CapturedRequest): BodyFinding | null {
163
189
  if (typeof pwdHit.value !== 'string' || pwdHit.value.length === 0) return null;
164
190
 
165
191
  // Look for a username-like key; prefer one in the same parent object.
166
- const userHit = findFirstByPredicate(
167
- parsed,
168
- (k) => USERNAME_KEY_RE.test(normalizeKey(k)),
169
- pwdHit.parent,
170
- );
192
+ const userHit = findFirstByPredicate(parsed, isUsernameKey, pwdHit.parent);
171
193
  if (!userHit || typeof userHit.value !== 'string' || userHit.value.length === 0) return null;
172
194
 
173
195
  return {
@@ -178,6 +200,138 @@ function findInJsonBody(req: CapturedRequest): BodyFinding | null {
178
200
  };
179
201
  }
180
202
 
203
/**
 * Covers legacy framings in which a whole JSON document travels as the value of
 * one form-encoded field, e.g. `payload={"username":"…","password":"…"}`.
 *
 * Only fields named `payload`, `data`, `request`, `json` or `body`
 * (case-insensitive) whose value begins with `{` or `[` are considered. The
 * username/password pairing is delegated to findInJsonBody by handing it a copy
 * of the request whose body is the unwrapped JSON text.
 *
 * @returns The inner finding with both locations rewritten to point at the
 *          wrapping form field (`body-form`), or `null` when no wrapper field
 *          yields a pair.
 */
function findInJsonWrappedInForm(req: CapturedRequest): BodyFinding | null {
  const rawBody = req.body;
  if (!rawBody) return null;

  const wrapperKeys = ['payload', 'data', 'request', 'json', 'body'];
  const startsLikeJson = (text: string): boolean => text.startsWith('{') || text.startsWith('[');

  for (const field of parseFormBody(rawBody)) {
    if (!wrapperKeys.includes(field.key.toLowerCase())) continue;
    if (!startsLikeJson(field.value)) continue;

    // Synthetic request: identical to the original except the body is the
    // unwrapped JSON string.
    const unwrapped = findInJsonBody({ ...req, body: field.value });
    if (!unwrapped) continue;

    // Both credentials live inside the same form field, so both locations
    // name that field rather than a JSON path.
    const wrapperLocation = { kind: 'body-form' as const, key: field.key };
    return {
      ...unwrapped,
      usernameLocation: { ...wrapperLocation },
      passwordLocation: { ...wrapperLocation },
    };
  }
  return null;
}
233
+
234
/**
 * Parse a multipart/form-data body into {key, value} pairs and pair them the
 * same way as the form-urlencoded path. Defensive: any malformed part is
 * skipped rather than failing the whole parse.
 *
 * The boundary is sniffed from the first line (`--<boundary>`) instead of being
 * read from the Content-Type header, because this module deliberately does not
 * trust Content-Type.
 *
 * @returns A finding with `body-form` locations, or `null` when the body is not
 *          multipart-shaped, has no named parts, or no credential pair is found.
 */
function findInMultipartBody(req: CapturedRequest): BodyFinding | null {
  if (!req.body) return null;
  const body = req.body;

  // First line should be `--<boundary>`. If it doesn't start with `--` or
  // there's no following newline, this isn't multipart.
  const firstNewline = body.indexOf('\n');
  if (firstNewline < 0) return null;
  const firstLine = body.slice(0, firstNewline).trimEnd();
  if (!firstLine.startsWith('--')) return null;
  const boundary = firstLine.slice(2);
  if (boundary.length === 0 || boundary.length > 200) return null;

  // Split on the delimiter; drop the (empty) prologue before the first one.
  const sep = `--${boundary}`;
  const parts = body.split(sep).slice(1);
  const pairs: Array<{ key: string; value: string }> = [];

  for (const partRaw of parts) {
    // Each part starts with the line break that terminated the delimiter line.
    const part = partRaw.replace(/^\r?\n/, '');
    // What follows the closing delimiter `--<boundary>--` starts with `--`.
    if (part.startsWith('--')) break;

    // Headers and content are separated by a blank line (CRLF or bare LF).
    const crlfGap = part.indexOf('\r\n\r\n');
    const headerEnd = crlfGap >= 0 ? crlfGap : part.indexOf('\n\n');
    if (headerEnd < 0) continue;
    const gapLength = crlfGap >= 0 ? 4 : 2;
    const headers = part.slice(0, headerEnd);

    // Strip the trailing line break that precedes the next delimiter.
    const value = part.slice(headerEnd + gapLength).replace(/\r?\n$/, '');

    // Require a separator before `name=` so that `filename="…"` is never
    // mistaken for the field name when it appears first in Content-Disposition.
    const nameMatch = headers.match(/(?:^|[;\s])name="([^"]*)"/i);
    if (!nameMatch) continue;
    const key = nameMatch[1] ?? '';
    if (!key) continue;
    pairs.push({ key, value });
  }

  if (pairs.length === 0) return null;
  return pairFromKeyValuePairs(pairs, 'body-form');
}
281
+
282
/**
 * Look for a credential pair in the request URL's query string — e.g.
 * `GET /login?username=…&password=…`, or a POST with an empty body whose
 * credentials ride in the URL.
 *
 * The query string is parsed with the same form-body parser, and the resulting
 * locations are reported with kind `body-form`.
 *
 * @returns A finding, or `null` when the URL cannot be parsed, has no query
 *          string, or contains no username/password pair.
 */
function findInUrlQuery(req: CapturedRequest): BodyFinding | null {
  let search: string;
  try {
    search = new URL(req.url).search;
  } catch {
    // Not an absolute, parseable URL — nothing to inspect.
    return null;
  }

  const query = search.startsWith('?') ? search.slice(1) : search;
  if (!query) return null;

  const queryPairs = parseFormBody(query);
  return queryPairs.length === 0 ? null : pairFromKeyValuePairs(queryPairs, 'body-form');
}
298
+
299
/**
 * Shared pairing step for parsers that flatten their input into key/value
 * pairs. Picks the first pair whose key is a sensitive credential key and the
 * first pair whose key is username-like; both must have a non-empty value.
 *
 * @param pairs Flattened key/value pairs, in document order.
 * @param kind  Location kind to stamp on both results; passed through unchanged.
 * @returns The paired finding, or `null` if either half is missing.
 */
function pairFromKeyValuePairs(
  pairs: Array<{ key: string; value: string }>,
  kind: 'body-form',
): BodyFinding | null {
  const password = pairs.find(({ key, value }) => isSensitiveCredentialKey(key) && value.length > 0);
  if (!password) return null;

  const username = pairs.find(({ key, value }) => isUsernameKey(key) && value.length > 0);
  if (!username) return null;

  return {
    usernameValue: username.value,
    passwordValue: password.value,
    usernameLocation: { kind, key: username.key },
    passwordLocation: { kind, key: password.key },
  };
}
334
+
181
335
  interface JsonHit {
182
336
  key: string;
183
337
  value: unknown;
@@ -238,12 +392,7 @@ function collectFormSubmitUsernames(events: CapturedEvent[]): Set<string> {
238
392
  fields?: Array<{ name?: string; type?: string; value?: string }>;
239
393
  };
240
394
  for (const f of detail.fields ?? []) {
241
- if (
242
- f.name &&
243
- f.value &&
244
- f.type !== 'password' &&
245
- USERNAME_KEY_RE.test(normalizeKey(f.name))
246
- ) {
395
+ if (f.name && f.value && f.type !== 'password' && isUsernameKey(f.name)) {
247
396
  out.add(f.value);
248
397
  }
249
398
  }
@@ -242,6 +242,7 @@ async function runCronImpl(opts: RunCronOptions): Promise<void> {
242
242
  if (
243
243
  ladder.includes('fetch') ||
244
244
  ladder.includes('fetch-bootstrap') ||
245
+ ladder.includes('cdp-replay') ||
245
246
  ladder.includes('stealth-fetch')
246
247
  ) {
247
248
  const validator = buildZodValidator(tool.workflow.parameters);
@@ -1,6 +1,7 @@
1
1
  /** `imprint doctor` — check that the environment can actually run imprint.
2
2
  * Reports pass/fail per prerequisite plus a one-line fix when failed. */
3
3
 
4
+ import { spawnSync } from 'node:child_process';
4
5
  import { existsSync, readFileSync, readdirSync } from 'node:fs';
5
6
  import { homedir } from 'node:os';
6
7
  import { join as pathJoin } from 'node:path';
@@ -20,6 +21,7 @@ export function doctor(): CheckResult[] {
20
21
  checkBun(),
21
22
  checkChromium(),
22
23
  checkPlaywrightChromium(),
24
+ checkVirtualDisplay(),
23
25
  checkLLMProvider(),
24
26
  checkPushOptional(),
25
27
  checkClaudeCode(),
@@ -87,6 +89,43 @@ function checkPlaywrightChromium(): CheckResult {
87
89
  };
88
90
  }
89
91
 
92
/** Report whether `command -v Xvfb` succeeds under `sh`. Any failure to run
 * the probe is treated as "not available". */
function hasXvfbBinary(): boolean {
  try {
    const probe = spawnSync('sh', ['-c', 'command -v Xvfb'], { stdio: 'ignore' });
    return probe.status === 0;
  } catch {
    return false;
  }
}
99
+
100
/**
 * Advisory check for a display usable by a headed browser replay.
 *
 * Every branch reports `ok: true`; the result only varies in its `detail` text
 * and in whether a `fix` hint is attached:
 *   - non-Linux platforms: nothing is needed;
 *   - Linux with `$DISPLAY` set: the display value is reported;
 *   - Linux without `$DISPLAY` but with an Xvfb binary: fallback available;
 *   - Linux with neither: still passes, with an install hint for Xvfb.
 */
function checkVirtualDisplay(): CheckResult {
  const name = 'Display (headed replay)';
  const platform = process.platform;

  if (platform !== 'linux') {
    return { name, ok: true, detail: `${platform}: native window server (no Xvfb needed)` };
  }

  const display = process.env.DISPLAY;
  if (display) {
    return { name, ok: true, detail: `$DISPLAY=${display}` };
  }

  if (!hasXvfbBinary()) {
    // Advisory only — default replay is headless; Xvfb is just a GPU-less fallback.
    return {
      name,
      ok: true,
      detail:
        'Linux, no $DISPLAY and no Xvfb — default replay is headless (fine); install Xvfb only if a GPU-less host gets bot-flagged',
      fix: 'GPU-less host bot-flagged? install the headed-replay fallback: apt-get install xvfb (or export DISPLAY=:0)',
    };
  }

  return {
    name,
    ok: true,
    detail: 'no $DISPLAY; Xvfb present — headed-replay fallback available for GPU-less hosts',
  };
}
128
+
90
129
  function checkLLMProvider(): CheckResult {
91
130
  const statuses = getProviderStatuses();
92
131
  const detected = statuses.filter((s) => s.detected);
@@ -5,6 +5,7 @@ import { existsSync, mkdirSync, writeFileSync } from 'node:fs';
5
5
  import { basename, dirname, join as pathJoin, resolve as pathResolve } from 'node:path';
6
6
  import { loadJsonFile } from './load-json.ts';
7
7
  import { ensureImprintRuntimeLink } from './runtime-link.ts';
8
+ import { isLoginFieldKey } from './sensitive-keys.ts';
8
9
  import { type Workflow, WorkflowSchema } from './types.ts';
9
10
 
10
11
  interface EmitOptions {
@@ -36,6 +37,8 @@ export function emit(opts: EmitOptions): EmitResult {
36
37
  'workflow.json',
37
38
  );
38
39
 
40
+ assertNoCredentialShapedParams(workflow);
41
+
39
42
  const outDir = opts.outDir ?? defaultOutDir(opts.workflowPath, workflow);
40
43
 
41
44
  mkdirSync(outDir, { recursive: true });
@@ -137,6 +140,88 @@ export { WORKFLOW };
137
140
  `;
138
141
  }
139
142
 
143
/**
 * Pre-emit guardrail: refuse to write a workflow whose parameters look like
 * login fields (per `isLoginFieldKey`) and are templated into a request as
 * plain `${param.X}`.
 *
 * For each login-shaped parameter, every request's URL, body and header values
 * are scanned as one string. The parameter is reported when at least one
 * request contains `${param.<name>}` and no request contains the matching
 * `${credential.<name>}` for that same name. A workflow that uses both forms
 * is left alone.
 *
 * @throws Error listing each offending parameter, the requests that use it,
 *         and the remediation steps.
 */
function assertNoCredentialShapedParams(workflow: Workflow): void {
  const offenders: Array<{ name: string; matches: string[] }> = [];

  for (const { name } of workflow.parameters) {
    if (!isLoginFieldKey(name)) continue;

    const paramRef = `\${param.${name}}`;
    const credentialRef = `\${credential.${name}}`;
    const matches: string[] = [];
    let hasCredentialRef = false;

    for (const [i, req] of workflow.requests.entries()) {
      if (!req) continue;
      // Search URL, body and header values together as one blob.
      const haystack = `${req.url} ${req.body ?? ''} ${Object.values(req.headers).join(' ')}`;
      if (haystack.includes(credentialRef)) hasCredentialRef = true;
      if (haystack.includes(paramRef)) matches.push(`requests[${i}] (${req.method} ${req.url})`);
    }

    // Only the "`${param.X}` with no `${credential.X}` anywhere" case is fatal.
    if (matches.length > 0 && !hasCredentialRef) offenders.push({ name, matches });
  }

  if (offenders.length === 0) return;

  const report: string[] = [
    `Workflow ${JSON.stringify(workflow.toolName)} declares ${offenders.length} credential-shaped parameter(s) that are templated as plain \`\${param.X}\` instead of \`\${credential.X}\`:`,
    '',
  ];
  for (const offender of offenders) {
    report.push(
      ` • parameter \`${offender.name}\` — used in:`,
      ...offender.matches.map((m) => ` - ${m}`),
    );
  }
  report.push(
    '',
    'Credentials MUST be pulled from the credential store via `${credential.<name>}`, never modelled as plain workflow parameters.',
    "This usually means the redact stage failed to extract a username+password pair from the recorded login request — common causes include unusual Content-Type headers, multipart bodies, or login fields the extractor dictionary doesn't yet cover.",
    '',
    'To fix:',
    ` 1. Delete the redacted session: rm ${workflowToolHint(workflow)}/sessions/*.redacted.json (or the relevant one)`,
    ` 2. Re-run from the redact stage: imprint teach ${workflow.site} --from redact`,
    ' 3. Accept the "Save credentials for site to the credential manager?" prompt this time.',
    ' 4. Let teach continue through generate → compile-playbook → emit.',
    '',
    "If the prompt does NOT appear during step 3, the extractor still cannot pair this site's login fields — please file a bug attaching the (redacted!) session.",
  );
  throw new Error(report.join('\n'));
}
218
+
219
/** Build the `~/.imprint/<site>` path shown in the credential-guardrail error
 * message. The home-relative form is used as a display convention only. */
function workflowToolHint(workflow: Workflow): string {
  const { site } = workflow;
  return `~/.imprint/${site}`;
}
224
+
140
225
  function pascalCase(s: string): string {
141
226
  return s
142
227
  .split(/[_-]+/)
@@ -73,10 +73,11 @@ const FREEFORM_POLICIES: PolicyName[] = [
73
73
  Policies.PGP_PRIVATE_KEY,
74
74
  Policies.PASSWORD_ASSIGNMENT,
75
75
  Policies.ENVIRONMENT_VARIABLE_SECRET,
76
- Policies.GENERIC_PASSWORD,
77
- Policies.GENERIC_TOKEN,
78
- Policies.GENERIC_CREDENTIAL,
79
- Policies.GENERIC_SECRET,
76
+ // NOTE: the GENERIC_* catch-alls (GENERIC_PASSWORD/TOKEN/CREDENTIAL/SECRET) are
77
+ // intentionally omitted — they match on value shape alone and fire on benign
78
+ // data (e.g. `id=1234567890`), corrupting/over-redacting structured payloads.
79
+ // Real secrets are still covered by the keyword-anchored and specific policies
80
+ // above and below (PASSWORD_ASSIGNMENT, OAUTH_*, private keys, cloud tokens, PII).
80
81
  Policies.OAUTH_CLIENT_SECRET,
81
82
  Policies.OAUTH_REFRESH_TOKEN,
82
83
  Policies.OAUTH_ACCESS_TOKEN,
@@ -87,7 +87,7 @@ export function generatePasteSnippet(opts: {
87
87
 
88
88
  switch (platform) {
89
89
  case 'claude-code':
90
- return `Add the ${toolName} tool: run \`${shellCmd}\` to register ${descLower}. Parameters: ${paramList}. The backend ladder handles browser/API state and bot detection automatically (fetch → gated fetch-bootstrap → stealth-fetch → playbook).`;
90
+ return `Add the ${toolName} tool: run \`${shellCmd}\` to register ${descLower}. Parameters: ${paramList}. The backend ladder handles browser/API state and bot detection automatically (fetch → gated fetch-bootstrap → cdp-replay → stealth-fetch → playbook).`;
91
91
 
92
92
  case 'codex':
93
93
  return `Add the ${toolName} tool: run \`${shellCmd}\` to register ${descLower}. Parameters: ${paramList}.`;
@@ -352,7 +352,7 @@ ${yamlStringify(p, { lineWidth: 0 }).trim()}
352
352
  // Backend ladder explanation.
353
353
  const backendBlock = `## Backend Ladder
354
354
 
355
- The MCP server automatically escalates from fetch API replay to gated fetch-bootstrap when browser-minted state is declared, then stealth-fetch for bot-defense state, then playbook for full DOM replay.
355
+ The MCP server automatically escalates from fetch API replay to gated fetch-bootstrap when browser-minted state is declared, then cdp-replay (API requests run inside a live trusted Chrome so a protected POST refreshes its anti-bot token between calls), then stealth-fetch for bot-defense state, then playbook for full DOM replay.
356
356
  Bot detection is handled transparently.`;
357
357
 
358
358
  // Scheduling block (optional).