imprint-mcp 0.2.1 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (130) hide show
  1. package/README.md +193 -189
  2. package/examples/discoverandgo/README.md +1 -1
  3. package/examples/echo/README.md +1 -1
  4. package/examples/google-flights/README.md +28 -0
  5. package/examples/google-flights/_shared/batchexecute.ts +63 -0
  6. package/examples/google-flights/_shared/flights_request.ts +95 -0
  7. package/examples/google-flights/_shared/package.json +9 -0
  8. package/examples/google-flights/get_flight_booking_details/index.ts +159 -0
  9. package/examples/google-flights/get_flight_booking_details/package.json +9 -0
  10. package/examples/google-flights/get_flight_booking_details/parser.ts +182 -0
  11. package/examples/google-flights/get_flight_booking_details/playbook.yaml +138 -0
  12. package/examples/google-flights/get_flight_booking_details/request-transform.ts +86 -0
  13. package/examples/google-flights/get_flight_booking_details/workflow.json +98 -0
  14. package/examples/google-flights/get_flight_calendar_prices/index.ts +131 -0
  15. package/examples/google-flights/get_flight_calendar_prices/package.json +9 -0
  16. package/examples/google-flights/get_flight_calendar_prices/parser.ts +86 -0
  17. package/examples/google-flights/get_flight_calendar_prices/playbook.yaml +97 -0
  18. package/examples/google-flights/get_flight_calendar_prices/request-transform.ts +31 -0
  19. package/examples/google-flights/get_flight_calendar_prices/workflow.json +78 -0
  20. package/examples/google-flights/lookup_airport/index.ts +101 -0
  21. package/examples/google-flights/lookup_airport/package.json +9 -0
  22. package/examples/google-flights/lookup_airport/parser.ts +66 -0
  23. package/examples/google-flights/lookup_airport/playbook.yaml +47 -0
  24. package/examples/google-flights/lookup_airport/request-transform.ts +20 -0
  25. package/examples/google-flights/lookup_airport/workflow.json +57 -0
  26. package/examples/google-flights/search_flights/index.ts +219 -0
  27. package/examples/google-flights/search_flights/package.json +9 -0
  28. package/examples/google-flights/search_flights/parser.ts +169 -0
  29. package/examples/google-flights/search_flights/playbook.yaml +184 -0
  30. package/examples/google-flights/search_flights/request-transform.ts +119 -0
  31. package/examples/google-flights/search_flights/workflow.json +143 -0
  32. package/examples/google-hotels/README.md +29 -0
  33. package/examples/google-hotels/_shared/batchexecute.ts +73 -0
  34. package/examples/google-hotels/_shared/freq.ts +158 -0
  35. package/examples/google-hotels/_shared/package.json +9 -0
  36. package/examples/google-hotels/autocomplete_hotel_location/index.ts +80 -0
  37. package/examples/google-hotels/autocomplete_hotel_location/package.json +9 -0
  38. package/examples/google-hotels/autocomplete_hotel_location/parser.ts +71 -0
  39. package/examples/google-hotels/autocomplete_hotel_location/playbook.yaml +36 -0
  40. package/examples/google-hotels/autocomplete_hotel_location/request-transform.ts +37 -0
  41. package/examples/google-hotels/autocomplete_hotel_location/workflow.json +36 -0
  42. package/examples/google-hotels/get_hotel_booking_options/index.ts +143 -0
  43. package/examples/google-hotels/get_hotel_booking_options/package.json +9 -0
  44. package/examples/google-hotels/get_hotel_booking_options/parser.ts +271 -0
  45. package/examples/google-hotels/get_hotel_booking_options/playbook.yaml +154 -0
  46. package/examples/google-hotels/get_hotel_booking_options/request-transform.ts +154 -0
  47. package/examples/google-hotels/get_hotel_booking_options/workflow.json +84 -0
  48. package/examples/google-hotels/get_hotel_reviews/index.ts +81 -0
  49. package/examples/google-hotels/get_hotel_reviews/package.json +9 -0
  50. package/examples/google-hotels/get_hotel_reviews/parser.ts +128 -0
  51. package/examples/google-hotels/get_hotel_reviews/playbook.yaml +64 -0
  52. package/examples/google-hotels/get_hotel_reviews/request-transform.ts +42 -0
  53. package/examples/google-hotels/get_hotel_reviews/workflow.json +37 -0
  54. package/examples/google-hotels/search_hotels/index.ts +207 -0
  55. package/examples/google-hotels/search_hotels/package.json +9 -0
  56. package/examples/google-hotels/search_hotels/parser.ts +260 -0
  57. package/examples/google-hotels/search_hotels/playbook.yaml +87 -0
  58. package/examples/google-hotels/search_hotels/request-transform.ts +197 -0
  59. package/examples/google-hotels/search_hotels/workflow.json +127 -0
  60. package/examples/southwest/README.md +3 -2
  61. package/examples/southwest/search_southwest_flights/index.ts +18 -1
  62. package/examples/southwest/search_southwest_flights/workflow.json +18 -1
  63. package/package.json +3 -2
  64. package/prompts/audit-agent.md +71 -0
  65. package/prompts/build-planning.md +74 -0
  66. package/prompts/compile-agent.md +131 -27
  67. package/prompts/prereq-builder.md +64 -0
  68. package/prompts/prereq-planner.md +34 -0
  69. package/prompts/tool-planning.md +39 -0
  70. package/src/cli.ts +116 -3
  71. package/src/imprint/agent.ts +5 -0
  72. package/src/imprint/audit.ts +996 -0
  73. package/src/imprint/backend-ladder.ts +1214 -184
  74. package/src/imprint/build-plan.ts +1051 -0
  75. package/src/imprint/cdp-browser-fetch.ts +592 -0
  76. package/src/imprint/cdp-jar-cache.ts +320 -0
  77. package/src/imprint/chromium.ts +414 -8
  78. package/src/imprint/claude-cli-compile.ts +125 -25
  79. package/src/imprint/codex-cli-compile.ts +26 -23
  80. package/src/imprint/compile-agent-types.ts +38 -0
  81. package/src/imprint/compile-agent.ts +63 -25
  82. package/src/imprint/compile-tools.ts +1666 -66
  83. package/src/imprint/compile.ts +13 -1
  84. package/src/imprint/concurrency.ts +87 -0
  85. package/src/imprint/cron.ts +4 -0
  86. package/src/imprint/doctor.ts +48 -3
  87. package/src/imprint/freeform-redact.ts +5 -4
  88. package/src/imprint/install.ts +79 -4
  89. package/src/imprint/integrations.ts +3 -3
  90. package/src/imprint/llm.ts +56 -8
  91. package/src/imprint/mcp-compile-server.ts +43 -10
  92. package/src/imprint/mcp-maintenance.ts +18 -102
  93. package/src/imprint/mcp-server.ts +73 -7
  94. package/src/imprint/multi-progress.ts +7 -2
  95. package/src/imprint/param-grounding.ts +367 -0
  96. package/src/imprint/paths.ts +29 -0
  97. package/src/imprint/playbook-runner.ts +101 -40
  98. package/src/imprint/prereq-builder.ts +651 -0
  99. package/src/imprint/probe-backends.ts +6 -3
  100. package/src/imprint/record.ts +10 -1
  101. package/src/imprint/redact.ts +30 -2
  102. package/src/imprint/replay-capture.ts +19 -18
  103. package/src/imprint/runtime.ts +19 -10
  104. package/src/imprint/session-diff.ts +79 -2
  105. package/src/imprint/session-merge.ts +9 -5
  106. package/src/imprint/stealth-chromium.ts +79 -0
  107. package/src/imprint/stealth-fetch.ts +309 -29
  108. package/src/imprint/stealth-token-cache.ts +88 -0
  109. package/src/imprint/teach-plan.ts +251 -0
  110. package/src/imprint/teach-state.ts +10 -0
  111. package/src/imprint/teach.ts +456 -142
  112. package/src/imprint/tool-candidates.ts +72 -14
  113. package/src/imprint/tool-plan.ts +313 -0
  114. package/src/imprint/tracing.ts +135 -6
  115. package/src/imprint/types.ts +61 -3
  116. package/examples/google-flights/search_google_flights/index.ts +0 -101
  117. package/examples/google-flights/search_google_flights/parser.test.ts +0 -140
  118. package/examples/google-flights/search_google_flights/parser.ts +0 -189
  119. package/examples/google-flights/search_google_flights/playbook.yaml +0 -130
  120. package/examples/google-flights/search_google_flights/workflow.json +0 -48
  121. package/examples/google-hotels/search_google_hotels/index.ts +0 -194
  122. package/examples/google-hotels/search_google_hotels/parser.test.ts +0 -168
  123. package/examples/google-hotels/search_google_hotels/parser.ts +0 -330
  124. package/examples/google-hotels/search_google_hotels/playbook.yaml +0 -125
  125. package/examples/google-hotels/search_google_hotels/workflow.json +0 -111
  126. package/examples/namecheap-domains/search_namecheap_domains/index.ts +0 -144
  127. package/examples/namecheap-domains/search_namecheap_domains/parser.ts +0 -380
  128. package/examples/namecheap-domains/search_namecheap_domains/playbook.yaml +0 -50
  129. package/examples/namecheap-domains/search_namecheap_domains/request-transform.ts +0 -136
  130. package/examples/namecheap-domains/search_namecheap_domains/workflow.json +0 -97
@@ -0,0 +1,367 @@
1
+ /**
2
+ * Event-correlated differential param grounding.
3
+ *
4
+ * The candidate detector reliably identifies WHICH inputs the user controlled
5
+ * (`likelyParams`) and WHICH events toggled them (`eventSeqs`) — but the compile
6
+ * agent historically grounded a param by eyeballing a single request, and when
7
+ * the value wasn't obviously present it gave up and shipped the param
8
+ * `verified:false`, inert. Yet the encoding is almost always right there: the
9
+ * request a filter-toggle event triggers differs from the prior equivalent
10
+ * request at exactly the position that param controls.
11
+ *
12
+ * This module makes that differential deterministic and site-agnostic: for each
13
+ * UI event, find the request it triggered, diff it against the most recent
14
+ * comparable request (same endpoint), and report the changed paths. The compile
15
+ * agent (and the precomputed hint surfaced to it) then maps each diff to a
16
+ * `likelyParam` — the semantic step the model is good at — instead of guessing
17
+ * at an encoding. Decoding is generic (JSON body, an `f.req=`-embedded JSON
18
+ * envelope as used by Google's batchexecute, or plain form fields), so this is
19
+ * not specific to any one site.
20
+ */
21
+
22
+ import type { CapturedRequest, Session } from './types.ts';
23
+
24
/** One changed leaf between two decoded request bodies. */
interface GroundingChange {
  /** Where the leaf sits inside the decoded request body, e.g. "[1][4][3]". */
  path: string;
  /** Serialized value found at `path` in the earlier (prior) request. */
  before: string;
  /** Serialized value found at `path` in the later (triggered) request. */
  after: string;
}
30
+
31
/** Result of grounding a single UI event against the requests around it. */
interface EventGrounding {
  /** Sequence number of the UI event that was grounded. */
  eventSeq: number;
  /** Human-readable label taken from the event detail (button text / aria-label / id). */
  label: string;
  /** Seq of the request the event triggered; absent when no request qualified. */
  triggeredSeq?: number;
  /** Seq of the earlier same-endpoint request used as the diff baseline. */
  priorSeq?: number;
  /** Endpoint key shared by the triggered and prior requests. */
  endpoint?: string;
  /** Leaf-level differences between the prior and the triggered request body. */
  changes: GroundingChange[];
}
42
+
43
+ /** Maximum number of requests following an event that are scanned when looking for the request it triggered (see `windowEnd`). */
44
+ const TRIGGER_WINDOW = 12;
45
+
46
/**
 * Turn a raw request body into a value that can be structurally diffed.
 *
 * Decoders are tried in this order:
 *  1. the whole (trimmed) body as JSON, when it starts with `{` or `[`;
 *  2. a form body carrying `f.req=<json>` (batchexecute) — the envelope
 *     `[[["rpcid","<inner-json-string>",…]]]` is unwrapped to the parsed inner
 *     payload when that inner string is itself JSON, otherwise the envelope
 *     is returned as-is;
 *  3. any other form body, as a flat `name → value` map (last value wins for
 *     repeated names);
 *  4. the trimmed string itself.
 *
 * @param body Raw request body, if any.
 * @returns `undefined` for a missing/empty body, else the decoded value.
 *          Never throws.
 */
export function decodeBodyForDiff(body: string | undefined): unknown {
  if (!body) return undefined;
  const text = body.trim();

  // 1. Whole-body JSON.
  if (/^[{[]/.test(text)) {
    try {
      return JSON.parse(text);
    } catch {
      // Looked like JSON but is not — keep trying the other decoders.
    }
  }

  // 4. Nothing resembling `key=value[&…]`: hand back the raw text.
  if (!/(^|&)[\w.]+=/.test(text)) return text;

  const fields = new URLSearchParams(text);

  // 2. batchexecute-style `f.req` envelope.
  const rawEnvelope = fields.get('f.req');
  if (rawEnvelope !== null) {
    try {
      const envelope = JSON.parse(rawEnvelope);
      // Envelope shape: [[["rpcid","<inner json string>", …]]]
      const inner: unknown = envelope?.[0]?.[0]?.[1];
      if (typeof inner !== 'string') return envelope;
      try {
        return JSON.parse(inner);
      } catch {
        return envelope;
      }
    } catch {
      // `f.req` is not JSON — treat the body as an ordinary form below.
    }
  }

  // 3. Plain form fields.
  const flat: Record<string, string> = {};
  fields.forEach((value, name) => {
    flat[name] = value;
  });
  return flat;
}
88
+
89
/**
 * Deep structural diff of two decoded values, reported as changed leaf paths.
 *
 * Arrays are compared index by index (`path[i]`), plain objects key by key
 * (`path.key`). Anything else — primitives, `null`, or a pair whose container
 * kinds differ (array vs. plain object) — is reported as a single change at
 * the current path. Subtrees whose JSON serializations are identical are
 * skipped without descending.
 *
 * @param a    Earlier ("before") value.
 * @param b    Later ("after") value.
 * @param path Path prefix of the current position; callers normally omit it.
 * @param out  Accumulator the changes are appended to; callers normally omit it.
 * @returns `out`, holding one entry per changed leaf. `before`/`after` are
 *          JSON-serialized and capped at 48 characters plus an ellipsis.
 */
export function structuralDiff(
  a: unknown,
  b: unknown,
  path = '',
  out: GroundingChange[] = [],
): GroundingChange[] {
  // Cheap equality gate: identical serializations mean nothing changed here.
  if (JSON.stringify(a) === JSON.stringify(b)) return out;

  if (Array.isArray(a) && Array.isArray(b)) {
    const n = Math.max(a.length, b.length);
    for (let i = 0; i < n; i++) structuralDiff(a[i], b[i], `${path}[${i}]`, out);
    return out;
  }

  // Only descend by key when BOTH sides are non-array objects. Descending an
  // array against an object would compare index "0" with key "0" and could
  // report no change at all for `[1]` vs `{"0":1}`.
  const isRecord = (v: unknown): v is Record<string, unknown> =>
    typeof v === 'object' && v !== null && !Array.isArray(v);
  if (isRecord(a) && isRecord(b)) {
    const keys = new Set([...Object.keys(a), ...Object.keys(b)]);
    for (const k of keys) {
      structuralDiff(a[k], b[k], path ? `${path}.${k}` : k, out);
    }
    return out;
  }

  const cap = (v: unknown): string => {
    // JSON.stringify yields undefined for undefined/functions/symbols; fall
    // back to String() so the length check below can never throw.
    const s: string = (JSON.stringify(v) as string | undefined) ?? String(v);
    return s.length > 48 ? `${s.slice(0, 48)}…` : s;
  };
  out.push({ path: path || '(root)', before: cap(a), after: cap(b) });
  return out;
}
122
+
123
/**
 * Stable key that groups "comparable" requests.
 *
 * When the URL query carries `rpcids=` (Google batchexecute, plural) or a
 * singular `rpcid=`, the key is `rpc:<decoded id>` so that distinct RPCs
 * posted to the same path are never collapsed into one key. Otherwise the key
 * is `METHOD pathname` with the query stripped; the method defaults to `GET`.
 *
 * @param req Captured request; only `url` and `method` are read.
 * @returns The grouping key. Never throws: a malformed percent-escape in the
 *          rpc id falls back to the raw (undecoded) id, and an unparseable
 *          URL falls back to the text before the first `?`.
 */
function endpointKey(req: CapturedRequest): string {
  const url = req.url ?? '';
  const method = req.method ?? 'GET';

  const rpc = /[?&]rpcids?=([^&]+)/.exec(url);
  if (rpc) {
    const rawId = rpc[1] ?? '';
    try {
      return `rpc:${decodeURIComponent(rawId)}`;
    } catch {
      // decodeURIComponent throws URIError on a malformed escape such as "%E0%A4%A".
      return `rpc:${rawId}`;
    }
  }

  try {
    return `${method} ${new URL(url).pathname}`;
  } catch {
    // Relative or otherwise unparseable URL.
    return `${method} ${url.split('?')[0]}`;
  }
}
139
+
140
/**
 * Read the request body off a captured request.
 *
 * Prefers `.body` and falls back to `.requestBody` to tolerate alternative
 * record shapes; nullish values collapse to `undefined`.
 */
function bodyOf(req: CapturedRequest): string | undefined {
  const { body, requestBody } = req as unknown as { body?: string; requestBody?: string };
  if (body != null) return body;
  return requestBody ?? undefined;
}
148
+
149
/**
 * Derive a short human-readable label from an event's `detail` string.
 *
 * `detail` is expected to be JSON. The first of `text`, `ariaLabel`, `name`,
 * `id` that holds a non-empty string (or a number) is used, with whitespace
 * runs collapsed and the result capped at 48 characters. An empty `text`
 * therefore no longer hides a usable `ariaLabel` (icon-only buttons).
 *
 * @param detail Raw event detail.
 * @returns The label; the first 48 characters of `detail` when it is not
 *          JSON; `''` when the JSON is not an object (including `null`) or
 *          carries no usable field.
 */
function eventLabel(detail: string): string {
  let parsed: unknown;
  try {
    parsed = JSON.parse(detail);
  } catch {
    return typeof detail === 'string' ? detail.slice(0, 48) : '';
  }
  // JSON.parse("null") is null and scalars have no label fields — property
  // access on null would throw, so bail out before touching any field.
  if (parsed === null || typeof parsed !== 'object') return '';

  const fields = parsed as Record<string, unknown>;
  for (const key of ['text', 'ariaLabel', 'name', 'id']) {
    const raw = fields[key];
    if (typeof raw !== 'string' && typeof raw !== 'number') continue;
    const cleaned = String(raw).replace(/\s+/g, ' ').trim();
    if (cleaned !== '') return cleaned.slice(0, 48);
  }
  return '';
}
159
+
160
+ /** Telemetry/beacon endpoints that fire constantly and are never the tool's
161
+ * load-bearing request — excluded when we can't scope to the candidate's own
162
+ * endpoints. */
163
+ const TELEMETRY = /\/(log|gen_204|jserror|ping|beacon|csi|_\/bscframe|metrics|stats)\b/i;
164
+
165
/**
 * Whether a decoded body is worth diffing: any non-null object (array or
 * record). Raw strings — often gzipped or otherwise opaque — are not.
 */
function isStructured(v: unknown): boolean {
  if (v === null || v === undefined) return false;
  return typeof v === 'object';
}
170
+
171
/**
 * Ground a single event: find the request it triggered and diff that request
 * against the most recent earlier request of the same endpoint.
 *
 * The trigger is the first request (by `seq`) after the event, inside the
 * window computed by `windowEnd`, whose body decodes to something and which
 * either
 *  - belongs to `relevantEndpoints`, when that set is given and non-empty, or
 *  - has a structured body and does not match the `TELEMETRY` URL pattern.
 *
 * Scoping by `relevantEndpoints` (the candidate's own request endpoints, via
 * `endpointKey`) keeps a burst of telemetry POSTs between the click and the
 * real request from being mistaken for the trigger.
 *
 * @param session           Recorded session (`requests` and `events` are read).
 * @param eventSeq          Seq of the UI event to ground.
 * @param relevantEndpoints Optional endpoint keys to restrict the search to.
 * @returns The grounding; `changes` is empty when nothing was triggered or no
 *          prior comparable request exists.
 */
export function groundEvent(
  session: Session,
  eventSeq: number,
  relevantEndpoints?: Set<string>,
): EventGrounding {
  const reqs = [...session.requests].sort((a, b) => a.seq - b.seq);
  const ev = session.events.find((e) => e.seq === eventSeq);
  const label = ev ? eventLabel(ev.detail) : '';

  // The window bound depends only on (reqs, eventSeq): compute it once rather
  // than once per scanned request.
  const lastSeqInWindow = eventSeq + windowEnd(reqs, eventSeq);
  const scope = relevantEndpoints && relevantEndpoints.size > 0 ? relevantEndpoints : undefined;

  let triggered: (typeof reqs)[number] | undefined;
  let triggeredBody: unknown;
  for (const r of reqs) {
    if (r.seq <= eventSeq) continue;
    if (r.seq > lastSeqInWindow) break; // reqs is sorted — nothing later can qualify
    const decoded = decodeBodyForDiff(bodyOf(r));
    if (decoded === undefined) continue;
    const qualifies = scope
      ? scope.has(endpointKey(r))
      : // Fallback: structured body + not an obvious telemetry endpoint.
        isStructured(decoded) && !TELEMETRY.test(r.url ?? '');
    if (qualifies) {
      triggered = r;
      triggeredBody = decoded; // keep the decoded body so it is not parsed twice
      break;
    }
  }
  if (!triggered) return { eventSeq, label, changes: [] };

  const key = endpointKey(triggered);

  // Walk backwards to the latest earlier request of the same endpoint whose
  // body decodes, remembering its decoded form for the diff.
  let prior: (typeof reqs)[number] | undefined;
  let priorBody: unknown;
  for (let i = reqs.length - 1; i >= 0; i--) {
    const r = reqs[i];
    if (!r || r.seq >= triggered.seq || endpointKey(r) !== key) continue;
    const decoded = decodeBodyForDiff(bodyOf(r));
    if (decoded === undefined) continue;
    prior = r;
    priorBody = decoded;
    break;
  }

  return {
    eventSeq,
    label,
    triggeredSeq: triggered.seq,
    priorSeq: prior?.seq,
    endpoint: key,
    changes: prior ? structuralDiff(priorBody, triggeredBody) : [],
  };
}
220
+
221
/**
 * Size of the trigger search window, expressed as a seq distance.
 *
 * Looks at the first `TRIGGER_WINDOW` requests (in the given order) whose seq
 * is greater than `eventSeq` and returns the distance from `eventSeq` to the
 * last of them. When no request follows the event, `TRIGGER_WINDOW` itself is
 * returned.
 */
function windowEnd(reqs: CapturedRequest[], eventSeq: number): number {
  let taken = 0;
  let lastSeq: number | undefined;
  for (const r of reqs) {
    if (taken >= TRIGGER_WINDOW) break;
    if (!(r.seq > eventSeq)) continue;
    lastSeq = r.seq;
    taken += 1;
  }
  if (lastSeq === undefined) return TRIGGER_WINDOW;
  return lastSeq - eventSeq;
}
228
+
229
/**
 * Precompute grounding diffs for a candidate's filter-toggle events.
 *
 * Events that triggered nothing, or whose diff is empty, are dropped. Paths
 * that change in many events are then removed as session churn (rotating
 * tokens, pagination flags, display-mode values): a path is churn once it
 * appears in at least `max(3, ceil(groundedEvents / 2))` event diffs. Events
 * left with no changes after that pruning are dropped too.
 *
 * @param session           Recorded session.
 * @param eventSeqs         Seqs of the events to ground.
 * @param relevantEndpoints `endpointKey()` of the candidate's own requests, so
 *                          the diff is taken against the tool's load-bearing
 *                          request rather than telemetry.
 * @returns Groundings that still carry at least one change.
 */
export function groundingForEvents(
  session: Session,
  eventSeqs: number[],
  relevantEndpoints?: Set<string>,
): EventGrounding[] {
  const grounded: EventGrounding[] = [];
  for (const seq of eventSeqs) {
    const g = groundEvent(session, seq, relevantEndpoints);
    if (g.changes.length > 0) grounded.push(g);
  }

  // Count, per path, in how many distinct events it changed.
  const eventsPerPath = new Map<string, number>();
  for (const g of grounded) {
    const distinctPaths = new Set<string>();
    for (const change of g.changes) distinctPaths.add(change.path);
    distinctPaths.forEach((p) => {
      eventsPerPath.set(p, (eventsPerPath.get(p) ?? 0) + 1);
    });
  }

  const churnAt = Math.max(3, Math.ceil(grounded.length / 2));
  const kept: EventGrounding[] = [];
  for (const g of grounded) {
    g.changes = g.changes.filter((c) => (eventsPerPath.get(c.path) ?? 0) < churnAt);
    if (g.changes.length > 0) kept.push(g);
  }
  return kept;
}
257
+
258
/**
 * Collect the endpoint keys of the requests with the given seqs.
 *
 * Seqs with no matching request in the session are ignored.
 */
export function endpointsForSeqs(session: Session, seqs: number[]): Set<string> {
  const keys = seqs.flatMap((seq) => {
    const hit = session.requests.find((candidate) => candidate.seq === seq);
    return hit ? [endpointKey(hit)] : [];
  });
  return new Set<string>(keys);
}
267
+
268
+ // ─── Input-value provenance ──────────────────────────────────────────────────
269
+ //
270
+ // The grounding above covers params the user *toggled* (filters/sort). It does
271
+ // not cover a primary param whose value is an opaque id the request can't carry
272
+ // as plain text — e.g. an entity/object handle, an account id, a place/geo id, a
273
+ // category token. The compile agent historically shipped these as the raw param
274
+ // text, which the backend silently ignores and falls back to a default (an
275
+ // unfiltered/global result set, or a server-chosen default scope). The id was
276
+ // never the user's text; it was *minted by an earlier response* and chained into
277
+ // the request. That cross-request data-flow is the signal this detects — keyed
278
+ // on structure, not any vendor's id format.
279
+
280
/** A request-body position whose id-like value was first seen in an earlier response. */
interface InputProvenance {
  /** Position of the value inside the decoded request body. */
  path: string;
  /** Sample of the value, truncated to 40 characters. It varies per call — the PATH is the signal. */
  valueSample: string;
  /** Seq of the candidate request that consumes the value. */
  requestSeq: number;
  /** Seq of the earliest preceding request whose RESPONSE body contains the value. */
  sourceSeq: number;
  /** Endpoint key of that source request. */
  sourceEndpoint: string;
  /** True when source and consumer share an endpoint key (resolve-then-refine:
   *  an initial request whose response yields the id, re-sent as a refined
   *  request carrying that id). */
  selfChain: boolean;
}
295
+
296
/**
 * Heuristic: does `v` look like an opaque, machine-minted identifier rather
 * than human-typed text?
 *
 * Vendor-agnostic — keyed on structure, not on any particular id format.
 * Rejected: anything containing whitespace, anything shorter than 6
 * characters, ISO dates/datetimes, and plain numbers (optionally signed and/or
 * with a decimal fraction, e.g. "123456", "1234.56", "-73.98570"). Accepted:
 * values mixing letters and digits, and delimited handles (one of
 * `/ : _ . + = ~ -`) that carry at least one letter or digit — e.g.
 * "ns/abc123", hex ids, UUIDs, base64-ish session handles.
 *
 * @param v Candidate string.
 * @returns `true` when `v` is id-like.
 */
function isIdLike(v: string): boolean {
  if (/\s/.test(v)) return false; // free text has spaces
  if (v.length < 6) return false; // too short to be an opaque handle
  if (/^\d{4}-\d{2}-\d{2}([T ]|$)/.test(v)) return false; // ISO date / datetime
  // A price, coordinate or signed count is a number, not a handle. Without
  // this the "." / "-" / "+" would be taken for id punctuation below.
  if (/^[+-]?(?:\d+\.?\d*|\.\d+)$/.test(v)) return false;
  const hasLetter = /[A-Za-z]/.test(v);
  const hasDigit = /\d/.test(v);
  const hasIdPunct = /[/:_.+=~-]/.test(v); // namespaced / delimited handle
  // Opaque if it mixes letters+digits (a token), or is a delimited handle that
  // still carries an alphanumeric payload. A bare word or a pure number is not.
  return (hasLetter && hasDigit) || (hasIdPunct && (hasLetter || hasDigit));
}
313
+
314
/**
 * Read the response body off a captured request.
 *
 * @returns `response.body` when it is a string, otherwise `undefined`.
 */
function responseBodyOf(req: CapturedRequest): string | undefined {
  const { response } = req as unknown as { response?: { body?: string } };
  const body: unknown = response?.body;
  if (typeof body !== 'string') return undefined;
  return body;
}
318
+
319
/**
 * Collect every string leaf of at least 4 characters from a decoded value,
 * depth-first, together with its path (`[i]` for array indices, `.key` for
 * object keys).
 *
 * @param v    Value to walk.
 * @param path Path prefix of the current position; callers normally omit it.
 * @param out  Accumulator; callers normally omit it.
 * @returns `out`, with the leaves appended in traversal order.
 */
function leafStrings(
  v: unknown,
  path = '',
  out: { path: string; val: string }[] = [],
): { path: string; val: string }[] {
  if (typeof v === 'string') {
    if (v.length >= 4) out.push({ path, val: v });
    return out;
  }
  if (Array.isArray(v)) {
    for (let i = 0; i < v.length; i++) leafStrings(v[i], `${path}[${i}]`, out);
    return out;
  }
  if (v !== null && typeof v === 'object') {
    for (const [key, child] of Object.entries(v as Record<string, unknown>)) {
      const childPath = path ? `${path}.${key}` : key;
      leafStrings(child, childPath, out);
    }
  }
  return out;
}
334
+
335
/**
 * Find request-body positions whose id-like value was chained in from an
 * earlier response rather than supplied as the user's text.
 *
 * For each candidate request (in ascending seq order) with a structured
 * decoded body, every id-like string leaf is looked up in the response bodies
 * of requests with a smaller seq; the earliest response containing the value
 * is recorded as the source. Results are deduplicated by endpoint + path —
 * the value varies per call, the position is the durable signal — and a
 * position is only marked as seen once a source has been found for it.
 *
 * @param session       Recorded session.
 * @param candidateSeqs Seqs of the candidate's requests.
 * @returns One entry per (endpoint, path) with an upstream source.
 */
export function inputProvenance(session: Session, candidateSeqs: number[]): InputProvenance[] {
  const ordered = [...session.requests].sort((l, r) => l.seq - r.seq);
  const seqsAscending = [...candidateSeqs].sort((l, r) => l - r);
  const reported = new Set<string>();
  const findings: InputProvenance[] = [];

  for (const requestSeq of seqsAscending) {
    const consumer = ordered.find((c) => c.seq === requestSeq);
    if (!consumer) continue;

    const payload = decodeBodyForDiff(bodyOf(consumer));
    const structured = payload != null && typeof payload === 'object';
    if (!structured) continue;

    const endpoint = endpointKey(consumer);
    for (const leaf of leafStrings(payload)) {
      if (!isIdLike(leaf.val)) continue;

      const dedupeKey = `${endpoint}|${leaf.path}`;
      if (reported.has(dedupeKey)) continue;

      const origin = ordered.find(
        (c) => c.seq < requestSeq && responseBodyOf(c)?.includes(leaf.val) === true,
      );
      // Not minted upstream → it IS the param's own text / a constant.
      if (!origin) continue;

      reported.add(dedupeKey);
      const originEndpoint = endpointKey(origin);
      findings.push({
        path: leaf.path,
        valueSample: leaf.val.length > 40 ? `${leaf.val.slice(0, 40)}…` : leaf.val,
        requestSeq,
        sourceSeq: origin.seq,
        sourceEndpoint: originEndpoint,
        selfChain: originEndpoint === endpoint,
      });
    }
  }
  return findings;
}
@@ -38,6 +38,35 @@ export function localSessionsDir(site: string): string {
38
38
  return pathJoin(localSiteDir(site), 'sessions');
39
39
  }
40
40
 
41
/**
 * Default location of the report written by `imprint audit <site>`: a
 * per-site sidecar file, `~/.imprint/<site>/.audit-report.json`.
 */
export function localAuditReportPath(site: string): string {
  const siteDir = localSiteDir(site);
  return pathJoin(siteDir, '.audit-report.json');
}
46
+
47
/**
 * Directory holding modules shared by all of a site's tools
 * (`~/.imprint/<site>/_shared`).
 *
 * Per-tool artifacts import from it via `../_shared/<name>.ts`. The tool
 * loader and completed-workflow discovery both skip `_shared` (it has no
 * index.ts).
 */
export function localSharedDir(site: string): string {
  const siteDir = localSiteDir(site);
  return pathJoin(siteDir, '_shared');
}
54
+
55
/**
 * Resolve a shared-module relative path to an absolute path under the site's
 * shared directory.
 *
 * Accepts either a bare file name ("sign.ts") or one carrying the `_shared/`
 * prefix ("_shared/sign.ts"). Only flat files are allowed.
 *
 * @param site    Site whose shared directory is used.
 * @param relPath Module path to resolve.
 * @returns Absolute path of the module file.
 * @throws Error when `relPath` contains "..", a backslash, or a leading "/";
 *         when anything after the optional `_shared/` prefix still contains
 *         "/"; or when no file name remains (empty or "."), which would
 *         otherwise resolve to the shared directory itself.
 */
export function localSharedModulePath(site: string, relPath: string): string {
  if (relPath.includes('..') || relPath.startsWith('/') || relPath.includes('\\')) {
    throw new Error(
      `Invalid shared module path: "${relPath}". Must not contain "..", a backslash, or a leading "/".`,
    );
  }
  const base = relPath.startsWith('_shared/') ? relPath.slice('_shared/'.length) : relPath;
  if (base.includes('/')) {
    throw new Error(
      `Invalid shared module path: "${relPath}". Must be a flat file under _shared/.`,
    );
  }
  // "", "_shared/" and "." would all join to the shared directory, not a file.
  if (base === '' || base === '.') {
    throw new Error(
      `Invalid shared module path: "${relPath}". Must name a file under _shared/.`,
    );
  }
  return pathJoin(localSharedDir(site), base);
}
69
+
41
70
  export function defaultSessionJsonlPath(site: string, timestamp: string): string {
42
71
  return pathJoin(localSessionsDir(site), `${timestamp}.jsonl`);
43
72
  }
@@ -12,6 +12,7 @@ import { createLog } from './log.ts';
12
12
  import { imprintHomeDir } from './paths.ts';
13
13
  import { parsePlaybook } from './playbook-parser.ts';
14
14
  import { substituteString } from './runtime.ts';
15
+ import { getStealthChromium, getStealthExecutablePath } from './stealth-chromium.ts';
15
16
  import type {
16
17
  Locator,
17
18
  Playbook,
@@ -64,33 +65,24 @@ export async function runPlaybook(opts: RunPlaybookOptions): Promise<ToolResult>
64
65
  if (opts.pageOverride) {
65
66
  page = opts.pageOverride;
66
67
  } else {
67
- // playwright-extra + stealth plugin patches navigator.webdriver,
68
- // plugin enumeration, WebGL vendor strings, etc. Vanilla headless
69
- // Playwright eats a 403 from any decent enterprise site (verified:
70
- // Southwest 403 → 200 with stealth).
71
68
  let chromium: typeof import('playwright').chromium;
72
69
  try {
73
- const pwExtra = await import('playwright-extra');
74
- const stealthMod = await import('puppeteer-extra-plugin-stealth');
75
- const stealthFactory =
76
- (stealthMod as { default?: () => unknown }).default ??
77
- (stealthMod as unknown as () => unknown);
78
- pwExtra.chromium.use(stealthFactory() as never);
79
- chromium = pwExtra.chromium as unknown as typeof import('playwright').chromium;
80
- } catch {
81
- try {
82
- const pw = await import('playwright');
83
- chromium = pw.chromium;
84
- } catch (innerErr) {
85
- return {
86
- ok: false,
87
- error: 'UNKNOWN',
88
- message: `Playwright not available: ${errMsg(innerErr)}. Run: bunx playwright install chromium`,
89
- };
90
- }
70
+ chromium = await getStealthChromium();
71
+ } catch (innerErr) {
72
+ return {
73
+ ok: false,
74
+ error: 'UNKNOWN',
75
+ message: `Playwright not available: ${errMsg(innerErr)}. Run: bunx playwright install chromium`,
76
+ };
91
77
  }
92
78
  try {
93
- browser = await chromium.launch({ headless: !opts.headed });
79
+ // Use the same full Chrome binary as `imprint record` — NOT
80
+ // chrome-headless-shell, which Akamai detects at the binary level
81
+ // regardless of stealth-plugin JS patches.
82
+ browser = await chromium.launch({
83
+ headless: !opts.headed,
84
+ executablePath: getStealthExecutablePath(),
85
+ });
94
86
  } catch (err) {
95
87
  return {
96
88
  ok: false,
@@ -159,10 +151,23 @@ export async function runPlaybook(opts: RunPlaybookOptions): Promise<ToolResult>
159
151
  } catch (err) {
160
152
  const screenshotPath = await screenshot(page, playbook.toolName, lastStep);
161
153
  const suffix = screenshotPath ? `\nscreenshot: ${screenshotPath}` : '';
154
+ const errStr = errMsg(err);
155
+ // Classify the failure mode honestly: a missing locator, a step
156
+ // timeout, or a `forResponse` wait that didn't resolve are
157
+ // transient page-state signals (the DOM rendered differently than
158
+ // the recording, or the page was slow). Those are NETWORK-class
159
+ // signals, not tool-defect (BAD_RESPONSE) signals — the audit
160
+ // gate's `tool_broken` classifier treats BAD_RESPONSE as a real
161
+ // bug, which over-attributes drift to defects. Map known
162
+ // transient-shape errors to NETWORK so they count as `infra`
163
+ // (re-runnable) rather than `tool_broken` (permanent defect).
164
+ const isTransient = /No locator matched|Timeout \d+ms exceeded|forResponse|waiting for/i.test(
165
+ errStr,
166
+ );
162
167
  return {
163
168
  ok: false,
164
- error: 'BAD_RESPONSE',
165
- message: `Playbook failed at step ${lastStep}: ${errMsg(err)}${suffix}`,
169
+ error: isTransient ? 'NETWORK' : 'BAD_RESPONSE',
170
+ message: `Playbook failed at step ${lastStep}: ${errStr}${suffix}`,
166
171
  };
167
172
  } finally {
168
173
  if (!opts.pageOverride) {
@@ -252,11 +257,36 @@ async function executeStep(
252
257
  case 'type': {
253
258
  const locator = await firstMatching(page, step.locators, params, timeoutMs);
254
259
  const value = subst(step.value, params);
255
- if (step.clear === false) {
256
- await locator.pressSequentially(value, { timeout: timeoutMs });
257
- } else {
258
- await locator.fill(value, { timeout: timeoutMs });
260
+ // Detect element type so we dispatch the right action. `type` on a
261
+ // <select> means "choose the option whose value/label matches" —
262
+ // a recording can capture either action shape, and the audit-time
263
+ // tool may also call type with a value that happens to land on a
264
+ // select. Without this branch, fill()/pressSequentially() throw
265
+ // "Element is not an input/textarea" and the whole playbook
266
+ // aborts.
267
+ const tagName = await locator.evaluate((el) => el.tagName.toLowerCase());
268
+ if (tagName === 'select') {
269
+ // Try value first, fall back to label — match Playwright's own
270
+ // selectOption semantics.
271
+ try {
272
+ await locator.selectOption({ value }, { timeout: timeoutMs });
273
+ } catch {
274
+ await locator.selectOption({ label: value }, { timeout: timeoutMs });
275
+ }
276
+ await applyWait(page, step.wait_for, locator, timeoutMs);
277
+ return;
278
+ }
279
+ // Inputs / textareas: pressSequentially fires real input / keydown
280
+ // / keyup events. React-style frameworks bind to synthetic events
281
+ // that locator.fill() doesn't trigger — typing into an autocomplete
282
+ // or debounced search field with fill() updates the input visually
283
+ // but the framework's onChange handler never runs, so the dropdown
284
+ // / XHR / next-step locator times out. The ~10ms-per-char internal
285
+ // delay is negligible against page-load latency.
286
+ if (step.clear !== false) {
287
+ await locator.fill('', { timeout: timeoutMs });
259
288
  }
289
+ await locator.pressSequentially(value, { timeout: timeoutMs });
260
290
  await applyWait(page, step.wait_for, locator, timeoutMs);
261
291
  return;
262
292
  }
@@ -376,10 +406,27 @@ async function applyWait(
376
406
  }
377
407
  if ('xhr' in wait) {
378
408
  const re = new RegExp(wait.xhr);
379
- await page.waitForResponse(
380
- (resp) => re.test(resp.url()) && (!wait.method || resp.request().method() === wait.method),
381
- { timeout: wait.timeout_ms ?? timeoutMs },
382
- );
409
+ try {
410
+ await page.waitForResponse(
411
+ (resp) => re.test(resp.url()) && (!wait.method || resp.request().method() === wait.method),
412
+ { timeout: wait.timeout_ms ?? timeoutMs },
413
+ );
414
+ } catch (err) {
415
+ // A missed `wait_for: {xhr: ...}` is usually a soft signal: the
416
+ // recorded action (typing into an autocomplete, clicking a tab)
417
+ // happened, but the page didn't fire the exact XHR we matched on
418
+ // — either the URL pattern drifted, the debounce window was
419
+ // tighter than our wait, or the page chose a cached response. The
420
+ // next playbook step has its own locator / wait_for and will fail
421
+ // loudly if the page state is actually wrong. Letting the
422
+ // playbook continue here gives it a real chance to recover
423
+ // (observed on Costco's pickup-location autocomplete: typing
424
+ // succeeded, the XHR just never fired before our 30s window).
425
+ const msg = err instanceof Error ? err.message : String(err);
426
+ // Re-throw closures / nav errors that aren't simple timeouts —
427
+ // those signal real page breakdown.
428
+ if (!/timeout|Timeout/.test(msg)) throw err;
429
+ }
383
430
  return;
384
431
  }
385
432
  if ('sleep_ms' in wait) {
@@ -387,7 +434,10 @@ async function applyWait(
387
434
  }
388
435
  }
389
436
 
390
- async function extractResult(
437
+ /** Exported for testing — drives the XHR-body extraction contract that
438
+ * must stay symmetric with the workflow runtime (runtime.ts:279-285).
439
+ */
440
+ export async function extractResult(
391
441
  page: Page,
392
442
  result: PlaybookResult,
393
443
  captured: Array<{ url: string; method: string; status: number; body: string | null }>,
@@ -410,16 +460,27 @@ async function extractResult(
410
460
  `Result XHR returned ${last.status} (${last.url}): ${last.body.slice(0, 300)}.${hint}`,
411
461
  );
412
462
  }
413
- let parsed: unknown;
463
+ // Mirror runtime.ts (workflow path) semantics: try JSON first, but fall
464
+ // back to the raw body string when parsing fails. Many APIs return
465
+ // non-JSON envelopes that a downstream parser knows how to decode —
466
+ // Google XSSI prefix (`)]}'`), chunked batchexecute payloads, JSONP
467
+ // callbacks, protobuf-over-HTTP, etc. Throwing here would bypass the
468
+ // parser entirely; passing the raw bytes lets the parser do its job and
469
+ // keeps the playbook fallback's contract symmetric with the workflow
470
+ // path.
471
+ let parsed: unknown = last.body;
414
472
  try {
415
473
  parsed = JSON.parse(last.body);
416
474
  } catch {
417
- throw new Error(`Result XHR body was not JSON (${last.url}): ${last.body.slice(0, 200)}`);
475
+ // Path-based extraction (`items[].id`) needs a structured value to
476
+ // navigate, so we still fail loudly in that case. Whole-body
477
+ // extraction (`extract === '*'`) is the contract that says "the
478
+ // parser owns the bytes," so we pass them through.
479
+ if (result.extract !== '*' && result.extract !== '') {
480
+ throw new Error(`Result XHR body was not JSON (${last.url}): ${last.body.slice(0, 200)}`);
481
+ }
418
482
  }
419
- // `*` returns the full parsed JSON unchanged — useful when the consumer
420
- // (parser.ts, MCP caller) wants the rich object graph rather than just
421
- // numeric leaves.
422
- if (result.extract === '*') {
483
+ if (result.extract === '*' || result.extract === '') {
423
484
  return { [result.return_as]: parsed, source_url: last.url };
424
485
  }
425
486
  return { [result.return_as]: extractAt(parsed, result.extract), source_url: last.url };