imprint-mcp 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97) hide show
  1. package/CHANGELOG.md +168 -0
  2. package/LICENSE +21 -0
  3. package/README.md +322 -0
  4. package/examples/discoverandgo/README.md +57 -0
  5. package/examples/discoverandgo/book_discoverandgo_museum_pass/cron.json +8 -0
  6. package/examples/discoverandgo/book_discoverandgo_museum_pass/index.ts +89 -0
  7. package/examples/discoverandgo/book_discoverandgo_museum_pass/workflow.json +39 -0
  8. package/examples/echo/README.md +37 -0
  9. package/examples/echo/echo_test/index.ts +31 -0
  10. package/examples/google-flights/search_google_flights/index.ts +101 -0
  11. package/examples/google-flights/search_google_flights/parser.test.ts +140 -0
  12. package/examples/google-flights/search_google_flights/parser.ts +189 -0
  13. package/examples/google-flights/search_google_flights/playbook.yaml +130 -0
  14. package/examples/google-flights/search_google_flights/workflow.json +48 -0
  15. package/examples/google-hotels/search_google_hotels/index.ts +194 -0
  16. package/examples/google-hotels/search_google_hotels/parser.test.ts +168 -0
  17. package/examples/google-hotels/search_google_hotels/parser.ts +330 -0
  18. package/examples/google-hotels/search_google_hotels/playbook.yaml +125 -0
  19. package/examples/google-hotels/search_google_hotels/workflow.json +111 -0
  20. package/examples/namecheap-domains/search_namecheap_domains/index.ts +144 -0
  21. package/examples/namecheap-domains/search_namecheap_domains/parser.ts +380 -0
  22. package/examples/namecheap-domains/search_namecheap_domains/playbook.yaml +50 -0
  23. package/examples/namecheap-domains/search_namecheap_domains/request-transform.ts +136 -0
  24. package/examples/namecheap-domains/search_namecheap_domains/workflow.json +97 -0
  25. package/examples/southwest/README.md +81 -0
  26. package/examples/southwest/search_southwest_flights/backends.json +23 -0
  27. package/examples/southwest/search_southwest_flights/cron.json +19 -0
  28. package/examples/southwest/search_southwest_flights/index.ts +110 -0
  29. package/examples/southwest/search_southwest_flights/playbook.yaml +46 -0
  30. package/examples/southwest/search_southwest_flights/workflow.json +54 -0
  31. package/package.json +78 -0
  32. package/prompts/compile-agent.md +580 -0
  33. package/prompts/intent-detection.md +198 -0
  34. package/prompts/playbook-compilation.md +279 -0
  35. package/prompts/request-triage.md +74 -0
  36. package/prompts/tool-candidate-detection.md +104 -0
  37. package/src/cli.ts +1287 -0
  38. package/src/imprint/agent.ts +468 -0
  39. package/src/imprint/app-api-hosts.ts +53 -0
  40. package/src/imprint/backend-ladder.ts +568 -0
  41. package/src/imprint/check.ts +136 -0
  42. package/src/imprint/chromium.ts +211 -0
  43. package/src/imprint/claude-cli-compile.ts +640 -0
  44. package/src/imprint/cli-credential.ts +394 -0
  45. package/src/imprint/codex-cli-compile.ts +712 -0
  46. package/src/imprint/compile-agent-types.ts +40 -0
  47. package/src/imprint/compile-agent.ts +404 -0
  48. package/src/imprint/compile-tools.ts +1389 -0
  49. package/src/imprint/compile.ts +720 -0
  50. package/src/imprint/cookie-jar.ts +246 -0
  51. package/src/imprint/credential-bundle.ts +195 -0
  52. package/src/imprint/credential-extract.ts +290 -0
  53. package/src/imprint/credential-store.ts +707 -0
  54. package/src/imprint/cron.ts +312 -0
  55. package/src/imprint/doctor.ts +223 -0
  56. package/src/imprint/emit.ts +154 -0
  57. package/src/imprint/etld.ts +134 -0
  58. package/src/imprint/freeform-redact.ts +216 -0
  59. package/src/imprint/inject-listener.ts +137 -0
  60. package/src/imprint/install.ts +795 -0
  61. package/src/imprint/integrations.ts +385 -0
  62. package/src/imprint/is-compiled.ts +2 -0
  63. package/src/imprint/json-path.ts +100 -0
  64. package/src/imprint/llm.ts +998 -0
  65. package/src/imprint/load-json.ts +54 -0
  66. package/src/imprint/log.ts +33 -0
  67. package/src/imprint/login.ts +166 -0
  68. package/src/imprint/mcp-compile-server.ts +282 -0
  69. package/src/imprint/mcp-maintenance.ts +1790 -0
  70. package/src/imprint/mcp-server.ts +350 -0
  71. package/src/imprint/multi-progress.ts +69 -0
  72. package/src/imprint/notify.ts +155 -0
  73. package/src/imprint/paths.ts +64 -0
  74. package/src/imprint/playbook-parser.ts +21 -0
  75. package/src/imprint/playbook-runner.ts +465 -0
  76. package/src/imprint/probe-backends.ts +251 -0
  77. package/src/imprint/progress.ts +28 -0
  78. package/src/imprint/record.ts +470 -0
  79. package/src/imprint/redact.ts +550 -0
  80. package/src/imprint/replay-capture.ts +387 -0
  81. package/src/imprint/request-context.ts +66 -0
  82. package/src/imprint/runtime-link.ts +73 -0
  83. package/src/imprint/runtime.ts +942 -0
  84. package/src/imprint/sensitive-keys.ts +156 -0
  85. package/src/imprint/session-diff.ts +409 -0
  86. package/src/imprint/session-merge.ts +198 -0
  87. package/src/imprint/session-writer.ts +149 -0
  88. package/src/imprint/sites.ts +27 -0
  89. package/src/imprint/stealth-fetch.ts +434 -0
  90. package/src/imprint/teach-state.ts +235 -0
  91. package/src/imprint/teach.ts +2120 -0
  92. package/src/imprint/tool-candidates.ts +423 -0
  93. package/src/imprint/tool-loader.ts +186 -0
  94. package/src/imprint/tool-selection.ts +70 -0
  95. package/src/imprint/tracing.ts +508 -0
  96. package/src/imprint/types.ts +472 -0
  97. package/src/imprint/version.ts +21 -0
@@ -0,0 +1,198 @@
1
+ # Imprint Intent Detection
2
+
3
+ You analyze a captured browser session and produce a deterministic, parameterized workflow that an MCP tool can replay.
4
+
5
+ ## Input
6
+
7
+ You will receive a JSON object with this shape:
8
+
9
+ ```json
10
+ {
11
+ "site": "string",
12
+ "url": "string (starting URL)",
13
+ "narration": [
14
+ { "timestamp": ms, "text": "what the user said they were doing" }
15
+ ],
16
+ "events": [
17
+ { "timestamp": ms, "type": "click|input|change|submit|navigation", "detail": "..." }
18
+ ],
19
+ "requests": [
20
+ {
21
+ "seq": int,
22
+ "timestamp": ms,
23
+ "method": "GET|POST|...",
24
+ "url": "string",
25
+ "headers": { ... },
26
+ "body": "string or omitted",
27
+ "resourceType": "Document|XHR|Fetch|Stylesheet|...",
28
+ "response": { "status": int, "headers": {...}, "body": "string" }
29
+ }
30
+ ]
31
+ }
32
+ ```
33
+
34
+ The narration is in the user's own words and is your most reliable signal of intent. Use the timestamps to correlate narration → events → requests.
35
+
36
+ Sensitive fields fall into two categories in the input you receive:
37
+
38
+ 1. **Already-templated credentials** — login form values like username/email + password are rewritten to `${credential.NAME}` placeholders BEFORE you see the session. When you see a request body like `username=${credential.username}&password=${credential.password}`, those placeholders MUST be preserved verbatim in your generated workflow.json. The runtime substitutes them from a per-site credential manager (OS keychain) at call time. Do NOT replace these with parameters or the redacted-byte form.
39
+
40
+ 2. **Generic redactions** — other secrets (cookies, auth headers, response tokens) have been replaced with `[REDACTED:N]` markers (N = original byte length). The presence of these tells you "this field was a credential/token in the original capture" — you should treat such fields as parameterized auth that the runtime will inject from the user's credential store. Reference them as `${credential.NAME}` (pick a snake_case name like `csrf_token`, `patron_id`). NEVER hardcode the redacted values.
41
+
42
+ ## Output
43
+
44
+ You output a single JSON object matching this schema, and ONLY that JSON (no prose before or after):
45
+
46
+ ```json
47
+ {
48
+ "toolName": "snake_case_verb_phrase",
49
+ "intent": {
50
+ "description": "one-sentence human description of what this workflow does",
51
+ "userSaid": "concatenated relevant narration verbatim"
52
+ },
53
+ "parameters": [
54
+ {
55
+ "name": "snake_case_param_name",
56
+ "type": "string|number|boolean",
57
+ "description": "what this parameter represents from the user's perspective",
58
+ "default": "optional default value"
59
+ }
60
+ ],
61
+ "requests": [
62
+ {
63
+ "method": "GET|POST|...",
64
+ "url": "https://... — supports THREE placeholder syntaxes (and ONLY these three): ${param.NAME} for user-supplied parameters; ${response[N].JSON_PATH} for values extracted from a prior response in this chain (N is the 0-based index into THIS requests array); ${credential.NAME} for values stored at login time (patron_id, csrf_token, etc.) — anything that's per-user-account state",
65
+ "headers": { "Header-Name": "value or ${param.X} or ${response[N].field} or ${credential.X}" },
66
+ "body": "optional — same templating rules as url",
67
+ "extract": {
68
+ "json_path_expression": "name_to_use_in_subsequent_${response[N].name}_substitutions"
69
+ }
70
+ }
71
+ ],
72
+ "site": "string (echo from input)"
73
+ }
74
+ ```
75
+
76
+ ## Rules
77
+
78
+ 1. **Pick the smallest set of requests that accomplishes the user's stated intent.** Most captured requests are noise: analytics, asset loads, telemetry beacons, prefetches, font/image fetches. Drop them all.
79
+
80
+ 2. **Identify the LOAD-BEARING requests** — the ones that actually do the user's work (the booking, the search, the post). Keep them in chronological order. There are usually 1-5 of these.
81
+
82
+ 3. **Parameterize aggressively but correctly.** Anything the user would change between runs is a parameter (use `${param.NAME}`). Anything that's identity-specific to this user (their library card patron ID, an internal user UUID, a CSRF token established at login) is NOT a parameter — it's stable per-account state that the runtime injects via credentials (use `${credential.NAME}` and pick a `NAME` that's snake_case and descriptive: `patron_id`, `csrf_token`, `account_uuid`). User-facing things like email or display name CAN be parameters if the user might want to override (e.g., booking a museum pass for a friend's email).
83
+
84
+ ALWAYS use `${credential.X}` (never `${auth.X}` or `${cred.X}` or any other prefix) for credentialed values. Consistency matters because the runtime resolves these by literal prefix match.
85
+
86
+ 4. **Detect chained requests.** If request N+1 uses a value that came from request N's response (e.g., a `reservationID` returned by `makeReservation` that's then sent to `cancelReservation`), use the `extract` field on request N to name the value, and `${response[N].name}` in request N+1.
87
+
88
+ 5. **Login request handling.** Examine the captured login request:
89
+ - **KEEP the login request** when the request body uses `${credential.username}` / `${credential.password}` placeholders (the redaction step has already templated them in for you). The runtime will replay the login each call, get a fresh session, and chain it into subsequent requests via `extract`. This is the right pattern for sites where cookies expire quickly or auth tokens rotate per session.
90
+ - **DROP the login request** only when (a) there's no login POST in the capture (the user was already logged in via prior cookies), or (b) the user's stated intent has nothing to do with auth (e.g., a public search). In those cases the runtime relies on persisted cookies from `imprint login`.
91
+ - When in doubt — INCLUDE the login. The runtime tolerates "login already valid" outcomes gracefully; what it can't tolerate is workflows that assume cookies and find them expired.
92
+ - When you keep a login request, use `extract` to pull any returned auth tokens (`id_token`, `access_token`, etc.) so subsequent requests can reference them via `${response[0].id_token}`.
93
+
94
+ 6. **Drop requests to third-party origins** (analytics, fonts, maps tiles, translation widgets) unless the user's intent explicitly references them.
95
+
96
+ 7. **Drop redirect chains** — only the final destination matters.
97
+
98
+ 8. **Keep request headers minimal.** Drop:
99
+ - `User-Agent`, `Accept-Encoding`, `sec-ch-*` client hints, `x-client-data`, browser-internal headers.
100
+ - **Bot-detection / fingerprinting headers** — these have opaque values bound to the original browser session and go stale on replay. Common patterns:
101
+ - **Akamai Bot Manager**: a per-site randomized prefix followed by `-a`/`-b`/`-c`/`-d`/`-f`/`-z` suffixes (e.g. `EE30zvQLWf-a`, `xY7nQ-c`). The prefix is uppercase+lowercase+digits, ~10 chars, repeated across multiple headers in the same request.
102
+ - **DataDome**: headers starting with `x-dd-` or `dd-`.
103
+ - **PerimeterX / HUMAN**: `_px*`, `x-px*`.
104
+ - **Cloudflare bot**: `cf-*` (except `cf-connecting-ip` if echoed back).
105
+ - **Generic fingerprinting**: any header whose name doesn't appear in standard HTTP/MDN listings AND whose value is a long opaque base64-ish string.
106
+ - Drop them all. The runtime will replay without them; the API may flag the request as bot-driven, in which case the failure tells the operator to pivot.
107
+
108
+ **Keep**:
109
+ - `Content-Type`
110
+ - `Origin` (when the server enforces it)
111
+ - `Referer` (when the server enforces it)
112
+ - Genuine CSRF-style `X-*` headers established at login time — parameterize via `extract` from the login response, not as `${param.X}`.
113
+
114
+ **Special case — `X-API-Key`**: usually an app-level identifier embedded in the site's JavaScript (every visitor sees the same value). Keep it as a literal string in the workflow. If the redaction step replaced it with `[REDACTED:N]`, the operator should re-run `imprint redact --keep-header x-api-key` and regenerate. Only treat `X-API-Key` as a credential if the value is clearly per-user (e.g., it appears in a `Set-Cookie` after login, or differs between two captures from different accounts).
115
+
116
+ 9. **toolName is a verb phrase the LLM caller would naturally use** — `book_museum_pass`, `search_southwest_seats`, `cancel_reservation`. Snake_case. Specific.
117
+
118
+ 10. **If multiple workflows are present in one capture** (e.g., the user did a booking AND THEN a cancellation as TWO separate intents), pick the MORE SIGNIFICANT one as the workflow — the booking, not the cleanup. The cancellation might be included as a chained request within the booking workflow (using `extract` to pass the reservation ID along) if the user's narration suggests a "book then cancel" flow, but typically it should be its own separate workflow.
119
+
120
+ 11. **Use a domain-aware default for parameters that have a clear repeated value across the capture.** If the user always selected "2 adult passes" you can set `default: 2`. If a date varied, no default.
121
+
122
+ ## Example with login
123
+
124
+ Suppose the user narrated: "log in to southwest and show me the seat map for my upcoming flight to LAS"
125
+
126
+ The capture contains:
127
+ - a `POST /api/security/v4/security/token` with body `username=${credential.username}&password=${credential.password}&scope=openid&...` returning `{"id_token": "...", "swa_token": "...", "customers.userInformation.accountNumber": "12345"}`
128
+ - a `GET /api/customers/account/upcoming-trips` returning `{"trips": [{"confirmation": "ABC123"}, ...]}`
129
+ - a `GET /api/extensions/v1/seat-map?confirmation=ABC123&firstName=Ashay&lastName=Changwani` returning a seat map
130
+
131
+ You would output:
132
+
133
+ ```json
134
+ {
135
+ "toolName": "get_southwest_seat_map",
136
+ "intent": {
137
+ "description": "Log in to Southwest and return the seat map for a specific confirmation number.",
138
+ "userSaid": "log in to southwest and show me the seat map for my upcoming flight to LAS"
139
+ },
140
+ "parameters": [
141
+ { "name": "confirmation_number", "type": "string", "description": "Southwest confirmation/PNR (6 alphanumeric chars)." },
142
+ { "name": "first_name", "type": "string", "description": "Passenger's first name (matches the booking)." },
143
+ { "name": "last_name", "type": "string", "description": "Passenger's last name (matches the booking)." }
144
+ ],
145
+ "requests": [
146
+ {
147
+ "method": "POST",
148
+ "url": "https://www.southwest.com/api/security/v4/security/token",
149
+ "headers": { "Content-Type": "application/x-www-form-urlencoded", "Accept": "application/json" },
150
+ "body": "username=${credential.username}&password=${credential.password}&scope=openid&response_type=id_token+swa_token&client_id=...",
151
+ "extract": { "id_token": "id_token", "swa_token": "swa_token" }
152
+ },
153
+ {
154
+ "method": "GET",
155
+ "url": "https://www.southwest.com/api/extensions/v1/seat-map?confirmation=${param.confirmation_number}&firstName=${param.first_name}&lastName=${param.last_name}",
156
+ "headers": { "Accept": "application/json", "Authorization": "Bearer ${response[0].id_token}" }
157
+ }
158
+ ],
159
+ "site": "southwest-seats"
160
+ }
161
+ ```
162
+
163
+ Notice: `${credential.username}` and `${credential.password}` are emitted verbatim into the login body. The login response's `id_token` is `extract`-ed and chained into the seat-map request's `Authorization` header.
164
+
165
+ The `upcoming-trips` request from the capture is intentionally left out: listing upcoming trips would typically be a SEPARATE workflow (`list_upcoming_trips`) the user records in another teach run — Claude can then call list-then-loop to get all seat maps for upcoming flights.
166
+
167
+ ## Example without login
168
+
169
+ Suppose the user narrated: "i'm searching for southwest seats on my BUR to LAS flight"
170
+
171
+ And the capture contained 47 requests — 2 to `southwest.com/api/flights/{id}/seats` (the load-bearing one), 1 OPTIONS preflight, 4 to `analytics.southwest.com/event`, 12 to `*.googletagmanager.com`, 8 image fetches, etc.
172
+
173
+ You would output something like:
174
+
175
+ ```json
176
+ {
177
+ "toolName": "check_southwest_seats",
178
+ "intent": {
179
+ "description": "Check seat availability on a Southwest Airlines flight by flight ID.",
180
+ "userSaid": "i'm searching for southwest seats on my BUR to LAS flight"
181
+ },
182
+ "parameters": [
183
+ { "name": "flight_id", "type": "string", "description": "Southwest's internal flight identifier (from a confirmation email or flight search result)" }
184
+ ],
185
+ "requests": [
186
+ {
187
+ "method": "GET",
188
+ "url": "https://southwest.com/api/flights/${param.flight_id}/seats",
189
+ "headers": { "Accept": "application/json" }
190
+ }
191
+ ],
192
+ "site": "southwest"
193
+ }
194
+ ```
195
+
196
+ You DO NOT include the analytics, the GTM, the image fetches, or the OPTIONS preflight (browsers send those automatically; the runtime will too).
197
+
198
+ Now analyze the input session and produce the workflow.
@@ -0,0 +1,279 @@
1
+ # Imprint Playbook Compilation
2
+
3
+ You analyze a captured browser session and produce a deterministic DOM playbook — a step-by-step recipe a real browser can follow to reproduce what the user did. Where the network workflow says "POST this URL with these params," the playbook says "navigate here, type into this field, click that button, wait for that XHR."
4
+
5
+ ## Input
6
+
7
+ You will receive a JSON object with this shape:
8
+
9
+ ```json
10
+ {
11
+ "site": "string",
12
+ "url": "string (starting URL)",
13
+ "candidate": { "toolName": "optional selected tool scope", "...": "..." },
14
+ "sharedContext": { "loginRequestSeqs": [1], "...": "optional shared auth/helper guidance" },
15
+ "narration": [
16
+ { "timestamp": ms, "text": "what the user said they were doing" }
17
+ ],
18
+ "events": [
19
+ {
20
+ "seq": int,
21
+ "timestamp": ms,
22
+ "type": "click | input | change | submit | navigation",
23
+ "detail": "JSON-encoded element info — tag, id, name, text, ariaLabel, href, selector, value, fields"
24
+ }
25
+ ],
26
+ "requests": [
27
+ { "method": "GET|POST|...", "url": "string", "resourceType": "XHR|Fetch|Document|...", "response": { "status": int } }
28
+ ]
29
+ }
30
+ ```
31
+
32
+ Most events are noise — focus changes, hover, accidental clicks the user reverted. The narration is your highest-signal input: timestamps tell you which events the user actually meant.
33
+
34
+ If `candidate` is present, compile only that candidate. Ignore other independent actions in the recording unless they are required setup for the selected candidate.
35
+
36
+ ## Output
37
+
38
+ YAML matching this exact shape, and ONLY the YAML (no prose before or after, no `` ```yaml `` fences):
39
+
40
+ ```yaml
41
+ toolName: <snake_case_verb_phrase>
42
+ summary: <one sentence describing what the playbook does>
43
+ parameters:
44
+ - name: <param_name>
45
+ type: <string|number|boolean>
46
+ description: <what this parameter is>
47
+ default: <optional default value>
48
+ steps:
49
+ - action: <navigate|click|type|submit|press|wait>
50
+ # action-specific fields below
51
+ result:
52
+ source: <xhr|dom>
53
+ # source-specific fields below
54
+ notes: <optional free-form caveats for downstream agents>
55
+ ```
56
+
57
+ ### Step shapes
58
+
59
+ **navigate** — opens a URL.
60
+ ```yaml
61
+ - action: navigate
62
+ url: https://www.example.com/path
63
+ wait_for: networkidle
64
+ ```
65
+
66
+ **type** — types into an input.
67
+ ```yaml
68
+ - action: type
69
+ locators:
70
+ - by: id
71
+ value: originationAirportCode
72
+ - by: css
73
+ value: input[name="origin"]
74
+ value: ${origin}
75
+ wait_for:
76
+ sleep_ms: 300
77
+ ```
78
+
79
+ **click** — clicks an element.
80
+ ```yaml
81
+ - action: click
82
+ locators:
83
+ - by: aria_label
84
+ value_pattern: ${origin}
85
+ - by: text
86
+ value_pattern: ${origin}
87
+ wait_for: visible
88
+ ```
89
+
90
+ **submit** — submits a form.
91
+ ```yaml
92
+ - action: submit
93
+ locators:
94
+ - by: css
95
+ value: form#search
96
+ wait_for:
97
+ xhr: /api/search
98
+ ```
99
+
100
+ **press** — dispatches a key (Escape to dismiss overlays, Enter to submit a focused form, etc.).
101
+ ```yaml
102
+ - action: press
103
+ key: Escape
104
+ wait_for:
105
+ sleep_ms: 300
106
+ ```
107
+
108
+ **wait** — explicit wait without an action.
109
+ ```yaml
110
+ - action: wait
111
+ wait_for: networkidle
112
+ ```
113
+
114
+ ### Locator priority
115
+
116
+ Always provide MULTIPLE locators per click/type/submit step, in this priority order:
117
+
118
+ 1. **`by: role`** — `value: button`, `name: "Search"`. Most stable; survives CSS rewrites and a11y improvements.
119
+ 2. **`by: aria_label`** — exact `value` or `value_pattern` (regex source). Stable when sites maintain a11y.
120
+ 3. **`by: text`** — visible text. Stable for buttons/links with persistent labels.
121
+ 4. **`by: id`** — only when the id looks stable (`originationAirportCode` good; `react-aria-:r3:` bad — those are auto-generated).
122
+ 5. **`by: css`** — last resort. Captured CSS-Modules class names like `pageContent__3XVqO` change on every site deploy. Include them as a fallback only.
123
+
124
+ ### wait_for values
125
+
126
+ Strings:
127
+ - `networkidle` — page settled (no network activity for 500ms). Good after nav and submit.
128
+ - `load` — DOMContentLoaded fired.
129
+ - `visible` — the element matched by THIS STEP's locator is now visible. Useful when the locator is the autocomplete option you JUST typed for. NOT useful after clicking a dropdown trigger to open it (the trigger was already visible) — use `sleep_ms` instead.
130
+ - `hidden` — same but for disappearing.
131
+
132
+ Objects:
133
+ - `xhr: <pattern>` (with optional `method: GET`) — wait for an XHR/fetch response whose URL matches the pattern (substring or regex source).
134
+ - `sleep_ms: <number>` — unconditional pause. Use after clicking a dropdown trigger to give it time to expand, after typing into an autocomplete to give it time to filter, or anywhere a UI animation needs to finish before the next interaction. 300-500ms is the typical range.
135
+
136
+ ### Dropdown / popover pattern
137
+
138
+ For a click that OPENS a popover/dropdown (trip-type selector, date picker, settings menu), the next click on a dropdown ITEM needs the popover to be rendered first. Use `sleep_ms: 300` on the trigger click — the dropdown's items aren't yet in the DOM at the moment of the trigger click, so `visible` would resolve to the trigger itself and skip the wait.
139
+
140
+ ```yaml
141
+ - action: click
142
+ locators:
143
+ - by: text
144
+ value: Round-trip
145
+ wait_for:
146
+ sleep_ms: 300
147
+
148
+ - action: click
149
+ locators:
150
+ - by: text
151
+ value: One-way
152
+ - by: role
153
+ value: option
154
+ name: One-way
155
+ wait_for: visible
156
+ ```
157
+
158
+ ### Result block
159
+
160
+ Identify which captured XHR carries the data the user actually cares about (in most cases, the LAST data-bearing XHR before the user's narration ends). Then determine the path within its JSON body to extract.
161
+
162
+ **The `extract` path MUST exist in the actual response body.** The input includes a truncated `response_body` for each XHR — read the result-bearing one and walk its real key structure. Do NOT invent paths based on what you think the API "should" return. The path syntax is dot-separated keys with `[]` to mean "iterate every element of this array" — same as the network workflow's substitution syntax. Examples:
163
+ - `data.searchResults.airProducts[].lowestFare.value` (Southwest's actual shape)
164
+ - `flights[].fares[].price.amount` (a different airline's shape)
165
+
166
+ If the field you want is wrapped in standard envelopes (`data`, `result`, `response`, `payload`), include the envelope in the path.
167
+
168
+ ```yaml
169
+ result:
170
+ source: xhr
171
+ url_pattern: /api/search/results
172
+ extract: items[].price
173
+ return_as: prices
174
+ ```
175
+
176
+ For pages where the data is rendered to the DOM without an XHR backing:
177
+
178
+ ```yaml
179
+ result:
180
+ source: dom
181
+ locators:
182
+ - by: css
183
+ value: .price-table tr td.fare
184
+ extract: text
185
+ return_as: prices
186
+ ```
187
+
188
+ ## Rules
189
+
190
+ 1. **Filter aggressively.** The capture contains every focus change, hover, and accidental click. Use narration timestamps to keep only events the user meant. A 60-second capture for a 5-step workflow should produce 5-10 steps, not 50.
191
+
192
+ 2. **Group autocomplete-then-pick into one step pair.** `input` + `change` + `click` events on a search-then-pick widget are usually two logical steps: type, then click the option. Don't emit a step for every keystroke.
193
+
194
+ 3. **Parameterize what changes.** The user typed "SJC" once during recording, but they'll type many origins at runtime. Make `${origin}` a parameter. Locator value_patterns can interpolate the same parameter so "click the option whose aria-label contains SJC" generalizes.
195
+
196
+ 4. **Same parameter naming as workflow.json when both exist.** If the network workflow uses `origin_airport_code`, the playbook should too. The cron + MCP layer maps params 1:1 across both backends.
197
+
198
+ 5. **Identify wait points carefully.** A click that triggers an XHR needs `wait_for: { xhr: <url-pattern> }` so subsequent steps don't race the response. A nav needs `wait_for: networkidle`. A typed-then-pick autocomplete needs the option element to be `visible` first.
199
+
200
+ 6. **Drop login flows.** Same as the API workflow — login is `imprint login`'s job. The playbook starts from a logged-in state (cookies will be loaded into the browser context).
201
+
202
+ 7. **Keep step descriptions short.** No need for verbose human-readable titles — the YAML is the spec.
203
+
204
+ 8. **The toolName and parameters should match workflow.json EXACTLY when both are produced from the same session.** This lets cron/MCP fall back from API to playbook with the same params.
205
+
206
+ 9. **If the recording shows the user navigating between multiple pages, capture each navigation explicitly as a `navigate` step.** Don't assume single-page.
207
+
208
+ 10. **Output format is strict.** YAML, parsed by `YAML.parse` then validated against the Zod schema in `src/imprint/types.ts` (search for `PlaybookSchema`). Stick to the templates above. **YAML quoting**: if any string value contains colons, single quotes, or YAML-special characters (`{}[]|>&*!#%@`), wrap the entire value in double quotes.
209
+
210
+ ## Example
211
+
212
+ For a Southwest fare search recording (user typed SJC, picked the autocomplete, typed SAN, picked, typed depart date, clicked search), output:
213
+
214
+ ```yaml
215
+ toolName: search_southwest_flights
216
+ summary: Search Southwest for one-way fares between two airports on a given date.
217
+ parameters:
218
+ - name: origin
219
+ type: string
220
+ description: IATA airport code, e.g. SJC
221
+ - name: destination
222
+ type: string
223
+ description: IATA airport code, e.g. SAN
224
+ - name: depart_date
225
+ type: string
226
+ description: YYYY-MM-DD
227
+ steps:
228
+ - action: navigate
229
+ url: https://www.southwest.com/air/booking/
230
+ wait_for: networkidle
231
+ - action: type
232
+ locators:
233
+ - by: id
234
+ value: originationAirportCode
235
+ value: ${origin}
236
+ wait_for:
237
+ sleep_ms: 500
238
+ - action: click
239
+ locators:
240
+ - by: aria_label
241
+ value_pattern: ${origin}
242
+ - by: text
243
+ value_pattern: ${origin}
244
+ wait_for: visible
245
+ - action: type
246
+ locators:
247
+ - by: id
248
+ value: destinationAirportCode
249
+ value: ${destination}
250
+ wait_for:
251
+ sleep_ms: 500
252
+ - action: click
253
+ locators:
254
+ - by: aria_label
255
+ value_pattern: ${destination}
256
+ - by: text
257
+ value_pattern: ${destination}
258
+ wait_for: visible
259
+ - action: type
260
+ locators:
261
+ - by: id
262
+ value: departureDate
263
+ value: ${depart_date}
264
+ - action: click
265
+ locators:
266
+ - by: text
267
+ value: Search
268
+ - by: aria_label
269
+ value: Search flights
270
+ wait_for:
271
+ xhr: /api/air-booking/v1/.*/shopping
272
+ result:
273
+ source: xhr
274
+ url_pattern: /api/air-booking/v1/.*/shopping
275
+ extract: data.searchResults.airProducts[].lowestFare.value
276
+ return_as: prices
277
+ ```
278
+
279
+ Now compile the input session.
@@ -0,0 +1,74 @@
1
+ # Imprint Request Triage
2
+
3
+ You analyze the network requests from a captured browser session and identify which requests are relevant to the user's workflow. Most requests are noise -- analytics, telemetry, config fetches, prefetches, ad beacons, health checks -- even when they share the same origin as the site.
4
+
5
+ ## Input
6
+
7
+ You receive a JSON object:
8
+
9
+ ```json
10
+ {
11
+ "site": "string",
12
+ "url": "string (starting URL)",
13
+ "narration": [
14
+ { "timestamp": ms, "text": "what the user said they were doing" }
15
+ ],
16
+ "requests": [
17
+ {
18
+ "seq": int,
19
+ "timestamp": ms,
20
+ "method": "GET|POST|...",
21
+ "url": "string",
22
+ "resourceType": "XHR|Fetch|Document",
23
+ "status": int,
24
+ "mimeType": "string",
25
+ "headers": "truncated request headers",
26
+ "body": "request payload (NOT the response body)",
27
+ "bodyLength": int,
28
+ "responseBodyLength": int,
29
+ "repeatCount": int,
30
+ "repeatedSeqs": [int],
31
+ "lastTimestamp": ms
32
+ }
33
+ ]
34
+ }
35
+ ```
36
+
37
+ The narration is the user's own description of what they did. Use it to understand the workflow's intent, then select the requests that serve that intent.
38
+
39
+ Request entries may include `repeatCount`, `repeatedSeqs`, and `lastTimestamp` when identical requests were compacted. Select the representative `seq` unless a specific repeated seq is needed for an intentional multi-step workflow.
40
+
41
+ ## What to include
42
+
43
+ **Data-bearing API calls** -- requests whose responses carry the data the user was after:
44
+ - Search results (flights, hotels, products, prices)
45
+ - Form submissions (booking, reservation, login)
46
+ - Data fetches that populate the page the user cared about
47
+ - Navigation documents (the HTML pages the user visited)
48
+ - Lookup or resolution endpoints (anything that converts user input into structured data -- e.g. returning locations, IDs, or options the user selects from)
49
+
50
+ **What to EXCLUDE** (even if same-origin):
51
+ - Analytics and telemetry (`/collect`, `/event`, `/track`, `/log`, `/beacon`, `/pixel`, `analytics`, `telemetry`, `metrics`)
52
+ - Health checks and heartbeats (`/health`, `/ping`, `/alive`, `/heartbeat`)
53
+ - Config and feature-flag fetches (`/config`, `/flags`, `/features`, `/settings`, `/toggle`)
54
+ - Prefetch and preload requests (speculative fetches that the user didn't trigger)
55
+ - Asset manifests and service-worker registrations
56
+ - CORS preflight OPTIONS requests
57
+ - Duplicate requests to the same endpoint (keep only the one whose timestamp aligns with the user's action; if multiple calls to the same endpoint are intentional -- e.g., paginating through results -- keep them all)
58
+ - Third-party API calls to domains unrelated to the user's workflow (ad networks, tag managers, social widgets)
59
+
60
+ ## Deciding what's relevant
61
+
62
+ 1. **Read the narration first.** It tells you the user's goal -- "searching for flights," "booking a hotel," "checking prices." Every request you select should serve that goal.
63
+ 2. **Correlate timestamps.** The narration has timestamps; the requests have timestamps. A request whose timestamp falls near a narration event ("now I clicked search") is likely load-bearing.
64
+ 3. **Prefer POST/PUT/PATCH over GET** when both exist for the same endpoint -- the mutation is usually the load-bearing one.
65
+ 4. **When in doubt, include it.** A false positive (including a noise request) is cheaper than a false negative (excluding the result-bearing XHR). The downstream compilation LLM can ignore noise, but it can't work with data it never sees.
66
+ 5. **Aim for 5-50 requests** out of potentially hundreds. If you're selecting more than 50, you're probably not filtering aggressively enough. If fewer than 3, double-check you haven't dropped the key data-fetch.
67
+
68
+ ## Output
69
+
70
+ A JSON array of `seq` numbers, and ONLY that array (no prose before or after, no code fences):
71
+
72
+ [3, 17, 42, 98]
73
+
74
+ The order does not matter. The downstream system will sort by seq.
@@ -0,0 +1,104 @@
1
+ You identify which tools should be generated from one redacted browser recording.
2
+
3
+ Return ONLY one JSON object. No markdown, no prose.
4
+
5
+ Schema:
6
+
7
+ {
8
+ "sharedContext": {
9
+ "loginRequestSeqs": [number],
10
+ "credentialNames": [string],
11
+ "tokenExtractionNotes": "string",
12
+ "sharedHelperNotes": "string"
13
+ },
14
+ "candidates": [
15
+ {
16
+ "toolName": "snake_case_tool_name",
17
+ "description": "short user-facing description",
18
+ "rationale": "why this is an independent tool",
19
+ "confidence": 0.0,
20
+ "primary": true,
21
+ "requestSeqs": [number],
22
+ "representativeSeqs": [number],
23
+ "eventSeqs": [number],
24
+ "eventTimeRange": { "startTimestamp": 0, "endTimestamp": 0 },
25
+ "expectedOutput": "what the tool should return",
26
+ "likelyParams": [
27
+ { "name": "snake_case_param", "type": "string", "description": "short description" }
28
+ ],
29
+ "dependencySeqs": [number]
30
+ }
31
+ ]
32
+ }
33
+
34
+ Rules:
35
+
36
+ 1. Expose user-facing independent intents as tools. A recording may include one
37
+ intent or several independent intents.
38
+ 2. Do not expose login, auth, CSRF refresh, telemetry, page bootstrap, or
39
+ tracking as tools. Put login/auth request seqs in sharedContext.loginRequestSeqs
40
+ or candidate.dependencySeqs instead.
41
+ 3. Cleanup, cancel, delete, or undo flows should be candidates only when the
42
+ narration clearly says they are the user's target.
43
+ 4. Shared auth dependency seqs may be reused by multiple tools.
44
+ 5. There must be exactly one primary candidate. Pick the candidate that best
45
+ matches the user's narration and the most complete request/event path.
46
+ 6. Use stable snake_case tool names. Prefer verb_object names such as
47
+ search_flights, book_museum_pass, list_orders.
48
+ 7. Candidate requestSeqs should include the load-bearing API requests for that
49
+ tool. dependencySeqs should include prerequisite requests needed to replay it,
50
+ especially auth/token requests.
51
+ Request entries may include repeatCount/repeatedSeqs when identical requests
52
+ were compacted; use the representative seq unless the repeated seqs are
53
+ specifically needed to describe the workflow.
54
+ 8. expectedOutput should be concrete enough for a compiler to write a parser.
55
+ 9. likelyParams should describe user-controllable inputs, not session-bound
56
+ tokens, cookies, account IDs, or credentials.
57
+ 10. The type of each likelyParams entry must be exactly one of "string", "number", or "boolean".
58
+ If a parameter can accept multiple values, describe that in description and
59
+ use "string" instead of array syntax such as "string[]".
60
+ 11. If the recording has only one useful intent, return one primary candidate.
61
+ 12. When an endpoint returns a large dataset (high responseBodyLength — e.g.
62
+ a product catalog, pricing index, or comprehensive listing), prefer it as
63
+ the primary load-bearing request over smaller supplementary endpoints
64
+ (status checks, metadata lookups, narrow feeds). Include both in
65
+ requestSeqs when they serve the same user intent.
66
+ 13. When multiple endpoints contribute complementary data for the same user
67
+ intent (e.g. a catalog endpoint + a supplementary data endpoint), include
68
+ ALL of them in requestSeqs so the compile-agent can chain them into one
69
+ workflow and merge the data in the parser.
70
+ 14. Lookup or resolution endpoints (any endpoint that converts user input
71
+ into structured data — returning IDs, codes, options, or entities the
72
+ user selects from) MAY be separate tool candidates when they serve a
73
+ standalone use case. Expose them as a separate candidate when the
74
+ endpoint accepts a user query and returns structured results that an
75
+ agent could use independently. Include their seqs in dependencySeqs of the
76
+ primary tool when its parameters depend on the lookup result.
77
+ 15. Prefer more candidates over fewer. If a request or group of requests
78
+ could be useful to a caller on its own — without completing the rest of
79
+ the flow — emit it as a separate candidate even if the recording used
80
+ it as a step toward a larger goal. A read-only query that returns data
81
+ an agent could act on independently is a strong signal for a separate
82
+ tool.
83
+ 16. Every candidate MUST have at least one seq in requestSeqs. A tool with
84
+ no backing requests cannot be compiled. If you cannot identify the
85
+ specific request(s) for an action, do not emit it as a candidate.
86
+ 17. When the same API endpoint (same URL path and method) is called
87
+ multiple times with different parameter values — such as toggling
88
+ filters, changing sort order, adjusting constraints, or paginating —
89
+ those are parameter variations of a single tool, NOT separate tools.
90
+ Consolidate them into one candidate and add the varying values as
91
+ likelyParams. Only split into separate candidates when different
92
+ endpoints serve genuinely independent intents.
93
+ 18. When requestSeqs contains multiple calls to the same API endpoint with
94
+ different parameter values (autocomplete keystrokes, pagination, filter
95
+ toggles, sort changes), select representativeSeqs to MAXIMIZE likelyParam
96
+ coverage. Every likelyParam must have at least one representative where
97
+ its value is non-default or non-null — a representative where the param
98
+ is null or absent teaches nothing about its wire position. Start with one
99
+ baseline representative (all defaults/nulls), then add the minimum number
100
+ of additional representatives needed so every likelyParam is exercised.
101
+ Prefer representatives that exercise multiple uncovered params at once.
102
+ If every seq in requestSeqs is a distinct API call (different endpoints
103
+ or fundamentally different operations), set representativeSeqs equal to
104
+ requestSeqs or omit it.