imprint-mcp 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129) hide show
  1. package/README.md +165 -201
  2. package/examples/discoverandgo/README.md +1 -1
  3. package/examples/echo/README.md +1 -1
  4. package/examples/google-flights/README.md +28 -0
  5. package/examples/google-flights/_shared/batchexecute.ts +63 -0
  6. package/examples/google-flights/_shared/flights_request.ts +95 -0
  7. package/examples/google-flights/_shared/package.json +9 -0
  8. package/examples/google-flights/get_flight_booking_details/index.ts +159 -0
  9. package/examples/google-flights/get_flight_booking_details/package.json +9 -0
  10. package/examples/google-flights/get_flight_booking_details/parser.ts +182 -0
  11. package/examples/google-flights/get_flight_booking_details/playbook.yaml +138 -0
  12. package/examples/google-flights/get_flight_booking_details/request-transform.ts +86 -0
  13. package/examples/google-flights/get_flight_booking_details/workflow.json +98 -0
  14. package/examples/google-flights/get_flight_calendar_prices/index.ts +131 -0
  15. package/examples/google-flights/get_flight_calendar_prices/package.json +9 -0
  16. package/examples/google-flights/get_flight_calendar_prices/parser.ts +86 -0
  17. package/examples/google-flights/get_flight_calendar_prices/playbook.yaml +97 -0
  18. package/examples/google-flights/get_flight_calendar_prices/request-transform.ts +31 -0
  19. package/examples/google-flights/get_flight_calendar_prices/workflow.json +76 -0
  20. package/examples/google-flights/lookup_airport/index.ts +101 -0
  21. package/examples/google-flights/lookup_airport/package.json +9 -0
  22. package/examples/google-flights/lookup_airport/parser.ts +66 -0
  23. package/examples/google-flights/lookup_airport/playbook.yaml +47 -0
  24. package/examples/google-flights/lookup_airport/request-transform.ts +20 -0
  25. package/examples/google-flights/lookup_airport/workflow.json +57 -0
  26. package/examples/google-flights/search_flights/index.ts +219 -0
  27. package/examples/google-flights/search_flights/package.json +9 -0
  28. package/examples/google-flights/search_flights/parser.ts +169 -0
  29. package/examples/google-flights/search_flights/playbook.yaml +184 -0
  30. package/examples/google-flights/search_flights/request-transform.ts +119 -0
  31. package/examples/google-flights/search_flights/workflow.json +143 -0
  32. package/examples/google-hotels/README.md +29 -0
  33. package/examples/google-hotels/_shared/batchexecute.ts +73 -0
  34. package/examples/google-hotels/_shared/freq.ts +158 -0
  35. package/examples/google-hotels/_shared/package.json +9 -0
  36. package/examples/google-hotels/autocomplete_hotel_location/index.ts +80 -0
  37. package/examples/google-hotels/autocomplete_hotel_location/package.json +9 -0
  38. package/examples/google-hotels/autocomplete_hotel_location/parser.ts +71 -0
  39. package/examples/google-hotels/autocomplete_hotel_location/playbook.yaml +36 -0
  40. package/examples/google-hotels/autocomplete_hotel_location/request-transform.ts +37 -0
  41. package/examples/google-hotels/autocomplete_hotel_location/workflow.json +36 -0
  42. package/examples/google-hotels/get_hotel_booking_options/index.ts +143 -0
  43. package/examples/google-hotels/get_hotel_booking_options/package.json +9 -0
  44. package/examples/google-hotels/get_hotel_booking_options/parser.ts +271 -0
  45. package/examples/google-hotels/get_hotel_booking_options/playbook.yaml +154 -0
  46. package/examples/google-hotels/get_hotel_booking_options/request-transform.ts +154 -0
  47. package/examples/google-hotels/get_hotel_booking_options/workflow.json +84 -0
  48. package/examples/google-hotels/get_hotel_reviews/index.ts +81 -0
  49. package/examples/google-hotels/get_hotel_reviews/package.json +9 -0
  50. package/examples/google-hotels/get_hotel_reviews/parser.ts +128 -0
  51. package/examples/google-hotels/get_hotel_reviews/playbook.yaml +64 -0
  52. package/examples/google-hotels/get_hotel_reviews/request-transform.ts +42 -0
  53. package/examples/google-hotels/get_hotel_reviews/workflow.json +37 -0
  54. package/examples/google-hotels/search_hotels/index.ts +207 -0
  55. package/examples/google-hotels/search_hotels/package.json +9 -0
  56. package/examples/google-hotels/search_hotels/parser.ts +260 -0
  57. package/examples/google-hotels/search_hotels/playbook.yaml +87 -0
  58. package/examples/google-hotels/search_hotels/request-transform.ts +197 -0
  59. package/examples/google-hotels/search_hotels/workflow.json +127 -0
  60. package/package.json +3 -2
  61. package/prompts/audit-agent.md +71 -0
  62. package/prompts/build-planning.md +74 -0
  63. package/prompts/compile-agent.md +132 -28
  64. package/prompts/prereq-builder.md +64 -0
  65. package/prompts/prereq-planner.md +34 -0
  66. package/prompts/tool-planning.md +39 -0
  67. package/src/cli.ts +111 -4
  68. package/src/imprint/agent.ts +5 -0
  69. package/src/imprint/audit.ts +996 -0
  70. package/src/imprint/backend-ladder.ts +1214 -184
  71. package/src/imprint/build-plan.ts +1051 -0
  72. package/src/imprint/cdp-browser-fetch.ts +589 -0
  73. package/src/imprint/cdp-jar-cache.ts +320 -0
  74. package/src/imprint/chromium.ts +135 -0
  75. package/src/imprint/claude-cli-compile.ts +125 -25
  76. package/src/imprint/codex-cli-compile.ts +26 -23
  77. package/src/imprint/compile-agent-types.ts +38 -0
  78. package/src/imprint/compile-agent.ts +65 -27
  79. package/src/imprint/compile-tools.ts +1656 -64
  80. package/src/imprint/compile.ts +14 -2
  81. package/src/imprint/concurrency.ts +87 -0
  82. package/src/imprint/credential-extract.ts +174 -25
  83. package/src/imprint/cron.ts +1 -0
  84. package/src/imprint/doctor.ts +39 -0
  85. package/src/imprint/emit.ts +85 -0
  86. package/src/imprint/freeform-redact.ts +5 -4
  87. package/src/imprint/integrations.ts +2 -2
  88. package/src/imprint/llm.ts +56 -8
  89. package/src/imprint/mcp-compile-server.ts +43 -10
  90. package/src/imprint/mcp-maintenance.ts +9 -101
  91. package/src/imprint/mcp-server.ts +73 -7
  92. package/src/imprint/multi-progress.ts +7 -2
  93. package/src/imprint/param-grounding.ts +367 -0
  94. package/src/imprint/paths.ts +29 -0
  95. package/src/imprint/playbook-runner.ts +101 -40
  96. package/src/imprint/prereq-builder.ts +651 -0
  97. package/src/imprint/probe-backends.ts +6 -3
  98. package/src/imprint/record.ts +10 -1
  99. package/src/imprint/redact.ts +30 -2
  100. package/src/imprint/replay-capture.ts +19 -18
  101. package/src/imprint/runtime.ts +19 -10
  102. package/src/imprint/sensitive-keys.ts +141 -7
  103. package/src/imprint/session-diff.ts +79 -2
  104. package/src/imprint/session-merge.ts +9 -5
  105. package/src/imprint/stealth-chromium.ts +81 -0
  106. package/src/imprint/stealth-fetch.ts +309 -29
  107. package/src/imprint/stealth-token-cache.ts +88 -0
  108. package/src/imprint/teach-plan.ts +251 -0
  109. package/src/imprint/teach-state.ts +17 -0
  110. package/src/imprint/teach.ts +582 -147
  111. package/src/imprint/tool-candidates.ts +72 -14
  112. package/src/imprint/tool-plan.ts +313 -0
  113. package/src/imprint/tracing.ts +135 -6
  114. package/src/imprint/types.ts +61 -3
  115. package/examples/google-flights/search_google_flights/index.ts +0 -101
  116. package/examples/google-flights/search_google_flights/parser.test.ts +0 -140
  117. package/examples/google-flights/search_google_flights/parser.ts +0 -189
  118. package/examples/google-flights/search_google_flights/playbook.yaml +0 -130
  119. package/examples/google-flights/search_google_flights/workflow.json +0 -48
  120. package/examples/google-hotels/search_google_hotels/index.ts +0 -194
  121. package/examples/google-hotels/search_google_hotels/parser.test.ts +0 -168
  122. package/examples/google-hotels/search_google_hotels/parser.ts +0 -330
  123. package/examples/google-hotels/search_google_hotels/playbook.yaml +0 -125
  124. package/examples/google-hotels/search_google_hotels/workflow.json +0 -111
  125. package/examples/namecheap-domains/search_namecheap_domains/index.ts +0 -144
  126. package/examples/namecheap-domains/search_namecheap_domains/parser.ts +0 -380
  127. package/examples/namecheap-domains/search_namecheap_domains/playbook.yaml +0 -50
  128. package/examples/namecheap-domains/search_namecheap_domains/request-transform.ts +0 -136
  129. package/examples/namecheap-domains/search_namecheap_domains/workflow.json +0 -97
package/README.md CHANGED
@@ -1,311 +1,273 @@
1
- <h1 align="center">Imprint</h1>
1
+ <div align="center">
2
2
 
3
- <p align="center">
4
- <strong>Don't do anything twice. Teach your AI agent once, and it remembers forever.</strong>
5
- </p>
3
+ # imprint
6
4
 
7
- <p align="center">
8
- <a href="https://github.com/ashaychangwani/imprint/actions/workflows/test.yml"><img src="https://github.com/ashaychangwani/imprint/actions/workflows/test.yml/badge.svg" alt="Tests"></a>
9
- <img src="https://img.shields.io/endpoint?url=https://gist.githubusercontent.com/ashaychangwani/cbd3134e06fb4fabf24aed94b251bdfd/raw/test-count.json" alt="Test count">
10
- <a href="https://github.com/ashaychangwani/imprint/releases"><img src="https://img.shields.io/github/v/release/ashaychangwani/imprint?label=release" alt="Release"></a>
11
- <a href="LICENSE"><img src="https://img.shields.io/badge/license-MIT-blue" alt="MIT License"></a>
12
- <a href="https://github.com/ashaychangwani/imprint/stargazers"><img src="https://img.shields.io/github/stars/ashaychangwani/imprint?style=social" alt="GitHub Stars"></a>
13
- </p>
5
+ **Teach your AI agent any website. Once.**
14
6
 
15
- <br>
7
+ Record a real browser session, get a deterministic MCP tool back.\
8
+ No tokens burned on exploration. No "the LLM clicked the wrong button."\
9
+ The recording *is* the executable.
16
10
 
17
- ```bash
18
- bun install -g imprint-mcp
19
- imprint teach southwest --url https://www.southwest.com
20
- ```
11
+ [![Tests](https://github.com/ashaychangwani/imprint/actions/workflows/test.yml/badge.svg)](https://github.com/ashaychangwani/imprint/actions/workflows/test.yml)
12
+ ![Test count](https://img.shields.io/endpoint?url=https://gist.githubusercontent.com/ashaychangwani/cbd3134e06fb4fabf24aed94b251bdfd/raw/test-count.json)
13
+ [![Release](https://img.shields.io/github/v/release/ashaychangwani/imprint?label=release)](https://github.com/ashaychangwani/imprint/releases)
14
+ [![MIT License](https://img.shields.io/badge/license-MIT-blue)](LICENSE)
15
+ [![GitHub Stars](https://img.shields.io/github/stars/ashaychangwani/imprint?style=social)](https://github.com/ashaychangwani/imprint/stargazers)
21
16
 
22
- That's it. Imprint opens a browser, you drive the workflow, and it compiles a deterministic **MCP tool** your AI agent can call from then on. No tokens burned on exploration, no "the LLM clicked the wrong button" variance. The recording *is* the executable.
17
+ [Quick Start](#quick-start) · [Examples](#examples) · [How It Works](#how-it-works) · [Docs](docs/getting-started.md)
23
18
 
24
- <br>
19
+ </div>
25
20
 
26
- ## See it in action
21
+ ---
27
22
 
28
- After teaching, your agent has a tool called `search_namecheap_domains`. The compile-agent reverse-engineered the site's CRC32 URL signing scheme from a captured JavaScript bundle, chains five API endpoints, and merges availability + pricing + aftermarket data:
23
+ ## Quick Start
29
24
 
25
+ ```bash
26
+ bun install -g imprint-mcp
27
+ imprint teach southwest --url https://www.southwest.com
30
28
  ```
31
- $ claude "search for getimprint on Namecheap, under $20/yr renewal"
32
29
 
33
- getimprint.com taken registered 2008 GoDaddy.com, LLC
34
- getimprint.dev available $12.98/yr (19% off) renews $20.98/yr
35
- getimprint.org available $7.48/yr (42% off) renews $15.98/yr
36
- getimprint.fyi available $6.98/yr renews $9.68/yr
37
- getimprint.xyz available $2.00/yr (90% off) renews $19.48/yr
38
- ```
30
+ A browser opens. You drive the workflow and narrate what you're doing. Imprint records every request and interaction, then compiles a deterministic **MCP tool** your agent can call forever.
39
31
 
40
- Real-time domain availability with per-request URL signing — the agent wrote the signing function itself by reading the site's JS bundle.
32
+ ---
41
33
 
42
- <br>
34
+ ## See It in Action
43
35
 
44
- ## How it works
36
+ **Teach once.** `imprint teach google-flights` records one real search and compiles a **4-tool** MCP server from that single session — the compile agent reverse-engineers Google's `batchexecute` wire format itself and wires the search→booking token chain, with no hand-written request code. Here is the actual run (6 recordings → 4 tools, every tool live-verified):
45
37
 
46
- <table>
47
- <tr>
48
- <td width="33%">
38
+ ![imprint teach google-flights — a real run: six recordings compiled into four live-verified MCP tools](web/public/imprint-teach.gif)
49
39
 
50
- ### 1. Teach
40
+ **Then your agent calls those tools** like any other — real-time results through a live trusted-Chrome (`cdp-replay`) backend:
51
41
 
52
- ```bash
53
- imprint teach mysite \
54
- --url https://example.com
55
42
  ```
43
+ $ claude "cheapest nonstop SJC→SAN the first week of July, with a carry-on"
56
44
 
57
- A browser opens. You drive the workflow and narrate what you're doing. Imprint records every network request and DOM interaction.
58
-
59
- Raw recordings are stored locally under `~/.imprint/<site>/sessions/`, and each generated tool lives under `~/.imprint/<site>/<toolName>/` by default, outside the repo. The generated `index.ts` imports from `imprint/runtime` via a `node_modules/imprint` symlink that Imprint maintains automatically — created on `emit`, self-healed at runtime if a worktree moves or vanishes (so the next `imprint mcp-server`/`cron`/`probe-backends` repairs a stale link without re-emitting). The tracked `examples/` tree remains as source fixtures and demos.
60
-
61
- </td>
62
- <td width="33%">
63
-
64
- ### 2. Compile
65
-
66
- Imprint generates replay artifacts:
45
+ Alaska AS1623 SJC→SAN 6:00a→7:32a nonstop $137
46
+ Southwest WN2412 SJC→SAN 8:15a→9:45a nonstop $158
47
+ Delta DL2901 SJC→SAN 7:10a→8:44a nonstop $169
48
+ ```
67
49
 
68
- - **`workflow.json`** — API-level replay (fast, with named state captures)
69
- - **`playbook.yaml`** — DOM-level fallback (universal)
70
- - **`request-transform.ts`** — URL signing when the API requires per-call tokens (optional)
50
+ The suite was one-shot compiled from one recording and audited at **92.6%**, every tool live-verified. *(The terminal above is a faithful replay — regenerate/record it with `bun scripts/demo-teach.ts`.)*
71
51
 
72
- Both artifacts are written into the generated tool directory (`~/.imprint/<site>/<toolName>/`). `compile-playbook` uses that nested location by default so cron and MCP discovery can see the fallback without a custom `--out`.
52
+ ---
73
53
 
74
- Credentials and PII are redacted automatically: credential values become `${credential.NAME}` placeholders, sensitive values become redaction markers that preserve equality within the artifact, and a supplemental free-form scan catches common emails, phone numbers, SSNs, payment cards, JWTs, API keys, private keys, database URLs, and webhook URLs before LLM compile.
54
+ ## How It Works
75
55
 
56
+ <table>
57
+ <tr>
58
+ <td align="center" width="33%">
59
+ <h3>1. Teach</h3>
60
+ <p>Open a real browser, drive the workflow, narrate what you're doing. Imprint records every network request and DOM interaction.</p>
76
61
  </td>
77
- <td width="34%">
78
-
79
- ### 3. Use
80
-
81
- A typed MCP tool is generated and wired into your AI platform. Re-run `imprint install <site>` any time to add the same emitted MCP server to another platform, or remove it later with `imprint uninstall <site>`.
82
-
62
+ <td align="center" width="33%">
63
+ <h3>2. Compile</h3>
64
+ <p>Generates two replay artifacts:<br><br><code>workflow.json</code> — API-level replay<br><code>playbook.yaml</code> — DOM-level fallback<br><br>Credentials are redacted automatically.</p>
65
+ </td>
66
+ <td align="center" width="34%">
67
+ <h3>3. Use</h3>
68
+ <p>A typed MCP tool your agent calls like any other tool. Works with Claude Code, Codex, Claude Desktop, and any MCP client.</p>
83
69
  </td>
84
70
  </tr>
85
71
  </table>
86
72
 
87
- > All three steps happen in a single `imprint teach` command.
73
+ > [!TIP]
74
+ > All three steps happen in a single `imprint teach` command. Credentials and PII are redacted automatically before anything reaches the LLM.
88
75
 
89
- <br>
76
+ ---
90
77
 
91
78
  ## Why Imprint?
92
79
 
93
- Other browser-tool frameworks (browser-use, Computer Use) ask the LLM to **decide every click at runtime**.
80
+ Other browser-tool frameworks ask the LLM to **decide every click at runtime**. Imprint takes a fundamentally different approach:
94
81
 
95
- | | Imprint | browser-use / Computer Use |
96
- |---|---|---|
97
- | **How it works** | Record once, replay deterministically | LLM decides every click at runtime |
82
+ | | **Imprint** | **browser-use / Computer Use** |
83
+ |:--|:--|:--|
84
+ | **Approach** | Record once, replay deterministically | LLM decides every click at runtime |
98
85
  | **Token cost** | Zero at runtime | Scales with workflow complexity |
99
86
  | **Reliability** | Deterministic — same input, same output | Variable — exploration can diverge |
100
87
  | **Bot detection** | Real Chromium + stealth-fetch | Detectable automation fingerprint |
101
- | **When it breaks** | Automatic fallback via backend ladder | No fallback |
102
- | **Time to result** | 200ms – 9s | 30s+ |
88
+ | **Fallback** | Automatic ladder (API → DOM) | None |
89
+ | **Speed** | 200ms – 9s | 30s+ |
103
90
 
104
- <br>
91
+ ---
105
92
 
106
- ## Install
93
+ ## Installation
107
94
 
108
- ### npm (requires [Bun](https://bun.sh) >= 1.3)
95
+ ### Recommended
109
96
 
110
97
  ```bash
111
98
  bun install -g imprint-mcp
112
99
  ```
113
100
 
114
- Or run without installing: `bunx imprint-mcp teach southwest --url https://www.southwest.com`
101
+ > Requires [Bun](https://bun.sh) >= 1.3. Or run without installing: `bunx imprint-mcp teach <site> --url <url>`
115
102
 
116
- ### Standalone binary (no Bun needed)
103
+ ### Standalone Binary
117
104
 
118
105
  ```bash
119
106
  curl -fsSL https://raw.githubusercontent.com/ashaychangwani/imprint/main/scripts/install.sh | bash
120
107
  ```
121
108
 
122
- The standalone binary supports `mcp-server`, `install`, `cron`, and `credential` commands.
123
- Browser commands (`teach`, `record`, `login`, `playbook`) require a full Bun + Playwright install.
124
-
125
- ### From source
109
+ ### From Source
126
110
 
127
111
  ```bash
128
112
  git clone https://github.com/ashaychangwani/imprint.git && cd imprint
129
113
  bun install && bun link
130
114
  ```
131
115
 
132
- ### Browser commands
116
+ <details>
117
+ <summary><strong>Browser setup & LLM providers</strong></summary>
118
+
119
+ <br>
133
120
 
134
- The `teach`, `record`, `login`, and `playbook` commands need Playwright's Chromium. Install it once:
121
+ **Browser commands** (`teach`, `record`, `login`, `playbook`) need Playwright's Chromium:
135
122
 
136
123
  ```bash
137
124
  bunx playwright install chromium
138
125
  ```
139
126
 
140
- ### LLM providers
127
+ **LLM providers** are auto-detected. Run `imprint doctor` to see what's available.
141
128
 
142
- Imprint detects LLM providers from what's already on your system:
129
+ | Priority | Provider | Detected via |
130
+ |:--|:--|:--|
131
+ | 1 | Claude Code | `claude` on PATH |
132
+ | 2 | Codex CLI | `codex` on PATH |
133
+ | 3 | Anthropic API | `ANTHROPIC_API_KEY` env var |
134
+ | 4 | Cursor | `cursor` on PATH |
143
135
 
144
- | Priority | Provider | Triggered by |
145
- |---|---|---|
146
- | 1 | `claude-cli` | `claude` on PATH (Claude Code subscription) |
147
- | 2 | `codex-cli` | `codex` on PATH (Codex subscription) |
148
- | 3 | `anthropic-api` | `ANTHROPIC_API_KEY` env var |
149
- | 4 | `cursor-cli` | `cursor` on PATH (generic prompt/playbook compile only; not `teach`/`generate`) |
136
+ Override with `--provider <name>` and `--model <name>`.
150
137
 
151
- ```bash
152
- imprint doctor
153
- ```
154
-
155
- Shows which providers are detected. Interactive `imprint teach` prompts you to choose when multiple compatible compile providers are available, and also lists undetected providers as setup-help entries. Pick one of those help entries to see exactly which CLI or environment variable to add so it will be detected next time.
156
-
157
- To force a specific provider and skip the picker, pass `--provider <name>` to `teach`, `generate`, or `compile-playbook`. `teach` and `generate` require a compile-agent provider (`claude-cli`, `codex-cli`, or `anthropic-api`); `compile-playbook` can also use `cursor-cli`.
158
-
159
- After selecting a provider, `teach` prompts for a **model** (e.g. `claude-opus-4-7` vs `claude-sonnet-4-6` for Anthropic, `gpt-5.4` vs `o3` for Codex). Override with `--model <name>`. Each tool compiles with a **5-minute timeout** by default; override with `--timeout <duration>` (e.g. `--timeout 10m`, `--timeout 1h`). To skip the replay-and-diff stage (the automated second pass that classifies ephemeral vs constant values), pass `--skip-replay` — faster, but may reduce workflow accuracy for sites with dynamic request parameters.
138
+ </details>
160
139
 
161
- <br>
140
+ ---
162
141
 
163
- ## Local compile tracing
142
+ ## The Backend Ladder
164
143
 
165
- Slow or suspicious compiles can be inspected in a local [Phoenix](https://arize.com/docs/phoenix/self-hosting/deployment-options/terminal) trace UI.
144
+ When an API call gets blocked, Imprint doesn't jump to DOM replay. It escalates through the cheapest backend that works:
166
145
 
167
- ```bash
168
- # one-time install with uv
169
- uv tool install arize-phoenix
170
- phoenix serve
171
-
172
- # in another terminal
173
- IMPRINT_TRACE=1 \
174
- IMPRINT_TRACE_BATCH=false \
175
- IMPRINT_TRACE_LLM_IO=1 \
176
- IMPRINT_TRACE_TOOL_IO=1 \
177
- PHOENIX_COLLECTOR_ENDPOINT=http://localhost:6006 \
178
- imprint teach namecheap-domains --from-session ~/.imprint/namecheap-domains/sessions/<ts>.json --provider codex-cli
146
+ ```
147
+ fetch ~200ms Plain APIs, persisted cookies
148
+
149
+
150
+ fetch-bootstrap browser Mints cookies, CSRF tokens, storage
151
+ │ + API
152
+
153
+ cdp-replay ~2-35s API calls run inside a live, trusted Chrome —
154
+ │ a protected POST refreshes its anti-bot token
155
+ │ between calls (multi-step state-changing flows)
156
+
157
+ stealth-fetch ~1-12s Defeats Akamai, Cloudflare, DataDome
158
+
159
+
160
+ playbook ~9s Full DOM replay — universal fallback
179
161
  ```
180
162
 
181
- Traces show the full compile pipeline at every level of detail: each `agent.turn.N` span captures per-turn token counts; each `llm.message_with_tools` span records model, provider, input/output tokens, and stop reason; each `agent.tool.X` span times individual tool dispatches. Drill from `cli.teach` → `compile.generate` → `agent.turn.1` → tool calls to find exactly which turn or tool is spending tokens. Set `IMPRINT_TRACE_IO_MAX_CHARS` to raise or lower captured payload size. Set `IMPRINT_TRACE_INPUT_USD_PER_1M` and `IMPRINT_TRACE_OUTPUT_USD_PER_1M` to add estimated cost attributes.
163
+ The full order is `fetch → fetch-bootstrap → cdp-replay → stealth-fetch → playbook`; `auto` mode walks it and stops at the first backend that works.
182
164
 
183
- <br>
165
+ Every recording compiles to *both* `workflow.json` and `playbook.yaml`, so the ladder always has a DOM fallback.
166
+
167
+ ---
184
168
 
185
- ## Platform support
169
+ ## Platform Support
186
170
 
187
- At the end of `imprint teach`, you pick your AI platform and Imprint handles the wiring:
171
+ At the end of `imprint teach`, pick your AI platform and Imprint wires it up:
188
172
 
189
173
  | Platform | Integration |
190
- |---|---|
191
- | **Claude Code** | Automatic — runs `claude mcp add` for you |
192
- | **Codex CLI** | Automatic — runs `codex mcp add` for you |
174
+ |:--|:--|
175
+ | **Claude Code** | Automatic — runs `claude mcp add` |
176
+ | **Codex CLI** | Automatic — runs `codex mcp add` |
193
177
  | **Claude Desktop** | Paste-ready JSON config |
194
178
  | **OpenClaw** | MCP config + SKILL.md export |
195
179
  | **Hermes** | MCP config + SKILL.md + cron mapping |
196
180
 
197
- Each site registers as its own MCP server (`imprint-southwest`, `imprint-discoverandgo`, ...) so tools never collide. See [Integrations](docs/integrations.md) for HTTP transport, Docker, and systemd options.
181
+ Each site registers as its own MCP server (`imprint-southwest`, `imprint-google-flights`, ...) so tools never collide.
198
182
 
199
- Audit or clean up those registrations with `imprint mcp`:
183
+ ---
200
184
 
201
- ```bash
202
- imprint mcp status # registrations + local teach state
203
- imprint mcp # interactive cleanup TUI
204
- imprint mcp disable imprint-mysite --yes # reversible; stores a local snapshot
205
- imprint mcp delete imprint-mysite --yes # removes external MCP registrations only
206
- ```
185
+ ## Examples
207
186
 
208
- Raw recordings under `~/.imprint/<site>/sessions/` may contain sensitive browser state. Cleanup commands leave them alone unless you explicitly choose `--local site`.
187
+ Every example below was **one-shot compiled from a single real browser-session recording** (`imprint teach`) — the generated artifacts are committed verbatim as a **proof of concept** of what the compiler produces, not as maintained integrations. Recording-derived defaults (dates, geo) age out; pass explicit values.
209
188
 
210
- See [MCP Maintenance](docs/mcp-maintenance.md) for status classifications, supported client config files, reversible disable behavior, and local artifact cleanup rules.
189
+ **★ Star examples** — multi-tool suites, each compiled from one recording and scored by the headless differential audit:
211
190
 
212
- <br>
191
+ | Example | Tools | Audit | What it shows |
192
+ |:--|:--|:--|:--|
193
+ | [**google-flights**](examples/google-flights) | 4 | 92.6% | `batchexecute` wire-format decode + search→booking producer-token chain, live `cdp-replay` |
194
+ | [**google-hotels**](examples/google-hotels) | 4 | 91.7% | autocomplete → search → reviews/booking producer-token chaining |
213
195
 
214
- ## Sharing skills across machines
196
+ Other examples:
215
197
 
216
- Teach on your laptop, ship the skill to a remote agent (OpenClaw, Hermes, a server-side cron host, ...). Skill folders committed to git contain **zero plaintext credentials** — only placeholders like `${credential.NAME}` / `${state.NAME}` and a `credentials.manifest.json` listing the secrets or durable storage keys the receiver must provision.
198
+ | Example | Description |
199
+ |:--|:--|
200
+ | [**southwest**](examples/southwest) | Live fare search — defeats Akamai bot detection |
201
+ | [**discoverandgo**](examples/discoverandgo) | Authenticated booking via per-site credential store |
202
+ | [**echo**](examples/echo) | MCP smoke-test fixture |
217
203
 
218
- For credentials, use the **encrypted bundle** flow when you can't (or don't want to) re-type passwords on the receiving machine:
204
+ Install any example into your MCP client:
219
205
 
220
206
  ```bash
221
- # On the laptop where you taught the skill:
222
- imprint credential export southwest --out southwest.imprintbundle
223
- # → prompts for a passphrase. The bundle is libsodium-encrypted with an
224
- # argon2id-derived key. Safe to send via Slack, email, scp, S3, etc.
225
-
226
- # On the OpenClaw machine (or any other receiver):
227
- imprint credential import southwest southwest.imprintbundle
228
- # → prompts for the same passphrase. Decrypts; secrets land in the OS keychain.
207
+ imprint install google-flights --source examples --platform claude-desktop
229
208
  ```
230
209
 
231
- Pass the passphrase **out-of-band** (Signal, phone, password manager share — *not* the same channel as the bundle file).
232
-
233
- After import, the same `imprint mcp-server <site>` config you'd use locally works on the receiver — it resolves credentials from that machine's credential backend and initializes a fresh cookie/state jar for every tool call. If anything's missing, `imprint mcp-server` and `imprint cron` log/fail with the exact `imprint credential set`, `imprint login`, or `imprint credential import` commands you need.
234
-
235
- See [Sharing Skills](docs/credential-sharing.md) for the full flow including interactive `imprint credential set` (when you can re-type), threat model, rotation, and OpenClaw / Hermes wiring details.
210
+ ---
236
211
 
237
- <br>
238
-
239
- ## The backend ladder
240
-
241
- When an API call gets blocked or needs browser-minted state, Imprint doesn't jump straight to DOM replay. It escalates through the cheapest mode that can satisfy the workflow:
242
-
243
- | | Speed | Handles |
244
- |---|---|---|
245
- | **fetch** | ~200ms | Plain APIs, persisted cookies, in-flight HTTP captures |
246
- | **fetch-bootstrap** | browser bootstrap + API replay | Pages that only need Chromium to mint cookies, CSRF, storage, or DOM-derived state |
247
- | **stealth-fetch** | ~12s first call, ~1s after | Akamai, Cloudflare, DataDome, bot-defense state |
248
- | **playbook** | ~9s | Anything — full DOM replay as fallback |
249
-
250
- `fetch-bootstrap` is not a default rung for every workflow. `auto` inserts it only when the workflow declares bootstrap metadata, a capture requires browser/stealth bootstrap, or `fetch` returns structured `STATE_MISSING` that a browser bootstrap can satisfy. Every recording still compiles to *both* `workflow.json` and `playbook.yaml`, so the ladder has a DOM fallback when API replay cannot work.
212
+ ## CLI Reference
251
213
 
252
- State-aware workflows use named captures and `${state.NAME}` placeholders. For example, request A can set a CSRF cookie, request B can project it into a header, and the whole run stays on plain `fetch` without launching Chromium.
214
+ ```bash
215
+ imprint --help # all commands
216
+ imprint <command> --help # per-command options
217
+ ```
253
218
 
254
- <br>
219
+ | Category | Commands |
220
+ |:--|:--|
221
+ | **Pipeline** | `teach` · `record` · `redact` · `generate` · `compile-playbook` · `emit` |
222
+ | **Runtime** | `cron` · `mcp-server` · `playbook` · `probe-backends` · `audit` |
223
+ | **Credentials** | `credential set` · `credential list` · `credential export` · `credential import` · `credential migrate` |
224
+ | **Utilities** | `mcp` · `login` · `assemble` · `check` · `doctor` · `install` · `uninstall` |
255
225
 
256
- ## Examples
226
+ ---
257
227
 
258
- The checked-in `examples/` directory contains committed fixtures and demos. Generated tools from `imprint teach` go into `~/.imprint/<site>/<toolName>/` by default (configurable via `IMPRINT_HOME`). Runtime discovery (cron, MCP, probe-backends) reads `IMPRINT_HOME`, so to run the checked-in examples, point it at the repo's `examples/` directory:
228
+ ## Sharing Skills
259
229
 
260
- You can also install an example directly into an MCP client:
230
+ Teach on your laptop, ship to a remote agent. Skill folders contain **zero plaintext credentials** — only `${credential.NAME}` placeholders and a manifest listing what the receiver must provision.
261
231
 
262
232
  ```bash
263
- imprint install google-flights --source examples --platform claude-desktop
264
- ```
265
-
266
- Run `imprint install` with no arguments for an interactive install/uninstall picker. It only shows detected AI platforms; uninstall lists installed `imprint-*` MCP servers directly. For GUI config-file clients such as Claude Desktop, install writes an absolute Bun + Imprint CLI path so the app does not depend on your shell PATH.
233
+ # Export (encrypted with libsodium + argon2id)
234
+ imprint credential export southwest --out southwest.imprintbundle
267
235
 
268
- | Example | What it demonstrates | Run it |
269
- |---|---|---|
270
- | [**southwest**](examples/southwest) | Live fare watcher, defeats Akamai bot detection, price-drop notifications | `IMPRINT_HOME=examples imprint cron southwest --once` |
271
- | [**google-flights**](examples/google-flights) | Real-time flight search across all carriers, parses Google's raw protobuf response | `IMPRINT_HOME=examples imprint mcp-server google-flights` |
272
- | [**google-hotels**](examples/google-hotels) | Hotel search with star rating, guest scores, nightly + total prices | `IMPRINT_HOME=examples imprint mcp-server google-hotels` |
273
- | [**discoverandgo**](examples/discoverandgo) | Authenticated booking via per-site credential store | `IMPRINT_HOME=examples imprint cron discoverandgo --once` |
274
- | [**namecheap-domains**](examples/namecheap-domains) | Domain search with CRC32 URL signing reverse-engineered from JS, 5-endpoint chain with availability + aftermarket pricing | `IMPRINT_HOME=examples imprint mcp-server namecheap-domains` |
275
- | [**echo**](examples/echo) | MCP smoke-test fixture (no network, no LLM) | `IMPRINT_HOME=examples imprint mcp-server echo` |
236
+ # Import on another machine
237
+ imprint credential import southwest southwest.imprintbundle
238
+ ```
276
239
 
277
- <br>
240
+ Send the bundle over any channel. Pass the passphrase **out-of-band**.
278
241
 
279
- ## CLI reference
242
+ See [Sharing Skills](docs/credential-sharing.md) for the full flow.
280
243
 
281
- ```
282
- imprint --help # all commands
283
- imprint <command> --help # per-command options
284
- ```
244
+ ---
285
245
 
286
- | | Commands |
287
- |---|---|
288
- | **Pipeline** | `teach` · `record` · `redact` · `generate` · `compile-playbook` · `emit` |
289
- | **Runtime** | `cron` · `mcp-server` · `playbook` · `probe-backends` |
290
- | **Credentials** | `credential set` · `credential list` · `credential export` · `credential import` · `credential migrate` |
291
- | **Utilities** | `mcp` · `login` · `assemble` · `check` · `doctor` |
246
+ ## Documentation
292
247
 
293
- `teach`, `generate`, and `compile-playbook` accept `--provider <name>` to override the auto-detected LLM (see [Install](#install) for valid names and compile-agent support). `teach` and `generate` also take `--keep-test` to retain the agent-written `parser.test.ts` for debugging — it's deleted by default since it reads the gitignored redacted session via `$IMPRINT_SESSION_PATH` and isn't reproducible elsewhere. For multi-tool sites, use `imprint cron <site> --tool <toolName>` and `imprint probe-backends <site> --tool <toolName>` unless `--config` or `--out` points inside the target tool directory.
248
+ | | |
249
+ |:--|:--|
250
+ | [Getting Started](docs/getting-started.md) | Full walkthrough |
251
+ | [Architecture](docs/architecture.md) | Data flow and module map |
252
+ | [Integrations](docs/integrations.md) | Per-platform setup |
253
+ | [Security](docs/security.md) | Redaction, credential handling, what gets stored |
254
+ | [Sharing Skills](docs/credential-sharing.md) | Credential export/import and remote provisioning |
255
+ | [MCP Maintenance](docs/mcp-maintenance.md) | Audit, disable, restore, and prune MCP state |
256
+ | [Troubleshooting](docs/troubleshooting.md) | Common failures and fixes |
257
+ | [Tracing](docs/tracing.md) | OpenTelemetry tracing, cost rollup, and Phoenix setup |
294
258
 
295
- <br>
259
+ <details>
260
+ <summary>More docs</summary>
296
261
 
297
- ## Docs
262
+ - [Decisions](docs/decisions.md) — design rationale
263
+ - [Glossary](docs/glossary.md) — terms and concepts
264
+ - [Capture Protocol](docs/capture-protocol.md) — CDP recording details
265
+ - [Playbook Debugging](docs/playbook-debugging.md) — DOM replay debugging
266
+ - [Notifications](docs/notifications.md) — alert setup
298
267
 
299
- - [Getting Started](docs/getting-started.md) — full walkthrough
300
- - [Integrations](docs/integrations.md) — per-platform setup
301
- - [MCP Maintenance](docs/mcp-maintenance.md) — audit, disable, restore, and prune Imprint MCP state
302
- - [Sharing Skills](docs/credential-sharing.md) — laptop ↔ OpenClaw / Hermes / remote-agent provisioning
303
- - [Architecture](docs/architecture.md) — data flow and module map
304
- - [Security](docs/security.md) — redaction, credential handling, what gets stored
305
- - [Troubleshooting](docs/troubleshooting.md) — common failures and fixes
306
- - [Decisions](docs/decisions.md) · [Glossary](docs/glossary.md) · [Capture Protocol](docs/capture-protocol.md) · [Playbook Debugging](docs/playbook-debugging.md) · [Notifications](docs/notifications.md)
268
+ </details>
307
269
 
308
- <br>
270
+ ---
309
271
 
310
272
  ## Contributing
311
273
 
@@ -315,8 +277,10 @@ Good first contributions: replay backends, notification predicates, auth extract
315
277
 
316
278
  See [CONTRIBUTING.md](CONTRIBUTING.md) for full guidelines.
317
279
 
318
- <br>
280
+ ---
281
+
282
+ <div align="center">
319
283
 
320
- ## License
284
+ **[MIT License](LICENSE)**
321
285
 
322
- [MIT](LICENSE)
286
+ </div>
@@ -49,7 +49,7 @@ imprint cron discoverandgo --once
49
49
  ## Notes
50
50
 
51
51
  - Discover & Go's auth model is patron-ID + session cookies. The session cookie expires; re-run `imprint login` if you start seeing AUTH_EXPIRED.
52
- - The workflow chains an `epass_server.php?method=Login` followed by the booking call. Generated `workflow.json` references both via the `${response[N].patronID}` extraction syntax.
52
+ - `imprint login` parses the `patronID` out of the recorded `epass_server.php?method=Login` POST and stores it in the credential store as `patron_id`. The booking `workflow.json` then references it via `${credential.patron_id}` — no Login call is replayed at runtime.
53
53
 
54
54
  ## Not in this demo
55
55
 
@@ -11,7 +11,7 @@
11
11
 
12
12
  ```bash
13
13
  # Inspect via mcp-inspector (recommended for debugging)
14
- npx @modelcontextprotocol/inspector imprint mcp-server --site echo
14
+ npx @modelcontextprotocol/inspector imprint mcp-server echo
15
15
 
16
16
  # Or run the included client smoke test
17
17
  bun scripts/mcp-client-test.ts
@@ -0,0 +1,28 @@
1
+ # Google Flights — `imprint-google-flights`
2
+
3
+ > **One-shot compiled, proof of concept.** Every file in this directory was generated by a single `imprint teach google-flights` run against **one** recorded browser session — no hand-written request code, parsers, or selectors. It is committed here as a proof of concept of what the compiler produces, not as a maintained integration.
4
+
5
+ A 4-tool MCP server for Google Flights, compiled from a recording of a normal flight search. Headless-claude differential audit: **92.6%** — every tool `liveVerified=true`.
6
+
7
+ ## Tools
8
+
9
+ | Tool | What it does | Notes |
10
+ |---|---|---|
11
+ | `lookup_airport` | Resolve a city/airport query to IATA codes | |
12
+ | `search_flights` | Search itineraries (origin, destination, dates, trip type, stops, price, times, duration, bags) | the star tool |
13
+ | `get_flight_booking_details` | Fare/booking detail for a selected itinerary | **consumes** a `flight_token` produced by `search_flights` (producer → consumer chain) |
14
+ | `get_flight_calendar_prices` | Lowest price per day across a date window | |
15
+
16
+ ## How it was compiled
17
+
18
+ - **Protocol**: Google's `/_/FlightsFrontendUi` **`batchexecute`** endpoint returns a nested-array (protobuf-ish) payload. The compiler reverse-engineered the encoding into `_shared/batchexecute.ts` (shared decoder) + per-tool `parser.ts`, and the `f.req` request shape into `_shared/flights_request.ts` + per-tool `request-transform.ts`.
19
+ - **Anti-bot**: the per-page `f.sid` / `bl` tokens are bootstrapped at runtime (`${state.f_sid}` placeholders), and calls run on the **cdp-replay** rung (requests issued inside a live, trusted Chrome) with a **stealth-fetch** fallback.
20
+ - **Artifacts per tool**: `workflow.json` (API replay), `playbook.yaml` (DOM fallback), `index.ts` (MCP tool), `parser.ts` + `request-transform.ts` (codecs).
21
+
22
+ ## Install
23
+
24
+ ```bash
25
+ imprint install google-flights --source examples --platform claude-desktop
26
+ ```
27
+
28
+ *Recording-derived defaults (dates) age out — pass explicit values. See the repo [README](../../README.md) and [docs](../../docs/architecture.md).*
@@ -0,0 +1,63 @@
1
+ // Decode Google's batchexecute streaming envelope used by every FlightsFrontendUi RPC.
2
+ //
3
+ // Wire format (verified against recorded seq 69/97/111/667):
4
+ // )]}'\n\n <- anti-XSSI magic prefix
5
+ // <decimal length>\n <- length line (jsonChars + 2; counts bounding newlines)
6
+ // [[...rows...]]\n <- one chunk = single-line JSON array of rows
7
+ // ...repeats...
8
+ //
9
+ // We DELIBERATELY do not slice by the length lines: the stated length is
10
+ // `jsonChars + 2`, so naive slice(pos, pos+len) overshoots into the next token.
11
+ // Because every chunk is single-line JSON (all interior newlines are escaped as
12
+ // \n), splitting on "\n" is exact and robust. Each real RPC result is a row
13
+ // ["wrb.fr", <rpcid|null>, "<doubly-encoded JSON payload>", ...]; row[2] must be
14
+ // JSON.parse'd a SECOND time. Sidecar rows ("di", "af.httprm", "e") are ignored.
15
+
16
/**
 * Decode a batchexecute response body into its RPC result frames.
 *
 * The body is scanned line by line rather than sliced by the advertised chunk
 * lengths. A line contributes frames only when it parses as a JSON array; each
 * element of that array that is itself an array tagged `"wrb.fr"` with a string
 * in slot 2 yields one frame, whose payload is slot 2 JSON-parsed a second time.
 * Everything else (blank lines, length lines, non-array lines, unparseable
 * lines, rows with other tags, rows whose slot 2 is not a string or is not
 * valid JSON) is skipped silently.
 *
 * @param raw - Response text, with or without the leading `)]}'` prefix.
 * @returns One `{ rpcid, payload }` entry per decodable `wrb.fr` row, in the
 *   order encountered. `rpcid` is slot 1 when it is a string, otherwise `null`.
 *   Returns an empty array when nothing decodes, including when `raw` is not a
 *   string at runtime.
 */
export function decodeBatchExecute(raw: string): Array<{ rpcid: string | null; payload: any }> {
  const out: Array<{ rpcid: string | null; payload: any }> = [];

  // The decoder is tolerant of every other malformed input, so a missing body
  // handed over by an untyped caller (undefined / null) yields "no frames"
  // instead of a TypeError from `startsWith`.
  if (typeof raw !== 'string') return out;

  const text = raw.startsWith(")]}'") ? raw.slice(4) : raw;

  // Built once rather than on every loop iteration.
  const lengthLine = /^\d+$/;

  for (const line of text.split('\n')) {
    const trimmed = line.trim();
    if (trimmed.length === 0) continue;
    // Skip the decimal length marker lines.
    if (lengthLine.test(trimmed)) continue;
    // Chunks are JSON arrays; anything else is noise / partial.
    if (trimmed[0] !== '[') continue;

    let chunk: unknown;
    try {
      chunk = JSON.parse(trimmed);
    } catch {
      // Tolerate truncated / partial trailing lines.
      continue;
    }
    if (!Array.isArray(chunk)) continue;

    for (const row of chunk) {
      if (!Array.isArray(row) || row[0] !== 'wrb.fr') continue;
      const rpcid = typeof row[1] === 'string' ? row[1] : null;
      if (typeof row[2] !== 'string') continue;

      // Slot 2 is itself a JSON document serialized as a string.
      let payload: any;
      try {
        payload = JSON.parse(row[2]);
      } catch {
        continue;
      }
      out.push({ rpcid, payload });
    }
  }

  return out;
}
58
+
59
/**
 * Decode a batchexecute body and return the payload of a single frame.
 *
 * @param raw - Response text passed straight to `decodeBatchExecute`.
 * @param rpcid - When given, the first frame with this exact rpcid is used;
 *   when omitted, the first decoded frame is used.
 * @returns The selected frame's payload, or `undefined` when no frame matches
 *   (or nothing was decoded).
 */
export function extractRpcPayload(raw: string, rpcid?: string): any {
  const decoded = decodeBatchExecute(raw);

  // No rpcid requested: fall back to whatever frame came first.
  if (rpcid == null) {
    return decoded[0]?.payload;
  }

  for (const entry of decoded) {
    if (entry.rpcid === rpcid) {
      return entry.payload;
    }
  }
  return undefined;
}