imprint-mcp 0.2.1 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (130) hide show
  1. package/README.md +193 -189
  2. package/examples/discoverandgo/README.md +1 -1
  3. package/examples/echo/README.md +1 -1
  4. package/examples/google-flights/README.md +28 -0
  5. package/examples/google-flights/_shared/batchexecute.ts +63 -0
  6. package/examples/google-flights/_shared/flights_request.ts +95 -0
  7. package/examples/google-flights/_shared/package.json +9 -0
  8. package/examples/google-flights/get_flight_booking_details/index.ts +159 -0
  9. package/examples/google-flights/get_flight_booking_details/package.json +9 -0
  10. package/examples/google-flights/get_flight_booking_details/parser.ts +182 -0
  11. package/examples/google-flights/get_flight_booking_details/playbook.yaml +138 -0
  12. package/examples/google-flights/get_flight_booking_details/request-transform.ts +86 -0
  13. package/examples/google-flights/get_flight_booking_details/workflow.json +98 -0
  14. package/examples/google-flights/get_flight_calendar_prices/index.ts +131 -0
  15. package/examples/google-flights/get_flight_calendar_prices/package.json +9 -0
  16. package/examples/google-flights/get_flight_calendar_prices/parser.ts +86 -0
  17. package/examples/google-flights/get_flight_calendar_prices/playbook.yaml +97 -0
  18. package/examples/google-flights/get_flight_calendar_prices/request-transform.ts +31 -0
  19. package/examples/google-flights/get_flight_calendar_prices/workflow.json +78 -0
  20. package/examples/google-flights/lookup_airport/index.ts +101 -0
  21. package/examples/google-flights/lookup_airport/package.json +9 -0
  22. package/examples/google-flights/lookup_airport/parser.ts +66 -0
  23. package/examples/google-flights/lookup_airport/playbook.yaml +47 -0
  24. package/examples/google-flights/lookup_airport/request-transform.ts +20 -0
  25. package/examples/google-flights/lookup_airport/workflow.json +57 -0
  26. package/examples/google-flights/search_flights/index.ts +219 -0
  27. package/examples/google-flights/search_flights/package.json +9 -0
  28. package/examples/google-flights/search_flights/parser.ts +169 -0
  29. package/examples/google-flights/search_flights/playbook.yaml +184 -0
  30. package/examples/google-flights/search_flights/request-transform.ts +119 -0
  31. package/examples/google-flights/search_flights/workflow.json +143 -0
  32. package/examples/google-hotels/README.md +29 -0
  33. package/examples/google-hotels/_shared/batchexecute.ts +73 -0
  34. package/examples/google-hotels/_shared/freq.ts +158 -0
  35. package/examples/google-hotels/_shared/package.json +9 -0
  36. package/examples/google-hotels/autocomplete_hotel_location/index.ts +80 -0
  37. package/examples/google-hotels/autocomplete_hotel_location/package.json +9 -0
  38. package/examples/google-hotels/autocomplete_hotel_location/parser.ts +71 -0
  39. package/examples/google-hotels/autocomplete_hotel_location/playbook.yaml +36 -0
  40. package/examples/google-hotels/autocomplete_hotel_location/request-transform.ts +37 -0
  41. package/examples/google-hotels/autocomplete_hotel_location/workflow.json +36 -0
  42. package/examples/google-hotels/get_hotel_booking_options/index.ts +143 -0
  43. package/examples/google-hotels/get_hotel_booking_options/package.json +9 -0
  44. package/examples/google-hotels/get_hotel_booking_options/parser.ts +271 -0
  45. package/examples/google-hotels/get_hotel_booking_options/playbook.yaml +154 -0
  46. package/examples/google-hotels/get_hotel_booking_options/request-transform.ts +154 -0
  47. package/examples/google-hotels/get_hotel_booking_options/workflow.json +84 -0
  48. package/examples/google-hotels/get_hotel_reviews/index.ts +81 -0
  49. package/examples/google-hotels/get_hotel_reviews/package.json +9 -0
  50. package/examples/google-hotels/get_hotel_reviews/parser.ts +128 -0
  51. package/examples/google-hotels/get_hotel_reviews/playbook.yaml +64 -0
  52. package/examples/google-hotels/get_hotel_reviews/request-transform.ts +42 -0
  53. package/examples/google-hotels/get_hotel_reviews/workflow.json +37 -0
  54. package/examples/google-hotels/search_hotels/index.ts +207 -0
  55. package/examples/google-hotels/search_hotels/package.json +9 -0
  56. package/examples/google-hotels/search_hotels/parser.ts +260 -0
  57. package/examples/google-hotels/search_hotels/playbook.yaml +87 -0
  58. package/examples/google-hotels/search_hotels/request-transform.ts +197 -0
  59. package/examples/google-hotels/search_hotels/workflow.json +127 -0
  60. package/examples/southwest/README.md +3 -2
  61. package/examples/southwest/search_southwest_flights/index.ts +18 -1
  62. package/examples/southwest/search_southwest_flights/workflow.json +18 -1
  63. package/package.json +3 -2
  64. package/prompts/audit-agent.md +71 -0
  65. package/prompts/build-planning.md +74 -0
  66. package/prompts/compile-agent.md +131 -27
  67. package/prompts/prereq-builder.md +64 -0
  68. package/prompts/prereq-planner.md +34 -0
  69. package/prompts/tool-planning.md +39 -0
  70. package/src/cli.ts +116 -3
  71. package/src/imprint/agent.ts +5 -0
  72. package/src/imprint/audit.ts +996 -0
  73. package/src/imprint/backend-ladder.ts +1214 -184
  74. package/src/imprint/build-plan.ts +1051 -0
  75. package/src/imprint/cdp-browser-fetch.ts +592 -0
  76. package/src/imprint/cdp-jar-cache.ts +320 -0
  77. package/src/imprint/chromium.ts +414 -8
  78. package/src/imprint/claude-cli-compile.ts +125 -25
  79. package/src/imprint/codex-cli-compile.ts +26 -23
  80. package/src/imprint/compile-agent-types.ts +38 -0
  81. package/src/imprint/compile-agent.ts +63 -25
  82. package/src/imprint/compile-tools.ts +1666 -66
  83. package/src/imprint/compile.ts +13 -1
  84. package/src/imprint/concurrency.ts +87 -0
  85. package/src/imprint/cron.ts +4 -0
  86. package/src/imprint/doctor.ts +48 -3
  87. package/src/imprint/freeform-redact.ts +5 -4
  88. package/src/imprint/install.ts +79 -4
  89. package/src/imprint/integrations.ts +3 -3
  90. package/src/imprint/llm.ts +56 -8
  91. package/src/imprint/mcp-compile-server.ts +43 -10
  92. package/src/imprint/mcp-maintenance.ts +18 -102
  93. package/src/imprint/mcp-server.ts +73 -7
  94. package/src/imprint/multi-progress.ts +7 -2
  95. package/src/imprint/param-grounding.ts +367 -0
  96. package/src/imprint/paths.ts +29 -0
  97. package/src/imprint/playbook-runner.ts +101 -40
  98. package/src/imprint/prereq-builder.ts +651 -0
  99. package/src/imprint/probe-backends.ts +6 -3
  100. package/src/imprint/record.ts +10 -1
  101. package/src/imprint/redact.ts +30 -2
  102. package/src/imprint/replay-capture.ts +19 -18
  103. package/src/imprint/runtime.ts +19 -10
  104. package/src/imprint/session-diff.ts +79 -2
  105. package/src/imprint/session-merge.ts +9 -5
  106. package/src/imprint/stealth-chromium.ts +79 -0
  107. package/src/imprint/stealth-fetch.ts +309 -29
  108. package/src/imprint/stealth-token-cache.ts +88 -0
  109. package/src/imprint/teach-plan.ts +251 -0
  110. package/src/imprint/teach-state.ts +10 -0
  111. package/src/imprint/teach.ts +456 -142
  112. package/src/imprint/tool-candidates.ts +72 -14
  113. package/src/imprint/tool-plan.ts +313 -0
  114. package/src/imprint/tracing.ts +135 -6
  115. package/src/imprint/types.ts +61 -3
  116. package/examples/google-flights/search_google_flights/index.ts +0 -101
  117. package/examples/google-flights/search_google_flights/parser.test.ts +0 -140
  118. package/examples/google-flights/search_google_flights/parser.ts +0 -189
  119. package/examples/google-flights/search_google_flights/playbook.yaml +0 -130
  120. package/examples/google-flights/search_google_flights/workflow.json +0 -48
  121. package/examples/google-hotels/search_google_hotels/index.ts +0 -194
  122. package/examples/google-hotels/search_google_hotels/parser.test.ts +0 -168
  123. package/examples/google-hotels/search_google_hotels/parser.ts +0 -330
  124. package/examples/google-hotels/search_google_hotels/playbook.yaml +0 -125
  125. package/examples/google-hotels/search_google_hotels/workflow.json +0 -111
  126. package/examples/namecheap-domains/search_namecheap_domains/index.ts +0 -144
  127. package/examples/namecheap-domains/search_namecheap_domains/parser.ts +0 -380
  128. package/examples/namecheap-domains/search_namecheap_domains/playbook.yaml +0 -50
  129. package/examples/namecheap-domains/search_namecheap_domains/request-transform.ts +0 -136
  130. package/examples/namecheap-domains/search_namecheap_domains/workflow.json +0 -97
package/README.md CHANGED
@@ -1,311 +1,313 @@
1
- <h1 align="center">Imprint</h1>
1
+ <div align="center">
2
2
 
3
- <p align="center">
4
- <strong>Don't do anything twice. Teach your AI agent once, and it remembers forever.</strong>
5
- </p>
3
+ # imprint
6
4
 
7
- <p align="center">
8
- <a href="https://github.com/ashaychangwani/imprint/actions/workflows/test.yml"><img src="https://github.com/ashaychangwani/imprint/actions/workflows/test.yml/badge.svg" alt="Tests"></a>
9
- <img src="https://img.shields.io/endpoint?url=https://gist.githubusercontent.com/ashaychangwani/cbd3134e06fb4fabf24aed94b251bdfd/raw/test-count.json" alt="Test count">
10
- <a href="https://github.com/ashaychangwani/imprint/releases"><img src="https://img.shields.io/github/v/release/ashaychangwani/imprint?label=release" alt="Release"></a>
11
- <a href="LICENSE"><img src="https://img.shields.io/badge/license-MIT-blue" alt="MIT License"></a>
12
- <a href="https://github.com/ashaychangwani/imprint/stargazers"><img src="https://img.shields.io/github/stars/ashaychangwani/imprint?style=social" alt="GitHub Stars"></a>
13
- </p>
5
+ **Teach your AI agent any website. Once.**
14
6
 
15
- <br>
7
+ Record a real browser session, get a deterministic MCP tool back.\
8
+ No tokens burned on exploration. No "the LLM clicked the wrong button."\
9
+ The recording *is* the executable.
16
10
 
17
- ```bash
18
- bun install -g imprint-mcp
19
- imprint teach southwest --url https://www.southwest.com
20
- ```
11
+ [![Tests](https://github.com/ashaychangwani/imprint/actions/workflows/test.yml/badge.svg)](https://github.com/ashaychangwani/imprint/actions/workflows/test.yml)
12
+ ![Test count](https://img.shields.io/endpoint?url=https://gist.githubusercontent.com/ashaychangwani/cbd3134e06fb4fabf24aed94b251bdfd/raw/test-count.json)
13
+ [![Release](https://img.shields.io/github/v/release/ashaychangwani/imprint?label=release)](https://github.com/ashaychangwani/imprint/releases)
14
+ [![MIT License](https://img.shields.io/badge/license-MIT-blue)](LICENSE)
15
+ [![GitHub Stars](https://img.shields.io/github/stars/ashaychangwani/imprint?style=social)](https://github.com/ashaychangwani/imprint/stargazers)
21
16
 
22
- That's it. Imprint opens a browser, you drive the workflow, and it compiles a deterministic **MCP tool** your AI agent can call from then on. No tokens burned on exploration, no "the LLM clicked the wrong button" variance. The recording *is* the executable.
17
+ [Quick Start](#quick-start) · [Examples](#examples) · [How It Works](#how-it-works) · [Docs](docs/getting-started.md)
23
18
 
24
- <br>
19
+ </div>
25
20
 
26
- ## See it in action
21
+ ---
27
22
 
28
- After teaching, your agent has a tool called `search_namecheap_domains`. The compile-agent reverse-engineered the site's CRC32 URL signing scheme from a captured JavaScript bundle, chains five API endpoints, and merges availability + pricing + aftermarket data:
23
+ ## Quick Start
29
24
 
25
+ ```bash
26
+ bun install -g imprint-mcp
27
+ imprint teach southwest --url https://www.southwest.com
30
28
  ```
31
- $ claude "search for getimprint on Namecheap, under $20/yr renewal"
32
29
 
33
- getimprint.com taken registered 2008 GoDaddy.com, LLC
34
- getimprint.dev available $12.98/yr (19% off) renews $20.98/yr
35
- getimprint.org available $7.48/yr (42% off) renews $15.98/yr
36
- getimprint.fyi available $6.98/yr renews $9.68/yr
37
- getimprint.xyz available $2.00/yr (90% off) renews $19.48/yr
38
- ```
30
+ A browser opens. You drive the workflow and narrate what you're doing. Imprint records every request and interaction, then compiles a deterministic **MCP tool** your agent can call forever.
39
31
 
40
- Real-time domain availability with per-request URL signing — the agent wrote the signing function itself by reading the site's JS bundle.
32
+ Want to try a finished MCP before recording anything?
41
33
 
42
- <br>
43
-
44
- ## How it works
34
+ ```bash
35
+ imprint install google-flights --source examples --platform claude-desktop
36
+ ```
45
37
 
46
- <table>
47
- <tr>
48
- <td width="33%">
38
+ That registers the checked-in Google Flights example as an MCP server in your client. Swap `claude-desktop` for `claude-code`, `codex`, `openclaw`, or `hermes`, or add `--print` to see the config without changing anything.
49
39
 
50
- ### 1. Teach
40
+ On a Hermes agent or Docker host, register the examples directly into Hermes:
51
41
 
52
42
  ```bash
53
- imprint teach mysite \
54
- --url https://example.com
43
+ for site in google-flights google-hotels southwest discoverandgo echo; do
44
+ imprint install "$site" --source examples --platform hermes --no-interactive
45
+ done
55
46
  ```
56
47
 
57
- A browser opens. You drive the workflow and narrate what you're doing. Imprint records every network request and DOM interaction.
48
+ When `HERMES_HOME` is set, Imprint writes Hermes MCP entries to `$HERMES_HOME/config.yaml`; outside Hermes it uses `~/.hermes/config.yaml`. For browser-backed MCPs, `imprint install` also installs Playwright Chromium into `$HERMES_HOME/.cache/ms-playwright` and writes `PLAYWRIGHT_BROWSERS_PATH` into the MCP config so Hermes can find it. Use `--skip-browser-install` only for offline builds where you preinstall the browser yourself. In a fresh Linux image that is missing browser system libraries, install those during image build with `bunx playwright install --with-deps chromium`.
58
49
 
59
- Raw recordings are stored locally under `~/.imprint/<site>/sessions/`, and each generated tool lives under `~/.imprint/<site>/<toolName>/` by default, outside the repo. The generated `index.ts` imports from `imprint/runtime` via a `node_modules/imprint` symlink that Imprint maintains automatically — created on `emit`, self-healed at runtime if a worktree moves or vanishes (so the next `imprint mcp-server`/`cron`/`probe-backends` repairs a stale link without re-emitting). The tracked `examples/` tree remains as source fixtures and demos.
50
+ ---
60
51
 
61
- </td>
62
- <td width="33%">
52
+ ## See It in Action
63
53
 
64
- ### 2. Compile
54
+ **Teach once.** `imprint teach google-flights` records one real search and compiles a **4-tool** MCP server from that single session — the compile agent reverse-engineers Google's `batchexecute` wire format itself and wires the search→booking token chain, with no hand-written request code. Here is the actual run (6 recordings → 4 tools, every tool live-verified):
65
55
 
66
- Imprint generates replay artifacts:
56
+ ![imprint teach google-flights — a real run: six recordings compiled into four live-verified MCP tools](web/public/imprint-teach.gif)
67
57
 
68
- - **`workflow.json`** — API-level replay (fast, with named state captures)
69
- - **`playbook.yaml`** — DOM-level fallback (universal)
70
- - **`request-transform.ts`** — URL signing when the API requires per-call tokens (optional)
58
+ **Then your agent calls those tools** like any other — real-time results through a live trusted-Chrome (`cdp-replay`) backend:
71
59
 
72
- Both artifacts are written into the generated tool directory (`~/.imprint/<site>/<toolName>/`). `compile-playbook` uses that nested location by default so cron and MCP discovery can see the fallback without a custom `--out`.
60
+ ```
61
+ $ claude "cheapest nonstop SJC→SAN the first week of July, with a carry-on"
73
62
 
74
- Credentials and PII are redacted automatically: credential values become `${credential.NAME}` placeholders, sensitive values become redaction markers that preserve equality within the artifact, and a supplemental free-form scan catches common emails, phone numbers, SSNs, payment cards, JWTs, API keys, private keys, database URLs, and webhook URLs before LLM compile.
63
+ Alaska AS1623 SJC→SAN 6:00a→7:32a nonstop $137
64
+ Southwest WN2412 SJC→SAN 8:15a→9:45a nonstop $158
65
+ Delta DL2901 SJC→SAN 7:10a→8:44a nonstop $169
66
+ ```
75
67
 
76
- </td>
77
- <td width="34%">
68
+ The suite was one-shot compiled from one recording and audited at **92.6%**, every tool live-verified. *(The terminal above is a faithful replay — regenerate/record it with `bun scripts/demo-teach.ts`.)*
78
69
 
79
- ### 3. Use
70
+ ---
80
71
 
81
- A typed MCP tool is generated and wired into your AI platform. Re-run `imprint install <site>` any time to add the same emitted MCP server to another platform, or remove it later with `imprint uninstall <site>`.
72
+ ## How It Works
82
73
 
74
+ <table>
75
+ <tr>
76
+ <td align="center" width="33%">
77
+ <h3>1. Teach</h3>
78
+ <p>Open a real browser, drive the workflow, narrate what you're doing. Imprint records every network request and DOM interaction.</p>
79
+ </td>
80
+ <td align="center" width="33%">
81
+ <h3>2. Compile</h3>
82
+ <p>Generates two replay artifacts:<br><br><code>workflow.json</code> — API-level replay<br><code>playbook.yaml</code> — DOM-level fallback<br><br>Credentials are redacted automatically.</p>
83
+ </td>
84
+ <td align="center" width="34%">
85
+ <h3>3. Use</h3>
86
+ <p>A typed MCP tool your agent calls like any other tool. Works with Claude Code, Codex, Claude Desktop, and any MCP client.</p>
83
87
  </td>
84
88
  </tr>
85
89
  </table>
86
90
 
87
- > All three steps happen in a single `imprint teach` command.
91
+ > [!TIP]
92
+ > All three steps happen in a single `imprint teach` command. Credentials and PII are redacted automatically before anything reaches the LLM.
88
93
 
89
- <br>
94
+ ---
90
95
 
91
96
  ## Why Imprint?
92
97
 
93
- Other browser-tool frameworks (browser-use, Computer Use) ask the LLM to **decide every click at runtime**.
98
+ Other browser-tool frameworks ask the LLM to **decide every click at runtime**. Imprint takes a fundamentally different approach:
94
99
 
95
- | | Imprint | browser-use / Computer Use |
96
- |---|---|---|
97
- | **How it works** | Record once, replay deterministically | LLM decides every click at runtime |
100
+ | | **Imprint** | **browser-use / Computer Use** |
101
+ |:--|:--|:--|
102
+ | **Approach** | Record once, replay deterministically | LLM decides every click at runtime |
98
103
  | **Token cost** | Zero at runtime | Scales with workflow complexity |
99
104
  | **Reliability** | Deterministic — same input, same output | Variable — exploration can diverge |
100
105
  | **Bot detection** | Real Chromium + stealth-fetch | Detectable automation fingerprint |
101
- | **When it breaks** | Automatic fallback via backend ladder | No fallback |
102
- | **Time to result** | 200ms – 9s | 30s+ |
106
+ | **Fallback** | Automatic ladder (API → DOM) | None |
107
+ | **Speed** | 200ms – 9s | 30s+ |
103
108
 
104
- <br>
109
+ ---
105
110
 
106
- ## Install
111
+ ## Installation
107
112
 
108
- ### npm (requires [Bun](https://bun.sh) >= 1.3)
113
+ ### Recommended
109
114
 
110
115
  ```bash
111
116
  bun install -g imprint-mcp
112
117
  ```
113
118
 
114
- Or run without installing: `bunx imprint-mcp teach southwest --url https://www.southwest.com`
119
+ > Requires [Bun](https://bun.sh) >= 1.3. Or run without installing: `bunx imprint-mcp teach <site> --url <url>`
115
120
 
116
- ### Standalone binary (no Bun needed)
121
+ ### Standalone Binary
117
122
 
118
123
  ```bash
119
124
  curl -fsSL https://raw.githubusercontent.com/ashaychangwani/imprint/main/scripts/install.sh | bash
120
125
  ```
121
126
 
122
- The standalone binary supports `mcp-server`, `install`, `cron`, and `credential` commands.
123
- Browser commands (`teach`, `record`, `login`, `playbook`) require a full Bun + Playwright install.
124
-
125
- ### From source
127
+ ### From Source
126
128
 
127
129
  ```bash
128
130
  git clone https://github.com/ashaychangwani/imprint.git && cd imprint
129
131
  bun install && bun link
130
132
  ```
131
133
 
132
- ### Browser commands
134
+ <details>
135
+ <summary><strong>Browser setup & LLM providers</strong></summary>
133
136
 
134
- The `teach`, `record`, `login`, and `playbook` commands need Playwright's Chromium. Install it once:
137
+ <br>
138
+
139
+ **Browser commands** (`teach`, `record`, `login`, `playbook`) and browser-backed `imprint install` targets auto-install Playwright Chromium when it is missing. For offline CI or prebuilt Linux images where you pass `--skip-browser-install`, preinstall it ahead of time:
135
140
 
136
141
  ```bash
137
142
  bunx playwright install chromium
138
143
  ```
139
144
 
140
- ### LLM providers
141
-
142
- Imprint detects LLM providers from what's already on your system:
145
+ **LLM providers** are auto-detected. Run `imprint doctor` to see what's available.
143
146
 
144
- | Priority | Provider | Triggered by |
145
- |---|---|---|
146
- | 1 | `claude-cli` | `claude` on PATH (Claude Code subscription) |
147
- | 2 | `codex-cli` | `codex` on PATH (Codex subscription) |
148
- | 3 | `anthropic-api` | `ANTHROPIC_API_KEY` env var |
149
- | 4 | `cursor-cli` | `cursor` on PATH (generic prompt/playbook compile only; not `teach`/`generate`) |
147
+ | Priority | Provider | Detected via |
148
+ |:--|:--|:--|
149
+ | 1 | Claude Code | `claude` on PATH |
150
+ | 2 | Codex CLI | `codex` on PATH |
151
+ | 3 | Anthropic API | `ANTHROPIC_API_KEY` env var |
152
+ | 4 | Cursor | `cursor` on PATH |
150
153
 
151
- ```bash
152
- imprint doctor
153
- ```
154
-
155
- Shows which providers are detected. Interactive `imprint teach` prompts you to choose when multiple compatible compile providers are available, and also lists undetected providers as setup-help entries. Pick one of those help entries to see exactly which CLI or environment variable to add so it will be detected next time.
154
+ Override with `--provider <name>` and `--model <name>`.
156
155
 
157
- To force a specific provider and skip the picker, pass `--provider <name>` to `teach`, `generate`, or `compile-playbook`. `teach` and `generate` require a compile-agent provider (`claude-cli`, `codex-cli`, or `anthropic-api`); `compile-playbook` can also use `cursor-cli`.
156
+ </details>
158
157
 
159
- After selecting a provider, `teach` prompts for a **model** (e.g. `claude-opus-4-7` vs `claude-sonnet-4-6` for Anthropic, `gpt-5.4` vs `o3` for Codex). Override with `--model <name>`. Each tool compiles with a **20-minute timeout** by default — the compile agent writes the MCP server and runs thorough verification tests, so most complex tools take 10-15 minutes. Override with `--timeout <duration>` (e.g. `--timeout 30m`, `--timeout 1h`). To persist the generated tests after compilation, set `IMPRINT_KEEP_TEST=1` or pass `--keep-test`. To skip the replay-and-diff stage (the automated second pass that classifies ephemeral vs constant values), pass `--skip-replay` — faster, but may reduce workflow accuracy for sites with dynamic request parameters.
160
-
161
- <br>
158
+ ---
162
159
 
163
- ## Local compile tracing
160
+ ## The Backend Ladder
164
161
 
165
- Slow or suspicious compiles can be inspected in a local [Phoenix](https://arize.com/docs/phoenix/self-hosting/deployment-options/terminal) trace UI.
162
+ When an API call gets blocked, Imprint doesn't jump to DOM replay. It escalates through the cheapest backend that works:
166
163
 
167
- ```bash
168
- # one-time install with uv
169
- uv tool install arize-phoenix
170
- phoenix serve
171
-
172
- # in another terminal
173
- IMPRINT_TRACE=1 \
174
- IMPRINT_TRACE_BATCH=false \
175
- IMPRINT_TRACE_LLM_IO=1 \
176
- IMPRINT_TRACE_TOOL_IO=1 \
177
- PHOENIX_COLLECTOR_ENDPOINT=http://localhost:6006 \
178
- imprint teach namecheap-domains --from-session ~/.imprint/namecheap-domains/sessions/<ts>.json --provider codex-cli
164
+ ```
165
+ fetch ~200ms Plain APIs, persisted cookies
166
+
167
+
168
+ fetch-bootstrap browser Mints cookies, CSRF tokens, storage
169
+ │ + API
170
+
171
+ cdp-replay ~2-35s API calls run inside a live, trusted Chrome —
172
+ │ a protected POST refreshes its anti-bot token
173
+ │ between calls (multi-step state-changing flows)
174
+
175
+ stealth-fetch ~1-12s Defeats Akamai, Cloudflare, DataDome
176
+
177
+
178
+ playbook ~9s Full DOM replay — universal fallback
179
179
  ```
180
180
 
181
- Traces show the full compile pipeline at every level of detail: each `agent.turn.N` span captures per-turn token counts; each `llm.message_with_tools` span records model, provider, input/output tokens, and stop reason; each `agent.tool.X` span times individual tool dispatches. Drill from `cli.teach` → `compile.generate` → `agent.turn.1` → tool calls to find exactly which turn or tool is spending tokens. Set `IMPRINT_TRACE_IO_MAX_CHARS` to raise or lower captured payload size. Set `IMPRINT_TRACE_INPUT_USD_PER_1M` and `IMPRINT_TRACE_OUTPUT_USD_PER_1M` to add estimated cost attributes.
181
+ The full order is `fetch → fetch-bootstrap → cdp-replay → stealth-fetch → playbook`; `auto` mode walks it and stops at the first backend that works.
182
182
 
183
- <br>
183
+ Every recording compiles to *both* `workflow.json` and `playbook.yaml`, so the ladder always has a DOM fallback.
184
184
 
185
- ## Platform support
185
+ ---
186
186
 
187
- At the end of `imprint teach`, you pick your AI platform and Imprint handles the wiring:
187
+ ## Platform Support
188
+
189
+ At the end of `imprint teach`, pick your AI platform and Imprint wires it up:
188
190
 
189
191
  | Platform | Integration |
190
- |---|---|
191
- | **Claude Code** | Automatic — runs `claude mcp add` for you |
192
- | **Codex CLI** | Automatic — runs `codex mcp add` for you |
192
+ |:--|:--|
193
+ | **Claude Code** | Automatic — runs `claude mcp add` |
194
+ | **Codex CLI** | Automatic — runs `codex mcp add` |
193
195
  | **Claude Desktop** | Paste-ready JSON config |
194
196
  | **OpenClaw** | MCP config + SKILL.md export |
195
197
  | **Hermes** | MCP config + SKILL.md + cron mapping |
196
198
 
197
- Each site registers as its own MCP server (`imprint-southwest`, `imprint-discoverandgo`, ...) so tools never collide. See [Integrations](docs/integrations.md) for HTTP transport, Docker, and systemd options.
199
+ Each site registers as its own MCP server (`imprint-southwest`, `imprint-google-flights`, ...) so tools never collide.
198
200
 
199
- Audit or clean up those registrations with `imprint mcp`:
201
+ ---
200
202
 
201
- ```bash
202
- imprint mcp status # registrations + local teach state
203
- imprint mcp # interactive cleanup TUI
204
- imprint mcp disable imprint-mysite --yes # reversible; stores a local snapshot
205
- imprint mcp delete imprint-mysite --yes # removes external MCP registrations only
206
- ```
203
+ ## Examples
207
204
 
208
- Raw recordings under `~/.imprint/<site>/sessions/` may contain sensitive browser state. Cleanup commands leave them alone unless you explicitly choose `--local site`.
205
+ Every example below was **one-shot compiled from a single real browser-session recording** (`imprint teach`) — the generated artifacts are committed verbatim as a **proof of concept** of what the compiler produces, not as maintained integrations. Recording-derived defaults (dates, geo) age out; pass explicit values.
209
206
 
210
- See [MCP Maintenance](docs/mcp-maintenance.md) for status classifications, supported client config files, reversible disable behavior, and local artifact cleanup rules.
207
+ **★ Star examples** — multi-tool suites, each compiled from one recording and scored by the headless differential audit:
211
208
 
212
- <br>
209
+ | Example | Tools | Audit | What it shows |
210
+ |:--|:--|:--|:--|
211
+ | [**google-flights**](examples/google-flights) | 4 | 92.6% | `batchexecute` wire-format decode + search→booking producer-token chain, live `cdp-replay` |
212
+ | [**google-hotels**](examples/google-hotels) | 4 | 91.7% | autocomplete → search → reviews/booking producer-token chaining |
213
213
 
214
- ## Sharing skills across machines
214
+ Other examples:
215
215
 
216
- Teach on your laptop, ship the skill to a remote agent (OpenClaw, Hermes, a server-side cron host, ...). Skill folders committed to git contain **zero plaintext credentials** — only placeholders like `${credential.NAME}` / `${state.NAME}` and a `credentials.manifest.json` listing the secrets or durable storage keys the receiver must provision.
216
+ | Example | Description |
217
+ |:--|:--|
218
+ | [**southwest**](examples/southwest) | Live fare search — defeats Akamai bot detection |
219
+ | [**discoverandgo**](examples/discoverandgo) | Authenticated booking via per-site credential store |
220
+ | [**echo**](examples/echo) | MCP smoke-test fixture |
217
221
 
218
- For credentials, use the **encrypted bundle** flow when you can't (or don't want to) re-type passwords on the receiving machine:
222
+ Install any example into your MCP client:
219
223
 
220
224
  ```bash
221
- # On the laptop where you taught the skill:
222
- imprint credential export southwest --out southwest.imprintbundle
223
- # → prompts for a passphrase. The bundle is libsodium-encrypted with an
224
- # argon2id-derived key. Safe to send via Slack, email, scp, S3, etc.
225
-
226
- # On the OpenClaw machine (or any other receiver):
227
- imprint credential import southwest southwest.imprintbundle
228
- # → prompts for the same passphrase. Decrypts; secrets land in the OS keychain.
225
+ imprint install google-flights --source examples --platform claude-desktop
229
226
  ```
230
227
 
231
- Pass the passphrase **out-of-band** (Signal, phone, password manager share — *not* the same channel as the bundle file).
232
-
233
- After import, the same `imprint mcp-server <site>` config you'd use locally works on the receiver — it resolves credentials from that machine's credential backend and initializes a fresh cookie/state jar for every tool call. If anything's missing, `imprint mcp-server` and `imprint cron` log/fail with the exact `imprint credential set`, `imprint login`, or `imprint credential import` commands you need.
228
+ Examples are real generated MCPs, not handwritten SDK samples. `imprint install <site> --source examples` points the MCP server at this repo's `examples/` directory with `IMPRINT_HOME`, ensures Playwright Chromium for browser-backed tools, and lets your client list and call the checked-in tools immediately:
234
229
 
235
- See [Sharing Skills](docs/credential-sharing.md) for the full flow including interactive `imprint credential set` (when you can re-type), threat model, rotation, and OpenClaw / Hermes wiring details.
236
-
237
- <br>
230
+ ```bash
231
+ imprint install google-hotels --source examples --platform codex
232
+ imprint install southwest --source examples --platform claude-code
233
+ imprint install echo --source examples --platform claude-desktop --print
234
+ ```
238
235
 
239
- ## The backend ladder
236
+ For your own generated tools, leave off `--source examples`:
240
237
 
241
- When an API call gets blocked or needs browser-minted state, Imprint doesn't jump straight to DOM replay. It escalates through the cheapest mode that can satisfy the workflow:
238
+ ```bash
239
+ imprint install mysite --platform claude-code
240
+ imprint install mysite --platform codex
241
+ ```
242
242
 
243
- | | Speed | Handles |
244
- |---|---|---|
245
- | **fetch** | ~200ms | Plain APIs, persisted cookies, in-flight HTTP captures |
246
- | **fetch-bootstrap** | browser bootstrap + API replay | Pages that only need Chromium to mint cookies, CSRF, storage, or DOM-derived state |
247
- | **stealth-fetch** | ~12s first call, ~1s after | Akamai, Cloudflare, DataDome, bot-defense state |
248
- | **playbook** | ~9s | Anything — full DOM replay as fallback |
243
+ ---
249
244
 
250
- `fetch-bootstrap` is not a default rung for every workflow. `auto` inserts it only when the workflow declares bootstrap metadata, a capture requires browser/stealth bootstrap, or `fetch` returns structured `STATE_MISSING` that a browser bootstrap can satisfy. Every recording still compiles to *both* `workflow.json` and `playbook.yaml`, so the ladder has a DOM fallback when API replay cannot work.
245
+ ## CLI Reference
251
246
 
252
- State-aware workflows use named captures and `${state.NAME}` placeholders. For example, request A can set a CSRF cookie, request B can project it into a header, and the whole run stays on plain `fetch` without launching Chromium.
247
+ ```bash
248
+ imprint --help # all commands
249
+ imprint <command> --help # per-command options
250
+ ```
253
251
 
254
- <br>
252
+ | Category | Commands |
253
+ |:--|:--|
254
+ | **Pipeline** | `teach` · `record` · `redact` · `generate` · `compile-playbook` · `emit` |
255
+ | **Runtime** | `cron` · `mcp-server` · `playbook` · `probe-backends` · `audit` |
256
+ | **Credentials** | `credential set` · `credential list` · `credential export` · `credential import` · `credential migrate` |
257
+ | **Utilities** | `mcp` · `login` · `assemble` · `check` · `doctor` · `install` · `uninstall` |
255
258
 
256
- ## Examples
259
+ ---
257
260
 
258
- The checked-in `examples/` directory contains committed fixtures and demos. Generated tools from `imprint teach` go into `~/.imprint/<site>/<toolName>/` by default (configurable via `IMPRINT_HOME`). Runtime discovery (cron, MCP, probe-backends) reads `IMPRINT_HOME`, so to run the checked-in examples, point it at the repo's `examples/` directory:
261
+ ## Sharing Skills
259
262
 
260
- You can also install an example directly into an MCP client:
263
+ Teach on your laptop, ship to a remote agent. Generated MCP folders contain the portable tool artifacts: `workflow.json`, `playbook.yaml`, `index.ts`, optional shared modules, and cron/backend metadata. Copy `~/.imprint/<site>` into the receiver's `~/.imprint/<site>` or commit it to a private repo, install Imprint there, then register it:
261
264
 
262
265
  ```bash
263
- imprint install google-flights --source examples --platform claude-desktop
266
+ bun install -g imprint-mcp
267
+ imprint install mysite --platform claude-code
264
268
  ```
265
269
 
266
- Run `imprint install` with no arguments for an interactive install/uninstall picker. It only shows detected AI platforms; uninstall lists installed `imprint-*` MCP servers directly. For GUI config-file clients such as Claude Desktop, install writes an absolute Bun + Imprint CLI path so the app does not depend on your shell PATH.
270
+ Credentials stay separate. Skill folders contain **zero plaintext credentials** — only `${credential.NAME}` placeholders and a manifest listing what the receiver must provision.
267
271
 
268
- | Example | What it demonstrates | Run it |
269
- |---|---|---|
270
- | [**southwest**](examples/southwest) | Live fare watcher, defeats Akamai bot detection, price-drop notifications | `IMPRINT_HOME=examples imprint cron southwest --once` |
271
- | [**google-flights**](examples/google-flights) | Real-time flight search across all carriers, parses Google's raw protobuf response | `IMPRINT_HOME=examples imprint mcp-server google-flights` |
272
- | [**google-hotels**](examples/google-hotels) | Hotel search with star rating, guest scores, nightly + total prices | `IMPRINT_HOME=examples imprint mcp-server google-hotels` |
273
- | [**discoverandgo**](examples/discoverandgo) | Authenticated booking via per-site credential store | `IMPRINT_HOME=examples imprint cron discoverandgo --once` |
274
- | [**namecheap-domains**](examples/namecheap-domains) | Domain search with CRC32 URL signing reverse-engineered from JS, 5-endpoint chain with availability + aftermarket pricing | `IMPRINT_HOME=examples imprint mcp-server namecheap-domains` |
275
- | [**echo**](examples/echo) | MCP smoke-test fixture (no network, no LLM) | `IMPRINT_HOME=examples imprint mcp-server echo` |
272
+ ```bash
273
+ # Export (encrypted with libsodium + argon2id)
274
+ imprint credential export southwest --out southwest.imprintbundle
276
275
 
277
- <br>
276
+ # Import on another machine
277
+ imprint credential import southwest southwest.imprintbundle
278
+ ```
278
279
 
279
- ## CLI reference
280
+ Send the bundle over any channel. Pass the passphrase **out-of-band**.
280
281
 
281
- ```
282
- imprint --help # all commands
283
- imprint <command> --help # per-command options
284
- ```
282
+ See [Sharing Skills](docs/credential-sharing.md) for the full flow.
285
283
 
286
- | | Commands |
287
- |---|---|
288
- | **Pipeline** | `teach` · `record` · `redact` · `generate` · `compile-playbook` · `emit` |
289
- | **Runtime** | `cron` · `mcp-server` · `playbook` · `probe-backends` |
290
- | **Credentials** | `credential set` · `credential list` · `credential export` · `credential import` · `credential migrate` |
291
- | **Utilities** | `mcp` · `login` · `assemble` · `check` · `doctor` |
284
+ ---
292
285
 
293
- `teach`, `generate`, and `compile-playbook` accept `--provider <name>` to override the auto-detected LLM (see [Install](#install) for valid names and compile-agent support). `teach` and `generate` also take `--keep-test` to retain the agent-written `parser.test.ts` for debugging — it's deleted by default since it reads the gitignored redacted session via `$IMPRINT_SESSION_PATH` and isn't reproducible elsewhere. For multi-tool sites, use `imprint cron <site> --tool <toolName>` and `imprint probe-backends <site> --tool <toolName>` unless `--config` or `--out` points inside the target tool directory.
286
+ ## Documentation
294
287
 
295
- <br>
288
+ | | |
289
+ |:--|:--|
290
+ | [Getting Started](docs/getting-started.md) | Full walkthrough |
291
+ | [Architecture](docs/architecture.md) | Data flow and module map |
292
+ | [Integrations](docs/integrations.md) | Per-platform setup |
293
+ | [Security](docs/security.md) | Redaction, credential handling, what gets stored |
294
+ | [Sharing Skills](docs/credential-sharing.md) | Credential export/import and remote provisioning |
295
+ | [MCP Maintenance](docs/mcp-maintenance.md) | Audit, disable, restore, and prune MCP state |
296
+ | [Troubleshooting](docs/troubleshooting.md) | Common failures and fixes |
297
+ | [Tracing](docs/tracing.md) | OpenTelemetry tracing, cost rollup, and Phoenix setup |
296
298
 
297
- ## Docs
299
+ <details>
300
+ <summary>More docs</summary>
298
301
 
299
- - [Getting Started](docs/getting-started.md) — full walkthrough
300
- - [Integrations](docs/integrations.md) — per-platform setup
301
- - [MCP Maintenance](docs/mcp-maintenance.md) — audit, disable, restore, and prune Imprint MCP state
302
- - [Sharing Skills](docs/credential-sharing.md) — laptop OpenClaw / Hermes / remote-agent provisioning
303
- - [Architecture](docs/architecture.md) — data flow and module map
304
- - [Security](docs/security.md) — redaction, credential handling, what gets stored
305
- - [Troubleshooting](docs/troubleshooting.md) — common failures and fixes
306
- - [Decisions](docs/decisions.md) · [Glossary](docs/glossary.md) · [Capture Protocol](docs/capture-protocol.md) · [Playbook Debugging](docs/playbook-debugging.md) · [Notifications](docs/notifications.md)
302
+ - [Decisions](docs/decisions.md) — design rationale
303
+ - [Glossary](docs/glossary.md) — terms and concepts
304
+ - [Capture Protocol](docs/capture-protocol.md) — CDP recording details
305
+ - [Playbook Debugging](docs/playbook-debugging.md) — DOM replay debugging
306
+ - [Notifications](docs/notifications.md) — alert setup
307
307
 
308
- <br>
308
+ </details>
309
+
310
+ ---
309
311
 
310
312
  ## Contributing
311
313
 
@@ -315,8 +317,10 @@ Good first contributions: replay backends, notification predicates, auth extract
315
317
 
316
318
  See [CONTRIBUTING.md](CONTRIBUTING.md) for full guidelines.
317
319
 
318
- <br>
320
+ ---
321
+
322
+ <div align="center">
319
323
 
320
- ## License
324
+ **[MIT License](LICENSE)**
321
325
 
322
- [MIT](LICENSE)
326
+ </div>
@@ -49,7 +49,7 @@ imprint cron discoverandgo --once
49
49
  ## Notes
50
50
 
51
51
  - Discover & Go's auth model is patron-ID + session cookies. The session cookie expires; re-run `imprint login` if you start seeing AUTH_EXPIRED.
52
- - The workflow chains an `epass_server.php?method=Login` followed by the booking call. Generated `workflow.json` references both via the `${response[N].patronID}` extraction syntax.
52
+ - `imprint login` parses the `patronID` out of the recorded `epass_server.php?method=Login` POST and stores it in the credential store as `patron_id`. The booking `workflow.json` then references it via `${credential.patron_id}` — no Login call is replayed at runtime.
53
53
 
54
54
  ## Not in this demo
55
55
 
@@ -11,7 +11,7 @@
11
11
 
12
12
  ```bash
13
13
  # Inspect via mcp-inspector (recommended for debugging)
14
- npx @modelcontextprotocol/inspector imprint mcp-server --site echo
14
+ npx @modelcontextprotocol/inspector imprint mcp-server echo
15
15
 
16
16
  # Or run the included client smoke test
17
17
  bun scripts/mcp-client-test.ts
@@ -0,0 +1,28 @@
1
+ # Google Flights — `imprint-google-flights`
2
+
3
+ > **One-shot compiled, proof of concept.** Every file in this directory was generated by a single `imprint teach google-flights` run against **one** recorded browser session — no hand-written request code, parsers, or selectors. It is committed here as a proof of concept of what the compiler produces, not as a maintained integration.
4
+
5
+ A 4-tool MCP server for Google Flights, compiled from a recording of a normal flight search. Headless-claude differential audit: **92.6%** — every tool `liveVerified=true`.
6
+
7
+ ## Tools
8
+
9
+ | Tool | What it does | Notes |
10
+ |---|---|---|
11
+ | `lookup_airport` | Resolve a city/airport query to IATA codes | |
12
+ | `search_flights` | Search itineraries (origin, destination, dates, trip type, stops, price, times, duration, bags) | the star tool |
13
+ | `get_flight_booking_details` | Fare/booking detail for a selected itinerary | **consumes** a `flight_token` produced by `search_flights` (producer → consumer chain) |
14
+ | `get_flight_calendar_prices` | Lowest price per day across a date window | |
15
+
16
+ ## How it was compiled
17
+
18
+ - **Protocol**: Google's `/_/FlightsFrontendUi` **`batchexecute`** endpoint returns a nested-array (protobuf-ish) payload. The compiler reverse-engineered the encoding into `_shared/batchexecute.ts` (shared decoder) + per-tool `parser.ts`, and the `f.req` request shape into `_shared/flights_request.ts` + per-tool `request-transform.ts`.
19
+ - **Anti-bot**: the per-page `f.sid` / `bl` tokens are bootstrapped at runtime (`${state.f_sid}` placeholders), and calls run on the **cdp-replay** rung (requests issued inside a live, trusted Chrome) with a **stealth-fetch** fallback.
20
+ - **Artifacts per tool**: `workflow.json` (API replay), `playbook.yaml` (DOM fallback), `index.ts` (MCP tool), `parser.ts` + `request-transform.ts` (codecs).
21
+
22
+ ## Install
23
+
24
+ ```bash
25
+ imprint install google-flights --source examples --platform claude-desktop
26
+ ```
27
+
28
+ *Recording-derived defaults (dates) age out — pass explicit values. See the repo [README](../../README.md) and [docs](../../docs/architecture.md).*
@@ -0,0 +1,63 @@
1
+ // Decode Google's batchexecute streaming envelope used by every FlightsFrontendUi RPC.
2
+ //
3
+ // Wire format (verified against recorded seq 69/97/111/667):
4
+ // )]}'\n\n <- anti-XSSI magic prefix
5
+ // <decimal length>\n <- length line (jsonChars + 2; counts bounding newlines)
6
+ // [[...rows...]]\n <- one chunk = single-line JSON array of rows
7
+ // ...repeats...
8
+ //
9
+ // We DELIBERATELY do not slice by the length lines: the stated length is
10
+ // `jsonChars + 2`, so naive slice(pos, pos+len) overshoots into the next token.
11
+ // Because every chunk is single-line JSON (all interior newlines are escaped as
12
+ // \n), splitting on "\n" is exact and robust. Each real RPC result is a row
13
+ // ["wrb.fr", <rpcid|null>, "<doubly-encoded JSON payload>", ...]; row[2] must be
14
+ // JSON.parse'd a SECOND time. Sidecar rows ("di", "af.httprm", "e") are ignored.
15
+
16
/** One decoded `wrb.fr` result row: its RPC id (if any) and the twice-parsed payload. */
type BatchExecuteFrame = { rpcid: string | null; payload: any };

/** Anti-XSSI magic prefix that precedes the response body. */
const XSSI_PREFIX = ")]}'";

/** Matches the decimal length-marker lines that sit between chunks. */
const LENGTH_LINE = /^\d+$/;

/**
 * Parse JSON without throwing. Returns `undefined` on malformed input;
 * `JSON.parse` itself never yields `undefined`, so it is a safe failure sentinel.
 */
function tryParseJson(text: string): unknown {
  try {
    return JSON.parse(text);
  } catch {
    return undefined;
  }
}

/**
 * Append every usable `wrb.fr` row found in `chunk` to `out`.
 * Rows that are not arrays, are not tagged "wrb.fr", carry a non-string
 * payload slot, or whose payload is not valid JSON are skipped.
 */
function collectWrbFrames(chunk: unknown, out: BatchExecuteFrame[]): void {
  if (!Array.isArray(chunk)) return;

  for (const row of chunk) {
    if (!Array.isArray(row) || row[0] !== 'wrb.fr') continue;
    if (typeof row[2] !== 'string') continue;

    // row[2] is itself a JSON document encoded as a string: parse it a second time.
    const payload: any = tryParseJson(row[2]);
    if (payload === undefined) continue;

    out.push({ rpcid: typeof row[1] === 'string' ? row[1] : null, payload });
  }
}

/**
 * Decode a batchexecute response body into its `wrb.fr` result frames.
 *
 * Primary path: the body is split on "\n"; blank lines, decimal length lines
 * and anything not starting with "[" are ignored, and each remaining line is
 * parsed as one JSON chunk of rows. Lines that fail to parse are tolerated
 * (treated as truncated / partial) and skipped.
 *
 * Fallback path: if the line-by-line pass yields no frames, the whole body
 * (after the XSSI prefix) is parsed as a single JSON array of rows. This covers
 * bodies where the outer array is spread over several lines, so no individual
 * line is valid JSON on its own.
 * NOTE(review): non-streaming batchexecute responses are believed to use this
 * multi-line shape — confirm against a recorded session.
 *
 * @param raw Raw response text. Non-string or empty input yields an empty list.
 * @returns Frames in wire order; empty when nothing decodable is present.
 */
export function decodeBatchExecute(raw: string): BatchExecuteFrame[] {
  // Defensive: callers may hand over a missing body; do not throw on it.
  if (typeof raw !== 'string' || raw.length === 0) return [];

  const text = raw.startsWith(XSSI_PREFIX) ? raw.slice(XSSI_PREFIX.length) : raw;
  const out: BatchExecuteFrame[] = [];

  for (const line of text.split('\n')) {
    const trimmed = line.trim();
    if (trimmed.length === 0) continue;
    // Skip the decimal length marker lines.
    if (LENGTH_LINE.test(trimmed)) continue;
    // Chunks are JSON arrays; anything else is noise / partial.
    if (trimmed[0] !== '[') continue;

    collectWrbFrames(tryParseJson(trimmed), out);
  }

  if (out.length === 0) {
    // Nothing decoded line-by-line: try the body as one (possibly multi-line) array.
    const whole = text.trim();
    if (whole[0] === '[') {
      collectWrbFrames(tryParseJson(whole), out);
    }
  }

  return out;
}
58
+
59
/**
 * Convenience accessor for a single decoded payload.
 *
 * @param raw Raw batchexecute response text, passed through to `decodeBatchExecute`.
 * @param rpcid When given (non-null), selects the first frame whose rpcid equals
 *   it; when omitted, the first decoded frame is used.
 * @returns The selected frame's payload, or `undefined` if no frame qualifies.
 */
export function extractRpcPayload(raw: string, rpcid?: string): any {
  const decoded = decodeBatchExecute(raw);

  // No id requested: fall back to whatever frame came first on the wire.
  if (rpcid == null) {
    return decoded[0]?.payload;
  }

  for (const candidate of decoded) {
    if (candidate.rpcid === rpcid) {
      return candidate.payload;
    }
  }
  return undefined;
}