mercury-agent 0.4.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (218) hide show
  1. package/LICENSE +22 -0
  2. package/README.md +438 -0
  3. package/container/Dockerfile +127 -0
  4. package/container/Dockerfile.base +109 -0
  5. package/container/Dockerfile.power +17 -0
  6. package/container/agent-package.json +8 -0
  7. package/container/build.sh +54 -0
  8. package/docs/TODOS.md +147 -0
  9. package/docs/auth/dashboard.md +28 -0
  10. package/docs/auth/overview.md +109 -0
  11. package/docs/auth/whatsapp.md +173 -0
  12. package/docs/configuration.md +54 -0
  13. package/docs/container-lifecycle.md +349 -0
  14. package/docs/context-architecture.md +87 -0
  15. package/docs/deployment.md +199 -0
  16. package/docs/extensions.md +375 -0
  17. package/docs/graceful-shutdown.md +62 -0
  18. package/docs/kb-distillation.md +77 -0
  19. package/docs/media/overview.md +140 -0
  20. package/docs/media/whatsapp.md +171 -0
  21. package/docs/memory.md +137 -0
  22. package/docs/permissions.md +217 -0
  23. package/docs/pipeline.md +228 -0
  24. package/docs/prd-chat-memory.md +76 -0
  25. package/docs/prd-config-load.md +82 -0
  26. package/docs/rate-limiting.md +166 -0
  27. package/docs/scheduler.md +288 -0
  28. package/docs/setup-discord.md +100 -0
  29. package/docs/setup-slack.md +119 -0
  30. package/docs/setup-whatsapp.md +94 -0
  31. package/docs/subagents.md +166 -0
  32. package/docs/web-search.md +62 -0
  33. package/examples/extensions/README.md +12 -0
  34. package/examples/extensions/charts/index.ts +13 -0
  35. package/examples/extensions/charts/skill/SKILL.md +98 -0
  36. package/examples/extensions/gws/README.md +52 -0
  37. package/examples/extensions/gws/index.ts +106 -0
  38. package/examples/extensions/gws/skill/SKILL.md +57 -0
  39. package/examples/extensions/gws/skill/references/calendar.md +101 -0
  40. package/examples/extensions/gws/skill/references/docs.md +65 -0
  41. package/examples/extensions/gws/skill/references/drive.md +79 -0
  42. package/examples/extensions/gws/skill/references/gmail.md +85 -0
  43. package/examples/extensions/gws/skill/references/sheets.md +60 -0
  44. package/examples/extensions/napkin/index.ts +821 -0
  45. package/examples/extensions/napkin/prompts/consolidation-monthly.md +73 -0
  46. package/examples/extensions/napkin/prompts/consolidation-weekly.md +67 -0
  47. package/examples/extensions/napkin/prompts/kb-distillation.md +176 -0
  48. package/examples/extensions/napkin/skill/SKILL.md +728 -0
  49. package/examples/extensions/pdf/index.ts +23 -0
  50. package/examples/extensions/pdf/skill/LICENSE.txt +30 -0
  51. package/examples/extensions/pdf/skill/SKILL.md +314 -0
  52. package/examples/extensions/pdf/skill/forms.md +294 -0
  53. package/examples/extensions/pdf/skill/reference.md +612 -0
  54. package/examples/extensions/pdf/skill/scripts/check_bounding_boxes.py +65 -0
  55. package/examples/extensions/pdf/skill/scripts/check_fillable_fields.py +11 -0
  56. package/examples/extensions/pdf/skill/scripts/convert_pdf_to_images.py +33 -0
  57. package/examples/extensions/pdf/skill/scripts/create_validation_image.py +37 -0
  58. package/examples/extensions/pdf/skill/scripts/extract_form_field_info.py +122 -0
  59. package/examples/extensions/pdf/skill/scripts/extract_form_structure.py +115 -0
  60. package/examples/extensions/pdf/skill/scripts/fill_fillable_fields.py +98 -0
  61. package/examples/extensions/pdf/skill/scripts/fill_pdf_form_with_annotations.py +107 -0
  62. package/examples/extensions/permission-guard/index.ts +65 -0
  63. package/examples/extensions/pinchtab/index.ts +199 -0
  64. package/examples/extensions/pinchtab/lib/session-injector.ts +144 -0
  65. package/examples/extensions/pinchtab/skill/SKILL.md +224 -0
  66. package/examples/extensions/pinchtab/skill/TRUST.md +69 -0
  67. package/examples/extensions/pinchtab/skill/references/api.md +297 -0
  68. package/examples/extensions/pinchtab/skill/references/env.md +45 -0
  69. package/examples/extensions/pinchtab/skill/references/profiles.md +107 -0
  70. package/examples/extensions/tradestation/host/refresh.ts +102 -0
  71. package/examples/extensions/tradestation/index.ts +153 -0
  72. package/examples/extensions/tradestation/skill/SKILL.md +67 -0
  73. package/examples/extensions/tradestation/skill/scripts/ts-cli.ts +111 -0
  74. package/examples/extensions/voice-synth/index.ts +94 -0
  75. package/examples/extensions/voice-synth/skill/SKILL.md +38 -0
  76. package/examples/extensions/voice-transcribe/index.ts +381 -0
  77. package/examples/extensions/voice-transcribe/requirements.txt +8 -0
  78. package/examples/extensions/voice-transcribe/scripts/transcribe.py +179 -0
  79. package/examples/extensions/voice-transcribe/skill/SKILL.md +53 -0
  80. package/examples/extensions/web-search/index.ts +22 -0
  81. package/examples/extensions/web-search/skill/SKILL.md +114 -0
  82. package/examples/extensions/web-search/skill/references/apartments.md +178 -0
  83. package/examples/extensions/web-search/skill/references/car-purchase.md +132 -0
  84. package/examples/extensions/web-search/skill/references/car-rental.md +113 -0
  85. package/examples/extensions/web-search/skill/references/flights.md +133 -0
  86. package/examples/extensions/web-search/skill/references/hotels.md +148 -0
  87. package/examples/extensions/yahoo-mail/cli/bun.lock +66 -0
  88. package/examples/extensions/yahoo-mail/cli/package.json +13 -0
  89. package/examples/extensions/yahoo-mail/cli/ymail.mjs +353 -0
  90. package/examples/extensions/yahoo-mail/index.ts +57 -0
  91. package/examples/extensions/yahoo-mail/skill/SKILL.md +78 -0
  92. package/package.json +106 -0
  93. package/resources/agents/explore.md +50 -0
  94. package/resources/agents/worker.md +24 -0
  95. package/resources/builtin-extensions.txt +3 -0
  96. package/resources/connection-env-vars.json +25 -0
  97. package/resources/extensions/.gitkeep +0 -0
  98. package/resources/pi-extensions/subagent/agents.ts +126 -0
  99. package/resources/pi-extensions/subagent/index.ts +964 -0
  100. package/resources/profiles/coding/AGENTS.md +43 -0
  101. package/resources/profiles/coding/mercury-profile.yaml +15 -0
  102. package/resources/profiles/general/AGENTS.md +31 -0
  103. package/resources/profiles/general/mercury-profile.yaml +15 -0
  104. package/resources/profiles/research/AGENTS.md +40 -0
  105. package/resources/profiles/research/mercury-profile.yaml +15 -0
  106. package/resources/skills/config/SKILL.md +25 -0
  107. package/resources/skills/context/SKILL.md +33 -0
  108. package/resources/skills/conversation-recap/SKILL.md +19 -0
  109. package/resources/skills/media/SKILL.md +27 -0
  110. package/resources/skills/mutes/SKILL.md +31 -0
  111. package/resources/skills/permissions/SKILL.md +19 -0
  112. package/resources/skills/preferences/SKILL.md +31 -0
  113. package/resources/skills/recall/SKILL.md +24 -0
  114. package/resources/skills/roles/SKILL.md +18 -0
  115. package/resources/skills/spaces/SKILL.md +18 -0
  116. package/resources/skills/tasks/SKILL.md +45 -0
  117. package/resources/templates/AGENTS.md +157 -0
  118. package/resources/templates/env.template +34 -0
  119. package/resources/templates/mercury.example.yaml +75 -0
  120. package/src/adapters/discord-native.ts +534 -0
  121. package/src/adapters/discord.ts +38 -0
  122. package/src/adapters/setup.ts +89 -0
  123. package/src/adapters/slack.ts +9 -0
  124. package/src/adapters/whatsapp-media.ts +337 -0
  125. package/src/adapters/whatsapp.ts +629 -0
  126. package/src/agent/api-socket.ts +127 -0
  127. package/src/agent/container-entry.ts +967 -0
  128. package/src/agent/container-error.ts +49 -0
  129. package/src/agent/container-runner.ts +1272 -0
  130. package/src/agent/model-capabilities-core.ts +23 -0
  131. package/src/agent/model-capabilities.ts +231 -0
  132. package/src/agent/pi-failure-class.ts +83 -0
  133. package/src/agent/pi-jsonl-parser.ts +306 -0
  134. package/src/agent/preferences-prompt.ts +20 -0
  135. package/src/agent/user-error-messages.ts +78 -0
  136. package/src/bridges/discord.ts +171 -0
  137. package/src/bridges/slack.ts +177 -0
  138. package/src/bridges/teams.ts +160 -0
  139. package/src/bridges/telegram.ts +571 -0
  140. package/src/bridges/whatsapp.ts +290 -0
  141. package/src/chat-shim.ts +259 -0
  142. package/src/cli/mercury.ts +2508 -0
  143. package/src/cli/mrctl-http.ts +27 -0
  144. package/src/cli/mrctl.ts +611 -0
  145. package/src/cli/whatsapp-auth.ts +260 -0
  146. package/src/config-file.ts +397 -0
  147. package/src/config-model-chain.ts +30 -0
  148. package/src/config.ts +316 -0
  149. package/src/core/api-types.ts +58 -0
  150. package/src/core/api.ts +105 -0
  151. package/src/core/commands.ts +76 -0
  152. package/src/core/conversation.ts +47 -0
  153. package/src/core/handler.ts +206 -0
  154. package/src/core/media.ts +200 -0
  155. package/src/core/mute-duration.ts +22 -0
  156. package/src/core/outbox.ts +76 -0
  157. package/src/core/permissions.ts +192 -0
  158. package/src/core/profiles.ts +245 -0
  159. package/src/core/rate-limiter.ts +127 -0
  160. package/src/core/router.ts +191 -0
  161. package/src/core/routes/chat.ts +172 -0
  162. package/src/core/routes/config-builtin.ts +107 -0
  163. package/src/core/routes/config.ts +81 -0
  164. package/src/core/routes/connections.ts +190 -0
  165. package/src/core/routes/console.ts +668 -0
  166. package/src/core/routes/control.ts +46 -0
  167. package/src/core/routes/conversations.ts +66 -0
  168. package/src/core/routes/dashboard.ts +2491 -0
  169. package/src/core/routes/extensions.ts +37 -0
  170. package/src/core/routes/index.ts +14 -0
  171. package/src/core/routes/media.ts +72 -0
  172. package/src/core/routes/messages.ts +37 -0
  173. package/src/core/routes/mutes.ts +89 -0
  174. package/src/core/routes/prefs.ts +95 -0
  175. package/src/core/routes/roles.ts +125 -0
  176. package/src/core/routes/spaces.ts +60 -0
  177. package/src/core/routes/storage.ts +126 -0
  178. package/src/core/routes/tasks.ts +189 -0
  179. package/src/core/routes/tradestation.ts +268 -0
  180. package/src/core/routes/tts.ts +51 -0
  181. package/src/core/runtime.ts +1140 -0
  182. package/src/core/space-queue.ts +103 -0
  183. package/src/core/storage-cleanup.ts +140 -0
  184. package/src/core/storage-guard.ts +24 -0
  185. package/src/core/task-scheduler.ts +132 -0
  186. package/src/core/telegram-format.ts +178 -0
  187. package/src/core/trigger.ts +142 -0
  188. package/src/dashboard/index.html +729 -0
  189. package/src/dashboard/tokens.css +53 -0
  190. package/src/extensions/api.ts +252 -0
  191. package/src/extensions/catalog.ts +117 -0
  192. package/src/extensions/config-registry.ts +83 -0
  193. package/src/extensions/context.ts +36 -0
  194. package/src/extensions/hooks.ts +156 -0
  195. package/src/extensions/image-builder.ts +617 -0
  196. package/src/extensions/installer.ts +306 -0
  197. package/src/extensions/jobs.ts +122 -0
  198. package/src/extensions/loader.ts +271 -0
  199. package/src/extensions/permission-guard.ts +52 -0
  200. package/src/extensions/reserved.ts +28 -0
  201. package/src/extensions/skills.ts +123 -0
  202. package/src/extensions/types.ts +462 -0
  203. package/src/logger.ts +174 -0
  204. package/src/main.ts +586 -0
  205. package/src/server.ts +391 -0
  206. package/src/storage/db.ts +1624 -0
  207. package/src/storage/memory.ts +45 -0
  208. package/src/storage/pi-auth.ts +95 -0
  209. package/src/text/markdown.ts +117 -0
  210. package/src/text/rtl.ts +38 -0
  211. package/src/tradestation/host-api.ts +77 -0
  212. package/src/tradestation/pending-orders.ts +69 -0
  213. package/src/tts/azure.ts +52 -0
  214. package/src/tts/google.ts +128 -0
  215. package/src/tts/index.ts +8 -0
  216. package/src/tts/language.ts +20 -0
  217. package/src/tts/synthesize.ts +133 -0
  218. package/src/types.ts +295 -0
@@ -0,0 +1,199 @@
1
/**
 * Pinchtab extension entry point.
 *
 * Registers the `pinchtab` CLI (installed together with Playwright's Chromium),
 * the default roles allowed to use it, and the bundled skill directory. It also
 * hooks `before_container` to hand the container:
 *   - `CHROME_BINARY` / `CHROME_FLAGS` env vars,
 *   - `MERCURY_BROWSER_SESSIONS` (re-added verbatim when set on the host),
 *   - a system prompt fragment that defines the `pinchtab_ensure` bash helper
 *     and, when browser sessions are configured, the `pinchtab_nav` helper
 *     that injects saved cookies + localStorage.
 *
 * NOTE: the contents of the template literals below are runtime text (bash and
 * JavaScript that end up in the system prompt). They are kept flush-left on
 * purpose: the heredoc terminator `JSSCRIPT` must sit at column 0.
 *
 * @param mercury Minimal subset of the Mercury extension API used here.
 */
export default function pinchtabExtension(mercury: {
  cli(opts: { name: string; install: string }): void;
  skill(relativePath: string): void;
  permission(opts: { defaultRoles: string[] }): void;
  /** biome-ignore lint/suspicious/noExplicitAny: minimal stub matching MercuryExtensionAPI subset */
  on(event: string, handler: (event: any, ctx: any) => Promise<any>): void;
}) {
  mercury.cli({
    name: "pinchtab",
    // Installs pinchtab + Playwright Chromium, resolves the Chromium executable
    // (via Playwright, falling back to a cache search), and symlinks it to the
    // two paths pinchtab_ensure probes.
    install:
      'npm install -g pinchtab playwright && npx playwright install --with-deps chromium && CHROMIUM=$(NODE_PATH="$(npm root -g)" node -e "try{process.stdout.write(require(\'playwright\').chromium.executablePath())}catch(e){}" 2>/dev/null) && { test -x "$CHROMIUM" || CHROMIUM=$(find /home/mercury/.cache/ms-playwright -type f -path \'*/chrome-linux/chrome\' ! -path \'*headless_shell*\' 2>/dev/null | head -1); } && test -n "$CHROMIUM" && test -x "$CHROMIUM" && ln -sf "$CHROMIUM" /usr/local/bin/chromium && ln -sf "$CHROMIUM" /usr/bin/chromium && rm -rf /var/lib/apt/lists/*',
  });
  mercury.permission({ defaultRoles: ["admin", "member"] });
  mercury.skill("./skill");

  // Chrome needs --no-sandbox when running inside Docker (no user namespace for sandboxing).
  // Also inject search engine preference and authenticated-session support into system prompt.
  mercury.on("before_container", async () => {
    // Bash ${...} must be escaped as \${...} so this TS template is valid.
    const pinchtabEnsure = `pinchtab_ensure() {
local bind="\${BRIDGE_BIND:-127.0.0.1}"
local port="\${BRIDGE_PORT:-9867}"
local log="\${PINCHTAB_LOG:-/tmp/pinchtab.log}"
local max_wait="\${1:-120}"
mkdir -p "$(dirname "$log")" 2>/dev/null || true
: >"$log"
if [ ! -x "\${CHROME_BINARY:-}" ]; then
for _c in /usr/local/bin/chromium /usr/bin/chromium; do
if [ -x "$_c" ]; then export CHROME_BINARY="$_c"; break; fi
done
fi
if [ ! -x "\${CHROME_BINARY:-}" ]; then
echo "No executable Chromium (CHROME_BINARY=\${CHROME_BINARY:-}; tried /usr/local/bin/chromium, /usr/bin/chromium). Rebuild mercury-agent-ext (restart Mercury)." | tee -a "$log"
return 1
fi
_pinchtab_port_open() { (echo >/dev/tcp/$bind/$port) 2>/dev/null; }
if command -v pinchtab >/dev/null 2>&1 && _pinchtab_port_open; then
return 0
fi
pkill -f '[p]inchtab' 2>/dev/null || true
nohup pinchtab >>"$log" 2>&1 &
local pid=$!
sleep 2
if ! kill -0 "$pid" 2>/dev/null; then
echo "pinchtab exited immediately (pid $pid). Log:" >&2
tail -120 "$log" >&2
return 1
fi
local i=0
while [ "$i" -lt "$max_wait" ]; do
if _pinchtab_port_open; then
return 0
fi
if ! kill -0 "$pid" 2>/dev/null; then
echo "pinchtab died during startup. Log:" >&2
tail -120 "$log" >&2
return 1
fi
sleep 1
i=$((i+1))
done
echo "pinchtab did not listen on $bind:$port within \${max_wait}s. Log:" >&2
tail -120 "$log" >&2
return 1
}`;

    let sessionFunctions = "";
    let navExampleCommand = 'pinchtab nav "https://search.brave.com/search?q=your+query+here"';
    let sessionPromptFragment = "";

    if (process.env.MERCURY_BROWSER_SESSIONS) {
      // Node.js injection script — pure ES5-style, no backticks or ${}.
      // Backslashes DO need doubling: an untagged template literal cooks "\." to ".",
      // which would silently turn the regex dots below into match-anything wildcards.
      // Reads MERCURY_BROWSER_SESSIONS from env, looks up the domain, injects cookies +
      // localStorage via the pinchtab HTTP bridge, then reloads. Exits 0 on success (session
      // found and injected), 1 if no session for this domain, 2 on unexpected error
      // (including a non-2xx response from /navigate, so pinchtab_nav falls back to
      // a plain `pinchtab nav`).
      const nodeInjectScript = `var url = process.argv[2];
if (!url) process.exit(1);
var raw = process.env.MERCURY_BROWSER_SESSIONS;
if (!raw) process.exit(1);
var sessions;
try { sessions = JSON.parse(Buffer.from(raw, "base64").toString()); } catch (e) { process.exit(1); }
var hostname = (new URL(url)).hostname;
var parts = hostname.split(".");
var multiPartTld = /\\.(co|com|org|net|gov|ac|edu|or|ne|gr|gen|plc|ltd|me)\\.[a-z]{2}$/i;
var domain = parts.length <= 2 ? hostname : (multiPartTld.test(hostname) ? parts.slice(-3).join(".") : parts.slice(-2).join("."));
var b64 = sessions[domain];
if (!b64) process.exit(1);
var state;
try { state = JSON.parse(Buffer.from(b64, "base64").toString()); } catch (e) { process.exit(1); }
var bind = process.env.BRIDGE_BIND || "127.0.0.1";
var port = process.env.BRIDGE_PORT || "9867";
var bridge = "http://" + bind + ":" + port;
Promise.resolve()
.then(function () {
return fetch(bridge + "/navigate", {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({ url: url }),
});
})
.then(function (res) {
if (!res.ok) throw new Error("pinchtab navigate failed: " + res.status);
})
.then(function () {
if (!state.cookies || !state.cookies.length) return;
return fetch(bridge + "/cookies", {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({ url: url, cookies: state.cookies }),
}).catch(function () {});
})
.then(function () {
var origins = state.origins || [];
return origins.reduce(function (p, o) {
return p.then(function () {
if (!o.localStorage || !o.localStorage.length) return;
var script =
"(function(){" +
o.localStorage
.map(function (i) {
return "localStorage.setItem(" + JSON.stringify(i.name) + "," + JSON.stringify(i.value) + ")";
})
.join(";") +
"})()";
return fetch(bridge + "/evaluate", {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({ expression: script }),
}).catch(function () {});
});
}, Promise.resolve());
})
.then(function () {
return fetch(bridge + "/evaluate", {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({ expression: "window.location.reload()" }),
}).catch(function () {});
})
.then(function () {
process.exit(0);
})
.catch(function (e) {
console.error(e.message);
process.exit(2);
});`;

      // Single-quoted heredoc (<< 'JSSCRIPT') so the JS code is written verbatim.
      // JSSCRIPT terminator must stay at column 0 — do not indent it.
      sessionFunctions = `
_pinchtab_write_inject() {
cat > /tmp/_pinchtab_inject.js << 'JSSCRIPT'
${nodeInjectScript}
JSSCRIPT
}

pinchtab_nav() {
local url="$1"
pinchtab_ensure || return 1
if [ ! -f /tmp/_pinchtab_inject.js ]; then
_pinchtab_write_inject
fi
if node /tmp/_pinchtab_inject.js "$url" 2>/dev/null; then
return 0
fi
pinchtab nav "$url"
}`;

      navExampleCommand = 'pinchtab_nav "https://search.brave.com/search?q=your+query+here"';
      sessionPromptFragment =
        "\n\nAuthenticated browser sessions are available. Use `pinchtab_nav <url>` instead of `pinchtab nav <url>` for all navigations — it automatically injects the saved session (cookies + localStorage) before navigation when one is available for the domain. If after navigating you land on a login or authentication page (session expired), tell the user their session has expired.";
      if (process.env.MERCURY_CONSOLE_URL) {
        sessionPromptFragment += ` Include a re-authentication link: ${process.env.MERCURY_CONSOLE_URL}/dashboard/browser-sessions?recapture=<eTLD+1-of-the-site> (e.g. for chase.com: ${process.env.MERCURY_CONSOLE_URL}/dashboard/browser-sessions?recapture=chase.com). Never attempt to enter credentials on the user's behalf.`;
      }
    }

    return {
      env: {
        CHROME_BINARY: "/usr/local/bin/chromium",
        CHROME_FLAGS: "--no-sandbox --disable-dev-shm-usage",
        // container-runner strips MERCURY_ prefix on passthrough, so the inner
        // container only gets BROWSER_SESSIONS. The inject script reads
        // MERCURY_BROWSER_SESSIONS, so re-add it explicitly via extraEnv
        // (extraEnv keys are passed verbatim, not stripped).
        ...(process.env.MERCURY_BROWSER_SESSIONS
          ? { MERCURY_BROWSER_SESSIONS: process.env.MERCURY_BROWSER_SESSIONS }
          : {}),
      },
      systemPrompt: `When searching the web, always use Brave Search. Never use Google.

Before any pinchtab CLI use in Docker, define and run:

\`\`\`bash
${pinchtabEnsure}${sessionFunctions}
pinchtab_ensure || exit 1
${navExampleCommand}
sleep 3
pinchtab text
\`\`\`${sessionPromptFragment}`,
    };
  });
}
@@ -0,0 +1,144 @@
1
+ /**
2
+ * Browser session injector for pinchtab.
3
+ *
4
+ * Reads MERCURY_BROWSER_SESSIONS from env at module load, parses the base64
5
+ * JSON manifest into an in-memory map, and exposes injectSessionIfPresent()
6
+ * which injects cookies + localStorage via pinchtab's HTTP API before navigation.
7
+ *
8
+ * Used by the pinchtab before_container system prompt fragment — the agent is
9
+ * instructed to call the standalone inject-and-nav binary before navigating
10
+ * to any URL when authenticated sessions are available.
11
+ */
12
+
13
/** Allowed values of a cookie's SameSite attribute. */
type StorageStateSameSite = "Strict" | "Lax" | "None";

/**
 * One cookie entry of a saved browser storage state.
 * NOTE(review): the field set looks like Playwright's `storageState()` cookie
 * shape — confirm against whatever captures the sessions.
 */
export interface StorageStateCookie {
  // Identity and payload
  name: string;
  value: string;
  // Scope the cookie applies to
  domain: string;
  path: string;
  // Expiry as a number; presumably Unix seconds — TODO confirm
  expires: number;
  // Attribute flags
  httpOnly: boolean;
  secure: boolean;
  sameSite: StorageStateSameSite;
}
23
+
24
/** localStorage contents saved for a single origin. */
export interface StorageStateOrigin {
  // Origin the entries were captured from
  origin: string;
  // Key/value pairs, replayed with localStorage.setItem(name, value)
  localStorage: { name: string; value: string }[];
}
28
+
29
/** A saved browser session: its cookies plus per-origin localStorage. */
export interface StorageState {
  cookies: Array<StorageStateCookie>;
  origins: Array<StorageStateOrigin>;
}
33
+
34
/**
 * Reduce a URL or bare hostname to an approximate eTLD+1.
 *
 * Examples: "bank.chase.com" → "chase.com", "https://www.bbc.co.uk/news" → "bbc.co.uk".
 *
 * Uses the same heuristic as the inline inject script in the pinchtab
 * extension's index.ts so both derive identical lookup keys: hostnames ending
 * in a well-known second-level label followed by a two-letter country code
 * (co.uk, com.au, …) keep three labels, everything else keeps two. This is
 * not a full Public Suffix List lookup.
 *
 * @param urlOrHostname Absolute URL, or a bare hostname.
 * @returns The hostname unchanged when it has at most two labels, otherwise
 *          its last two (or three) labels joined by ".".
 */
export function extractDomain(urlOrHostname: string): string {
  let hostname = urlOrHostname;
  try {
    hostname = new URL(urlOrHostname).hostname;
  } catch {
    // Not parseable as a URL — treat the input as a bare hostname.
  }
  const labels = hostname.split(".");
  if (labels.length <= 2) return hostname;
  const multiPartTld = /\.(co|com|org|net|gov|ac|edu|or|ne|gr|gen|plc|ltd|me)\.[a-z]{2}$/i;
  const keep = multiPartTld.test(hostname) ? 3 : 2;
  return labels.slice(-keep).join(".");
}
46
+
47
/**
 * Decode MERCURY_BROWSER_SESSIONS into a domain → StorageState map.
 *
 * The env var holds base64-encoded JSON mapping each domain to another
 * base64-encoded JSON document (the storage state). An entry that fails to
 * decode is skipped with a warning; if the manifest itself cannot be decoded
 * an empty map is returned.
 */
function loadSessionMap(): Map<string, StorageState> {
  const encodedManifest = process.env.MERCURY_BROWSER_SESSIONS;
  if (!encodedManifest) {
    return new Map();
  }

  // base64 → UTF-8 → JSON; throws on malformed input.
  const decodeJson = (b64: string): unknown => JSON.parse(Buffer.from(b64, "base64").toString("utf8"));

  try {
    const manifest = decodeJson(encodedManifest) as Record<string, string>;
    const sessions = new Map<string, StorageState>();
    Object.entries(manifest).forEach(([domain, encodedState]) => {
      try {
        sessions.set(domain, decodeJson(encodedState) as StorageState);
      } catch (err) {
        console.warn(`[session-injector] Failed to parse session for domain "${domain}":`, err);
      }
    });
    return sessions;
  } catch (err) {
    console.warn("[session-injector] Failed to parse MERCURY_BROWSER_SESSIONS:", err);
    return new Map();
  }
}
69
+
70
// Built once when the module is loaded; hasSession() and injectSessionIfPresent() read from it.
const sessionMap: Map<string, StorageState> = loadSessionMap();
71
+
72
// Base URL of the pinchtab HTTP bridge. `||` (not `??`) so that an empty
// BRIDGE_BIND / BRIDGE_PORT falls back to the default, matching the bash
// helper (`${BRIDGE_BIND:-127.0.0.1}`) and the inline inject script.
const PINCHTAB_BASE = `http://${process.env.BRIDGE_BIND || "127.0.0.1"}:${process.env.BRIDGE_PORT || "9867"}`;
73
+
74
/**
 * Report whether a saved session exists for the domain of `url`.
 * The lookup key is the result of extractDomain(url).
 */
export function hasSession(url: string): boolean {
  const domainKey = extractDomain(url);
  return sessionMap.has(domainKey);
}
78
+
79
/**
 * Navigate to `url` via pinchtab's HTTP API and, when a saved session exists
 * for the URL's domain, inject its cookies + localStorage and reload so the
 * site picks up the injected state.
 *
 * Order of operations: navigate → cookies → localStorage → reload.
 * If no session is found for the domain this is a plain navigate.
 * Every failure after the initial navigate (HTTP or network) is logged and
 * swallowed; injection is best-effort.
 *
 * @param url Absolute URL to open.
 * @throws Error when the initial `/navigate` call returns a non-2xx status
 *         (a network failure on that call rejects with fetch's own error).
 */
export async function injectSessionIfPresent(url: string): Promise<void> {
  const domain = extractDomain(url);
  const session = sessionMap.get(domain);
  const jsonHeaders = { "Content-Type": "application/json" };

  // Navigate first (creates context, sets tab)
  const navRes = await fetch(`${PINCHTAB_BASE}/navigate`, {
    method: "POST",
    headers: jsonHeaders,
    body: JSON.stringify({ url }),
  });
  if (!navRes.ok) {
    throw new Error(`pinchtab navigate failed: ${navRes.status} ${await navRes.text()}`);
  }

  if (!session) return;

  // Inject cookies. Tolerate a stored state without a `cookies` array, and keep
  // going on network errors so localStorage + reload still run.
  const cookies = session.cookies ?? [];
  if (cookies.length > 0) {
    try {
      const cookieRes = await fetch(`${PINCHTAB_BASE}/cookies`, {
        method: "POST",
        headers: jsonHeaders,
        body: JSON.stringify({ url, cookies }),
      });
      if (!cookieRes.ok) {
        console.warn(`[session-injector] Cookie injection partial failure for "${domain}": ${cookieRes.status}`);
      }
    } catch (e) {
      console.warn(`[session-injector] Cookie injection error for "${domain}":`, e);
    }
  }

  // Inject localStorage per origin.
  // NOTE(review): the script is evaluated in the currently loaded page, so the
  // entries of every saved origin are written to that page's origin — confirm
  // this is intended.
  for (const originEntry of session.origins ?? []) {
    const items = originEntry.localStorage ?? [];
    if (items.length === 0) continue;
    try {
      const script = `(function(){${items
        .map((item) => `localStorage.setItem(${JSON.stringify(item.name)},${JSON.stringify(item.value)})`)
        .join(";")}})()`;
      const evalRes = await fetch(`${PINCHTAB_BASE}/evaluate`, {
        method: "POST",
        headers: jsonHeaders,
        body: JSON.stringify({ expression: script }),
      });
      if (!evalRes.ok) {
        console.warn(`[session-injector] localStorage injection failed for "${originEntry.origin}": ${evalRes.status}`);
      }
    } catch (e) {
      console.warn(`[session-injector] localStorage injection error for "${originEntry.origin}":`, e);
    }
  }

  // Reload so the site picks up injected cookies + localStorage
  try {
    await fetch(`${PINCHTAB_BASE}/evaluate`, {
      method: "POST",
      headers: jsonHeaders,
      body: JSON.stringify({ expression: "window.location.reload()" }),
    });
  } catch {
    // Non-fatal — session may still be partially usable
  }
}
@@ -0,0 +1,224 @@
1
+ ---
2
+ name: pinchtab
3
+ description: Control a headless or headed Chrome browser via Pinchtab's HTTP API for web automation, scraping, form filling, navigation, screenshots, and extraction with stable accessibility refs.
4
+ metadata:
5
+ short-description: Browser automation via Pinchtab HTTP API
6
+ ---
7
+
8
+ # Pinchtab
9
+
10
+ Fast, lightweight browser control for AI agents via HTTP + accessibility tree.
11
+
12
+ **Security Note:** Pinchtab runs entirely locally. It does not contact external services, send telemetry, or exfiltrate data. However, it controls a real Chrome instance — if pointed at a profile with saved logins, agents can access authenticated sites. Always use a dedicated empty profile and set BRIDGE_TOKEN when exposing the API. See [TRUST.md](TRUST.md) for the full security model.
13
+
14
+ ## Quick Start (Agent Workflow)
15
+
16
+ The 30-second pattern for browser tasks:
17
+
18
+ ```bash
19
+ # 1. Start Pinchtab (runs forever, local on :9867)
20
+ pinchtab &
21
+
22
+ # 2. In your agent, follow this loop:
23
+ # a) Navigate to a URL
24
+ # b) Snapshot the page (get refs like e0, e5, e12)
25
+ # c) Act on a ref (click e5, type e12 "search text")
26
+ # d) Snapshot again to see the result
27
+ # e) Repeat steps c-d until done
28
+ ```
29
+
30
+ **That's it.** Refs are stable—you don't need to re-snapshot before every action. Only snapshot when the page changes significantly.
31
+
32
+ ## Mercury / Docker (required)
33
+
34
+ In the Mercury agent container, `pinchtab &` plus a short `sleep` often races the HTTP bridge: the CLI then hits `127.0.0.1:9867` before the daemon listens (`connection refused`). The host injects `CHROME_BINARY` and `CHROME_FLAGS` (`--no-sandbox` as root). **Always** wait until the port is open and capture daemon logs.
35
+
36
+ ```bash
37
+ pinchtab_ensure() {
38
+ local bind="${BRIDGE_BIND:-127.0.0.1}"
39
+ local port="${BRIDGE_PORT:-9867}"
40
+ local log="${PINCHTAB_LOG:-/tmp/pinchtab.log}"
41
+ local max_wait="${1:-120}"
42
+ mkdir -p "$(dirname "$log")" 2>/dev/null || true
43
+ : >"$log"
44
+ if [ ! -x "${CHROME_BINARY:-}" ]; then
45
+ for _c in /usr/local/bin/chromium /usr/bin/chromium; do
46
+ if [ -x "$_c" ]; then export CHROME_BINARY="$_c"; break; fi
47
+ done
48
+ fi
49
+ if [ ! -x "${CHROME_BINARY:-}" ]; then
50
+ echo "No executable Chromium (CHROME_BINARY=${CHROME_BINARY:-}; tried /usr/local/bin/chromium, /usr/bin/chromium). Rebuild mercury-agent-ext (restart Mercury)." | tee -a "$log"
51
+ return 1
52
+ fi
53
+ _pinchtab_port_open() { (echo >/dev/tcp/$bind/$port) 2>/dev/null; }
54
+ if command -v pinchtab >/dev/null 2>&1 && _pinchtab_port_open; then
55
+ return 0
56
+ fi
57
+ pkill -f '[p]inchtab' 2>/dev/null || true
58
+ nohup pinchtab >>"$log" 2>&1 &
59
+ local pid=$!
60
+ sleep 2
61
+ if ! kill -0 "$pid" 2>/dev/null; then
62
+ echo "pinchtab exited immediately (pid $pid). Log:" >&2
63
+ tail -120 "$log" >&2
64
+ return 1
65
+ fi
66
+ local i=0
67
+ while [ "$i" -lt "$max_wait" ]; do
68
+ if _pinchtab_port_open; then
69
+ return 0
70
+ fi
71
+ if ! kill -0 "$pid" 2>/dev/null; then
72
+ echo "pinchtab died during startup. Log:" >&2
73
+ tail -120 "$log" >&2
74
+ return 1
75
+ fi
76
+ sleep 1
77
+ i=$((i+1))
78
+ done
79
+ echo "pinchtab did not listen on $bind:$port within ${max_wait}s. Log:" >&2
80
+ tail -120 "$log" >&2
81
+ return 1
82
+ }
83
+ ```
84
+
85
+ Use it before every navigation/snapshot/text workflow:
86
+
87
+ ```bash
88
+ pinchtab_ensure || { echo "pinchtab failed — see /tmp/pinchtab.log"; exit 1; }
89
+ pinchtab nav "https://example.com"
90
+ sleep 3
91
+ pinchtab text
92
+ ```
93
+
94
+ If `pinchtab_ensure` fails, show the user the tail of `/tmp/pinchtab.log`; do not only increase `sleep` and retry blindly.
95
+
96
+ ### Recommended Secure Setup
97
+
98
+ ```bash
99
+ # Best practice for AI agents
100
+ BRIDGE_BIND=127.0.0.1 \
101
+ BRIDGE_TOKEN="your-strong-secret" \
102
+ BRIDGE_PROFILE=~/.pinchtab/automation-profile \
103
+ pinchtab &
104
+ ```
105
+
106
+ **Never expose to 0.0.0.0 without a token. Never point at your daily Chrome profile.**
107
+
108
+ ## Setup
109
+
110
+ ```bash
111
+ # Headless (default) — no visible window
112
+ pinchtab &
113
+
114
+ # Headed — visible Chrome window for human debugging
115
+ BRIDGE_HEADLESS=false pinchtab &
116
+
117
+ # With auth token
118
+ BRIDGE_TOKEN="your-secret-token" pinchtab &
119
+
120
+ # Custom port
121
+ BRIDGE_PORT=8080 pinchtab &
122
+ ```
123
+
124
+ Default: **port 9867**, no auth required (local). Set `BRIDGE_TOKEN` for remote access.
125
+
126
+ For advanced setup, see [references/profiles.md](references/profiles.md) and [references/env.md](references/env.md).
127
+
128
+ ## What a Snapshot Looks Like
129
+
130
+ After calling `/snapshot`, you get the page's accessibility tree as JSON — a flat list of elements with refs:
131
+
132
+ ```json
133
+ {
134
+ "refs": [
135
+ {"id": "e0", "role": "link", "text": "Sign In", "selector": "a[href='/login']"},
136
+ {"id": "e1", "role": "textbox", "label": "Email", "selector": "input[name='email']"},
137
+ {"id": "e2", "role": "button", "text": "Submit", "selector": "button[type='submit']"}
138
+ ],
139
+ "text": "... readable text version of page ...",
140
+ "title": "Login Page"
141
+ }
142
+ ```
143
+
144
+ Then you act on refs: `click e0`, `type e1 "user@example.com"`, `press e2 Enter`.
145
+
146
+ ## Core Workflow
147
+
148
+ The typical agent loop:
149
+
150
+ 1. **Navigate** to a URL
151
+ 2. **Snapshot** the accessibility tree (get refs)
152
+ 3. **Act** on refs (click, type, press)
153
+ 4. **Snapshot** again to see results
154
+
155
+ Refs (e.g. `e0`, `e5`, `e12`) are cached per tab after each snapshot — no need to re-snapshot before every action unless the page changed significantly.
156
+
157
+ ### Quick examples
158
+
159
+ ```bash
160
+ pinchtab nav https://example.com
161
+ pinchtab snap -i -c # interactive + compact
162
+ pinchtab click e5
163
+ pinchtab type e12 hello world
164
+ pinchtab press Enter
165
+ pinchtab text # readable text (~1K tokens)
166
+ pinchtab text | jq .text # pipe to jq
167
+ pinchtab ss -o page.jpg # screenshot
168
+ pinchtab eval "document.title" # run JavaScript
169
+ pinchtab pdf --tab TAB_ID -o page.pdf # export PDF
170
+ ```
171
+
172
+ For the full HTTP API (curl examples, download, upload, cookies, stealth, batch actions, PDF export with full parameter control), see [references/api.md](references/api.md).
173
+
174
+ ## Token Cost Guide
175
+
176
+ | Method | Typical tokens | When to use |
177
+ |---|---|---|
178
+ | `/text` | ~800 | Reading page content |
179
+ | `/snapshot?filter=interactive` | ~3,600 | Finding buttons/links to click |
180
+ | `/snapshot?diff=true` | varies | Multi-step workflows (only changes) |
181
+ | `/snapshot?format=compact` | ~56-64% less | One-line-per-node, best efficiency |
182
+ | `/snapshot` | ~10,500 | Full page understanding |
183
+ | `/screenshot` | ~2K (vision) | Visual verification |
184
+ | `/tabs/{id}/pdf` | 0 (binary) | Export page as PDF (no token cost) |
185
+
186
+ **Strategy**: Start with `?filter=interactive&format=compact`. Use `?diff=true` on subsequent snapshots. Use `/text` when you only need readable content. Full `/snapshot` only when needed.
187
+
188
+ ## Agent Optimization
189
+
190
+ **Validated Feb 2026**: Testing with AI agents revealed a critical pattern for reliable, token-efficient scraping.
191
+
192
+ **See the full guide:** [docs/agent-optimization.md](../../docs/agent-optimization.md)
193
+
194
+ ### Quick Summary
195
+
196
+ **The 3-second pattern** — wait after navigate before snapshot:
197
+
198
+ ```bash
199
+ curl -X POST http://localhost:9867/navigate \
200
+ -H "Content-Type: application/json" \
201
+ -d '{"url": "https://example.com"}' && \
202
+ sleep 3 && \
203
+ curl http://localhost:9867/snapshot | jq '.nodes[] | select(.name | length > 15) | .name'
204
+ ```
205
+
206
+ **Token savings:** 93% reduction (3,842 → 272 tokens) when using prescriptive instructions vs. exploratory agent approach.
207
+
208
+ For detailed findings, system prompt templates, and site-specific notes, see [docs/agent-optimization.md](../../docs/agent-optimization.md).
209
+
210
+ ## Tips
211
+
212
+ - **Always pass `tabId` explicitly** when working with multiple tabs
213
+ - Refs are stable between snapshot and actions — no need to re-snapshot before clicking
214
+ - After navigation or major page changes, take a new snapshot for fresh refs
215
+ - Pinchtab persists sessions — tabs survive restarts (disable with `BRIDGE_NO_RESTORE=true`)
216
+ - Chrome profile is persistent — cookies/logins carry over between runs
217
+ - Use `BRIDGE_BLOCK_IMAGES=true` or `"blockImages": true` on navigate for read-heavy tasks
218
+ - **Wait 3+ seconds after navigate before snapshot** — Chrome needs time to render 2000+ accessibility tree nodes
219
+
220
+ ## Authenticated Browser Sessions
221
+
222
+ If the user has saved a browser session for a site (via the Browser Sessions page in the console), the session (cookies + localStorage) is pre-loaded into the container environment and the system prompt defines a `pinchtab_nav <url>` helper. Use `pinchtab_nav` instead of `pinchtab nav` for every navigation: it opens the URL, injects the saved session for the matching domain, and reloads the page so the site picks it up. When no session exists for the domain it falls back to a plain `pinchtab nav`, so no other special handling is needed.
223
+
224
+ Sites behind login walls (banks, airlines, HR portals, niche SaaS) can be accessed this way without any copy-pasting or API key setup. If a session seems stale or the site still shows a login screen, the user can re-capture the session from the Browser Sessions page.
@@ -0,0 +1,69 @@
1
+ # Pinchtab Security & Trust
2
+
3
+ **TL;DR**: Pinchtab is a local, sandboxed browser control tool. It does not phone home, steal credentials, or exfiltrate data. Source code is public; binaries are signed and published via GitHub.
4
+
5
+ ## What Pinchtab Does
6
+
7
+ - Launches a Chrome browser (local, under your control)
8
+ - Exposes navigation, clicking, typing, and page inspection via HTTP API
9
+ - Extracts the page's accessibility tree (for AI agents)
10
+ - Captures screenshots, exports PDFs, and evaluates JavaScript
11
+
12
+ **All of this stays local.** No telemetry. No external API calls (except to sites you navigate to).
13
+
14
+ ## What Pinchtab Does NOT Do
15
+
16
+ - ❌ Doesn't access your saved passwords/credentials (Chrome sandboxing)
17
+ - ❌ Doesn't exfiltrate data to remote servers
18
+ - ❌ Doesn't inject ads, malware, or miners
19
+ - ❌ Doesn't track browsing or send analytics
20
+ - ❌ Doesn't modify system files outside its state directory (`~/.pinchtab`)
21
+
22
+ ## Builds & Verification
23
+
24
+ Every release includes **checksums** alongside binaries:
25
+
26
+ ```bash
27
+ # After downloading, verify:
28
+ sha256sum -c checksums.txt
29
+ ```
30
+
31
+ Binaries are built automatically from tagged commits via GitHub Actions (publicly visible at https://github.com/pinchtab/pinchtab/actions).
32
+
33
+ ## Open Source
34
+
35
+ - **Source**: https://github.com/pinchtab/pinchtab (MIT)
36
+ - **Releases**: https://github.com/pinchtab/pinchtab/releases
37
+ - **Latest**: v0.7.0 (Feb 2026)
38
+
39
+ If you're concerned, audit the source — it's 12MB, with minimal external dependencies (it is built on chromedp) and otherwise mostly Go stdlib.
40
+
41
+ ## VirusTotal Flag
42
+
43
+ Pinchtab may trigger heuristic scanners on VirusTotal because:
44
+
45
+ - ✓ It launches Chrome (subprocess execution — flagged by AV heuristics)
46
+ - ✓ It runs JavaScript evaluation (eval-like operations)
47
+ - ✓ It makes HTTP requests (network activity)
48
+
49
+ These are **intentional design features**, not security flaws. Your browser does all three things by default.
50
+
51
+ **False positives are common for development tools.** The VirusTotal flag is a known false positive for chromedp-based tools (they spawn a subprocess and run an HTTP server). Always verify SHA256 checksums from GitHub releases before running.
52
+
53
+ For maximum confidence, use the npm package (`npm install -g pinchtab`) or Docker image, which undergo additional validation.
54
+
55
+ ## Sandboxing
56
+
57
+ Pinchtab runs a separate Chrome process with:
58
+
59
+ - Isolated profile directory (default: `~/.pinchtab`)
60
+ - No access to other files in your home directory (unless you explicitly navigate to `file://` URLs)
61
+ - Standard Chrome security model (site isolation, CSP, etc.)
62
+
63
+ Set `BRIDGE_PROFILE_DIR` to use a custom directory if needed.
64
+
65
+ ## Questions?
66
+
67
+ - Source code: https://github.com/pinchtab/pinchtab
68
+ - Issues/security reports: https://github.com/pinchtab/pinchtab/issues
69
+ - Docs: https://pinchtab.com