pi-search-on-your-browser 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. package/README.md +73 -0
  2. package/index.ts +203 -0
  3. package/package.json +28 -0
  4. package/src/chrome.ts +434 -0
package/README.md ADDED
@@ -0,0 +1,73 @@
1
+ # pi-search-on-your-browser
2
+
3
+ Search Google in your **own visible Chrome browser** — the [ds4-agent](https://github.com/antirez/ds4) style by @antirez.
4
+
5
+ > "If you need AI to do a search for you in the real world, ds4-agent is basically SOTA, because it can access the web sites without any limitations given that it uses your local Chrome browser (no, not in headless mode, that's the trick...)"
6
+ > — [@antirez on X](https://x.com/antirez/status/2066233392916525379), 2026-06-14
7
+
8
+ **This Pi package uses exactly the same approach:** launches your visible Chrome (not headless), navigates to google.com via CDP, runs JavaScript extractors in the page, and returns compact Markdown results. No API keys. No headless detection. Your real browser fingerprint, cookies, and login sessions.
9
+
10
+ ## How it works
11
+
12
+ When you call `google_search` or `visit_page`:
13
+
14
+ 1. A **visible Chrome window** opens (not headless) with a dedicated profile at `~/.pi-search-browser/`
15
+ 2. Chrome DevTools Protocol (CDP) is used to navigate and extract content
16
+ 3. JavaScript runs in the page to extract readable markdown
17
+ 4. Chrome stays alive between calls for speed (kill with `/google-search-kill`)
18
+
19
+ Because the dedicated profile keeps its cookies and sessions between calls, any site you sign into in that Chrome window — paywalled sites, Twitter, GitHub, Google — stays signed in for later searches and page visits.
20
+
21
+ ## Install
22
+
23
+ ```bash
24
+ pi install /path/to/pi-search-on-your-browser
25
+ ```
26
+
27
+ Or from git (once published):
28
+
29
+ ```bash
30
+ pi install git:github.com/xezpeleta/pi-search-on-your-browser
31
+ ```
32
+
33
+ ## Tools
34
+
35
+ ### `google_search`
36
+
37
+ Search Google and get compact markdown links + text snippet.
38
+
39
+ ```
40
+ google_search({ query: "TypeScript 5.7 release notes" })
41
+ ```
42
+
43
+ ### `visit_page`
44
+
45
+ Visit any URL and get the page content as markdown.
46
+
47
+ ```
48
+ visit_page({ url: "https://example.com/article" })
49
+ ```
50
+
51
+ ## Commands
52
+
53
+ - `/google-search-kill` — Kill the Chrome browser
54
+
55
+ ## Requirements
56
+
57
+ - Google Chrome or Chromium installed
58
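+ - Node.js 22+ (the CDP client uses Node's built-in global `WebSocket`, which is enabled by default starting with Node 22)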
59
+
60
+ ## Comparison with ds4-agent
61
+
62
+ | | pi-search-on-your-browser | ds4-agent |
63
+ |---|---|---|
64
+ | Language | TypeScript (Node.js) | C |
65
+ | Chrome connection | CDP WebSocket (Node.js built-in `WebSocket`) | CDP WebSocket (manual RFC 6455) |
66
+ | Profile | `~/.pi-search-browser/` | `~/.ds4/browser` |
67
+ | Google consent | Auto-click "Accept all" (multi-language) | Auto-click "Accept all" (multi-language) |
68
+ | Page extraction | Same JS extractors, ported to TS | Inline JS in C |
69
+ | Dependencies | Zero npm deps (just Node.js built-ins) | Zero deps (just POSIX) |
70
+
71
+ ## License
72
+
73
+ MIT
package/index.ts ADDED
@@ -0,0 +1,203 @@
1
+ /**
2
+ * pi-search-on-your-browser — exact same approach as ds4-agent, for Pi
3
+ *
4
+ * @antirez's ds4-agent strategy:
5
+ * https://x.com/antirez/status/2066233392916525379
6
+ * https://github.com/antirez/ds4
7
+ *
8
+ * Same approach: visible Chrome (not headless), CDP WebSocket, inline JS
9
+ * extractors. No API keys, no headless detection.
10
+ *
11
+ * Registered tools:
12
+ * - google_search — Search Google in a visible Chrome browser, returns markdown links + snippet
13
+ * - visit_page — Visit a URL in a visible Chrome browser, returns rendered page as markdown
14
+ *
15
+ * Registered commands:
16
+ * - /google-search-kill — Kill the Chrome process
17
+ *
18
+ * Chrome runs in a visible window (not headless) with a dedicated profile at
19
+ * ~/.pi-search-browser/ — cookies and sessions persist across calls.
20
+ */
21
+
22
+ import type { ExtensionAPI, ToolResult } from "@earendil-works/pi-coding-agent";
23
+ import { Text } from "@earendil-works/pi-tui";
24
+ import { Type } from "typebox";
25
+ import { googleSearch, visitPage, shutdownChrome } from "./src/chrome.js";
26
+
27
type RenderArgs = { query?: string; url?: string };
type RenderState = { expanded?: boolean; isPartial?: boolean };
type ToolTheme = {
  fg: (color: string, text: string) => string;
  bold: (text: string) => string;
  dim: (text: string) => string;
};

/** Metadata that both tools attach to a successful result (see their `execute` functions). */
type ResultDetails = { url?: string; elapsed?: string; chars?: number };

/** Returns the text of the first text content item of a tool result, if any. */
function firstText(result: ToolResult) {
  return result.content.find((c) => c.type === "text")?.text;
}

/**
 * Shared result renderer for both tools.
 *
 * - partial results show the latest progress message (or `progressFallback`);
 * - collapsed results show a one-line summary built from `details`;
 * - expanded results show the full text output.
 */
function renderToolResult(
  result: ToolResult,
  { expanded, isPartial }: RenderState,
  theme: ToolTheme,
  progressFallback: string,
) {
  if (isPartial) {
    return new Text(theme.fg("warning", firstText(result) ?? progressFallback), 0, 0);
  }

  if (!expanded) {
    const details = result.details as ResultDetails | undefined;
    const parts: string[] = [];
    if (details?.chars) parts.push(`${details.chars.toLocaleString()} chars`);
    if (details?.elapsed) parts.push(details.elapsed);
    if (details?.url) {
      // The hostname is decorative: a malformed URL must never break rendering.
      try {
        parts.push(new URL(details.url).hostname);
      } catch {
        /* omit the hostname */
      }
    }
    return new Text(theme.fg("muted", ` → ${parts.join(" · ")}`), 0, 0);
  }

  const text = firstText(result) ?? "";
  return new Text(`\n${text.split("\n").map((l) => theme.fg("toolOutput", l)).join("\n")}`, 0, 0);
}

/**
 * Pi extension entry point: registers the `google_search` and `visit_page`
 * tools and the `/google-search-kill` command on the given extension API.
 */
export default function searchOnYourBrowser(pi: ExtensionAPI) {
  // ── google_search tool ───────────────────────────────────────────────────

  pi.registerTool({
    name: "google_search",
    label: "Google Search",
    description:
      "Search Google in your visible Chrome browser and return compact Markdown links. Uses your real browser fingerprint — no API keys, no headless detection.",
    promptSnippet: "google_search: search Google in your visible browser, returns markdown links",
    promptGuidelines: [
      "Use google_search to find web pages when you need real-time information. Results include clickable markdown links.",
    ],
    parameters: Type.Object({
      query: Type.String({ description: "Search query to send to Google" }),
    }),
    async execute(_toolCallId, params, _signal, onUpdate) {
      const { query } = params;
      // A blank query is reported as a tool-level error result rather than thrown.
      if (!query || !query.trim()) {
        return {
          content: [{ type: "text" as const, text: "Tool error: google_search requires a query." }],
          details: {},
        };
      }

      try {
        const started = Date.now();
        const result = await googleSearch(query.trim(), (msg) => {
          // Forward browser status messages as partial results.
          onUpdate?.({
            content: [{ type: "text", text: msg }],
            details: { _progress: true },
          });
        });
        const elapsed = ((Date.now() - started) / 1000).toFixed(1);

        return {
          content: [{ type: "text" as const, text: result.markdown }],
          details: { url: result.url, elapsed: `${elapsed}s`, chars: result.markdown.length },
        };
      } catch (err: unknown) {
        const message = err instanceof Error ? err.message : String(err);
        throw new Error(`google_search failed: ${message}`);
      }
    },

    renderCall(args: Partial<RenderArgs>, theme: ToolTheme) {
      // Show at most 60 characters of the query, with an ellipsis when cut.
      const q = (args.query || "").slice(0, 60);
      const trunc = q.length < (args.query || "").length ? "..." : "";
      return new Text(
        `${theme.fg("toolTitle", theme.bold("google_search"))} "${theme.fg("accent", q + trunc)}"`,
        0,
        0,
      );
    },

    renderResult(result: ToolResult, state: RenderState, theme: ToolTheme) {
      return renderToolResult(result, state, theme, "Searching...");
    },
  });

  // ── visit_page tool ──────────────────────────────────────────────────────

  pi.registerTool({
    name: "visit_page",
    label: "Visit Page",
    description:
      "Open a URL in your visible Chrome browser and return the rendered page as Markdown. Works with authenticated sites, paywalls, and JavaScript-heavy pages.",
    promptSnippet: "visit_page: visit a URL in your visible browser, returns rendered markdown",
    promptGuidelines: [
      "Use visit_page to read a web page you found via google_search. It opens in your visible Chrome so authenticated/paywalled sites work.",
    ],
    parameters: Type.Object({
      url: Type.String({ description: "Full URL to visit" }),
    }),
    async execute(_toolCallId, params, _signal, onUpdate) {
      const { url } = params;
      if (!url || !url.trim()) {
        return {
          content: [{ type: "text" as const, text: "Tool error: visit_page requires a URL." }],
          details: {},
        };
      }

      // Normalize and validate the URL before handing it to the browser.
      let targetUrl: string;
      try {
        targetUrl = new URL(url.trim()).toString();
      } catch {
        return {
          content: [{ type: "text" as const, text: `Tool error: visit_page: invalid URL: ${url}` }],
          details: {},
        };
      }

      try {
        const started = Date.now();
        const result = await visitPage(targetUrl, (msg) => {
          // Forward browser status messages as partial results.
          onUpdate?.({
            content: [{ type: "text", text: msg }],
            details: { _progress: true },
          });
        });
        const elapsed = ((Date.now() - started) / 1000).toFixed(1);

        return {
          content: [{ type: "text" as const, text: result.markdown }],
          details: { url: result.url, elapsed: `${elapsed}s`, chars: result.markdown.length },
        };
      } catch (err: unknown) {
        const message = err instanceof Error ? err.message : String(err);
        throw new Error(`visit_page failed: ${message}`);
      }
    },

    renderCall(args: Partial<RenderArgs>, theme: ToolTheme) {
      const u = args.url || "";
      // Prefer the bare hostname; fall back to the raw argument if it does not parse.
      const hostname = (() => { try { return new URL(u).hostname; } catch { return u; } })();
      return new Text(
        `${theme.fg("toolTitle", theme.bold("visit_page"))} ${theme.fg("accent", hostname)}`,
        0,
        0,
      );
    },

    renderResult(result: ToolResult, state: RenderState, theme: ToolTheme) {
      return renderToolResult(result, state, theme, "Loading...");
    },
  });

  // ── Commands ─────────────────────────────────────────────────────────────

  pi.registerCommand("google-search-kill", {
    description: "Kill the Google Search Chrome browser process",
    handler: async (_args, ctx) => {
      shutdownChrome();
      ctx.ui.notify("Google Search Chrome killed.", "info");
    },
  });
}
package/package.json ADDED
@@ -0,0 +1,28 @@
1
+ {
2
+ "name": "pi-search-on-your-browser",
3
+ "version": "0.1.0",
4
+ "description": "Search Google in your own visible Chrome — same approach as @antirez's ds4-agent. Zero dependencies, no API keys.",
5
+ "type": "module",
6
+ "keywords": ["pi-package", "search", "google", "chrome", "cdp", "browser", "ds4-agent"],
7
+ "author": "xezpeleta",
8
+ "license": "MIT",
9
+ "engines": {
10
+ "node": ">=21.0.0"
11
+ },
12
+ "files": [
13
+ "index.ts",
14
+ "src/",
15
+ "README.md"
16
+ ],
17
+ "pi": {
18
+ "extensions": ["./index.ts"]
19
+ },
20
+ "peerDependencies": {
21
+ "@earendil-works/pi-coding-agent": "*"
22
+ },
23
+ "peerDependenciesMeta": {
24
+ "@earendil-works/pi-coding-agent": {
25
+ "optional": true
26
+ }
27
+ }
28
+ }
package/src/chrome.ts ADDED
@@ -0,0 +1,434 @@
1
+ /**
2
+ * Chrome DevTools Protocol (CDP) client — Node.js built-in WebSocket.
3
+ *
4
+ * Same approach as ds4-agent (@antirez): visible Chrome (not headless),
5
+ * CDP WebSocket navigation, inline JavaScript extractors in the page.
6
+ *
7
+ * Reference: https://x.com/antirez/status/2066233392916525379
8
+ *
9
+ * Profile at ~/.pi-search-browser/ — dedicated, like ds4-agent's ~/.ds4/browser.
10
+ * Cookies and sessions persist across calls.
11
+ */
12
+
13
+ import { spawn, type ChildProcess } from "node:child_process";
14
+ import { mkdirSync, existsSync } from "node:fs";
15
+ import { homedir } from "node:os";
16
+ import { join } from "node:path";
17
+
18
+ const PROFILE_DIR = join(homedir(), ".pi-search-browser");
19
+ const CDP_PORT = 9322;
20
+ const CDP_TIMEOUT_MS = 30_000;
21
+ const MAX_RESULT_BYTES = 1_048_576; // 1 MB
22
+
23
+ // ── Utilities ─────────────────────────────────────────────────────────────
24
+
25
/** Returns a promise that resolves once `ms` milliseconds have elapsed. */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
28
+
29
/**
 * Picks the Chrome/Chromium executable to launch.
 *
 * Candidates are tried in order: the `CHROME_PATH` environment variable, then
 * a fixed list of Linux and macOS install locations. The first one that exists
 * on disk wins; when none exists the bare command name "google-chrome" is
 * returned.
 */
function findChrome(): string {
  const candidates: Array<string | undefined> = [
    process.env.CHROME_PATH,
    "/usr/bin/google-chrome-stable",
    "/usr/bin/google-chrome",
    "/usr/bin/chromium",
    "/usr/bin/chromium-browser",
    "/snap/bin/chromium",
    "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
    "/Applications/Chromium.app/Contents/MacOS/Chromium",
  ];
  // Unset or empty entries are skipped before touching the filesystem.
  const found = candidates.find(
    (candidate): candidate is string => !!candidate && existsSync(candidate),
  );
  return found ?? "google-chrome";
}
45
+
46
+ // ── CDP over WebSocket ────────────────────────────────────────────────────
47
+
48
/** Settle callbacks for one in-flight CDP command, stored under its request id. */
interface PendingCall {
  resolve: (v: unknown) => void;
  reject: (e: Error) => void;
}

/**
 * Minimal Chrome DevTools Protocol client over a WebSocket.
 *
 * Commands are sent as `{ id, method, params }` JSON messages; replies are
 * matched back to their caller by `id`. Messages without an `id` are treated
 * as events and dispatched to handlers registered with `onEvent`.
 */
class CDPClient {
  private ws: WebSocket | null = null;
  private nextId = 1;
  private pending = new Map<number, PendingCall>();
  private connectPromise: Promise<void> | null = null;
  private eventHandlers = new Map<string, Array<(params: unknown) => void>>();

  /**
   * Opens the WebSocket to `wsUrl`.
   *
   * @throws Error if the socket errors, closes before opening, or does not
   *   open within `CDP_TIMEOUT_MS`.
   */
  async connect(wsUrl: string): Promise<void> {
    this.connectPromise = new Promise((resolve, reject) => {
      const ws = new WebSocket(wsUrl);
      this.ws = ws;

      const timer = setTimeout(() => {
        ws.close();
        reject(new Error(`WebSocket connect timeout`));
      }, CDP_TIMEOUT_MS);

      ws.onopen = () => {
        clearTimeout(timer);
        resolve();
      };

      ws.onmessage = (event) => this.handleMessage(event.data);

      ws.onerror = () => {
        clearTimeout(timer);
        reject(new Error("WebSocket connection error"));
      };

      ws.onclose = () => {
        clearTimeout(timer);
        // No reply can arrive on a closed socket: fail in-flight calls now
        // instead of letting each one wait for its own timeout. The identity
        // check keeps a stale socket from failing calls of a newer connection.
        if (this.ws === ws) {
          this.failPending(new Error("CDP connection closed"));
        }
        // No-op when the promise has already settled.
        reject(new Error("WebSocket closed before it opened"));
      };
    });
    await this.connectPromise;
  }

  /** Registers `handler` for every future CDP event named `method`. Handlers are never removed. */
  onEvent(method: string, handler: (params: unknown) => void) {
    const handlers = this.eventHandlers.get(method) || [];
    handlers.push(handler);
    this.eventHandlers.set(method, handlers);
  }

  /**
   * Sends one CDP command and resolves with its `result` payload.
   *
   * @throws Error if the socket is not open, the send fails, Chrome replies
   *   with an error, the connection closes first, or no reply arrives within
   *   `CDP_TIMEOUT_MS`.
   */
  async call(method: string, params: Record<string, unknown> = {}): Promise<unknown> {
    const ws = this.ws;
    if (!ws || ws.readyState !== WebSocket.OPEN) {
      throw new Error("CDP not connected");
    }
    const id = this.nextId++;
    const msg = JSON.stringify({ id, method, params });

    return new Promise((resolve, reject) => {
      const timer = setTimeout(() => {
        this.pending.delete(id);
        reject(new Error(`CDP call timeout: ${method}`));
      }, CDP_TIMEOUT_MS);

      this.pending.set(id, {
        resolve: (v) => { clearTimeout(timer); resolve(v); },
        reject: (e) => { clearTimeout(timer); reject(e); },
      });

      try {
        ws.send(msg);
      } catch (err: unknown) {
        // Do not leave a dangling entry and timer behind when the send fails.
        this.pending.delete(id);
        clearTimeout(timer);
        reject(err instanceof Error ? err : new Error(String(err)));
      }
    });
  }

  /**
   * Evaluates `expression` in the page via `Runtime.evaluate` (by value,
   * awaiting promises) and returns the result as a string. A result without a
   * value falls back to its description, or "" when there is none.
   *
   * @throws Error if the expression throws in the page; previously the
   *   exception description was returned as if it were the result.
   */
  async evaluate(expression: string): Promise<string> {
    const reply = await this.call("Runtime.evaluate", {
      expression,
      returnByValue: true,
      awaitPromise: true,
    });
    const r = reply as {
      result?: { value?: unknown; description?: string };
      exceptionDetails?: { text?: string; exception?: { description?: string } };
    };
    if (r.exceptionDetails) {
      const detail =
        r.exceptionDetails.exception?.description ?? r.exceptionDetails.text ?? "unknown error";
      throw new Error(`JavaScript exception in page: ${detail}`);
    }
    if (r.result?.value !== undefined) return String(r.result.value);
    return r.result?.description ?? "";
  }

  /** Closes the socket (if any) and rejects every call still waiting for a reply. */
  disconnect() {
    if (this.ws) {
      this.ws.close();
      this.ws = null;
    }
    this.failPending(new Error("CDP client disconnected"));
  }

  /** Parses one incoming frame and routes it to an event handler or a pending call. */
  private handleMessage(data: unknown) {
    if (typeof data !== "string") return;
    let msg: { id?: number | null; method?: string; result?: unknown; error?: { message?: string }; params?: unknown };
    try {
      msg = JSON.parse(data);
    } catch {
      return;
    }
    if (typeof msg !== "object" || msg === null) return;

    // Events (no id field) — dispatch to handlers
    if (msg.id === undefined || msg.id === null) {
      if (msg.method) {
        const handlers = this.eventHandlers.get(msg.method);
        if (handlers) {
          for (const h of handlers) h(msg.params);
        }
      }
      return;
    }

    const cb = this.pending.get(msg.id);
    if (!cb) return;
    this.pending.delete(msg.id);
    if (msg.error) {
      cb.reject(new Error(`CDP error: ${msg.error.message || JSON.stringify(msg.error)}`));
    } else {
      cb.resolve(msg.result);
    }
  }

  /** Rejects and forgets every in-flight call with `reason`. */
  private failPending(reason: Error) {
    const waiting = [...this.pending.values()];
    this.pending.clear();
    for (const cb of waiting) cb.reject(reason);
  }
}
155
+
156
+ // ── Chrome process management ─────────────────────────────────────────────
157
+
158
+ let chromeProcess: ChildProcess | null = null;
159
+
160
/**
 * Probes the local CDP HTTP endpoint.
 *
 * @returns true when `/json/version` on the debugging port answers with a
 *   successful HTTP status; false on a non-OK status or any fetch failure.
 */
async function isChromeAlive(): Promise<boolean> {
  const versionEndpoint = `http://127.0.0.1:${CDP_PORT}/json/version`;
  try {
    const { ok } = await fetch(versionEndpoint);
    return ok;
  } catch {
    return false;
  }
}
168
+
169
/**
 * Starts a visible Chrome with remote debugging on `CDP_PORT` and the
 * dedicated profile directory, then polls the CDP endpoint until it answers.
 *
 * @throws Error if the executable cannot be spawned (for example it does not
 *   exist), or if CDP does not become reachable within 30 seconds.
 */
async function launchChrome(): Promise<void> {
  mkdirSync(PROFILE_DIR, { recursive: true });

  const chromePath = findChrome();

  console.error(`[pi-search] Launching visible Chrome at ${chromePath}`);

  const args = [
    `--remote-debugging-port=${CDP_PORT}`,
    "--remote-allow-origins=*",
    `--user-data-dir=${PROFILE_DIR}`,
    "--no-first-run",
    "--no-default-browser-check",
    "--disable-sync",
    "--password-store=basic",
    "--mute-audio",
    "about:blank",
  ];

  const child = spawn(chromePath, args, {
    stdio: ["ignore", "ignore", "ignore"],
    detached: false,
  });
  chromeProcess = child;

  // Without an 'error' listener a failed spawn (e.g. ENOENT for a missing
  // binary) is raised as an uncaught exception. Record it so the wait loop
  // below can fail fast with a useful message instead.
  const spawnFailure: { error?: Error } = {};
  child.on("error", (err) => {
    spawnFailure.error = err;
    if (chromeProcess === child) chromeProcess = null;
  });

  child.on("exit", (code) => {
    console.error(`[pi-search] Chrome exited with code ${code}`);
    // Only clear the handle if it still refers to this process, so a late
    // exit of an old Chrome cannot drop the handle of a newer one.
    if (chromeProcess === child) chromeProcess = null;
  });

  // Wait for CDP to become available (60 polls, 500 ms apart).
  for (let i = 0; i < 60; i++) {
    if (spawnFailure.error) {
      throw new Error(
        `Failed to launch Chrome at ${chromePath}: ${spawnFailure.error.message} (set CHROME_PATH to your Chrome/Chromium executable)`,
      );
    }
    if (await isChromeAlive()) {
      console.error("[pi-search] Chrome is ready");
      return;
    }
    await sleep(500);
  }
  throw new Error("Chrome did not become ready within 30s");
}
208
+
209
/**
 * Makes sure a Chrome with a reachable CDP endpoint is running.
 *
 * When the endpoint does not answer, any process handle this module still
 * holds is killed first (followed by a 500 ms pause), then a fresh Chrome is
 * launched.
 */
async function ensureChrome(): Promise<void> {
  if (!(await isChromeAlive())) {
    if (chromeProcess) {
      // The tracked process is not serving CDP: drop it before relaunching.
      chromeProcess.kill();
      chromeProcess = null;
      await sleep(500);
    }
    await launchChrome();
  }
}
218
+
219
+ // ── Page operations ──────────────────────────────────────────────────────
220
+
221
/** A tab created through CDP, as returned by `openTab`. */
interface CDPTab {
  /** Target id reported by `Target.createTarget`; also used to close the tab. */
  targetId: string;
  /** WebSocket URL of the tab's page-level debugging endpoint. */
  wsUrl: string;
}
225
+
226
/**
 * Reads the browser-level WebSocket debugger URL from the CDP
 * `/json/version` endpoint.
 *
 * @throws Error if the endpoint answers with a non-OK status or the response
 *   does not contain a non-empty `webSocketDebuggerUrl` string.
 */
async function getBrowserWSUrl(): Promise<string> {
  const resp = await fetch(`http://127.0.0.1:${CDP_PORT}/json/version`);
  if (!resp.ok) {
    throw new Error(`CDP /json/version returned HTTP ${resp.status}`);
  }

  // Validate instead of trusting the payload shape, so a bad response fails
  // here with a clear message rather than later inside `new WebSocket(...)`.
  const data: unknown = await resp.json();
  const wsUrl =
    typeof data === "object" && data !== null
      ? (data as { webSocketDebuggerUrl?: unknown }).webSocketDebuggerUrl
      : undefined;
  if (typeof wsUrl !== "string" || wsUrl === "") {
    throw new Error("CDP /json/version response has no webSocketDebuggerUrl");
  }
  return wsUrl;
}
231
+
232
/**
 * Creates a new background tab (initially `about:blank`) through the
 * browser-level CDP connection and returns its target id together with the
 * page-level WebSocket URL.
 *
 * @throws Error if the browser connection fails or Chrome does not return a
 *   target id.
 */
async function openTab(): Promise<CDPTab> {
  const browserUrl = await getBrowserWSUrl();
  const browserCdp = new CDPClient();

  let created: unknown;
  try {
    await browserCdp.connect(browserUrl);
    created = await browserCdp.call("Target.createTarget", {
      url: "about:blank",
      background: true,
      newWindow: false,
    });
  } finally {
    // Always release the browser-level socket, including when the call fails.
    browserCdp.disconnect();
  }

  const targetId =
    typeof created === "object" && created !== null
      ? (created as { targetId?: unknown }).targetId
      : undefined;
  if (typeof targetId !== "string" || targetId === "") {
    throw new Error("Target.createTarget did not return a targetId");
  }

  const wsUrl = `ws://127.0.0.1:${CDP_PORT}/devtools/page/${targetId}`;
  return { wsUrl, targetId };
}
248
+
249
/**
 * Asks Chrome to close the tab with the given target id via the CDP HTTP
 * endpoint `/json/close/<id>`. Failures are ignored on purpose: closing is
 * best effort.
 */
async function closeTab(targetId: string): Promise<void> {
  const closeEndpoint = `http://127.0.0.1:${CDP_PORT}/json/close/${encodeURIComponent(targetId)}`;
  try {
    await fetch(closeEndpoint);
  } catch {
    // best effort
  }
}
258
+
259
+ // ── JavaScript extractors (ds4-agent style) ────────────────────────────────
260
+
261
+ // Backtick constant for building JS strings that contain backticks
262
+ const BT = "`";
263
+
264
/**
 * Page script that looks for a consent control and clicks it.
 *
 * It scans buttons, `[role=button]` elements, submit inputs and links, and
 * clicks the first one whose whitespace-normalized label matches one of the
 * patterns. It evaluates to "clicked <label>" on success and "" otherwise.
 *
 * NOTE(review): the patterns are unanchored substring matches (e.g. /agree/i)
 * and links are included in the scan — confirm this is acceptable for the
 * pages it runs on.
 */
const GOOGLE_CONSENT_JS = [
  "(() => {",
  'const clean=s=>(s||"").replace(/\\s+/g," ").trim();',
  "const pats=[/accept all/i,/i agree/i,/agree/i,/accetta tutto/i,/tout accepter/i,/aceptar todo/i,/alle akzeptieren/i];",
  'const els=[...document.querySelectorAll("button,[role=button],input[type=submit],a")];',
  "for(const el of els){const t=clean(el.innerText||el.value||el.textContent);",
  "if(!t)continue;if(pats.some(p=>p.test(t))){el.click();return'clicked '+t;}}",
  'return"";',
  "})()",
].join("");
273
+
274
/**
 * Page script that turns a Google results page into Markdown.
 *
 * Output: a heading, the page URL, a "## Visible links" list with up to 30
 * unique, visible http(s) links (Google `/url?q=` redirects are unwrapped;
 * google, gstatic and googleusercontent hosts are skipped; link text shorter
 * than 3 characters is skipped and longer text is cut at 180 characters),
 * then a "## Text snippet" section with the first 1200 characters of the
 * whitespace-normalized body text.
 */
const GOOGLE_SEARCH_JS = [
  "(() => {",
  // Helpers: whitespace normalizer, Markdown link-text escaper, visibility test, host filter.
  'const clean=s=>(s||"").replace(/\\s+/g," ").trim();',
  'const esc=s=>clean(s).replace(/\\\\/g,"\\\\\\\\").replace(/\\[/g,"\\\\[").replace(/\\]/g,"\\\\]").replace(/\\n/g," ");',
  'const visible=el=>{const r=el.getBoundingClientRect();const st=getComputedStyle(el);return r.width>0&&r.height>0&&st.display!=="none"&&st.visibility!=="hidden"&&st.opacity!=="0";};',
  "const bad=h=>/(^|\\.)google\\./.test(h)||/(^|\\.)gstatic\\./.test(h)||/(^|\\.)googleusercontent\\./.test(h);",
  'const lines=["# Google search results","","URL: "+location.href,"","## Visible links"];',
  "const seen=new Set();",
  // Link collection loop.
  'for(const a of document.querySelectorAll("a[href]")){',
  "if(!visible(a))continue;let href=a.href||'';",
  'try{const u=new URL(href);if(u.pathname==="/url"&&u.searchParams.get("q"))href=u.searchParams.get("q");}catch{}',
  "let u;try{u=new URL(href)}catch{continue;}",
  "if(!/^https?:$/.test(u.protocol))continue;",
  "if(bad(u.hostname))continue;",
  "const text=esc(a.innerText||a.textContent);if(text.length<3)continue;",
  "if(seen.has(u.href))continue;seen.add(u.href);",
  'lines.push("- ["+text.slice(0,180)+"]("+u.href+")");if(seen.size>=30)break;}',
  // Trailing text snippet and final join.
  'lines.push("","## Text snippet",clean(document.body.innerText).slice(0,1200));',
  'return lines.join("\\n");',
  "})()",
].join("");
294
+
295
/**
 * Page script that renders the current document as Markdown.
 *
 * Output: a title heading, the page URL, a "## Content" section built from
 * the visible h1–h6, p, li, pre, blockquote, td and th elements (duplicates
 * dropped, stopping once the accumulated text exceeds 90000 characters), and
 * a "## Visible links" section with up to 80 unique visible http(s) links.
 *
 * The escapes below are written so that the *page* script contains "\n"
 * (a real newline) and "\\`" (backslash + backtick). Earlier they were
 * escaped one level too many, which made the script join lines and fence
 * <pre> blocks with a literal backslash-n and escape backticks with two
 * backslashes. Backticks need no escaping inside these quoted TypeScript
 * strings, so they are written directly.
 */
const EXTRACT_PAGE_JS =
  "(() => {" +
  'const clean=s=>(s||"").replace(/\\s+/g," ").trim();' +
  'const esc=s=>clean(s).replace(/\\\\/g,"\\\\\\\\").replace(/\\[/g,"\\\\[").replace(/\\]/g,"\\\\]").replace(/\\n/g," ");' +
  'const visible=el=>{const r=el.getBoundingClientRect();const st=getComputedStyle(el);return r.width>0&&r.height>0&&st.display!=="none"&&st.visibility!=="hidden"&&st.opacity!=="0";};' +
  // inline(): Markdown for one node — text nodes verbatim, links as [text](href), <code> in backticks.
  "const inline=n=>{if(!n)return'';if(n.nodeType===3)return n.nodeValue;if(n.nodeType!==1)return'';const el=n;" +
  'if(el.tagName==="SCRIPT"||el.tagName==="STYLE"||el.tagName==="NOSCRIPT")return"";' +
  'if(el.tagName==="A"){const t=esc(el.innerText||el.textContent);const h=el.href||"";return t&&h?"["+t+"]("+h+")":t;}' +
  'if(el.tagName==="CODE")return"`"+clean(el.innerText||el.textContent).replace(/`/g,"\\\\`")+"`";' +
  "return[...el.childNodes].map(inline).join('');};" +
  'const lines=["# "+(clean(document.title)||location.href),"","URL: "+location.href,"","## Content"];' +
  'const blocks=[...document.body.querySelectorAll("h1,h2,h3,h4,h5,h6,p,li,pre,blockquote,td,th")];' +
  "const seen=new Set();" +
  "for(const el of blocks){" +
  'if(!visible(el))continue;let s="";const tag=el.tagName;' +
  'if(/^H[1-6]$/.test(tag)){s="#".repeat(Number(tag[1]))+" "+inline(el);}' +
  'else if(tag==="LI"){s="- "+inline(el);}' +
  'else if(tag==="PRE"){s="```\\n"+(el.innerText||el.textContent||"").trimEnd()+"\\n```";}' +
  'else if(tag==="BLOCKQUOTE"){s="> "+clean(el.innerText||el.textContent);}' +
  "else{s=inline(el);}" +
  "s=s.trim();if(!s||seen.has(s))continue;seen.add(s);" +
  'lines.push("",s);if(lines.join("\\n").length>90000){lines.push("","[Content truncated by browser extractor.]");break;}}' +
  'lines.push("","## Visible links");let n=0;const linkSeen=new Set();' +
  'for(const a of document.querySelectorAll("a[href]")){' +
  "if(!visible(a))continue;const t=esc(a.innerText||a.textContent);if(t.length<3)continue;" +
  "let u;try{u=new URL(a.href)}catch{continue;}" +
  "if(!/^https?:$/.test(u.protocol)||linkSeen.has(u.href))continue;linkSeen.add(u.href);" +
  'lines.push("- ["+t.slice(0,160)+"]("+u.href+")");if(++n>=80)break;}' +
  'return lines.join("\\n");' +
  "})()";
325
+
326
/**
 * Resolves on the next `Page.loadEventFired` event seen by `cdp`, or after
 * `timeoutMs`, whichever comes first. The timer is cleared when the event wins.
 */
function waitForLoadEvent(cdp: CDPClient, timeoutMs: number): Promise<void> {
  return new Promise<void>((resolve) => {
    const timer = setTimeout(resolve, timeoutMs);
    cdp.onEvent("Page.loadEventFired", () => {
      clearTimeout(timer);
      resolve();
    });
  });
}

/**
 * Opens `url` in a fresh tab, optionally clicks a consent control and scrolls,
 * then evaluates `js` in the page and returns its string result.
 *
 * @param url           Page to navigate to.
 * @param js            Extractor expression evaluated in the page.
 * @param clickConsent  Run the consent-click script after the page loads.
 * @param dynamicScroll Scroll to the bottom three times (then back to the
 *                      top) before extracting.
 * @param onStatus      Receives human-readable progress messages.
 * @returns The extractor output, cut at `MAX_RESULT_BYTES` characters with a
 *   truncation notice appended when longer.
 * @throws Error if Chrome cannot be started or any CDP step fails. The tab is
 *   closed in every case.
 */
async function runInPage(
  url: string,
  js: string,
  clickConsent: boolean,
  dynamicScroll: boolean,
  onStatus: (msg: string) => void
): Promise<string> {
  await ensureChrome();

  const tab = await openTab();
  const cdp = new CDPClient();

  try {
    // Connect inside the try block so a failed connect still closes the tab.
    await cdp.connect(tab.wsUrl);

    // Enable domains
    await cdp.call("Page.enable");
    await cdp.call("Runtime.enable");

    // Start listening for the load event before navigating so it cannot be
    // missed; give up waiting after 10 seconds.
    const loaded = waitForLoadEvent(cdp, 10_000);

    onStatus(`Navigating to ${url}`);
    await cdp.call("Page.navigate", { url });
    await loaded;

    // Handle consent
    if (clickConsent) {
      const clicked = await cdp.evaluate(GOOGLE_CONSENT_JS);
      if (clicked) {
        onStatus(`Consent: ${clicked}`);
        // The click may trigger a reload: wait for it, but at most 5 seconds.
        await waitForLoadEvent(cdp, 5_000);
      }
    }

    // Scroll for dynamic pages
    if (dynamicScroll) {
      onStatus("Scrolling for dynamic content...");
      for (let i = 0; i < 3; i++) {
        await cdp.evaluate("window.scrollTo(0, document.body.scrollHeight)");
        await sleep(300);
      }
      await cdp.evaluate("window.scrollTo(0, 0)");
      await sleep(200);
    }

    // Extract
    onStatus("Extracting content...");
    const result = await cdp.evaluate(js);

    // Truncate
    if (result.length > MAX_RESULT_BYTES) {
      return result.slice(0, MAX_RESULT_BYTES) + "\n\n[Content truncated at 1MB]";
    }
    return result;
  } finally {
    cdp.disconnect();
    await closeTab(tab.targetId);
  }
}
396
+
397
+ // ── Public API ────────────────────────────────────────────────────────────
398
+
399
/** Result returned by `googleSearch` and `visitPage`. */
export interface SearchResult {
  /** URL that was opened in the browser for this request. */
  url: string;
  /** Markdown produced by the in-page extractor. */
  markdown: string;
}
403
+
404
/**
 * Runs a Google search for `query` in the managed Chrome and returns the
 * results page as Markdown.
 *
 * The consent-click step is enabled and dynamic scrolling is disabled.
 *
 * @param query    Search terms; URL-encoded into the `q` parameter.
 * @param onStatus Optional sink for progress messages.
 * @returns The extracted Markdown together with the search URL that was opened.
 */
export async function googleSearch(
  query: string,
  onStatus?: (msg: string) => void
): Promise<SearchResult> {
  const report = onStatus ?? (() => {});
  report(`Searching Google for: ${query}`);

  const url = "https://www.google.com/search?q=" + encodeURIComponent(query);
  const markdown = await runInPage(url, GOOGLE_SEARCH_JS, true, false, report);

  return { markdown, url };
}
417
+
418
/**
 * Opens `url` in the managed Chrome and returns the rendered page as Markdown.
 *
 * Both the consent-click step and dynamic scrolling are enabled.
 *
 * @param url      Page to visit.
 * @param onStatus Optional sink for progress messages.
 * @returns The extracted Markdown together with the URL that was requested.
 */
export async function visitPage(
  url: string,
  onStatus?: (msg: string) => void
): Promise<SearchResult> {
  const report = onStatus ?? (() => {});
  report(`Visiting: ${url}`);

  const markdown = await runInPage(url, EXTRACT_PAGE_JS, true, true, report);

  return { markdown, url };
}
428
+
429
/**
 * Kills the Chrome process started by this module, if one is being tracked,
 * and forgets its handle. Does nothing when no process is tracked.
 */
export function shutdownChrome() {
  const running = chromeProcess;
  if (!running) return;
  running.kill();
  chromeProcess = null;
}