pi-search-on-your-browser 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +73 -0
- package/index.ts +203 -0
- package/package.json +28 -0
- package/src/chrome.ts +434 -0
package/README.md
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
# pi-search-on-your-browser
|
|
2
|
+
|
|
3
|
+
Search Google in your **own visible Chrome browser** — the [ds4-agent](https://github.com/antirez/ds4) style by @antirez.
|
|
4
|
+
|
|
5
|
+
> "If you need AI to do a search for you in the real world, ds4-agent is basically SOTA, because it can access the web sites without any limitations given that it uses your local Chrome browser (no, not in headless mode, that's the trick...)"
|
|
6
|
+
> — [@antirez on X](https://x.com/antirez/status/2066233392916525379), 2026-06-14
|
|
7
|
+
|
|
8
|
+
**This Pi package uses exactly the same approach:** launches your visible Chrome (not headless), navigates to google.com via CDP, runs JavaScript extractors in the page, and returns compact Markdown results. No API keys. No headless detection. Your real browser fingerprint, cookies, and login sessions.
|
|
9
|
+
|
|
10
|
+
## How it works
|
|
11
|
+
|
|
12
|
+
When you call `google_search` or `visit_page`:
|
|
13
|
+
|
|
14
|
+
1. A **visible Chrome window** opens (not headless) with a dedicated profile at `~/.pi-search-browser/`
|
|
15
|
+
2. Chrome DevTools Protocol (CDP) is used to navigate and extract content
|
|
16
|
+
3. JavaScript runs in the page to extract readable markdown
|
|
17
|
+
4. Chrome stays alive between calls for speed (kill with `/google-search-kill`)
|
|
18
|
+
|
|
19
|
+
Because this is a real, visible browser with a persistent dedicated profile, any site you sign in to inside that Chrome window — paywalled sites, Twitter, GitHub, Google — stays signed in for later calls.
|
|
20
|
+
|
|
21
|
+
## Install
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
pi install /path/to/pi-search-on-your-browser
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
Or from git (once published):
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
pi install git:github.com/xezpeleta/pi-search-on-your-browser
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## Tools
|
|
34
|
+
|
|
35
|
+
### `google_search`
|
|
36
|
+
|
|
37
|
+
Search Google and get compact markdown links + text snippet.
|
|
38
|
+
|
|
39
|
+
```
|
|
40
|
+
google_search({ query: "TypeScript 5.7 release notes" })
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
### `visit_page`
|
|
44
|
+
|
|
45
|
+
Visit any URL and get the page content as markdown.
|
|
46
|
+
|
|
47
|
+
```
|
|
48
|
+
visit_page({ url: "https://example.com/article" })
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
## Commands
|
|
52
|
+
|
|
53
|
+
- `/google-search-kill` — Kill the Chrome browser
|
|
54
|
+
|
|
55
|
+
## Requirements
|
|
56
|
+
|
|
57
|
+
- Google Chrome or Chromium installed
|
|
58
|
+
- Node.js 22+ (the CDP client relies on Node's built-in global `WebSocket` and `fetch`)
|
|
59
|
+
|
|
60
|
+
## Comparison with ds4-agent
|
|
61
|
+
|
|
62
|
+
| | pi-search-on-your-browser | ds4-agent |
|
|
63
|
+
|---|---|---|
|
|
64
|
+
| Language | TypeScript (Node.js) | C |
|
|
65
|
+
| Chrome connection | CDP WebSocket (Node.js built-in `WebSocket`) | CDP WebSocket (manual RFC 6455) |
|
|
66
|
+
| Profile | `~/.pi-search-browser/` | `~/.ds4/browser` |
|
|
67
|
+
| Google consent | Auto-click "Accept all" (multi-language) | Auto-click "Accept all" (multi-language) |
|
|
68
|
+
| Page extraction | Same JS extractors, ported to TS | Inline JS in C |
|
|
69
|
+
| Dependencies | Zero npm deps (just Node.js built-ins) | Zero deps (just POSIX) |
|
|
70
|
+
|
|
71
|
+
## License
|
|
72
|
+
|
|
73
|
+
MIT
|
package/index.ts
ADDED
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* pi-search-on-your-browser — exact same approach as ds4-agent, for Pi
|
|
3
|
+
*
|
|
4
|
+
* @antirez's ds4-agent strategy:
|
|
5
|
+
* https://x.com/antirez/status/2066233392916525379
|
|
6
|
+
* https://github.com/antirez/ds4
|
|
7
|
+
*
|
|
8
|
+
* Same approach: visible Chrome (not headless), CDP WebSocket, inline JS
|
|
9
|
+
* extractors. No API keys, no headless detection.
|
|
10
|
+
*
|
|
11
|
+
* Registered tools:
|
|
12
|
+
* - google_search — Search Google in a visible Chrome browser, returns markdown links + snippet
|
|
13
|
+
* - visit_page — Visit a URL in a visible Chrome browser, returns rendered page as markdown
|
|
14
|
+
*
|
|
15
|
+
* Registered commands:
|
|
16
|
+
* - /google-search-kill — Kill the Chrome process
|
|
17
|
+
*
|
|
18
|
+
* Chrome runs in a visible window (not headless) with a dedicated profile at
|
|
19
|
+
* ~/.pi-search-browser/ — cookies and sessions persist across calls.
|
|
20
|
+
*/
|
|
21
|
+
|
|
22
|
+
import type { ExtensionAPI, ToolResult } from "@earendil-works/pi-coding-agent";
|
|
23
|
+
import { Text } from "@earendil-works/pi-tui";
|
|
24
|
+
import { Type } from "typebox";
|
|
25
|
+
import { googleSearch, visitPage, shutdownChrome } from "./src/chrome.js";
|
|
26
|
+
|
|
27
|
+
type RenderArgs = { query?: string; url?: string };
type RenderState = { expanded?: boolean; isPartial?: boolean };
type ToolTheme = {
  fg: (color: string, text: string) => string;
  bold: (text: string) => string;
  dim: (text: string) => string;
};

/** Shape of the `details` object both tools attach to a finished result. */
type ResultDetails = { url?: string; elapsed?: string; chars?: number };

/**
 * Returns the hostname of `url`, or `undefined` when the string is not a
 * parseable URL. Rendering must never throw because of a malformed URL.
 */
function safeHostname(url: string): string | undefined {
  try {
    return new URL(url).hostname;
  } catch {
    return undefined;
  }
}

/** Returns the text of the first text item in a tool result, if any. */
function firstText(result: ToolResult): string | undefined {
  return result.content.find((c) => c.type === "text")?.text;
}

/**
 * Shared result renderer for both tools.
 *
 * - partial result: shows the latest progress message (or `pendingLabel`);
 * - collapsed: one summary line "chars · elapsed · hostname";
 * - expanded: the full markdown, themed line by line.
 */
function renderToolResult(
  result: ToolResult,
  state: RenderState,
  theme: ToolTheme,
  pendingLabel: string,
): Text {
  if (state.isPartial) {
    return new Text(theme.fg("warning", firstText(result) ?? pendingLabel), 0, 0);
  }

  if (!state.expanded) {
    const details = result.details as ResultDetails | undefined;
    const parts: string[] = [];
    if (details?.chars) parts.push(`${details.chars.toLocaleString()} chars`);
    if (details?.elapsed) parts.push(details.elapsed);
    if (details?.url) {
      // Guarded for both tools: an unparseable URL is simply left out.
      const host = safeHostname(details.url);
      if (host !== undefined) parts.push(host);
    }
    return new Text(theme.fg("muted", ` → ${parts.join(" · ")}`), 0, 0);
  }

  const text = firstText(result) ?? "";
  const themed = text
    .split("\n")
    .map((line) => theme.fg("toolOutput", line))
    .join("\n");
  return new Text(`\n${themed}`, 0, 0);
}

/**
 * Pi extension entry point.
 *
 * Registers the `google_search` and `visit_page` tools (both backed by the
 * CDP helpers in `./src/chrome.js`) and the `/google-search-kill` command.
 *
 * @param pi Extension API handed in by the Pi host.
 */
export default function searchOnYourBrowser(pi: ExtensionAPI) {
  // ── google_search tool ───────────────────────────────────────────────────

  pi.registerTool({
    name: "google_search",
    label: "Google Search",
    description:
      "Search Google in your visible Chrome browser and return compact Markdown links. Uses your real browser fingerprint — no API keys, no headless detection.",
    promptSnippet: "google_search: search Google in your visible browser, returns markdown links",
    promptGuidelines: [
      "Use google_search to find web pages when you need real-time information. Results include clickable markdown links.",
    ],
    parameters: Type.Object({
      query: Type.String({ description: "Search query to send to Google" }),
    }),
    async execute(_toolCallId, params, _signal, onUpdate) {
      const { query } = params;
      // A blank query is reported as a tool-level message rather than thrown.
      if (!query || !query.trim()) {
        return {
          content: [{ type: "text" as const, text: "Tool error: google_search requires a query." }],
          details: {},
        };
      }

      try {
        const started = Date.now();
        const result = await googleSearch(query.trim(), (msg) => {
          // Forward status messages as partial results so the UI shows progress.
          onUpdate?.({
            content: [{ type: "text", text: msg }],
            details: { _progress: true },
          });
        });
        const elapsed = ((Date.now() - started) / 1000).toFixed(1);

        return {
          content: [{ type: "text" as const, text: result.markdown }],
          details: { url: result.url, elapsed: `${elapsed}s`, chars: result.markdown.length },
        };
      } catch (err: unknown) {
        const message = err instanceof Error ? err.message : String(err);
        throw new Error(`google_search failed: ${message}`);
      }
    },

    renderCall(args: Partial<RenderArgs>, theme: ToolTheme) {
      // Show at most 60 characters of the query, with an ellipsis when cut.
      const full = args.query || "";
      const q = full.slice(0, 60);
      const trunc = q.length < full.length ? "..." : "";
      return new Text(
        `${theme.fg("toolTitle", theme.bold("google_search"))} "${theme.fg("accent", q + trunc)}"`,
        0,
        0,
      );
    },

    renderResult(result: ToolResult, { expanded, isPartial }: RenderState, theme: ToolTheme) {
      return renderToolResult(result, { expanded, isPartial }, theme, "Searching...");
    },
  });

  // ── visit_page tool ──────────────────────────────────────────────────────

  pi.registerTool({
    name: "visit_page",
    label: "Visit Page",
    description:
      "Open a URL in your visible Chrome browser and return the rendered page as Markdown. Works with authenticated sites, paywalls, and JavaScript-heavy pages.",
    promptSnippet: "visit_page: visit a URL in your visible browser, returns rendered markdown",
    promptGuidelines: [
      "Use visit_page to read a web page you found via google_search. It opens in your visible Chrome so authenticated/paywalled sites work.",
    ],
    parameters: Type.Object({
      url: Type.String({ description: "Full URL to visit" }),
    }),
    async execute(_toolCallId, params, _signal, onUpdate) {
      const { url } = params;
      if (!url || !url.trim()) {
        return {
          content: [{ type: "text" as const, text: "Tool error: visit_page requires a URL." }],
          details: {},
        };
      }

      // Normalise through the URL parser; reject anything it cannot parse.
      let targetUrl: string;
      try {
        targetUrl = new URL(url.trim()).toString();
      } catch {
        return {
          content: [{ type: "text" as const, text: `Tool error: visit_page: invalid URL: ${url}` }],
          details: {},
        };
      }

      try {
        const started = Date.now();
        const result = await visitPage(targetUrl, (msg) => {
          onUpdate?.({
            content: [{ type: "text", text: msg }],
            details: { _progress: true },
          });
        });
        const elapsed = ((Date.now() - started) / 1000).toFixed(1);

        return {
          content: [{ type: "text" as const, text: result.markdown }],
          details: { url: result.url, elapsed: `${elapsed}s`, chars: result.markdown.length },
        };
      } catch (err: unknown) {
        const message = err instanceof Error ? err.message : String(err);
        throw new Error(`visit_page failed: ${message}`);
      }
    },

    renderCall(args: Partial<RenderArgs>, theme: ToolTheme) {
      const u = args.url || "";
      // Fall back to the raw argument when it is not (yet) a parseable URL.
      const hostname = safeHostname(u) ?? u;
      return new Text(
        `${theme.fg("toolTitle", theme.bold("visit_page"))} ${theme.fg("accent", hostname)}`,
        0,
        0,
      );
    },

    renderResult(result: ToolResult, { expanded, isPartial }: RenderState, theme: ToolTheme) {
      return renderToolResult(result, { expanded, isPartial }, theme, "Loading...");
    },
  });

  // ── Commands ─────────────────────────────────────────────────────────────

  pi.registerCommand("google-search-kill", {
    description: "Kill the Google Search Chrome browser process",
    handler: async (_args, ctx) => {
      shutdownChrome();
      ctx.ui.notify("Google Search Chrome killed.", "info");
    },
  });
}
|
package/package.json
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "pi-search-on-your-browser",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Search Google in your own visible Chrome — same approach as @antirez's ds4-agent. Zero dependencies, no API keys.",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"keywords": ["pi-package", "search", "google", "chrome", "cdp", "browser", "ds4-agent"],
|
|
7
|
+
"author": "xezpeleta",
|
|
8
|
+
"license": "MIT",
|
|
9
|
+
"engines": {
|
|
10
|
+
"node": ">=21.0.0"
|
|
11
|
+
},
|
|
12
|
+
"files": [
|
|
13
|
+
"index.ts",
|
|
14
|
+
"src/",
|
|
15
|
+
"README.md"
|
|
16
|
+
],
|
|
17
|
+
"pi": {
|
|
18
|
+
"extensions": ["./index.ts"]
|
|
19
|
+
},
|
|
20
|
+
"peerDependencies": {
|
|
21
|
+
"@earendil-works/pi-coding-agent": "*"
|
|
22
|
+
},
|
|
23
|
+
"peerDependenciesMeta": {
|
|
24
|
+
"@earendil-works/pi-coding-agent": {
|
|
25
|
+
"optional": true
|
|
26
|
+
}
|
|
27
|
+
}
|
|
28
|
+
}
|
package/src/chrome.ts
ADDED
|
@@ -0,0 +1,434 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Chrome DevTools Protocol (CDP) client — Node.js built-in WebSocket.
|
|
3
|
+
*
|
|
4
|
+
* Same approach as ds4-agent (@antirez): visible Chrome (not headless),
|
|
5
|
+
* CDP WebSocket navigation, inline JavaScript extractors in the page.
|
|
6
|
+
*
|
|
7
|
+
* Reference: https://x.com/antirez/status/2066233392916525379
|
|
8
|
+
*
|
|
9
|
+
* Profile at ~/.pi-search-browser/ — dedicated, like ds4-agent's ~/.ds4/browser.
|
|
10
|
+
* Cookies and sessions persist across calls.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import { spawn, type ChildProcess } from "node:child_process";
|
|
14
|
+
import { mkdirSync, existsSync } from "node:fs";
|
|
15
|
+
import { homedir } from "node:os";
|
|
16
|
+
import { join } from "node:path";
|
|
17
|
+
|
|
18
|
+
const PROFILE_DIR = join(homedir(), ".pi-search-browser");
|
|
19
|
+
const CDP_PORT = 9322;
|
|
20
|
+
const CDP_TIMEOUT_MS = 30_000;
|
|
21
|
+
const MAX_RESULT_BYTES = 1_048_576; // 1 MB
|
|
22
|
+
|
|
23
|
+
// ── Utilities ─────────────────────────────────────────────────────────────
|
|
24
|
+
|
|
25
|
+
/**
 * Resolves (with no value) after roughly `ms` milliseconds.
 *
 * @param ms Delay handed to `setTimeout`.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
|
28
|
+
|
|
29
|
+
/**
 * Picks the Chrome/Chromium executable to launch.
 *
 * The `CHROME_PATH` environment variable is tried first, then a fixed list of
 * Linux and macOS install locations. The first candidate that exists on disk
 * wins; if none exists the bare command name "google-chrome" is returned so
 * the lookup is left to `PATH`.
 */
function findChrome(): string {
  const candidates: Array<string | undefined> = [
    process.env.CHROME_PATH,
    "/usr/bin/google-chrome-stable",
    "/usr/bin/google-chrome",
    "/usr/bin/chromium",
    "/usr/bin/chromium-browser",
    "/snap/bin/chromium",
    "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
    "/Applications/Chromium.app/Contents/MacOS/Chromium",
  ];
  // Unset or empty entries are skipped without touching the filesystem.
  const installed = candidates.find(
    (candidate) => candidate !== undefined && candidate !== "" && existsSync(candidate),
  );
  return installed ?? "google-chrome";
}
|
|
45
|
+
|
|
46
|
+
// ── CDP over WebSocket ────────────────────────────────────────────────────
|
|
47
|
+
|
|
48
|
+
/** Settle callbacks for one in-flight CDP request, keyed by its message id. */
interface PendingCall {
  resolve: (v: unknown) => void;
  reject: (e: Error) => void;
}

/**
 * Minimal Chrome DevTools Protocol client over the global `WebSocket`.
 *
 * One instance talks to one CDP endpoint (browser or page). Requests are
 * matched to responses by numeric id; messages without an id are treated as
 * events and dispatched to handlers registered through {@link onEvent}.
 */
class CDPClient {
  private ws: WebSocket | null = null;
  private nextId = 1;
  private pending = new Map<number, PendingCall>();
  private connectPromise: Promise<void> | null = null;
  private eventHandlers = new Map<string, Array<(params: unknown) => void>>();

  /**
   * Opens the WebSocket and resolves once it is open.
   *
   * @param wsUrl CDP WebSocket endpoint.
   * @throws Error when the socket errors, closes before opening, or does not
   *   open within `CDP_TIMEOUT_MS`.
   */
  async connect(wsUrl: string): Promise<void> {
    this.connectPromise = new Promise<void>((resolve, reject) => {
      const ws = new WebSocket(wsUrl);
      this.ws = ws;

      const timer = setTimeout(() => {
        ws.close();
        reject(new Error(`WebSocket connect timeout`));
      }, CDP_TIMEOUT_MS);

      ws.onopen = () => {
        clearTimeout(timer);
        resolve();
      };

      ws.onmessage = (event) => this.handleMessage(event.data);

      ws.onerror = () => {
        clearTimeout(timer);
        reject(new Error("WebSocket connection error"));
      };

      ws.onclose = () => {
        clearTimeout(timer);
        // No-op when the connect promise has already settled.
        reject(new Error("WebSocket closed before it opened"));
        // Only react if this is still the active socket; disconnect() has
        // already cleaned up otherwise.
        if (this.ws === ws) {
          this.ws = null;
          this.failPending(new Error("CDP connection closed"));
        }
      };
    });
    await this.connectPromise;
  }

  /**
   * Registers a handler for a CDP event. Handlers stay registered for the
   * lifetime of the client; there is no removal API.
   *
   * @param method Event name, e.g. "Page.loadEventFired".
   * @param handler Called with the event's `params`.
   */
  onEvent(method: string, handler: (params: unknown) => void) {
    const handlers = this.eventHandlers.get(method) || [];
    handlers.push(handler);
    this.eventHandlers.set(method, handlers);
  }

  /**
   * Sends one CDP command and resolves with its `result`.
   *
   * @param method CDP method name.
   * @param params Command parameters (defaults to an empty object).
   * @throws Error when not connected, when Chrome answers with an error, when
   *   no answer arrives within `CDP_TIMEOUT_MS`, or when the connection is
   *   closed while the call is in flight.
   */
  async call(method: string, params: Record<string, unknown> = {}): Promise<unknown> {
    const ws = this.ws;
    if (!ws || ws.readyState !== WebSocket.OPEN) {
      throw new Error("CDP not connected");
    }
    const id = this.nextId++;
    const msg = JSON.stringify({ id, method, params });

    return new Promise((resolve, reject) => {
      const timer = setTimeout(() => {
        this.pending.delete(id);
        reject(new Error(`CDP call timeout: ${method}`));
      }, CDP_TIMEOUT_MS);

      this.pending.set(id, {
        resolve: (v) => { clearTimeout(timer); resolve(v); },
        reject: (e) => { clearTimeout(timer); reject(e); },
      });

      try {
        ws.send(msg);
      } catch (err: unknown) {
        // Do not leave a dead entry and a live timer behind if send() throws.
        this.pending.delete(id);
        clearTimeout(timer);
        reject(err instanceof Error ? err : new Error(String(err)));
      }
    });
  }

  /**
   * Evaluates `expression` in the page (awaiting promises, returning by value)
   * and returns the result as a string.
   *
   * @returns `String(value)` when a value came back, otherwise the remote
   *   object's description, otherwise "".
   * @throws Error when the expression threw in the page; previously the
   *   exception's description was returned as if it were the result.
   */
  async evaluate(expression: string): Promise<string> {
    const response = (await this.call("Runtime.evaluate", {
      expression,
      returnByValue: true,
      awaitPromise: true,
    })) as
      | {
          result?: { value?: unknown; description?: string };
          exceptionDetails?: { text?: string; exception?: { description?: string } };
        }
      | undefined;

    const thrown = response?.exceptionDetails;
    if (thrown) {
      const reason = thrown.exception?.description ?? thrown.text ?? "unknown error";
      throw new Error(`Page script threw: ${reason}`);
    }

    const remote = response?.result;
    if (remote?.value !== undefined) return String(remote.value);
    return remote?.description ?? "";
  }

  /**
   * Closes the socket (if any) and rejects every call still in flight so
   * their timers do not keep running for up to `CDP_TIMEOUT_MS`.
   */
  disconnect() {
    const ws = this.ws;
    this.ws = null;
    if (ws) {
      ws.close();
    }
    this.failPending(new Error("CDP connection closed"));
  }

  /** Parses one incoming frame and routes it to an event handler or a pending call. */
  private handleMessage(data: unknown) {
    let msg: { id?: number; method?: string; result?: unknown; error?: { message: string }; params?: unknown };
    try {
      msg = JSON.parse(data as string);
    } catch {
      return; // not JSON — ignore
    }
    // Events (no id field) — dispatch to handlers
    if (msg.id === undefined || msg.id === null) {
      if (msg.method) {
        const handlers = this.eventHandlers.get(msg.method);
        if (handlers) {
          for (const h of handlers) h(msg.params);
        }
      }
      return;
    }
    const cb = this.pending.get(msg.id);
    if (!cb) return; // late answer to a call that already timed out
    this.pending.delete(msg.id);
    if (msg.error) {
      cb.reject(new Error(`CDP error: ${msg.error.message || JSON.stringify(msg.error)}`));
    } else {
      cb.resolve(msg.result);
    }
  }

  /** Rejects and forgets every in-flight call. */
  private failPending(reason: Error) {
    const inFlight = [...this.pending.values()];
    this.pending.clear();
    for (const entry of inFlight) entry.reject(reason);
  }
}
|
|
155
|
+
|
|
156
|
+
// ── Chrome process management ─────────────────────────────────────────────
|
|
157
|
+
|
|
158
|
+
let chromeProcess: ChildProcess | null = null;
|
|
159
|
+
|
|
160
|
+
/**
 * Reports whether a DevTools HTTP endpoint answers on `CDP_PORT`.
 *
 * The probe is bounded by a 2-second timeout so that a port which accepts
 * connections but never answers cannot stall every tool call.
 *
 * @returns `true` when `/json/version` answers with a 2xx status; `false` on
 *   any failure (refused connection, timeout, non-2xx status).
 */
async function isChromeAlive(): Promise<boolean> {
  try {
    const resp = await fetch(`http://127.0.0.1:${CDP_PORT}/json/version`, {
      signal: AbortSignal.timeout(2_000),
    });
    const alive = resp.ok;
    // Only the status matters; release the unread body instead of leaving it open.
    await resp.body?.cancel().catch(() => undefined);
    return alive;
  } catch {
    return false;
  }
}
|
|
168
|
+
|
|
169
|
+
/**
 * Starts a visible Chrome with remote debugging on `CDP_PORT` and the
 * dedicated profile in `PROFILE_DIR`, then polls (every 500 ms, up to 60
 * times) until the DevTools endpoint answers.
 *
 * @throws Error when the executable cannot be spawned, or when the endpoint
 *   does not come up within the polling window.
 */
async function launchChrome(): Promise<void> {
  mkdirSync(PROFILE_DIR, { recursive: true });

  const chromePath = findChrome();

  console.error(`[pi-search] Launching visible Chrome at ${chromePath}`);

  const args = [
    `--remote-debugging-port=${CDP_PORT}`,
    "--remote-allow-origins=*",
    `--user-data-dir=${PROFILE_DIR}`,
    "--no-first-run",
    "--no-default-browser-check",
    "--disable-sync",
    "--password-store=basic",
    "--mute-audio",
    "about:blank",
  ];

  const child = spawn(chromePath, args, {
    stdio: ["ignore", "ignore", "ignore"],
    detached: false,
  });
  chromeProcess = child;

  // Held in an object so the value set inside the listener is visible to the
  // polling loop without fighting control-flow narrowing.
  const spawnState: { error?: Error } = {};

  // Without an 'error' listener a missing or non-executable binary would raise
  // an unhandled 'error' event and take the whole host process down.
  child.on("error", (err: Error) => {
    spawnState.error = err;
    console.error(`[pi-search] Failed to start Chrome: ${err.message}`);
    if (chromeProcess === child) chromeProcess = null;
  });

  child.on("exit", (code) => {
    console.error(`[pi-search] Chrome exited with code ${code}`);
    // Only clear the handle if it still refers to this process; a newer
    // launch may have replaced it in the meantime.
    if (chromeProcess === child) chromeProcess = null;
  });

  // Wait for CDP to become available. An early exit of the child is not
  // treated as failure by itself: the endpoint check decides.
  for (let i = 0; i < 60; i++) {
    if (spawnState.error) {
      throw new Error(`Could not launch Chrome at ${chromePath}: ${spawnState.error.message}`);
    }
    if (await isChromeAlive()) {
      console.error("[pi-search] Chrome is ready");
      return;
    }
    await sleep(500);
  }
  throw new Error("Chrome did not become ready within 30s");
}
|
|
208
|
+
|
|
209
|
+
/**
 * Makes sure a Chrome with a live DevTools endpoint is available.
 *
 * If the endpoint already answers nothing happens. Otherwise a previously
 * spawned (but unresponsive) child is killed, half a second is allowed for
 * it to go away, and a fresh Chrome is launched.
 */
async function ensureChrome(): Promise<void> {
  const alreadyUp = await isChromeAlive();
  if (alreadyUp) {
    return;
  }

  const stale = chromeProcess;
  if (stale) {
    stale.kill();
    chromeProcess = null;
    await sleep(500);
  }

  await launchChrome();
}
|
|
218
|
+
|
|
219
|
+
// ── Page operations ──────────────────────────────────────────────────────
|
|
220
|
+
|
|
221
|
+
interface CDPTab {
|
|
222
|
+
wsUrl: string;
|
|
223
|
+
targetId: string;
|
|
224
|
+
}
|
|
225
|
+
|
|
226
|
+
/**
 * Reads the browser-level CDP WebSocket URL from `/json/version`.
 *
 * @returns The `webSocketDebuggerUrl` reported by Chrome.
 * @throws Error when the endpoint answers with a non-2xx status or the
 *   payload carries no usable `webSocketDebuggerUrl` (previously `undefined`
 *   was returned typed as a string and only failed later, inside `connect`).
 */
async function getBrowserWSUrl(): Promise<string> {
  const resp = await fetch(`http://127.0.0.1:${CDP_PORT}/json/version`);
  if (!resp.ok) {
    throw new Error(`Chrome /json/version returned HTTP ${resp.status}`);
  }
  const data = (await resp.json()) as { webSocketDebuggerUrl?: unknown } | null;
  const wsUrl = data?.webSocketDebuggerUrl;
  if (typeof wsUrl !== "string" || wsUrl === "") {
    throw new Error("Chrome /json/version did not report a webSocketDebuggerUrl");
  }
  return wsUrl;
}
|
|
231
|
+
|
|
232
|
+
/**
 * Creates a new background tab (initially `about:blank`) through the
 * browser-level CDP endpoint and returns how to reach it.
 *
 * @returns The page-level WebSocket URL and the target id of the new tab.
 * @throws Error when the browser endpoint cannot be reached, the command
 *   fails, or Chrome reports no `targetId`.
 */
async function openTab(): Promise<CDPTab> {
  const browserUrl = await getBrowserWSUrl();
  const browserCdp = new CDPClient();

  let created: { targetId?: unknown } | undefined;
  try {
    await browserCdp.connect(browserUrl);
    created = (await browserCdp.call("Target.createTarget", {
      url: "about:blank",
      background: true,
      newWindow: false,
    })) as { targetId?: unknown } | undefined;
  } finally {
    // Always release the browser-level socket, also when the call above fails.
    browserCdp.disconnect();
  }

  const targetId = created?.targetId;
  if (typeof targetId !== "string" || targetId === "") {
    throw new Error("Target.createTarget did not return a targetId");
  }

  const wsUrl = `ws://127.0.0.1:${CDP_PORT}/devtools/page/${targetId}`;
  return { wsUrl, targetId };
}
|
|
248
|
+
|
|
249
|
+
/**
 * Asks Chrome to close a tab via the DevTools HTTP endpoint
 * (`/json/close/<targetId>`). Failures are deliberately ignored: closing is
 * cleanup only and must never mask the result of the actual operation.
 *
 * @param targetId Target id of the tab to close.
 */
async function closeTab(targetId: string): Promise<void> {
  try {
    const encodedId = encodeURIComponent(targetId);
    const endpoint = `http://127.0.0.1:${CDP_PORT}/json/close/${encodedId}`;
    await fetch(endpoint);
  } catch {
    // best effort
  }
}
|
|
258
|
+
|
|
259
|
+
// ── JavaScript extractors (ds4-agent style) ────────────────────────────────
|
|
260
|
+
|
|
261
|
+
// Backtick constant for building JS strings that contain backticks
|
|
262
|
+
const BT = "`";
|
|
263
|
+
|
|
264
|
+
/**
 * Page-side script: clicks the first button-like element whose whitespace-
 * collapsed label matches one of the "accept/agree" patterns.
 * Evaluates to "clicked <label>" when something was clicked, "" otherwise.
 */
const GOOGLE_CONSENT_JS = [
  "(() => {",
  'const clean=s=>(s||"").replace(/\\s+/g," ").trim();',
  "const pats=[/accept all/i,/i agree/i,/agree/i,/accetta tutto/i,/tout accepter/i,/aceptar todo/i,/alle akzeptieren/i];",
  'const els=[...document.querySelectorAll("button,[role=button],input[type=submit],a")];',
  "for(const el of els){const t=clean(el.innerText||el.value||el.textContent);",
  "if(!t)continue;if(pats.some(p=>p.test(t))){el.click();return'clicked '+t;}}",
  'return"";',
  "})()",
].join("");
|
|
273
|
+
|
|
274
|
+
/**
 * Page-side script for a Google results page. Produces markdown with:
 *  - a "Visible links" list of up to 30 distinct, visible http(s) links that
 *    do not point at google/gstatic/googleusercontent hosts ("/url?q=…"
 *    redirect links are unwrapped first; labels shorter than 3 characters are
 *    skipped, labels are capped at 180 characters);
 *  - a "Text snippet" with the first 1200 characters of the page text.
 */
const GOOGLE_SEARCH_JS = [
  "(() => {",
  'const clean=s=>(s||"").replace(/\\s+/g," ").trim();',
  'const esc=s=>clean(s).replace(/\\\\/g,"\\\\\\\\").replace(/\\[/g,"\\\\[").replace(/\\]/g,"\\\\]").replace(/\\n/g," ");',
  'const visible=el=>{const r=el.getBoundingClientRect();const st=getComputedStyle(el);return r.width>0&&r.height>0&&st.display!=="none"&&st.visibility!=="hidden"&&st.opacity!=="0";};',
  "const bad=h=>/(^|\\.)google\\./.test(h)||/(^|\\.)gstatic\\./.test(h)||/(^|\\.)googleusercontent\\./.test(h);",
  'const lines=["# Google search results","","URL: "+location.href,"","## Visible links"];',
  "const seen=new Set();",
  'for(const a of document.querySelectorAll("a[href]")){',
  "if(!visible(a))continue;let href=a.href||'';",
  'try{const u=new URL(href);if(u.pathname==="/url"&&u.searchParams.get("q"))href=u.searchParams.get("q");}catch{}',
  "let u;try{u=new URL(href)}catch{continue;}",
  "if(!/^https?:$/.test(u.protocol))continue;",
  "if(bad(u.hostname))continue;",
  "const text=esc(a.innerText||a.textContent);if(text.length<3)continue;",
  "if(seen.has(u.href))continue;seen.add(u.href);",
  'lines.push("- ["+text.slice(0,180)+"]("+u.href+")");if(seen.size>=30)break;}',
  'lines.push("","## Text snippet",clean(document.body.innerText).slice(0,1200));',
  'return lines.join("\\n");',
  "})()",
].join("");
|
|
294
|
+
|
|
295
|
+
/**
 * Page-side script that renders the current document as markdown:
 *  - title and URL header;
 *  - "## Content": visible headings, paragraphs, list items, <pre> blocks,
 *    blockquotes and table cells, de-duplicated, with links and inline code
 *    converted to markdown; extraction stops once the output exceeds 90000
 *    characters;
 *  - "## Visible links": up to 80 distinct visible http(s) links.
 *
 * Escaping note: every backslash below is doubled once for this TypeScript
 * string literal. The page must receive "\n" (a newline escape) and "\\`"
 * (backslash + backtick). The earlier version doubled these a second time,
 * so the output was joined with a literal backslash-n instead of line
 * breaks and backticks in inline code were prefixed with two backslashes.
 */
const EXTRACT_PAGE_JS = [
  "(() => {",
  'const clean=s=>(s||"").replace(/\\s+/g," ").trim();',
  'const esc=s=>clean(s).replace(/\\\\/g,"\\\\\\\\").replace(/\\[/g,"\\\\[").replace(/\\]/g,"\\\\]").replace(/\\n/g," ");',
  'const visible=el=>{const r=el.getBoundingClientRect();const st=getComputedStyle(el);return r.width>0&&r.height>0&&st.display!=="none"&&st.visibility!=="hidden"&&st.opacity!=="0";};',
  "const inline=n=>{if(!n)return'';if(n.nodeType===3)return n.nodeValue;if(n.nodeType!==1)return'';const el=n;",
  'if(el.tagName==="SCRIPT"||el.tagName==="STYLE"||el.tagName==="NOSCRIPT")return"";',
  'if(el.tagName==="A"){const t=esc(el.innerText||el.textContent);const h=el.href||"";return t&&h?"["+t+"]("+h+")":t;}',
  // Inline code: wrap in backticks, escaping embedded backticks with ONE backslash.
  'if(el.tagName==="CODE")return"`"+clean(el.innerText||el.textContent).replace(/`/g,"\\\\`")+"`";',
  "return[...el.childNodes].map(inline).join('');};",
  'const lines=["# "+(clean(document.title)||location.href),"","URL: "+location.href,"","## Content"];',
  'const blocks=[...document.body.querySelectorAll("h1,h2,h3,h4,h5,h6,p,li,pre,blockquote,td,th")];',
  "const seen=new Set();",
  "for(const el of blocks){",
  'if(!visible(el))continue;let s="";const tag=el.tagName;',
  'if(/^H[1-6]$/.test(tag)){s="#".repeat(Number(tag[1]))+" "+inline(el);}',
  'else if(tag==="LI"){s="- "+inline(el);}',
  // Fenced block: real newlines around the preformatted text.
  'else if(tag==="PRE"){s="```\\n"+(el.innerText||el.textContent||"").trimEnd()+"\\n```";}',
  'else if(tag==="BLOCKQUOTE"){s="> "+clean(el.innerText||el.textContent);}',
  "else{s=inline(el);}",
  "s=s.trim();if(!s||seen.has(s))continue;seen.add(s);",
  'lines.push("",s);if(lines.join("\\n").length>90000){lines.push("","[Content truncated by browser extractor.]");break;}}',
  'lines.push("","## Visible links");let n=0;const linkSeen=new Set();',
  'for(const a of document.querySelectorAll("a[href]")){',
  "if(!visible(a))continue;const t=esc(a.innerText||a.textContent);if(t.length<3)continue;",
  "let u;try{u=new URL(a.href)}catch{continue;}",
  "if(!/^https?:$/.test(u.protocol)||linkSeen.has(u.href))continue;linkSeen.add(u.href);",
  'lines.push("- ["+t.slice(0,160)+"]("+u.href+")");if(++n>=80)break;}',
  'return lines.join("\\n");',
  "})()",
].join("");
|
|
325
|
+
|
|
326
|
+
/**
 * Resolves on the next `Page.loadEventFired` seen by `cdp`, or after
 * `timeoutMs`, whichever comes first. The listener is attached synchronously,
 * so call this BEFORE triggering the navigation you want to wait for. The
 * fallback timer is cleared as soon as the event arrives.
 */
function waitForLoadEvent(cdp: CDPClient, timeoutMs: number): Promise<void> {
  return new Promise<void>((resolve) => {
    const timer = setTimeout(resolve, timeoutMs);
    cdp.onEvent("Page.loadEventFired", () => {
      clearTimeout(timer);
      resolve();
    });
  });
}

/**
 * Opens `url` in a fresh tab, optionally clicks a consent button and scrolls,
 * then evaluates `js` in the page and returns its string result.
 *
 * @param url Address to navigate to.
 * @param js Page-side extractor expression; its value becomes the result.
 * @param clickConsent When true, runs the consent-click script after load and,
 *   if something was clicked, waits up to 5 s for the follow-up load.
 * @param dynamicScroll When true, scrolls to the bottom three times and back
 *   to the top before extracting.
 * @param onStatus Receives human-readable progress messages.
 * @returns The extractor output, cut to `MAX_RESULT_BYTES` characters with a
 *   truncation marker when longer.
 * @throws Error when Chrome cannot be reached, a CDP call fails, or Chrome
 *   reports that the navigation itself failed.
 */
async function runInPage(
  url: string,
  js: string,
  clickConsent: boolean,
  dynamicScroll: boolean,
  onStatus: (msg: string) => void
): Promise<string> {
  await ensureChrome();

  const tab = await openTab();
  const cdp = new CDPClient();

  try {
    // Connect inside the try so the tab is closed even if connecting fails.
    await cdp.connect(tab.wsUrl);

    // Enable domains
    await cdp.call("Page.enable");
    await cdp.call("Runtime.enable");

    // Arm the load wait first, then navigate (event-driven, 10 s fallback).
    const loaded = waitForLoadEvent(cdp, 10_000);

    onStatus(`Navigating to ${url}`);
    const navigation = (await cdp.call("Page.navigate", { url })) as
      | { errorText?: string }
      | undefined;
    if (navigation?.errorText) {
      // Chrome sets errorText only when the navigation failed; surface it
      // instead of extracting the browser's error page as if it were content.
      throw new Error(`Navigation to ${url} failed: ${navigation.errorText}`);
    }

    await loaded;

    // Handle consent
    if (clickConsent) {
      const clicked = await cdp.evaluate(GOOGLE_CONSENT_JS);
      if (clicked) {
        onStatus(`Consent: ${clicked}`);
        // The click usually triggers a reload; give it a short, bounded wait.
        await waitForLoadEvent(cdp, 5_000);
      }
    }

    // Scroll for dynamic pages
    if (dynamicScroll) {
      onStatus("Scrolling for dynamic content...");
      for (let i = 0; i < 3; i++) {
        await cdp.evaluate("window.scrollTo(0, document.body.scrollHeight)");
        await sleep(300);
      }
      await cdp.evaluate("window.scrollTo(0, 0)");
      await sleep(200);
    }

    // Extract
    onStatus("Extracting content...");
    const result = await cdp.evaluate(js);

    // Truncate (the limit is applied to string length)
    if (result.length > MAX_RESULT_BYTES) {
      return result.slice(0, MAX_RESULT_BYTES) + "\n\n[Content truncated at 1MB]";
    }
    return result;
  } finally {
    cdp.disconnect();
    await closeTab(tab.targetId);
  }
}
|
|
396
|
+
|
|
397
|
+
// ── Public API ────────────────────────────────────────────────────────────
|
|
398
|
+
|
|
399
|
+
export interface SearchResult {
|
|
400
|
+
markdown: string;
|
|
401
|
+
url: string;
|
|
402
|
+
}
|
|
403
|
+
|
|
404
|
+
/**
 * Runs a Google search in the visible Chrome and returns the extracted
 * results page as markdown.
 *
 * @param query Search terms; URL-encoded into the `q` parameter.
 * @param onStatus Optional sink for progress messages.
 * @returns The markdown produced by the search extractor and the search URL
 *   that was requested.
 */
export async function googleSearch(
  query: string,
  onStatus?: (msg: string) => void
): Promise<SearchResult> {
  const report = onStatus ?? (() => {});
  report(`Searching Google for: ${query}`);

  const url = "https://www.google.com/search?q=" + encodeURIComponent(query);

  // Consent click enabled, dynamic scrolling disabled for result pages.
  const markdown = await runInPage(url, GOOGLE_SEARCH_JS, true, false, report);
  const outcome: SearchResult = { markdown, url };
  return outcome;
}
|
|
417
|
+
|
|
418
|
+
/**
 * Opens `url` in the visible Chrome and returns the rendered page as
 * markdown.
 *
 * @param url Address to visit; passed to the browser unchanged.
 * @param onStatus Optional sink for progress messages.
 * @returns The markdown produced by the page extractor together with the URL
 *   that was requested.
 */
export async function visitPage(
  url: string,
  onStatus?: (msg: string) => void
): Promise<SearchResult> {
  const report = onStatus ?? (() => {});
  report(`Visiting: ${url}`);

  // Both the consent click and dynamic scrolling are enabled for page visits.
  const markdown = await runInPage(url, EXTRACT_PAGE_JS, true, true, report);
  const outcome: SearchResult = { markdown, url };
  return outcome;
}
|
|
428
|
+
|
|
429
|
+
/**
 * Kills the Chrome process spawned by this module, if there is one, and
 * forgets its handle. Does nothing when no spawned process is tracked.
 */
export function shutdownChrome() {
  const running = chromeProcess;
  if (!running) {
    return;
  }
  running.kill();
  chromeProcess = null;
}
|