@ogulcancelik/pi-web-browse 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +73 -0
- package/SKILL.md +101 -0
- package/lib/bot-protection.js +92 -0
- package/lib/browser-bin.js +133 -0
- package/lib/cdp.js +218 -0
- package/lib/daemon-client.js +134 -0
- package/lib/daemon.js +194 -0
- package/lib/debug-dump.js +76 -0
- package/lib/extract.js +58 -0
- package/lib/fetch.js +71 -0
- package/lib/http-fetch.js +41 -0
- package/lib/search.js +226 -0
- package/package.json +60 -0
- package/web-browse.js +651 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Can Celik
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
# pi-web-browse
|
|
2
|
+
|
|
3
|
+
Web search and content extraction skill for [pi](https://github.com/badlogic/pi-mono). Search the web and fetch pages via a real headless browser (CDP).
|
|
4
|
+
|
|
5
|
+
**Works on Linux, macOS, and Windows.**
|
|
6
|
+
|
|
7
|
+
## Features
|
|
8
|
+
|
|
9
|
+
- 🔍 **Web Search** - Search via Google (falls back to DuckDuckGo if blocked)
|
|
10
|
+
- 🌐 **Page Fetching** - Extract readable content from any URL
|
|
11
|
+
- 🤖 **Bot Protection Bypass** - Handles JS challenges, Cloudflare, etc.
|
|
12
|
+
- 🚀 **Persistent Daemon** - Warm browser session for fast subsequent requests
|
|
13
|
+
- 🖥️ **Cross-Platform** - Auto-detects Brave, Chrome, Edge, Chromium
|
|
14
|
+
|
|
15
|
+
## Install
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
pi install npm:@ogulcancelik/pi-web-browse
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
Or via git:
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
pi install github.com/ogulcancelik/pi-web-browse
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
(Optional, try without installing):
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
pi -e npm:@ogulcancelik/pi-web-browse
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
After first use, the agent will guide you through setup.
|
|
34
|
+
|
|
35
|
+
## Usage
|
|
36
|
+
|
|
37
|
+
The agent will automatically use this skill when you ask it to search the web or fetch page content.
|
|
38
|
+
|
|
39
|
+
You can also invoke it directly:
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
/skill:web-browse "rust async runtime"
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
## Configuration
|
|
46
|
+
|
|
47
|
+
Environment variables (all optional):
|
|
48
|
+
|
|
49
|
+
| Variable | Description | Default |
|
|
50
|
+
|----------|-------------|---------|
|
|
51
|
+
| `WEB_BROWSE_BROWSER_BIN` | Browser binary path | Auto-detected |
|
|
52
|
+
| `WEB_BROWSE_USER_AGENT` | User-Agent string | Chrome on Windows |
|
|
53
|
+
| `WEB_BROWSE_DAEMON_PORT` | Daemon HTTP port | 9377 |
|
|
54
|
+
| `WEB_BROWSE_CDP_PORT` | Chrome DevTools port | 9223 |
|
|
55
|
+
| `WEB_BROWSE_DEBUG_DUMP` | Save debug files on failure | off |
|
|
56
|
+
|
|
57
|
+
## Browser Detection
|
|
58
|
+
|
|
59
|
+
The skill auto-detects browsers in common locations:
|
|
60
|
+
|
|
61
|
+
- **Linux:** brave, brave-browser, google-chrome, google-chrome-stable, chromium, chromium-browser (from PATH)
|
|
62
|
+
- **macOS:** Brave Browser, Google Chrome, Google Chrome Canary, Chromium, Microsoft Edge (in /Applications)
|
|
63
|
+
- **Windows:** Brave, Chrome, Edge, Chromium (Program Files, LocalAppData)
|
|
64
|
+
|
|
65
|
+
## How It Works
|
|
66
|
+
|
|
67
|
+
1. **Search** - Uses Google via headless browser (falls back to DuckDuckGo if blocked)
|
|
68
|
+
2. **Fetch** - Opens URL in headless Chromium, waits for JS, extracts readable content
|
|
69
|
+
3. **Daemon** - Keeps a warm browser session for speed + bot-protection bypass
|
|
70
|
+
|
|
71
|
+
## License
|
|
72
|
+
|
|
73
|
+
MIT
|
package/SKILL.md
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: web-browse
|
|
3
|
+
description: "Search the web and fetch/read pages via a real headless browser (CDP). Use this instead of curl when sites are JS-heavy or bot-protected. Works on Linux, macOS, and Windows."
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Web Browse
|
|
7
|
+
|
|
8
|
+
Search the web, then open/fetch pages in a **real browser session** (headless Chromium via CDP) and extract readable text.
|
|
9
|
+
Use this instead of `curl` when sites are JS-heavy or bot-protected.
|
|
10
|
+
|
|
11
|
+
## Setup
|
|
12
|
+
|
|
13
|
+
Run once before first use:
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
cd {baseDir}
|
|
17
|
+
npm install
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
The skill auto-detects browsers already installed on your system (Brave, Chrome, Edge, Chromium).
|
|
21
|
+
On Windows, Edge is pre-installed and works out of the box.
|
|
22
|
+
|
|
23
|
+
**No browser installed?** (rare) Run: `npx playwright install chromium`
|
|
24
|
+
|
|
25
|
+
## Configuration (optional)
|
|
26
|
+
|
|
27
|
+
Environment variables:
|
|
28
|
+
|
|
29
|
+
| Variable | Description |
|
|
30
|
+
|----------|-------------|
|
|
31
|
+
| `WEB_BROWSE_BROWSER_BIN` | Path to browser binary (auto-detected if not set) |
|
|
32
|
+
| `WEB_BROWSE_USER_AGENT` | Override User-Agent string |
|
|
33
|
+
| `WEB_BROWSE_DAEMON_PORT` | Daemon port (default: 9377) |
|
|
34
|
+
| `WEB_BROWSE_CDP_PORT` | CDP port (default: 9223) |
|
|
35
|
+
| `WEB_BROWSE_DEBUG_DUMP` | Set to `1` to save screenshots/HTML on failures |
|
|
36
|
+
|
|
37
|
+
You can also pass `--browser-bin <path>` as a CLI argument.
|
|
38
|
+
|
|
39
|
+
## Usage
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
# Search (results are cached for ~10 minutes)
|
|
43
|
+
{baseDir}/web-browse.js "your query"
|
|
44
|
+
{baseDir}/web-browse.js "your query" -n 10
|
|
45
|
+
|
|
46
|
+
# Fetch specific cached results by index
|
|
47
|
+
{baseDir}/web-browse.js --fetch 1,3,5
|
|
48
|
+
|
|
49
|
+
# Fetch a specific URL
|
|
50
|
+
{baseDir}/web-browse.js --url <url> # truncated (~2000 chars)
|
|
51
|
+
{baseDir}/web-browse.js --url <url> --full # full content
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
**Windows note:** Use `node {baseDir}/web-browse.js` instead of `{baseDir}/web-browse.js`
|
|
55
|
+
|
|
56
|
+
## Default behavior: persistent daemon (auto)
|
|
57
|
+
|
|
58
|
+
Direct calls automatically start/use a local daemon that keeps a **persistent headless browser+CDP session**.
|
|
59
|
+
This avoids browser startup overhead and helps with bot-protection pages that auto-clear (e.g. Anubis PoW).
|
|
60
|
+
|
|
61
|
+
### Daemon controls (optional, for debugging)
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
{baseDir}/web-browse.js --daemon status
|
|
65
|
+
{baseDir}/web-browse.js --daemon start
|
|
66
|
+
{baseDir}/web-browse.js --daemon stop
|
|
67
|
+
{baseDir}/web-browse.js --daemon restart
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
### Bypass daemon (one-shot)
|
|
71
|
+
|
|
72
|
+
```bash
|
|
73
|
+
{baseDir}/web-browse.js --no-daemon --url https://example.com
|
|
74
|
+
{baseDir}/web-browse.js --no-daemon "your query"
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
## Workflow
|
|
78
|
+
|
|
79
|
+
1) **Search** → see snippets → decide what to read
|
|
80
|
+
2) **Fetch by index** → `--fetch 1,3` opens those results and extracts content
|
|
81
|
+
|
|
82
|
+
```bash
|
|
83
|
+
{baseDir}/web-browse.js "rust async runtime" # shows results
|
|
84
|
+
{baseDir}/web-browse.js --fetch 1,3 # fetches result #1 and #3
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
## Browser Support
|
|
88
|
+
|
|
89
|
+
The skill auto-detects installed browsers in this order:
|
|
90
|
+
|
|
91
|
+
**Linux:** brave, brave-browser, google-chrome, google-chrome-stable, chromium, chromium-browser
|
|
92
|
+
**macOS:** Brave Browser, Google Chrome, Google Chrome Canary, Chromium, Microsoft Edge (in /Applications)
|
|
93
|
+
**Windows:** Brave, Chrome, Edge, Chromium (common install paths)
|
|
94
|
+
|
|
95
|
+
To use a specific browser, set `WEB_BROWSE_BROWSER_BIN` or pass `--browser-bin <path>`.
|
|
96
|
+
|
|
97
|
+
## Notes
|
|
98
|
+
|
|
99
|
+
- Content is truncated by default to save tokens; use `--full` for complete output.
|
|
100
|
+
- The daemon keeps a warm browser session for faster subsequent requests.
|
|
101
|
+
- CDP profile is stored in `~/.config/web-browse-cdp-profile/` (configurable via `--cdp-profile`).
|
package/lib/bot-protection.js
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
/**
 * Lower-case phrases whose presence in a page title or body text is treated
 * as a sign of a bot-protection / challenge interstitial.
 *
 * This array is the default `markers` argument of the detection helpers in
 * this module, so it is frozen: one caller mutating it in place would
 * otherwise silently change detection for every later call.
 */
const DEFAULT_MARKERS = Object.freeze([
  "making sure you're not a bot",
  "protected by anubis",
  "anubis uses a proof-of-work",
  "checking your browser",
  "just a moment",
  "cf-browser-verification",
  "enable javascript and cookies to continue",
  "attention required",
  "verify you are human",
  "unusual traffic",
]);
|
|
13
|
+
|
|
14
|
+
/**
 * Decide, from already-extracted strings, whether a page looks like a
 * bot-protection challenge.
 *
 * The title and the first 6000 characters of the body are lower-cased and
 * searched for each marker. Markers are lower-cased too, so caller-supplied
 * markers match regardless of case. Non-string and blank markers are ignored,
 * because an empty marker is a substring of every page and would flag
 * everything.
 *
 * @param {unknown} title - Page title; falsy values are treated as "".
 * @param {unknown} text - Page body text; falsy values are treated as "".
 * @param {string[]} [markers=DEFAULT_MARKERS] - Phrases to look for.
 * @returns {boolean} True when at least one usable marker occurs.
 */
export function isLikelyBotProtectionText(title, text, markers = DEFAULT_MARKERS) {
  const loweredTitle = String(title || "").toLowerCase();
  const loweredBody = String(text || "").slice(0, 6000).toLowerCase();
  const haystack = `${loweredTitle}\n${loweredBody}`;

  const needles = (markers ?? [])
    .filter((marker) => typeof marker === "string" && marker.trim() !== "")
    .map((marker) => marker.toLowerCase());

  return needles.some((needle) => haystack.includes(needle));
}
|
|
20
|
+
|
|
21
|
+
/**
 * Check, inside the live page, whether its title or the first 6000 characters
 * of `document.body.innerText` contain any bot-protection marker.
 *
 * Markers are lower-cased before use and non-string or blank markers are
 * dropped (an empty marker would match every page). The evaluation is tried up
 * to three times: when it fails with an "Execution context was destroyed" or
 * "Cannot find context" error, the function waits 250 ms and retries. Any
 * other evaluation error, or exhausting the attempts, yields `false`.
 *
 * @param {object} page - Object exposing `evaluate(fn, arg)` and
 *   `waitForTimeout(ms)` (presumably a Playwright Page — confirm with caller).
 * @param {string[]} [markers=DEFAULT_MARKERS] - Phrases to look for.
 * @returns {Promise<boolean>} True when a marker is found in the page.
 */
export async function isLikelyBotProtectionPage(page, markers = DEFAULT_MARKERS) {
  const needles = (markers ?? [])
    .filter((marker) => typeof marker === "string" && marker.trim() !== "")
    .map((marker) => marker.toLowerCase());

  // Nothing usable to look for: no page can match.
  if (needles.length === 0) return false;

  for (let attempt = 0; attempt < 3; attempt += 1) {
    try {
      // The callback is executed by `page.evaluate`, so it must be
      // self-contained and receives the markers as its argument.
      return await page.evaluate((markersArg) => {
        const title = (document.title || "").toLowerCase();
        const text = (document.body?.innerText || "").slice(0, 6000).toLowerCase();
        const haystack = `${title}\n${text}`;
        return markersArg.some((marker) => haystack.includes(marker));
      }, needles);
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      const contextLost =
        message.includes("Execution context was destroyed") || message.includes("Cannot find context");

      // Best-effort probe: unknown failures are reported as "not detected".
      if (!contextLost) return false;

      await page.waitForTimeout(250).catch(() => {});
    }
  }

  return false;
}
|
|
43
|
+
|
|
44
|
+
/**
 * Detect a bot-protection interstitial on `page` and, if one is showing, wait
 * for it to go away.
 *
 * Flow:
 *  1. Sleep 150-299 ms, then probe up to three times (200 ms apart) with
 *     `isLikelyBotProtectionPage`. If nothing is detected, return immediately.
 *  2. Otherwise log, then wait (up to `timeoutMs`) until no marker is present
 *     in the page title / first 6000 characters of body text.
 *  3. Wait for "domcontentloaded" (10 s cap) and "networkidle" (5 s cap);
 *     failures of these waits are ignored.
 *  4. Probe once more; if the page still matches, throw.
 *
 * Markers are lower-cased and non-string / blank markers are dropped before
 * use. An empty marker is a substring of every page, so it would make the
 * "no marker present" condition impossible to satisfy and force a timeout.
 *
 * @param {object} page - Object exposing `waitForTimeout`, `waitForFunction`,
 *   `waitForLoadState`, `title` and `evaluate` (presumably a Playwright Page —
 *   confirm with caller).
 * @param {string} url - Only used in the log message.
 * @param {object} [options]
 * @param {string[]} [options.markers=DEFAULT_MARKERS] - Phrases to look for.
 * @param {number} [options.timeoutMs=30000] - Max wait for markers to vanish.
 * @param {(msg: string) => void} [options.log] - Logger; defaults to `console.error`.
 * @returns {Promise<{detected: boolean, cleared: boolean, waitedMs: number}>}
 *   `waitedMs` is 0 when nothing was detected.
 * @throws {Error} When the challenge is still present after waiting.
 */
export async function waitForBotProtectionToClear(
  page,
  url,
  {
    markers = DEFAULT_MARKERS,
    timeoutMs = 30000,
    log = (msg) => console.error(msg),
  } = {},
) {
  const needles = (markers ?? [])
    .filter((marker) => typeof marker === "string" && marker.trim() !== "")
    .map((marker) => marker.toLowerCase());

  // Fast-path: for normal pages, do NOT wait for networkidle.
  await page.waitForTimeout(150 + Math.floor(Math.random() * 150));

  let detected = false;
  for (let i = 0; i < 3; i += 1) {
    detected = await isLikelyBotProtectionPage(page, needles);
    if (detected) break;
    await page.waitForTimeout(200);
  }

  if (!detected) return { detected: false, cleared: true, waitedMs: 0 };

  const start = Date.now();
  log(`Bot protection detected for ${url}. Waiting for it to clear...`);

  // A timeout here is not fatal by itself: the final probe below decides.
  await page
    .waitForFunction(
      (markersArg) => {
        const title = (document.title || "").toLowerCase();
        const text = (document.body?.innerText || "").slice(0, 6000).toLowerCase();
        const haystack = `${title}\n${text}`;
        return !markersArg.some((marker) => haystack.includes(marker));
      },
      needles,
      { timeout: timeoutMs },
    )
    .catch(() => {});

  await page.waitForLoadState("domcontentloaded", { timeout: 10000 }).catch(() => {});
  await page.waitForLoadState("networkidle", { timeout: 5000 }).catch(() => {});
  await page.waitForTimeout(150);

  const stillBlocked = await isLikelyBotProtectionPage(page, needles);
  if (stillBlocked) {
    const title = await page.title().catch(() => "");
    throw new Error(`Bot protection challenge did not clear (title="${title}")`);
  }

  return { detected: true, cleared: true, waitedMs: Date.now() - start };
}
|
package/lib/browser-bin.js
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
import { accessSync, constants, existsSync, statSync } from "node:fs";
import { delimiter, isAbsolute, join } from "node:path";
import { platform } from "node:os";
|
|
4
|
+
|
|
5
|
+
// Platform flags, computed once at module load.
const PLATFORM = platform();
const IS_MACOS = PLATFORM === "darwin";
const IS_WINDOWS = PLATFORM === "win32";

// macOS .app bundle paths (checked as absolute paths), in probe order.
const MACOS_BROWSER_PATHS = [
  "/Applications/Brave Browser.app/Contents/MacOS/Brave Browser",
  "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
  "/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary",
  "/Applications/Chromium.app/Contents/MacOS/Chromium",
  "/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge",
];

// Linux binary names (searched on PATH), in probe order.
const LINUX_BROWSER_NAMES = [
  "brave",
  "brave-browser",
  "google-chrome",
  "google-chrome-stable",
  "chromium",
  "chromium-browser",
];

// Install roots taken from the environment; any of them may be unset
// (always the case off Windows).
const WINDOWS_LOCAL_APP_DATA = process.env.LOCALAPPDATA;
const WINDOWS_PROGRAM_FILES = process.env.PROGRAMFILES;
const WINDOWS_PROGRAM_FILES_X86 = process.env["PROGRAMFILES(X86)"];

/**
 * Build a candidate path under an install root, or `null` when the root is
 * unset. Joining onto an empty root would produce a path relative to the
 * current working directory, which must never be probed as a browser location.
 */
function windowsInstallPath(root, ...segments) {
  return root ? join(root, ...segments) : null;
}

// Windows browser paths (common install locations), in probe order.
// Entries whose install root is not defined are omitted.
const WINDOWS_BROWSER_PATHS = [
  // Brave
  windowsInstallPath(WINDOWS_LOCAL_APP_DATA, "BraveSoftware", "Brave-Browser", "Application", "brave.exe"),
  windowsInstallPath(WINDOWS_PROGRAM_FILES, "BraveSoftware", "Brave-Browser", "Application", "brave.exe"),
  windowsInstallPath(WINDOWS_PROGRAM_FILES_X86, "BraveSoftware", "Brave-Browser", "Application", "brave.exe"),
  // Chrome
  windowsInstallPath(WINDOWS_LOCAL_APP_DATA, "Google", "Chrome", "Application", "chrome.exe"),
  windowsInstallPath(WINDOWS_PROGRAM_FILES, "Google", "Chrome", "Application", "chrome.exe"),
  windowsInstallPath(WINDOWS_PROGRAM_FILES_X86, "Google", "Chrome", "Application", "chrome.exe"),
  // Edge (comes with Windows 10/11)
  windowsInstallPath(WINDOWS_PROGRAM_FILES, "Microsoft", "Edge", "Application", "msedge.exe"),
  windowsInstallPath(WINDOWS_PROGRAM_FILES_X86, "Microsoft", "Edge", "Application", "msedge.exe"),
  // Chromium
  windowsInstallPath(WINDOWS_LOCAL_APP_DATA, "Chromium", "Application", "chrome.exe"),
].filter(Boolean);
|
|
44
|
+
|
|
45
|
+
/**
 * Report whether `filePath` names an existing regular file that can be run.
 *
 * A directory is rejected explicitly: directories normally carry the execute
 * bit (and always "exist"), so without the check a directory named like a
 * browser would be accepted and only fail later at spawn time.
 * `statSync` follows symlinks, so a symlink to an executable is accepted.
 *
 * @param {string} filePath - Path to test.
 * @returns {boolean} True for an executable regular file; false otherwise,
 *   including when the path is missing or cannot be inspected.
 */
function isExecutableFile(filePath) {
  try {
    if (!statSync(filePath).isFile()) return false;

    // On Windows there is no X_OK bit; an existing regular file is enough.
    if (IS_WINDOWS) return true;

    accessSync(filePath, constants.X_OK);
    return true;
  } catch {
    return false;
  }
}
|
|
57
|
+
|
|
58
|
+
/**
 * Resolve a browser name or path to an executable file.
 *
 * - Non-string or blank input yields `null`. Surrounding whitespace is
 *   trimmed, so a value copied into an env var with a stray space or newline
 *   still resolves.
 * - A value containing "/" (or, on Windows, "\" or a drive prefix such as
 *   "C:") is treated as a path and checked directly.
 * - Otherwise each directory of `env.PATH` (or `env.Path`) is searched in
 *   order. On Windows, double quotes wrapping a PATH entry are removed, and
 *   `<name>.exe` is also tried when the name does not already end in ".exe".
 *
 * @param {string} name - Binary name or path.
 * @param {object} [env=process.env] - Environment providing PATH.
 * @returns {string|null} The resolved path, or `null` when nothing matched.
 */
function findExecutableOnPath(name, env = process.env) {
  const wanted = typeof name === "string" ? name.trim() : "";
  if (!wanted) return null;

  // Absolute/relative path - check directly.
  // Handle both Unix "/" and Windows "\" or "C:\".
  const looksLikePath =
    wanted.includes("/") || (IS_WINDOWS && (wanted.includes("\\") || /^[A-Za-z]:/.test(wanted)));
  if (looksLikePath) {
    return isExecutableFile(wanted) ? wanted : null;
  }

  const pathEnv = env.PATH || env.Path || "";
  const dirs = pathEnv
    .split(delimiter)
    .map((dir) => (IS_WINDOWS ? dir.replace(/^"(.*)"$/, "$1") : dir))
    .filter(Boolean);

  // On Windows, also try the .exe extension if it was not provided.
  const suffixes = IS_WINDOWS && !wanted.toLowerCase().endsWith(".exe") ? ["", ".exe"] : [""];

  for (const dir of dirs) {
    for (const suffix of suffixes) {
      const candidate = join(dir, wanted + suffix);
      if (isExecutableFile(candidate)) return candidate;
    }
  }

  return null;
}
|
|
82
|
+
|
|
83
|
+
/**
 * Resolve a browser binary for CDP automation.
 *
 * Precedence:
 * - preferredBin (CLI)
 * - WEB_BROWSE_BROWSER_BIN
 * - BRAVE_BIN (backwards compat)
 * - OS-specific defaults (macOS .app bundles, Windows paths, or Linux PATH names)
 *
 * On Windows the default candidates are built from environment variables.
 * When such a variable is unset the candidate degrades to a relative path,
 * which would otherwise be resolved against the current working directory;
 * those candidates are skipped.
 *
 * @param {string|null} [preferredBin=null] - Explicit binary name or path.
 * @param {object} [env=process.env] - Environment to read overrides and PATH from.
 * @returns {string} Path of the first candidate that resolves to an executable.
 * @throws {Error} When no candidate resolves; the message lists what was tried.
 */
export function resolveBrowserBin(preferredBin = null, env = process.env) {
  // Priority overrides (CLI arg, env vars)
  const overrides = [preferredBin, env.WEB_BROWSE_BROWSER_BIN, env.BRAVE_BIN].filter(Boolean);

  // Check overrides first
  for (const cand of overrides) {
    const resolved = findExecutableOnPath(cand, env);
    if (resolved) return resolved;
  }

  // OS-specific browser paths
  let osCandidates;
  let osName;
  if (IS_MACOS) {
    osCandidates = MACOS_BROWSER_PATHS;
    osName = "macOS";
  } else if (IS_WINDOWS) {
    osCandidates = WINDOWS_BROWSER_PATHS;
    osName = "Windows";
  } else {
    osCandidates = LINUX_BROWSER_NAMES;
    osName = "Linux";
  }

  // Drop empty entries and, on Windows, anything that is not an absolute
  // path (the result of joining onto an undefined environment variable).
  const usableCandidates = osCandidates.filter((cand) => Boolean(cand) && (!IS_WINDOWS || isAbsolute(cand)));

  for (const cand of usableCandidates) {
    const resolved = IS_WINDOWS ? (isExecutableFile(cand) ? cand : null) : findExecutableOnPath(cand, env);
    if (resolved) return resolved;
  }

  const allTried = [...overrides, ...usableCandidates];
  throw new Error(
    `No supported browser binary found on ${osName}. ` +
      "Set WEB_BROWSE_BROWSER_BIN or BRAVE_BIN, or pass --browser-bin <path>. " +
      `Tried: ${allTried.join(", ")}`,
  );
}
|
package/lib/cdp.js
ADDED
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
import { spawn } from "node:child_process";
|
|
2
|
+
import { createServer as createNetServer } from "node:net";
|
|
3
|
+
import { platform } from "node:os";
|
|
4
|
+
|
|
5
|
+
import { resolveBrowserBin } from "./browser-bin.js";
|
|
6
|
+
|
|
7
|
+
// Platform flags, computed once at module load from os.platform():
// "darwin" marks macOS and "win32" marks Windows. Every other platform takes
// the remaining branch of the IS_MACOS / IS_WINDOWS checks in this module.
const PLATFORM = platform();
|
|
8
|
+
const IS_MACOS = PLATFORM === "darwin";
|
|
9
|
+
const IS_WINDOWS = PLATFORM === "win32";
|
|
10
|
+
|
|
11
|
+
/**
 * Poll `http://127.0.0.1:<port>/json/version` until it returns a payload with
 * a `webSocketDebuggerUrl` string starting with "ws", or until `timeoutMs`
 * has elapsed.
 *
 * Each request carries an abort signal bounded by the remaining budget (and
 * by a 2 s per-request cap). Without it, a peer that accepts the connection
 * but never answers would block the call far beyond `timeoutMs`. The pause
 * between polls (300 ms) is likewise clipped to the remaining budget.
 *
 * @param {number} port - Local TCP port to probe.
 * @param {number} [timeoutMs=10000] - Overall time budget in milliseconds.
 * @returns {Promise<object|null>} The parsed version payload, or `null` when
 *   no valid payload was obtained in time.
 */
export async function waitForCdpVersion(port, timeoutMs = 10000) {
  const POLL_INTERVAL_MS = 300;
  const MAX_REQUEST_MS = 2000;

  const endpoint = `http://127.0.0.1:${port}/json/version`;
  const deadline = Date.now() + timeoutMs;

  for (let remaining = deadline - Date.now(); remaining > 0; remaining = deadline - Date.now()) {
    try {
      // AbortSignal.timeout needs a positive integer delay.
      const requestBudget = Math.max(1, Math.ceil(Math.min(remaining, MAX_REQUEST_MS)));
      const response = await fetch(endpoint, {
        method: "GET",
        signal: AbortSignal.timeout(requestBudget),
      });

      if (response.ok) {
        const payload = await response.json().catch(() => null);
        if (
          payload &&
          typeof payload === "object" &&
          typeof payload.webSocketDebuggerUrl === "string" &&
          payload.webSocketDebuggerUrl.startsWith("ws")
        ) {
          return payload;
        }
      }
    } catch {
      // Connection refused / aborted / reset: the endpoint is not up yet, keep polling.
    }

    const pause = Math.min(POLL_INTERVAL_MS, deadline - Date.now());
    if (pause <= 0) break;
    await new Promise((resolve) => setTimeout(resolve, pause));
  }

  return null;
}
|
|
37
|
+
|
|
38
|
+
/**
 * Boolean convenience wrapper around `waitForCdpVersion`.
 *
 * @param {number} port - Local TCP port to probe.
 * @param {number} [timeoutMs=10000] - Time budget handed to `waitForCdpVersion`.
 * @returns {Promise<boolean>} True when a version payload was obtained.
 */
export async function waitForCdp(port, timeoutMs = 10000) {
  const versionPayload = await waitForCdpVersion(port, timeoutMs);
  return versionPayload ? true : false;
}
|
|
41
|
+
|
|
42
|
+
/**
 * Ask the OS for a free TCP port on 127.0.0.1 by binding a throwaway server
 * to port 0, reading the assigned port and closing the server again.
 *
 * @returns {Promise<number>} The port number that was assigned.
 * Rejects when listening or closing fails, or when no port could be read
 * from the bound address.
 */
async function getEphemeralPort() {
  return new Promise((resolve, reject) => {
    const probe = createNetServer();
    // Do not let the probe keep the process alive.
    probe.unref();

    probe.once("error", reject);

    const onListening = () => {
      const bound = probe.address();
      const assignedPort = typeof bound === "object" && bound ? bound.port : undefined;

      probe.close((closeError) => {
        if (closeError) {
          reject(closeError);
          return;
        }
        if (!assignedPort) {
          reject(new Error("Could not determine ephemeral port"));
          return;
        }
        resolve(assignedPort);
      });
    };

    probe.listen(0, "127.0.0.1", onListening);
  });
}
|
|
61
|
+
|
|
62
|
+
/**
 * Check whether `port` can currently be bound on 127.0.0.1.
 *
 * @param {number} port - Candidate port; must be an integer in 1..65535.
 * @returns {Promise<boolean>} False for an invalid port or when binding
 *   fails; true when a throwaway server could listen on it and was closed.
 */
async function isPortAvailable(port) {
  const isValidPort = Number.isInteger(port) && port > 0 && port <= 65535;
  if (!isValidPort) return false;

  return new Promise((settle) => {
    const probe = createNetServer();
    // Do not let the probe keep the process alive.
    probe.unref();

    probe.once("error", () => {
      settle(false);
    });

    probe.listen(port, "127.0.0.1", () => {
      probe.close(() => {
        settle(true);
      });
    });
  });
}
|
|
76
|
+
|
|
77
|
+
/**
 * Pick a usable local port, preferring `preferredPort`.
 *
 * Tries `preferredPort` first, then `preferredPort + 1` through
 * `preferredPort + 25` in order, and finally falls back to an OS-assigned
 * ephemeral port.
 *
 * @param {number} preferredPort - Port to try first.
 * @returns {Promise<number>} The first available candidate, or an ephemeral port.
 */
async function chooseAvailablePort(preferredPort) {
  const MAX_OFFSET = 25;
  const candidates = [preferredPort];
  for (let step = 1; step <= MAX_OFFSET; step += 1) {
    candidates.push(preferredPort + step);
  }

  // Probe strictly one at a time so the lowest available candidate wins.
  for (const candidatePort of candidates) {
    const free = await isPortAvailable(candidatePort);
    if (free) return candidatePort;
  }

  return await getEphemeralPort();
}
|
|
87
|
+
|
|
88
|
+
/**
 * Launch a headless browser with a CDP (remote debugging) endpoint on
 * 127.0.0.1 and wait until the endpoint answers.
 *
 * The port is `preferredPort` when free, otherwise a nearby or ephemeral one
 * (see `chooseAvailablePort`). On non-Windows platforms the browser is spawned
 * detached and unref'd. After spawning, the function waits up to 15 s for the
 * endpoint; on failure it removes the pid from `spawnedProcessGroupPids`,
 * kills the process and throws.
 *
 * An `'error'` listener is attached to the child. `spawn` reports launch
 * failures (e.g. ENOENT, EACCES) asynchronously through that event, and an
 * unhandled `'error'` event would crash the whole process. The captured error
 * is attached as `cause` of the thrown Error.
 *
 * @param {number} preferredPort - CDP port to try first.
 * @param {string} profileDir - Value for `--user-data-dir`.
 * @param {string|null} [browserBin=null] - Explicit binary, passed to `resolveBrowserBin`.
 * @param {Set<number>|null} [spawnedProcessGroupPids=null] - Optional registry
 *   the spawned pid is added to (and removed from again on failure).
 * @returns {Promise<{proc: import("node:child_process").ChildProcess, port: number, bin: string}>}
 * @throws {Error} When no browser binary resolves, or when the CDP endpoint
 *   does not come up in time.
 */
export async function startBrowserForCdp(preferredPort, profileDir, browserBin = null, spawnedProcessGroupPids = null) {
  const bin = resolveBrowserBin(browserBin);
  const port = await chooseAvailablePort(preferredPort);

  // OS-specific headless flags
  const headlessArgs =
    IS_MACOS || IS_WINDOWS
      ? // macOS and Windows: use standard headless mode
        ["--headless=new", "--window-size=1280,720"]
      : // Linux: use ozone headless platform (Wayland/X11 independent)
        ["--ozone-platform=headless", "--ozone-override-screen-size=1280,720"];

  const args = [
    ...headlessArgs,
    // NOTE(review): the sandbox is disabled while the browser loads arbitrary
    // web content. Confirm this is required for the supported environments.
    "--no-sandbox",
    "--disable-setuid-sandbox",
    "--no-first-run",
    "--no-default-browser-check",

    // Reduce background throttling so JS challenges (e.g., Anubis PoW) run at normal speed.
    "--disable-background-timer-throttling",
    "--disable-backgrounding-occluded-windows",
    "--disable-renderer-backgrounding",

    `--remote-debugging-port=${port}`,
    "--remote-debugging-address=127.0.0.1",
    `--user-data-dir=${profileDir}`,
    "about:blank",
  ];

  const env = { ...process.env };
  // Prevent any UI from connecting to the current Wayland/X11 session (Linux only).
  if (!IS_MACOS && !IS_WINDOWS) {
    delete env.WAYLAND_DISPLAY;
    delete env.DISPLAY;
  }

  // On Windows, don't use detached mode (process groups work differently)
  const spawnOpts = IS_WINDOWS ? { stdio: "ignore", env } : { stdio: "ignore", detached: true, env };

  const proc = spawn(bin, args, spawnOpts);

  // Capture asynchronous spawn/kill failures instead of letting them surface
  // as an unhandled 'error' event.
  let spawnError = null;
  proc.on("error", (err) => {
    if (!spawnError) spawnError = err;
  });

  if (!IS_WINDOWS) proc.unref();

  if (spawnedProcessGroupPids && proc.pid) {
    spawnedProcessGroupPids.add(proc.pid);
  }

  const ready = await waitForCdp(port, 15000);
  if (!ready) {
    if (spawnedProcessGroupPids && proc.pid) spawnedProcessGroupPids.delete(proc.pid);

    killBrowserProcess(proc);

    throw new Error(
      `Failed to start browser with CDP on port ${port} (bin=${bin})`,
      spawnError ? { cause: spawnError } : undefined,
    );
  }

  return { proc, port, bin };
}
|
|
156
|
+
|
|
157
|
+
/**
 * Kill a browser process (cross-platform), best effort.
 *
 * - Windows: runs `taskkill /pid <pid> /T /F` to kill the process tree.
 * - Elsewhere: signals the process group via the negative pid.
 *
 * If either approach fails, falls back to `proc.kill()`. That covers the
 * asynchronous case where `taskkill` itself cannot be spawned: the failure
 * arrives as an `'error'` event on the helper process, which a surrounding
 * `try/catch` cannot see and which would crash the process if unhandled.
 * All errors are swallowed; the function never throws.
 *
 * @param {{pid?: number, kill: Function}|null|undefined} proc - Child process
 *   handle; ignored when missing or without a pid.
 * @returns {void}
 */
export function killBrowserProcess(proc) {
  if (!proc || !proc.pid) return;

  // Fallback: try killing just the process.
  const killDirectly = () => {
    try {
      proc.kill();
    } catch {
      // ignore: the process is most likely gone already
    }
  };

  try {
    if (IS_WINDOWS) {
      // Windows: use taskkill to kill process tree
      const killer = spawn("taskkill", ["/pid", proc.pid.toString(), "/T", "/F"], { stdio: "ignore" });
      killer.on("error", killDirectly);
    } else {
      // Unix: kill process group (negative PID)
      process.kill(-proc.pid);
    }
  } catch {
    killDirectly();
  }
}
|
|
180
|
+
|
|
181
|
+
/**
 * Judge whether a CDP `/json/version` payload belongs to a browser this tool
 * can drive.
 *
 * Anything that is not a non-null object is rejected, and so is a payload
 * whose "User-Agent" string contains "electron/" (case-insensitive).
 * Everything else is accepted, including payloads without a "User-Agent".
 *
 * @param {unknown} versionPayload - Parsed version payload, or null.
 * @returns {boolean} True when the endpoint looks usable.
 */
export function isLikelyUsableBrowserCdp(versionPayload) {
  const isObjectPayload = Boolean(versionPayload) && typeof versionPayload === "object";
  if (!isObjectPayload) return false;

  const rawAgent = versionPayload["User-Agent"];
  const agent = typeof rawAgent === "string" ? rawAgent.toLowerCase() : "";

  return !agent.includes("electron/");
}
|
|
189
|
+
|
|
190
|
+
/**
 * Turn the CDP-related CLI flags into effective settings.
 *
 * When either flag is set the inputs are passed through unchanged (with
 * `useCdp` being `useCdpFlag || cdpStartFlag`). Otherwise the function probes
 * for an already running, usable CDP endpoint on port 9223 and then 9222
 * (1 s each) and reuses the first one found. If neither is usable it asks for
 * a new browser to be started on port 9223.
 *
 * @param {object} options
 * @param {boolean} options.useCdpFlag - Caller asked to attach via CDP.
 * @param {boolean} options.cdpStartFlag - Caller asked to start a CDP browser.
 * @param {number} options.cdpPortValue - Port to use when a flag is set.
 * @returns {Promise<{useCdp: boolean, cdpStart: boolean, cdpPort: number}>}
 */
export async function resolveCdpOptions({ useCdpFlag, cdpStartFlag, cdpPortValue }) {
  const requestedUseCdp = useCdpFlag || cdpStartFlag;

  // Explicit request: honor the caller's flags and port as given.
  if (requestedUseCdp) {
    return {
      useCdp: requestedUseCdp,
      cdpStart: cdpStartFlag,
      cdpPort: cdpPortValue,
    };
  }

  // Auto mode: reuse a running browser if one answers on a known port.
  const knownPorts = [9223, 9222];
  for (const candidatePort of knownPorts) {
    const versionPayload = await waitForCdpVersion(candidatePort, 1000);
    if (isLikelyUsableBrowserCdp(versionPayload)) {
      return {
        useCdp: true,
        cdpStart: cdpStartFlag,
        cdpPort: candidatePort,
      };
    }
  }

  // Nothing usable is running: start our own on the default port.
  return {
    useCdp: true,
    cdpStart: true,
    cdpPort: 9223,
  };
}
|