@ogulcancelik/pi-web-browse 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +73 -0
- package/SKILL.md +101 -0
- package/lib/bot-protection.js +92 -0
- package/lib/browser-bin.js +133 -0
- package/lib/cdp.js +218 -0
- package/lib/daemon-client.js +134 -0
- package/lib/daemon.js +194 -0
- package/lib/debug-dump.js +76 -0
- package/lib/extract.js +58 -0
- package/lib/fetch.js +71 -0
- package/lib/http-fetch.js +41 -0
- package/lib/search.js +226 -0
- package/package.json +60 -0
- package/web-browse.js +651 -0
package/web-browse.js
ADDED
|
@@ -0,0 +1,651 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Web Browse - search the web and fetch/read pages via a real browser (CDP)
|
|
5
|
+
*
|
|
6
|
+
* Usage:
|
|
7
|
+
* ./web-browse.js "query" # search, show snippets, cache results
|
|
8
|
+
* ./web-browse.js "query" -n 10 # more results
|
|
9
|
+
* ./web-browse.js --fetch 1,3,5 # fetch cached results by index
|
|
10
|
+
* ./web-browse.js --url <url> # fetch a specific URL
|
|
11
|
+
* ./web-browse.js --url <url> --full # fetch without truncation
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import { spawn } from "node:child_process";
|
|
15
|
+
import { searchWebFromContext } from "./lib/search.js";
|
|
16
|
+
import { fetchUrlViaHttp } from "./lib/http-fetch.js";
|
|
17
|
+
import { fetchUrlFromContext, fetchUrlsFromContext, cleanupContextPages } from "./lib/fetch.js";
|
|
18
|
+
import { fetch as undiciFetch, Agent } from "undici";
|
|
19
|
+
import { chromium } from "playwright";
|
|
20
|
+
import { writeFileSync, readFileSync, existsSync, mkdtempSync, rmSync } from "fs";
|
|
21
|
+
import { tmpdir, homedir } from "os";
|
|
22
|
+
import { join } from "path";
|
|
23
|
+
import { checkDaemonHealth, ensureDaemonRunning, sendDaemonCommand, stopDaemon } from "./lib/daemon-client.js";
|
|
24
|
+
import { fileURLToPath } from "node:url";
|
|
25
|
+
|
|
26
|
+
import { runWebBrowseDaemon } from "./lib/daemon.js";
|
|
27
|
+
import { startBrowserForCdp, killBrowserProcess, resolveCdpOptions as resolveCdpOptionsModule } from "./lib/cdp.js";
|
|
28
|
+
import { platform } from "node:os";
|
|
29
|
+
|
|
30
|
+
// Platform switch: browser teardown uses taskkill on Windows and process-group signals elsewhere.
const IS_WINDOWS = platform() === "win32";

// Search results are cached in the OS temp directory so `--fetch` can refer to them by index.
const CACHE_TTL_MS = 10 * 60 * 1000; // 10 minutes
const CACHE_FILE = join(tmpdir(), "web-browse-cache.json");

// Daemon: keep a persistent headless Brave+CDP session to avoid startup cost and
// reduce bot-protection flakiness (JS challenges benefit from a warm session).
// The port may be overridden through the environment; the LOCAL_SEARCH_* name is also accepted.
const daemonPortSetting =
  process.env.WEB_BROWSE_DAEMON_PORT || process.env.LOCAL_SEARCH_DAEMON_PORT || "9377";
const DAEMON_PORT = parseInt(daemonPortSetting, 10);
const DAEMON_URL = `http://127.0.0.1:${DAEMON_PORT}`;
const DAEMON_PID_FILE = join(tmpdir(), "web-browse-daemon.pid");

// Force IPv4 to avoid timeout issues with some hosts (e.g., GitHub Pages)
const agent = new Agent({ connect: { family: 4 } });

// Thin fetch wrapper that always routes through the IPv4-only dispatcher above.
const httpFetch = (url, opts = {}) => {
  const requestOptions = { ...opts, dispatcher: agent };
  return undiciFetch(url, requestOptions);
};

// Use a generic Windows Chrome user agent (works well across platforms)
const DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36";
const USER_AGENT = process.env.WEB_BROWSE_USER_AGENT || DEFAULT_USER_AGENT;

// Debug dumps are opt-in: WEB_BROWSE_DEBUG_DUMP must be "1", "true" or "yes" (case-insensitive).
const debugDumpSetting = String(process.env.WEB_BROWSE_DEBUG_DUMP || "").toLowerCase();
const DEBUG_DUMP_ENABLED = ["1", "true", "yes"].includes(debugDumpSetting);
const DEBUG_DUMP_BASE_DIR = process.env.WEB_BROWSE_DEBUG_DUMP_DIR || tmpdir();

// Options handed to the fetch helpers (one-shot CDP fetches and the daemon).
const FETCH_OPTS = {
  botProtectionTimeoutMs: 30000,
  debugDumpEnabled: DEBUG_DUMP_ENABLED,
  debugDumpBaseDir: DEBUG_DUMP_BASE_DIR,
};

// Default request headers; user agent and language can be overridden via the environment.
const HEADERS = {
  "User-Agent": USER_AGENT,
  "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
  "Accept-Language": process.env.WEB_BROWSE_ACCEPT_LANGUAGE || "en-US,en;q=0.9",
};
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
const spawnedBraveProcessGroupPids = new Set();
|
|
68
|
+
|
|
69
|
+
/**
 * Best-effort termination of every browser this process spawned.
 *
 * Each tracked pid is killed (process tree via taskkill on Windows, process
 * group via a negative pid elsewhere); failures are ignored so one dead
 * process cannot prevent the others from being cleaned up. The tracking set
 * is emptied afterwards.
 */
function cleanupSpawnedBraveProcesses() {
  spawnedBraveProcessGroupPids.forEach((groupPid) => {
    try {
      if (!IS_WINDOWS) {
        // Unix: a negative pid targets the whole process group.
        process.kill(-groupPid);
        return;
      }

      // Windows: /T kills the process tree, /F forces termination.
      const taskkillArgs = ["/pid", groupPid.toString(), "/T", "/F"];
      spawn("taskkill", taskkillArgs, { stdio: "ignore" });
    } catch {
      // The process may already be gone; nothing useful to do.
    }
  });

  spawnedBraveProcessGroupPids.clear();
}
|
|
86
|
+
|
|
87
|
+
// On SIGINT/SIGTERM, tear down spawned browsers and exit with 128 + signal number
// (130 for SIGINT, 143 for SIGTERM).
const TERMINATION_EXIT_CODES = [
  ["SIGINT", 130],
  ["SIGTERM", 143],
];

for (const [signalName, exitCode] of TERMINATION_EXIT_CODES) {
  process.on(signalName, () => {
    cleanupSpawnedBraveProcesses();
    process.exit(exitCode);
  });
}

// Also clean up on any other exit path (normal return, process.exit elsewhere).
process.on("exit", () => {
  cleanupSpawnedBraveProcesses();
});
|
|
100
|
+
|
|
101
|
+
// --- Argument Parsing ---
|
|
102
|
+
const args = process.argv.slice(2);
|
|
103
|
+
|
|
104
|
+
/**
 * Return the value that follows `flag` on the command line.
 *
 * Only the first occurrence of the flag is considered. The value is rejected
 * (null is returned) when the flag is absent, when nothing follows it, or
 * when the next argument itself starts with "-".
 *
 * @param {string} flag - Flag to look up, e.g. "--url".
 * @returns {string|null} The flag's value, or null.
 */
function getArg(flag) {
  const flagIndex = args.indexOf(flag);
  if (flagIndex === -1) {
    return null;
  }

  const candidate = args[flagIndex + 1];
  if (!candidate || candidate.startsWith("-")) {
    return null;
  }

  return candidate;
}
|
|
111
|
+
|
|
112
|
+
/**
 * Report whether `flag` appears anywhere on the command line.
 *
 * @param {string} flag - Flag to look for, e.g. "--full".
 * @returns {boolean} True when the flag was passed.
 */
function hasFlag(flag) {
  const position = args.indexOf(flag);
  return position >= 0;
}
|
|
115
|
+
|
|
116
|
+
/**
 * Return the search query: the first positional argument that is neither a
 * flag nor the value consumed by a value-taking flag.
 *
 * Flag values are excluded by position rather than by comparing their text,
 * so a query that happens to equal a flag value (e.g. `"5" -n 5`) is still
 * recognised. As in getArg, only the first occurrence of each flag consumes a
 * value, and only when that value does not start with "-".
 *
 * @returns {string|null} The query text, or null when none was given.
 */
function getQuery() {
  const valueFlags = [
    "-n",
    "--url",
    "--fetch",
    "--cdp-profile",
    "--cdp-port",
    "--stress",
    "--daemon",
    "--browser-bin",
  ];

  // Positions in `args` that hold a flag's value and must not be read as the query.
  const consumedPositions = new Set();
  for (const flag of valueFlags) {
    const flagIndex = args.indexOf(flag);
    if (flagIndex === -1) continue;

    const value = args[flagIndex + 1];
    if (value && !value.startsWith("-")) {
      consumedPositions.add(flagIndex + 1);
    }
  }

  for (let position = 0; position < args.length; position += 1) {
    const arg = args[position];
    if (!arg.startsWith("-") && !consumedPositions.has(position)) {
      return arg;
    }
  }

  return null;
}
|
|
134
|
+
|
|
135
|
+
/**
 * Read an integer-valued flag from the command line.
 *
 * Returns `fallback` when the flag is absent. When the value cannot be parsed
 * as a base-10 integer, a warning is written to stderr and `fallback` is used,
 * so NaN never reaches the result clamping, the CDP URL, or the stress loop.
 *
 * @param {string} flag - Flag name, e.g. "-n".
 * @param {number} fallback - Value used when the flag is missing or invalid.
 * @returns {number} The parsed integer or the fallback.
 */
function parseIntArg(flag, fallback) {
  const raw = getArg(flag);
  if (raw === null) {
    return fallback;
  }

  const parsed = Number.parseInt(raw, 10);
  if (Number.isNaN(parsed)) {
    console.error(`Ignoring invalid value for ${flag}: "${raw}" (using ${fallback})`);
    return fallback;
  }

  return parsed;
}

const numResults = parseIntArg("-n", 5);
const fetchIndices = getArg("--fetch");
const directUrl = getArg("--url");
const fullContent = hasFlag("--full");
const cdpStart = hasFlag("--cdp-start");
// --cdp-start implies CDP mode.
const useCdp = hasFlag("--cdp") || cdpStart;
// An auto-started browser defaults to 9223; attaching to an existing one defaults to 9222.
const cdpPort = parseIntArg("--cdp-port", cdpStart ? 9223 : 9222);
const cdpProfile = getArg("--cdp-profile") || join(homedir(), ".config", "web-browse-cdp-profile");
const browserBinArg = getArg("--browser-bin");
const stressCount = parseIntArg("--stress", 0);
const daemonCommand = getArg("--daemon"); // start|stop|status|restart
const daemonRun = hasFlag("--daemon-run");
const noDaemon = hasFlag("--no-daemon");
const query = getQuery();
|
|
149
|
+
|
|
150
|
+
// --- Help ---
|
|
151
|
+
if (hasFlag("--help") || hasFlag("-h") || (args.length === 0)) {
|
|
152
|
+
console.log(`Web Browse - search the web and fetch/read pages (no API keys needed)
|
|
153
|
+
|
|
154
|
+
Usage:
|
|
155
|
+
./web-browse.js "query" # search, show snippets, cache results
|
|
156
|
+
./web-browse.js "query" -n 10 # more results
|
|
157
|
+
./web-browse.js --fetch 1,3,5 # fetch cached results by index
|
|
158
|
+
./web-browse.js --url <url> # fetch a specific URL (truncated)
|
|
159
|
+
./web-browse.js --url <url> --full # fetch without truncation
|
|
160
|
+
|
|
161
|
+
# Daemon (persistent headless browser session)
|
|
162
|
+
./web-browse.js --daemon start|stop|status|restart
|
|
163
|
+
./web-browse.js --no-daemon ... # bypass daemon (one-shot mode)
|
|
164
|
+
|
|
165
|
+
# Config
|
|
166
|
+
./web-browse.js --browser-bin <path> ...
|
|
167
|
+
WEB_BROWSE_USER_AGENT="..." ./web-browse.js ...
|
|
168
|
+
|
|
169
|
+
Default behavior:
|
|
170
|
+
Direct calls automatically start/use a local daemon that keeps a persistent headless Brave+CDP session.
|
|
171
|
+
This avoids browser startup overhead and helps with bot-protection pages that auto-clear.
|
|
172
|
+
|
|
173
|
+
Examples:
|
|
174
|
+
./web-browse.js --daemon start
|
|
175
|
+
./web-browse.js "rust async runtime"
|
|
176
|
+
./web-browse.js --fetch 1,3
|
|
177
|
+
./web-browse.js --url https://example.com
|
|
178
|
+
./web-browse.js --no-daemon --url https://example.com
|
|
179
|
+
|
|
180
|
+
Notes:
|
|
181
|
+
./search.js is kept as a wrapper for backwards compatibility.`);
|
|
182
|
+
process.exit(0);
|
|
183
|
+
}
|
|
184
|
+
|
|
185
|
+
// --- Cache ---
|
|
186
|
+
/**
 * Persist the latest search so `--fetch` can refer to results by index.
 *
 * Writes `{ query, timestamp, results }` as pretty-printed JSON to CACHE_FILE,
 * with `timestamp` set to the current time in milliseconds.
 *
 * @param {string} query - The search query that produced the results.
 * @param {Array} results - Search results to store.
 */
function saveCache(query, results) {
  const serialized = JSON.stringify({ query, timestamp: Date.now(), results }, null, 2);
  writeFileSync(CACHE_FILE, serialized);
}
|
|
190
|
+
|
|
191
|
+
/**
 * Load the cached search results, if present, well-formed and still fresh.
 *
 * A cache is rejected (null is returned) when the file is missing or
 * unreadable, is not valid JSON, lacks a `results` array or a numeric
 * `timestamp`, or is older than CACHE_TTL_MS. Validating the shape here keeps
 * callers from crashing on `cache.results.length` and stops a cache without a
 * timestamp from being treated as never expiring.
 *
 * @returns {{query: *, timestamp: number, results: Array}|null} The cache object or null.
 */
function loadCache() {
  if (!existsSync(CACHE_FILE)) return null;

  try {
    const cache = JSON.parse(readFileSync(CACHE_FILE, "utf-8"));

    if (cache === null || typeof cache !== "object" || !Array.isArray(cache.results)) {
      return null; // malformed
    }
    if (!Number.isFinite(cache.timestamp)) {
      return null; // age unknown: treat as stale
    }
    if (Date.now() - cache.timestamp > CACHE_TTL_MS) {
      return null; // expired
    }

    return cache;
  } catch {
    // Unreadable file or invalid JSON: behave as if there were no cache.
    return null;
  }
}
|
|
203
|
+
|
|
204
|
+
/**
 * Filesystem path of this script, derived from the module URL.
 *
 * @returns {string} Path of the current module file.
 */
function getScriptPath() {
  const moduleUrl = import.meta.url;
  return fileURLToPath(moduleUrl);
}
|
|
207
|
+
|
|
208
|
+
/**
 * Build the CLI arguments to forward to the daemon process so the background
 * session matches the caller's configuration.
 *
 * Only flags the user passed explicitly are forwarded, in the order
 * `--cdp-profile` then `--browser-bin`.
 *
 * @returns {string[]} Flat list of flag/value pairs (possibly empty).
 */
function getDaemonForwardedArgs() {
  const forwardableFlags = ["--cdp-profile", "--browser-bin"];

  return forwardableFlags.flatMap((flag) => {
    const explicitValue = getArg(flag);
    return explicitValue ? [flag, explicitValue] : [];
  });
}
|
|
220
|
+
|
|
221
|
+
/**
 * Query the daemon's health at DAEMON_URL.
 *
 * @param {number} [timeoutMs=600] - Timeout passed through to checkDaemonHealth.
 * @returns {Promise<*>} Whatever checkDaemonHealth resolves to.
 */
async function daemonHealth(timeoutMs = 600) {
  const request = { daemonUrl: DAEMON_URL, timeoutMs };
  return checkDaemonHealth(request);
}
|
|
224
|
+
|
|
225
|
+
/**
 * Make sure the daemon is running, delegating to ensureDaemonRunning with this
 * script's path, the daemon URL and pid file, the forwarded CLI arguments and
 * the current environment.
 *
 * @returns {Promise<*>} Whatever ensureDaemonRunning resolves to.
 */
async function daemonEnsureRunning() {
  const launchOptions = {
    scriptPath: getScriptPath(),
    daemonUrl: DAEMON_URL,
    daemonPidFile: DAEMON_PID_FILE,
    forwardedArgs: getDaemonForwardedArgs(),
    env: process.env,
  };

  return ensureDaemonRunning(launchOptions);
}
|
|
234
|
+
|
|
235
|
+
/**
 * Stop the daemon identified by DAEMON_URL / DAEMON_PID_FILE.
 *
 * @returns {Promise<*>} Whatever stopDaemon resolves to.
 */
async function daemonStop() {
  const target = { daemonUrl: DAEMON_URL, daemonPidFile: DAEMON_PID_FILE };
  return stopDaemon(target);
}
|
|
238
|
+
|
|
239
|
+
/**
 * Send a command with its payload to the daemon at DAEMON_URL.
 *
 * @param {string} command - Command name (main uses "fetch", "fetchMany" and "search").
 * @param {object} payload - Command-specific payload.
 * @returns {Promise<*>} Whatever sendDaemonCommand resolves to.
 */
async function daemonSendCommand(command, payload) {
  const request = { daemonUrl: DAEMON_URL, command, payload };
  return sendDaemonCommand(request);
}
|
|
242
|
+
|
|
243
|
+
/**
 * Launch a browser for CDP use and register it in the spawned-process set so
 * it can be cleaned up on exit.
 *
 * @param {number} preferredPort - Port to try first; the port actually used is returned.
 * @param {string} profileDir - Browser profile directory.
 * @param {string|null} [browserBin=null] - Optional explicit browser binary.
 * @returns {Promise<{proc: *, port: *}>} The spawned process and the CDP port in use.
 */
async function startBraveForCdp(preferredPort, profileDir, browserBin = null) {
  const { proc, port } = await startBrowserForCdp(
    preferredPort,
    profileDir,
    browserBin,
    spawnedBraveProcessGroupPids,
  );

  return { proc, port };
}
|
|
247
|
+
|
|
248
|
+
/**
 * Resolve the effective CDP settings from the parsed CLI flags by delegating
 * to the cdp module.
 *
 * @param {boolean} useCdpFlag - Whether CDP mode was requested.
 * @param {boolean} cdpStartFlag - Whether a browser should be auto-started.
 * @param {number} cdpPortValue - Requested CDP port.
 * @returns {Promise<*>} Resolved options; callers read `useCdp`, `cdpPort` and `cdpStart`.
 */
async function resolveCdpOptions(useCdpFlag, cdpStartFlag, cdpPortValue) {
  const flags = { useCdpFlag, cdpStartFlag, cdpPortValue };
  return resolveCdpOptionsModule(flags);
}
|
|
251
|
+
|
|
252
|
+
// --- Search (browser-first) ---
|
|
253
|
+
/**
 * Run a single search without the daemon.
 *
 * Either attaches to a browser over CDP (optionally starting one first) or
 * launches a throwaway headless Chromium with a temporary profile, then
 * delegates the actual search to searchWebFromContext. Everything this
 * function created (pages, context, browser connection, spawned browser,
 * temporary profile) is released in the `finally` block.
 *
 * @param {string} query - Search query.
 * @param {number} num - Requested result count; clamped to 1..20, and a
 *   non-numeric value falls back to 5 (the CLI default) instead of NaN.
 * @param {boolean} [useCdpConnection=false] - Attach over CDP instead of launching Chromium.
 * @param {number} [cdpPortValue=9222] - CDP port to connect to (or preferred port when auto-starting).
 * @param {boolean} [cdpAutoStart=false] - Start a browser for CDP before connecting.
 * @param {string} [cdpProfileValue] - Profile directory for an auto-started browser.
 * @returns {Promise<Array>} At most the clamped number of search results.
 */
async function searchWebOneShot(
  query,
  num,
  useCdpConnection = false,
  cdpPortValue = 9222,
  cdpAutoStart = false,
  cdpProfileValue = join(homedir(), ".config", "web-browse-cdp-profile"),
) {
  // A NaN count would turn into `slice(0, NaN)` and silently return no results.
  const numericNum = Number(num);
  const requestedNum = Number.isNaN(numericNum) ? 5 : numericNum;
  const clampedNum = Math.max(1, Math.min(requestedNum, 20));

  // Only the launch path needs a temporary profile; it is created lazily there.
  let profileDir = null;
  let context;
  let browser;
  let braveProcess;
  let ownedContext = false;

  try {
    if (useCdpConnection) {
      let effectiveCdpPort = cdpPortValue;

      if (cdpAutoStart) {
        console.error(`Starting browser for CDP (preferred port ${cdpPortValue})...`);
        braveProcess = await startBraveForCdp(cdpPortValue, cdpProfileValue, browserBinArg);
        // The browser may have been started on a different port than the preferred one.
        effectiveCdpPort = braveProcess.port;
        console.error(`Browser started (pid=${braveProcess.proc.pid}, port=${effectiveCdpPort})`);
      }

      browser = await chromium.connectOverCDP(`http://127.0.0.1:${effectiveCdpPort}`);
      const contexts = browser.contexts();
      if (contexts.length > 0) {
        // Reuse the browser's existing context; it is not ours to close.
        context = contexts[0];
      } else {
        context = await browser.newContext();
        ownedContext = true;
      }

      // Inject stealth scripts for CDP mode (avoid bot detection)
      await context.addInitScript(() => {
        if (!window.chrome) window.chrome = {};
        if (!window.chrome.runtime) window.chrome.runtime = { id: undefined };
      });
    } else {
      profileDir = mkdtempSync(join(tmpdir(), "web-browse-profile-"));

      context = await chromium.launchPersistentContext(profileDir, {
        headless: true,
        viewport: { width: 1280, height: 720 },
        locale: "en-US",
        timezoneId: "UTC",
        userAgent: HEADERS["User-Agent"],
        colorScheme: "light",
        extraHTTPHeaders: {
          "Accept-Language": "en-US,en;q=0.9",
          "Upgrade-Insecure-Requests": "1",
        },
        args: [
          "--no-sandbox",
          "--disable-setuid-sandbox",
          "--disable-dev-shm-usage",
          "--disable-blink-features=AutomationControlled",
          "--disable-features=IsolateOrigins,site-per-process",
        ],
      });
      ownedContext = true;

      // Mask the most common automation fingerprints in the launched browser.
      await context.addInitScript(() => {
        Object.defineProperty(navigator, "webdriver", { get: () => undefined });
        Object.defineProperty(navigator, "languages", { get: () => ["en-US", "en"] });
        Object.defineProperty(navigator, "plugins", { get: () => [1, 2, 3, 4, 5] });
        Object.defineProperty(navigator, "platform", { get: () => "Linux x86_64" });
        Object.defineProperty(navigator, "hardwareConcurrency", { get: () => 8 });
        Object.defineProperty(navigator, "deviceMemory", { get: () => 8 });
        window.chrome = { runtime: {} };
      });
    }

    const results = await searchWebFromContext({
      context,
      httpFetch,
      headers: HEADERS,
      query,
      numResults: clampedNum,
      log: (msg) => console.error(msg),
    });

    return results.slice(0, clampedNum);
  } finally {
    // Pages are only force-closed when we started the browser ourselves.
    if (context && cdpAutoStart) {
      const pages = context.pages();
      for (const openPage of pages) {
        await openPage.close().catch(() => {});
      }
    }

    if (ownedContext && context) {
      await context.close().catch(() => {});
    }

    if (browser) {
      await browser.close().catch(() => {});
    }

    if (braveProcess?.proc?.pid) {
      const pid = braveProcess.proc.pid;
      // Handled here, so the exit hook does not need to kill it again.
      spawnedBraveProcessGroupPids.delete(pid);

      try {
        if (IS_WINDOWS) {
          spawn("taskkill", ["/pid", pid.toString(), "/T", "/F"], { stdio: "ignore" });
        } else {
          process.kill(-pid);
        }
      } catch {
        // ignore
      }
    }

    if (profileDir) {
      try {
        rmSync(profileDir, { recursive: true, force: true });
      } catch {
        // Best-effort: a leftover temp dir must not mask the search result or error.
      }
    }
  }
}
|
|
371
|
+
|
|
372
|
+
// --- Fetch URL Content ---
|
|
373
|
+
|
|
374
|
+
/**
 * Run `handler` against a browser context obtained over CDP, then release the
 * connection (and any browser started here) regardless of the outcome.
 *
 * @param {{useCdp: boolean, cdpPort: number, cdpStart: boolean}} cdpOptions - Resolved CDP settings.
 * @param {string} cdpProfileValue - Profile directory for an auto-started browser.
 * @param {(context: *) => Promise<*>} handler - Work to perform with the context.
 * @returns {Promise<*>} Whatever `handler` resolves to.
 * @throws {Error} When `cdpOptions.useCdp` is false.
 */
async function withCdpBrowser(cdpOptions, cdpProfileValue, handler) {
  // Nothing has been acquired yet, so there is nothing to release on this path.
  if (!cdpOptions.useCdp) {
    throw new Error("CDP is required for this fetch");
  }

  let context;
  let browser;
  let braveProcess;

  try {
    let port = cdpOptions.cdpPort;

    if (cdpOptions.cdpStart) {
      console.error(`Starting Brave for CDP (preferred port ${cdpOptions.cdpPort})...`);
      braveProcess = await startBraveForCdp(cdpOptions.cdpPort, cdpProfileValue, browserBinArg);
      // Use the port the browser actually came up on.
      port = braveProcess.port;
      console.error(`Brave started (pid=${braveProcess.proc.pid}, port=${port})`);
    }

    browser = await chromium.connectOverCDP(`http://127.0.0.1:${port}`);

    // Prefer the browser's existing context; create one only when there is none.
    const [existingContext] = browser.contexts();
    context = existingContext ?? (await browser.newContext());

    // Inject stealth scripts for CDP mode (avoid bot detection)
    await context.addInitScript(() => {
      if (!window.chrome) window.chrome = {};
      if (!window.chrome.runtime) window.chrome.runtime = { id: undefined };
    });

    return await handler(context);
  } finally {
    // Pages are only force-closed when the browser was started by this call.
    if (context && cdpOptions.cdpStart) {
      for (const openPage of context.pages()) {
        await openPage.close().catch(() => {});
      }
    }

    if (browser) {
      await browser.close().catch(() => {});
    }

    const spawnedPid = braveProcess?.proc?.pid;
    if (spawnedPid) {
      // Handled here, so the exit hook does not need to kill it again.
      spawnedBraveProcessGroupPids.delete(spawnedPid);

      try {
        if (IS_WINDOWS) {
          spawn("taskkill", ["/pid", spawnedPid.toString(), "/T", "/F"], { stdio: "ignore" });
        } else {
          process.kill(-spawnedPid);
        }
      } catch {
        // ignore
      }
    }
  }
}
|
|
429
|
+
|
|
430
|
+
|
|
431
|
+
/**
 * Fetch a single URL through a CDP-attached browser context.
 *
 * cleanupContextPages runs after the fetch whether it succeeded or failed, so
 * a failed fetch does not skip the cleanup step. This matters most when
 * attached to an existing browser: withCdpBrowser only closes pages itself
 * when it started the browser.
 *
 * @param {string} url - URL to fetch.
 * @param {boolean} truncate - Passed through to fetchUrlFromContext.
 * @param {object} cdpOptions - Resolved CDP settings.
 * @param {string} cdpProfileValue - Profile directory for an auto-started browser.
 * @returns {Promise<*>} The result of fetchUrlFromContext.
 */
async function fetchUrlWithCdp(url, truncate, cdpOptions, cdpProfileValue) {
  return withCdpBrowser(cdpOptions, cdpProfileValue, async (context) => {
    let result;
    try {
      result = await fetchUrlFromContext(context, url, truncate, FETCH_OPTS);
    } catch (fetchError) {
      try {
        await cleanupContextPages(context);
      } catch {
        // Keep the original fetch failure as the reported error.
      }
      throw fetchError;
    }

    await cleanupContextPages(context);
    return result;
  });
}
|
|
438
|
+
|
|
439
|
+
/**
 * Fetch several URLs through a CDP-attached browser context.
 *
 * cleanupContextPages runs after the fetch whether it succeeded or failed, so
 * a failed batch does not skip the cleanup step. This matters most when
 * attached to an existing browser: withCdpBrowser only closes pages itself
 * when it started the browser.
 *
 * @param {string[]} urls - URLs to fetch.
 * @param {boolean} truncate - Passed through to fetchUrlsFromContext.
 * @param {object} cdpOptions - Resolved CDP settings.
 * @param {string} cdpProfileValue - Profile directory for an auto-started browser.
 * @returns {Promise<*>} The result of fetchUrlsFromContext.
 */
async function fetchUrlsWithCdp(urls, truncate, cdpOptions, cdpProfileValue) {
  return withCdpBrowser(cdpOptions, cdpProfileValue, async (context) => {
    let results;
    try {
      results = await fetchUrlsFromContext(context, urls, truncate, FETCH_OPTS);
    } catch (fetchError) {
      try {
        await cleanupContextPages(context);
      } catch {
        // Keep the original fetch failure as the reported error.
      }
      throw fetchError;
    }

    await cleanupContextPages(context);
    return results;
  });
}
|
|
446
|
+
|
|
447
|
+
/**
 * Internal daemon entry point (reached via `--daemon-run`).
 *
 * Reads the preferred CDP port from the environment (WEB_BROWSE_CDP_PORT, then
 * LOCAL_SEARCH_CDP_PORT, default 9223) and hands the daemon everything it
 * needs as injected dependencies.
 *
 * @returns {Promise<void>} Settles when runWebBrowseDaemon settles.
 */
async function runDaemon() {
  const cdpPortSetting =
    process.env.WEB_BROWSE_CDP_PORT || process.env.LOCAL_SEARCH_CDP_PORT || "9223";
  const preferredCdpPort = parseInt(cdpPortSetting, 10);

  const daemonOptions = {
    // Daemon endpoint and bookkeeping
    daemonPort: DAEMON_PORT,
    daemonUrl: DAEMON_URL,
    daemonPidFile: DAEMON_PID_FILE,
    // Browser launch configuration
    preferredCdpPort,
    cdpProfile,
    browserBinArg,
    startBraveForCdp,
    chromium,
    // Fetch/search implementations and their shared settings
    fetchUrlFromContext,
    fetchUrlsFromContext,
    searchWebFromContext,
    httpFetch,
    headers: HEADERS,
    cleanupContextPages,
    fetchOpts: FETCH_OPTS,
    // Shared set so this process's exit hooks can clean up the daemon's browser
    spawnedBrowserProcessGroupPids: spawnedBraveProcessGroupPids,
  };

  await runWebBrowseDaemon(daemonOptions);
}
|
|
472
|
+
|
|
473
|
+
// --- Output Formatting ---
|
|
474
|
+
/**
 * Print search results to stdout as numbered entries separated by rule lines,
 * followed by a hint about `--fetch`.
 *
 * @param {Array<{title: string, link: string, snippet?: string}>} results - Results to print.
 */
function printSearchResults(results) {
  const rule = `${"=".repeat(70)}\n`;

  console.log(rule);
  results.forEach(({ title, link, snippet }, position) => {
    // Entries are numbered from 1 to match the indices accepted by --fetch.
    console.log(`## ${position + 1}. ${title}`);
    console.log(`URL: ${link}`);
    console.log(`${snippet || "(no snippet)"}\n`);
    console.log(rule);
  });
  console.log(`💡 Use --fetch 1,2,3 to fetch specific results`);
}
|
|
484
|
+
|
|
485
|
+
/**
 * Print fetched pages to stdout: a heading (title, or URL when there is no
 * title), the URL, then either the page content or its error message.
 *
 * @param {Array<{title?: string, url: string, content?: string, error?: string}>} results - Pages to print.
 */
function printFetchedContent(results) {
  const rule = "=".repeat(70);

  console.log(rule + "\n");
  results.forEach((page) => {
    const heading = page.title || page.url;
    console.log(`## ${heading}`);
    console.log(`URL: ${page.url}\n`);

    const body = page.error ? `❌ Error: ${page.error}` : page.content;
    console.log(body);

    console.log("\n" + rule + "\n");
  });
}
|
|
498
|
+
|
|
499
|
+
// --- Main ---
|
|
500
|
+
/**
 * CLI entry point. Dispatches, in this order, to:
 *   1. the internal daemon loop (`--daemon-run`),
 *   2. daemon controls (`--daemon start|stop|status|restart`),
 *   3. fetching a single URL (`--url`),
 *   4. fetching cached search results by index (`--fetch`),
 *   5. searching (positional query, optionally repeated with `--stress`).
 *
 * Unless `--no-daemon` is given, fetches and searches go through the daemon;
 * otherwise they run one-shot via CDP or plain HTTP. Progress and diagnostics
 * go to stderr, results to stdout.
 *
 * @returns {Promise<void>}
 */
async function main() {
  // Daemon: internal entrypoint
  if (daemonRun) {
    await runDaemon();
    return;
  }

  // Daemon: user-facing controls
  if (daemonCommand) {
    const cmd = String(daemonCommand).toLowerCase();

    if (cmd === "status") {
      const health = await daemonHealth(1500);
      console.log(JSON.stringify({ status: health ? "running" : "stopped", health }, null, 2));
      return;
    }

    if (cmd === "start") {
      const health = await daemonEnsureRunning();
      console.log(JSON.stringify({ status: "running", health }, null, 2));
      return;
    }

    if (cmd === "stop") {
      const result = await daemonStop();
      console.log(JSON.stringify(result, null, 2));
      return;
    }

    if (cmd === "restart") {
      // A failed stop (e.g. daemon not running) must not prevent the start.
      await daemonStop().catch(() => {});
      const health = await daemonEnsureRunning();
      console.log(JSON.stringify({ status: "running", health }, null, 2));
      return;
    }

    console.error(`Unknown --daemon command: ${daemonCommand} (expected: start|stop|status|restart)`);
    process.exit(1);
  }

  // Mode 1: Fetch specific URL
  if (directUrl) {
    console.error(`Fetching: ${directUrl}\n`);

    let result;

    if (!noDaemon) {
      await daemonEnsureRunning();
      result = await daemonSendCommand("fetch", { url: directUrl, truncate: !fullContent });
    } else {
      const cdpOptions = await resolveCdpOptions(useCdp, cdpStart, cdpPort);
      result = cdpOptions.useCdp
        ? await fetchUrlWithCdp(directUrl, !fullContent, cdpOptions, cdpProfile)
        : await fetchUrlViaHttp(httpFetch, HEADERS, directUrl, !fullContent);
    }

    printFetchedContent([result]);
    return;
  }

  // Mode 2: Fetch from cache by index
  if (fetchIndices) {
    const cache = loadCache();
    if (!cache) {
      console.error("No cached search results. Run a search first.");
      process.exit(1);
    }

    // Indices on the command line are 1-based; keep track of the ones we cannot use
    // so the user is told about them instead of silently getting fewer pages.
    const toFetch = [];
    const ignoredTokens = [];
    for (const rawToken of fetchIndices.split(",")) {
      const token = rawToken.trim();
      const i = parseInt(token, 10) - 1;
      if (i >= 0 && i < cache.results.length) {
        toFetch.push(cache.results[i]);
      } else {
        ignoredTokens.push(token === "" ? "(empty)" : token);
      }
    }

    if (toFetch.length === 0) {
      console.error(`Invalid indices. Cache has ${cache.results.length} results (1-${cache.results.length}).`);
      process.exit(1);
    }

    if (ignoredTokens.length > 0) {
      console.error(
        `Ignoring invalid or out-of-range indices: ${ignoredTokens.join(", ")} (valid range: 1-${cache.results.length}).`,
      );
    }

    console.error(`Fetching ${toFetch.length} page(s)...\n`);

    let results;

    if (!noDaemon) {
      await daemonEnsureRunning();
      results = await daemonSendCommand("fetchMany", { urls: toFetch.map((item) => item.link), truncate: !fullContent });
    } else {
      const cdpOptions = await resolveCdpOptions(useCdp, cdpStart, cdpPort);
      results = cdpOptions.useCdp
        ? await fetchUrlsWithCdp(toFetch.map((item) => item.link), !fullContent, cdpOptions, cdpProfile)
        : await Promise.all(toFetch.map((item) => fetchUrlViaHttp(httpFetch, HEADERS, item.link, !fullContent)));
    }

    printFetchedContent(results);
    return;
  }

  // Mode 3: Search
  if (query) {
    const attemptSearch = async () => {
      if (!noDaemon) {
        await daemonEnsureRunning();
        return await daemonSendCommand("search", { query, numResults });
      }

      const cdpOptions = await resolveCdpOptions(useCdp, cdpStart, cdpPort);
      return await searchWebOneShot(
        query,
        numResults,
        cdpOptions.useCdp,
        cdpOptions.cdpPort,
        cdpOptions.cdpStart,
        cdpProfile,
      );
    };

    if (stressCount > 0) {
      console.error(`Stress mode: ${stressCount} searches for "${query}"\n`);
      let successCount = 0;

      // Runs are sequential; a run counts as successful when it returns at least one result.
      for (let i = 0; i < stressCount; i += 1) {
        console.error(`Run ${i + 1}/${stressCount}`);
        const results = await attemptSearch();
        if (results.length > 0) {
          successCount += 1;
        }
      }

      console.log(`Stress summary: ${successCount}/${stressCount} successful searches`);
      process.exit(0);
    }

    console.error(`Searching: "${query}"\n`);
    const results = await attemptSearch();

    if (results.length === 0) {
      console.log("No results found.");
      process.exit(0);
    }

    // Cache before printing so a follow-up `--fetch` can use these results.
    saveCache(query, results);
    printSearchResults(results);
    return;
  }

  console.error("No query provided. Use --help for usage.");
  process.exit(1);
}
|
|
647
|
+
|
|
648
|
+
// Run the CLI; any failure that escapes main() is reported on stderr and exits with status 1.
main().then(undefined, (failure) => {
  console.error(`Error: ${failure.message}`);
  process.exit(1);
});
|