unbrowse 2.0.21 → 2.0.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js
CHANGED
|
@@ -325,6 +325,7 @@ import { existsSync as existsSync2, mkdirSync as mkdirSync2, realpathSync } from
|
|
|
325
325
|
import os from "node:os";
|
|
326
326
|
import path from "node:path";
|
|
327
327
|
import { createRequire } from "node:module";
|
|
328
|
+
import { execFileSync } from "node:child_process";
|
|
328
329
|
import { fileURLToPath } from "node:url";
|
|
329
330
|
function getModuleDir(metaUrl) {
|
|
330
331
|
return path.dirname(fileURLToPath(metaUrl));
|
|
@@ -349,19 +350,32 @@ function resolveSiblingEntrypoint(metaUrl, basename) {
|
|
|
349
350
|
const file = fileURLToPath(metaUrl);
|
|
350
351
|
return path.join(path.dirname(file), `${basename}${path.extname(file) || ".js"}`);
|
|
351
352
|
}
|
|
352
|
-
function runtimeArgsForEntrypoint(metaUrl, entrypoint) {
|
|
353
|
+
function resolveBinaryOnPath(name) {
|
|
354
|
+
const checker = process.platform === "win32" ? "where" : "which";
|
|
355
|
+
try {
|
|
356
|
+
const output = execFileSync(checker, [name], { encoding: "utf8", stdio: ["ignore", "pipe", "ignore"] });
|
|
357
|
+
const match = output.split(/\r?\n/).map((line) => line.trim()).find(Boolean);
|
|
358
|
+
return match || null;
|
|
359
|
+
} catch {
|
|
360
|
+
return null;
|
|
361
|
+
}
|
|
362
|
+
}
|
|
363
|
+
function runtimeInvocationForEntrypoint(metaUrl, entrypoint) {
|
|
353
364
|
if (path.extname(entrypoint) !== ".ts")
|
|
354
|
-
return [entrypoint];
|
|
365
|
+
return { command: process.execPath, args: [entrypoint] };
|
|
355
366
|
if (process.versions.bun)
|
|
356
|
-
return [entrypoint];
|
|
367
|
+
return { command: process.execPath, args: [entrypoint] };
|
|
368
|
+
const bunBinary = process.env.BUN_BIN || resolveBinaryOnPath("bun");
|
|
369
|
+
if (bunBinary)
|
|
370
|
+
return { command: bunBinary, args: [entrypoint] };
|
|
357
371
|
try {
|
|
358
372
|
const req = createRequire(metaUrl);
|
|
359
373
|
const tsxPkg = req.resolve("tsx/package.json");
|
|
360
374
|
const tsxLoader = path.join(path.dirname(tsxPkg), "dist", "loader.mjs");
|
|
361
375
|
if (existsSync2(tsxLoader))
|
|
362
|
-
return ["--import", tsxLoader, entrypoint];
|
|
376
|
+
return { command: process.execPath, args: ["--import", tsxLoader, entrypoint] };
|
|
363
377
|
} catch {}
|
|
364
|
-
return ["--import", "tsx", entrypoint];
|
|
378
|
+
return { command: process.execPath, args: ["--import", "tsx", entrypoint] };
|
|
365
379
|
}
|
|
366
380
|
function getUnbrowseHome() {
|
|
367
381
|
return path.join(os.homedir(), ".unbrowse");
|
|
@@ -560,7 +574,7 @@ async function maybeAutoUpdate(metaUrl, overrides = {}) {
|
|
|
560
574
|
import { closeSync, openSync, readFileSync as readFileSync4, statSync, unlinkSync, writeFileSync as writeFileSync2 } from "node:fs";
|
|
561
575
|
import path3 from "node:path";
|
|
562
576
|
import { spawn } from "node:child_process";
|
|
563
|
-
import { execFileSync } from "node:child_process";
|
|
577
|
+
import { execFileSync as execFileSync2 } from "node:child_process";
|
|
564
578
|
|
|
565
579
|
// ../../src/version.ts
|
|
566
580
|
import { createHash } from "crypto";
|
|
@@ -671,7 +685,7 @@ function findListeningPid(baseUrl) {
|
|
|
671
685
|
try {
|
|
672
686
|
const url = new URL(baseUrl);
|
|
673
687
|
const port = url.port || (url.protocol === "https:" ? "443" : "80");
|
|
674
|
-
const output = execFileSync("lsof", ["-nP", `-iTCP:${port}`, "-sTCP:LISTEN", "-t"], {
|
|
688
|
+
const output = execFileSync2("lsof", ["-nP", `-iTCP:${port}`, "-sTCP:LISTEN", "-t"], {
|
|
675
689
|
encoding: "utf8",
|
|
676
690
|
stdio: ["ignore", "pipe", "ignore"]
|
|
677
691
|
}).trim();
|
|
@@ -683,7 +697,7 @@ function findListeningPid(baseUrl) {
|
|
|
683
697
|
}
|
|
684
698
|
function readProcessCommand(pid) {
|
|
685
699
|
try {
|
|
686
|
-
return execFileSync("ps", ["-o", "command=", "-p", String(pid)], {
|
|
700
|
+
return execFileSync2("ps", ["-o", "command=", "-p", String(pid)], {
|
|
687
701
|
encoding: "utf8",
|
|
688
702
|
stdio: ["ignore", "pipe", "ignore"]
|
|
689
703
|
}).trim();
|
|
@@ -791,7 +805,8 @@ async function ensureLocalServer(baseUrl, noAutoStart, metaUrl) {
|
|
|
791
805
|
const logFile = getServerAutostartLogFile();
|
|
792
806
|
ensureDir(path3.dirname(logFile));
|
|
793
807
|
const logFd = openSync(logFile, "a");
|
|
794
|
-
const
|
|
808
|
+
const runtime = runtimeInvocationForEntrypoint(metaUrl, entrypoint);
|
|
809
|
+
const child = spawn(runtime.command, runtime.args, {
|
|
795
810
|
cwd: packageRoot,
|
|
796
811
|
detached: true,
|
|
797
812
|
stdio: ["ignore", logFd, logFd],
|
package/package.json
CHANGED
|
@@ -38,6 +38,25 @@ export interface LaunchedProfileContext {
|
|
|
38
38
|
tempDir: string;
|
|
39
39
|
}
|
|
40
40
|
|
|
41
|
+
async function waitForChildExit(child: ChildProcess, timeoutMs = 2_000): Promise<void> {
|
|
42
|
+
if (child.exitCode !== null || child.killed) return;
|
|
43
|
+
await new Promise<void>((resolve) => {
|
|
44
|
+
const timer = setTimeout(resolve, timeoutMs);
|
|
45
|
+
child.once("exit", () => {
|
|
46
|
+
clearTimeout(timer);
|
|
47
|
+
resolve();
|
|
48
|
+
});
|
|
49
|
+
});
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
function removeTempDirQuietly(dir: string): void {
|
|
53
|
+
try {
|
|
54
|
+
rmSync(dir, { recursive: true, force: true });
|
|
55
|
+
} catch {
|
|
56
|
+
// best-effort cleanup; do not fail captures on temp profile removal
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
|
|
41
60
|
function resolveChromiumBinary(browserName: string): string | null {
|
|
42
61
|
const macos = new Map<string, string>([
|
|
43
62
|
["Chrome", "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"],
|
|
@@ -296,13 +315,14 @@ export async function launchChromiumProfileContext(meta: BrowserAuthSourceMeta):
|
|
|
296
315
|
};
|
|
297
316
|
} catch (error) {
|
|
298
317
|
try { child.kill("SIGTERM"); } catch {}
|
|
299
|
-
|
|
318
|
+
removeTempDirQuietly(tempDir);
|
|
300
319
|
throw error;
|
|
301
320
|
}
|
|
302
321
|
}
|
|
303
322
|
|
|
304
|
-
export function cleanupProfileContext(ctx: LaunchedProfileContext | null | undefined): void {
|
|
323
|
+
export async function cleanupProfileContext(ctx: LaunchedProfileContext | null | undefined): Promise<void> {
|
|
305
324
|
if (!ctx) return;
|
|
306
325
|
try { ctx.child.kill("SIGTERM"); } catch {}
|
|
307
|
-
|
|
326
|
+
await waitForChildExit(ctx.child);
|
|
327
|
+
removeTempDirQuietly(ctx.tempDir);
|
|
308
328
|
}
|
|
@@ -268,6 +268,20 @@ export function blockedAppShellErrorCode(
|
|
|
268
268
|
return hasAuth ? "blocked_app_shell" : "auth_required";
|
|
269
269
|
}
|
|
270
270
|
|
|
271
|
+
export function shouldShortCircuitEmbeddedPayloadCapture(url: string, intent: string | undefined, html?: string): boolean {
|
|
272
|
+
if (!html) return false;
|
|
273
|
+
const lowerIntent = intent?.toLowerCase() ?? "";
|
|
274
|
+
if (
|
|
275
|
+
/linkedin\.com/i.test(url) &&
|
|
276
|
+
/\/feed(?:\/|$)/i.test(url) &&
|
|
277
|
+
/\b(feed|timeline|stream|post|posts|update|updates|home)\b/.test(lowerIntent) &&
|
|
278
|
+
/voyagerFeedDashMainFeed/.test(html)
|
|
279
|
+
) {
|
|
280
|
+
return true;
|
|
281
|
+
}
|
|
282
|
+
return false;
|
|
283
|
+
}
|
|
284
|
+
|
|
271
285
|
function shouldRetryEphemeralProfileError(error: unknown): boolean {
|
|
272
286
|
const message = error instanceof Error ? error.message : String(error ?? "");
|
|
273
287
|
return /persistentcontext|target page, context or browser has been closed|browser has been closed|page has been closed/i.test(message);
|
|
@@ -930,16 +944,23 @@ export async function captureSession(
|
|
|
930
944
|
}
|
|
931
945
|
await kuri.stop();
|
|
932
946
|
kuri.useExternalChrome(browserCdpBaseUrl(profileCtx.cdpUrl), { child: profileCtx.child, tempDir: profileCtx.tempDir });
|
|
947
|
+
let nestedResult: CaptureResult | null = null;
|
|
933
948
|
try {
|
|
934
|
-
|
|
949
|
+
nestedResult = await captureSession(url, undefined, undefined, intent, {
|
|
935
950
|
...options,
|
|
936
951
|
forceEphemeral: true,
|
|
937
952
|
usedProfileContext: true,
|
|
938
953
|
preferExistingTab,
|
|
939
954
|
authStrategy: "header-replay",
|
|
940
955
|
});
|
|
956
|
+
return nestedResult;
|
|
941
957
|
} finally {
|
|
942
|
-
|
|
958
|
+
try {
|
|
959
|
+
await kuri.stop();
|
|
960
|
+
} catch (stopErr) {
|
|
961
|
+
log("capture", `profile-context cleanup failed for ${url}: ${stopErr instanceof Error ? stopErr.message : String(stopErr)}`);
|
|
962
|
+
if (!nestedResult) throw stopErr;
|
|
963
|
+
}
|
|
943
964
|
}
|
|
944
965
|
} catch (attachErr) {
|
|
945
966
|
log("capture", `forced profile context failed for ${url}: ${attachErr instanceof Error ? attachErr.message : String(attachErr)}`);
|
|
@@ -1081,6 +1102,40 @@ export async function captureSession(
|
|
|
1081
1102
|
await kuri.evaluate(tabId, INTERCEPTOR_SCRIPT);
|
|
1082
1103
|
} catch { /* page may not be ready */ }
|
|
1083
1104
|
|
|
1105
|
+
// For pages that embed the task payload directly in the HTML, return before
|
|
1106
|
+
// the longer network/intercept wait. This avoids losing useful captures to
|
|
1107
|
+
// later browser-engine instability on auth-gated SPAs like LinkedIn feed.
|
|
1108
|
+
try {
|
|
1109
|
+
await sleep(1_500, signal);
|
|
1110
|
+
throwIfAborted(signal);
|
|
1111
|
+
const earlyHtml = await kuri.getPageHtml(tabId);
|
|
1112
|
+
if (shouldShortCircuitEmbeddedPayloadCapture(url, intent, earlyHtml)) {
|
|
1113
|
+
let final_url = url;
|
|
1114
|
+
try {
|
|
1115
|
+
const rawUrl = await kuri.getCurrentUrl(tabId);
|
|
1116
|
+
final_url = typeof rawUrl === "string" ? rawUrl : String(rawUrl ?? url);
|
|
1117
|
+
try { new URL(final_url); } catch { final_url = url; }
|
|
1118
|
+
} catch {
|
|
1119
|
+
final_url = url;
|
|
1120
|
+
}
|
|
1121
|
+
lastHtml = earlyHtml;
|
|
1122
|
+
const rawCookies = await extractCookiesFromPage(tabId, url);
|
|
1123
|
+
const sessionCookies = filterFirstPartySessionCookies(rawCookies, url, final_url);
|
|
1124
|
+
log("capture", `short-circuiting embedded payload capture for ${url}`);
|
|
1125
|
+
return {
|
|
1126
|
+
requests: [],
|
|
1127
|
+
har_lineage_id: nanoid(),
|
|
1128
|
+
domain,
|
|
1129
|
+
cookies: sessionCookies,
|
|
1130
|
+
final_url,
|
|
1131
|
+
html: earlyHtml,
|
|
1132
|
+
js_bundles: new Map(),
|
|
1133
|
+
};
|
|
1134
|
+
}
|
|
1135
|
+
} catch {
|
|
1136
|
+
// fall through to the longer capture path
|
|
1137
|
+
}
|
|
1138
|
+
|
|
1084
1139
|
// Build response bodies map from intercepted requests
|
|
1085
1140
|
const responseBodies = new Map<string, string>();
|
|
1086
1141
|
const jsBundleBodies = new Map<string, string>();
|
|
@@ -65,6 +65,18 @@ let externalChromeOverride: {
|
|
|
65
65
|
previousAttach?: string;
|
|
66
66
|
} | null = null;
|
|
67
67
|
|
|
68
|
+
async function waitForChildExit(child: ChildProcess | null | undefined, timeoutMs = 2_000): Promise<void> {
|
|
69
|
+
if (!child) return;
|
|
70
|
+
if (child.exitCode !== null || child.killed) return;
|
|
71
|
+
await new Promise<void>((resolve) => {
|
|
72
|
+
const timer = setTimeout(resolve, timeoutMs);
|
|
73
|
+
child.once("exit", () => {
|
|
74
|
+
clearTimeout(timer);
|
|
75
|
+
resolve();
|
|
76
|
+
});
|
|
77
|
+
});
|
|
78
|
+
}
|
|
79
|
+
|
|
68
80
|
function kuriBinaryName(): string {
|
|
69
81
|
return process.platform === "win32" ? "kuri.exe" : "kuri";
|
|
70
82
|
}
|
|
@@ -426,7 +438,12 @@ export async function stop(): Promise<void> {
|
|
|
426
438
|
// ignore
|
|
427
439
|
}
|
|
428
440
|
if (externalChromeOverride.tempDir) {
|
|
429
|
-
|
|
441
|
+
await waitForChildExit(externalChromeOverride.child);
|
|
442
|
+
try {
|
|
443
|
+
rmSync(externalChromeOverride.tempDir, { recursive: true, force: true });
|
|
444
|
+
} catch {
|
|
445
|
+
// best-effort cleanup; don't fail the caller on temp profile removal
|
|
446
|
+
}
|
|
430
447
|
}
|
|
431
448
|
if (externalChromeOverride.previousCdpUrl == null) delete process.env.CDP_URL;
|
|
432
449
|
else process.env.CDP_URL = externalChromeOverride.previousCdpUrl;
|
|
@@ -2,7 +2,7 @@ import { closeSync, openSync, readFileSync, statSync, unlinkSync, writeFileSync
|
|
|
2
2
|
import path from "node:path";
|
|
3
3
|
import { spawn } from "node:child_process";
|
|
4
4
|
import { execFileSync } from "node:child_process";
|
|
5
|
-
import { ensureDir, getPackageRoot, getServerAutostartLogFile, getServerPidFile, resolveSiblingEntrypoint, runtimeArgsForEntrypoint } from "./paths.js";
|
|
5
|
+
import { ensureDir, getPackageRoot, getServerAutostartLogFile, getServerPidFile, resolveSiblingEntrypoint, runtimeInvocationForEntrypoint } from "./paths.js";
|
|
6
6
|
import { CODE_HASH } from "../version.js";
|
|
7
7
|
|
|
8
8
|
type PidState = {
|
|
@@ -222,7 +222,8 @@ export async function ensureLocalServer(baseUrl: string, noAutoStart: boolean, m
|
|
|
222
222
|
const logFile = getServerAutostartLogFile();
|
|
223
223
|
ensureDir(path.dirname(logFile));
|
|
224
224
|
const logFd = openSync(logFile, "a");
|
|
225
|
-
const
|
|
225
|
+
const runtime = runtimeInvocationForEntrypoint(metaUrl, entrypoint);
|
|
226
|
+
const child = spawn(runtime.command, runtime.args, {
|
|
226
227
|
cwd: packageRoot,
|
|
227
228
|
detached: true,
|
|
228
229
|
stdio: ["ignore", logFd, logFd],
|
|
@@ -2,6 +2,7 @@ import { existsSync, mkdirSync, realpathSync } from "node:fs";
|
|
|
2
2
|
import os from "node:os";
|
|
3
3
|
import path from "node:path";
|
|
4
4
|
import { createRequire } from "node:module";
|
|
5
|
+
import { execFileSync } from "node:child_process";
|
|
5
6
|
import { fileURLToPath } from "node:url";
|
|
6
7
|
|
|
7
8
|
export function getModuleDir(metaUrl: string): string {
|
|
@@ -29,20 +30,38 @@ export function resolveSiblingEntrypoint(metaUrl: string, basename: string): str
|
|
|
29
30
|
return path.join(path.dirname(file), `${basename}${path.extname(file) || ".js"}`);
|
|
30
31
|
}
|
|
31
32
|
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
33
|
+
function resolveBinaryOnPath(name: string): string | null {
|
|
34
|
+
const checker = process.platform === "win32" ? "where" : "which";
|
|
35
|
+
try {
|
|
36
|
+
const output = execFileSync(checker, [name], { encoding: "utf8", stdio: ["ignore", "pipe", "ignore"] });
|
|
37
|
+
const match = output.split(/\r?\n/).map((line) => line.trim()).find(Boolean);
|
|
38
|
+
return match || null;
|
|
39
|
+
} catch {
|
|
40
|
+
return null;
|
|
41
|
+
}
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
export function runtimeInvocationForEntrypoint(metaUrl: string, entrypoint: string): { command: string; args: string[] } {
|
|
45
|
+
if (path.extname(entrypoint) !== ".ts") return { command: process.execPath, args: [entrypoint] };
|
|
46
|
+
if (process.versions.bun) return { command: process.execPath, args: [entrypoint] };
|
|
47
|
+
|
|
48
|
+
const bunBinary = process.env.BUN_BIN || resolveBinaryOnPath("bun");
|
|
49
|
+
if (bunBinary) return { command: bunBinary, args: [entrypoint] };
|
|
35
50
|
|
|
36
51
|
try {
|
|
37
52
|
const req = createRequire(metaUrl);
|
|
38
53
|
const tsxPkg = req.resolve("tsx/package.json");
|
|
39
54
|
const tsxLoader = path.join(path.dirname(tsxPkg), "dist", "loader.mjs");
|
|
40
|
-
if (existsSync(tsxLoader)) return ["--import", tsxLoader, entrypoint];
|
|
55
|
+
if (existsSync(tsxLoader)) return { command: process.execPath, args: ["--import", tsxLoader, entrypoint] };
|
|
41
56
|
} catch {
|
|
42
57
|
// fall through to bare specifier
|
|
43
58
|
}
|
|
44
59
|
|
|
45
|
-
return ["--import", "tsx", entrypoint];
|
|
60
|
+
return { command: process.execPath, args: ["--import", "tsx", entrypoint] };
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
export function runtimeArgsForEntrypoint(metaUrl: string, entrypoint: string): string[] {
|
|
64
|
+
return runtimeInvocationForEntrypoint(metaUrl, entrypoint).args;
|
|
46
65
|
}
|
|
47
66
|
|
|
48
67
|
export function isMainModule(metaUrl: string): boolean {
|