unbrowse 2.0.23 → 2.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -1
- package/dist/cli.js +56 -8
- package/dist/index.js +5733 -4422
- package/dist/supervisor.js +230 -0
- package/package.json +1 -1
- package/runtime-src/api/routes.ts +4 -3
- package/runtime-src/auth/browser-cookies.ts +55 -27
- package/runtime-src/auth/index.ts +239 -18
- package/runtime-src/capture/form-submit.ts +332 -0
- package/runtime-src/capture/index.ts +262 -108
- package/runtime-src/capture/interaction.ts +128 -0
- package/runtime-src/cli.ts +33 -8
- package/runtime-src/client/index.ts +40 -3
- package/runtime-src/execution/index.ts +255 -44
- package/runtime-src/graph/index.ts +29 -3
- package/runtime-src/kuri/client.ts +80 -0
- package/runtime-src/orchestrator/index.ts +436 -70
- package/runtime-src/reverse-engineer/index.ts +83 -3
- package/runtime-src/runtime/local-server.ts +39 -2
- package/runtime-src/supervisor.ts +182 -0
package/README.md
CHANGED
|
@@ -122,11 +122,13 @@ Put that in:
|
|
|
122
122
|
```bash
|
|
123
123
|
unbrowse health
|
|
124
124
|
unbrowse resolve --intent "get trending searches" --url "https://google.com" --pretty
|
|
125
|
-
unbrowse login --url "https://calendar.google.com"
|
|
125
|
+
unbrowse login --url "https://calendar.google.com" --browser chrome
|
|
126
126
|
unbrowse skills
|
|
127
127
|
unbrowse search --intent "get stock prices"
|
|
128
128
|
```
|
|
129
129
|
|
|
130
|
+
`unbrowse login` reuses cookies from a supported local browser profile. On macOS, pass `--browser chrome|arc|dia|brave|edge|vivaldi|chromium|firefox` if your default browser is Safari or another unsupported app.
|
|
131
|
+
|
|
130
132
|
## Demo notes
|
|
131
133
|
|
|
132
134
|
- First-time capture/indexing on a site can take 20-80 seconds. That is the slow path; repeats should be much faster.
|
package/dist/cli.js
CHANGED
|
@@ -707,7 +707,7 @@ function readProcessCommand(pid) {
|
|
|
707
707
|
}
|
|
708
708
|
function isLikelyUnbrowseServerProcess(pid) {
|
|
709
709
|
const command = readProcessCommand(pid);
|
|
710
|
-
return /\bunbrowse\b|runtime-src\/index\.ts|src\/index\.ts|dist\/index\.js/i.test(command);
|
|
710
|
+
return /\bunbrowse\b|runtime-src\/(index|supervisor)\.ts|src\/(index|supervisor)\.ts|dist\/(index|supervisor)\.js/i.test(command);
|
|
711
711
|
}
|
|
712
712
|
async function stopManagedServer(pid, pidFile, baseUrl) {
|
|
713
713
|
try {
|
|
@@ -737,15 +737,32 @@ function isStartupLockStale(lockFile) {
|
|
|
737
737
|
return true;
|
|
738
738
|
}
|
|
739
739
|
}
|
|
740
|
+
function shouldReclaimStartupLock(lockFile, pidFile) {
|
|
741
|
+
if (!isStartupLockStale(lockFile))
|
|
742
|
+
return false;
|
|
743
|
+
const owner = readPidState(pidFile);
|
|
744
|
+
const ownerAlive = owner?.pid ? isPidAlive(owner.pid) : false;
|
|
745
|
+
return !ownerAlive;
|
|
746
|
+
}
|
|
740
747
|
function deriveListenEnv(baseUrl) {
|
|
741
748
|
const url = new URL(baseUrl);
|
|
742
749
|
const host = !url.hostname || url.hostname === "localhost" ? "127.0.0.1" : url.hostname;
|
|
743
750
|
const port = url.port || (url.protocol === "https:" ? "443" : "80");
|
|
744
751
|
return { HOST: host, PORT: port, UNBROWSE_URL: baseUrl };
|
|
745
752
|
}
|
|
753
|
+
function describeListenTarget(baseUrl) {
|
|
754
|
+
const url = new URL(baseUrl);
|
|
755
|
+
const host = !url.hostname || url.hostname === "localhost" ? "127.0.0.1" : url.hostname;
|
|
756
|
+
const port = url.port || (url.protocol === "https:" ? "443" : "80");
|
|
757
|
+
return `${host}:${port}`;
|
|
758
|
+
}
|
|
746
759
|
async function ensureLocalServer(baseUrl, noAutoStart, metaUrl) {
|
|
747
760
|
const pidFile = getServerPidFile(baseUrl);
|
|
748
761
|
const startupLockFile = `${pidFile}.lock`;
|
|
762
|
+
if (shouldReclaimStartupLock(startupLockFile, pidFile)) {
|
|
763
|
+
clearStalePidFile(pidFile);
|
|
764
|
+
clearStaleStartupLockFile(startupLockFile);
|
|
765
|
+
}
|
|
749
766
|
let existing = readPidState(pidFile);
|
|
750
767
|
const health = await getServerHealth(baseUrl);
|
|
751
768
|
if (health.ok) {
|
|
@@ -784,6 +801,11 @@ async function ensureLocalServer(baseUrl, noAutoStart, metaUrl) {
|
|
|
784
801
|
startupLockFd = openSync(startupLockFile, "wx");
|
|
785
802
|
} catch (error) {
|
|
786
803
|
if (error.code === "EEXIST") {
|
|
804
|
+
if (shouldReclaimStartupLock(startupLockFile, pidFile)) {
|
|
805
|
+
clearStalePidFile(pidFile);
|
|
806
|
+
clearStaleStartupLockFile(startupLockFile);
|
|
807
|
+
return ensureLocalServer(baseUrl, noAutoStart, metaUrl);
|
|
808
|
+
}
|
|
787
809
|
if (await waitForHealthy(baseUrl, 30000))
|
|
788
810
|
return;
|
|
789
811
|
const owner = readPidState(pidFile);
|
|
@@ -800,7 +822,16 @@ async function ensureLocalServer(baseUrl, noAutoStart, metaUrl) {
|
|
|
800
822
|
try {
|
|
801
823
|
if (await isServerHealthy(baseUrl))
|
|
802
824
|
return;
|
|
803
|
-
const
|
|
825
|
+
const discoveredPid = findListeningPid(baseUrl);
|
|
826
|
+
if (discoveredPid) {
|
|
827
|
+
if (isLikelyUnbrowseServerProcess(discoveredPid)) {
|
|
828
|
+
if (await waitForHealthy(baseUrl, 5000))
|
|
829
|
+
return;
|
|
830
|
+
throw new Error(`Port ${describeListenTarget(baseUrl)} already has an unbrowse server (pid ${discoveredPid}), but it did not become healthy.`);
|
|
831
|
+
}
|
|
832
|
+
throw new Error(`Port ${describeListenTarget(baseUrl)} already in use by pid ${discoveredPid}.`);
|
|
833
|
+
}
|
|
834
|
+
const entrypoint = resolveSiblingEntrypoint(metaUrl, "supervisor");
|
|
804
835
|
const packageRoot = getPackageRoot(metaUrl);
|
|
805
836
|
const logFile = getServerAutostartLogFile();
|
|
806
837
|
ensureDir(path3.dirname(logFile));
|
|
@@ -1374,6 +1405,17 @@ function detectEntityIndex(data) {
|
|
|
1374
1405
|
}
|
|
1375
1406
|
return best ? buildEntityIndex(best) : null;
|
|
1376
1407
|
}
|
|
1408
|
+
function unwrapCarrier(data) {
|
|
1409
|
+
if (data == null || typeof data !== "object" || Array.isArray(data))
|
|
1410
|
+
return data;
|
|
1411
|
+
const rec = data;
|
|
1412
|
+
const keys = Object.keys(rec);
|
|
1413
|
+
const isCarrierOnly = keys.every((key) => key === "data" || key === "_extraction");
|
|
1414
|
+
if (isCarrierOnly && "data" in rec && (("_extraction" in rec) || Array.isArray(rec.data) || rec.data != null && typeof rec.data === "object")) {
|
|
1415
|
+
return unwrapCarrier(rec.data);
|
|
1416
|
+
}
|
|
1417
|
+
return data;
|
|
1418
|
+
}
|
|
1377
1419
|
function resolvePath(obj, path5, entityIndex) {
|
|
1378
1420
|
if (!path5 || obj == null)
|
|
1379
1421
|
return obj;
|
|
@@ -1483,8 +1525,8 @@ function looksStructuredForDirectOutput(value) {
|
|
|
1483
1525
|
return scalarFields >= 2;
|
|
1484
1526
|
}
|
|
1485
1527
|
function applyTransforms(result, flags) {
|
|
1486
|
-
let data = result;
|
|
1487
|
-
const entityIndex = detectEntityIndex(result);
|
|
1528
|
+
let data = unwrapCarrier(result);
|
|
1529
|
+
const entityIndex = detectEntityIndex(data);
|
|
1488
1530
|
const pathFlag = flags.path;
|
|
1489
1531
|
if (pathFlag) {
|
|
1490
1532
|
data = resolvePath(data, pathFlag, entityIndex);
|
|
@@ -1611,7 +1653,7 @@ async function cmdResolve(flags) {
|
|
|
1611
1653
|
if (flags["force-capture"])
|
|
1612
1654
|
body.force_capture = true;
|
|
1613
1655
|
const hasTransforms = !!(flags.path || flags.extract);
|
|
1614
|
-
if (flags.raw || hasTransforms)
|
|
1656
|
+
if (flags.raw || flags.schema || hasTransforms)
|
|
1615
1657
|
body.projection = { raw: true };
|
|
1616
1658
|
const startedAt = Date.now();
|
|
1617
1659
|
let result = await withPendingNotice(api2("POST", "/v1/intent/resolve", body), "Still working. First-time capture/indexing for a site can take 20-80s. Waiting is usually better than falling back.");
|
|
@@ -1658,7 +1700,7 @@ async function cmdExecute(flags) {
|
|
|
1658
1700
|
if (flags["confirm-unsafe"])
|
|
1659
1701
|
body.confirm_unsafe = true;
|
|
1660
1702
|
const hasTransforms = !!(flags.path || flags.extract);
|
|
1661
|
-
if (flags.raw || hasTransforms)
|
|
1703
|
+
if (flags.raw || flags.schema || hasTransforms)
|
|
1662
1704
|
body.projection = { raw: true };
|
|
1663
1705
|
let result = await withPendingNotice(api2("POST", `/v1/skills/${skillId}/execute`, body), "Still working. This endpoint may require browser replay or first-time auth/capture setup.");
|
|
1664
1706
|
if (flags.schema) {
|
|
@@ -1693,7 +1735,12 @@ async function cmdLogin(flags) {
|
|
|
1693
1735
|
const url = flags.url;
|
|
1694
1736
|
if (!url)
|
|
1695
1737
|
die("--url is required");
|
|
1696
|
-
|
|
1738
|
+
const browserLabel = typeof flags.browser === "string" ? flags.browser : "default browser";
|
|
1739
|
+
const result = await withPendingNotice(api2("POST", "/v1/auth/login", {
|
|
1740
|
+
url,
|
|
1741
|
+
...typeof flags.browser === "string" ? { browser: flags.browser } : {}
|
|
1742
|
+
}), `Opened ${url} in ${browserLabel}. Finish sign-in there; waiting for fresh cookies...`, 1000);
|
|
1743
|
+
output(result, !!flags.pretty);
|
|
1697
1744
|
}
|
|
1698
1745
|
async function cmdSkills(flags) {
|
|
1699
1746
|
output(await api2("GET", "/v1/skills"), !!flags.pretty);
|
|
@@ -1728,7 +1775,7 @@ var CLI_REFERENCE = {
|
|
|
1728
1775
|
{ name: "resolve", usage: '--intent "..." --url "..." [opts]', desc: "Resolve intent \u2192 search/capture/execute" },
|
|
1729
1776
|
{ name: "execute", usage: "--skill ID --endpoint ID [opts]", desc: "Execute a specific endpoint" },
|
|
1730
1777
|
{ name: "feedback", usage: "--skill ID --endpoint ID --rating N", desc: "Submit feedback (mandatory after resolve)" },
|
|
1731
|
-
{ name: "login", usage: '--url "..."', desc: "Interactive browser login" },
|
|
1778
|
+
{ name: "login", usage: '--url "..." [--browser chrome|arc|dia|brave|edge|vivaldi|chromium|firefox]', desc: "Interactive browser login" },
|
|
1732
1779
|
{ name: "skills", usage: "", desc: "List all skills" },
|
|
1733
1780
|
{ name: "skill", usage: "<id>", desc: "Get skill details" },
|
|
1734
1781
|
{ name: "search", usage: '--intent "..." [--domain "..."]', desc: "Search marketplace" },
|
|
@@ -1753,6 +1800,7 @@ var CLI_REFERENCE = {
|
|
|
1753
1800
|
examples: [
|
|
1754
1801
|
"unbrowse health",
|
|
1755
1802
|
'unbrowse resolve --intent "get timeline" --url "https://x.com"',
|
|
1803
|
+
'unbrowse login --url "https://lu.ma/signin" --browser chrome',
|
|
1756
1804
|
"unbrowse execute --skill abc --endpoint def --pretty",
|
|
1757
1805
|
'unbrowse execute --skill abc --endpoint def --extract "user,text,likes" --limit 10',
|
|
1758
1806
|
'unbrowse execute --skill abc --endpoint def --path "data.included[]" --extract "name:actor.name,text:commentary.text" --limit 20',
|