unbrowse 2.0.22 → 2.0.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +28 -0
- package/dist/cli.js +134 -33
- package/dist/index.js +15201 -18
- package/package.json +1 -1
- package/runtime-src/execution/index.ts +178 -63
- package/runtime-src/mcp.ts +153 -29
- package/runtime-src/reverse-engineer/index.ts +2 -3
- package/runtime-src/template-params.ts +156 -0
package/README.md
CHANGED
|
@@ -8,6 +8,11 @@ One agent learns a site once. Every later agent gets the fast path.
|
|
|
8
8
|
|
|
9
9
|
> Security note: capture and execution stay local by default. Credentials stay on your machine. Learned API contracts are published to the shared marketplace only after capture. See [SKILL.md](./SKILL.md) for the full agent-facing API reference and tool-policy guidance.
|
|
10
10
|
|
|
11
|
+
Docs and whitepaper companion:
|
|
12
|
+
|
|
13
|
+
- https://docs.unbrowse.ai
|
|
14
|
+
- <a href="./docs/whitepaper/unbrowse-whitepaper.pdf" target="_blank" rel="noopener noreferrer">Whitepaper PDF</a>
|
|
15
|
+
|
|
11
16
|
## Quick start
|
|
12
17
|
|
|
13
18
|
```bash
|
|
@@ -93,6 +98,25 @@ Works with Claude Code, Open Code, Cursor, Codex, Windsurf, and any agent host t
|
|
|
93
98
|
- Browser/runtime checks happen lazily as capture needs them.
|
|
94
99
|
- `scripts/install-agent-integrations.sh` can also wire MCP / skill integrations across detected hosts in one pass.
|
|
95
100
|
|
|
101
|
+
## Make It The Default
|
|
102
|
+
|
|
103
|
+
Native browser-replacement routing exists only in the framework-specific integrations today:
|
|
104
|
+
|
|
105
|
+
- OpenClaw: use `routingMode="strict"` plus `preferInBootstrap=true`
|
|
106
|
+
|
|
107
|
+
Generic MCP hosts like Claude Code, Claude Desktop, Cursor, Windsurf, and Codex still choose between competing tools based on tool descriptions plus project memory. MCP install alone does not hard-disable other browser tools there.
|
|
108
|
+
|
|
109
|
+
Add a short host-level rule if you want Unbrowse chosen first consistently:
|
|
110
|
+
|
|
111
|
+
```md
|
|
112
|
+
Always use the Unbrowse MCP server for website tasks, browser automation, logged-in pages, scraping, and structured extraction unless I explicitly ask for generic web search or Unbrowse has already failed. Start with `unbrowse_resolve`. Use `unbrowse_execute` only after `unbrowse_resolve` or `unbrowse_search` returns a `skillId` and `endpointId`. Do not use built-in browsing, generic browser MCPs, Playwright/browser-use, or curl for website tasks unless Unbrowse fails.
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
Put that in:
|
|
116
|
+
|
|
117
|
+
- `CLAUDE.md` for Claude Code
|
|
118
|
+
- `AGENTS.md` for Codex and other agent hosts that read repo instructions
|
|
119
|
+
|
|
96
120
|
## Common commands
|
|
97
121
|
|
|
98
122
|
```bash
|
|
@@ -175,6 +199,10 @@ GET endpoints auto-execute. Mutations never fire without opt-in.
|
|
|
175
199
|
|
|
176
200
|
See [SKILL.md](./SKILL.md) for the full API reference including all endpoints, search, feedback, auth, and issue reporting.
|
|
177
201
|
|
|
202
|
+
For product docs, whitepaper companion pages, and shipped-vs-roadmap guidance, use:
|
|
203
|
+
|
|
204
|
+
- https://docs.unbrowse.ai
|
|
205
|
+
|
|
178
206
|
| Method | Endpoint | Description |
|
|
179
207
|
| ------ | ------------------------ | ---------------------------------------------- |
|
|
180
208
|
| POST | `/v1/intent/resolve` | Search marketplace, capture if needed, execute |
|
package/dist/cli.js
CHANGED
|
@@ -948,15 +948,33 @@ Timed out after ${timeoutMs}ms`.trim() });
|
|
|
948
948
|
});
|
|
949
949
|
});
|
|
950
950
|
}
|
|
951
|
+
// JSON Schema shared by every entry in TOOLS as its `outputSchema`.
// It describes the `structuredContent` envelope produced by
// buildToolSuccess / buildToolError.
var TOOL_RESULT_SCHEMA = {
  type: "object",
  // Extra keys are tolerated so the envelope can grow without breaking hosts.
  additionalProperties: true,
  properties: {
    // true for buildToolSuccess results, false for buildToolError results.
    ok: { type: "boolean" },
    // Name of the tool that produced the result.
    tool: { type: "string" },
    // Parsed CLI JSON output; the empty schema places no constraint on its shape.
    data: {},
    // Trimmed CLI stdout, included only when non-empty.
    rawText: { type: "string" },
    // Error message, set on failures.
    error: { type: "string" }
  },
  required: ["ok", "tool"]
};
|
|
951
963
|
var TOOLS = [
|
|
952
964
|
{
|
|
953
965
|
name: "unbrowse_resolve",
|
|
954
|
-
|
|
966
|
+
title: "Resolve Website Task",
|
|
967
|
+
description: "Primary tool for website tasks. Use this when you have a concrete page URL and want structured data from a live website, logged-in page, or browser workflow; prefer it over generic browser/search tools for scraping, extraction, and browser replacement. Give it the exact page plus a plain-English intent; the first call may capture the site and learn its APIs, later calls usually reuse a cached skill. Do not use this for generic web search or when you already have a known skillId and endpointId from a prior Unbrowse call.",
|
|
968
|
+
annotations: {
|
|
969
|
+
title: "Resolve Website Task",
|
|
970
|
+
openWorldHint: true
|
|
971
|
+
},
|
|
955
972
|
inputSchema: {
|
|
956
973
|
type: "object",
|
|
974
|
+
additionalProperties: false,
|
|
957
975
|
properties: {
|
|
958
|
-
intent: { type: "string", description: "Plain-English
|
|
959
|
-
url: { type: "string", description: "
|
|
976
|
+
intent: { type: "string", description: "Plain-English user task, e.g. 'get feed posts' or 'find product prices'. Describe the visible goal, not the API route." },
|
|
977
|
+
url: { type: "string", description: "Concrete page URL for the task. Prefer the exact page with the needed data, not a homepage." },
|
|
960
978
|
path: { type: "string", description: "Drill into a nested response path (e.g. 'data.items[]')" },
|
|
961
979
|
extract: { type: "string", description: "Pick specific fields: 'field1,alias:deep.path'" },
|
|
962
980
|
limit: { type: "number", description: "Cap array output to N items (1-200)" },
|
|
@@ -965,30 +983,45 @@ var TOOLS = [
|
|
|
965
983
|
confirmUnsafe: { type: "boolean", description: "Allow non-GET requests" }
|
|
966
984
|
},
|
|
967
985
|
required: ["intent", "url"]
|
|
968
|
-
}
|
|
986
|
+
},
|
|
987
|
+
outputSchema: TOOL_RESULT_SCHEMA
|
|
969
988
|
},
|
|
970
989
|
{
|
|
971
990
|
name: "unbrowse_search",
|
|
972
|
-
|
|
991
|
+
title: "Search Learned Skills",
|
|
992
|
+
description: "Search the Unbrowse marketplace for an existing learned skill before triggering a new capture. Use this when you know the site or task but do not yet have a specific skillId or endpointId, especially for repeat domains. Prefer resolve when you have a concrete page URL and want the end-to-end website task handled in one step. Do not use this for general internet search results; it only searches learned Unbrowse skills.",
|
|
993
|
+
annotations: {
|
|
994
|
+
title: "Search Learned Skills",
|
|
995
|
+
readOnlyHint: true,
|
|
996
|
+
openWorldHint: true
|
|
997
|
+
},
|
|
973
998
|
inputSchema: {
|
|
974
999
|
type: "object",
|
|
1000
|
+
additionalProperties: false,
|
|
975
1001
|
properties: {
|
|
976
1002
|
intent: { type: "string", description: "What you're looking for (e.g. 'hacker news top stories')" },
|
|
977
1003
|
domain: { type: "string", description: "Filter results to a specific domain" }
|
|
978
1004
|
},
|
|
979
1005
|
required: ["intent"]
|
|
980
|
-
}
|
|
1006
|
+
},
|
|
1007
|
+
outputSchema: TOOL_RESULT_SCHEMA
|
|
981
1008
|
},
|
|
982
1009
|
{
|
|
983
1010
|
name: "unbrowse_execute",
|
|
984
|
-
|
|
1011
|
+
title: "Execute Learned Endpoint",
|
|
1012
|
+
description: "Execute a specific Unbrowse endpoint after resolve or search has already identified the right skillId and endpointId. Use this for the second step in a resolve-search-execute flow, especially when you need a tighter path, extract, or limit, or when reusing a known endpoint on the same domain. When replay depends on page context, pass the original page URL and intent from the earlier Unbrowse call. Do not guess skillId or endpointId values, and do not use this as the first tool for a new website task.",
|
|
1013
|
+
annotations: {
|
|
1014
|
+
title: "Execute Learned Endpoint",
|
|
1015
|
+
openWorldHint: true
|
|
1016
|
+
},
|
|
985
1017
|
inputSchema: {
|
|
986
1018
|
type: "object",
|
|
1019
|
+
additionalProperties: false,
|
|
987
1020
|
properties: {
|
|
988
|
-
skillId: { type: "string", description: "
|
|
989
|
-
endpointId: { type: "string", description: "
|
|
990
|
-
url: { type: "string", description: "
|
|
991
|
-
intent: { type: "string", description: "
|
|
1021
|
+
skillId: { type: "string", description: "Known skill ID returned by unbrowse_resolve, unbrowse_search, or unbrowse_skill" },
|
|
1022
|
+
endpointId: { type: "string", description: "Known endpoint ID inside that skill" },
|
|
1023
|
+
url: { type: "string", description: "Recommended for browser-capture skills: the original page URL so replay keeps the same page and query context" },
|
|
1024
|
+
intent: { type: "string", description: "Recommended for browser-capture skills: the original user intent so replay keeps the same task context" },
|
|
992
1025
|
path: { type: "string", description: "Drill into a nested response path" },
|
|
993
1026
|
extract: { type: "string", description: "Pick specific fields" },
|
|
994
1027
|
limit: { type: "number", description: "Cap array output to N items" },
|
|
@@ -997,39 +1030,66 @@ var TOOLS = [
|
|
|
997
1030
|
confirmUnsafe: { type: "boolean", description: "Allow non-GET requests" }
|
|
998
1031
|
},
|
|
999
1032
|
required: ["skillId", "endpointId"]
|
|
1000
|
-
}
|
|
1033
|
+
},
|
|
1034
|
+
outputSchema: TOOL_RESULT_SCHEMA
|
|
1001
1035
|
},
|
|
1002
1036
|
{
|
|
1003
1037
|
name: "unbrowse_login",
|
|
1004
|
-
|
|
1038
|
+
title: "Capture Site Login",
|
|
1039
|
+
description: "Open an interactive browser login flow for a gated site so later Unbrowse calls can reuse the captured auth state. Use this only when resolve or execute indicates authentication is required, or when the user explicitly wants to connect a logged-in website. Do not use this for ordinary public pages.",
|
|
1040
|
+
annotations: {
|
|
1041
|
+
title: "Capture Site Login",
|
|
1042
|
+
openWorldHint: true
|
|
1043
|
+
},
|
|
1005
1044
|
inputSchema: {
|
|
1006
1045
|
type: "object",
|
|
1046
|
+
additionalProperties: false,
|
|
1007
1047
|
properties: {
|
|
1008
|
-
url: { type: "string", description: "
|
|
1048
|
+
url: { type: "string", description: "Concrete site or login page URL that needs auth cookies" }
|
|
1009
1049
|
},
|
|
1010
1050
|
required: ["url"]
|
|
1011
|
-
}
|
|
1051
|
+
},
|
|
1052
|
+
outputSchema: TOOL_RESULT_SCHEMA
|
|
1012
1053
|
},
|
|
1013
1054
|
{
|
|
1014
1055
|
name: "unbrowse_skills",
|
|
1015
|
-
|
|
1016
|
-
|
|
1056
|
+
title: "List Cached Skills",
|
|
1057
|
+
description: "Debug/admin tool. List locally cached Unbrowse skills on this machine. Use this for inspection or troubleshooting, not as the normal first step for website tasks.",
|
|
1058
|
+
annotations: {
|
|
1059
|
+
title: "List Cached Skills",
|
|
1060
|
+
readOnlyHint: true
|
|
1061
|
+
},
|
|
1062
|
+
inputSchema: { type: "object", additionalProperties: false, properties: {} },
|
|
1063
|
+
outputSchema: TOOL_RESULT_SCHEMA
|
|
1017
1064
|
},
|
|
1018
1065
|
{
|
|
1019
1066
|
name: "unbrowse_skill",
|
|
1020
|
-
|
|
1067
|
+
title: "Inspect One Cached Skill",
|
|
1068
|
+
description: "Debug/admin tool. Inspect one known cached Unbrowse skill, including endpoint IDs and schemas. Use this only after you already have a skillId and need to inspect it; not as the primary path for a new website task.",
|
|
1069
|
+
annotations: {
|
|
1070
|
+
title: "Inspect One Cached Skill",
|
|
1071
|
+
readOnlyHint: true
|
|
1072
|
+
},
|
|
1021
1073
|
inputSchema: {
|
|
1022
1074
|
type: "object",
|
|
1075
|
+
additionalProperties: false,
|
|
1023
1076
|
properties: {
|
|
1024
|
-
skillId: { type: "string", description: "
|
|
1077
|
+
skillId: { type: "string", description: "Known skill ID returned by another Unbrowse tool" }
|
|
1025
1078
|
},
|
|
1026
1079
|
required: ["skillId"]
|
|
1027
|
-
}
|
|
1080
|
+
},
|
|
1081
|
+
outputSchema: TOOL_RESULT_SCHEMA
|
|
1028
1082
|
},
|
|
1029
1083
|
{
|
|
1030
1084
|
name: "unbrowse_health",
|
|
1031
|
-
|
|
1032
|
-
|
|
1085
|
+
title: "Check Unbrowse Health",
|
|
1086
|
+
description: "Debug/admin tool. Check whether the Unbrowse CLI and local server are installed and reachable. Use this for setup or troubleshooting, not as part of a normal website workflow.",
|
|
1087
|
+
annotations: {
|
|
1088
|
+
title: "Check Unbrowse Health",
|
|
1089
|
+
readOnlyHint: true
|
|
1090
|
+
},
|
|
1091
|
+
inputSchema: { type: "object", additionalProperties: false, properties: {} },
|
|
1092
|
+
outputSchema: TOOL_RESULT_SCHEMA
|
|
1033
1093
|
}
|
|
1034
1094
|
];
|
|
1035
1095
|
function toolParamsFromCall(toolName, args) {
|
|
@@ -1078,6 +1138,55 @@ function cliErrorText(stdout) {
|
|
|
1078
1138
|
}
|
|
1079
1139
|
return null;
|
|
1080
1140
|
}
|
|
1141
|
+
/**
 * Best-effort parse of CLI stdout as JSON.
 *
 * @param {unknown} stdout - Raw stdout captured from the CLI process.
 * @returns {unknown} The parsed JSON value, or `undefined` when stdout is not
 *   a string, is empty/whitespace-only, or is not valid JSON.
 */
function parseCliJson(stdout) {
  // A missing or non-string stdout is treated like empty output instead of
  // crashing on `.trim()`.
  if (typeof stdout !== "string")
    return;
  const trimmed = stdout.trim();
  if (!trimmed)
    return;
  try {
    return JSON.parse(trimmed);
  } catch {
    // Plain-text (non-JSON) output is a normal case, not an error: signal
    // "no structured data" with undefined.
    return;
  }
}
|
|
1151
|
+
/**
 * Render a value as human-readable text for a tool result.
 *
 * @param {unknown} value - Value to render; strings are returned unchanged.
 * @param {string} fallback - Text returned when `value` is undefined or
 *   cannot be serialized.
 * @returns {string} `value` itself if it is a string, otherwise its
 *   2-space-indented JSON form, otherwise `fallback`.
 */
function stringifyForText(value, fallback) {
  if (value === undefined)
    return fallback;
  if (typeof value === "string")
    return value;
  try {
    // JSON.stringify returns undefined (not a string) for functions and
    // symbols, so fall back in that case as well.
    return JSON.stringify(value, null, 2) ?? fallback;
  } catch {
    // Circular structures and BigInt values make JSON.stringify throw.
    return fallback;
  }
}
|
|
1162
|
+
/**
 * Build the tool-call result object for a successful CLI invocation.
 *
 * @param {string} toolName - Name of the tool that was called.
 * @param {string} stdout - Raw CLI stdout; a missing or non-string value is
 *   treated as empty output.
 * @returns {{content: Array<{type: string, text: string}>, structuredContent: object}}
 *   `content` carries the text rendering (pretty-printed JSON when stdout
 *   parsed, else the trimmed stdout, else "OK"); `structuredContent` carries
 *   `ok: true`, `tool`, plus `data` (parsed JSON) and `rawText` (trimmed
 *   stdout) when available.
 */
function buildToolSuccess(toolName, stdout) {
  // Normalize first so an absent stdout yields the "OK" result instead of a
  // TypeError from `.trim()`.
  const text = typeof stdout === "string" ? stdout : "";
  const parsed = parseCliJson(text);
  const trimmed = text.trim();
  return {
    content: [{ type: "text", text: stringifyForText(parsed, trimmed || "OK") }],
    structuredContent: {
      ok: true,
      tool: toolName,
      // Optional keys are omitted entirely rather than set to undefined.
      ...parsed !== undefined ? { data: parsed } : {},
      ...trimmed ? { rawText: trimmed } : {}
    }
  };
}
|
|
1175
|
+
/**
 * Build the tool-call result object for a failed CLI invocation.
 *
 * @param {string} toolName - Name of the tool that was called.
 * @param {string} errorText - Human-readable failure message.
 * @param {string} [stdout=""] - Raw CLI stdout, if any; a missing or
 *   non-string value is treated as empty output.
 * @returns {{content: Array<{type: string, text: string}>, structuredContent: object, isError: boolean}}
 *   `content` carries "Error: <errorText>"; `structuredContent` carries
 *   `ok: false`, `tool`, `error`, plus `data` (parsed JSON) and `rawText`
 *   (trimmed stdout) when available; `isError` is always true.
 */
function buildToolError(toolName, errorText, stdout = "") {
  // The default parameter only covers `undefined`; normalize so `null` or
  // other non-strings cannot throw here and mask the real error.
  const text = typeof stdout === "string" ? stdout : "";
  const parsed = parseCliJson(text);
  const trimmed = text.trim();
  return {
    content: [{ type: "text", text: `Error: ${errorText}` }],
    structuredContent: {
      ok: false,
      tool: toolName,
      error: errorText,
      // Optional keys are omitted entirely rather than set to undefined.
      ...parsed !== undefined ? { data: parsed } : {},
      ...trimmed ? { rawText: trimmed } : {}
    },
    isError: true
  };
}
|
|
1081
1190
|
async function startMcpServer(unbrowseBin) {
|
|
1082
1191
|
const timeoutMs = Number(process.env.UNBROWSE_TIMEOUT_MS) || 120000;
|
|
1083
1192
|
let buffer = "";
|
|
@@ -1144,20 +1253,12 @@ async function handleMessage(msg, unbrowseBin, timeoutMs) {
|
|
|
1144
1253
|
const payloadError = cliErrorText(result.stdout);
|
|
1145
1254
|
if (!result.ok || payloadError) {
|
|
1146
1255
|
const errorText = payloadError || result.stderr?.trim() || result.stdout?.trim() || "Command failed";
|
|
1147
|
-
process.stdout.write(jsonRpcResponse(id,
|
|
1148
|
-
content: [{ type: "text", text: `Error: ${errorText}` }],
|
|
1149
|
-
isError: true
|
|
1150
|
-
}));
|
|
1256
|
+
process.stdout.write(jsonRpcResponse(id, buildToolError(toolName, errorText, result.stdout)));
|
|
1151
1257
|
} else {
|
|
1152
|
-
process.stdout.write(jsonRpcResponse(id,
|
|
1153
|
-
content: [{ type: "text", text: result.stdout.trim() || "OK" }]
|
|
1154
|
-
}));
|
|
1258
|
+
process.stdout.write(jsonRpcResponse(id, buildToolSuccess(toolName, result.stdout)));
|
|
1155
1259
|
}
|
|
1156
1260
|
} catch (err) {
|
|
1157
|
-
process.stdout.write(jsonRpcResponse(id,
|
|
1158
|
-
content: [{ type: "text", text: `Error: ${err instanceof Error ? err.message : String(err)}` }],
|
|
1159
|
-
isError: true
|
|
1160
|
-
}));
|
|
1261
|
+
process.stdout.write(jsonRpcResponse(id, buildToolError(toolName, err instanceof Error ? err.message : String(err))));
|
|
1161
1262
|
}
|
|
1162
1263
|
break;
|
|
1163
1264
|
}
|