@browserbasehq/orca 3.0.0-preview.4 → 3.0.0-preview.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/dist/index.js +106 -28
- package/package.json +13 -14
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 Browserbase Inc.
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/dist/index.js
CHANGED
|
@@ -1642,13 +1642,10 @@ function decorateRoles(nodes, opts) {
|
|
|
1642
1642
|
}
|
|
1643
1643
|
function buildHierarchicalTree(nodes, opts) {
|
|
1644
1644
|
return __async(this, null, function* () {
|
|
1645
|
-
var _a2
|
|
1645
|
+
var _a2;
|
|
1646
1646
|
const nodeMap = /* @__PURE__ */ new Map();
|
|
1647
1647
|
for (const n of nodes) {
|
|
1648
|
-
const
|
|
1649
|
-
const hasDescription = Boolean((_b = n.description) == null ? void 0 : _b.trim());
|
|
1650
|
-
const hasChildren = !!(n.childIds && n.childIds.length);
|
|
1651
|
-
const keep = hasName || hasDescription || hasChildren || !isStructural(n.role);
|
|
1648
|
+
const keep = !!(n.name && n.name.trim()) || !!(n.childIds && n.childIds.length) || !isStructural(n.role);
|
|
1652
1649
|
if (!keep) continue;
|
|
1653
1650
|
nodeMap.set(n.nodeId, __spreadValues({}, n));
|
|
1654
1651
|
}
|
|
@@ -1656,7 +1653,7 @@ function buildHierarchicalTree(nodes, opts) {
|
|
|
1656
1653
|
if (!n.parentId) continue;
|
|
1657
1654
|
const parent = nodeMap.get(n.parentId);
|
|
1658
1655
|
const cur = nodeMap.get(n.nodeId);
|
|
1659
|
-
if (parent && cur) ((
|
|
1656
|
+
if (parent && cur) ((_a2 = parent.children) != null ? _a2 : parent.children = []).push(cur);
|
|
1660
1657
|
}
|
|
1661
1658
|
const roots = nodes.filter((n) => !n.parentId && nodeMap.has(n.nodeId)).map((n) => nodeMap.get(n.nodeId));
|
|
1662
1659
|
const cleaned = (yield Promise.all(roots.map(pruneStructuralSafe))).filter(
|
|
@@ -1665,17 +1662,15 @@ function buildHierarchicalTree(nodes, opts) {
|
|
|
1665
1662
|
return { tree: cleaned };
|
|
1666
1663
|
function pruneStructuralSafe(node) {
|
|
1667
1664
|
return __async(this, null, function* () {
|
|
1668
|
-
var _a3
|
|
1665
|
+
var _a3;
|
|
1669
1666
|
if (+node.nodeId < 0) return null;
|
|
1670
|
-
const
|
|
1671
|
-
const structuralRole = isStructural(node.role) && !hasDescription;
|
|
1672
|
-
const children = (_b2 = node.children) != null ? _b2 : [];
|
|
1667
|
+
const children = (_a3 = node.children) != null ? _a3 : [];
|
|
1673
1668
|
if (!children.length) {
|
|
1674
|
-
return
|
|
1669
|
+
return isStructural(node.role) ? null : node;
|
|
1675
1670
|
}
|
|
1676
1671
|
const cleanedKids = (yield Promise.all(children.map(pruneStructuralSafe))).filter(Boolean);
|
|
1677
1672
|
const prunedStatic = removeRedundantStaticTextChildren(node, cleanedKids);
|
|
1678
|
-
if (
|
|
1673
|
+
if (isStructural(node.role)) {
|
|
1679
1674
|
if (prunedStatic.length === 1) return prunedStatic[0];
|
|
1680
1675
|
if (prunedStatic.length === 0) return null;
|
|
1681
1676
|
}
|
|
@@ -6501,7 +6496,7 @@ var import_path5 = __toESM(require("path"));
|
|
|
6501
6496
|
var import_process2 = __toESM(require("process"));
|
|
6502
6497
|
|
|
6503
6498
|
// lib/version.ts
|
|
6504
|
-
var STAGEHAND_VERSION = "3.0.0-preview.4";
|
|
6499
|
+
var STAGEHAND_VERSION = "3.0.0-preview.5";
|
|
6505
6500
|
|
|
6506
6501
|
// lib/v3/types/public/sdkErrors.ts
|
|
6507
6502
|
var StagehandError = class extends Error {
|
|
@@ -7896,7 +7891,7 @@ Return the element that matches the instruction if it exists. Otherwise, return
|
|
|
7896
7891
|
}
|
|
7897
7892
|
function buildActPrompt(action, supportedActions, variables) {
|
|
7898
7893
|
let instruction = `Find the most relevant element to perform an action on given the following action: ${action}.
|
|
7899
|
-
|
|
7894
|
+
IF AND ONLY IF the action EXPLICITLY includes the word 'dropdown' and implies choosing/selecting an option from a dropdown, ignore the 'General Instructions' section, and follow the 'Dropdown Specific Instructions' section carefully.
|
|
7900
7895
|
|
|
7901
7896
|
General Instructions:
|
|
7902
7897
|
Provide an action for this element such as ${supportedActions.join(", ")}. Remember that to users, buttons and links look the same in most cases.
|
|
@@ -8305,9 +8300,7 @@ function act(_0) {
|
|
|
8305
8300
|
"the arguments to pass to the method. For example, for a click, the arguments are empty, but for a fill, the arguments are the value to fill in."
|
|
8306
8301
|
)
|
|
8307
8302
|
),
|
|
8308
|
-
twoStep: import_v32.z.boolean().describe(
|
|
8309
|
-
"true if we will need to take another action after this. false otherwise"
|
|
8310
|
-
)
|
|
8303
|
+
twoStep: import_v32.z.boolean()
|
|
8311
8304
|
});
|
|
8312
8305
|
const messages = [
|
|
8313
8306
|
buildActSystemPrompt(userProvidedInstructions),
|
|
@@ -8437,7 +8430,6 @@ function normalizeRootXPath(input) {
|
|
|
8437
8430
|
function performUnderstudyMethod(page, frame, method, rawXPath, args, domSettleTimeoutMs) {
|
|
8438
8431
|
return __async(this, null, function* () {
|
|
8439
8432
|
var _a2;
|
|
8440
|
-
yield waitForDomNetworkQuiet(frame, domSettleTimeoutMs);
|
|
8441
8433
|
const selectorRaw = normalizeRootXPath(rawXPath);
|
|
8442
8434
|
const locator = yield resolveLocatorWithHops(
|
|
8443
8435
|
page,
|
|
@@ -9056,6 +9048,7 @@ var ActHandler = class {
|
|
|
9056
9048
|
const llmClient = this.resolveLlmClient(model);
|
|
9057
9049
|
const doObserveAndAct = () => __async(this, null, function* () {
|
|
9058
9050
|
var _a2, _b, _c, _d, _e, _f, _g, _h, _i, _j;
|
|
9051
|
+
yield waitForDomNetworkQuiet(page.mainFrame(), this.defaultDomSettleTimeoutMs);
|
|
9059
9052
|
const snapshot = yield captureHybridSnapshot(page, {
|
|
9060
9053
|
experimental: true
|
|
9061
9054
|
});
|
|
@@ -9689,7 +9682,8 @@ var createActTool = (v3, executionModel) => (0, import_ai2.tool)({
|
|
|
9689
9682
|
});
|
|
9690
9683
|
return {
|
|
9691
9684
|
success: (_b = result.success) != null ? _b : true,
|
|
9692
|
-
action: (_c = result == null ? void 0 : result.actionDescription) != null ? _c : action
|
|
9685
|
+
action: (_c = result == null ? void 0 : result.actionDescription) != null ? _c : action,
|
|
9686
|
+
playwrightArguments: actions.length > 0 ? actions[0] : void 0
|
|
9693
9687
|
};
|
|
9694
9688
|
} catch (error) {
|
|
9695
9689
|
return { success: false, error: (_d = error == null ? void 0 : error.message) != null ? _d : String(error) };
|
|
@@ -9867,7 +9861,11 @@ For any form with 2+ inputs/textareas. Faster than individual typing.`,
|
|
|
9867
9861
|
observeResults,
|
|
9868
9862
|
actions: replayableActions
|
|
9869
9863
|
});
|
|
9870
|
-
return {
|
|
9864
|
+
return {
|
|
9865
|
+
success: true,
|
|
9866
|
+
actions: completed,
|
|
9867
|
+
playwrightArguments: replayableActions
|
|
9868
|
+
};
|
|
9871
9869
|
})
|
|
9872
9870
|
});
|
|
9873
9871
|
|
|
@@ -10088,6 +10086,69 @@ function calculateCompressionStats(originalSize, compressedSize, screenshotCount
|
|
|
10088
10086
|
};
|
|
10089
10087
|
}
|
|
10090
10088
|
|
|
10089
|
+
// lib/v3/agent/utils/actionMapping.ts
|
|
10090
|
+
function mapToolResultToActions({
|
|
10091
|
+
toolCallName,
|
|
10092
|
+
toolResult,
|
|
10093
|
+
args,
|
|
10094
|
+
reasoning
|
|
10095
|
+
}) {
|
|
10096
|
+
switch (toolCallName) {
|
|
10097
|
+
case "act":
|
|
10098
|
+
return mapActToolResult(toolResult, args, reasoning);
|
|
10099
|
+
case "fillForm":
|
|
10100
|
+
return mapFillFormToolResult(toolResult, args, reasoning);
|
|
10101
|
+
default:
|
|
10102
|
+
return [createStandardAction(toolCallName, args, reasoning)];
|
|
10103
|
+
}
|
|
10104
|
+
}
|
|
10105
|
+
function mapActToolResult(toolResult, args, reasoning) {
|
|
10106
|
+
if (!toolResult || typeof toolResult !== "object") {
|
|
10107
|
+
return [createStandardAction("act", args, reasoning)];
|
|
10108
|
+
}
|
|
10109
|
+
const result = toolResult;
|
|
10110
|
+
const output = result.output || result;
|
|
10111
|
+
const action = __spreadValues({
|
|
10112
|
+
type: "act",
|
|
10113
|
+
reasoning,
|
|
10114
|
+
taskCompleted: false
|
|
10115
|
+
}, args);
|
|
10116
|
+
if (output.playwrightArguments) {
|
|
10117
|
+
action.playwrightArguments = output.playwrightArguments;
|
|
10118
|
+
}
|
|
10119
|
+
return [action];
|
|
10120
|
+
}
|
|
10121
|
+
function mapFillFormToolResult(toolResult, args, reasoning) {
|
|
10122
|
+
if (!toolResult || typeof toolResult !== "object") {
|
|
10123
|
+
return [createStandardAction("fillForm", args, reasoning)];
|
|
10124
|
+
}
|
|
10125
|
+
const result = toolResult;
|
|
10126
|
+
const output = result.output || result;
|
|
10127
|
+
const observeResults = Array.isArray(output == null ? void 0 : output.playwrightArguments) ? output.playwrightArguments : [];
|
|
10128
|
+
const actions = [];
|
|
10129
|
+
actions.push(__spreadValues({
|
|
10130
|
+
type: "fillForm",
|
|
10131
|
+
reasoning,
|
|
10132
|
+
taskCompleted: false
|
|
10133
|
+
}, args));
|
|
10134
|
+
for (const observeResult of observeResults) {
|
|
10135
|
+
actions.push({
|
|
10136
|
+
type: "act",
|
|
10137
|
+
reasoning: "acting from fillform tool",
|
|
10138
|
+
taskCompleted: false,
|
|
10139
|
+
playwrightArguments: observeResult
|
|
10140
|
+
});
|
|
10141
|
+
}
|
|
10142
|
+
return actions;
|
|
10143
|
+
}
|
|
10144
|
+
function createStandardAction(toolCallName, args, reasoning) {
|
|
10145
|
+
return __spreadValues({
|
|
10146
|
+
type: toolCallName,
|
|
10147
|
+
reasoning,
|
|
10148
|
+
taskCompleted: toolCallName === "close" ? args == null ? void 0 : args.taskComplete : false
|
|
10149
|
+
}, args);
|
|
10150
|
+
}
|
|
10151
|
+
|
|
10091
10152
|
// lib/v3/handlers/v3AgentHandler.ts
|
|
10092
10153
|
var V3AgentHandler = class {
|
|
10093
10154
|
constructor(v3, logger, llmClient, executionModel, systemInstructions, mcpTools) {
|
|
@@ -10108,6 +10169,7 @@ var V3AgentHandler = class {
|
|
|
10108
10169
|
let finalMessage = "";
|
|
10109
10170
|
let completed = false;
|
|
10110
10171
|
const collectedReasoning = [];
|
|
10172
|
+
let currentPageUrl = (yield this.v3.context.awaitActivePage()).url();
|
|
10111
10173
|
try {
|
|
10112
10174
|
const systemPrompt = this.buildSystemPrompt(
|
|
10113
10175
|
options.instruction,
|
|
@@ -10142,14 +10204,17 @@ var V3AgentHandler = class {
|
|
|
10142
10204
|
temperature: 1,
|
|
10143
10205
|
toolChoice: "auto",
|
|
10144
10206
|
onStepFinish: (event) => __async(this, null, function* () {
|
|
10207
|
+
var _a3;
|
|
10145
10208
|
this.logger({
|
|
10146
10209
|
category: "agent",
|
|
10147
10210
|
message: `Step finished: ${event.finishReason}`,
|
|
10148
10211
|
level: 2
|
|
10149
10212
|
});
|
|
10150
10213
|
if (event.toolCalls && event.toolCalls.length > 0) {
|
|
10151
|
-
for (const toolCall of event.toolCalls) {
|
|
10214
|
+
for (let i = 0; i < event.toolCalls.length; i++) {
|
|
10215
|
+
const toolCall = event.toolCalls[i];
|
|
10152
10216
|
const args = toolCall.input;
|
|
10217
|
+
const toolResult = (_a3 = event.toolResults) == null ? void 0 : _a3[i];
|
|
10153
10218
|
if (event.text.length > 0) {
|
|
10154
10219
|
collectedReasoning.push(event.text);
|
|
10155
10220
|
this.logger({
|
|
@@ -10166,13 +10231,19 @@ var V3AgentHandler = class {
|
|
|
10166
10231
|
finalMessage = closeReasoning ? `${allReasoning} ${closeReasoning}`.trim() : allReasoning || "Task completed successfully";
|
|
10167
10232
|
}
|
|
10168
10233
|
}
|
|
10169
|
-
const
|
|
10170
|
-
|
|
10171
|
-
|
|
10172
|
-
|
|
10173
|
-
|
|
10174
|
-
|
|
10234
|
+
const mappedActions = mapToolResultToActions({
|
|
10235
|
+
toolCallName: toolCall.toolName,
|
|
10236
|
+
toolResult,
|
|
10237
|
+
args,
|
|
10238
|
+
reasoning: event.text || void 0
|
|
10239
|
+
});
|
|
10240
|
+
for (const action of mappedActions) {
|
|
10241
|
+
action.pageUrl = currentPageUrl;
|
|
10242
|
+
action.timestamp = Date.now();
|
|
10243
|
+
actions.push(action);
|
|
10244
|
+
}
|
|
10175
10245
|
}
|
|
10246
|
+
currentPageUrl = (yield this.v3.context.awaitActivePage()).url();
|
|
10176
10247
|
}
|
|
10177
10248
|
})
|
|
10178
10249
|
});
|
|
@@ -11856,6 +11927,7 @@ var GoogleCUAClient = class extends AgentClient {
|
|
|
11856
11927
|
level: 2
|
|
11857
11928
|
});
|
|
11858
11929
|
if (action.type === "function" && action.name === "open_web_browser") {
|
|
11930
|
+
action.pageUrl = this.currentUrl;
|
|
11859
11931
|
logger({
|
|
11860
11932
|
category: "agent",
|
|
11861
11933
|
message: "Skipping open_web_browser action",
|
|
@@ -12063,7 +12135,8 @@ var GoogleCUAClient = class extends AgentClient {
|
|
|
12063
12135
|
return {
|
|
12064
12136
|
type: "function",
|
|
12065
12137
|
name: "open_web_browser",
|
|
12066
|
-
arguments: null
|
|
12138
|
+
arguments: null,
|
|
12139
|
+
timestamp: Date.now()
|
|
12067
12140
|
};
|
|
12068
12141
|
case "click_at": {
|
|
12069
12142
|
const { x, y } = this.normalizeCoordinates(
|
|
@@ -12211,6 +12284,9 @@ var GoogleCUAClient = class extends AgentClient {
|
|
|
12211
12284
|
}
|
|
12212
12285
|
captureScreenshot(options) {
|
|
12213
12286
|
return __async(this, null, function* () {
|
|
12287
|
+
if (options == null ? void 0 : options.currentUrl) {
|
|
12288
|
+
this.currentUrl = options.currentUrl;
|
|
12289
|
+
}
|
|
12214
12290
|
if (options == null ? void 0 : options.base64Image) {
|
|
12215
12291
|
return `data:image/png;base64,${options.base64Image}`;
|
|
12216
12292
|
}
|
|
@@ -12332,6 +12408,7 @@ var V3CuaAgentHandler = class {
|
|
|
12332
12408
|
}));
|
|
12333
12409
|
this.agentClient.setActionHandler((action) => __async(this, null, function* () {
|
|
12334
12410
|
var _a2, _b, _c;
|
|
12411
|
+
action.pageUrl = (yield this.v3.context.awaitActivePage()).url();
|
|
12335
12412
|
const defaultDelay = 1e3;
|
|
12336
12413
|
const waitBetween = ((_a2 = this.options.clientOptions) == null ? void 0 : _a2.waitBetweenActions) || defaultDelay;
|
|
12337
12414
|
try {
|
|
@@ -12343,6 +12420,7 @@ var V3CuaAgentHandler = class {
|
|
|
12343
12420
|
}
|
|
12344
12421
|
yield new Promise((r) => setTimeout(r, 300));
|
|
12345
12422
|
yield this.executeAction(action);
|
|
12423
|
+
action.timestamp = Date.now();
|
|
12346
12424
|
yield new Promise((r) => setTimeout(r, waitBetween));
|
|
12347
12425
|
try {
|
|
12348
12426
|
yield this.captureAndSendScreenshot();
|
package/package.json
CHANGED
|
@@ -1,21 +1,10 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@browserbasehq/orca",
|
|
3
|
-
"version": "3.0.0-preview.4",
|
|
3
|
+
"version": "3.0.0-preview.5",
|
|
4
4
|
"description": "An AI web browsing framework focused on simplicity and extensibility.",
|
|
5
5
|
"main": "./dist/index.js",
|
|
6
6
|
"module": "./dist/index.js",
|
|
7
7
|
"types": "./dist/index.d.ts",
|
|
8
|
-
"scripts": {
|
|
9
|
-
"gen-version": "tsx scripts/gen-version.ts",
|
|
10
|
-
"build-dom-scripts": "tsx lib/v3/dom/genDomScripts.ts && tsx lib/v3/dom/genLocatorScripts.ts",
|
|
11
|
-
"build-js": "tsup --entry.index lib/v3/index.ts --dts",
|
|
12
|
-
"typecheck": "tsc --noEmit",
|
|
13
|
-
"prepare": "pnpm run build",
|
|
14
|
-
"build": "pnpm run gen-version && pnpm run build-dom-scripts && pnpm run build-js && pnpm run typecheck",
|
|
15
|
-
"example": "node --import tsx -e \"const args=process.argv.slice(1).filter(a=>a!=='--'); const [p]=args; const n=(p||'example').replace(/^\\.\\//,'').replace(/\\.ts$/i,''); import(new URL(require('node:path').resolve('examples', n + '.ts'), 'file:'));\" --",
|
|
16
|
-
"lint": "cd ../.. && prettier --check packages/core && cd packages/core && eslint .",
|
|
17
|
-
"format": "prettier --write ."
|
|
18
|
-
},
|
|
19
8
|
"files": [
|
|
20
9
|
"dist/index.js",
|
|
21
10
|
"dist/index.d.ts",
|
|
@@ -89,5 +78,15 @@
|
|
|
89
78
|
"bugs": {
|
|
90
79
|
"url": "https://github.com/browserbase/stagehand/issues"
|
|
91
80
|
},
|
|
92
|
-
"homepage": "https://stagehand.dev"
|
|
93
|
-
|
|
81
|
+
"homepage": "https://stagehand.dev",
|
|
82
|
+
"scripts": {
|
|
83
|
+
"gen-version": "tsx scripts/gen-version.ts",
|
|
84
|
+
"build-dom-scripts": "tsx lib/v3/dom/genDomScripts.ts && tsx lib/v3/dom/genLocatorScripts.ts",
|
|
85
|
+
"build-js": "tsup --entry.index lib/v3/index.ts --dts",
|
|
86
|
+
"typecheck": "tsc --noEmit",
|
|
87
|
+
"build": "pnpm run gen-version && pnpm run build-dom-scripts && pnpm run build-js && pnpm run typecheck",
|
|
88
|
+
"example": "node --import tsx -e \"const args=process.argv.slice(1).filter(a=>a!=='--'); const [p]=args; const n=(p||'example').replace(/^\\.\\//,'').replace(/\\.ts$/i,''); import(new URL(require('node:path').resolve('examples', n + '.ts'), 'file:'));\" --",
|
|
89
|
+
"lint": "cd ../.. && prettier --check packages/core && cd packages/core && eslint .",
|
|
90
|
+
"format": "prettier --write ."
|
|
91
|
+
}
|
|
92
|
+
}
|