@browserbasehq/stagehand 3.0.7-alpha-ddd7464487a0cd0615d4f1ba458bc6f3fa51b27d → 3.0.7-alpha-bd2db925f66a826d61d58be1611d55646cbdb560
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +75 -14
- package/package.json +1 -2
package/dist/index.js
CHANGED
|
@@ -179,7 +179,7 @@ var __forAwait = (obj, it, method) => (it = obj[__knownSymbol("asyncIterator")])
|
|
|
179
179
|
var STAGEHAND_VERSION;
|
|
180
180
|
var init_version = __esm({
|
|
181
181
|
"lib/version.ts"() {
|
|
182
|
-
STAGEHAND_VERSION = "3.0.7-alpha-ddd7464487a0cd0615d4f1ba458bc6f3fa51b27d";
|
|
182
|
+
STAGEHAND_VERSION = "3.0.7-alpha-bd2db925f66a826d61d58be1611d55646cbdb560";
|
|
183
183
|
}
|
|
184
184
|
});
|
|
185
185
|
|
|
@@ -27179,7 +27179,6 @@ init_sdkErrors();
|
|
|
27179
27179
|
var import_genai = require("@google/genai");
|
|
27180
27180
|
var import_zod2 = require("zod");
|
|
27181
27181
|
var import_v3 = __toESM(require("zod/v3"));
|
|
27182
|
-
var import_sharp = __toESM(require("sharp"));
|
|
27183
27182
|
var ID_PATTERN = /^\d+-\d+$/;
|
|
27184
27183
|
var zFactories = {
|
|
27185
27184
|
v4: import_zod2.z,
|
|
@@ -33509,6 +33508,7 @@ var OpenAICUAClient = class extends AgentClient {
|
|
|
33509
33508
|
constructor(type, modelName, userProvidedInstructions, clientOptions, tools) {
|
|
33510
33509
|
super(type, modelName, userProvidedInstructions);
|
|
33511
33510
|
this.currentViewport = { width: 1288, height: 711 };
|
|
33511
|
+
this.actualScreenshotSize = { width: 1288, height: 711 };
|
|
33512
33512
|
this.reasoningItems = /* @__PURE__ */ new Map();
|
|
33513
33513
|
this.environment = "browser";
|
|
33514
33514
|
this.apiKey = (clientOptions == null ? void 0 : clientOptions.apiKey) || process.env.OPENAI_API_KEY || "";
|
|
@@ -33529,6 +33529,9 @@ var OpenAICUAClient = class extends AgentClient {
|
|
|
33529
33529
|
setViewport(width, height) {
|
|
33530
33530
|
this.currentViewport = { width, height };
|
|
33531
33531
|
}
|
|
33532
|
+
setScreenshotSize(width, height) {
|
|
33533
|
+
this.actualScreenshotSize = { width, height };
|
|
33534
|
+
}
|
|
33532
33535
|
setCurrentUrl(url) {
|
|
33533
33536
|
this.currentUrl = url;
|
|
33534
33537
|
}
|
|
@@ -33963,9 +33966,16 @@ var OpenAICUAClient = class extends AgentClient {
|
|
|
33963
33966
|
}
|
|
33964
33967
|
convertComputerCallToAction(call) {
|
|
33965
33968
|
const { action } = call;
|
|
33969
|
+
const scaledAction = __spreadValues({}, action);
|
|
33970
|
+
if (action.x !== void 0 && action.y !== void 0) {
|
|
33971
|
+
const scaleX = this.currentViewport.width / this.actualScreenshotSize.width;
|
|
33972
|
+
const scaleY = this.currentViewport.height / this.actualScreenshotSize.height;
|
|
33973
|
+
scaledAction.x = Math.floor(action.x * scaleX);
|
|
33974
|
+
scaledAction.y = Math.floor(action.y * scaleY);
|
|
33975
|
+
}
|
|
33966
33976
|
return __spreadValues({
|
|
33967
33977
|
type: action.type
|
|
33968
|
-
},
|
|
33978
|
+
}, scaledAction);
|
|
33969
33979
|
}
|
|
33970
33980
|
convertFunctionCallToAction(call) {
|
|
33971
33981
|
try {
|
|
@@ -34184,6 +34194,7 @@ var GoogleCUAClient = class extends AgentClient {
|
|
|
34184
34194
|
constructor(type, modelName, userProvidedInstructions, clientOptions, tools) {
|
|
34185
34195
|
super(type, modelName, userProvidedInstructions);
|
|
34186
34196
|
this.currentViewport = { width: 1288, height: 711 };
|
|
34197
|
+
this.actualScreenshotSize = { width: 1288, height: 711 };
|
|
34187
34198
|
this.history = [];
|
|
34188
34199
|
this.environment = "ENVIRONMENT_BROWSER";
|
|
34189
34200
|
this.tools = tools;
|
|
@@ -34222,6 +34233,9 @@ var GoogleCUAClient = class extends AgentClient {
|
|
|
34222
34233
|
setViewport(width, height) {
|
|
34223
34234
|
this.currentViewport = { width, height };
|
|
34224
34235
|
}
|
|
34236
|
+
setScreenshotSize(width, height) {
|
|
34237
|
+
this.actualScreenshotSize = { width, height };
|
|
34238
|
+
}
|
|
34225
34239
|
setCurrentUrl(url) {
|
|
34226
34240
|
this.currentUrl = url;
|
|
34227
34241
|
}
|
|
@@ -34837,9 +34851,13 @@ var GoogleCUAClient = class extends AgentClient {
|
|
|
34837
34851
|
normalizeCoordinates(x2, y) {
|
|
34838
34852
|
x2 = Math.min(999, Math.max(0, x2));
|
|
34839
34853
|
y = Math.min(999, Math.max(0, y));
|
|
34854
|
+
const screenshotX = x2 / 1e3 * this.actualScreenshotSize.width;
|
|
34855
|
+
const screenshotY = y / 1e3 * this.actualScreenshotSize.height;
|
|
34856
|
+
const scaleX = this.currentViewport.width / this.actualScreenshotSize.width;
|
|
34857
|
+
const scaleY = this.currentViewport.height / this.actualScreenshotSize.height;
|
|
34840
34858
|
return {
|
|
34841
|
-
x: Math.floor(
|
|
34842
|
-
y: Math.floor(
|
|
34859
|
+
x: Math.floor(screenshotX * scaleX),
|
|
34860
|
+
y: Math.floor(screenshotY * scaleY)
|
|
34843
34861
|
};
|
|
34844
34862
|
}
|
|
34845
34863
|
captureScreenshot(options) {
|
|
@@ -34890,6 +34908,8 @@ var MicrosoftCUAClient = class extends AgentClient {
|
|
|
34890
34908
|
};
|
|
34891
34909
|
// Resized dimensions for model input
|
|
34892
34910
|
this.resizedViewport = { width: 1288, height: 711 };
|
|
34911
|
+
// Actual screenshot dimensions (tracked separately from viewport)
|
|
34912
|
+
this.actualScreenshotSize = { width: 1288, height: 711 };
|
|
34893
34913
|
this.apiKey = (clientOptions == null ? void 0 : clientOptions.apiKey) || process.env.AZURE_API_KEY || process.env.FIREWORKS_API_KEY || "";
|
|
34894
34914
|
this.baseURL = (clientOptions == null ? void 0 : clientOptions.baseURL) || process.env.AZURE_ENDPOINT || process.env.FIREWORKS_ENDPOINT || "";
|
|
34895
34915
|
this.clientOptions = {
|
|
@@ -34916,6 +34936,9 @@ var MicrosoftCUAClient = class extends AgentClient {
|
|
|
34916
34936
|
this.currentViewport = { width, height };
|
|
34917
34937
|
this.resizedViewport = this.smartResize(width, height);
|
|
34918
34938
|
}
|
|
34939
|
+
setScreenshotSize(width, height) {
|
|
34940
|
+
this.actualScreenshotSize = { width, height };
|
|
34941
|
+
}
|
|
34919
34942
|
setCurrentUrl(url) {
|
|
34920
34943
|
this.currentUrl = url;
|
|
34921
34944
|
}
|
|
@@ -34953,7 +34976,7 @@ var MicrosoftCUAClient = class extends AgentClient {
|
|
|
34953
34976
|
* Simplified to match Python's minimal approach
|
|
34954
34977
|
*/
|
|
34955
34978
|
generateSystemPrompt() {
|
|
34956
|
-
const { width, height } = this.
|
|
34979
|
+
const { width, height } = this.actualScreenshotSize;
|
|
34957
34980
|
let basePrompt = "You are a helpful assistant.";
|
|
34958
34981
|
if (this.userProvidedInstructions) {
|
|
34959
34982
|
basePrompt = `${basePrompt}
|
|
@@ -35114,8 +35137,8 @@ ${functionCallTemplate}`;
|
|
|
35114
35137
|
const transformCoordinate = (coord) => {
|
|
35115
35138
|
if (!coord || coord.length !== 2) return coord;
|
|
35116
35139
|
const [x2, y] = coord;
|
|
35117
|
-
const scaleX = this.currentViewport.width / this.
|
|
35118
|
-
const scaleY = this.currentViewport.height / this.
|
|
35140
|
+
const scaleX = this.currentViewport.width / this.actualScreenshotSize.width;
|
|
35141
|
+
const scaleY = this.currentViewport.height / this.actualScreenshotSize.height;
|
|
35119
35142
|
return [Math.round(x2 * scaleX), Math.round(y * scaleY)];
|
|
35120
35143
|
};
|
|
35121
35144
|
const baseAction = {
|
|
@@ -35624,6 +35647,14 @@ var AgentProvider = class _AgentProvider {
|
|
|
35624
35647
|
// lib/v3/handlers/v3CuaAgentHandler.ts
|
|
35625
35648
|
init_flowLogger();
|
|
35626
35649
|
init_sdkErrors();
|
|
35650
|
+
function getPNGDimensions(buffer) {
|
|
35651
|
+
if (buffer.length < 24 || buffer[0] !== 137 || buffer[1] !== 80 || buffer[2] !== 78 || buffer[3] !== 71) {
|
|
35652
|
+
throw new Error("Invalid PNG file");
|
|
35653
|
+
}
|
|
35654
|
+
const width = buffer.readUInt32BE(16);
|
|
35655
|
+
const height = buffer.readUInt32BE(20);
|
|
35656
|
+
return { width, height };
|
|
35657
|
+
}
|
|
35627
35658
|
var V3CuaAgentHandler = class {
|
|
35628
35659
|
constructor(v3, logger, options, tools) {
|
|
35629
35660
|
this.v3 = v3;
|
|
@@ -35653,8 +35684,23 @@ var V3CuaAgentHandler = class {
|
|
|
35653
35684
|
this.agentClient.setScreenshotProvider(() => __async(this, null, function* () {
|
|
35654
35685
|
this.ensureNotClosed();
|
|
35655
35686
|
const page = yield this.v3.context.awaitActivePage();
|
|
35656
|
-
const
|
|
35657
|
-
|
|
35687
|
+
const screenshotBuffer = yield page.screenshot({ fullPage: false });
|
|
35688
|
+
if (this.agentClient instanceof GoogleCUAClient || this.agentClient instanceof OpenAICUAClient || this.agentClient instanceof MicrosoftCUAClient) {
|
|
35689
|
+
try {
|
|
35690
|
+
const dimensions = getPNGDimensions(screenshotBuffer);
|
|
35691
|
+
this.agentClient.setScreenshotSize(
|
|
35692
|
+
dimensions.width,
|
|
35693
|
+
dimensions.height
|
|
35694
|
+
);
|
|
35695
|
+
} catch (e2) {
|
|
35696
|
+
this.logger({
|
|
35697
|
+
category: "agent",
|
|
35698
|
+
message: `Could not read screenshot dimensions: ${e2}`,
|
|
35699
|
+
level: 1
|
|
35700
|
+
});
|
|
35701
|
+
}
|
|
35702
|
+
}
|
|
35703
|
+
return screenshotBuffer.toString("base64");
|
|
35658
35704
|
}));
|
|
35659
35705
|
this.agentClient.setActionHandler((action) => __async(this, null, function* () {
|
|
35660
35706
|
var _a4, _b, _c;
|
|
@@ -36078,11 +36124,26 @@ var V3CuaAgentHandler = class {
|
|
|
36078
36124
|
});
|
|
36079
36125
|
try {
|
|
36080
36126
|
const page = yield this.v3.context.awaitActivePage();
|
|
36081
|
-
const
|
|
36082
|
-
this.
|
|
36127
|
+
const screenshotBuffer = yield page.screenshot({ fullPage: false });
|
|
36128
|
+
if (this.agentClient instanceof GoogleCUAClient || this.agentClient instanceof OpenAICUAClient || this.agentClient instanceof MicrosoftCUAClient) {
|
|
36129
|
+
try {
|
|
36130
|
+
const dimensions = getPNGDimensions(screenshotBuffer);
|
|
36131
|
+
this.agentClient.setScreenshotSize(
|
|
36132
|
+
dimensions.width,
|
|
36133
|
+
dimensions.height
|
|
36134
|
+
);
|
|
36135
|
+
} catch (e2) {
|
|
36136
|
+
this.logger({
|
|
36137
|
+
category: "agent",
|
|
36138
|
+
message: `Could not read screenshot dimensions: ${e2}`,
|
|
36139
|
+
level: 1
|
|
36140
|
+
});
|
|
36141
|
+
}
|
|
36142
|
+
}
|
|
36143
|
+
this.v3.bus.emit("agent_screenshot_taken_event", screenshotBuffer);
|
|
36083
36144
|
const currentUrl = page.url();
|
|
36084
36145
|
return yield this.agentClient.captureScreenshot({
|
|
36085
|
-
base64Image,
|
|
36146
|
+
base64Image: screenshotBuffer.toString("base64"),
|
|
36086
36147
|
currentUrl
|
|
36087
36148
|
});
|
|
36088
36149
|
} catch (e2) {
|
|
@@ -66748,7 +66809,7 @@ var _V3 = class _V3 {
|
|
|
66748
66809
|
}
|
|
66749
66810
|
this.logger({
|
|
66750
66811
|
category: "agent",
|
|
66751
|
-
message:
|
|
66812
|
+
message: "Creating v3 agent instance",
|
|
66752
66813
|
level: 1,
|
|
66753
66814
|
auxiliary: __spreadValues({
|
|
66754
66815
|
cua: { value: isCuaMode ? "true" : "false", type: "boolean" },
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@browserbasehq/stagehand",
|
|
3
|
-
"version": "3.0.7-alpha-ddd7464487a0cd0615d4f1ba458bc6f3fa51b27d",
|
|
3
|
+
"version": "3.0.7-alpha-bd2db925f66a826d61d58be1611d55646cbdb560",
|
|
4
4
|
"description": "An AI web browsing framework focused on simplicity and extensibility.",
|
|
5
5
|
"main": "./dist/index.js",
|
|
6
6
|
"module": "./dist/index.js",
|
|
@@ -41,7 +41,6 @@
|
|
|
41
41
|
"pino": "^9.6.0",
|
|
42
42
|
"pino-pretty": "^13.0.0",
|
|
43
43
|
"playwright": "^1.52.0",
|
|
44
|
-
"sharp": "^0.34.5",
|
|
45
44
|
"ws": "^8.18.0",
|
|
46
45
|
"zod-to-json-schema": "^3.25.0"
|
|
47
46
|
},
|