@browserbasehq/stagehand 3.0.7-alpha-ddd7464487a0cd0615d4f1ba458bc6f3fa51b27d → 3.0.7-alpha-bd2db925f66a826d61d58be1611d55646cbdb560

This diff shows the changes between two publicly available versions of a package, each released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (2)
  1. package/dist/index.js +75 -14
  2. package/package.json +1 -2
package/dist/index.js CHANGED
@@ -179,7 +179,7 @@ var __forAwait = (obj, it, method) => (it = obj[__knownSymbol("asyncIterator")])
179
179
  var STAGEHAND_VERSION;
180
180
  var init_version = __esm({
181
181
  "lib/version.ts"() {
182
- STAGEHAND_VERSION = "3.0.7-alpha-ddd7464487a0cd0615d4f1ba458bc6f3fa51b27d";
182
+ STAGEHAND_VERSION = "3.0.7-alpha-bd2db925f66a826d61d58be1611d55646cbdb560";
183
183
  }
184
184
  });
185
185
 
@@ -27179,7 +27179,6 @@ init_sdkErrors();
27179
27179
  var import_genai = require("@google/genai");
27180
27180
  var import_zod2 = require("zod");
27181
27181
  var import_v3 = __toESM(require("zod/v3"));
27182
- var import_sharp = __toESM(require("sharp"));
27183
27182
  var ID_PATTERN = /^\d+-\d+$/;
27184
27183
  var zFactories = {
27185
27184
  v4: import_zod2.z,
@@ -33509,6 +33508,7 @@ var OpenAICUAClient = class extends AgentClient {
33509
33508
  constructor(type, modelName, userProvidedInstructions, clientOptions, tools) {
33510
33509
  super(type, modelName, userProvidedInstructions);
33511
33510
  this.currentViewport = { width: 1288, height: 711 };
33511
+ this.actualScreenshotSize = { width: 1288, height: 711 };
33512
33512
  this.reasoningItems = /* @__PURE__ */ new Map();
33513
33513
  this.environment = "browser";
33514
33514
  this.apiKey = (clientOptions == null ? void 0 : clientOptions.apiKey) || process.env.OPENAI_API_KEY || "";
@@ -33529,6 +33529,9 @@ var OpenAICUAClient = class extends AgentClient {
33529
33529
  setViewport(width, height) {
33530
33530
  this.currentViewport = { width, height };
33531
33531
  }
33532
+ setScreenshotSize(width, height) {
33533
+ this.actualScreenshotSize = { width, height };
33534
+ }
33532
33535
  setCurrentUrl(url) {
33533
33536
  this.currentUrl = url;
33534
33537
  }
@@ -33963,9 +33966,16 @@ var OpenAICUAClient = class extends AgentClient {
33963
33966
  }
33964
33967
  convertComputerCallToAction(call) {
33965
33968
  const { action } = call;
33969
+ const scaledAction = __spreadValues({}, action);
33970
+ if (action.x !== void 0 && action.y !== void 0) {
33971
+ const scaleX = this.currentViewport.width / this.actualScreenshotSize.width;
33972
+ const scaleY = this.currentViewport.height / this.actualScreenshotSize.height;
33973
+ scaledAction.x = Math.floor(action.x * scaleX);
33974
+ scaledAction.y = Math.floor(action.y * scaleY);
33975
+ }
33966
33976
  return __spreadValues({
33967
33977
  type: action.type
33968
- }, action);
33978
+ }, scaledAction);
33969
33979
  }
33970
33980
  convertFunctionCallToAction(call) {
33971
33981
  try {
@@ -34184,6 +34194,7 @@ var GoogleCUAClient = class extends AgentClient {
34184
34194
  constructor(type, modelName, userProvidedInstructions, clientOptions, tools) {
34185
34195
  super(type, modelName, userProvidedInstructions);
34186
34196
  this.currentViewport = { width: 1288, height: 711 };
34197
+ this.actualScreenshotSize = { width: 1288, height: 711 };
34187
34198
  this.history = [];
34188
34199
  this.environment = "ENVIRONMENT_BROWSER";
34189
34200
  this.tools = tools;
@@ -34222,6 +34233,9 @@ var GoogleCUAClient = class extends AgentClient {
34222
34233
  setViewport(width, height) {
34223
34234
  this.currentViewport = { width, height };
34224
34235
  }
34236
+ setScreenshotSize(width, height) {
34237
+ this.actualScreenshotSize = { width, height };
34238
+ }
34225
34239
  setCurrentUrl(url) {
34226
34240
  this.currentUrl = url;
34227
34241
  }
@@ -34837,9 +34851,13 @@ var GoogleCUAClient = class extends AgentClient {
34837
34851
  normalizeCoordinates(x2, y) {
34838
34852
  x2 = Math.min(999, Math.max(0, x2));
34839
34853
  y = Math.min(999, Math.max(0, y));
34854
+ const screenshotX = x2 / 1e3 * this.actualScreenshotSize.width;
34855
+ const screenshotY = y / 1e3 * this.actualScreenshotSize.height;
34856
+ const scaleX = this.currentViewport.width / this.actualScreenshotSize.width;
34857
+ const scaleY = this.currentViewport.height / this.actualScreenshotSize.height;
34840
34858
  return {
34841
- x: Math.floor(x2 / 1e3 * this.currentViewport.width),
34842
- y: Math.floor(y / 1e3 * this.currentViewport.height)
34859
+ x: Math.floor(screenshotX * scaleX),
34860
+ y: Math.floor(screenshotY * scaleY)
34843
34861
  };
34844
34862
  }
34845
34863
  captureScreenshot(options) {
@@ -34890,6 +34908,8 @@ var MicrosoftCUAClient = class extends AgentClient {
34890
34908
  };
34891
34909
  // Resized dimensions for model input
34892
34910
  this.resizedViewport = { width: 1288, height: 711 };
34911
+ // Actual screenshot dimensions (tracked separately from viewport)
34912
+ this.actualScreenshotSize = { width: 1288, height: 711 };
34893
34913
  this.apiKey = (clientOptions == null ? void 0 : clientOptions.apiKey) || process.env.AZURE_API_KEY || process.env.FIREWORKS_API_KEY || "";
34894
34914
  this.baseURL = (clientOptions == null ? void 0 : clientOptions.baseURL) || process.env.AZURE_ENDPOINT || process.env.FIREWORKS_ENDPOINT || "";
34895
34915
  this.clientOptions = {
@@ -34916,6 +34936,9 @@ var MicrosoftCUAClient = class extends AgentClient {
34916
34936
  this.currentViewport = { width, height };
34917
34937
  this.resizedViewport = this.smartResize(width, height);
34918
34938
  }
34939
+ setScreenshotSize(width, height) {
34940
+ this.actualScreenshotSize = { width, height };
34941
+ }
34919
34942
  setCurrentUrl(url) {
34920
34943
  this.currentUrl = url;
34921
34944
  }
@@ -34953,7 +34976,7 @@ var MicrosoftCUAClient = class extends AgentClient {
34953
34976
  * Simplified to match Python's minimal approach
34954
34977
  */
34955
34978
  generateSystemPrompt() {
34956
- const { width, height } = this.resizedViewport;
34979
+ const { width, height } = this.actualScreenshotSize;
34957
34980
  let basePrompt = "You are a helpful assistant.";
34958
34981
  if (this.userProvidedInstructions) {
34959
34982
  basePrompt = `${basePrompt}
@@ -35114,8 +35137,8 @@ ${functionCallTemplate}`;
35114
35137
  const transformCoordinate = (coord) => {
35115
35138
  if (!coord || coord.length !== 2) return coord;
35116
35139
  const [x2, y] = coord;
35117
- const scaleX = this.currentViewport.width / this.resizedViewport.width;
35118
- const scaleY = this.currentViewport.height / this.resizedViewport.height;
35140
+ const scaleX = this.currentViewport.width / this.actualScreenshotSize.width;
35141
+ const scaleY = this.currentViewport.height / this.actualScreenshotSize.height;
35119
35142
  return [Math.round(x2 * scaleX), Math.round(y * scaleY)];
35120
35143
  };
35121
35144
  const baseAction = {
@@ -35624,6 +35647,14 @@ var AgentProvider = class _AgentProvider {
35624
35647
  // lib/v3/handlers/v3CuaAgentHandler.ts
35625
35648
  init_flowLogger();
35626
35649
  init_sdkErrors();
35650
+ function getPNGDimensions(buffer) {
35651
+ if (buffer.length < 24 || buffer[0] !== 137 || buffer[1] !== 80 || buffer[2] !== 78 || buffer[3] !== 71) {
35652
+ throw new Error("Invalid PNG file");
35653
+ }
35654
+ const width = buffer.readUInt32BE(16);
35655
+ const height = buffer.readUInt32BE(20);
35656
+ return { width, height };
35657
+ }
35627
35658
  var V3CuaAgentHandler = class {
35628
35659
  constructor(v3, logger, options, tools) {
35629
35660
  this.v3 = v3;
@@ -35653,8 +35684,23 @@ var V3CuaAgentHandler = class {
35653
35684
  this.agentClient.setScreenshotProvider(() => __async(this, null, function* () {
35654
35685
  this.ensureNotClosed();
35655
35686
  const page = yield this.v3.context.awaitActivePage();
35656
- const base64 = yield page.screenshot({ fullPage: false });
35657
- return base64.toString("base64");
35687
+ const screenshotBuffer = yield page.screenshot({ fullPage: false });
35688
+ if (this.agentClient instanceof GoogleCUAClient || this.agentClient instanceof OpenAICUAClient || this.agentClient instanceof MicrosoftCUAClient) {
35689
+ try {
35690
+ const dimensions = getPNGDimensions(screenshotBuffer);
35691
+ this.agentClient.setScreenshotSize(
35692
+ dimensions.width,
35693
+ dimensions.height
35694
+ );
35695
+ } catch (e2) {
35696
+ this.logger({
35697
+ category: "agent",
35698
+ message: `Could not read screenshot dimensions: ${e2}`,
35699
+ level: 1
35700
+ });
35701
+ }
35702
+ }
35703
+ return screenshotBuffer.toString("base64");
35658
35704
  }));
35659
35705
  this.agentClient.setActionHandler((action) => __async(this, null, function* () {
35660
35706
  var _a4, _b, _c;
@@ -36078,11 +36124,26 @@ var V3CuaAgentHandler = class {
36078
36124
  });
36079
36125
  try {
36080
36126
  const page = yield this.v3.context.awaitActivePage();
36081
- const base64Image = yield page.screenshot({ fullPage: false });
36082
- this.v3.bus.emit("agent_screensot_taken_event", base64Image);
36127
+ const screenshotBuffer = yield page.screenshot({ fullPage: false });
36128
+ if (this.agentClient instanceof GoogleCUAClient || this.agentClient instanceof OpenAICUAClient || this.agentClient instanceof MicrosoftCUAClient) {
36129
+ try {
36130
+ const dimensions = getPNGDimensions(screenshotBuffer);
36131
+ this.agentClient.setScreenshotSize(
36132
+ dimensions.width,
36133
+ dimensions.height
36134
+ );
36135
+ } catch (e2) {
36136
+ this.logger({
36137
+ category: "agent",
36138
+ message: `Could not read screenshot dimensions: ${e2}`,
36139
+ level: 1
36140
+ });
36141
+ }
36142
+ }
36143
+ this.v3.bus.emit("agent_screenshot_taken_event", screenshotBuffer);
36083
36144
  const currentUrl = page.url();
36084
36145
  return yield this.agentClient.captureScreenshot({
36085
- base64Image,
36146
+ base64Image: screenshotBuffer.toString("base64"),
36086
36147
  currentUrl
36087
36148
  });
36088
36149
  } catch (e2) {
@@ -66748,7 +66809,7 @@ var _V3 = class _V3 {
66748
66809
  }
66749
66810
  this.logger({
66750
66811
  category: "agent",
66751
- message: `Creating v3 agent instance with options: ${JSON.stringify(options)}`,
66812
+ message: "Creating v3 agent instance",
66752
66813
  level: 1,
66753
66814
  auxiliary: __spreadValues({
66754
66815
  cua: { value: isCuaMode ? "true" : "false", type: "boolean" },
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@browserbasehq/stagehand",
3
- "version": "3.0.7-alpha-ddd7464487a0cd0615d4f1ba458bc6f3fa51b27d",
3
+ "version": "3.0.7-alpha-bd2db925f66a826d61d58be1611d55646cbdb560",
4
4
  "description": "An AI web browsing framework focused on simplicity and extensibility.",
5
5
  "main": "./dist/index.js",
6
6
  "module": "./dist/index.js",
@@ -41,7 +41,6 @@
41
41
  "pino": "^9.6.0",
42
42
  "pino-pretty": "^13.0.0",
43
43
  "playwright": "^1.52.0",
44
- "sharp": "^0.34.5",
45
44
  "ws": "^8.18.0",
46
45
  "zod-to-json-schema": "^3.25.0"
47
46
  },