@browserbasehq/stagehand 3.0.7-alpha-b48c9c68e97f6370e2c01ba0232b951915142abd → 3.0.7-alpha-05f5580937c3c157550e3c25ae6671f44f562211

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -947,6 +947,7 @@ declare class StagehandAPIClient {
947
947
  private projectId;
948
948
  private sessionId?;
949
949
  private modelApiKey;
950
+ private modelProvider?;
950
951
  private logger;
951
952
  private fetchWithCookies;
952
953
  constructor({ apiKey, projectId, logger }: StagehandAPIConstructorParams);
@@ -960,6 +961,14 @@ declare class StagehandAPIClient {
960
961
  agentExecute(agentConfig: AgentConfig, executeOptions: AgentExecuteOptions | string, frameId?: string): Promise<AgentResult>;
961
962
  end(): Promise<Response>;
962
963
  getReplayMetrics(): Promise<StagehandMetrics>;
964
+ /**
965
+ * Prepares a model configuration for the API payload by ensuring
966
+ * the apiKey is included. If the model is passed as a string,
967
+ * it converts it to an object with modelName and apiKey.
968
+ * The apiKey is loaded from environment variables only if the provider
969
+ * differs from the one used during init.
970
+ */
971
+ private prepareModelConfig;
963
972
  private execute;
964
973
  private request;
965
974
  }
@@ -1091,7 +1100,7 @@ declare class V3Context {
1091
1100
  awaitActivePage(timeoutMs?: number): Promise<Page>;
1092
1101
  }
1093
1102
 
1094
- type AgentReplayStep = AgentReplayActStep | AgentReplayFillFormStep | AgentReplayGotoStep | AgentReplayScrollStep | AgentReplayWaitStep | AgentReplayNavBackStep | {
1103
+ type AgentReplayStep = AgentReplayActStep | AgentReplayFillFormStep | AgentReplayGotoStep | AgentReplayScrollStep | AgentReplayWaitStep | AgentReplayNavBackStep | AgentReplayKeysStep | {
1095
1104
  type: string;
1096
1105
  [key: string]: unknown;
1097
1106
  };
@@ -1134,6 +1143,16 @@ interface AgentReplayNavBackStep {
1134
1143
  type: "navback";
1135
1144
  waitUntil?: LoadState;
1136
1145
  }
1146
+ interface AgentReplayKeysStep {
1147
+ type: "keys";
1148
+ instruction?: string;
1149
+ playwrightArguments: {
1150
+ method: "type" | "press";
1151
+ text?: string;
1152
+ keys?: string;
1153
+ times?: number;
1154
+ };
1155
+ }
1137
1156
 
1138
1157
  /**
1139
1158
  * Response
package/dist/index.js CHANGED
@@ -179,7 +179,7 @@ var __forAwait = (obj, it, method) => (it = obj[__knownSymbol("asyncIterator")])
179
179
  var STAGEHAND_VERSION;
180
180
  var init_version = __esm({
181
181
  "lib/version.ts"() {
182
- STAGEHAND_VERSION = "3.0.7-alpha-b48c9c68e97f6370e2c01ba0232b951915142abd";
182
+ STAGEHAND_VERSION = "3.0.7-alpha-05f5580937c3c157550e3c25ae6671f44f562211";
183
183
  }
184
184
  });
185
185
 
@@ -28811,6 +28811,9 @@ var AgentCache = class {
28811
28811
  case "navback":
28812
28812
  yield this.replayAgentNavBackStep(step, ctx);
28813
28813
  return;
28814
+ case "keys":
28815
+ yield this.replayAgentKeysStep(step, ctx);
28816
+ return;
28814
28817
  case "close":
28815
28818
  case "extract":
28816
28819
  case "screenshot":
@@ -28900,6 +28903,22 @@ var AgentCache = class {
28900
28903
  yield page.goBack({ waitUntil: (_a4 = step.waitUntil) != null ? _a4 : "domcontentloaded" });
28901
28904
  });
28902
28905
  }
28906
+ replayAgentKeysStep(step, ctx) {
28907
+ return __async(this, null, function* () {
28908
+ const page = yield ctx.awaitActivePage();
28909
+ const { method, text, keys, times } = step.playwrightArguments;
28910
+ const repeatCount = Math.max(1, times != null ? times : 1);
28911
+ if (method === "type" && text) {
28912
+ for (let i2 = 0; i2 < repeatCount; i2++) {
28913
+ yield page.type(text, { delay: 100 });
28914
+ }
28915
+ } else if (method === "press" && keys) {
28916
+ for (let i2 = 0; i2 < repeatCount; i2++) {
28917
+ yield page.keyPress(keys, { delay: 100 });
28918
+ }
28919
+ }
28920
+ });
28921
+ }
28903
28922
  };
28904
28923
 
28905
28924
  // lib/v3/cache/CacheStorage.ts
@@ -31323,6 +31342,16 @@ var extractTool = (v3, executionModel, logger) => (0, import_ai10.tool)({
31323
31342
  // lib/v3/agent/tools/click.ts
31324
31343
  var import_ai11 = require("ai");
31325
31344
  var import_zod15 = require("zod");
31345
+
31346
+ // lib/v3/agent/utils/xpath.ts
31347
+ function ensureXPath(value) {
31348
+ if (typeof value !== "string") return null;
31349
+ const trimmed = value.trim();
31350
+ if (!trimmed) return null;
31351
+ return trimmed.startsWith("xpath=") ? trimmed : `xpath=${trimmed}`;
31352
+ }
31353
+
31354
+ // lib/v3/agent/tools/click.ts
31326
31355
  function waitForTimeout(ms) {
31327
31356
  return new Promise((resolve3) => setTimeout(resolve3, ms));
31328
31357
  }
@@ -31353,15 +31382,27 @@ var clickTool = (v3, provider) => (0, import_ai11.tool)({
31353
31382
  }
31354
31383
  }
31355
31384
  });
31356
- yield page.click(processed.x, processed.y);
31385
+ const xpath = yield page.click(processed.x, processed.y, {
31386
+ returnXpath: true
31387
+ });
31357
31388
  if (isGoogleProvider(provider)) {
31358
31389
  yield waitForTimeout(1e3);
31359
31390
  }
31360
- v3.recordAgentReplayStep({
31361
- type: "click",
31362
- instruction: describe,
31363
- playwrightArguments: { coordinates: [processed.x, processed.y] }
31364
- });
31391
+ const normalizedXpath = ensureXPath(xpath);
31392
+ if (normalizedXpath) {
31393
+ const action = {
31394
+ selector: normalizedXpath,
31395
+ description: describe,
31396
+ method: "click",
31397
+ arguments: []
31398
+ };
31399
+ v3.recordAgentReplayStep({
31400
+ type: "act",
31401
+ instruction: describe,
31402
+ actions: [action],
31403
+ actionDescription: describe
31404
+ });
31405
+ }
31365
31406
  return {
31366
31407
  success: true,
31367
31408
  describe,
@@ -31410,19 +31451,28 @@ var typeTool = (v3, provider) => (0, import_ai12.tool)({
31410
31451
  }
31411
31452
  }
31412
31453
  });
31413
- yield page.click(processed.x, processed.y);
31454
+ const xpath = yield page.click(processed.x, processed.y, {
31455
+ returnXpath: true
31456
+ });
31414
31457
  if (isGoogleProvider(provider)) {
31415
31458
  yield waitForTimeout2(1e3);
31416
31459
  }
31417
31460
  yield page.type(text);
31418
- v3.recordAgentReplayStep({
31419
- type: "type",
31420
- instruction: describe,
31421
- playwrightArguments: {
31422
- coordinates: [processed.x, processed.y],
31423
- text
31424
- }
31425
- });
31461
+ const normalizedXpath = ensureXPath(xpath);
31462
+ if (normalizedXpath) {
31463
+ const action = {
31464
+ selector: normalizedXpath,
31465
+ description: describe,
31466
+ method: "type",
31467
+ arguments: [text]
31468
+ };
31469
+ v3.recordAgentReplayStep({
31470
+ type: "act",
31471
+ instruction: describe,
31472
+ actions: [action],
31473
+ actionDescription: describe
31474
+ });
31475
+ }
31426
31476
  return { success: true, describe, text };
31427
31477
  } catch (error) {
31428
31478
  return {
@@ -31473,20 +31523,29 @@ var dragAndDropTool = (v3, provider) => (0, import_ai13.tool)({
31473
31523
  }
31474
31524
  }
31475
31525
  });
31476
- yield page.dragAndDrop(
31526
+ const [fromXpath, toXpath] = yield page.dragAndDrop(
31477
31527
  processedStart.x,
31478
31528
  processedStart.y,
31479
31529
  processedEnd.x,
31480
- processedEnd.y
31530
+ processedEnd.y,
31531
+ { returnXpath: true }
31481
31532
  );
31482
- v3.recordAgentReplayStep({
31483
- type: "dragAndDrop",
31484
- instruction: describe,
31485
- playwrightArguments: {
31486
- startCoordinates: [processedStart.x, processedStart.y],
31487
- endCoordinates: [processedEnd.x, processedEnd.y]
31488
- }
31489
- });
31533
+ const normalizedFrom = ensureXPath(fromXpath);
31534
+ const normalizedTo = ensureXPath(toXpath);
31535
+ if (normalizedFrom && normalizedTo) {
31536
+ const action = {
31537
+ selector: normalizedFrom,
31538
+ description: describe,
31539
+ method: "dragAndDrop",
31540
+ arguments: [normalizedTo]
31541
+ };
31542
+ v3.recordAgentReplayStep({
31543
+ type: "act",
31544
+ instruction: describe,
31545
+ actions: [action],
31546
+ actionDescription: describe
31547
+ });
31548
+ }
31490
31549
  return { success: true, describe };
31491
31550
  } catch (error) {
31492
31551
  return {
@@ -31533,21 +31592,28 @@ var clickAndHoldTool = (v3, provider) => (0, import_ai14.tool)({
31533
31592
  }
31534
31593
  }
31535
31594
  });
31536
- yield page.dragAndDrop(
31595
+ const [xpath] = yield page.dragAndDrop(
31537
31596
  processed.x,
31538
31597
  processed.y,
31539
31598
  processed.x,
31540
31599
  processed.y,
31541
- { delay: duration }
31600
+ { delay: duration, returnXpath: true }
31542
31601
  );
31543
- v3.recordAgentReplayStep({
31544
- type: "clickAndHold",
31545
- instruction: describe,
31546
- playwrightArguments: {
31547
- coordinates: [processed.x, processed.y],
31548
- duration
31549
- }
31550
- });
31602
+ const normalizedXpath = ensureXPath(xpath);
31603
+ if (normalizedXpath) {
31604
+ const action = {
31605
+ selector: normalizedXpath,
31606
+ description: describe,
31607
+ method: "clickAndHold",
31608
+ arguments: [String(duration)]
31609
+ };
31610
+ v3.recordAgentReplayStep({
31611
+ type: "act",
31612
+ instruction: describe,
31613
+ actions: [action],
31614
+ actionDescription: describe
31615
+ });
31616
+ }
31551
31617
  return { success: true, describe };
31552
31618
  } catch (error) {
31553
31619
  return {
@@ -31677,16 +31743,35 @@ MANDATORY USE CASES (always use fillFormVision for these):
31677
31743
  }
31678
31744
  }
31679
31745
  });
31746
+ const actions = [];
31680
31747
  for (const field of processedFields) {
31681
- yield page.click(field.coordinates.x, field.coordinates.y);
31748
+ const xpath = yield page.click(
31749
+ field.coordinates.x,
31750
+ field.coordinates.y,
31751
+ {
31752
+ returnXpath: true
31753
+ }
31754
+ );
31682
31755
  yield page.type(field.value);
31756
+ const normalizedXpath = ensureXPath(xpath);
31757
+ if (normalizedXpath) {
31758
+ actions.push({
31759
+ selector: normalizedXpath,
31760
+ description: field.action,
31761
+ method: "type",
31762
+ arguments: [field.value]
31763
+ });
31764
+ }
31683
31765
  yield new Promise((resolve3) => setTimeout(resolve3, 100));
31684
31766
  }
31685
- v3.recordAgentReplayStep({
31686
- type: "fillFormVision",
31687
- instruction: `Fill ${fields.length} form fields`,
31688
- playwrightArguments: processedFields
31689
- });
31767
+ if (actions.length > 0) {
31768
+ v3.recordAgentReplayStep({
31769
+ type: "act",
31770
+ instruction: `Fill ${fields.length} form fields`,
31771
+ actions,
31772
+ actionDescription: `Fill ${fields.length} form fields`
31773
+ });
31774
+ }
31690
31775
  return {
31691
31776
  success: true,
31692
31777
  playwrightArguments: processedFields
@@ -35680,7 +35765,7 @@ var V3CuaAgentHandler = class {
35680
35765
  clickCount: clickCount != null ? clickCount : 1,
35681
35766
  returnXpath: true
35682
35767
  });
35683
- const normalized = this.ensureXPath(xpath);
35768
+ const normalized = ensureXPath(xpath);
35684
35769
  if (normalized) {
35685
35770
  const stagehandAction = {
35686
35771
  selector: normalized,
@@ -35711,7 +35796,7 @@ var V3CuaAgentHandler = class {
35711
35796
  clickCount: 2,
35712
35797
  returnXpath: true
35713
35798
  });
35714
- const normalized = this.ensureXPath(xpath);
35799
+ const normalized = ensureXPath(xpath);
35715
35800
  if (normalized) {
35716
35801
  const stagehandAction = {
35717
35802
  selector: normalized,
@@ -35741,7 +35826,7 @@ var V3CuaAgentHandler = class {
35741
35826
  clickCount: 3,
35742
35827
  returnXpath: true
35743
35828
  });
35744
- const normalized = this.ensureXPath(xpath);
35829
+ const normalized = ensureXPath(xpath);
35745
35830
  if (normalized) {
35746
35831
  const stagehandAction = {
35747
35832
  selector: normalized,
@@ -35767,7 +35852,7 @@ var V3CuaAgentHandler = class {
35767
35852
  yield page.type(String(text != null ? text : ""));
35768
35853
  if (recording) {
35769
35854
  const xpath = yield computeActiveElementXpath(page);
35770
- const normalized = this.ensureXPath(xpath);
35855
+ const normalized = ensureXPath(xpath);
35771
35856
  if (normalized) {
35772
35857
  const stagehandAction = {
35773
35858
  selector: normalized,
@@ -35837,8 +35922,8 @@ var V3CuaAgentHandler = class {
35837
35922
  returnXpath: true
35838
35923
  });
35839
35924
  const [fromXpath, toXpath] = xps || ["", ""];
35840
- const from = this.ensureXPath(fromXpath);
35841
- const to = this.ensureXPath(toXpath);
35925
+ const from = ensureXPath(fromXpath);
35926
+ const to = ensureXPath(toXpath);
35842
35927
  if (from && to) {
35843
35928
  const stagehandAction = {
35844
35929
  selector: from,
@@ -35927,12 +36012,6 @@ var V3CuaAgentHandler = class {
35927
36012
  computePointerTarget(action) {
35928
36013
  return typeof action.x === "number" && typeof action.y === "number" ? `(${action.x}, ${action.y})` : typeof action.selector === "string" ? action.selector : typeof action.input === "string" ? action.input : typeof action.description === "string" ? action.description : void 0;
35929
36014
  }
35930
- ensureXPath(value) {
35931
- if (typeof value !== "string") return null;
35932
- const trimmed = value.trim();
35933
- if (!trimmed) return null;
35934
- return trimmed.startsWith("xpath=") ? trimmed : `xpath=${trimmed}`;
35935
- }
35936
36015
  describePointerAction(kind, x2, y) {
35937
36016
  const nx = Number(x2);
35938
36017
  const ny = Number(y);
@@ -65014,6 +65093,7 @@ var StagehandAPIClient = class {
65014
65093
  throw new StagehandAPIError("modelApiKey is required");
65015
65094
  }
65016
65095
  this.modelApiKey = modelApiKey;
65096
+ this.modelProvider = (modelName == null ? void 0 : modelName.includes("/")) ? modelName.split("/")[0] : void 0;
65017
65097
  const region = browserbaseSessionCreateParams == null ? void 0 : browserbaseSessionCreateParams.region;
65018
65098
  if (region && region !== "us-west-2") {
65019
65099
  return { sessionId: browserbaseSessionID != null ? browserbaseSessionID : null, available: false };
@@ -65068,6 +65148,9 @@ var StagehandAPIClient = class {
65068
65148
  if (options) {
65069
65149
  const _a4 = options, { page: _ } = _a4, restOptions = __objRest(_a4, ["page"]);
65070
65150
  if (Object.keys(restOptions).length > 0) {
65151
+ if (restOptions.model) {
65152
+ restOptions.model = this.prepareModelConfig(restOptions.model);
65153
+ }
65071
65154
  args.options = restOptions;
65072
65155
  }
65073
65156
  }
@@ -65093,6 +65176,9 @@ var StagehandAPIClient = class {
65093
65176
  if (options) {
65094
65177
  const _a4 = options, { page: _ } = _a4, restOptions = __objRest(_a4, ["page"]);
65095
65178
  if (Object.keys(restOptions).length > 0) {
65179
+ if (restOptions.model) {
65180
+ restOptions.model = this.prepareModelConfig(restOptions.model);
65181
+ }
65096
65182
  args.options = restOptions;
65097
65183
  }
65098
65184
  }
@@ -65115,6 +65201,9 @@ var StagehandAPIClient = class {
65115
65201
  if (options) {
65116
65202
  const _a4 = options, { page: _ } = _a4, restOptions = __objRest(_a4, ["page"]);
65117
65203
  if (Object.keys(restOptions).length > 0) {
65204
+ if (restOptions.model) {
65205
+ restOptions.model = this.prepareModelConfig(restOptions.model);
65206
+ }
65118
65207
  args.options = restOptions;
65119
65208
  }
65120
65209
  }
@@ -65259,6 +65348,32 @@ var StagehandAPIClient = class {
65259
65348
  return metrics;
65260
65349
  });
65261
65350
  }
65351
+ /**
65352
+ * Prepares a model configuration for the API payload by ensuring
65353
+ * the apiKey is included. If the model is passed as a string,
65354
+ * it converts it to an object with modelName and apiKey.
65355
+ * The apiKey is loaded from environment variables only if the provider
65356
+ * differs from the one used during init.
65357
+ */
65358
+ prepareModelConfig(model) {
65359
+ var _a4, _b, _c;
65360
+ if (typeof model === "string") {
65361
+ const provider = model.includes("/") ? model.split("/")[0] : void 0;
65362
+ const apiKey = provider && provider !== this.modelProvider ? (_a4 = loadApiKeyFromEnv(provider, this.logger)) != null ? _a4 : this.modelApiKey : this.modelApiKey;
65363
+ return {
65364
+ modelName: model,
65365
+ apiKey
65366
+ };
65367
+ }
65368
+ if (!model.apiKey) {
65369
+ const provider = ((_b = model.modelName) == null ? void 0 : _b.includes("/")) ? model.modelName.split("/")[0] : void 0;
65370
+ const apiKey = provider && provider !== this.modelProvider ? (_c = loadApiKeyFromEnv(provider, this.logger)) != null ? _c : this.modelApiKey : this.modelApiKey;
65371
+ return __spreadProps(__spreadValues({}, model), {
65372
+ apiKey
65373
+ });
65374
+ }
65375
+ return model;
65376
+ }
65262
65377
  execute(_0) {
65263
65378
  return __async(this, arguments, function* ({
65264
65379
  method,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@browserbasehq/stagehand",
3
- "version": "3.0.7-alpha-b48c9c68e97f6370e2c01ba0232b951915142abd",
3
+ "version": "3.0.7-alpha-05f5580937c3c157550e3c25ae6671f44f562211",
4
4
  "description": "An AI web browsing framework focused on simplicity and extensibility.",
5
5
  "main": "./dist/index.js",
6
6
  "module": "./dist/index.js",