@browserbasehq/stagehand 3.0.7-alpha-fea1700552af3319052f463685752501c8e71de3 → 3.0.7-alpha-e0e22e06bc752a8ffde30f3dbfa58d91e24e6c09

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -1100,7 +1100,7 @@ declare class V3Context {
1100
1100
  awaitActivePage(timeoutMs?: number): Promise<Page>;
1101
1101
  }
1102
1102
 
1103
- type AgentReplayStep = AgentReplayActStep | AgentReplayFillFormStep | AgentReplayGotoStep | AgentReplayScrollStep | AgentReplayWaitStep | AgentReplayNavBackStep | {
1103
+ type AgentReplayStep = AgentReplayActStep | AgentReplayFillFormStep | AgentReplayGotoStep | AgentReplayScrollStep | AgentReplayWaitStep | AgentReplayNavBackStep | AgentReplayKeysStep | {
1104
1104
  type: string;
1105
1105
  [key: string]: unknown;
1106
1106
  };
@@ -1143,6 +1143,16 @@ interface AgentReplayNavBackStep {
1143
1143
  type: "navback";
1144
1144
  waitUntil?: LoadState;
1145
1145
  }
1146
+ interface AgentReplayKeysStep {
1147
+ type: "keys";
1148
+ instruction?: string;
1149
+ playwrightArguments: {
1150
+ method: "type" | "press";
1151
+ text?: string;
1152
+ keys?: string;
1153
+ times?: number;
1154
+ };
1155
+ }
1146
1156
 
1147
1157
  /**
1148
1158
  * Response
@@ -1983,6 +1993,7 @@ type AgentConfig = {
1983
1993
  * - 'dom' (default): Uses DOM-based tools (act, fillForm) for structured interactions
1984
1994
  * - 'hybrid': Uses coordinate-based tools (click, type, dragAndDrop, clickAndHold, fillFormVision)
1985
1995
  * for visual/screenshot-based interactions
1996
+ * @experimental hybrid mode requires `experimental: true` in Stagehand constructor
1986
1997
  */
1987
1998
  mode?: AgentToolMode;
1988
1999
  };
package/dist/index.js CHANGED
@@ -179,7 +179,7 @@ var __forAwait = (obj, it, method) => (it = obj[__knownSymbol("asyncIterator")])
179
179
  var STAGEHAND_VERSION;
180
180
  var init_version = __esm({
181
181
  "lib/version.ts"() {
182
- STAGEHAND_VERSION = "3.0.7-alpha-fea1700552af3319052f463685752501c8e71de3";
182
+ STAGEHAND_VERSION = "3.0.7-alpha-e0e22e06bc752a8ffde30f3dbfa58d91e24e6c09";
183
183
  }
184
184
  });
185
185
 
@@ -28811,6 +28811,9 @@ var AgentCache = class {
28811
28811
  case "navback":
28812
28812
  yield this.replayAgentNavBackStep(step, ctx);
28813
28813
  return;
28814
+ case "keys":
28815
+ yield this.replayAgentKeysStep(step, ctx);
28816
+ return;
28814
28817
  case "close":
28815
28818
  case "extract":
28816
28819
  case "screenshot":
@@ -28900,6 +28903,22 @@ var AgentCache = class {
28900
28903
  yield page.goBack({ waitUntil: (_a4 = step.waitUntil) != null ? _a4 : "domcontentloaded" });
28901
28904
  });
28902
28905
  }
28906
+ replayAgentKeysStep(step, ctx) {
28907
+ return __async(this, null, function* () {
28908
+ const page = yield ctx.awaitActivePage();
28909
+ const { method, text, keys, times } = step.playwrightArguments;
28910
+ const repeatCount = Math.max(1, times != null ? times : 1);
28911
+ if (method === "type" && text) {
28912
+ for (let i2 = 0; i2 < repeatCount; i2++) {
28913
+ yield page.type(text, { delay: 100 });
28914
+ }
28915
+ } else if (method === "press" && keys) {
28916
+ for (let i2 = 0; i2 < repeatCount; i2++) {
28917
+ yield page.keyPress(keys, { delay: 100 });
28918
+ }
28919
+ }
28920
+ });
28921
+ }
28903
28922
  };
28904
28923
 
28905
28924
  // lib/v3/cache/CacheStorage.ts
@@ -31323,6 +31342,16 @@ var extractTool = (v3, executionModel, logger) => (0, import_ai10.tool)({
31323
31342
  // lib/v3/agent/tools/click.ts
31324
31343
  var import_ai11 = require("ai");
31325
31344
  var import_zod15 = require("zod");
31345
+
31346
+ // lib/v3/agent/utils/xpath.ts
31347
+ function ensureXPath(value) {
31348
+ if (typeof value !== "string") return null;
31349
+ const trimmed = value.trim();
31350
+ if (!trimmed) return null;
31351
+ return trimmed.startsWith("xpath=") ? trimmed : `xpath=${trimmed}`;
31352
+ }
31353
+
31354
+ // lib/v3/agent/tools/click.ts
31326
31355
  function waitForTimeout(ms) {
31327
31356
  return new Promise((resolve3) => setTimeout(resolve3, ms));
31328
31357
  }
@@ -31353,15 +31382,27 @@ var clickTool = (v3, provider) => (0, import_ai11.tool)({
31353
31382
  }
31354
31383
  }
31355
31384
  });
31356
- yield page.click(processed.x, processed.y);
31385
+ const xpath = yield page.click(processed.x, processed.y, {
31386
+ returnXpath: true
31387
+ });
31357
31388
  if (isGoogleProvider(provider)) {
31358
31389
  yield waitForTimeout(1e3);
31359
31390
  }
31360
- v3.recordAgentReplayStep({
31361
- type: "click",
31362
- instruction: describe,
31363
- playwrightArguments: { coordinates: [processed.x, processed.y] }
31364
- });
31391
+ const normalizedXpath = ensureXPath(xpath);
31392
+ if (normalizedXpath) {
31393
+ const action = {
31394
+ selector: normalizedXpath,
31395
+ description: describe,
31396
+ method: "click",
31397
+ arguments: []
31398
+ };
31399
+ v3.recordAgentReplayStep({
31400
+ type: "act",
31401
+ instruction: describe,
31402
+ actions: [action],
31403
+ actionDescription: describe
31404
+ });
31405
+ }
31365
31406
  return {
31366
31407
  success: true,
31367
31408
  describe,
@@ -31410,19 +31451,28 @@ var typeTool = (v3, provider) => (0, import_ai12.tool)({
31410
31451
  }
31411
31452
  }
31412
31453
  });
31413
- yield page.click(processed.x, processed.y);
31454
+ const xpath = yield page.click(processed.x, processed.y, {
31455
+ returnXpath: true
31456
+ });
31414
31457
  if (isGoogleProvider(provider)) {
31415
31458
  yield waitForTimeout2(1e3);
31416
31459
  }
31417
31460
  yield page.type(text);
31418
- v3.recordAgentReplayStep({
31419
- type: "type",
31420
- instruction: describe,
31421
- playwrightArguments: {
31422
- coordinates: [processed.x, processed.y],
31423
- text
31424
- }
31425
- });
31461
+ const normalizedXpath = ensureXPath(xpath);
31462
+ if (normalizedXpath) {
31463
+ const action = {
31464
+ selector: normalizedXpath,
31465
+ description: describe,
31466
+ method: "type",
31467
+ arguments: [text]
31468
+ };
31469
+ v3.recordAgentReplayStep({
31470
+ type: "act",
31471
+ instruction: describe,
31472
+ actions: [action],
31473
+ actionDescription: describe
31474
+ });
31475
+ }
31426
31476
  return { success: true, describe, text };
31427
31477
  } catch (error) {
31428
31478
  return {
@@ -31473,20 +31523,29 @@ var dragAndDropTool = (v3, provider) => (0, import_ai13.tool)({
31473
31523
  }
31474
31524
  }
31475
31525
  });
31476
- yield page.dragAndDrop(
31526
+ const [fromXpath, toXpath] = yield page.dragAndDrop(
31477
31527
  processedStart.x,
31478
31528
  processedStart.y,
31479
31529
  processedEnd.x,
31480
- processedEnd.y
31530
+ processedEnd.y,
31531
+ { returnXpath: true }
31481
31532
  );
31482
- v3.recordAgentReplayStep({
31483
- type: "dragAndDrop",
31484
- instruction: describe,
31485
- playwrightArguments: {
31486
- startCoordinates: [processedStart.x, processedStart.y],
31487
- endCoordinates: [processedEnd.x, processedEnd.y]
31488
- }
31489
- });
31533
+ const normalizedFrom = ensureXPath(fromXpath);
31534
+ const normalizedTo = ensureXPath(toXpath);
31535
+ if (normalizedFrom && normalizedTo) {
31536
+ const action = {
31537
+ selector: normalizedFrom,
31538
+ description: describe,
31539
+ method: "dragAndDrop",
31540
+ arguments: [normalizedTo]
31541
+ };
31542
+ v3.recordAgentReplayStep({
31543
+ type: "act",
31544
+ instruction: describe,
31545
+ actions: [action],
31546
+ actionDescription: describe
31547
+ });
31548
+ }
31490
31549
  return { success: true, describe };
31491
31550
  } catch (error) {
31492
31551
  return {
@@ -31533,21 +31592,28 @@ var clickAndHoldTool = (v3, provider) => (0, import_ai14.tool)({
31533
31592
  }
31534
31593
  }
31535
31594
  });
31536
- yield page.dragAndDrop(
31595
+ const [xpath] = yield page.dragAndDrop(
31537
31596
  processed.x,
31538
31597
  processed.y,
31539
31598
  processed.x,
31540
31599
  processed.y,
31541
- { delay: duration }
31600
+ { delay: duration, returnXpath: true }
31542
31601
  );
31543
- v3.recordAgentReplayStep({
31544
- type: "clickAndHold",
31545
- instruction: describe,
31546
- playwrightArguments: {
31547
- coordinates: [processed.x, processed.y],
31548
- duration
31549
- }
31550
- });
31602
+ const normalizedXpath = ensureXPath(xpath);
31603
+ if (normalizedXpath) {
31604
+ const action = {
31605
+ selector: normalizedXpath,
31606
+ description: describe,
31607
+ method: "clickAndHold",
31608
+ arguments: [String(duration)]
31609
+ };
31610
+ v3.recordAgentReplayStep({
31611
+ type: "act",
31612
+ instruction: describe,
31613
+ actions: [action],
31614
+ actionDescription: describe
31615
+ });
31616
+ }
31551
31617
  return { success: true, describe };
31552
31618
  } catch (error) {
31553
31619
  return {
@@ -31677,16 +31743,35 @@ MANDATORY USE CASES (always use fillFormVision for these):
31677
31743
  }
31678
31744
  }
31679
31745
  });
31746
+ const actions = [];
31680
31747
  for (const field of processedFields) {
31681
- yield page.click(field.coordinates.x, field.coordinates.y);
31748
+ const xpath = yield page.click(
31749
+ field.coordinates.x,
31750
+ field.coordinates.y,
31751
+ {
31752
+ returnXpath: true
31753
+ }
31754
+ );
31682
31755
  yield page.type(field.value);
31756
+ const normalizedXpath = ensureXPath(xpath);
31757
+ if (normalizedXpath) {
31758
+ actions.push({
31759
+ selector: normalizedXpath,
31760
+ description: field.action,
31761
+ method: "type",
31762
+ arguments: [field.value]
31763
+ });
31764
+ }
31683
31765
  yield new Promise((resolve3) => setTimeout(resolve3, 100));
31684
31766
  }
31685
- v3.recordAgentReplayStep({
31686
- type: "fillFormVision",
31687
- instruction: `Fill ${fields.length} form fields`,
31688
- playwrightArguments: processedFields
31689
- });
31767
+ if (actions.length > 0) {
31768
+ v3.recordAgentReplayStep({
31769
+ type: "act",
31770
+ instruction: `Fill ${fields.length} form fields`,
31771
+ actions,
31772
+ actionDescription: `Fill ${fields.length} form fields`
31773
+ });
31774
+ }
31690
31775
  return {
31691
31776
  success: true,
31692
31777
  playwrightArguments: processedFields
@@ -35680,7 +35765,7 @@ var V3CuaAgentHandler = class {
35680
35765
  clickCount: clickCount != null ? clickCount : 1,
35681
35766
  returnXpath: true
35682
35767
  });
35683
- const normalized = this.ensureXPath(xpath);
35768
+ const normalized = ensureXPath(xpath);
35684
35769
  if (normalized) {
35685
35770
  const stagehandAction = {
35686
35771
  selector: normalized,
@@ -35711,7 +35796,7 @@ var V3CuaAgentHandler = class {
35711
35796
  clickCount: 2,
35712
35797
  returnXpath: true
35713
35798
  });
35714
- const normalized = this.ensureXPath(xpath);
35799
+ const normalized = ensureXPath(xpath);
35715
35800
  if (normalized) {
35716
35801
  const stagehandAction = {
35717
35802
  selector: normalized,
@@ -35741,7 +35826,7 @@ var V3CuaAgentHandler = class {
35741
35826
  clickCount: 3,
35742
35827
  returnXpath: true
35743
35828
  });
35744
- const normalized = this.ensureXPath(xpath);
35829
+ const normalized = ensureXPath(xpath);
35745
35830
  if (normalized) {
35746
35831
  const stagehandAction = {
35747
35832
  selector: normalized,
@@ -35767,7 +35852,7 @@ var V3CuaAgentHandler = class {
35767
35852
  yield page.type(String(text != null ? text : ""));
35768
35853
  if (recording) {
35769
35854
  const xpath = yield computeActiveElementXpath(page);
35770
- const normalized = this.ensureXPath(xpath);
35855
+ const normalized = ensureXPath(xpath);
35771
35856
  if (normalized) {
35772
35857
  const stagehandAction = {
35773
35858
  selector: normalized,
@@ -35837,8 +35922,8 @@ var V3CuaAgentHandler = class {
35837
35922
  returnXpath: true
35838
35923
  });
35839
35924
  const [fromXpath, toXpath] = xps || ["", ""];
35840
- const from = this.ensureXPath(fromXpath);
35841
- const to = this.ensureXPath(toXpath);
35925
+ const from = ensureXPath(fromXpath);
35926
+ const to = ensureXPath(toXpath);
35842
35927
  if (from && to) {
35843
35928
  const stagehandAction = {
35844
35929
  selector: from,
@@ -35927,12 +36012,6 @@ var V3CuaAgentHandler = class {
35927
36012
  computePointerTarget(action) {
35928
36013
  return typeof action.x === "number" && typeof action.y === "number" ? `(${action.x}, ${action.y})` : typeof action.selector === "string" ? action.selector : typeof action.input === "string" ? action.input : typeof action.description === "string" ? action.description : void 0;
35929
36014
  }
35930
- ensureXPath(value) {
35931
- if (typeof value !== "string") return null;
35932
- const trimmed = value.trim();
35933
- if (!trimmed) return null;
35934
- return trimmed.startsWith("xpath=") ? trimmed : `xpath=${trimmed}`;
35935
- }
35936
36015
  describePointerAction(kind, x2, y) {
35937
36016
  const nx = Number(x2);
35938
36017
  const ny = Number(y);
@@ -65442,6 +65521,9 @@ function validateExperimentalFeatures(options) {
65442
65521
  if (hasIntegrations || hasTools) {
65443
65522
  features.push("MCP integrations and custom tools");
65444
65523
  }
65524
+ if ((agentConfig == null ? void 0 : agentConfig.mode) === "hybrid") {
65525
+ features.push("hybrid mode");
65526
+ }
65445
65527
  if (!(agentConfig == null ? void 0 : agentConfig.cua) && (isStreaming || (agentConfig == null ? void 0 : agentConfig.stream))) {
65446
65528
  features.push("streaming");
65447
65529
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@browserbasehq/stagehand",
3
- "version": "3.0.7-alpha-fea1700552af3319052f463685752501c8e71de3",
3
+ "version": "3.0.7-alpha-e0e22e06bc752a8ffde30f3dbfa58d91e24e6c09",
4
4
  "description": "An AI web browsing framework focused on simplicity and extensibility.",
5
5
  "main": "./dist/index.js",
6
6
  "module": "./dist/index.js",