@browserbasehq/stagehand 1.1.2 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,12 +2,19 @@
2
2
  // lib/dom/debug.ts
3
3
  async function debugDom() {
4
4
  window.chunkNumber = 0;
5
- const { selectorMap, outputString } = await window.processElements(
6
- window.chunkNumber
7
- );
5
+ const { selectorMap: multiSelectorMap, outputString } = await window.processElements(window.chunkNumber);
6
+ const selectorMap = multiSelectorMapToSelectorMap(multiSelectorMap);
8
7
  drawChunk(selectorMap);
9
8
  setupChunkNav();
10
9
  }
10
+ function multiSelectorMapToSelectorMap(multiSelectorMap) {
11
+ return Object.fromEntries(
12
+ Object.entries(multiSelectorMap).map(([key, selectors]) => [
13
+ Number(key),
14
+ selectors[0]
15
+ ])
16
+ );
17
+ }
11
18
  function drawChunk(selectorMap) {
12
19
  cleanupMarkers();
13
20
  Object.entries(selectorMap).forEach(([_index, selector]) => {
@@ -81,7 +88,10 @@
81
88
  window.chunkNumber -= 1;
82
89
  window.scrollTo(0, window.chunkNumber * window.innerHeight);
83
90
  await window.waitForDomSettle();
84
- const { selectorMap } = await processElements(window.chunkNumber);
91
+ const { selectorMap: multiSelectorMap } = await window.processElements(
92
+ window.chunkNumber
93
+ );
94
+ const selectorMap = multiSelectorMapToSelectorMap(multiSelectorMap);
85
95
  drawChunk(selectorMap);
86
96
  setupChunkNav();
87
97
  };
@@ -103,7 +113,10 @@
103
113
  window.chunkNumber += 1;
104
114
  window.scrollTo(0, window.chunkNumber * window.innerHeight);
105
115
  await window.waitForDomSettle();
106
- const { selectorMap } = await processElements(window.chunkNumber);
116
+ const { selectorMap: multiSelectorMap } = await window.processElements(
117
+ window.chunkNumber
118
+ );
119
+ const selectorMap = multiSelectorMapToSelectorMap(multiSelectorMap);
107
120
  drawChunk(selectorMap);
108
121
  setupChunkNav();
109
122
  };
@@ -60,7 +60,7 @@
60
60
  generateStandardXPath(element),
61
61
  generatedIdBasedXPath(element)
62
62
  ]);
63
- return [...idBasedXPath ? [idBasedXPath] : [], standardXPath, complexXPath];
63
+ return [standardXPath, ...idBasedXPath ? [idBasedXPath] : [], complexXPath];
64
64
  }
65
65
  async function generateComplexXPath(element) {
66
66
  const parts = [];
@@ -145,7 +145,7 @@
145
145
  }
146
146
  element = element.parentElement;
147
147
  }
148
- return parts.length ? `//${parts.join("//")}` : "";
148
+ return parts.length ? `/${parts.join("/")}` : "";
149
149
  }
150
150
  async function generatedIdBasedXPath(element) {
151
151
  if (isElementNode(element) && element.id) {
@@ -163,11 +163,7 @@
163
163
  }
164
164
  async function processDom(chunksSeen) {
165
165
  const { chunk, chunksArray } = await pickChunk(chunksSeen);
166
- const { outputString, selectorMap } = await processElements2(
167
- chunk,
168
- void 0,
169
- void 0
170
- );
166
+ const { outputString, selectorMap } = await processElements(chunk);
171
167
  console.log(
172
168
  `Stagehand (Browser Process): Extracted dom elements:
173
169
  ${outputString}`
@@ -187,7 +183,7 @@ ${outputString}`
187
183
  let index = 0;
188
184
  const results = [];
189
185
  for (let chunk = 0; chunk < totalChunks; chunk++) {
190
- const result = await processElements2(chunk, true, index);
186
+ const result = await processElements(chunk, true, index);
191
187
  results.push(result);
192
188
  index += Object.keys(result.selectorMap).length;
193
189
  }
@@ -221,7 +217,7 @@ ${outputString}`
221
217
  });
222
218
  }
223
219
  var xpathCache = /* @__PURE__ */ new Map();
224
- async function processElements2(chunk, scrollToChunk = true, indexOffset = 0) {
220
+ async function processElements(chunk, scrollToChunk = true, indexOffset = 0) {
225
221
  console.time("processElements:total");
226
222
  const viewportHeight = window.innerHeight;
227
223
  const chunkHeight = viewportHeight * chunk;
@@ -336,7 +332,7 @@ ${outputString}`
336
332
  }
337
333
  window.processDom = processDom;
338
334
  window.processAllOfDom = processAllOfDom;
339
- window.processElements = processElements2;
335
+ window.processElements = processElements;
340
336
  window.scrollToHeight = scrollToHeight;
341
337
  var leafElementDenyList = ["SVG", "IFRAME", "SCRIPT", "STYLE", "LINK"];
342
338
  var interactiveElementTypes = [
@@ -501,12 +497,19 @@ ${outputString}`
501
497
  // lib/dom/debug.ts
502
498
  async function debugDom() {
503
499
  window.chunkNumber = 0;
504
- const { selectorMap, outputString } = await window.processElements(
505
- window.chunkNumber
506
- );
500
+ const { selectorMap: multiSelectorMap, outputString } = await window.processElements(window.chunkNumber);
501
+ const selectorMap = multiSelectorMapToSelectorMap(multiSelectorMap);
507
502
  drawChunk(selectorMap);
508
503
  setupChunkNav();
509
504
  }
505
+ function multiSelectorMapToSelectorMap(multiSelectorMap) {
506
+ return Object.fromEntries(
507
+ Object.entries(multiSelectorMap).map(([key, selectors]) => [
508
+ Number(key),
509
+ selectors[0]
510
+ ])
511
+ );
512
+ }
510
513
  function drawChunk(selectorMap) {
511
514
  cleanupMarkers();
512
515
  Object.entries(selectorMap).forEach(([_index, selector]) => {
@@ -580,7 +583,10 @@ ${outputString}`
580
583
  window.chunkNumber -= 1;
581
584
  window.scrollTo(0, window.chunkNumber * window.innerHeight);
582
585
  await window.waitForDomSettle();
583
- const { selectorMap } = await processElements(window.chunkNumber);
586
+ const { selectorMap: multiSelectorMap } = await window.processElements(
587
+ window.chunkNumber
588
+ );
589
+ const selectorMap = multiSelectorMapToSelectorMap(multiSelectorMap);
584
590
  drawChunk(selectorMap);
585
591
  setupChunkNav();
586
592
  };
@@ -602,7 +608,10 @@ ${outputString}`
602
608
  window.chunkNumber += 1;
603
609
  window.scrollTo(0, window.chunkNumber * window.innerHeight);
604
610
  await window.waitForDomSettle();
605
- const { selectorMap } = await processElements(window.chunkNumber);
611
+ const { selectorMap: multiSelectorMap } = await window.processElements(
612
+ window.chunkNumber
613
+ );
614
+ const selectorMap = multiSelectorMapToSelectorMap(multiSelectorMap);
606
615
  drawChunk(selectorMap);
607
616
  setupChunkNav();
608
617
  };
@@ -60,7 +60,7 @@
60
60
  generateStandardXPath(element),
61
61
  generatedIdBasedXPath(element)
62
62
  ]);
63
- return [...idBasedXPath ? [idBasedXPath] : [], standardXPath, complexXPath];
63
+ return [standardXPath, ...idBasedXPath ? [idBasedXPath] : [], complexXPath];
64
64
  }
65
65
  async function generateComplexXPath(element) {
66
66
  const parts = [];
@@ -145,7 +145,7 @@
145
145
  }
146
146
  element = element.parentElement;
147
147
  }
148
- return parts.length ? `//${parts.join("//")}` : "";
148
+ return parts.length ? `/${parts.join("/")}` : "";
149
149
  }
150
150
  async function generatedIdBasedXPath(element) {
151
151
  if (isElementNode(element) && element.id) {
@@ -163,11 +163,7 @@
163
163
  }
164
164
  async function processDom(chunksSeen) {
165
165
  const { chunk, chunksArray } = await pickChunk(chunksSeen);
166
- const { outputString, selectorMap } = await processElements(
167
- chunk,
168
- void 0,
169
- void 0
170
- );
166
+ const { outputString, selectorMap } = await processElements(chunk);
171
167
  console.log(
172
168
  `Stagehand (Browser Process): Extracted dom elements:
173
169
  ${outputString}`
@@ -8,11 +8,7 @@
8
8
  }
9
9
  async function processDom(chunksSeen) {
10
10
  const { chunk, chunksArray } = await pickChunk(chunksSeen);
11
- const { outputString, selectorMap } = await processElements(
12
- chunk,
13
- void 0,
14
- void 0
15
- );
11
+ const { outputString, selectorMap } = await processElements(chunk);
16
12
  console.log(
17
13
  `Stagehand (Browser Process): Extracted dom elements:
18
14
  ${outputString}`
@@ -386,7 +382,7 @@ ${outputString}`
386
382
  generateStandardXPath(element),
387
383
  generatedIdBasedXPath(element)
388
384
  ]);
389
- return [...idBasedXPath ? [idBasedXPath] : [], standardXPath, complexXPath];
385
+ return [standardXPath, ...idBasedXPath ? [idBasedXPath] : [], complexXPath];
390
386
  }
391
387
  async function generateComplexXPath(element) {
392
388
  const parts = [];
@@ -471,7 +467,7 @@ ${outputString}`
471
467
  }
472
468
  element = element.parentElement;
473
469
  }
474
- return parts.length ? `//${parts.join("//")}` : "";
470
+ return parts.length ? `/${parts.join("/")}` : "";
475
471
  }
476
472
  async function generatedIdBasedXPath(element) {
477
473
  if (isElementNode(element) && element.id) {
package/dist/index.js CHANGED
@@ -90,21 +90,23 @@ var import_sdk2 = require("@browserbasehq/sdk");
90
90
  // lib/prompt.ts
91
91
  var actSystemPrompt = `
92
92
  # Instructions
93
- You are a browser automation assistant. Your job is to accomplish the user's goal across multiple model calls.
93
+ You are a browser automation assistant. Your job is to accomplish the user's goal across multiple model calls by running playwright commands.
94
94
 
95
- You are given:
95
+ ## Input
96
+ You will receive:
96
97
  1. the user's overall goal
97
98
  2. the steps that you've taken so far
98
99
  3. a list of active DOM elements in this chunk to consider to get closer to the goal.
99
100
  4. Optionally, a list of variable names that the user has provided that you may use to accomplish the goal. To use the variables, you must use the special <|VARIABLE_NAME|> syntax.
100
101
 
101
- You have 2 tools that you can call: doAction, and skipSection. Do action only performs Playwright actions. Do not perform any other actions.
102
102
 
103
- Note: If there is a popup on the page for cookies or advertising that has nothing to do with the goal, try to close it first before proceeding. As this can block the goal from being completed.
103
+ ## Your Goal / Specification
104
+ You have 2 tools that you can call: doAction, and skipSection. Do action only performs Playwright actions. Do exactly what the user's goal is. Do not perform any other actions or exceed the scope of the goal.
105
+ If the user's goal will be accomplished after running the playwright action, set completed to true. Better to have completed set to true if your are not sure.
104
106
 
105
- Also, verify if the goal has been accomplished already. Do this by checking if the goal has been accomplished based on the previous steps completed, the current page DOM elements and the current page URL / starting page URL. If it has, set completed to true and finish the task.
107
+ Note: If there is a popup on the page for cookies or advertising that has nothing to do with the goal, try to close it first before proceeding. As this can block the goal from being completed.
106
108
 
107
- Do exactly what the user's goal is. Do not exceed the scope of the goal.
109
+ Again, if the user's goal will be accomplished after running the playwright action, set completed to true.
108
110
  `;
109
111
  var verifyActCompletionSystemPrompt = `
110
112
  You are a browser automation assistant. The job has given you a goal and a list of steps that have been taken so far. Your job is to determine if the user's goal has been completed based on the provided information.
@@ -169,7 +171,7 @@ ${steps}
169
171
  # Current Active Dom Elements
170
172
  ${domElements}
171
173
  `;
172
- if (variables) {
174
+ if (variables && Object.keys(variables).length > 0) {
173
175
  actUserPrompt += `
174
176
  # Variables
175
177
  ${Object.entries(variables).map(([key, value]) => `<|${key.toUpperCase()}|>`).join("\n")}
@@ -600,6 +602,16 @@ var OpenAIClient = class {
600
602
  }
601
603
  createChatCompletion(options) {
602
604
  return __async(this, null, function* () {
605
+ const _a = options, { image: _ } = _a, optionsWithoutImage = __objRest(_a, ["image"]);
606
+ this.logger({
607
+ category: "OpenAI",
608
+ message: `Creating chat completion with options: ${JSON.stringify(
609
+ optionsWithoutImage,
610
+ null,
611
+ 2
612
+ )}`,
613
+ level: 1
614
+ });
603
615
  const cacheOptions = {
604
616
  model: options.model,
605
617
  messages: options.messages,
@@ -642,7 +654,7 @@ var OpenAIClient = class {
642
654
  };
643
655
  options.messages = [...options.messages, screenshotMessage];
644
656
  }
645
- const _a = options, { image, response_model } = _a, openAiOptions = __objRest(_a, ["image", "response_model"]);
657
+ const _b = options, { image, response_model } = _b, openAiOptions = __objRest(_b, ["image", "response_model"]);
646
658
  let responseFormat = void 0;
647
659
  if (options.response_model) {
648
660
  responseFormat = (0, import_zod2.zodResponseFormat)(
@@ -653,6 +665,11 @@ var OpenAIClient = class {
653
665
  const response = yield this.client.chat.completions.create(__spreadProps(__spreadValues({}, openAiOptions), {
654
666
  response_format: responseFormat
655
667
  }));
668
+ this.logger({
669
+ category: "OpenAI",
670
+ message: `Response: ${JSON.stringify(response, null, 2)}`,
671
+ level: 1
672
+ });
656
673
  if (response_model) {
657
674
  const extractedData = response.choices[0].message.content;
658
675
  const parsedData = JSON.parse(extractedData);
@@ -688,7 +705,17 @@ var AnthropicClient = class {
688
705
  }
689
706
  createChatCompletion(options) {
690
707
  return __async(this, null, function* () {
691
- var _a, _b, _c, _d, _e, _f, _g;
708
+ var _b, _c, _d, _e, _f, _g, _h;
709
+ const _a = options, { image: _ } = _a, optionsWithoutImage = __objRest(_a, ["image"]);
710
+ this.logger({
711
+ category: "Anthropic",
712
+ message: `Creating chat completion with options: ${JSON.stringify(
713
+ optionsWithoutImage,
714
+ null,
715
+ 2
716
+ )}`,
717
+ level: 1
718
+ });
692
719
  const cacheOptions = {
693
720
  model: options.model,
694
721
  messages: options.messages,
@@ -736,7 +763,7 @@ var AnthropicClient = class {
736
763
  };
737
764
  options.messages = [...options.messages, screenshotMessage];
738
765
  }
739
- let anthropicTools = (_a = options.tools) == null ? void 0 : _a.map((tool) => {
766
+ let anthropicTools = (_b = options.tools) == null ? void 0 : _b.map((tool) => {
740
767
  if (tool.type === "function") {
741
768
  return {
742
769
  name: tool.function.name,
@@ -753,8 +780,8 @@ var AnthropicClient = class {
753
780
  let toolDefinition;
754
781
  if (options.response_model) {
755
782
  const jsonSchema = (0, import_zod_to_json_schema.zodToJsonSchema)(options.response_model.schema);
756
- const schemaProperties = ((_c = (_b = jsonSchema.definitions) == null ? void 0 : _b.MySchema) == null ? void 0 : _c.properties) || jsonSchema.properties;
757
- const schemaRequired = ((_e = (_d = jsonSchema.definitions) == null ? void 0 : _d.MySchema) == null ? void 0 : _e.required) || jsonSchema.required;
783
+ const schemaProperties = ((_d = (_c = jsonSchema.definitions) == null ? void 0 : _c.MySchema) == null ? void 0 : _d.properties) || jsonSchema.properties;
784
+ const schemaRequired = ((_f = (_e = jsonSchema.definitions) == null ? void 0 : _e.MySchema) == null ? void 0 : _f.required) || jsonSchema.required;
758
785
  toolDefinition = {
759
786
  name: "print_extracted_data",
760
787
  description: "Prints the extracted data based on the provided schema.",
@@ -780,6 +807,11 @@ var AnthropicClient = class {
780
807
  system: systemMessage == null ? void 0 : systemMessage.content,
781
808
  temperature: options.temperature
782
809
  });
810
+ this.logger({
811
+ category: "Anthropic",
812
+ message: `Response: ${JSON.stringify(response, null, 2)}`,
813
+ level: 1
814
+ });
783
815
  const transformedResponse = {
784
816
  id: response.id,
785
817
  object: "chat.completion",
@@ -790,7 +822,7 @@ var AnthropicClient = class {
790
822
  index: 0,
791
823
  message: {
792
824
  role: "assistant",
793
- content: ((_f = response.content.find((c) => c.type === "text")) == null ? void 0 : _f.text) || null,
825
+ content: ((_g = response.content.find((c) => c.type === "text")) == null ? void 0 : _g.text) || null,
794
826
  tool_calls: response.content.filter((c) => c.type === "tool_use").map((toolUse) => ({
795
827
  id: toolUse.id,
796
828
  type: "function",
@@ -824,7 +856,7 @@ var AnthropicClient = class {
824
856
  } else {
825
857
  if (!options.retries || options.retries < 5) {
826
858
  return this.createChatCompletion(__spreadProps(__spreadValues({}, options), {
827
- retries: ((_g = options.retries) != null ? _g : 0) + 1
859
+ retries: ((_h = options.retries) != null ? _h : 0) + 1
828
860
  }));
829
861
  }
830
862
  throw new Error(
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@browserbasehq/stagehand",
3
- "version": "1.1.2",
3
+ "version": "1.2.0",
4
4
  "description": "An AI web browsing framework focused on simplicity and extensibility.",
5
5
  "main": "./dist/index.js",
6
6
  "module": "./dist/index.js",