@browserbasehq/stagehand 1.1.1 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -8,6 +8,7 @@ var __getOwnPropSymbols = Object.getOwnPropertySymbols;
8
8
  var __getProtoOf = Object.getPrototypeOf;
9
9
  var __hasOwnProp = Object.prototype.hasOwnProperty;
10
10
  var __propIsEnum = Object.prototype.propertyIsEnumerable;
11
+ var __reflectGet = Reflect.get;
11
12
  var __defNormalProp = (obj, key, value) => key in obj ? __defProp(obj, key, { enumerable: true, configurable: true, writable: true, value }) : obj[key] = value;
12
13
  var __spreadValues = (a, b) => {
13
14
  for (var prop in b || (b = {}))
@@ -54,6 +55,7 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
54
55
  mod
55
56
  ));
56
57
  var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
58
+ var __superGet = (cls, obj, key) => __reflectGet(__getProtoOf(cls), key, obj);
57
59
  var __async = (__this, __arguments, generator) => {
58
60
  return new Promise((resolve, reject) => {
59
61
  var fulfilled = (value) => {
@@ -82,27 +84,29 @@ __export(lib_exports, {
82
84
  });
83
85
  module.exports = __toCommonJS(lib_exports);
84
86
  var import_test = require("@playwright/test");
85
- var import_crypto = __toESM(require("crypto"));
86
87
  var import_fs2 = __toESM(require("fs"));
87
88
  var import_sdk2 = require("@browserbasehq/sdk");
88
89
 
89
90
  // lib/prompt.ts
90
91
  var actSystemPrompt = `
91
92
  # Instructions
92
- You are a browser automation assistant. Your job is to accomplish the user's goal across multiple model calls.
93
+ You are a browser automation assistant. Your job is to accomplish the user's goal across multiple model calls by running playwright commands.
93
94
 
94
- You are given:
95
+ ## Input
96
+ You will receive:
95
97
  1. the user's overall goal
96
98
  2. the steps that you've taken so far
97
99
  3. a list of active DOM elements in this chunk to consider to get closer to the goal.
100
+ 4. Optionally, a list of variable names that the user has provided that you may use to accomplish the goal. To use the variables, you must use the special <|VARIABLE_NAME|> syntax.
98
101
 
99
- You have 2 tools that you can call: doAction, and skipSection. Do action only performs Playwright actions. Do not perform any other actions.
100
102
 
101
- Note: If there is a popup on the page for cookies or advertising that has nothing to do with the goal, try to close it first before proceeding. As this can block the goal from being completed.
103
+ ## Your Goal / Specification
104
+ You have 2 tools that you can call: doAction, and skipSection. Do action only performs Playwright actions. Do exactly what the user's goal is. Do not perform any other actions or exceed the scope of the goal.
105
+ If the user's goal will be accomplished after running the playwright action, set completed to true. Better to have completed set to true if your are not sure.
102
106
 
103
- Also, verify if the goal has been accomplished already. Do this by checking if the goal has been accomplished based on the previous steps completed, the current page DOM elements and the current page URL / starting page URL. If it has, set completed to true and finish the task.
107
+ Note: If there is a popup on the page for cookies or advertising that has nothing to do with the goal, try to close it first before proceeding. As this can block the goal from being completed.
104
108
 
105
- Do exactly what the user's goal is. Do not exceed the scope of the goal.
109
+ Again, if the user's goal will be accomplished after running the playwright action, set completed to true.
106
110
  `;
107
111
  var verifyActCompletionSystemPrompt = `
108
112
  You are a browser automation assistant. The job has given you a goal and a list of steps that have been taken so far. Your job is to determine if the user's goal has been completed based on the provided information.
@@ -156,8 +160,8 @@ function buildActSystemPrompt() {
156
160
  content: actSystemPrompt
157
161
  };
158
162
  }
159
- function buildActUserPrompt(action, steps = "None", domElements) {
160
- const actUserPrompt = `
163
+ function buildActUserPrompt(action, steps = "None", domElements, variables) {
164
+ let actUserPrompt = `
161
165
  # My Goal
162
166
  ${action}
163
167
 
@@ -167,6 +171,12 @@ ${steps}
167
171
  # Current Active Dom Elements
168
172
  ${domElements}
169
173
  `;
174
+ if (variables && Object.keys(variables).length > 0) {
175
+ actUserPrompt += `
176
+ # Variables
177
+ ${Object.entries(variables).map(([key, value]) => `<|${key.toUpperCase()}|>`).join("\n")}
178
+ `;
179
+ }
170
180
  return {
171
181
  role: "user",
172
182
  content: actUserPrompt
@@ -392,6 +402,14 @@ function verifyActCompletion(_0) {
392
402
  return response.completed;
393
403
  });
394
404
  }
405
+ function fillInVariables(text, variables) {
406
+ let processedText = text;
407
+ Object.entries(variables).forEach(([key, value]) => {
408
+ const placeholder = `<|${key.toUpperCase()}|>`;
409
+ processedText = processedText.replace(placeholder, value);
410
+ });
411
+ return processedText;
412
+ }
395
413
  function act(_0) {
396
414
  return __async(this, arguments, function* ({
397
415
  action,
@@ -402,12 +420,13 @@ function act(_0) {
402
420
  screenshot,
403
421
  retries = 0,
404
422
  logger,
405
- requestId
423
+ requestId,
424
+ variables
406
425
  }) {
407
426
  const llmClient = llmProvider.getClient(modelName, requestId);
408
427
  const messages = [
409
428
  buildActSystemPrompt(),
410
- buildActUserPrompt(action, steps, domElements)
429
+ buildActUserPrompt(action, steps, domElements, variables)
411
430
  ];
412
431
  const response = yield llmClient.createChatCompletion({
413
432
  model: modelName,
@@ -583,6 +602,16 @@ var OpenAIClient = class {
583
602
  }
584
603
  createChatCompletion(options) {
585
604
  return __async(this, null, function* () {
605
+ const _a = options, { image: _ } = _a, optionsWithoutImage = __objRest(_a, ["image"]);
606
+ this.logger({
607
+ category: "OpenAI",
608
+ message: `Creating chat completion with options: ${JSON.stringify(
609
+ optionsWithoutImage,
610
+ null,
611
+ 2
612
+ )}`,
613
+ level: 1
614
+ });
586
615
  const cacheOptions = {
587
616
  model: options.model,
588
617
  messages: options.messages,
@@ -596,7 +625,18 @@ var OpenAIClient = class {
596
625
  if (this.enableCaching) {
597
626
  const cachedResponse = yield this.cache.get(cacheOptions, this.requestId);
598
627
  if (cachedResponse) {
628
+ this.logger({
629
+ category: "llm_cache",
630
+ message: `LLM Cache hit - returning cached response`,
631
+ level: 1
632
+ });
599
633
  return cachedResponse;
634
+ } else {
635
+ this.logger({
636
+ category: "llm_cache",
637
+ message: `LLM Cache miss - no cached response found`,
638
+ level: 1
639
+ });
600
640
  }
601
641
  }
602
642
  if (options.image) {
@@ -614,7 +654,7 @@ var OpenAIClient = class {
614
654
  };
615
655
  options.messages = [...options.messages, screenshotMessage];
616
656
  }
617
- const _a = options, { image, response_model } = _a, openAiOptions = __objRest(_a, ["image", "response_model"]);
657
+ const _b = options, { image, response_model } = _b, openAiOptions = __objRest(_b, ["image", "response_model"]);
618
658
  let responseFormat = void 0;
619
659
  if (options.response_model) {
620
660
  responseFormat = (0, import_zod2.zodResponseFormat)(
@@ -625,6 +665,11 @@ var OpenAIClient = class {
625
665
  const response = yield this.client.chat.completions.create(__spreadProps(__spreadValues({}, openAiOptions), {
626
666
  response_format: responseFormat
627
667
  }));
668
+ this.logger({
669
+ category: "OpenAI",
670
+ message: `Response: ${JSON.stringify(response, null, 2)}`,
671
+ level: 1
672
+ });
628
673
  if (response_model) {
629
674
  const extractedData = response.choices[0].message.content;
630
675
  const parsedData = JSON.parse(extractedData);
@@ -660,7 +705,17 @@ var AnthropicClient = class {
660
705
  }
661
706
  createChatCompletion(options) {
662
707
  return __async(this, null, function* () {
663
- var _a, _b, _c, _d, _e, _f, _g;
708
+ var _b, _c, _d, _e, _f, _g, _h;
709
+ const _a = options, { image: _ } = _a, optionsWithoutImage = __objRest(_a, ["image"]);
710
+ this.logger({
711
+ category: "Anthropic",
712
+ message: `Creating chat completion with options: ${JSON.stringify(
713
+ optionsWithoutImage,
714
+ null,
715
+ 2
716
+ )}`,
717
+ level: 1
718
+ });
664
719
  const cacheOptions = {
665
720
  model: options.model,
666
721
  messages: options.messages,
@@ -673,7 +728,18 @@ var AnthropicClient = class {
673
728
  if (this.enableCaching) {
674
729
  const cachedResponse = yield this.cache.get(cacheOptions, this.requestId);
675
730
  if (cachedResponse) {
731
+ this.logger({
732
+ category: "llm_cache",
733
+ message: `LLM Cache hit - returning cached response`,
734
+ level: 1
735
+ });
676
736
  return cachedResponse;
737
+ } else {
738
+ this.logger({
739
+ category: "llm_cache",
740
+ message: `LLM Cache miss - no cached response found`,
741
+ level: 1
742
+ });
677
743
  }
678
744
  }
679
745
  const systemMessage = options.messages.find((msg) => msg.role === "system");
@@ -697,7 +763,7 @@ var AnthropicClient = class {
697
763
  };
698
764
  options.messages = [...options.messages, screenshotMessage];
699
765
  }
700
- let anthropicTools = (_a = options.tools) == null ? void 0 : _a.map((tool) => {
766
+ let anthropicTools = (_b = options.tools) == null ? void 0 : _b.map((tool) => {
701
767
  if (tool.type === "function") {
702
768
  return {
703
769
  name: tool.function.name,
@@ -714,8 +780,8 @@ var AnthropicClient = class {
714
780
  let toolDefinition;
715
781
  if (options.response_model) {
716
782
  const jsonSchema = (0, import_zod_to_json_schema.zodToJsonSchema)(options.response_model.schema);
717
- const schemaProperties = ((_c = (_b = jsonSchema.definitions) == null ? void 0 : _b.MySchema) == null ? void 0 : _c.properties) || jsonSchema.properties;
718
- const schemaRequired = ((_e = (_d = jsonSchema.definitions) == null ? void 0 : _d.MySchema) == null ? void 0 : _e.required) || jsonSchema.required;
783
+ const schemaProperties = ((_d = (_c = jsonSchema.definitions) == null ? void 0 : _c.MySchema) == null ? void 0 : _d.properties) || jsonSchema.properties;
784
+ const schemaRequired = ((_f = (_e = jsonSchema.definitions) == null ? void 0 : _e.MySchema) == null ? void 0 : _f.required) || jsonSchema.required;
719
785
  toolDefinition = {
720
786
  name: "print_extracted_data",
721
787
  description: "Prints the extracted data based on the provided schema.",
@@ -741,6 +807,11 @@ var AnthropicClient = class {
741
807
  system: systemMessage == null ? void 0 : systemMessage.content,
742
808
  temperature: options.temperature
743
809
  });
810
+ this.logger({
811
+ category: "Anthropic",
812
+ message: `Response: ${JSON.stringify(response, null, 2)}`,
813
+ level: 1
814
+ });
744
815
  const transformedResponse = {
745
816
  id: response.id,
746
817
  object: "chat.completion",
@@ -751,7 +822,7 @@ var AnthropicClient = class {
751
822
  index: 0,
752
823
  message: {
753
824
  role: "assistant",
754
- content: ((_f = response.content.find((c) => c.type === "text")) == null ? void 0 : _f.text) || null,
825
+ content: ((_g = response.content.find((c) => c.type === "text")) == null ? void 0 : _g.text) || null,
755
826
  tool_calls: response.content.filter((c) => c.type === "tool_use").map((toolUse) => ({
756
827
  id: toolUse.id,
757
828
  type: "function",
@@ -785,7 +856,7 @@ var AnthropicClient = class {
785
856
  } else {
786
857
  if (!options.retries || options.retries < 5) {
787
858
  return this.createChatCompletion(__spreadProps(__spreadValues({}, options), {
788
- retries: ((_g = options.retries) != null ? _g : 0) + 1
859
+ retries: ((_h = options.retries) != null ? _h : 0) + 1
789
860
  }));
790
861
  }
791
862
  throw new Error(
@@ -801,24 +872,24 @@ var AnthropicClient = class {
801
872
  }
802
873
  };
803
874
 
804
- // lib/llm/LLMCache.ts
875
+ // lib/cache/BaseCache.ts
805
876
  var fs = __toESM(require("fs"));
806
877
  var path = __toESM(require("path"));
807
878
  var crypto = __toESM(require("crypto"));
808
- var LLMCache = class {
809
- constructor(logger, cacheDir = path.join(process.cwd(), "tmp", ".cache"), cacheFile = "llm_calls.json") {
879
+ var BaseCache = class {
880
+ constructor(logger, cacheDir = path.join(process.cwd(), "tmp", ".cache"), cacheFile = "cache.json") {
810
881
  this.CACHE_MAX_AGE_MS = 7 * 24 * 60 * 60 * 1e3;
811
882
  // 1 week in milliseconds
812
883
  this.CLEANUP_PROBABILITY = 0.01;
813
- // 1% chance
814
884
  this.LOCK_TIMEOUT_MS = 1e3;
815
- this.lock_acquired = false;
816
- this.count_lock_acquire_failures = 0;
817
- this.request_id_to_used_hashes = {};
885
+ this.lockAcquired = false;
886
+ this.lockAcquireFailures = 0;
887
+ // Added for request ID tracking
888
+ this.requestIdToUsedHashes = {};
818
889
  this.logger = logger;
819
890
  this.cacheDir = cacheDir;
820
891
  this.cacheFile = path.join(cacheDir, cacheFile);
821
- this.lockFile = path.join(cacheDir, "llm_cache.lock");
892
+ this.lockFile = path.join(cacheDir, "cache.lock");
822
893
  this.ensureCacheDirectory();
823
894
  this.setupProcessHandlers();
824
895
  }
@@ -832,11 +903,11 @@ var LLMCache = class {
832
903
  process.on("SIGTERM", releaseLockAndExit);
833
904
  process.on("uncaughtException", (err) => {
834
905
  this.logger({
835
- category: "llm_cache",
906
+ category: "base_cache",
836
907
  message: `Uncaught exception: ${err}`,
837
908
  level: 2
838
909
  });
839
- if (this.lock_acquired) {
910
+ if (this.lockAcquired) {
840
911
  releaseLockAndExit();
841
912
  }
842
913
  });
@@ -844,6 +915,11 @@ var LLMCache = class {
844
915
  ensureCacheDirectory() {
845
916
  if (!fs.existsSync(this.cacheDir)) {
846
917
  fs.mkdirSync(this.cacheDir, { recursive: true });
918
+ this.logger({
919
+ category: "base_cache",
920
+ message: `Created cache directory at ${this.cacheDir}`,
921
+ level: 1
922
+ });
847
923
  }
848
924
  }
849
925
  createHash(data) {
@@ -862,25 +938,35 @@ var LLMCache = class {
862
938
  const lockAge = Date.now() - fs.statSync(this.lockFile).mtimeMs;
863
939
  if (lockAge > this.LOCK_TIMEOUT_MS) {
864
940
  fs.unlinkSync(this.lockFile);
941
+ this.logger({
942
+ category: "base_cache",
943
+ message: "Stale lock file removed",
944
+ level: 1
945
+ });
865
946
  }
866
947
  }
867
948
  fs.writeFileSync(this.lockFile, process.pid.toString(), { flag: "wx" });
868
- this.count_lock_acquire_failures = 0;
869
- this.lock_acquired = true;
949
+ this.lockAcquireFailures = 0;
950
+ this.lockAcquired = true;
951
+ this.logger({
952
+ category: "base_cache",
953
+ message: "Lock acquired",
954
+ level: 1
955
+ });
870
956
  return true;
871
957
  } catch (error) {
872
958
  yield this.sleep(5);
873
959
  }
874
960
  }
875
961
  this.logger({
876
- category: "llm_cache",
962
+ category: "base_cache",
877
963
  message: "Failed to acquire lock after timeout",
878
964
  level: 2
879
965
  });
880
- this.count_lock_acquire_failures++;
881
- if (this.count_lock_acquire_failures >= 3) {
966
+ this.lockAcquireFailures++;
967
+ if (this.lockAcquireFailures >= 3) {
882
968
  this.logger({
883
- category: "llm_cache",
969
+ category: "base_cache",
884
970
  message: "Failed to acquire lock 3 times in a row. Releasing lock manually.",
885
971
  level: 1
886
972
  });
@@ -893,111 +979,125 @@ var LLMCache = class {
893
979
  try {
894
980
  if (fs.existsSync(this.lockFile)) {
895
981
  fs.unlinkSync(this.lockFile);
982
+ this.logger({
983
+ category: "base_cache",
984
+ message: "Lock released",
985
+ level: 1
986
+ });
896
987
  }
897
- this.lock_acquired = false;
988
+ this.lockAcquired = false;
898
989
  } catch (error) {
899
990
  this.logger({
900
- category: "llm_cache",
991
+ category: "base_cache",
901
992
  message: `Error releasing lock: ${error}`,
902
993
  level: 2
903
994
  });
904
995
  }
905
996
  }
997
+ /**
998
+ * Cleans up stale cache entries that exceed the maximum age.
999
+ */
1000
+ cleanupStaleEntries() {
1001
+ return __async(this, null, function* () {
1002
+ if (!(yield this.acquireLock())) {
1003
+ this.logger({
1004
+ category: "llm_cache",
1005
+ message: "Failed to acquire lock for cleanup",
1006
+ level: 2
1007
+ });
1008
+ return;
1009
+ }
1010
+ try {
1011
+ const cache = this.readCache();
1012
+ const now = Date.now();
1013
+ let entriesRemoved = 0;
1014
+ for (const [hash, entry] of Object.entries(cache)) {
1015
+ if (now - entry.timestamp > this.CACHE_MAX_AGE_MS) {
1016
+ delete cache[hash];
1017
+ entriesRemoved++;
1018
+ }
1019
+ }
1020
+ if (entriesRemoved > 0) {
1021
+ this.writeCache(cache);
1022
+ this.logger({
1023
+ category: "llm_cache",
1024
+ message: `Cleaned up ${entriesRemoved} stale cache entries`,
1025
+ level: 1
1026
+ });
1027
+ }
1028
+ } catch (error) {
1029
+ this.logger({
1030
+ category: "llm_cache",
1031
+ message: `Error during cache cleanup: ${error}`,
1032
+ level: 2
1033
+ });
1034
+ } finally {
1035
+ this.releaseLock();
1036
+ }
1037
+ });
1038
+ }
906
1039
  readCache() {
907
1040
  if (fs.existsSync(this.cacheFile)) {
908
- return JSON.parse(fs.readFileSync(this.cacheFile, "utf-8"));
1041
+ try {
1042
+ const data = fs.readFileSync(this.cacheFile, "utf-8");
1043
+ return JSON.parse(data);
1044
+ } catch (error) {
1045
+ this.logger({
1046
+ category: "base_cache",
1047
+ message: `Error reading cache file: ${error}. Resetting cache.`,
1048
+ level: 1
1049
+ });
1050
+ this.resetCache();
1051
+ return {};
1052
+ }
909
1053
  }
910
1054
  return {};
911
1055
  }
912
1056
  writeCache(cache) {
913
1057
  try {
914
- if (Math.random() < this.CLEANUP_PROBABILITY) {
915
- this.cleanupStaleEntries(cache);
916
- }
917
1058
  fs.writeFileSync(this.cacheFile, JSON.stringify(cache, null, 2));
918
- } finally {
919
- this.releaseLock();
920
- }
921
- }
922
- cleanupStaleEntries(cache) {
923
- if (!this.acquireLock()) {
924
- this.logger({
925
- category: "llm_cache",
926
- message: "Failed to acquire lock for cleaning up cache",
927
- level: 2
928
- });
929
- return;
930
- }
931
- try {
932
- const now = Date.now();
933
- let entriesRemoved = 0;
934
- for (const [hash, entry] of Object.entries(cache)) {
935
- if (now - entry.timestamp > this.CACHE_MAX_AGE_MS) {
936
- delete cache[hash];
937
- entriesRemoved++;
938
- }
939
- }
940
- if (entriesRemoved > 0) {
941
- this.logger({
942
- category: "llm_cache",
943
- message: `Cleaned up ${entriesRemoved} stale cache entries`,
944
- level: 1
945
- });
946
- }
947
- } catch (error) {
948
1059
  this.logger({
949
- category: "llm_cache",
950
- message: `Error cleaning up stale cache entries: ${error}`,
1060
+ category: "base_cache",
1061
+ message: "Cache written to file",
951
1062
  level: 1
952
1063
  });
953
- } finally {
954
- this.releaseLock();
955
- }
956
- }
957
- resetCache() {
958
- if (!this.acquireLock()) {
1064
+ } catch (error) {
959
1065
  this.logger({
960
- category: "llm_cache",
961
- message: "Failed to acquire lock for resetting cache",
1066
+ category: "base_cache",
1067
+ message: `Error writing cache file: ${error}`,
962
1068
  level: 2
963
1069
  });
964
- return;
965
- }
966
- try {
967
- this.ensureCacheDirectory();
968
- fs.writeFileSync(this.cacheFile, "{}");
969
1070
  } finally {
970
1071
  this.releaseLock();
971
1072
  }
972
1073
  }
973
- get(options, requestId) {
1074
+ /**
1075
+ * Retrieves data from the cache based on the provided options.
1076
+ * @param hashObj - The options used to generate the cache key.
1077
+ * @param requestId - The identifier for the current request.
1078
+ * @returns The cached data if available, otherwise null.
1079
+ */
1080
+ get(hashObj, requestId) {
974
1081
  return __async(this, null, function* () {
975
- var _a, _b;
976
1082
  if (!(yield this.acquireLock())) {
977
1083
  this.logger({
978
- category: "llm_cache",
1084
+ category: "base_cache",
979
1085
  message: "Failed to acquire lock for getting cache",
980
1086
  level: 2
981
1087
  });
982
1088
  return null;
983
1089
  }
984
1090
  try {
985
- const hash = this.createHash(options);
1091
+ const hash = this.createHash(hashObj);
986
1092
  const cache = this.readCache();
987
1093
  if (cache[hash]) {
988
- this.logger({
989
- category: "llm_cache",
990
- message: "Cache hit",
991
- level: 1
992
- });
993
- (_b = (_a = this.request_id_to_used_hashes)[requestId]) != null ? _b : _a[requestId] = [];
994
- this.request_id_to_used_hashes[requestId].push(hash);
995
- return cache[hash].response;
1094
+ this.trackRequestIdUsage(requestId, hash);
1095
+ return cache[hash].data;
996
1096
  }
997
1097
  return null;
998
1098
  } catch (error) {
999
1099
  this.logger({
1000
- category: "llm_cache",
1100
+ category: "base_cache",
1001
1101
  message: `Error getting cache: ${error}. Resetting cache.`,
1002
1102
  level: 1
1003
1103
  });
@@ -1008,82 +1108,189 @@ var LLMCache = class {
1008
1108
  }
1009
1109
  });
1010
1110
  }
1011
- deleteCacheForRequestId(requestId) {
1111
+ /**
1112
+ * Stores data in the cache based on the provided options and requestId.
1113
+ * @param hashObj - The options used to generate the cache key.
1114
+ * @param data - The data to be cached.
1115
+ * @param requestId - The identifier for the cache entry.
1116
+ */
1117
+ set(hashObj, data, requestId) {
1012
1118
  return __async(this, null, function* () {
1013
- var _a;
1014
1119
  if (!(yield this.acquireLock())) {
1015
1120
  this.logger({
1016
- category: "llm_cache",
1017
- message: "Failed to acquire lock for deleting cache",
1121
+ category: "base_cache",
1122
+ message: "Failed to acquire lock for setting cache",
1018
1123
  level: 2
1019
1124
  });
1020
1125
  return;
1021
1126
  }
1022
1127
  try {
1128
+ const hash = this.createHash(hashObj);
1023
1129
  const cache = this.readCache();
1024
- let entriesRemoved = [];
1025
- for (const hash of (_a = this.request_id_to_used_hashes[requestId]) != null ? _a : []) {
1026
- if (cache[hash]) {
1027
- entriesRemoved.push(cache[hash]);
1028
- delete cache[hash];
1029
- }
1030
- }
1031
- this.logger({
1032
- category: "llm_cache",
1033
- message: `Deleted ${entriesRemoved.length} cache entries for requestId ${requestId}`,
1034
- level: 1
1035
- });
1130
+ cache[hash] = {
1131
+ data,
1132
+ timestamp: Date.now(),
1133
+ requestId
1134
+ };
1036
1135
  this.writeCache(cache);
1037
- } catch (exception) {
1136
+ this.trackRequestIdUsage(requestId, hash);
1137
+ } catch (error) {
1038
1138
  this.logger({
1039
- category: "llm_cache",
1040
- message: `Error deleting cache for requestId ${requestId}: ${exception}`,
1139
+ category: "base_cache",
1140
+ message: `Error setting cache: ${error}. Resetting cache.`,
1041
1141
  level: 1
1042
1142
  });
1143
+ this.resetCache();
1043
1144
  } finally {
1044
1145
  this.releaseLock();
1146
+ if (Math.random() < this.CLEANUP_PROBABILITY) {
1147
+ this.cleanupStaleEntries();
1148
+ }
1045
1149
  }
1046
1150
  });
1047
1151
  }
1048
- set(options, response, requestId) {
1152
+ delete(hashObj) {
1049
1153
  return __async(this, null, function* () {
1050
- var _a, _b;
1051
1154
  if (!(yield this.acquireLock())) {
1052
1155
  this.logger({
1053
- category: "llm_cache",
1054
- message: "Failed to acquire lock for setting cache",
1156
+ category: "base_cache",
1157
+ message: "Failed to acquire lock for removing cache entry",
1055
1158
  level: 2
1056
1159
  });
1057
1160
  return;
1058
1161
  }
1059
1162
  try {
1060
- const hash = this.createHash(options);
1163
+ const hash = this.createHash(hashObj);
1061
1164
  const cache = this.readCache();
1062
- cache[hash] = {
1063
- response,
1064
- timestamp: Date.now(),
1065
- requestId
1066
- };
1067
- this.writeCache(cache);
1068
- (_b = (_a = this.request_id_to_used_hashes)[requestId]) != null ? _b : _a[requestId] = [];
1069
- this.request_id_to_used_hashes[requestId].push(hash);
1165
+ if (cache[hash]) {
1166
+ delete cache[hash];
1167
+ this.writeCache(cache);
1168
+ } else {
1169
+ this.logger({
1170
+ category: "base_cache",
1171
+ message: "Cache entry not found to delete",
1172
+ level: 1
1173
+ });
1174
+ }
1175
+ } catch (error) {
1070
1176
  this.logger({
1071
- category: "llm_cache",
1072
- message: "Cache miss - saved new response",
1073
- level: 1
1177
+ category: "base_cache",
1178
+ message: `Error removing cache entry: ${error}`,
1179
+ level: 2
1074
1180
  });
1181
+ } finally {
1182
+ this.releaseLock();
1183
+ }
1184
+ });
1185
+ }
1186
+ /**
1187
+ * Tracks the usage of a hash with a specific requestId.
1188
+ * @param requestId - The identifier for the current request.
1189
+ * @param hash - The cache key hash.
1190
+ */
1191
+ trackRequestIdUsage(requestId, hash) {
1192
+ var _a, _b;
1193
+ (_b = (_a = this.requestIdToUsedHashes)[requestId]) != null ? _b : _a[requestId] = [];
1194
+ this.requestIdToUsedHashes[requestId].push(hash);
1195
+ }
1196
+ /**
1197
+ * Deletes all cache entries associated with a specific requestId.
1198
+ * @param requestId - The identifier for the request whose cache entries should be deleted.
1199
+ */
1200
+ deleteCacheForRequestId(requestId) {
1201
+ return __async(this, null, function* () {
1202
+ var _a;
1203
+ if (!(yield this.acquireLock())) {
1204
+ this.logger({
1205
+ category: "base_cache",
1206
+ message: "Failed to acquire lock for deleting cache",
1207
+ level: 2
1208
+ });
1209
+ return;
1210
+ }
1211
+ try {
1212
+ const cache = this.readCache();
1213
+ const hashes = (_a = this.requestIdToUsedHashes[requestId]) != null ? _a : [];
1214
+ let entriesRemoved = 0;
1215
+ for (const hash of hashes) {
1216
+ if (cache[hash]) {
1217
+ delete cache[hash];
1218
+ entriesRemoved++;
1219
+ }
1220
+ }
1221
+ if (entriesRemoved > 0) {
1222
+ this.writeCache(cache);
1223
+ } else {
1224
+ this.logger({
1225
+ category: "base_cache",
1226
+ message: `No cache entries found for requestId ${requestId}`,
1227
+ level: 1
1228
+ });
1229
+ }
1230
+ delete this.requestIdToUsedHashes[requestId];
1075
1231
  } catch (error) {
1076
1232
  this.logger({
1077
- category: "llm_cache",
1078
- message: `Error setting cache: ${error}. Resetting cache.`,
1079
- level: 1
1233
+ category: "base_cache",
1234
+ message: `Error deleting cache for requestId ${requestId}: ${error}`,
1235
+ level: 2
1080
1236
  });
1081
- this.resetCache();
1082
1237
  } finally {
1083
1238
  this.releaseLock();
1084
1239
  }
1085
1240
  });
1086
1241
  }
1242
+ /**
1243
+ * Resets the entire cache by clearing the cache file.
1244
+ */
1245
+ resetCache() {
1246
+ try {
1247
+ fs.writeFileSync(this.cacheFile, "{}");
1248
+ this.requestIdToUsedHashes = {};
1249
+ } catch (error) {
1250
+ this.logger({
1251
+ category: "base_cache",
1252
+ message: `Error resetting cache: ${error}`,
1253
+ level: 2
1254
+ });
1255
+ } finally {
1256
+ this.releaseLock();
1257
+ }
1258
+ }
1259
+ };
1260
+
1261
+ // lib/cache/LLMCache.ts
1262
+ var LLMCache = class _LLMCache extends BaseCache {
1263
+ constructor(logger, cacheDir, cacheFile) {
1264
+ super(logger, cacheDir, cacheFile || "llm_calls.json");
1265
+ }
1266
+ /**
1267
+ * Overrides the get method to track used hashes by requestId.
1268
+ * @param options - The options used to generate the cache key.
1269
+ * @param requestId - The identifier for the current request.
1270
+ * @returns The cached data if available, otherwise null.
1271
+ */
1272
+ get(options, requestId) {
1273
+ return __async(this, null, function* () {
1274
+ const data = yield __superGet(_LLMCache.prototype, this, "get").call(this, options, requestId);
1275
+ return data;
1276
+ });
1277
+ }
1278
+ /**
1279
+ * Overrides the set method to include cache cleanup logic.
1280
+ * @param options - The options used to generate the cache key.
1281
+ * @param data - The data to be cached.
1282
+ * @param requestId - The identifier for the current request.
1283
+ */
1284
+ set(options, data, requestId) {
1285
+ return __async(this, null, function* () {
1286
+ yield __superGet(_LLMCache.prototype, this, "set").call(this, options, data, requestId);
1287
+ this.logger({
1288
+ category: "llm_cache",
1289
+ message: "Cache miss - saved new response",
1290
+ level: 1
1291
+ });
1292
+ });
1293
+ }
1087
1294
  };
1088
1295
 
1089
1296
  // lib/llm/LLMProvider.ts
@@ -1203,8 +1410,8 @@ var ScreenshotService = class _ScreenshotService {
1203
1410
  const { width, height } = yield image.metadata();
1204
1411
  const svgAnnotations = yield Promise.all(
1205
1412
  Object.entries(this.selectorMap).map(
1206
- (_0) => __async(this, [_0], function* ([id, selector]) {
1207
- return this.createElementAnnotation(id, selector);
1413
+ (_0) => __async(this, [_0], function* ([id, selectors]) {
1414
+ return this.createElementAnnotation(id, selectors);
1208
1415
  })
1209
1416
  )
1210
1417
  );
@@ -1226,18 +1433,25 @@ var ScreenshotService = class _ScreenshotService {
1226
1433
  return annotatedScreenshot;
1227
1434
  });
1228
1435
  }
1229
- createElementAnnotation(id, selector) {
1436
+ createElementAnnotation(id, selectors) {
1230
1437
  return __async(this, null, function* () {
1231
1438
  try {
1232
- const element = yield this.page.locator(`xpath=${selector}`).first();
1233
- const box = yield element.boundingBox();
1439
+ let element = null;
1440
+ const selectorPromises = selectors.map(
1441
+ (selector) => __async(this, null, function* () {
1442
+ try {
1443
+ element = yield this.page.locator(`xpath=${selector}`).first();
1444
+ const box2 = yield element.boundingBox({ timeout: 5e3 });
1445
+ return box2;
1446
+ } catch (e) {
1447
+ return null;
1448
+ }
1449
+ })
1450
+ );
1451
+ const boxes = yield Promise.all(selectorPromises);
1452
+ const box = boxes.find((b) => b !== null);
1234
1453
  if (!box) {
1235
- this.log({
1236
- category: "Debug",
1237
- message: `No bounding box for element ${id}`,
1238
- level: 2
1239
- });
1240
- return "";
1454
+ throw new Error(`Unable to create annotation for element ${id}`);
1241
1455
  }
1242
1456
  const scrollPosition = yield this.page.evaluate(() => ({
1243
1457
  scrollX: window.scrollX,
@@ -1264,8 +1478,8 @@ var ScreenshotService = class _ScreenshotService {
1264
1478
  `;
1265
1479
  } catch (error) {
1266
1480
  this.log({
1267
- category: "Error",
1268
- message: `Failed to create annotation for element ${id}: ${error}`,
1481
+ category: "Vision",
1482
+ message: `Warning: Failed to create annotation for element ${id}: ${error}, trace: ${error.stack}`,
1269
1483
  level: 0
1270
1484
  });
1271
1485
  return "";
@@ -1317,21 +1531,962 @@ var ScreenshotService = class _ScreenshotService {
1317
1531
  }
1318
1532
  };
1319
1533
 
1320
- // lib/index.ts
1321
- require("dotenv").config({ path: ".env" });
1322
- function getBrowser(apiKey, projectId, env = "LOCAL", headless = false, logger, browserbaseSessionCreateParams, browserbaseResumeSessionID) {
1323
- return __async(this, null, function* () {
1324
- if (env === "BROWSERBASE") {
1325
- if (!apiKey) {
1326
- logger({
1327
- category: "Init",
1328
- message: "BROWSERBASE_API_KEY is required to use BROWSERBASE env. Defaulting to LOCAL.",
1329
- level: 0
1330
- });
1331
- env = "LOCAL";
1332
- }
1333
- if (!projectId) {
1334
- logger({
1534
+ // lib/types.ts
1535
+ var PlaywrightCommandException = class extends Error {
1536
+ constructor(message) {
1537
+ super(message);
1538
+ this.name = "PlaywrightCommandException";
1539
+ }
1540
+ };
1541
+ var PlaywrightCommandMethodNotSupportedException = class extends Error {
1542
+ constructor(message) {
1543
+ super(message);
1544
+ this.name = "PlaywrightCommandMethodNotSupportedException";
1545
+ }
1546
+ };
1547
+
1548
+ // lib/cache/ActionCache.ts
1549
+ var ActionCache = class _ActionCache extends BaseCache {
1550
+ constructor(logger, cacheDir, cacheFile) {
1551
+ super(logger, cacheDir, cacheFile || "action_cache.json");
1552
+ }
1553
+ addActionStep(_0) {
1554
+ return __async(this, arguments, function* ({
1555
+ url,
1556
+ action,
1557
+ previousSelectors,
1558
+ playwrightCommand,
1559
+ componentString,
1560
+ xpaths,
1561
+ newStepString,
1562
+ completed,
1563
+ requestId
1564
+ }) {
1565
+ this.logger({
1566
+ category: "action_cache",
1567
+ message: `Adding action step to cache: ${action}, requestId: ${requestId}, url: ${url}, previousSelectors: ${previousSelectors}`,
1568
+ level: 1
1569
+ });
1570
+ yield this.set(
1571
+ { url, action, previousSelectors },
1572
+ {
1573
+ playwrightCommand,
1574
+ componentString,
1575
+ xpaths,
1576
+ newStepString,
1577
+ completed,
1578
+ previousSelectors,
1579
+ action
1580
+ },
1581
+ requestId
1582
+ );
1583
+ });
1584
+ }
1585
+ /**
1586
+ * Retrieves the cached action step stored for a given URL, action, and list of previous selectors.
1587
+ * @param url, action, previousSelectors - Fields of the argument object that together form the cache key.
1588
+ * @param requestId - The identifier for the current request.
1589
+ * @returns The cached data for that key, or null if not found.
1590
+ */
1591
+ getActionStep(_0) {
1592
+ return __async(this, arguments, function* ({
1593
+ url,
1594
+ action,
1595
+ previousSelectors,
1596
+ requestId
1597
+ }) {
1598
+ const data = yield __superGet(_ActionCache.prototype, this, "get").call(this, { url, action, previousSelectors }, requestId);
1599
+ if (!data) {
1600
+ return null;
1601
+ }
1602
+ return data;
1603
+ });
1604
+ }
1605
+ removeActionStep(cacheHashObj) {
1606
+ return __async(this, null, function* () {
1607
+ yield __superGet(_ActionCache.prototype, this, "delete").call(this, cacheHashObj);
1608
+ });
1609
+ }
1610
+ /**
1611
+ * Clears the cache entries associated with a specific request ID and logs the removal.
1611
+ * Delegates to the base cache's deleteCacheForRequestId.
1612
+ * @param requestId - The identifier of the request whose cached actions are cleared.
1614
+ */
1615
+ clearAction(requestId) {
1616
+ return __async(this, null, function* () {
1617
+ yield __superGet(_ActionCache.prototype, this, "deleteCacheForRequestId").call(this, requestId);
1618
+ this.logger({
1619
+ category: "action_cache",
1620
+ message: `Cleared action for ID: ${requestId}`,
1621
+ level: 1
1622
+ });
1623
+ });
1624
+ }
1625
+ /**
1626
+ * Resets the entire action cache.
1627
+ */
1628
+ resetCache() {
1629
+ return __async(this, null, function* () {
1630
+ yield __superGet(_ActionCache.prototype, this, "resetCache").call(this);
1631
+ this.logger({
1632
+ category: "action_cache",
1633
+ message: "Action cache has been reset.",
1634
+ level: 1
1635
+ });
1636
+ });
1637
+ }
1638
+ };
1639
+
1640
+ // lib/utils.ts
1641
+ var import_crypto = __toESM(require("crypto"));
1642
+ function generateId(operation) {
1643
+ return import_crypto.default.createHash("sha256").update(operation).digest("hex");
1644
+ }
1645
+
1646
+ // lib/handlers/actHandler.ts
1647
+ var StagehandActHandler = class {
1648
+ constructor({
1649
+ stagehand,
1650
+ verbose,
1651
+ llmProvider,
1652
+ enableCaching,
1653
+ logger,
1654
+ waitForSettledDom,
1655
+ defaultModelName,
1656
+ startDomDebug,
1657
+ cleanupDomDebug
1658
+ }) {
1659
+ this.stagehand = stagehand;
1660
+ this.verbose = verbose;
1661
+ this.llmProvider = llmProvider;
1662
+ this.enableCaching = enableCaching;
1663
+ this.logger = logger;
1664
+ this.waitForSettledDom = waitForSettledDom;
1665
+ this.actionCache = new ActionCache(this.logger);
1666
+ this.defaultModelName = defaultModelName;
1667
+ this.startDomDebug = startDomDebug;
1668
+ this.cleanupDomDebug = cleanupDomDebug;
1669
+ this.actions = {};
1670
+ }
1671
+ _recordAction(action, result) {
1672
+ return __async(this, null, function* () {
1673
+ const id = generateId(action);
1674
+ this.actions[id] = { result, action };
1675
+ return id;
1676
+ });
1677
+ }
1678
+ _verifyActionCompletion(_0) {
1679
+ return __async(this, arguments, function* ({
1680
+ completed,
1681
+ verifierUseVision,
1682
+ requestId,
1683
+ action,
1684
+ steps,
1685
+ model,
1686
+ domSettleTimeoutMs
1687
+ }) {
1688
+ yield this.waitForSettledDom(domSettleTimeoutMs);
1689
+ const { selectorMap } = yield this.stagehand.page.evaluate(() => {
1690
+ return window.processAllOfDom();
1691
+ });
1692
+ let actionCompleted = false;
1693
+ if (completed) {
1694
+ this.stagehand.log({
1695
+ category: "action",
1696
+ message: `Action marked as completed, Verifying if this is true...`,
1697
+ level: 1
1698
+ });
1699
+ let domElements = void 0;
1700
+ let fullpageScreenshot = void 0;
1701
+ if (verifierUseVision) {
1702
+ try {
1703
+ const screenshotService = new ScreenshotService(
1704
+ this.stagehand.page,
1705
+ selectorMap,
1706
+ this.verbose
1707
+ );
1708
+ fullpageScreenshot = yield screenshotService.getScreenshot(true, 15);
1709
+ } catch (e) {
1710
+ this.stagehand.log({
1711
+ category: "action",
1712
+ message: `Error getting full page screenshot: ${e.message}
1713
+ . Trying again...`,
1714
+ level: 1
1715
+ });
1716
+ const screenshotService = new ScreenshotService(
1717
+ this.stagehand.page,
1718
+ selectorMap,
1719
+ this.verbose
1720
+ );
1721
+ fullpageScreenshot = yield screenshotService.getScreenshot(true, 15);
1722
+ }
1723
+ } else {
1724
+ ({ outputString: domElements } = yield this.stagehand.page.evaluate(
1725
+ () => {
1726
+ return window.processAllOfDom();
1727
+ }
1728
+ ));
1729
+ }
1730
+ actionCompleted = yield verifyActCompletion({
1731
+ goal: action,
1732
+ steps,
1733
+ llmProvider: this.llmProvider,
1734
+ modelName: model,
1735
+ screenshot: fullpageScreenshot,
1736
+ domElements,
1737
+ logger: this.logger,
1738
+ requestId
1739
+ });
1740
+ this.stagehand.log({
1741
+ category: "action",
1742
+ message: `Action completion verification result: ${actionCompleted}`,
1743
+ level: 1
1744
+ });
1745
+ }
1746
+ return actionCompleted;
1747
+ });
1748
+ }
1749
+ _performPlaywrightMethod(method, args, xpath, domSettleTimeoutMs) {
1750
+ return __async(this, null, function* () {
1751
+ const locator = this.stagehand.page.locator(`xpath=${xpath}`).first();
1752
+ const initialUrl = this.stagehand.page.url();
1753
+ if (method === "scrollIntoView") {
1754
+ this.stagehand.log({
1755
+ category: "action",
1756
+ message: `Scrolling element into view`,
1757
+ level: 2
1758
+ });
1759
+ try {
1760
+ yield locator.evaluate((element) => {
1761
+ element.scrollIntoView({ behavior: "smooth", block: "center" });
1762
+ }).catch((e) => {
1763
+ this.stagehand.log({
1764
+ category: "action",
1765
+ message: `Error scrolling element into view: ${e.message}
1766
+ Trace: ${e.stack}`,
1767
+ level: 1
1768
+ });
1769
+ });
1770
+ } catch (e) {
1771
+ this.stagehand.log({
1772
+ category: "action",
1773
+ message: `Error scrolling element into view: ${e.message}
1774
+ Trace: ${e.stack}`,
1775
+ level: 1
1776
+ });
1777
+ throw new PlaywrightCommandException(e.message);
1778
+ }
1779
+ } else if (method === "fill" || method === "type") {
1780
+ try {
1781
+ yield locator.fill("");
1782
+ yield locator.click();
1783
+ const text = args[0];
1784
+ for (const char of text) {
1785
+ yield this.stagehand.page.keyboard.type(char, {
1786
+ delay: Math.random() * 50 + 25
1787
+ });
1788
+ }
1789
+ } catch (e) {
1790
+ this.logger({
1791
+ category: "action",
1792
+ message: `Error filling element: ${e.message}
1793
+ Trace: ${e.stack}`,
1794
+ level: 1
1795
+ });
1796
+ throw new PlaywrightCommandException(e.message);
1797
+ }
1798
+ } else if (method === "press") {
1799
+ try {
1800
+ const key = args[0];
1801
+ yield this.stagehand.page.keyboard.press(key);
1802
+ } catch (e) {
1803
+ this.logger({
1804
+ category: "action",
1805
+ message: `Error pressing key: ${e.message}
1806
+ Trace: ${e.stack}`,
1807
+ level: 1
1808
+ });
1809
+ throw new PlaywrightCommandException(e.message);
1810
+ }
1811
+ } else if (typeof locator[method] === "function") {
1812
+ this.logger({
1813
+ category: "action",
1814
+ message: `Page URL before action: ${this.stagehand.page.url()}`,
1815
+ level: 2
1816
+ });
1817
+ try {
1818
+ yield locator[method](...args);
1819
+ } catch (e) {
1820
+ this.logger({
1821
+ category: "action",
1822
+ message: `Error performing method ${method} with args ${JSON.stringify(
1823
+ args
1824
+ )}: ${e.message}
1825
+ Trace: ${e.stack}`,
1826
+ level: 1
1827
+ });
1828
+ throw new PlaywrightCommandException(e.message);
1829
+ }
1830
+ if (method === "click") {
1831
+ this.logger({
1832
+ category: "action",
1833
+ message: `Clicking element, checking for page navigation`,
1834
+ level: 1
1835
+ });
1836
+ const newOpenedTab = yield Promise.race([
1837
+ new Promise((resolve) => {
1838
+ this.stagehand.context.once("page", (page) => resolve(page));
1839
+ setTimeout(() => resolve(null), 1500);
1840
+ })
1841
+ ]);
1842
+ this.logger({
1843
+ category: "action",
1844
+ message: `Clicked element, ${newOpenedTab ? "opened a new tab" : "no new tabs opened"}`,
1845
+ level: 1
1846
+ });
1847
+ if (newOpenedTab) {
1848
+ this.logger({
1849
+ category: "action",
1850
+ message: `New page detected (new tab) with URL: ${newOpenedTab.url()}`,
1851
+ level: 1
1852
+ });
1853
+ yield newOpenedTab.close();
1854
+ yield this.stagehand.page.goto(newOpenedTab.url());
1855
+ yield this.stagehand.page.waitForLoadState("domcontentloaded");
1856
+ yield this.waitForSettledDom(domSettleTimeoutMs);
1857
+ }
1858
+ yield Promise.race([
1859
+ this.stagehand.page.waitForLoadState("networkidle"),
1860
+ new Promise((resolve) => setTimeout(resolve, 5e3))
1861
+ ]).catch((e) => {
1862
+ this.logger({
1863
+ category: "action",
1864
+ message: `Network idle timeout hit`,
1865
+ level: 1
1866
+ });
1867
+ });
1868
+ this.logger({
1869
+ category: "action",
1870
+ message: `Finished waiting for (possible) page navigation`,
1871
+ level: 1
1872
+ });
1873
+ if (this.stagehand.page.url() !== initialUrl) {
1874
+ this.logger({
1875
+ category: "action",
1876
+ message: `New page detected with URL: ${this.stagehand.page.url()}`,
1877
+ level: 1
1878
+ });
1879
+ }
1880
+ }
1881
+ } else {
1882
+ this.logger({
1883
+ category: "action",
1884
+ message: `Chosen method ${method} is invalid`,
1885
+ level: 1
1886
+ });
1887
+ throw new PlaywrightCommandMethodNotSupportedException(
1888
+ `Method ${method} not supported`
1889
+ );
1890
+ }
1891
+ yield this.waitForSettledDom(domSettleTimeoutMs);
1892
+ });
1893
+ }
1894
+ _getComponentString(locator) {
1895
+ return __async(this, null, function* () {
1896
+ return yield locator.evaluate((el) => {
1897
+ const clone = el.cloneNode(true);
1898
+ const attributesToKeep = [
1899
+ "type",
1900
+ "name",
1901
+ "placeholder",
1902
+ "aria-label",
1903
+ "role",
1904
+ "href",
1905
+ "title",
1906
+ "alt"
1907
+ ];
1908
+ Array.from(clone.attributes).forEach((attr) => {
1909
+ if (!attributesToKeep.includes(attr.name)) {
1910
+ clone.removeAttribute(attr.name);
1911
+ }
1912
+ });
1913
+ const outerHtml = clone.outerHTML;
1914
+ return outerHtml.trim().replace(/\s+/g, " ");
1915
+ });
1916
+ });
1917
+ }
1918
+ getElement(xpath, timeout = 5e3) {
1919
+ return __async(this, null, function* () {
1920
+ try {
1921
+ const element = this.stagehand.page.locator(`xpath=${xpath}`).first();
1922
+ yield element.waitFor({ state: "attached", timeout });
1923
+ return element;
1924
+ } catch (e) {
1925
+ this.logger({
1926
+ category: "action",
1927
+ message: `Element with XPath ${xpath} not found within ${timeout}ms.`,
1928
+ level: 1
1929
+ });
1930
+ return null;
1931
+ }
1932
+ });
1933
+ }
1934
+ _checkIfCachedStepIsValid_oneXpath(cachedStep) {
1935
+ return __async(this, null, function* () {
1936
+ this.logger({
1937
+ category: "action",
1938
+ message: `Checking if cached step is valid: ${cachedStep.xpath}, ${cachedStep.savedComponentString}`,
1939
+ level: 1
1940
+ });
1941
+ try {
1942
+ const locator = yield this.getElement(cachedStep.xpath);
1943
+ if (!locator) {
1944
+ this.logger({
1945
+ category: "action",
1946
+ message: `Locator not found for xpath: ${cachedStep.xpath}`,
1947
+ level: 1
1948
+ });
1949
+ return false;
1950
+ }
1951
+ this.logger({
1952
+ category: "action",
1953
+ message: `locator element: ${yield this._getComponentString(locator)}`,
1954
+ level: 1
1955
+ });
1956
+ let currentComponent = yield this._getComponentString(locator);
1957
+ this.logger({
1958
+ category: "action",
1959
+ message: `Current text: ${currentComponent}`,
1960
+ level: 1
1961
+ });
1962
+ if (!currentComponent || !cachedStep.savedComponentString) {
1963
+ this.logger({
1964
+ category: "action",
1965
+ message: `Current text or cached text is undefined`,
1966
+ level: 1
1967
+ });
1968
+ return false;
1969
+ }
1970
+ const normalizedCurrentText = currentComponent.trim().replace(/\s+/g, " ");
1971
+ const normalizedCachedText = cachedStep.savedComponentString.trim().replace(/\s+/g, " ");
1972
+ if (normalizedCurrentText !== normalizedCachedText) {
1973
+ this.logger({
1974
+ category: "action",
1975
+ message: `Current text and cached text do not match: ${normalizedCurrentText} !== ${normalizedCachedText}`,
1976
+ level: 1
1977
+ });
1978
+ return false;
1979
+ }
1980
+ return true;
1981
+ } catch (e) {
1982
+ this.logger({
1983
+ category: "action",
1984
+ message: `Error checking if cached step is valid: ${e.message}
1985
+ Trace: ${e.stack}`,
1986
+ level: 1
1987
+ });
1988
+ return false;
1989
+ }
1990
+ });
1991
+ }
1992
+ _getValidCachedStepXpath(cachedStep) {
1993
+ return __async(this, null, function* () {
1994
+ const reversedXpaths = [...cachedStep.xpaths].reverse();
1995
+ for (const xpath of reversedXpaths) {
1996
+ const isValid = yield this._checkIfCachedStepIsValid_oneXpath({
1997
+ xpath,
1998
+ savedComponentString: cachedStep.savedComponentString
1999
+ });
2000
+ if (isValid) {
2001
+ return xpath;
2002
+ }
2003
+ }
2004
+ return null;
2005
+ });
2006
+ }
2007
+ _runCachedActionIfAvailable(_0) {
2008
+ return __async(this, arguments, function* ({
2009
+ action,
2010
+ previousSelectors,
2011
+ requestId,
2012
+ steps,
2013
+ chunksSeen,
2014
+ modelName,
2015
+ useVision,
2016
+ verifierUseVision,
2017
+ retries,
2018
+ variables,
2019
+ model,
2020
+ domSettleTimeoutMs
2021
+ }) {
2022
+ const cacheObj = {
2023
+ url: this.stagehand.page.url(),
2024
+ action,
2025
+ previousSelectors,
2026
+ requestId
2027
+ };
2028
+ this.logger({
2029
+ category: "action",
2030
+ message: `Checking action cache for: ${JSON.stringify(cacheObj)}`,
2031
+ level: 1
2032
+ });
2033
+ const cachedStep = yield this.actionCache.getActionStep(cacheObj);
2034
+ if (!cachedStep) {
2035
+ this.logger({
2036
+ category: "action",
2037
+ message: `Action cache miss: ${JSON.stringify(cacheObj)}`,
2038
+ level: 1
2039
+ });
2040
+ return null;
2041
+ }
2042
+ this.logger({
2043
+ category: "action",
2044
+ message: `Action cache semi-hit: ${cachedStep.playwrightCommand.method} with args: ${JSON.stringify(
2045
+ cachedStep.playwrightCommand.args
2046
+ )}`,
2047
+ level: 1
2048
+ });
2049
+ try {
2050
+ const validXpath = yield this._getValidCachedStepXpath({
2051
+ xpaths: cachedStep.xpaths,
2052
+ savedComponentString: cachedStep.componentString
2053
+ });
2054
+ this.logger({
2055
+ category: "action",
2056
+ message: `Cached action step is valid: ${validXpath !== null}`,
2057
+ level: 1
2058
+ });
2059
+ if (!validXpath) {
2060
+ this.logger({
2061
+ category: "action",
2062
+ message: `Cached action step is invalid, removing...`,
2063
+ level: 1
2064
+ });
2065
+ yield this.actionCache.removeActionStep(cacheObj);
2066
+ return null;
2067
+ }
2068
+ this.logger({
2069
+ category: "action",
2070
+ message: `Action Cache Hit: ${cachedStep.playwrightCommand.method} with args: ${JSON.stringify(
2071
+ cachedStep.playwrightCommand.args
2072
+ )}`,
2073
+ level: 1
2074
+ });
2075
+ cachedStep.playwrightCommand.args = cachedStep.playwrightCommand.args.map(
2076
+ (arg) => {
2077
+ return fillInVariables(arg, variables);
2078
+ }
2079
+ );
2080
+ yield this._performPlaywrightMethod(
2081
+ cachedStep.playwrightCommand.method,
2082
+ cachedStep.playwrightCommand.args,
2083
+ validXpath,
2084
+ domSettleTimeoutMs
2085
+ );
2086
+ steps = steps + cachedStep.newStepString;
2087
+ const { outputString, selectorMap } = yield this.stagehand.page.evaluate(
2088
+ ({ chunksSeen: chunksSeen2 }) => {
2089
+ return window.processDom(chunksSeen2);
2090
+ },
2091
+ { chunksSeen }
2092
+ );
2093
+ if (cachedStep.completed) {
2094
+ let actionCompleted = yield this._verifyActionCompletion({
2095
+ completed: true,
2096
+ verifierUseVision,
2097
+ model,
2098
+ steps,
2099
+ requestId,
2100
+ action,
2101
+ domSettleTimeoutMs
2102
+ });
2103
+ this.logger({
2104
+ category: "action",
2105
+ message: `Action completion verification result from cache: ${actionCompleted}`,
2106
+ level: 1
2107
+ });
2108
+ if (actionCompleted) {
2109
+ return {
2110
+ success: true,
2111
+ message: "Action completed successfully using cached step",
2112
+ action
2113
+ };
2114
+ }
2115
+ }
2116
+ return this.act({
2117
+ action,
2118
+ steps,
2119
+ chunksSeen,
2120
+ modelName,
2121
+ useVision,
2122
+ verifierUseVision,
2123
+ retries,
2124
+ requestId,
2125
+ variables,
2126
+ previousSelectors: [...previousSelectors, cachedStep.xpaths[0]],
2127
+ skipActionCacheForThisStep: false,
2128
+ domSettleTimeoutMs
2129
+ });
2130
+ } catch (exception) {
2131
+ this.logger({
2132
+ category: "action",
2133
+ message: `Error performing cached action step: ${exception.message}
2134
+ Trace: ${exception.stack}`,
2135
+ level: 1
2136
+ });
2137
+ yield this.actionCache.removeActionStep(cacheObj);
2138
+ return null;
2139
+ }
2140
+ });
2141
+ }
2142
+ act(_0) {
2143
+ return __async(this, arguments, function* ({
2144
+ action,
2145
+ steps = "",
2146
+ chunksSeen,
2147
+ modelName,
2148
+ useVision,
2149
+ verifierUseVision,
2150
+ retries = 0,
2151
+ requestId,
2152
+ variables,
2153
+ previousSelectors,
2154
+ skipActionCacheForThisStep = false,
2155
+ domSettleTimeoutMs
2156
+ }) {
2157
+ var _a;
2158
+ try {
2159
+ yield this.waitForSettledDom(domSettleTimeoutMs);
2160
+ yield this.startDomDebug();
2161
+ const model = modelName != null ? modelName : this.defaultModelName;
2162
+ if (this.enableCaching && !skipActionCacheForThisStep) {
2163
+ const response2 = yield this._runCachedActionIfAvailable({
2164
+ action,
2165
+ previousSelectors,
2166
+ requestId,
2167
+ steps,
2168
+ chunksSeen,
2169
+ modelName: model,
2170
+ useVision,
2171
+ verifierUseVision,
2172
+ retries,
2173
+ variables,
2174
+ model,
2175
+ domSettleTimeoutMs
2176
+ });
2177
+ if (response2 !== null) {
2178
+ return response2;
2179
+ } else {
2180
+ return this.act({
2181
+ action,
2182
+ steps,
2183
+ chunksSeen,
2184
+ modelName,
2185
+ useVision,
2186
+ verifierUseVision,
2187
+ retries,
2188
+ requestId,
2189
+ variables,
2190
+ previousSelectors,
2191
+ skipActionCacheForThisStep: true,
2192
+ domSettleTimeoutMs
2193
+ });
2194
+ }
2195
+ }
2196
+ if (!modelsWithVision.includes(model) && (useVision !== false || verifierUseVision)) {
2197
+ this.logger({
2198
+ category: "action",
2199
+ message: `${model} does not support vision, but useVision was set to ${useVision}. Defaulting to false.`,
2200
+ level: 1
2201
+ });
2202
+ useVision = false;
2203
+ verifierUseVision = false;
2204
+ }
2205
+ this.logger({
2206
+ category: "action",
2207
+ message: `Running / Continuing action: ${action} on page: ${this.stagehand.page.url()}`,
2208
+ level: 2
2209
+ });
2210
+ this.logger({
2211
+ category: "action",
2212
+ message: `Processing DOM...`,
2213
+ level: 2
2214
+ });
2215
+ const { outputString, selectorMap, chunk, chunks } = yield this.stagehand.page.evaluate(
2216
+ ({ chunksSeen: chunksSeen2 }) => {
2217
+ return window.processDom(chunksSeen2);
2218
+ },
2219
+ { chunksSeen }
2220
+ );
2221
+ this.logger({
2222
+ category: "action",
2223
+ message: `Looking at chunk ${chunk}. Chunks left: ${chunks.length - chunksSeen.length}`,
2224
+ level: 1
2225
+ });
2226
+ let annotatedScreenshot;
2227
+ if (useVision === true) {
2228
+ if (!modelsWithVision.includes(model)) {
2229
+ this.logger({
2230
+ category: "action",
2231
+ message: `${model} does not support vision. Skipping vision processing.`,
2232
+ level: 1
2233
+ });
2234
+ } else {
2235
+ const screenshotService = new ScreenshotService(
2236
+ this.stagehand.page,
2237
+ selectorMap,
2238
+ this.verbose
2239
+ );
2240
+ annotatedScreenshot = yield screenshotService.getAnnotatedScreenshot(false);
2241
+ }
2242
+ }
2243
+ const response = yield act({
2244
+ action,
2245
+ domElements: outputString,
2246
+ steps,
2247
+ llmProvider: this.llmProvider,
2248
+ modelName: model,
2249
+ screenshot: annotatedScreenshot,
2250
+ logger: this.logger,
2251
+ requestId,
2252
+ variables
2253
+ });
2254
+ this.logger({
2255
+ category: "action",
2256
+ message: `Received response from LLM: ${JSON.stringify(response)}`,
2257
+ level: 1
2258
+ });
2259
+ yield this.cleanupDomDebug();
2260
+ if (!response) {
2261
+ if (chunksSeen.length + 1 < chunks.length) {
2262
+ chunksSeen.push(chunk);
2263
+ this.logger({
2264
+ category: "action",
2265
+ message: `No action found in current chunk. Chunks seen: ${chunksSeen.length}.`,
2266
+ level: 1
2267
+ });
2268
+ return this.act({
2269
+ action,
2270
+ steps: steps + (!steps.endsWith("\n") ? "\n" : "") + "## Step: Scrolled to another section\n",
2271
+ chunksSeen,
2272
+ modelName,
2273
+ useVision,
2274
+ verifierUseVision,
2275
+ requestId,
2276
+ variables,
2277
+ previousSelectors,
2278
+ skipActionCacheForThisStep,
2279
+ domSettleTimeoutMs
2280
+ });
2281
+ } else if (useVision === "fallback") {
2282
+ this.logger({
2283
+ category: "action",
2284
+ message: `Switching to vision-based processing`,
2285
+ level: 1
2286
+ });
2287
+ yield this.stagehand.page.evaluate(() => window.scrollToHeight(0));
2288
+ return yield this.act({
2289
+ action,
2290
+ steps,
2291
+ chunksSeen,
2292
+ modelName,
2293
+ useVision: true,
2294
+ verifierUseVision,
2295
+ requestId,
2296
+ variables,
2297
+ previousSelectors,
2298
+ skipActionCacheForThisStep,
2299
+ domSettleTimeoutMs
2300
+ });
2301
+ } else {
2302
+ if (this.enableCaching) {
2303
+ this.llmProvider.cleanRequestCache(requestId);
2304
+ this.actionCache.deleteCacheForRequestId(requestId);
2305
+ }
2306
+ return {
2307
+ success: false,
2308
+ message: `Action was not able to be completed.`,
2309
+ action
2310
+ };
2311
+ }
2312
+ }
2313
+ const elementId = response["element"];
2314
+ const xpaths = selectorMap[elementId];
2315
+ const method = response["method"];
2316
+ const args = response["args"];
2317
+ const elementLines = outputString.split("\n");
2318
+ const elementText = ((_a = elementLines.find((line) => line.startsWith(`${elementId}:`))) == null ? void 0 : _a.split(":")[1]) || "Element not found";
2319
+ this.logger({
2320
+ category: "action",
2321
+ message: `Executing method: ${method} on element: ${elementId} (xpaths: ${xpaths.join(
2322
+ ", "
2323
+ )}) with args: ${JSON.stringify(args)}`,
2324
+ level: 1
2325
+ });
2326
+ try {
2327
+ const initialUrl = this.stagehand.page.url();
2328
+ const locator = this.stagehand.page.locator(`xpath=${xpaths[0]}`).first();
2329
+ const originalUrl = this.stagehand.page.url();
2330
+ const componentString = yield this._getComponentString(locator);
2331
+ const responseArgs = [...args];
2332
+ if (variables) {
2333
+ responseArgs.forEach((arg, index) => {
2334
+ if (typeof arg === "string") {
2335
+ args[index] = fillInVariables(arg, variables);
2336
+ }
2337
+ });
2338
+ }
2339
+ yield this._performPlaywrightMethod(
2340
+ method,
2341
+ args,
2342
+ xpaths[0],
2343
+ domSettleTimeoutMs
2344
+ );
2345
+ const newStepString = (!steps.endsWith("\n") ? "\n" : "") + `## Step: ${response.step}
2346
+ Element: ${elementText}
2347
+ Action: ${response.method}
2348
+ Reasoning: ${response.why}
2349
+ `;
2350
+ steps += newStepString;
2351
+ if (this.enableCaching) {
2352
+ this.actionCache.addActionStep({
2353
+ action,
2354
+ url: originalUrl,
2355
+ previousSelectors,
2356
+ playwrightCommand: {
2357
+ method,
2358
+ args: responseArgs
2359
+ },
2360
+ componentString,
2361
+ requestId,
2362
+ xpaths,
2363
+ newStepString,
2364
+ completed: response.completed
2365
+ }).catch((e) => {
2366
+ this.logger({
2367
+ category: "action",
2368
+ message: `Error adding action step to cache: ${e.message}
2369
+ Trace: ${e.stack}`,
2370
+ level: 1
2371
+ });
2372
+ });
2373
+ }
2374
+ if (this.stagehand.page.url() !== initialUrl) {
2375
+ steps += ` Result (Important): Page URL changed from ${initialUrl} to ${this.stagehand.page.url()}
2376
+
2377
+ `;
2378
+ }
2379
+ const actionCompleted = yield this._verifyActionCompletion({
2380
+ completed: response.completed,
2381
+ verifierUseVision,
2382
+ requestId,
2383
+ action,
2384
+ steps,
2385
+ model,
2386
+ domSettleTimeoutMs
2387
+ });
2388
+ if (!actionCompleted) {
2389
+ this.logger({
2390
+ category: "action",
2391
+ message: `Continuing to next action step`,
2392
+ level: 1
2393
+ });
2394
+ return this.act({
2395
+ action,
2396
+ steps,
2397
+ modelName,
2398
+ chunksSeen,
2399
+ useVision,
2400
+ verifierUseVision,
2401
+ requestId,
2402
+ variables,
2403
+ previousSelectors: [...previousSelectors, xpaths[0]],
2404
+ skipActionCacheForThisStep: false,
2405
+ domSettleTimeoutMs
2406
+ });
2407
+ } else {
2408
+ this.logger({
2409
+ category: "action",
2410
+ message: `Action completed successfully`,
2411
+ level: 1
2412
+ });
2413
+ yield this._recordAction(action, response.step);
2414
+ return {
2415
+ success: true,
2416
+ message: `Action completed successfully: ${steps}${response.step}`,
2417
+ action
2418
+ };
2419
+ }
2420
+ } catch (error) {
2421
+ this.logger({
2422
+ category: "action",
2423
+ message: `Error performing action - D (Retries: ${retries}): ${error.message}
2424
+ Trace: ${error.stack}`,
2425
+ level: 1
2426
+ });
2427
+ if (retries < 2) {
2428
+ return this.act({
2429
+ action,
2430
+ steps,
2431
+ modelName,
2432
+ useVision,
2433
+ verifierUseVision,
2434
+ retries: retries + 1,
2435
+ chunksSeen,
2436
+ requestId,
2437
+ variables,
2438
+ previousSelectors,
2439
+ skipActionCacheForThisStep,
2440
+ domSettleTimeoutMs
2441
+ });
2442
+ }
2443
+ yield this._recordAction(action, "");
2444
+ if (this.enableCaching) {
2445
+ this.llmProvider.cleanRequestCache(requestId);
2446
+ this.actionCache.deleteCacheForRequestId(requestId);
2447
+ }
2448
+ return {
2449
+ success: false,
2450
+ message: `Error performing action - A: ${error.message}`,
2451
+ action
2452
+ };
2453
+ }
2454
+ } catch (error) {
2455
+ this.logger({
2456
+ category: "action",
2457
+ message: `Error performing action - B: ${error.message}
2458
+ Trace: ${error.stack}`,
2459
+ level: 1
2460
+ });
2461
+ if (this.enableCaching) {
2462
+ this.llmProvider.cleanRequestCache(requestId);
2463
+ this.actionCache.deleteCacheForRequestId(requestId);
2464
+ }
2465
+ return {
2466
+ success: false,
2467
+ message: `Error performing action - C: ${error.message}`,
2468
+ action
2469
+ };
2470
+ }
2471
+ });
2472
+ }
2473
+ };
2474
+
2475
+ // lib/index.ts
2476
+ require("dotenv").config({ path: ".env" });
2477
+ function getBrowser(apiKey, projectId, env = "LOCAL", headless = false, logger, browserbaseSessionCreateParams, browserbaseResumeSessionID) {
2478
+ return __async(this, null, function* () {
2479
+ if (env === "BROWSERBASE") {
2480
+ if (!apiKey) {
2481
+ logger({
2482
+ category: "Init",
2483
+ message: "BROWSERBASE_API_KEY is required to use BROWSERBASE env. Defaulting to LOCAL.",
2484
+ level: 0
2485
+ });
2486
+ env = "LOCAL";
2487
+ }
2488
+ if (!projectId) {
2489
+ logger({
1335
2490
  category: "Init",
1336
2491
  message: "BROWSERBASE_PROJECT_ID is required for some Browserbase features that may not work without it.",
1337
2492
  level: 1
@@ -1507,20 +2662,31 @@ var Stagehand = class {
1507
2662
  this.llmProvider = llmProvider || new LLMProvider(this.logger, this.enableCaching);
1508
2663
  this.env = env;
1509
2664
  this.observations = {};
1510
- this.apiKey = apiKey;
1511
- this.projectId = projectId;
1512
- this.actions = {};
2665
+ this.apiKey = apiKey != null ? apiKey : process.env.BROWSERBASE_API_KEY;
2666
+ this.projectId = projectId != null ? projectId : process.env.BROWSERBASE_PROJECT_ID;
1513
2667
  this.verbose = verbose != null ? verbose : 0;
1514
2668
  this.debugDom = debugDom != null ? debugDom : false;
1515
2669
  this.defaultModelName = "gpt-4o";
1516
2670
  this.domSettleTimeoutMs = domSettleTimeoutMs != null ? domSettleTimeoutMs : 3e4;
1517
2671
  this.headless = headless != null ? headless : false;
1518
2672
  this.browserBaseSessionCreateParams = browserBaseSessionCreateParams;
2673
+ this.actHandler = new StagehandActHandler({
2674
+ stagehand: this,
2675
+ verbose: this.verbose,
2676
+ llmProvider: this.llmProvider,
2677
+ enableCaching: this.enableCaching,
2678
+ logger: this.logger,
2679
+ waitForSettledDom: this._waitForSettledDom.bind(this),
2680
+ defaultModelName: this.defaultModelName,
2681
+ startDomDebug: this.startDomDebug.bind(this),
2682
+ cleanupDomDebug: this.cleanupDomDebug.bind(this)
2683
+ });
1519
2684
  this.browserbaseResumeSessionID = browserbaseResumeSessionID;
1520
2685
  }
1521
2686
  init() {
1522
2687
  return __async(this, arguments, function* ({
1523
- modelName = "gpt-4o"
2688
+ modelName = "gpt-4o",
2689
+ domSettleTimeoutMs
1524
2690
  } = {}) {
1525
2691
  const { context, debugUrl, sessionUrl } = yield getBrowser(
1526
2692
  this.apiKey,
@@ -1536,7 +2702,10 @@ var Stagehand = class {
1536
2702
  });
1537
2703
  this.context = context;
1538
2704
  this.page = context.pages()[0];
2705
+ yield this.page.waitForLoadState("domcontentloaded");
2706
+ yield this._waitForSettledDom();
1539
2707
  this.defaultModelName = modelName;
2708
+ this.domSettleTimeoutMs = domSettleTimeoutMs != null ? domSettleTimeoutMs : this.domSettleTimeoutMs;
1540
2709
  const originalGoto = this.page.goto.bind(this.page);
1541
2710
  this.page.goto = (url, options) => __async(this, null, function* () {
1542
2711
  const result = yield originalGoto(url, options);
@@ -1547,14 +2716,33 @@ var Stagehand = class {
1547
2716
  if (this.headless) {
1548
2717
  yield this.page.setViewportSize({ width: 1280, height: 720 });
1549
2718
  }
1550
- yield this.page.addInitScript({
1551
- path: import_path2.default.join(__dirname, "..", "dist", "dom", "build", "process.js")
2719
+ yield this.context.addInitScript({
2720
+ path: import_path2.default.join(__dirname, "..", "dist", "dom", "build", "xpathUtils.js"),
2721
+ content: import_fs2.default.readFileSync(
2722
+ import_path2.default.join(__dirname, "..", "dist", "dom", "build", "xpathUtils.js"),
2723
+ "utf8"
2724
+ )
2725
+ });
2726
+ yield this.context.addInitScript({
2727
+ path: import_path2.default.join(__dirname, "..", "dist", "dom", "build", "process.js"),
2728
+ content: import_fs2.default.readFileSync(
2729
+ import_path2.default.join(__dirname, "..", "dist", "dom", "build", "process.js"),
2730
+ "utf8"
2731
+ )
1552
2732
  });
1553
- yield this.page.addInitScript({
1554
- path: import_path2.default.join(__dirname, "..", "dist", "dom", "build", "utils.js")
2733
+ yield this.context.addInitScript({
2734
+ path: import_path2.default.join(__dirname, "..", "dist", "dom", "build", "utils.js"),
2735
+ content: import_fs2.default.readFileSync(
2736
+ import_path2.default.join(__dirname, "..", "dist", "dom", "build", "utils.js"),
2737
+ "utf8"
2738
+ )
1555
2739
  });
1556
- yield this.page.addInitScript({
1557
- path: import_path2.default.join(__dirname, "..", "dist", "dom", "build", "debug.js")
2740
+ yield this.context.addInitScript({
2741
+ path: import_path2.default.join(__dirname, "..", "dist", "dom", "build", "debug.js"),
2742
+ content: import_fs2.default.readFileSync(
2743
+ import_path2.default.join(__dirname, "..", "dist", "dom", "build", "debug.js"),
2744
+ "utf8"
2745
+ )
1558
2746
  });
1559
2747
  return { debugUrl, sessionUrl };
1560
2748
  });
@@ -1574,14 +2762,33 @@ var Stagehand = class {
1574
2762
  if (this.headless) {
1575
2763
  yield this.page.setViewportSize({ width: 1280, height: 720 });
1576
2764
  }
1577
- yield this.page.addInitScript({
1578
- path: import_path2.default.join(__dirname, "..", "dist", "dom", "build", "process.js")
2765
+ yield this.context.addInitScript({
2766
+ path: import_path2.default.join(__dirname, "..", "dist", "dom", "build", "xpathUtils.js"),
2767
+ content: import_fs2.default.readFileSync(
2768
+ import_path2.default.join(__dirname, "..", "dist", "dom", "build", "xpathUtils.js"),
2769
+ "utf8"
2770
+ )
2771
+ });
2772
+ yield this.context.addInitScript({
2773
+ path: import_path2.default.join(__dirname, "..", "dist", "dom", "build", "process.js"),
2774
+ content: import_fs2.default.readFileSync(
2775
+ import_path2.default.join(__dirname, "..", "dist", "dom", "build", "process.js"),
2776
+ "utf8"
2777
+ )
1579
2778
  });
1580
- yield this.page.addInitScript({
1581
- path: import_path2.default.join(__dirname, "..", "dist", "dom", "build", "utils.js")
2779
+ yield this.context.addInitScript({
2780
+ path: import_path2.default.join(__dirname, "..", "dist", "dom", "build", "utils.js"),
2781
+ content: import_fs2.default.readFileSync(
2782
+ import_path2.default.join(__dirname, "..", "dist", "dom", "build", "utils.js"),
2783
+ "utf8"
2784
+ )
1582
2785
  });
1583
- yield this.page.addInitScript({
1584
- path: import_path2.default.join(__dirname, "..", "dist", "dom", "build", "debug.js")
2786
+ yield this.context.addInitScript({
2787
+ path: import_path2.default.join(__dirname, "..", "dist", "dom", "build", "debug.js"),
2788
+ content: import_fs2.default.readFileSync(
2789
+ import_path2.default.join(__dirname, "..", "dist", "dom", "build", "debug.js"),
2790
+ "utf8"
2791
+ )
1585
2792
  });
1586
2793
  return { context: this.context };
1587
2794
  });
@@ -1720,24 +2927,13 @@ Trace: ${e.stack}`,
1720
2927
  }
1721
2928
  });
1722
2929
  }
1723
- // Recording
1724
- _generateId(operation) {
1725
- return import_crypto.default.createHash("sha256").update(operation).digest("hex");
1726
- }
1727
2930
  _recordObservation(instruction, result) {
1728
2931
  return __async(this, null, function* () {
1729
- const id = this._generateId(instruction);
2932
+ const id = generateId(instruction);
1730
2933
  this.observations[id] = { result, instruction };
1731
2934
  return id;
1732
2935
  });
1733
2936
  }
1734
- _recordAction(action, result) {
1735
- return __async(this, null, function* () {
1736
- const id = this._generateId(action);
1737
- this.actions[id] = { result, action };
1738
- return id;
1739
- });
1740
- }
1741
2937
  // Main methods
1742
2938
  _extract(_0) {
1743
2939
  return __async(this, arguments, function* ({
@@ -1826,116 +3022,25 @@ Trace: ${e.stack}`,
1826
3022
  domSettleTimeoutMs
1827
3023
  }) {
1828
3024
  if (!instruction) {
1829
- instruction = `Find elements that can be used for any future actions in the page. These may be navigation links, related pages, section/subsection links, buttons, or other interactive elements. Be comprehensive: if there are multiple elements that may be relevant for future actions, return all of them.`;
1830
- }
1831
- const model = modelName != null ? modelName : this.defaultModelName;
1832
- this.log({
1833
- category: "observation",
1834
- message: `starting observation: ${instruction}`,
1835
- level: 1
1836
- });
1837
- yield this._waitForSettledDom(domSettleTimeoutMs);
1838
- yield this.startDomDebug();
1839
- let { outputString, selectorMap } = yield this.page.evaluate(
1840
- (fullPage2) => fullPage2 ? window.processAllOfDom() : window.processDom([]),
1841
- fullPage
1842
- );
1843
- let annotatedScreenshot;
1844
- if (useVision === true) {
1845
- if (!modelsWithVision.includes(model)) {
1846
- this.log({
1847
- category: "observation",
1848
- message: `${model} does not support vision. Skipping vision processing.`,
1849
- level: 1
1850
- });
1851
- } else {
1852
- const screenshotService = new ScreenshotService(
1853
- this.page,
1854
- selectorMap,
1855
- this.verbose
1856
- );
1857
- annotatedScreenshot = yield screenshotService.getAnnotatedScreenshot(fullPage);
1858
- outputString = "n/a. use the image to find the elements.";
1859
- }
1860
- }
1861
- const observationResponse = yield observe({
1862
- instruction,
1863
- domElements: outputString,
1864
- llmProvider: this.llmProvider,
1865
- modelName: modelName || this.defaultModelName,
1866
- image: annotatedScreenshot,
1867
- requestId
1868
- });
1869
- const elementsWithSelectors = observationResponse.elements.map(
1870
- (element) => {
1871
- const _a = element, { elementId } = _a, rest = __objRest(_a, ["elementId"]);
1872
- return __spreadProps(__spreadValues({}, rest), {
1873
- selector: `xpath=${selectorMap[elementId]}`
1874
- });
1875
- }
1876
- );
1877
- yield this.cleanupDomDebug();
1878
- this._recordObservation(instruction, elementsWithSelectors);
1879
- this.log({
1880
- category: "observation",
1881
- message: `found element ${JSON.stringify(elementsWithSelectors)}`,
1882
- level: 1
1883
- });
1884
- yield this._recordObservation(instruction, elementsWithSelectors);
1885
- return elementsWithSelectors;
1886
- });
1887
- }
1888
- _act(_0) {
1889
- return __async(this, arguments, function* ({
1890
- action,
1891
- steps = "",
1892
- chunksSeen,
1893
- modelName,
1894
- useVision,
1895
- verifierUseVision,
1896
- retries = 0,
1897
- requestId,
1898
- domSettleTimeoutMs
1899
- }) {
1900
- var _a;
1901
- const model = modelName != null ? modelName : this.defaultModelName;
1902
- if (!modelsWithVision.includes(model) && (useVision !== false || verifierUseVision)) {
1903
- this.log({
1904
- category: "action",
1905
- message: `${model} does not support vision, but useVision was set to ${useVision}. Defaulting to false.`,
1906
- level: 1
1907
- });
1908
- useVision = false;
1909
- verifierUseVision = false;
3025
+ instruction = `Find elements that can be used for any future actions in the page. These may be navigation links, related pages, section/subsection links, buttons, or other interactive elements. Be comprehensive: if there are multiple elements that may be relevant for future actions, return all of them.`;
1910
3026
  }
3027
+ const model = modelName != null ? modelName : this.defaultModelName;
1911
3028
  this.log({
1912
- category: "action",
1913
- message: `Running / Continuing action: ${action} on page: ${this.page.url()}`,
1914
- level: 2
3029
+ category: "observation",
3030
+ message: `starting observation: ${instruction}`,
3031
+ level: 1
1915
3032
  });
1916
3033
  yield this._waitForSettledDom(domSettleTimeoutMs);
1917
3034
  yield this.startDomDebug();
1918
- this.log({
1919
- category: "action",
1920
- message: `Processing DOM...`,
1921
- level: 2
1922
- });
1923
- const { outputString, selectorMap, chunk, chunks } = yield this.page.evaluate(
1924
- ({ chunksSeen: chunksSeen2 }) => {
1925
- return window.processDom(chunksSeen2);
1926
- },
1927
- { chunksSeen }
3035
+ let { outputString, selectorMap } = yield this.page.evaluate(
3036
+ (fullPage2) => fullPage2 ? window.processAllOfDom() : window.processDom([]),
3037
+ fullPage
1928
3038
  );
1929
- this.log({
1930
- category: "action",
1931
- message: `Looking at chunk ${chunk}. Chunks left: ${chunks.length - chunksSeen.length}`,
1932
- level: 1
1933
- });
1934
3039
  let annotatedScreenshot;
1935
3040
  if (useVision === true) {
1936
3041
  if (!modelsWithVision.includes(model)) {
1937
3042
  this.log({
1938
- category: "action",
3043
+ category: "observation",
1939
3044
  message: `${model} does not support vision. Skipping vision processing.`,
1940
3045
  level: 1
1941
3046
  });
@@ -1945,419 +3050,35 @@ Trace: ${e.stack}`,
1945
3050
  selectorMap,
1946
3051
  this.verbose
1947
3052
  );
1948
- annotatedScreenshot = yield screenshotService.getAnnotatedScreenshot(false);
3053
+ annotatedScreenshot = yield screenshotService.getAnnotatedScreenshot(fullPage);
3054
+ outputString = "n/a. use the image to find the elements.";
1949
3055
  }
1950
3056
  }
1951
- const response = yield act({
1952
- action,
3057
+ const observationResponse = yield observe({
3058
+ instruction,
1953
3059
  domElements: outputString,
1954
- steps,
1955
3060
  llmProvider: this.llmProvider,
1956
- modelName: model,
1957
- screenshot: annotatedScreenshot,
1958
- logger: this.logger,
3061
+ modelName: modelName || this.defaultModelName,
3062
+ image: annotatedScreenshot,
1959
3063
  requestId
1960
3064
  });
1961
- this.log({
1962
- category: "action",
1963
- message: `Received response from LLM: ${JSON.stringify(response)}`,
1964
- level: 1
1965
- });
1966
- yield this.cleanupDomDebug();
1967
- if (!response) {
1968
- if (chunksSeen.length + 1 < chunks.length) {
1969
- chunksSeen.push(chunk);
1970
- this.log({
1971
- category: "action",
1972
- message: `No action found in current chunk. Chunks seen: ${chunksSeen.length}.`,
1973
- level: 1
1974
- });
1975
- return this._act({
1976
- action,
1977
- steps: steps + (!steps.endsWith("\n") ? "\n" : "") + "## Step: Scrolled to another section\n",
1978
- chunksSeen,
1979
- modelName,
1980
- useVision,
1981
- verifierUseVision,
1982
- requestId,
1983
- domSettleTimeoutMs
1984
- });
1985
- } else if (useVision === "fallback") {
1986
- this.log({
1987
- category: "action",
1988
- message: `Switching to vision-based processing`,
1989
- level: 1
1990
- });
1991
- yield this.page.evaluate(() => window.scrollToHeight(0));
1992
- return yield this._act({
1993
- action,
1994
- steps,
1995
- chunksSeen,
1996
- modelName,
1997
- useVision: true,
1998
- verifierUseVision,
1999
- requestId,
2000
- domSettleTimeoutMs
3065
+ const elementsWithSelectors = observationResponse.elements.map(
3066
+ (element) => {
3067
+ const _a = element, { elementId } = _a, rest = __objRest(_a, ["elementId"]);
3068
+ return __spreadProps(__spreadValues({}, rest), {
3069
+ selector: `xpath=${selectorMap[elementId][0]}`
2001
3070
  });
2002
- } else {
2003
- if (this.enableCaching) {
2004
- this.llmProvider.cleanRequestCache(requestId);
2005
- }
2006
- return {
2007
- success: false,
2008
- message: `Action was not able to be completed.`,
2009
- action
2010
- };
2011
3071
  }
2012
- }
2013
- const elementId = response["element"];
2014
- const xpath = selectorMap[elementId];
2015
- const method = response["method"];
2016
- const args = response["args"];
2017
- const elementLines = outputString.split("\n");
2018
- const elementText = ((_a = elementLines.find((line) => line.startsWith(`${elementId}:`))) == null ? void 0 : _a.split(":")[1]) || "Element not found";
3072
+ );
3073
+ yield this.cleanupDomDebug();
3074
+ this._recordObservation(instruction, elementsWithSelectors);
2019
3075
  this.log({
2020
- category: "action",
2021
- message: `Executing method: ${method} on element: ${elementId} (xpath: ${xpath}) with args: ${JSON.stringify(
2022
- args
2023
- )}`,
3076
+ category: "observation",
3077
+ message: `found element ${JSON.stringify(elementsWithSelectors)}`,
2024
3078
  level: 1
2025
3079
  });
2026
- let urlChangeString = "";
2027
- const locator = this.page.locator(`xpath=${xpath}`).first();
2028
- try {
2029
- const initialUrl = this.page.url();
2030
- if (method === "scrollIntoView") {
2031
- this.log({
2032
- category: "action",
2033
- message: `Scrolling element into view`,
2034
- level: 2
2035
- });
2036
- try {
2037
- yield locator.evaluate((element) => {
2038
- element.scrollIntoView({ behavior: "smooth", block: "center" });
2039
- }).catch((e) => {
2040
- this.log({
2041
- category: "action",
2042
- message: `Error scrolling element into view: ${e.message}
2043
- Trace: ${e.stack}`,
2044
- level: 1
2045
- });
2046
- });
2047
- } catch (e) {
2048
- this.log({
2049
- category: "action",
2050
- message: `Error scrolling element into view (Retries ${retries}): ${e.message}
2051
- Trace: ${e.stack}`,
2052
- level: 1
2053
- });
2054
- if (retries < 2) {
2055
- return this._act({
2056
- action,
2057
- steps,
2058
- modelName,
2059
- useVision,
2060
- verifierUseVision,
2061
- retries: retries + 1,
2062
- chunksSeen,
2063
- requestId,
2064
- domSettleTimeoutMs
2065
- });
2066
- }
2067
- }
2068
- } else if (method === "fill" || method === "type") {
2069
- try {
2070
- yield locator.fill("");
2071
- yield locator.click();
2072
- const text = args[0];
2073
- for (const char of text) {
2074
- yield this.page.keyboard.type(char, {
2075
- delay: Math.random() * 50 + 25
2076
- });
2077
- }
2078
- } catch (e) {
2079
- this.log({
2080
- category: "action",
2081
- message: `Error filling element (Retries ${retries}): ${e.message}
2082
- Trace: ${e.stack}`,
2083
- level: 1
2084
- });
2085
- if (retries < 2) {
2086
- return this._act({
2087
- action,
2088
- steps,
2089
- modelName,
2090
- useVision,
2091
- verifierUseVision,
2092
- retries: retries + 1,
2093
- chunksSeen,
2094
- requestId,
2095
- domSettleTimeoutMs
2096
- });
2097
- }
2098
- }
2099
- } else if (method === "press") {
2100
- try {
2101
- const key = args[0];
2102
- yield this.page.keyboard.press(key);
2103
- } catch (e) {
2104
- this.log({
2105
- category: "action",
2106
- message: `Error pressing key (Retries ${retries}): ${e.message}
2107
- Trace: ${e.stack}`,
2108
- level: 1
2109
- });
2110
- if (retries < 2) {
2111
- return this._act({
2112
- action,
2113
- steps,
2114
- modelName,
2115
- useVision,
2116
- verifierUseVision,
2117
- retries: retries + 1,
2118
- chunksSeen,
2119
- requestId,
2120
- domSettleTimeoutMs
2121
- });
2122
- }
2123
- }
2124
- } else if (typeof locator[method] === "function") {
2125
- this.log({
2126
- category: "action",
2127
- message: `Page URL before action: ${this.page.url()}`,
2128
- level: 2
2129
- });
2130
- try {
2131
- yield locator[method](...args);
2132
- } catch (e) {
2133
- this.log({
2134
- category: "action",
2135
- message: `Error performing method ${method} with args ${JSON.stringify(
2136
- args
2137
- )} (Retries: ${retries}): ${e.message}
2138
- Trace: ${e.stack}`,
2139
- level: 1
2140
- });
2141
- if (retries < 2) {
2142
- return this._act({
2143
- action,
2144
- steps,
2145
- modelName,
2146
- useVision,
2147
- verifierUseVision,
2148
- retries: retries + 1,
2149
- chunksSeen,
2150
- requestId,
2151
- domSettleTimeoutMs
2152
- });
2153
- }
2154
- }
2155
- if (method === "click") {
2156
- this.log({
2157
- category: "action",
2158
- message: `Clicking element, checking for page navigation`,
2159
- level: 1
2160
- });
2161
- const newOpenedTab = yield Promise.race([
2162
- new Promise((resolve) => {
2163
- this.context.once("page", (page) => resolve(page));
2164
- setTimeout(() => resolve(null), 1500);
2165
- })
2166
- ]);
2167
- this.log({
2168
- category: "action",
2169
- message: `Clicked element, ${newOpenedTab ? "opened a new tab" : "no new tabs opened"}`,
2170
- level: 1
2171
- });
2172
- if (newOpenedTab) {
2173
- this.log({
2174
- category: "action",
2175
- message: `New page detected (new tab) with URL: ${newOpenedTab.url()}`,
2176
- level: 1
2177
- });
2178
- yield newOpenedTab.close();
2179
- yield this.page.goto(newOpenedTab.url());
2180
- yield this.page.waitForLoadState("domcontentloaded");
2181
- yield this._waitForSettledDom(domSettleTimeoutMs);
2182
- }
2183
- yield Promise.race([
2184
- this.page.waitForLoadState("networkidle"),
2185
- new Promise((resolve) => setTimeout(resolve, 5e3))
2186
- ]).catch((e) => {
2187
- this.log({
2188
- category: "action",
2189
- message: `Network idle timeout hit`,
2190
- level: 1
2191
- });
2192
- });
2193
- this.log({
2194
- category: "action",
2195
- message: `Finished waiting for (possible) page navigation`,
2196
- level: 1
2197
- });
2198
- if (this.page.url() !== initialUrl) {
2199
- this.log({
2200
- category: "action",
2201
- message: `New page detected with URL: ${this.page.url()}`,
2202
- level: 1
2203
- });
2204
- }
2205
- }
2206
- } else {
2207
- this.log({
2208
- category: "action",
2209
- message: `Chosen method ${method} is invalid`,
2210
- level: 1
2211
- });
2212
- if (retries < 2) {
2213
- return this._act({
2214
- action,
2215
- steps,
2216
- modelName: model,
2217
- useVision,
2218
- verifierUseVision,
2219
- retries: retries + 1,
2220
- chunksSeen,
2221
- requestId
2222
- });
2223
- } else {
2224
- if (this.enableCaching) {
2225
- this.llmProvider.cleanRequestCache(requestId);
2226
- }
2227
- return {
2228
- success: false,
2229
- message: `Internal error: Chosen method ${method} is invalid`,
2230
- action
2231
- };
2232
- }
2233
- }
2234
- let newSteps = steps + (!steps.endsWith("\n") ? "\n" : "") + `## Step: ${response.step}
2235
- Element: ${elementText}
2236
- Action: ${response.method}
2237
- Reasoning: ${response.why}
2238
- `;
2239
- if (urlChangeString) {
2240
- newSteps += ` Result (Important): ${urlChangeString}
2241
-
2242
- `;
2243
- }
2244
- let actionComplete = false;
2245
- if (response.completed) {
2246
- this.log({
2247
- category: "action",
2248
- message: `Action marked as completed, Verifying if this is true...`,
2249
- level: 1
2250
- });
2251
- let domElements = void 0;
2252
- let fullpageScreenshot = void 0;
2253
- if (verifierUseVision) {
2254
- try {
2255
- const screenshotService = new ScreenshotService(
2256
- this.page,
2257
- selectorMap,
2258
- this.verbose
2259
- );
2260
- fullpageScreenshot = yield screenshotService.getScreenshot(
2261
- true,
2262
- 15
2263
- );
2264
- } catch (e) {
2265
- this.log({
2266
- category: "action",
2267
- message: `Error getting full page screenshot: ${e.message}
2268
- . Trying again...`,
2269
- level: 1
2270
- });
2271
- const screenshotService = new ScreenshotService(
2272
- this.page,
2273
- selectorMap,
2274
- this.verbose
2275
- );
2276
- fullpageScreenshot = yield screenshotService.getScreenshot(
2277
- true,
2278
- 15
2279
- );
2280
- }
2281
- } else {
2282
- ({ outputString: domElements } = yield this.page.evaluate(() => {
2283
- return window.processAllOfDom();
2284
- }));
2285
- }
2286
- actionComplete = yield verifyActCompletion({
2287
- goal: action,
2288
- steps: newSteps,
2289
- llmProvider: this.llmProvider,
2290
- modelName: model,
2291
- screenshot: fullpageScreenshot,
2292
- domElements,
2293
- logger: this.logger,
2294
- requestId
2295
- });
2296
- this.log({
2297
- category: "action",
2298
- message: `Action completion verification result: ${actionComplete}`,
2299
- level: 1
2300
- });
2301
- }
2302
- if (!actionComplete) {
2303
- this.log({
2304
- category: "action",
2305
- message: `Continuing to next action step`,
2306
- level: 1
2307
- });
2308
- return this._act({
2309
- action,
2310
- steps: newSteps,
2311
- modelName,
2312
- chunksSeen,
2313
- useVision,
2314
- verifierUseVision,
2315
- requestId,
2316
- domSettleTimeoutMs
2317
- });
2318
- } else {
2319
- this.log({
2320
- category: "action",
2321
- message: `Action completed successfully`,
2322
- level: 1
2323
- });
2324
- yield this._recordAction(action, response.step);
2325
- return {
2326
- success: true,
2327
- message: `Action completed successfully: ${steps}${response.step}`,
2328
- action
2329
- };
2330
- }
2331
- } catch (error) {
2332
- this.log({
2333
- category: "action",
2334
- message: `Error performing action (Retries: ${retries}): ${error.message}
2335
- Trace: ${error.stack}`,
2336
- level: 1
2337
- });
2338
- if (retries < 2) {
2339
- return this._act({
2340
- action,
2341
- steps,
2342
- modelName,
2343
- useVision,
2344
- verifierUseVision,
2345
- retries: retries + 1,
2346
- chunksSeen,
2347
- requestId,
2348
- domSettleTimeoutMs
2349
- });
2350
- }
2351
- yield this._recordAction(action, "");
2352
- if (this.enableCaching) {
2353
- this.llmProvider.cleanRequestCache(requestId);
2354
- }
2355
- return {
2356
- success: false,
2357
- message: `Error performing action: ${error.message}`,
2358
- action
2359
- };
2360
- }
3080
+ yield this._recordObservation(instruction, elementsWithSelectors);
3081
+ return elementsWithSelectors;
2361
3082
  });
2362
3083
  }
2363
3084
  act(_0) {
@@ -2365,6 +3086,7 @@ Trace: ${error.stack}`,
2365
3086
  action,
2366
3087
  modelName,
2367
3088
  useVision = "fallback",
3089
+ variables = {},
2368
3090
  domSettleTimeoutMs
2369
3091
  }) {
2370
3092
  useVision = useVision != null ? useVision : "fallback";
@@ -2373,13 +3095,19 @@ Trace: ${error.stack}`,
2373
3095
  category: "act",
2374
3096
  message: `Running act with action: ${action}, requestId: ${requestId}`
2375
3097
  });
2376
- return this._act({
3098
+ if (variables) {
3099
+ this.variables = __spreadValues(__spreadValues({}, this.variables), variables);
3100
+ }
3101
+ return this.actHandler.act({
2377
3102
  action,
2378
3103
  modelName,
2379
3104
  chunksSeen: [],
2380
3105
  useVision,
2381
3106
  verifierUseVision: useVision !== false,
2382
3107
  requestId,
3108
+ variables,
3109
+ previousSelectors: [],
3110
+ skipActionCacheForThisStep: false,
2383
3111
  domSettleTimeoutMs
2384
3112
  }).catch((e) => {
2385
3113
  this.logger({
@@ -2387,9 +3115,6 @@ Trace: ${error.stack}`,
2387
3115
  message: `Error acting: ${e.message}
2388
3116
  Trace: ${e.stack}`
2389
3117
  });
2390
- if (this.enableCaching) {
2391
- this.llmProvider.cleanRequestCache(requestId);
2392
- }
2393
3118
  return {
2394
3119
  success: false,
2395
3120
  message: `Internal error: Error acting: ${e.message}`,