@browserbasehq/stagehand 1.1.0 → 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -8,6 +8,7 @@ var __getOwnPropSymbols = Object.getOwnPropertySymbols;
8
8
  var __getProtoOf = Object.getPrototypeOf;
9
9
  var __hasOwnProp = Object.prototype.hasOwnProperty;
10
10
  var __propIsEnum = Object.prototype.propertyIsEnumerable;
11
+ var __reflectGet = Reflect.get;
11
12
  var __defNormalProp = (obj, key, value) => key in obj ? __defProp(obj, key, { enumerable: true, configurable: true, writable: true, value }) : obj[key] = value;
12
13
  var __spreadValues = (a, b) => {
13
14
  for (var prop in b || (b = {}))
@@ -54,6 +55,7 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
54
55
  mod
55
56
  ));
56
57
  var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
58
+ var __superGet = (cls, obj, key) => __reflectGet(__getProtoOf(cls), key, obj);
57
59
  var __async = (__this, __arguments, generator) => {
58
60
  return new Promise((resolve, reject) => {
59
61
  var fulfilled = (value) => {
@@ -82,7 +84,6 @@ __export(lib_exports, {
82
84
  });
83
85
  module.exports = __toCommonJS(lib_exports);
84
86
  var import_test = require("@playwright/test");
85
- var import_crypto = __toESM(require("crypto"));
86
87
  var import_fs2 = __toESM(require("fs"));
87
88
  var import_sdk2 = require("@browserbasehq/sdk");
88
89
 
@@ -95,6 +96,7 @@ You are given:
95
96
  1. the user's overall goal
96
97
  2. the steps that you've taken so far
97
98
  3. a list of active DOM elements in this chunk to consider to get closer to the goal.
99
+ 4. Optionally, a list of variable names that the user has provided that you may use to accomplish the goal. To use the variables, you must use the special <|VARIABLE_NAME|> syntax.
98
100
 
99
101
  You have 2 tools that you can call: doAction, and skipSection. Do action only performs Playwright actions. Do not perform any other actions.
100
102
 
@@ -156,8 +158,8 @@ function buildActSystemPrompt() {
156
158
  content: actSystemPrompt
157
159
  };
158
160
  }
159
- function buildActUserPrompt(action, steps = "None", domElements) {
160
- const actUserPrompt = `
161
+ function buildActUserPrompt(action, steps = "None", domElements, variables) {
162
+ let actUserPrompt = `
161
163
  # My Goal
162
164
  ${action}
163
165
 
@@ -167,6 +169,12 @@ ${steps}
167
169
  # Current Active Dom Elements
168
170
  ${domElements}
169
171
  `;
172
+ if (variables) {
173
+ actUserPrompt += `
174
+ # Variables
175
+ ${Object.entries(variables).map(([key, value]) => `<|${key.toUpperCase()}|>`).join("\n")}
176
+ `;
177
+ }
170
178
  return {
171
179
  role: "user",
172
180
  content: actUserPrompt
@@ -392,6 +400,14 @@ function verifyActCompletion(_0) {
392
400
  return response.completed;
393
401
  });
394
402
  }
403
+ function fillInVariables(text, variables) {
404
+ let processedText = text;
405
+ Object.entries(variables).forEach(([key, value]) => {
406
+ const placeholder = `<|${key.toUpperCase()}|>`;
407
+ processedText = processedText.replace(placeholder, value);
408
+ });
409
+ return processedText;
410
+ }
395
411
  function act(_0) {
396
412
  return __async(this, arguments, function* ({
397
413
  action,
@@ -402,12 +418,13 @@ function act(_0) {
402
418
  screenshot,
403
419
  retries = 0,
404
420
  logger,
405
- requestId
421
+ requestId,
422
+ variables
406
423
  }) {
407
424
  const llmClient = llmProvider.getClient(modelName, requestId);
408
425
  const messages = [
409
426
  buildActSystemPrompt(),
410
- buildActUserPrompt(action, steps, domElements)
427
+ buildActUserPrompt(action, steps, domElements, variables)
411
428
  ];
412
429
  const response = yield llmClient.createChatCompletion({
413
430
  model: modelName,
@@ -596,7 +613,18 @@ var OpenAIClient = class {
596
613
  if (this.enableCaching) {
597
614
  const cachedResponse = yield this.cache.get(cacheOptions, this.requestId);
598
615
  if (cachedResponse) {
616
+ this.logger({
617
+ category: "llm_cache",
618
+ message: `LLM Cache hit - returning cached response`,
619
+ level: 1
620
+ });
599
621
  return cachedResponse;
622
+ } else {
623
+ this.logger({
624
+ category: "llm_cache",
625
+ message: `LLM Cache miss - no cached response found`,
626
+ level: 1
627
+ });
600
628
  }
601
629
  }
602
630
  if (options.image) {
@@ -673,7 +701,18 @@ var AnthropicClient = class {
673
701
  if (this.enableCaching) {
674
702
  const cachedResponse = yield this.cache.get(cacheOptions, this.requestId);
675
703
  if (cachedResponse) {
704
+ this.logger({
705
+ category: "llm_cache",
706
+ message: `LLM Cache hit - returning cached response`,
707
+ level: 1
708
+ });
676
709
  return cachedResponse;
710
+ } else {
711
+ this.logger({
712
+ category: "llm_cache",
713
+ message: `LLM Cache miss - no cached response found`,
714
+ level: 1
715
+ });
677
716
  }
678
717
  }
679
718
  const systemMessage = options.messages.find((msg) => msg.role === "system");
@@ -801,24 +840,24 @@ var AnthropicClient = class {
801
840
  }
802
841
  };
803
842
 
804
- // lib/llm/LLMCache.ts
843
+ // lib/cache/BaseCache.ts
805
844
  var fs = __toESM(require("fs"));
806
845
  var path = __toESM(require("path"));
807
846
  var crypto = __toESM(require("crypto"));
808
- var LLMCache = class {
809
- constructor(logger, cacheDir = path.join(process.cwd(), "tmp", ".cache"), cacheFile = "llm_calls.json") {
847
+ var BaseCache = class {
848
+ constructor(logger, cacheDir = path.join(process.cwd(), "tmp", ".cache"), cacheFile = "cache.json") {
810
849
  this.CACHE_MAX_AGE_MS = 7 * 24 * 60 * 60 * 1e3;
811
850
  // 1 week in milliseconds
812
851
  this.CLEANUP_PROBABILITY = 0.01;
813
- // 1% chance
814
852
  this.LOCK_TIMEOUT_MS = 1e3;
815
- this.lock_acquired = false;
816
- this.count_lock_acquire_failures = 0;
817
- this.request_id_to_used_hashes = {};
853
+ this.lockAcquired = false;
854
+ this.lockAcquireFailures = 0;
855
+ // Added for request ID tracking
856
+ this.requestIdToUsedHashes = {};
818
857
  this.logger = logger;
819
858
  this.cacheDir = cacheDir;
820
859
  this.cacheFile = path.join(cacheDir, cacheFile);
821
- this.lockFile = path.join(cacheDir, "llm_cache.lock");
860
+ this.lockFile = path.join(cacheDir, "cache.lock");
822
861
  this.ensureCacheDirectory();
823
862
  this.setupProcessHandlers();
824
863
  }
@@ -832,11 +871,11 @@ var LLMCache = class {
832
871
  process.on("SIGTERM", releaseLockAndExit);
833
872
  process.on("uncaughtException", (err) => {
834
873
  this.logger({
835
- category: "llm_cache",
874
+ category: "base_cache",
836
875
  message: `Uncaught exception: ${err}`,
837
876
  level: 2
838
877
  });
839
- if (this.lock_acquired) {
878
+ if (this.lockAcquired) {
840
879
  releaseLockAndExit();
841
880
  }
842
881
  });
@@ -844,6 +883,11 @@ var LLMCache = class {
844
883
  ensureCacheDirectory() {
845
884
  if (!fs.existsSync(this.cacheDir)) {
846
885
  fs.mkdirSync(this.cacheDir, { recursive: true });
886
+ this.logger({
887
+ category: "base_cache",
888
+ message: `Created cache directory at ${this.cacheDir}`,
889
+ level: 1
890
+ });
847
891
  }
848
892
  }
849
893
  createHash(data) {
@@ -862,25 +906,35 @@ var LLMCache = class {
862
906
  const lockAge = Date.now() - fs.statSync(this.lockFile).mtimeMs;
863
907
  if (lockAge > this.LOCK_TIMEOUT_MS) {
864
908
  fs.unlinkSync(this.lockFile);
909
+ this.logger({
910
+ category: "base_cache",
911
+ message: "Stale lock file removed",
912
+ level: 1
913
+ });
865
914
  }
866
915
  }
867
916
  fs.writeFileSync(this.lockFile, process.pid.toString(), { flag: "wx" });
868
- this.count_lock_acquire_failures = 0;
869
- this.lock_acquired = true;
917
+ this.lockAcquireFailures = 0;
918
+ this.lockAcquired = true;
919
+ this.logger({
920
+ category: "base_cache",
921
+ message: "Lock acquired",
922
+ level: 1
923
+ });
870
924
  return true;
871
925
  } catch (error) {
872
926
  yield this.sleep(5);
873
927
  }
874
928
  }
875
929
  this.logger({
876
- category: "llm_cache",
930
+ category: "base_cache",
877
931
  message: "Failed to acquire lock after timeout",
878
932
  level: 2
879
933
  });
880
- this.count_lock_acquire_failures++;
881
- if (this.count_lock_acquire_failures >= 3) {
934
+ this.lockAcquireFailures++;
935
+ if (this.lockAcquireFailures >= 3) {
882
936
  this.logger({
883
- category: "llm_cache",
937
+ category: "base_cache",
884
938
  message: "Failed to acquire lock 3 times in a row. Releasing lock manually.",
885
939
  level: 1
886
940
  });
@@ -893,111 +947,125 @@ var LLMCache = class {
893
947
  try {
894
948
  if (fs.existsSync(this.lockFile)) {
895
949
  fs.unlinkSync(this.lockFile);
950
+ this.logger({
951
+ category: "base_cache",
952
+ message: "Lock released",
953
+ level: 1
954
+ });
896
955
  }
897
- this.lock_acquired = false;
956
+ this.lockAcquired = false;
898
957
  } catch (error) {
899
958
  this.logger({
900
- category: "llm_cache",
959
+ category: "base_cache",
901
960
  message: `Error releasing lock: ${error}`,
902
961
  level: 2
903
962
  });
904
963
  }
905
964
  }
965
+ /**
966
+ * Cleans up stale cache entries that exceed the maximum age.
967
+ */
968
+ cleanupStaleEntries() {
969
+ return __async(this, null, function* () {
970
+ if (!(yield this.acquireLock())) {
971
+ this.logger({
972
+ category: "llm_cache",
973
+ message: "Failed to acquire lock for cleanup",
974
+ level: 2
975
+ });
976
+ return;
977
+ }
978
+ try {
979
+ const cache = this.readCache();
980
+ const now = Date.now();
981
+ let entriesRemoved = 0;
982
+ for (const [hash, entry] of Object.entries(cache)) {
983
+ if (now - entry.timestamp > this.CACHE_MAX_AGE_MS) {
984
+ delete cache[hash];
985
+ entriesRemoved++;
986
+ }
987
+ }
988
+ if (entriesRemoved > 0) {
989
+ this.writeCache(cache);
990
+ this.logger({
991
+ category: "llm_cache",
992
+ message: `Cleaned up ${entriesRemoved} stale cache entries`,
993
+ level: 1
994
+ });
995
+ }
996
+ } catch (error) {
997
+ this.logger({
998
+ category: "llm_cache",
999
+ message: `Error during cache cleanup: ${error}`,
1000
+ level: 2
1001
+ });
1002
+ } finally {
1003
+ this.releaseLock();
1004
+ }
1005
+ });
1006
+ }
906
1007
  readCache() {
907
1008
  if (fs.existsSync(this.cacheFile)) {
908
- return JSON.parse(fs.readFileSync(this.cacheFile, "utf-8"));
1009
+ try {
1010
+ const data = fs.readFileSync(this.cacheFile, "utf-8");
1011
+ return JSON.parse(data);
1012
+ } catch (error) {
1013
+ this.logger({
1014
+ category: "base_cache",
1015
+ message: `Error reading cache file: ${error}. Resetting cache.`,
1016
+ level: 1
1017
+ });
1018
+ this.resetCache();
1019
+ return {};
1020
+ }
909
1021
  }
910
1022
  return {};
911
1023
  }
912
1024
  writeCache(cache) {
913
1025
  try {
914
- if (Math.random() < this.CLEANUP_PROBABILITY) {
915
- this.cleanupStaleEntries(cache);
916
- }
917
1026
  fs.writeFileSync(this.cacheFile, JSON.stringify(cache, null, 2));
918
- } finally {
919
- this.releaseLock();
920
- }
921
- }
922
- cleanupStaleEntries(cache) {
923
- if (!this.acquireLock()) {
924
- this.logger({
925
- category: "llm_cache",
926
- message: "Failed to acquire lock for cleaning up cache",
927
- level: 2
928
- });
929
- return;
930
- }
931
- try {
932
- const now = Date.now();
933
- let entriesRemoved = 0;
934
- for (const [hash, entry] of Object.entries(cache)) {
935
- if (now - entry.timestamp > this.CACHE_MAX_AGE_MS) {
936
- delete cache[hash];
937
- entriesRemoved++;
938
- }
939
- }
940
- if (entriesRemoved > 0) {
941
- this.logger({
942
- category: "llm_cache",
943
- message: `Cleaned up ${entriesRemoved} stale cache entries`,
944
- level: 1
945
- });
946
- }
947
- } catch (error) {
948
1027
  this.logger({
949
- category: "llm_cache",
950
- message: `Error cleaning up stale cache entries: ${error}`,
1028
+ category: "base_cache",
1029
+ message: "Cache written to file",
951
1030
  level: 1
952
1031
  });
953
- } finally {
954
- this.releaseLock();
955
- }
956
- }
957
- resetCache() {
958
- if (!this.acquireLock()) {
1032
+ } catch (error) {
959
1033
  this.logger({
960
- category: "llm_cache",
961
- message: "Failed to acquire lock for resetting cache",
1034
+ category: "base_cache",
1035
+ message: `Error writing cache file: ${error}`,
962
1036
  level: 2
963
1037
  });
964
- return;
965
- }
966
- try {
967
- this.ensureCacheDirectory();
968
- fs.writeFileSync(this.cacheFile, "{}");
969
1038
  } finally {
970
1039
  this.releaseLock();
971
1040
  }
972
1041
  }
973
- get(options, requestId) {
1042
+ /**
1043
+ * Retrieves data from the cache based on the provided options.
1044
+ * @param hashObj - The options used to generate the cache key.
1045
+ * @param requestId - The identifier for the current request.
1046
+ * @returns The cached data if available, otherwise null.
1047
+ */
1048
+ get(hashObj, requestId) {
974
1049
  return __async(this, null, function* () {
975
- var _a, _b;
976
1050
  if (!(yield this.acquireLock())) {
977
1051
  this.logger({
978
- category: "llm_cache",
1052
+ category: "base_cache",
979
1053
  message: "Failed to acquire lock for getting cache",
980
1054
  level: 2
981
1055
  });
982
1056
  return null;
983
1057
  }
984
1058
  try {
985
- const hash = this.createHash(options);
1059
+ const hash = this.createHash(hashObj);
986
1060
  const cache = this.readCache();
987
1061
  if (cache[hash]) {
988
- this.logger({
989
- category: "llm_cache",
990
- message: "Cache hit",
991
- level: 1
992
- });
993
- (_b = (_a = this.request_id_to_used_hashes)[requestId]) != null ? _b : _a[requestId] = [];
994
- this.request_id_to_used_hashes[requestId].push(hash);
995
- return cache[hash].response;
1062
+ this.trackRequestIdUsage(requestId, hash);
1063
+ return cache[hash].data;
996
1064
  }
997
1065
  return null;
998
1066
  } catch (error) {
999
1067
  this.logger({
1000
- category: "llm_cache",
1068
+ category: "base_cache",
1001
1069
  message: `Error getting cache: ${error}. Resetting cache.`,
1002
1070
  level: 1
1003
1071
  });
@@ -1008,82 +1076,189 @@ var LLMCache = class {
1008
1076
  }
1009
1077
  });
1010
1078
  }
1011
- deleteCacheForRequestId(requestId) {
1079
+ /**
1080
+ * Stores data in the cache based on the provided options and requestId.
1081
+ * @param hashObj - The options used to generate the cache key.
1082
+ * @param data - The data to be cached.
1083
+ * @param requestId - The identifier for the cache entry.
1084
+ */
1085
+ set(hashObj, data, requestId) {
1012
1086
  return __async(this, null, function* () {
1013
- var _a;
1014
1087
  if (!(yield this.acquireLock())) {
1015
1088
  this.logger({
1016
- category: "llm_cache",
1017
- message: "Failed to acquire lock for deleting cache",
1089
+ category: "base_cache",
1090
+ message: "Failed to acquire lock for setting cache",
1018
1091
  level: 2
1019
1092
  });
1020
1093
  return;
1021
1094
  }
1022
1095
  try {
1096
+ const hash = this.createHash(hashObj);
1023
1097
  const cache = this.readCache();
1024
- let entriesRemoved = [];
1025
- for (const hash of (_a = this.request_id_to_used_hashes[requestId]) != null ? _a : []) {
1026
- if (cache[hash]) {
1027
- entriesRemoved.push(cache[hash]);
1028
- delete cache[hash];
1029
- }
1030
- }
1031
- this.logger({
1032
- category: "llm_cache",
1033
- message: `Deleted ${entriesRemoved.length} cache entries for requestId ${requestId}`,
1034
- level: 1
1035
- });
1098
+ cache[hash] = {
1099
+ data,
1100
+ timestamp: Date.now(),
1101
+ requestId
1102
+ };
1036
1103
  this.writeCache(cache);
1037
- } catch (exception) {
1104
+ this.trackRequestIdUsage(requestId, hash);
1105
+ } catch (error) {
1038
1106
  this.logger({
1039
- category: "llm_cache",
1040
- message: `Error deleting cache for requestId ${requestId}: ${exception}`,
1107
+ category: "base_cache",
1108
+ message: `Error setting cache: ${error}. Resetting cache.`,
1041
1109
  level: 1
1042
1110
  });
1111
+ this.resetCache();
1043
1112
  } finally {
1044
1113
  this.releaseLock();
1114
+ if (Math.random() < this.CLEANUP_PROBABILITY) {
1115
+ this.cleanupStaleEntries();
1116
+ }
1045
1117
  }
1046
1118
  });
1047
1119
  }
1048
- set(options, response, requestId) {
1120
+ delete(hashObj) {
1049
1121
  return __async(this, null, function* () {
1050
- var _a, _b;
1051
1122
  if (!(yield this.acquireLock())) {
1052
1123
  this.logger({
1053
- category: "llm_cache",
1054
- message: "Failed to acquire lock for setting cache",
1124
+ category: "base_cache",
1125
+ message: "Failed to acquire lock for removing cache entry",
1055
1126
  level: 2
1056
1127
  });
1057
1128
  return;
1058
1129
  }
1059
1130
  try {
1060
- const hash = this.createHash(options);
1131
+ const hash = this.createHash(hashObj);
1061
1132
  const cache = this.readCache();
1062
- cache[hash] = {
1063
- response,
1064
- timestamp: Date.now(),
1065
- requestId
1066
- };
1067
- this.writeCache(cache);
1068
- (_b = (_a = this.request_id_to_used_hashes)[requestId]) != null ? _b : _a[requestId] = [];
1069
- this.request_id_to_used_hashes[requestId].push(hash);
1133
+ if (cache[hash]) {
1134
+ delete cache[hash];
1135
+ this.writeCache(cache);
1136
+ } else {
1137
+ this.logger({
1138
+ category: "base_cache",
1139
+ message: "Cache entry not found to delete",
1140
+ level: 1
1141
+ });
1142
+ }
1143
+ } catch (error) {
1070
1144
  this.logger({
1071
- category: "llm_cache",
1072
- message: "Cache miss - saved new response",
1073
- level: 1
1145
+ category: "base_cache",
1146
+ message: `Error removing cache entry: ${error}`,
1147
+ level: 2
1148
+ });
1149
+ } finally {
1150
+ this.releaseLock();
1151
+ }
1152
+ });
1153
+ }
1154
+ /**
1155
+ * Tracks the usage of a hash with a specific requestId.
1156
+ * @param requestId - The identifier for the current request.
1157
+ * @param hash - The cache key hash.
1158
+ */
1159
+ trackRequestIdUsage(requestId, hash) {
1160
+ var _a, _b;
1161
+ (_b = (_a = this.requestIdToUsedHashes)[requestId]) != null ? _b : _a[requestId] = [];
1162
+ this.requestIdToUsedHashes[requestId].push(hash);
1163
+ }
1164
+ /**
1165
+ * Deletes all cache entries associated with a specific requestId.
1166
+ * @param requestId - The identifier for the request whose cache entries should be deleted.
1167
+ */
1168
+ deleteCacheForRequestId(requestId) {
1169
+ return __async(this, null, function* () {
1170
+ var _a;
1171
+ if (!(yield this.acquireLock())) {
1172
+ this.logger({
1173
+ category: "base_cache",
1174
+ message: "Failed to acquire lock for deleting cache",
1175
+ level: 2
1074
1176
  });
1177
+ return;
1178
+ }
1179
+ try {
1180
+ const cache = this.readCache();
1181
+ const hashes = (_a = this.requestIdToUsedHashes[requestId]) != null ? _a : [];
1182
+ let entriesRemoved = 0;
1183
+ for (const hash of hashes) {
1184
+ if (cache[hash]) {
1185
+ delete cache[hash];
1186
+ entriesRemoved++;
1187
+ }
1188
+ }
1189
+ if (entriesRemoved > 0) {
1190
+ this.writeCache(cache);
1191
+ } else {
1192
+ this.logger({
1193
+ category: "base_cache",
1194
+ message: `No cache entries found for requestId ${requestId}`,
1195
+ level: 1
1196
+ });
1197
+ }
1198
+ delete this.requestIdToUsedHashes[requestId];
1075
1199
  } catch (error) {
1076
1200
  this.logger({
1077
- category: "llm_cache",
1078
- message: `Error setting cache: ${error}. Resetting cache.`,
1079
- level: 1
1201
+ category: "base_cache",
1202
+ message: `Error deleting cache for requestId ${requestId}: ${error}`,
1203
+ level: 2
1080
1204
  });
1081
- this.resetCache();
1082
1205
  } finally {
1083
1206
  this.releaseLock();
1084
1207
  }
1085
1208
  });
1086
1209
  }
1210
+ /**
1211
+ * Resets the entire cache by clearing the cache file.
1212
+ */
1213
+ resetCache() {
1214
+ try {
1215
+ fs.writeFileSync(this.cacheFile, "{}");
1216
+ this.requestIdToUsedHashes = {};
1217
+ } catch (error) {
1218
+ this.logger({
1219
+ category: "base_cache",
1220
+ message: `Error resetting cache: ${error}`,
1221
+ level: 2
1222
+ });
1223
+ } finally {
1224
+ this.releaseLock();
1225
+ }
1226
+ }
1227
+ };
1228
+
1229
+ // lib/cache/LLMCache.ts
1230
+ var LLMCache = class _LLMCache extends BaseCache {
1231
+ constructor(logger, cacheDir, cacheFile) {
1232
+ super(logger, cacheDir, cacheFile || "llm_calls.json");
1233
+ }
1234
+ /**
1235
+ * Overrides the get method to track used hashes by requestId.
1236
+ * @param options - The options used to generate the cache key.
1237
+ * @param requestId - The identifier for the current request.
1238
+ * @returns The cached data if available, otherwise null.
1239
+ */
1240
+ get(options, requestId) {
1241
+ return __async(this, null, function* () {
1242
+ const data = yield __superGet(_LLMCache.prototype, this, "get").call(this, options, requestId);
1243
+ return data;
1244
+ });
1245
+ }
1246
+ /**
1247
+ * Overrides the set method to include cache cleanup logic.
1248
+ * @param options - The options used to generate the cache key.
1249
+ * @param data - The data to be cached.
1250
+ * @param requestId - The identifier for the current request.
1251
+ */
1252
+ set(options, data, requestId) {
1253
+ return __async(this, null, function* () {
1254
+ yield __superGet(_LLMCache.prototype, this, "set").call(this, options, data, requestId);
1255
+ this.logger({
1256
+ category: "llm_cache",
1257
+ message: "Cache miss - saved new response",
1258
+ level: 1
1259
+ });
1260
+ });
1261
+ }
1087
1262
  };
1088
1263
 
1089
1264
  // lib/llm/LLMProvider.ts
@@ -1203,8 +1378,8 @@ var ScreenshotService = class _ScreenshotService {
1203
1378
  const { width, height } = yield image.metadata();
1204
1379
  const svgAnnotations = yield Promise.all(
1205
1380
  Object.entries(this.selectorMap).map(
1206
- (_0) => __async(this, [_0], function* ([id, selector]) {
1207
- return this.createElementAnnotation(id, selector);
1381
+ (_0) => __async(this, [_0], function* ([id, selectors]) {
1382
+ return this.createElementAnnotation(id, selectors);
1208
1383
  })
1209
1384
  )
1210
1385
  );
@@ -1226,18 +1401,25 @@ var ScreenshotService = class _ScreenshotService {
1226
1401
  return annotatedScreenshot;
1227
1402
  });
1228
1403
  }
1229
- createElementAnnotation(id, selector) {
1404
+ createElementAnnotation(id, selectors) {
1230
1405
  return __async(this, null, function* () {
1231
1406
  try {
1232
- const element = yield this.page.locator(`xpath=${selector}`).first();
1233
- const box = yield element.boundingBox();
1407
+ let element = null;
1408
+ const selectorPromises = selectors.map(
1409
+ (selector) => __async(this, null, function* () {
1410
+ try {
1411
+ element = yield this.page.locator(`xpath=${selector}`).first();
1412
+ const box2 = yield element.boundingBox({ timeout: 5e3 });
1413
+ return box2;
1414
+ } catch (e) {
1415
+ return null;
1416
+ }
1417
+ })
1418
+ );
1419
+ const boxes = yield Promise.all(selectorPromises);
1420
+ const box = boxes.find((b) => b !== null);
1234
1421
  if (!box) {
1235
- this.log({
1236
- category: "Debug",
1237
- message: `No bounding box for element ${id}`,
1238
- level: 2
1239
- });
1240
- return "";
1422
+ throw new Error(`Unable to create annotation for element ${id}`);
1241
1423
  }
1242
1424
  const scrollPosition = yield this.page.evaluate(() => ({
1243
1425
  scrollX: window.scrollX,
@@ -1264,8 +1446,8 @@ var ScreenshotService = class _ScreenshotService {
1264
1446
  `;
1265
1447
  } catch (error) {
1266
1448
  this.log({
1267
- category: "Error",
1268
- message: `Failed to create annotation for element ${id}: ${error}`,
1449
+ category: "Vision",
1450
+ message: `Warning: Failed to create annotation for element ${id}: ${error}, trace: ${error.stack}`,
1269
1451
  level: 0
1270
1452
  });
1271
1453
  return "";
@@ -1317,40 +1499,981 @@ var ScreenshotService = class _ScreenshotService {
1317
1499
  }
1318
1500
  };
1319
1501
 
1320
- // lib/index.ts
1321
- require("dotenv").config({ path: ".env" });
1322
- function getBrowser(apiKey, projectId, env = "LOCAL", headless = false, logger, browserbaseSessionCreateParams, browserbaseResumeSessionID) {
1323
- return __async(this, null, function* () {
1324
- if (env === "BROWSERBASE") {
1325
- if (!apiKey) {
1326
- logger({
1327
- category: "Init",
1328
- message: "BROWSERBASE_API_KEY is required to use BROWSERBASE env. Defaulting to LOCAL.",
1329
- level: 0
1330
- });
1331
- env = "LOCAL";
1332
- }
1333
- if (!projectId) {
1334
- logger({
1335
- category: "Init",
1336
- message: "BROWSERBASE_PROJECT_ID is required for some Browserbase features that may not work without it.",
1337
- level: 1
1338
- });
1339
- }
1340
- }
1341
- if (env === "BROWSERBASE") {
1342
- if (!apiKey) {
1343
- throw new Error("BROWSERBASE_API_KEY is required.");
1344
- }
1345
- let debugUrl = void 0;
1346
- let sessionUrl = void 0;
1347
- let sessionId;
1348
- let connectUrl;
1349
- const browserbase = new import_sdk2.Browserbase({
1350
- apiKey
1351
- });
1352
- if (browserbaseResumeSessionID) {
1353
- try {
1502
+ // lib/types.ts
1503
+ var PlaywrightCommandException = class extends Error {
1504
+ constructor(message) {
1505
+ super(message);
1506
+ this.name = "PlaywrightCommandException";
1507
+ }
1508
+ };
1509
+ var PlaywrightCommandMethodNotSupportedException = class extends Error {
1510
+ constructor(message) {
1511
+ super(message);
1512
+ this.name = "PlaywrightCommandMethodNotSupportedException";
1513
+ }
1514
+ };
1515
+
1516
+ // lib/cache/ActionCache.ts
1517
+ var ActionCache = class _ActionCache extends BaseCache {
1518
+ constructor(logger, cacheDir, cacheFile) {
1519
+ super(logger, cacheDir, cacheFile || "action_cache.json");
1520
+ }
1521
+ addActionStep(_0) {
1522
+ return __async(this, arguments, function* ({
1523
+ url,
1524
+ action,
1525
+ previousSelectors,
1526
+ playwrightCommand,
1527
+ componentString,
1528
+ xpaths,
1529
+ newStepString,
1530
+ completed,
1531
+ requestId
1532
+ }) {
1533
+ this.logger({
1534
+ category: "action_cache",
1535
+ message: `Adding action step to cache: ${action}, requestId: ${requestId}, url: ${url}, previousSelectors: ${previousSelectors}`,
1536
+ level: 1
1537
+ });
1538
+ yield this.set(
1539
+ { url, action, previousSelectors },
1540
+ {
1541
+ playwrightCommand,
1542
+ componentString,
1543
+ xpaths,
1544
+ newStepString,
1545
+ completed,
1546
+ previousSelectors,
1547
+ action
1548
+ },
1549
+ requestId
1550
+ );
1551
+ });
1552
+ }
1553
+ /**
1554
+ * Retrieves all actions for a specific trajectory.
1555
+ * @param trajectoryId - Unique identifier for the trajectory.
1556
+ * @param requestId - The identifier for the current request.
1557
+ * @returns An array of TrajectoryEntry objects or null if not found.
1558
+ */
1559
+ getActionStep(_0) {
1560
+ return __async(this, arguments, function* ({
1561
+ url,
1562
+ action,
1563
+ previousSelectors,
1564
+ requestId
1565
+ }) {
1566
+ const data = yield __superGet(_ActionCache.prototype, this, "get").call(this, { url, action, previousSelectors }, requestId);
1567
+ if (!data) {
1568
+ return null;
1569
+ }
1570
+ return data;
1571
+ });
1572
+ }
1573
+ removeActionStep(cacheHashObj) {
1574
+ return __async(this, null, function* () {
1575
+ yield __superGet(_ActionCache.prototype, this, "delete").call(this, cacheHashObj);
1576
+ });
1577
+ }
1578
+ /**
1579
+ * Clears all actions for a specific trajectory.
1580
+ * @param trajectoryId - Unique identifier for the trajectory.
1581
+ * @param requestId - The identifier for the current request.
1582
+ */
1583
+ clearAction(requestId) {
1584
+ return __async(this, null, function* () {
1585
+ yield __superGet(_ActionCache.prototype, this, "deleteCacheForRequestId").call(this, requestId);
1586
+ this.logger({
1587
+ category: "action_cache",
1588
+ message: `Cleared action for ID: ${requestId}`,
1589
+ level: 1
1590
+ });
1591
+ });
1592
+ }
1593
+ /**
1594
+ * Resets the entire action cache.
1595
+ */
1596
+ resetCache() {
1597
+ return __async(this, null, function* () {
1598
+ yield __superGet(_ActionCache.prototype, this, "resetCache").call(this);
1599
+ this.logger({
1600
+ category: "action_cache",
1601
+ message: "Action cache has been reset.",
1602
+ level: 1
1603
+ });
1604
+ });
1605
+ }
1606
+ };
1607
+
1608
+ // lib/utils.ts
1609
+ var import_crypto = __toESM(require("crypto"));
1610
+ function generateId(operation) {
1611
+ return import_crypto.default.createHash("sha256").update(operation).digest("hex");
1612
+ }
1613
+
1614
+ // lib/handlers/actHandler.ts
1615
+ var StagehandActHandler = class {
1616
+ constructor({
1617
+ stagehand,
1618
+ verbose,
1619
+ llmProvider,
1620
+ enableCaching,
1621
+ logger,
1622
+ waitForSettledDom,
1623
+ defaultModelName,
1624
+ startDomDebug,
1625
+ cleanupDomDebug
1626
+ }) {
1627
+ this.stagehand = stagehand;
1628
+ this.verbose = verbose;
1629
+ this.llmProvider = llmProvider;
1630
+ this.enableCaching = enableCaching;
1631
+ this.logger = logger;
1632
+ this.waitForSettledDom = waitForSettledDom;
1633
+ this.actionCache = new ActionCache(this.logger);
1634
+ this.defaultModelName = defaultModelName;
1635
+ this.startDomDebug = startDomDebug;
1636
+ this.cleanupDomDebug = cleanupDomDebug;
1637
+ this.actions = {};
1638
+ }
1639
+ _recordAction(action, result) {
1640
+ return __async(this, null, function* () {
1641
+ const id = generateId(action);
1642
+ this.actions[id] = { result, action };
1643
+ return id;
1644
+ });
1645
+ }
1646
+ _verifyActionCompletion(_0) {
1647
+ return __async(this, arguments, function* ({
1648
+ completed,
1649
+ verifierUseVision,
1650
+ requestId,
1651
+ action,
1652
+ steps,
1653
+ model,
1654
+ domSettleTimeoutMs
1655
+ }) {
1656
+ yield this.waitForSettledDom(domSettleTimeoutMs);
1657
+ const { selectorMap } = yield this.stagehand.page.evaluate(() => {
1658
+ return window.processAllOfDom();
1659
+ });
1660
+ let actionCompleted = false;
1661
+ if (completed) {
1662
+ this.stagehand.log({
1663
+ category: "action",
1664
+ message: `Action marked as completed, Verifying if this is true...`,
1665
+ level: 1
1666
+ });
1667
+ let domElements = void 0;
1668
+ let fullpageScreenshot = void 0;
1669
+ if (verifierUseVision) {
1670
+ try {
1671
+ const screenshotService = new ScreenshotService(
1672
+ this.stagehand.page,
1673
+ selectorMap,
1674
+ this.verbose
1675
+ );
1676
+ fullpageScreenshot = yield screenshotService.getScreenshot(true, 15);
1677
+ } catch (e) {
1678
+ this.stagehand.log({
1679
+ category: "action",
1680
+ message: `Error getting full page screenshot: ${e.message}
1681
+ . Trying again...`,
1682
+ level: 1
1683
+ });
1684
+ const screenshotService = new ScreenshotService(
1685
+ this.stagehand.page,
1686
+ selectorMap,
1687
+ this.verbose
1688
+ );
1689
+ fullpageScreenshot = yield screenshotService.getScreenshot(true, 15);
1690
+ }
1691
+ } else {
1692
+ ({ outputString: domElements } = yield this.stagehand.page.evaluate(
1693
+ () => {
1694
+ return window.processAllOfDom();
1695
+ }
1696
+ ));
1697
+ }
1698
+ actionCompleted = yield verifyActCompletion({
1699
+ goal: action,
1700
+ steps,
1701
+ llmProvider: this.llmProvider,
1702
+ modelName: model,
1703
+ screenshot: fullpageScreenshot,
1704
+ domElements,
1705
+ logger: this.logger,
1706
+ requestId
1707
+ });
1708
+ this.stagehand.log({
1709
+ category: "action",
1710
+ message: `Action completion verification result: ${actionCompleted}`,
1711
+ level: 1
1712
+ });
1713
+ }
1714
+ return actionCompleted;
1715
+ });
1716
+ }
1717
+ _performPlaywrightMethod(method, args, xpath, domSettleTimeoutMs) {
1718
+ return __async(this, null, function* () {
1719
+ const locator = this.stagehand.page.locator(`xpath=${xpath}`).first();
1720
+ const initialUrl = this.stagehand.page.url();
1721
+ if (method === "scrollIntoView") {
1722
+ this.stagehand.log({
1723
+ category: "action",
1724
+ message: `Scrolling element into view`,
1725
+ level: 2
1726
+ });
1727
+ try {
1728
+ yield locator.evaluate((element) => {
1729
+ element.scrollIntoView({ behavior: "smooth", block: "center" });
1730
+ }).catch((e) => {
1731
+ this.stagehand.log({
1732
+ category: "action",
1733
+ message: `Error scrolling element into view: ${e.message}
1734
+ Trace: ${e.stack}`,
1735
+ level: 1
1736
+ });
1737
+ });
1738
+ } catch (e) {
1739
+ this.stagehand.log({
1740
+ category: "action",
1741
+ message: `Error scrolling element into view: ${e.message}
1742
+ Trace: ${e.stack}`,
1743
+ level: 1
1744
+ });
1745
+ throw new PlaywrightCommandException(e.message);
1746
+ }
1747
+ } else if (method === "fill" || method === "type") {
1748
+ try {
1749
+ yield locator.fill("");
1750
+ yield locator.click();
1751
+ const text = args[0];
1752
+ for (const char of text) {
1753
+ yield this.stagehand.page.keyboard.type(char, {
1754
+ delay: Math.random() * 50 + 25
1755
+ });
1756
+ }
1757
+ } catch (e) {
1758
+ this.logger({
1759
+ category: "action",
1760
+ message: `Error filling element: ${e.message}
1761
+ Trace: ${e.stack}`,
1762
+ level: 1
1763
+ });
1764
+ throw new PlaywrightCommandException(e.message);
1765
+ }
1766
+ } else if (method === "press") {
1767
+ try {
1768
+ const key = args[0];
1769
+ yield this.stagehand.page.keyboard.press(key);
1770
+ } catch (e) {
1771
+ this.logger({
1772
+ category: "action",
1773
+ message: `Error pressing key: ${e.message}
1774
+ Trace: ${e.stack}`,
1775
+ level: 1
1776
+ });
1777
+ throw new PlaywrightCommandException(e.message);
1778
+ }
1779
+ } else if (typeof locator[method] === "function") {
1780
+ this.logger({
1781
+ category: "action",
1782
+ message: `Page URL before action: ${this.stagehand.page.url()}`,
1783
+ level: 2
1784
+ });
1785
+ try {
1786
+ yield locator[method](...args);
1787
+ } catch (e) {
1788
+ this.logger({
1789
+ category: "action",
1790
+ message: `Error performing method ${method} with args ${JSON.stringify(
1791
+ args
1792
+ )}: ${e.message}
1793
+ Trace: ${e.stack}`,
1794
+ level: 1
1795
+ });
1796
+ throw new PlaywrightCommandException(e.message);
1797
+ }
1798
+ if (method === "click") {
1799
+ this.logger({
1800
+ category: "action",
1801
+ message: `Clicking element, checking for page navigation`,
1802
+ level: 1
1803
+ });
1804
+ const newOpenedTab = yield Promise.race([
1805
+ new Promise((resolve) => {
1806
+ this.stagehand.context.once("page", (page) => resolve(page));
1807
+ setTimeout(() => resolve(null), 1500);
1808
+ })
1809
+ ]);
1810
+ this.logger({
1811
+ category: "action",
1812
+ message: `Clicked element, ${newOpenedTab ? "opened a new tab" : "no new tabs opened"}`,
1813
+ level: 1
1814
+ });
1815
+ if (newOpenedTab) {
1816
+ this.logger({
1817
+ category: "action",
1818
+ message: `New page detected (new tab) with URL: ${newOpenedTab.url()}`,
1819
+ level: 1
1820
+ });
1821
+ yield newOpenedTab.close();
1822
+ yield this.stagehand.page.goto(newOpenedTab.url());
1823
+ yield this.stagehand.page.waitForLoadState("domcontentloaded");
1824
+ yield this.waitForSettledDom(domSettleTimeoutMs);
1825
+ }
1826
+ yield Promise.race([
1827
+ this.stagehand.page.waitForLoadState("networkidle"),
1828
+ new Promise((resolve) => setTimeout(resolve, 5e3))
1829
+ ]).catch((e) => {
1830
+ this.logger({
1831
+ category: "action",
1832
+ message: `Network idle timeout hit`,
1833
+ level: 1
1834
+ });
1835
+ });
1836
+ this.logger({
1837
+ category: "action",
1838
+ message: `Finished waiting for (possible) page navigation`,
1839
+ level: 1
1840
+ });
1841
+ if (this.stagehand.page.url() !== initialUrl) {
1842
+ this.logger({
1843
+ category: "action",
1844
+ message: `New page detected with URL: ${this.stagehand.page.url()}`,
1845
+ level: 1
1846
+ });
1847
+ }
1848
+ }
1849
+ } else {
1850
+ this.logger({
1851
+ category: "action",
1852
+ message: `Chosen method ${method} is invalid`,
1853
+ level: 1
1854
+ });
1855
+ throw new PlaywrightCommandMethodNotSupportedException(
1856
+ `Method ${method} not supported`
1857
+ );
1858
+ }
1859
+ yield this.waitForSettledDom(domSettleTimeoutMs);
1860
+ });
1861
+ }
1862
+ _getComponentString(locator) {
1863
+ return __async(this, null, function* () {
1864
+ return yield locator.evaluate((el) => {
1865
+ const clone = el.cloneNode(true);
1866
+ const attributesToKeep = [
1867
+ "type",
1868
+ "name",
1869
+ "placeholder",
1870
+ "aria-label",
1871
+ "role",
1872
+ "href",
1873
+ "title",
1874
+ "alt"
1875
+ ];
1876
+ Array.from(clone.attributes).forEach((attr) => {
1877
+ if (!attributesToKeep.includes(attr.name)) {
1878
+ clone.removeAttribute(attr.name);
1879
+ }
1880
+ });
1881
+ const outerHtml = clone.outerHTML;
1882
+ return outerHtml.trim().replace(/\s+/g, " ");
1883
+ });
1884
+ });
1885
+ }
1886
+ getElement(xpath, timeout = 5e3) {
1887
+ return __async(this, null, function* () {
1888
+ try {
1889
+ const element = this.stagehand.page.locator(`xpath=${xpath}`).first();
1890
+ yield element.waitFor({ state: "attached", timeout });
1891
+ return element;
1892
+ } catch (e) {
1893
+ this.logger({
1894
+ category: "action",
1895
+ message: `Element with XPath ${xpath} not found within ${timeout}ms.`,
1896
+ level: 1
1897
+ });
1898
+ return null;
1899
+ }
1900
+ });
1901
+ }
1902
+ _checkIfCachedStepIsValid_oneXpath(cachedStep) {
1903
+ return __async(this, null, function* () {
1904
+ this.logger({
1905
+ category: "action",
1906
+ message: `Checking if cached step is valid: ${cachedStep.xpath}, ${cachedStep.savedComponentString}`,
1907
+ level: 1
1908
+ });
1909
+ try {
1910
+ const locator = yield this.getElement(cachedStep.xpath);
1911
+ if (!locator) {
1912
+ this.logger({
1913
+ category: "action",
1914
+ message: `Locator not found for xpath: ${cachedStep.xpath}`,
1915
+ level: 1
1916
+ });
1917
+ return false;
1918
+ }
1919
+ this.logger({
1920
+ category: "action",
1921
+ message: `locator element: ${yield this._getComponentString(locator)}`,
1922
+ level: 1
1923
+ });
1924
+ let currentComponent = yield this._getComponentString(locator);
1925
+ this.logger({
1926
+ category: "action",
1927
+ message: `Current text: ${currentComponent}`,
1928
+ level: 1
1929
+ });
1930
+ if (!currentComponent || !cachedStep.savedComponentString) {
1931
+ this.logger({
1932
+ category: "action",
1933
+ message: `Current text or cached text is undefined`,
1934
+ level: 1
1935
+ });
1936
+ return false;
1937
+ }
1938
+ const normalizedCurrentText = currentComponent.trim().replace(/\s+/g, " ");
1939
+ const normalizedCachedText = cachedStep.savedComponentString.trim().replace(/\s+/g, " ");
1940
+ if (normalizedCurrentText !== normalizedCachedText) {
1941
+ this.logger({
1942
+ category: "action",
1943
+ message: `Current text and cached text do not match: ${normalizedCurrentText} !== ${normalizedCachedText}`,
1944
+ level: 1
1945
+ });
1946
+ return false;
1947
+ }
1948
+ return true;
1949
+ } catch (e) {
1950
+ this.logger({
1951
+ category: "action",
1952
+ message: `Error checking if cached step is valid: ${e.message}
1953
+ Trace: ${e.stack}`,
1954
+ level: 1
1955
+ });
1956
+ return false;
1957
+ }
1958
+ });
1959
+ }
1960
+ _getValidCachedStepXpath(cachedStep) {
1961
+ return __async(this, null, function* () {
1962
+ const reversedXpaths = [...cachedStep.xpaths].reverse();
1963
+ for (const xpath of reversedXpaths) {
1964
+ const isValid = yield this._checkIfCachedStepIsValid_oneXpath({
1965
+ xpath,
1966
+ savedComponentString: cachedStep.savedComponentString
1967
+ });
1968
+ if (isValid) {
1969
+ return xpath;
1970
+ }
1971
+ }
1972
+ return null;
1973
+ });
1974
+ }
1975
+ _runCachedActionIfAvailable(_0) {
1976
+ return __async(this, arguments, function* ({
1977
+ action,
1978
+ previousSelectors,
1979
+ requestId,
1980
+ steps,
1981
+ chunksSeen,
1982
+ modelName,
1983
+ useVision,
1984
+ verifierUseVision,
1985
+ retries,
1986
+ variables,
1987
+ model,
1988
+ domSettleTimeoutMs
1989
+ }) {
1990
+ const cacheObj = {
1991
+ url: this.stagehand.page.url(),
1992
+ action,
1993
+ previousSelectors,
1994
+ requestId
1995
+ };
1996
+ this.logger({
1997
+ category: "action",
1998
+ message: `Checking action cache for: ${JSON.stringify(cacheObj)}`,
1999
+ level: 1
2000
+ });
2001
+ const cachedStep = yield this.actionCache.getActionStep(cacheObj);
2002
+ if (!cachedStep) {
2003
+ this.logger({
2004
+ category: "action",
2005
+ message: `Action cache miss: ${JSON.stringify(cacheObj)}`,
2006
+ level: 1
2007
+ });
2008
+ return null;
2009
+ }
2010
+ this.logger({
2011
+ category: "action",
2012
+ message: `Action cache semi-hit: ${cachedStep.playwrightCommand.method} with args: ${JSON.stringify(
2013
+ cachedStep.playwrightCommand.args
2014
+ )}`,
2015
+ level: 1
2016
+ });
2017
+ try {
2018
+ const validXpath = yield this._getValidCachedStepXpath({
2019
+ xpaths: cachedStep.xpaths,
2020
+ savedComponentString: cachedStep.componentString
2021
+ });
2022
+ this.logger({
2023
+ category: "action",
2024
+ message: `Cached action step is valid: ${validXpath !== null}`,
2025
+ level: 1
2026
+ });
2027
+ if (!validXpath) {
2028
+ this.logger({
2029
+ category: "action",
2030
+ message: `Cached action step is invalid, removing...`,
2031
+ level: 1
2032
+ });
2033
+ yield this.actionCache.removeActionStep(cacheObj);
2034
+ return null;
2035
+ }
2036
+ this.logger({
2037
+ category: "action",
2038
+ message: `Action Cache Hit: ${cachedStep.playwrightCommand.method} with args: ${JSON.stringify(
2039
+ cachedStep.playwrightCommand.args
2040
+ )}`,
2041
+ level: 1
2042
+ });
2043
+ cachedStep.playwrightCommand.args = cachedStep.playwrightCommand.args.map(
2044
+ (arg) => {
2045
+ return fillInVariables(arg, variables);
2046
+ }
2047
+ );
2048
+ yield this._performPlaywrightMethod(
2049
+ cachedStep.playwrightCommand.method,
2050
+ cachedStep.playwrightCommand.args,
2051
+ validXpath,
2052
+ domSettleTimeoutMs
2053
+ );
2054
+ steps = steps + cachedStep.newStepString;
2055
+ const { outputString, selectorMap } = yield this.stagehand.page.evaluate(
2056
+ ({ chunksSeen: chunksSeen2 }) => {
2057
+ return window.processDom(chunksSeen2);
2058
+ },
2059
+ { chunksSeen }
2060
+ );
2061
+ if (cachedStep.completed) {
2062
+ let actionCompleted = yield this._verifyActionCompletion({
2063
+ completed: true,
2064
+ verifierUseVision,
2065
+ model,
2066
+ steps,
2067
+ requestId,
2068
+ action,
2069
+ domSettleTimeoutMs
2070
+ });
2071
+ this.logger({
2072
+ category: "action",
2073
+ message: `Action completion verification result from cache: ${actionCompleted}`,
2074
+ level: 1
2075
+ });
2076
+ if (actionCompleted) {
2077
+ return {
2078
+ success: true,
2079
+ message: "Action completed successfully using cached step",
2080
+ action
2081
+ };
2082
+ }
2083
+ }
2084
+ return this.act({
2085
+ action,
2086
+ steps,
2087
+ chunksSeen,
2088
+ modelName,
2089
+ useVision,
2090
+ verifierUseVision,
2091
+ retries,
2092
+ requestId,
2093
+ variables,
2094
+ previousSelectors: [...previousSelectors, cachedStep.xpaths[0]],
2095
+ skipActionCacheForThisStep: false,
2096
+ domSettleTimeoutMs
2097
+ });
2098
+ } catch (exception) {
2099
+ this.logger({
2100
+ category: "action",
2101
+ message: `Error performing cached action step: ${exception.message}
2102
+ Trace: ${exception.stack}`,
2103
+ level: 1
2104
+ });
2105
+ yield this.actionCache.removeActionStep(cacheObj);
2106
+ return null;
2107
+ }
2108
+ });
2109
+ }
2110
+ act(_0) {
2111
+ return __async(this, arguments, function* ({
2112
+ action,
2113
+ steps = "",
2114
+ chunksSeen,
2115
+ modelName,
2116
+ useVision,
2117
+ verifierUseVision,
2118
+ retries = 0,
2119
+ requestId,
2120
+ variables,
2121
+ previousSelectors,
2122
+ skipActionCacheForThisStep = false,
2123
+ domSettleTimeoutMs
2124
+ }) {
2125
+ var _a;
2126
+ try {
2127
+ yield this.waitForSettledDom(domSettleTimeoutMs);
2128
+ yield this.startDomDebug();
2129
+ const model = modelName != null ? modelName : this.defaultModelName;
2130
+ if (this.enableCaching && !skipActionCacheForThisStep) {
2131
+ const response2 = yield this._runCachedActionIfAvailable({
2132
+ action,
2133
+ previousSelectors,
2134
+ requestId,
2135
+ steps,
2136
+ chunksSeen,
2137
+ modelName: model,
2138
+ useVision,
2139
+ verifierUseVision,
2140
+ retries,
2141
+ variables,
2142
+ model,
2143
+ domSettleTimeoutMs
2144
+ });
2145
+ if (response2 !== null) {
2146
+ return response2;
2147
+ } else {
2148
+ return this.act({
2149
+ action,
2150
+ steps,
2151
+ chunksSeen,
2152
+ modelName,
2153
+ useVision,
2154
+ verifierUseVision,
2155
+ retries,
2156
+ requestId,
2157
+ variables,
2158
+ previousSelectors,
2159
+ skipActionCacheForThisStep: true,
2160
+ domSettleTimeoutMs
2161
+ });
2162
+ }
2163
+ }
2164
+ if (!modelsWithVision.includes(model) && (useVision !== false || verifierUseVision)) {
2165
+ this.logger({
2166
+ category: "action",
2167
+ message: `${model} does not support vision, but useVision was set to ${useVision}. Defaulting to false.`,
2168
+ level: 1
2169
+ });
2170
+ useVision = false;
2171
+ verifierUseVision = false;
2172
+ }
2173
+ this.logger({
2174
+ category: "action",
2175
+ message: `Running / Continuing action: ${action} on page: ${this.stagehand.page.url()}`,
2176
+ level: 2
2177
+ });
2178
+ this.logger({
2179
+ category: "action",
2180
+ message: `Processing DOM...`,
2181
+ level: 2
2182
+ });
2183
+ const { outputString, selectorMap, chunk, chunks } = yield this.stagehand.page.evaluate(
2184
+ ({ chunksSeen: chunksSeen2 }) => {
2185
+ return window.processDom(chunksSeen2);
2186
+ },
2187
+ { chunksSeen }
2188
+ );
2189
+ this.logger({
2190
+ category: "action",
2191
+ message: `Looking at chunk ${chunk}. Chunks left: ${chunks.length - chunksSeen.length}`,
2192
+ level: 1
2193
+ });
2194
+ let annotatedScreenshot;
2195
+ if (useVision === true) {
2196
+ if (!modelsWithVision.includes(model)) {
2197
+ this.logger({
2198
+ category: "action",
2199
+ message: `${model} does not support vision. Skipping vision processing.`,
2200
+ level: 1
2201
+ });
2202
+ } else {
2203
+ const screenshotService = new ScreenshotService(
2204
+ this.stagehand.page,
2205
+ selectorMap,
2206
+ this.verbose
2207
+ );
2208
+ annotatedScreenshot = yield screenshotService.getAnnotatedScreenshot(false);
2209
+ }
2210
+ }
2211
+ const response = yield act({
2212
+ action,
2213
+ domElements: outputString,
2214
+ steps,
2215
+ llmProvider: this.llmProvider,
2216
+ modelName: model,
2217
+ screenshot: annotatedScreenshot,
2218
+ logger: this.logger,
2219
+ requestId,
2220
+ variables
2221
+ });
2222
+ this.logger({
2223
+ category: "action",
2224
+ message: `Received response from LLM: ${JSON.stringify(response)}`,
2225
+ level: 1
2226
+ });
2227
+ yield this.cleanupDomDebug();
2228
+ if (!response) {
2229
+ if (chunksSeen.length + 1 < chunks.length) {
2230
+ chunksSeen.push(chunk);
2231
+ this.logger({
2232
+ category: "action",
2233
+ message: `No action found in current chunk. Chunks seen: ${chunksSeen.length}.`,
2234
+ level: 1
2235
+ });
2236
+ return this.act({
2237
+ action,
2238
+ steps: steps + (!steps.endsWith("\n") ? "\n" : "") + "## Step: Scrolled to another section\n",
2239
+ chunksSeen,
2240
+ modelName,
2241
+ useVision,
2242
+ verifierUseVision,
2243
+ requestId,
2244
+ variables,
2245
+ previousSelectors,
2246
+ skipActionCacheForThisStep,
2247
+ domSettleTimeoutMs
2248
+ });
2249
+ } else if (useVision === "fallback") {
2250
+ this.logger({
2251
+ category: "action",
2252
+ message: `Switching to vision-based processing`,
2253
+ level: 1
2254
+ });
2255
+ yield this.stagehand.page.evaluate(() => window.scrollToHeight(0));
2256
+ return yield this.act({
2257
+ action,
2258
+ steps,
2259
+ chunksSeen,
2260
+ modelName,
2261
+ useVision: true,
2262
+ verifierUseVision,
2263
+ requestId,
2264
+ variables,
2265
+ previousSelectors,
2266
+ skipActionCacheForThisStep,
2267
+ domSettleTimeoutMs
2268
+ });
2269
+ } else {
2270
+ if (this.enableCaching) {
2271
+ this.llmProvider.cleanRequestCache(requestId);
2272
+ this.actionCache.deleteCacheForRequestId(requestId);
2273
+ }
2274
+ return {
2275
+ success: false,
2276
+ message: `Action was not able to be completed.`,
2277
+ action
2278
+ };
2279
+ }
2280
+ }
2281
+ const elementId = response["element"];
2282
+ const xpaths = selectorMap[elementId];
2283
+ const method = response["method"];
2284
+ const args = response["args"];
2285
+ const elementLines = outputString.split("\n");
2286
+ const elementText = ((_a = elementLines.find((line) => line.startsWith(`${elementId}:`))) == null ? void 0 : _a.split(":")[1]) || "Element not found";
2287
+ this.logger({
2288
+ category: "action",
2289
+ message: `Executing method: ${method} on element: ${elementId} (xpaths: ${xpaths.join(
2290
+ ", "
2291
+ )}) with args: ${JSON.stringify(args)}`,
2292
+ level: 1
2293
+ });
2294
+ try {
2295
+ const initialUrl = this.stagehand.page.url();
2296
+ const locator = this.stagehand.page.locator(`xpath=${xpaths[0]}`).first();
2297
+ const originalUrl = this.stagehand.page.url();
2298
+ const componentString = yield this._getComponentString(locator);
2299
+ const responseArgs = [...args];
2300
+ if (variables) {
2301
+ responseArgs.forEach((arg, index) => {
2302
+ if (typeof arg === "string") {
2303
+ args[index] = fillInVariables(arg, variables);
2304
+ }
2305
+ });
2306
+ }
2307
+ yield this._performPlaywrightMethod(
2308
+ method,
2309
+ args,
2310
+ xpaths[0],
2311
+ domSettleTimeoutMs
2312
+ );
2313
+ const newStepString = (!steps.endsWith("\n") ? "\n" : "") + `## Step: ${response.step}
2314
+ Element: ${elementText}
2315
+ Action: ${response.method}
2316
+ Reasoning: ${response.why}
2317
+ `;
2318
+ steps += newStepString;
2319
+ if (this.enableCaching) {
2320
+ this.actionCache.addActionStep({
2321
+ action,
2322
+ url: originalUrl,
2323
+ previousSelectors,
2324
+ playwrightCommand: {
2325
+ method,
2326
+ args: responseArgs
2327
+ },
2328
+ componentString,
2329
+ requestId,
2330
+ xpaths,
2331
+ newStepString,
2332
+ completed: response.completed
2333
+ }).catch((e) => {
2334
+ this.logger({
2335
+ category: "action",
2336
+ message: `Error adding action step to cache: ${e.message}
2337
+ Trace: ${e.stack}`,
2338
+ level: 1
2339
+ });
2340
+ });
2341
+ }
2342
+ if (this.stagehand.page.url() !== initialUrl) {
2343
+ steps += ` Result (Important): Page URL changed from ${initialUrl} to ${this.stagehand.page.url()}
2344
+
2345
+ `;
2346
+ }
2347
+ const actionCompleted = yield this._verifyActionCompletion({
2348
+ completed: response.completed,
2349
+ verifierUseVision,
2350
+ requestId,
2351
+ action,
2352
+ steps,
2353
+ model,
2354
+ domSettleTimeoutMs
2355
+ });
2356
+ if (!actionCompleted) {
2357
+ this.logger({
2358
+ category: "action",
2359
+ message: `Continuing to next action step`,
2360
+ level: 1
2361
+ });
2362
+ return this.act({
2363
+ action,
2364
+ steps,
2365
+ modelName,
2366
+ chunksSeen,
2367
+ useVision,
2368
+ verifierUseVision,
2369
+ requestId,
2370
+ variables,
2371
+ previousSelectors: [...previousSelectors, xpaths[0]],
2372
+ skipActionCacheForThisStep: false,
2373
+ domSettleTimeoutMs
2374
+ });
2375
+ } else {
2376
+ this.logger({
2377
+ category: "action",
2378
+ message: `Action completed successfully`,
2379
+ level: 1
2380
+ });
2381
+ yield this._recordAction(action, response.step);
2382
+ return {
2383
+ success: true,
2384
+ message: `Action completed successfully: ${steps}${response.step}`,
2385
+ action
2386
+ };
2387
+ }
2388
+ } catch (error) {
2389
+ this.logger({
2390
+ category: "action",
2391
+ message: `Error performing action - D (Retries: ${retries}): ${error.message}
2392
+ Trace: ${error.stack}`,
2393
+ level: 1
2394
+ });
2395
+ if (retries < 2) {
2396
+ return this.act({
2397
+ action,
2398
+ steps,
2399
+ modelName,
2400
+ useVision,
2401
+ verifierUseVision,
2402
+ retries: retries + 1,
2403
+ chunksSeen,
2404
+ requestId,
2405
+ variables,
2406
+ previousSelectors,
2407
+ skipActionCacheForThisStep,
2408
+ domSettleTimeoutMs
2409
+ });
2410
+ }
2411
+ yield this._recordAction(action, "");
2412
+ if (this.enableCaching) {
2413
+ this.llmProvider.cleanRequestCache(requestId);
2414
+ this.actionCache.deleteCacheForRequestId(requestId);
2415
+ }
2416
+ return {
2417
+ success: false,
2418
+ message: `Error performing action - A: ${error.message}`,
2419
+ action
2420
+ };
2421
+ }
2422
+ } catch (error) {
2423
+ this.logger({
2424
+ category: "action",
2425
+ message: `Error performing action - B: ${error.message}
2426
+ Trace: ${error.stack}`,
2427
+ level: 1
2428
+ });
2429
+ if (this.enableCaching) {
2430
+ this.llmProvider.cleanRequestCache(requestId);
2431
+ this.actionCache.deleteCacheForRequestId(requestId);
2432
+ }
2433
+ return {
2434
+ success: false,
2435
+ message: `Error performing action - C: ${error.message}`,
2436
+ action
2437
+ };
2438
+ }
2439
+ });
2440
+ }
2441
+ };
2442
+
2443
+ // lib/index.ts
2444
+ require("dotenv").config({ path: ".env" });
2445
+ function getBrowser(apiKey, projectId, env = "LOCAL", headless = false, logger, browserbaseSessionCreateParams, browserbaseResumeSessionID) {
2446
+ return __async(this, null, function* () {
2447
+ if (env === "BROWSERBASE") {
2448
+ if (!apiKey) {
2449
+ logger({
2450
+ category: "Init",
2451
+ message: "BROWSERBASE_API_KEY is required to use BROWSERBASE env. Defaulting to LOCAL.",
2452
+ level: 0
2453
+ });
2454
+ env = "LOCAL";
2455
+ }
2456
+ if (!projectId) {
2457
+ logger({
2458
+ category: "Init",
2459
+ message: "BROWSERBASE_PROJECT_ID is required for some Browserbase features that may not work without it.",
2460
+ level: 1
2461
+ });
2462
+ }
2463
+ }
2464
+ if (env === "BROWSERBASE") {
2465
+ if (!apiKey) {
2466
+ throw new Error("BROWSERBASE_API_KEY is required.");
2467
+ }
2468
+ let debugUrl = void 0;
2469
+ let sessionUrl = void 0;
2470
+ let sessionId;
2471
+ let connectUrl;
2472
+ const browserbase = new import_sdk2.Browserbase({
2473
+ apiKey
2474
+ });
2475
+ if (browserbaseResumeSessionID) {
2476
+ try {
1354
2477
  const sessionStatus = yield browserbase.sessions.retrieve(
1355
2478
  browserbaseResumeSessionID
1356
2479
  );
@@ -1507,20 +2630,31 @@ var Stagehand = class {
1507
2630
  this.llmProvider = llmProvider || new LLMProvider(this.logger, this.enableCaching);
1508
2631
  this.env = env;
1509
2632
  this.observations = {};
1510
- this.apiKey = apiKey;
1511
- this.projectId = projectId;
1512
- this.actions = {};
2633
+ this.apiKey = apiKey != null ? apiKey : process.env.BROWSERBASE_API_KEY;
2634
+ this.projectId = projectId != null ? projectId : process.env.BROWSERBASE_PROJECT_ID;
1513
2635
  this.verbose = verbose != null ? verbose : 0;
1514
2636
  this.debugDom = debugDom != null ? debugDom : false;
1515
2637
  this.defaultModelName = "gpt-4o";
1516
- this.domSettleTimeoutMs = domSettleTimeoutMs != null ? domSettleTimeoutMs : 6e4;
2638
+ this.domSettleTimeoutMs = domSettleTimeoutMs != null ? domSettleTimeoutMs : 3e4;
1517
2639
  this.headless = headless != null ? headless : false;
1518
2640
  this.browserBaseSessionCreateParams = browserBaseSessionCreateParams;
2641
+ this.actHandler = new StagehandActHandler({
2642
+ stagehand: this,
2643
+ verbose: this.verbose,
2644
+ llmProvider: this.llmProvider,
2645
+ enableCaching: this.enableCaching,
2646
+ logger: this.logger,
2647
+ waitForSettledDom: this._waitForSettledDom.bind(this),
2648
+ defaultModelName: this.defaultModelName,
2649
+ startDomDebug: this.startDomDebug.bind(this),
2650
+ cleanupDomDebug: this.cleanupDomDebug.bind(this)
2651
+ });
1519
2652
  this.browserbaseResumeSessionID = browserbaseResumeSessionID;
1520
2653
  }
1521
2654
  init() {
1522
2655
  return __async(this, arguments, function* ({
1523
- modelName = "gpt-4o"
2656
+ modelName = "gpt-4o",
2657
+ domSettleTimeoutMs
1524
2658
  } = {}) {
1525
2659
  const { context, debugUrl, sessionUrl } = yield getBrowser(
1526
2660
  this.apiKey,
@@ -1536,7 +2670,10 @@ var Stagehand = class {
1536
2670
  });
1537
2671
  this.context = context;
1538
2672
  this.page = context.pages()[0];
2673
+ yield this.page.waitForLoadState("domcontentloaded");
2674
+ yield this._waitForSettledDom();
1539
2675
  this.defaultModelName = modelName;
2676
+ this.domSettleTimeoutMs = domSettleTimeoutMs != null ? domSettleTimeoutMs : this.domSettleTimeoutMs;
1540
2677
  const originalGoto = this.page.goto.bind(this.page);
1541
2678
  this.page.goto = (url, options) => __async(this, null, function* () {
1542
2679
  const result = yield originalGoto(url, options);
@@ -1547,14 +2684,33 @@ var Stagehand = class {
1547
2684
  if (this.headless) {
1548
2685
  yield this.page.setViewportSize({ width: 1280, height: 720 });
1549
2686
  }
1550
- yield this.page.addInitScript({
1551
- path: import_path2.default.join(__dirname, "..", "dist", "dom", "build", "process.js")
2687
+ yield this.context.addInitScript({
2688
+ path: import_path2.default.join(__dirname, "..", "dist", "dom", "build", "xpathUtils.js"),
2689
+ content: import_fs2.default.readFileSync(
2690
+ import_path2.default.join(__dirname, "..", "dist", "dom", "build", "xpathUtils.js"),
2691
+ "utf8"
2692
+ )
2693
+ });
2694
+ yield this.context.addInitScript({
2695
+ path: import_path2.default.join(__dirname, "..", "dist", "dom", "build", "process.js"),
2696
+ content: import_fs2.default.readFileSync(
2697
+ import_path2.default.join(__dirname, "..", "dist", "dom", "build", "process.js"),
2698
+ "utf8"
2699
+ )
1552
2700
  });
1553
- yield this.page.addInitScript({
1554
- path: import_path2.default.join(__dirname, "..", "dist", "dom", "build", "utils.js")
2701
+ yield this.context.addInitScript({
2702
+ path: import_path2.default.join(__dirname, "..", "dist", "dom", "build", "utils.js"),
2703
+ content: import_fs2.default.readFileSync(
2704
+ import_path2.default.join(__dirname, "..", "dist", "dom", "build", "utils.js"),
2705
+ "utf8"
2706
+ )
1555
2707
  });
1556
- yield this.page.addInitScript({
1557
- path: import_path2.default.join(__dirname, "..", "dist", "dom", "build", "debug.js")
2708
+ yield this.context.addInitScript({
2709
+ path: import_path2.default.join(__dirname, "..", "dist", "dom", "build", "debug.js"),
2710
+ content: import_fs2.default.readFileSync(
2711
+ import_path2.default.join(__dirname, "..", "dist", "dom", "build", "debug.js"),
2712
+ "utf8"
2713
+ )
1558
2714
  });
1559
2715
  return { debugUrl, sessionUrl };
1560
2716
  });
@@ -1574,14 +2730,33 @@ var Stagehand = class {
1574
2730
  if (this.headless) {
1575
2731
  yield this.page.setViewportSize({ width: 1280, height: 720 });
1576
2732
  }
1577
- yield this.page.addInitScript({
1578
- path: import_path2.default.join(__dirname, "..", "dist", "dom", "build", "process.js")
2733
+ yield this.context.addInitScript({
2734
+ path: import_path2.default.join(__dirname, "..", "dist", "dom", "build", "xpathUtils.js"),
2735
+ content: import_fs2.default.readFileSync(
2736
+ import_path2.default.join(__dirname, "..", "dist", "dom", "build", "xpathUtils.js"),
2737
+ "utf8"
2738
+ )
1579
2739
  });
1580
- yield this.page.addInitScript({
1581
- path: import_path2.default.join(__dirname, "..", "dist", "dom", "build", "utils.js")
2740
+ yield this.context.addInitScript({
2741
+ path: import_path2.default.join(__dirname, "..", "dist", "dom", "build", "process.js"),
2742
+ content: import_fs2.default.readFileSync(
2743
+ import_path2.default.join(__dirname, "..", "dist", "dom", "build", "process.js"),
2744
+ "utf8"
2745
+ )
2746
+ });
2747
+ yield this.context.addInitScript({
2748
+ path: import_path2.default.join(__dirname, "..", "dist", "dom", "build", "utils.js"),
2749
+ content: import_fs2.default.readFileSync(
2750
+ import_path2.default.join(__dirname, "..", "dist", "dom", "build", "utils.js"),
2751
+ "utf8"
2752
+ )
1582
2753
  });
1583
- yield this.page.addInitScript({
1584
- path: import_path2.default.join(__dirname, "..", "dist", "dom", "build", "debug.js")
2754
+ yield this.context.addInitScript({
2755
+ path: import_path2.default.join(__dirname, "..", "dist", "dom", "build", "debug.js"),
2756
+ content: import_fs2.default.readFileSync(
2757
+ import_path2.default.join(__dirname, "..", "dist", "dom", "build", "debug.js"),
2758
+ "utf8"
2759
+ )
1585
2760
  });
1586
2761
  return { context: this.context };
1587
2762
  });
@@ -1720,24 +2895,13 @@ Trace: ${e.stack}`,
1720
2895
  }
1721
2896
  });
1722
2897
  }
1723
- // Recording
1724
- _generateId(operation) {
1725
- return import_crypto.default.createHash("sha256").update(operation).digest("hex");
1726
- }
1727
2898
  _recordObservation(instruction, result) {
1728
2899
  return __async(this, null, function* () {
1729
- const id = this._generateId(instruction);
2900
+ const id = generateId(instruction);
1730
2901
  this.observations[id] = { result, instruction };
1731
2902
  return id;
1732
2903
  });
1733
2904
  }
1734
- _recordAction(action, result) {
1735
- return __async(this, null, function* () {
1736
- const id = this._generateId(action);
1737
- this.actions[id] = { result, action };
1738
- return id;
1739
- });
1740
- }
1741
2905
  // Main methods
1742
2906
  _extract(_0) {
1743
2907
  return __async(this, arguments, function* ({
@@ -1747,14 +2911,15 @@ Trace: ${e.stack}`,
1747
2911
  content = {},
1748
2912
  chunksSeen = [],
1749
2913
  modelName,
1750
- requestId
2914
+ requestId,
2915
+ domSettleTimeoutMs
1751
2916
  }) {
1752
2917
  this.log({
1753
2918
  category: "extraction",
1754
2919
  message: `starting extraction '${instruction}'`,
1755
2920
  level: 1
1756
2921
  });
1757
- yield this._waitForSettledDom();
2922
+ yield this._waitForSettledDom(domSettleTimeoutMs);
1758
2923
  yield this.startDomDebug();
1759
2924
  const { outputString, chunk, chunks } = yield this.page.evaluate(
1760
2925
  (chunksSeen2) => window.processDom(chunksSeen2 != null ? chunksSeen2 : []),
@@ -1802,14 +2967,15 @@ Trace: ${e.stack}`,
1802
2967
  message: `continuing extraction, progress: '${newProgress}'`,
1803
2968
  level: 1
1804
2969
  });
1805
- yield this._waitForSettledDom();
2970
+ yield this._waitForSettledDom(domSettleTimeoutMs);
1806
2971
  return this._extract({
1807
2972
  instruction,
1808
2973
  schema,
1809
2974
  progress: newProgress,
1810
2975
  content: output,
1811
2976
  chunksSeen,
1812
- modelName
2977
+ modelName,
2978
+ domSettleTimeoutMs
1813
2979
  });
1814
2980
  }
1815
2981
  });
@@ -1819,119 +2985,30 @@ Trace: ${e.stack}`,
1819
2985
  instruction,
1820
2986
  useVision,
1821
2987
  fullPage,
1822
- modelName,
1823
- requestId
1824
- }) {
1825
- if (!instruction) {
1826
- instruction = `Find elements that can be used for any future actions in the page. These may be navigation links, related pages, section/subsection links, buttons, or other interactive elements. Be comprehensive: if there are multiple elements that may be relevant for future actions, return all of them.`;
1827
- }
1828
- const model = modelName != null ? modelName : this.defaultModelName;
1829
- this.log({
1830
- category: "observation",
1831
- message: `starting observation: ${instruction}`,
1832
- level: 1
1833
- });
1834
- yield this._waitForSettledDom();
1835
- yield this.startDomDebug();
1836
- let { outputString, selectorMap } = yield this.page.evaluate(
1837
- (fullPage2) => fullPage2 ? window.processAllOfDom() : window.processDom([]),
1838
- fullPage
1839
- );
1840
- let annotatedScreenshot;
1841
- if (useVision === true) {
1842
- if (!modelsWithVision.includes(model)) {
1843
- this.log({
1844
- category: "observation",
1845
- message: `${model} does not support vision. Skipping vision processing.`,
1846
- level: 1
1847
- });
1848
- } else {
1849
- const screenshotService = new ScreenshotService(
1850
- this.page,
1851
- selectorMap,
1852
- this.verbose
1853
- );
1854
- annotatedScreenshot = yield screenshotService.getAnnotatedScreenshot(fullPage);
1855
- outputString = "n/a. use the image to find the elements.";
1856
- }
1857
- }
1858
- const observationResponse = yield observe({
1859
- instruction,
1860
- domElements: outputString,
1861
- llmProvider: this.llmProvider,
1862
- modelName: modelName || this.defaultModelName,
1863
- image: annotatedScreenshot,
1864
- requestId
1865
- });
1866
- const elementsWithSelectors = observationResponse.elements.map(
1867
- (element) => {
1868
- const _a = element, { elementId } = _a, rest = __objRest(_a, ["elementId"]);
1869
- return __spreadProps(__spreadValues({}, rest), {
1870
- selector: `xpath=${selectorMap[elementId]}`
1871
- });
1872
- }
1873
- );
1874
- yield this.cleanupDomDebug();
1875
- this._recordObservation(instruction, elementsWithSelectors);
1876
- this.log({
1877
- category: "observation",
1878
- message: `found element ${JSON.stringify(elementsWithSelectors)}`,
1879
- level: 1
1880
- });
1881
- yield this._recordObservation(instruction, elementsWithSelectors);
1882
- return elementsWithSelectors;
1883
- });
1884
- }
1885
- _act(_0) {
1886
- return __async(this, arguments, function* ({
1887
- action,
1888
- steps = "",
1889
- chunksSeen,
1890
- modelName,
1891
- useVision,
1892
- verifierUseVision,
1893
- retries = 0,
1894
- requestId
1895
- }) {
1896
- var _a;
1897
- const model = modelName != null ? modelName : this.defaultModelName;
1898
- if (!modelsWithVision.includes(model) && (useVision !== false || verifierUseVision)) {
1899
- this.log({
1900
- category: "action",
1901
- message: `${model} does not support vision, but useVision was set to ${useVision}. Defaulting to false.`,
1902
- level: 1
1903
- });
1904
- useVision = false;
1905
- verifierUseVision = false;
2988
+ modelName,
2989
+ requestId,
2990
+ domSettleTimeoutMs
2991
+ }) {
2992
+ if (!instruction) {
2993
+ instruction = `Find elements that can be used for any future actions in the page. These may be navigation links, related pages, section/subsection links, buttons, or other interactive elements. Be comprehensive: if there are multiple elements that may be relevant for future actions, return all of them.`;
1906
2994
  }
2995
+ const model = modelName != null ? modelName : this.defaultModelName;
1907
2996
  this.log({
1908
- category: "action",
1909
- message: `Running / Continuing action: ${action} on page: ${this.page.url()}`,
1910
- level: 2
2997
+ category: "observation",
2998
+ message: `starting observation: ${instruction}`,
2999
+ level: 1
1911
3000
  });
1912
- yield this._waitForSettledDom();
3001
+ yield this._waitForSettledDom(domSettleTimeoutMs);
1913
3002
  yield this.startDomDebug();
1914
- this.log({
1915
- category: "action",
1916
- message: `Processing DOM...`,
1917
- level: 2
1918
- });
1919
- const { outputString, selectorMap, chunk, chunks } = yield this.page.evaluate(
1920
- ({ chunksSeen: chunksSeen2 }) => {
1921
- return window.processDom(chunksSeen2);
1922
- },
1923
- { chunksSeen }
3003
+ let { outputString, selectorMap } = yield this.page.evaluate(
3004
+ (fullPage2) => fullPage2 ? window.processAllOfDom() : window.processDom([]),
3005
+ fullPage
1924
3006
  );
1925
- this.log({
1926
- category: "action",
1927
- message: `Looking at chunk ${chunk}. Chunks left: ${chunks.length - chunksSeen.length}`,
1928
- level: 1
1929
- });
1930
3007
  let annotatedScreenshot;
1931
3008
  if (useVision === true) {
1932
3009
  if (!modelsWithVision.includes(model)) {
1933
3010
  this.log({
1934
- category: "action",
3011
+ category: "observation",
1935
3012
  message: `${model} does not support vision. Skipping vision processing.`,
1936
3013
  level: 1
1937
3014
  });
@@ -1941,418 +3018,44 @@ Trace: ${e.stack}`,
1941
3018
  selectorMap,
1942
3019
  this.verbose
1943
3020
  );
1944
- annotatedScreenshot = yield screenshotService.getAnnotatedScreenshot(false);
3021
+ annotatedScreenshot = yield screenshotService.getAnnotatedScreenshot(fullPage);
3022
+ outputString = "n/a. use the image to find the elements.";
1945
3023
  }
1946
3024
  }
1947
- const response = yield act({
1948
- action,
3025
+ const observationResponse = yield observe({
3026
+ instruction,
1949
3027
  domElements: outputString,
1950
- steps,
1951
3028
  llmProvider: this.llmProvider,
1952
- modelName: model,
1953
- screenshot: annotatedScreenshot,
1954
- logger: this.logger,
3029
+ modelName: modelName || this.defaultModelName,
3030
+ image: annotatedScreenshot,
1955
3031
  requestId
1956
3032
  });
1957
- this.log({
1958
- category: "action",
1959
- message: `Received response from LLM: ${JSON.stringify(response)}`,
1960
- level: 1
1961
- });
1962
- yield this.cleanupDomDebug();
1963
- if (!response) {
1964
- if (chunksSeen.length + 1 < chunks.length) {
1965
- chunksSeen.push(chunk);
1966
- this.log({
1967
- category: "action",
1968
- message: `No action found in current chunk. Chunks seen: ${chunksSeen.length}.`,
1969
- level: 1
1970
- });
1971
- return this._act({
1972
- action,
1973
- steps: steps + (!steps.endsWith("\n") ? "\n" : "") + "## Step: Scrolled to another section\n",
1974
- chunksSeen,
1975
- modelName,
1976
- useVision,
1977
- verifierUseVision,
1978
- requestId
1979
- });
1980
- } else if (useVision === "fallback") {
1981
- this.log({
1982
- category: "action",
1983
- message: `Switching to vision-based processing`,
1984
- level: 1
1985
- });
1986
- yield this.page.evaluate(() => window.scrollToHeight(0));
1987
- return yield this._act({
1988
- action,
1989
- steps,
1990
- chunksSeen,
1991
- modelName,
1992
- useVision: true,
1993
- verifierUseVision,
1994
- requestId
3033
+ const elementsWithSelectors = observationResponse.elements.map(
3034
+ (element) => {
3035
+ const _a = element, { elementId } = _a, rest = __objRest(_a, ["elementId"]);
3036
+ return __spreadProps(__spreadValues({}, rest), {
3037
+ selector: `xpath=${selectorMap[elementId][0]}`
1995
3038
  });
1996
- } else {
1997
- if (this.enableCaching) {
1998
- this.llmProvider.cleanRequestCache(requestId);
1999
- }
2000
- return {
2001
- success: false,
2002
- message: `Action was not able to be completed.`,
2003
- action
2004
- };
2005
3039
  }
2006
- }
2007
- const elementId = response["element"];
2008
- const xpath = selectorMap[elementId];
2009
- const method = response["method"];
2010
- const args = response["args"];
2011
- const elementLines = outputString.split("\n");
2012
- const elementText = ((_a = elementLines.find((line) => line.startsWith(`${elementId}:`))) == null ? void 0 : _a.split(":")[1]) || "Element not found";
3040
+ );
3041
+ yield this.cleanupDomDebug();
3042
+ this._recordObservation(instruction, elementsWithSelectors);
2013
3043
  this.log({
2014
- category: "action",
2015
- message: `Executing method: ${method} on element: ${elementId} (xpath: ${xpath}) with args: ${JSON.stringify(
2016
- args
2017
- )}`,
3044
+ category: "observation",
3045
+ message: `found element ${JSON.stringify(elementsWithSelectors)}`,
2018
3046
  level: 1
2019
3047
  });
2020
- let urlChangeString = "";
2021
- const locator = this.page.locator(`xpath=${xpath}`).first();
2022
- try {
2023
- const initialUrl = this.page.url();
2024
- if (method === "scrollIntoView") {
2025
- this.log({
2026
- category: "action",
2027
- message: `Scrolling element into view`,
2028
- level: 2
2029
- });
2030
- try {
2031
- yield locator.evaluate((element) => {
2032
- element.scrollIntoView({ behavior: "smooth", block: "center" });
2033
- }).catch((e) => {
2034
- this.log({
2035
- category: "action",
2036
- message: `Error scrolling element into view: ${e.message}
2037
- Trace: ${e.stack}`,
2038
- level: 1
2039
- });
2040
- });
2041
- } catch (e) {
2042
- this.log({
2043
- category: "action",
2044
- message: `Error scrolling element into view (Retries ${retries}): ${e.message}
2045
- Trace: ${e.stack}`,
2046
- level: 1
2047
- });
2048
- if (retries < 2) {
2049
- return this._act({
2050
- action,
2051
- steps,
2052
- modelName,
2053
- useVision,
2054
- verifierUseVision,
2055
- retries: retries + 1,
2056
- chunksSeen,
2057
- requestId
2058
- });
2059
- }
2060
- }
2061
- } else if (method === "fill" || method === "type") {
2062
- try {
2063
- yield locator.fill("");
2064
- yield locator.click();
2065
- const text = args[0];
2066
- for (const char of text) {
2067
- yield this.page.keyboard.type(char, {
2068
- delay: Math.random() * 50 + 25
2069
- });
2070
- }
2071
- } catch (e) {
2072
- this.log({
2073
- category: "action",
2074
- message: `Error filling element (Retries ${retries}): ${e.message}
2075
- Trace: ${e.stack}`,
2076
- level: 1
2077
- });
2078
- if (retries < 2) {
2079
- return this._act({
2080
- action,
2081
- steps,
2082
- modelName,
2083
- useVision,
2084
- verifierUseVision,
2085
- retries: retries + 1,
2086
- chunksSeen,
2087
- requestId
2088
- });
2089
- }
2090
- }
2091
- } else if (method === "press") {
2092
- try {
2093
- const key = args[0];
2094
- yield this.page.keyboard.press(key);
2095
- } catch (e) {
2096
- this.log({
2097
- category: "action",
2098
- message: `Error pressing key (Retries ${retries}): ${e.message}
2099
- Trace: ${e.stack}`,
2100
- level: 1
2101
- });
2102
- if (retries < 2) {
2103
- return this._act({
2104
- action,
2105
- steps,
2106
- modelName,
2107
- useVision,
2108
- verifierUseVision,
2109
- retries: retries + 1,
2110
- chunksSeen,
2111
- requestId
2112
- });
2113
- }
2114
- }
2115
- } else if (typeof locator[method] === "function") {
2116
- this.log({
2117
- category: "action",
2118
- message: `Page URL before action: ${this.page.url()}`,
2119
- level: 2
2120
- });
2121
- try {
2122
- yield locator[method](...args);
2123
- } catch (e) {
2124
- this.log({
2125
- category: "action",
2126
- message: `Error performing method ${method} with args ${JSON.stringify(
2127
- args
2128
- )} (Retries: ${retries}): ${e.message}
2129
- Trace: ${e.stack}`,
2130
- level: 1
2131
- });
2132
- if (retries < 2) {
2133
- return this._act({
2134
- action,
2135
- steps,
2136
- modelName,
2137
- useVision,
2138
- verifierUseVision,
2139
- retries: retries + 1,
2140
- chunksSeen,
2141
- requestId
2142
- });
2143
- }
2144
- }
2145
- if (method === "click") {
2146
- this.log({
2147
- category: "action",
2148
- message: `Clicking element, checking for page navigation`,
2149
- level: 1
2150
- });
2151
- const newOpenedTab = yield Promise.race([
2152
- new Promise((resolve) => {
2153
- this.context.once("page", (page) => resolve(page));
2154
- setTimeout(() => resolve(null), 1500);
2155
- })
2156
- ]);
2157
- this.log({
2158
- category: "action",
2159
- message: `Clicked element, ${newOpenedTab ? "opened a new tab" : "no new tabs opened"}`,
2160
- level: 1
2161
- });
2162
- if (newOpenedTab) {
2163
- this.log({
2164
- category: "action",
2165
- message: `New page detected (new tab) with URL: ${newOpenedTab.url()}`,
2166
- level: 1
2167
- });
2168
- yield newOpenedTab.close();
2169
- yield this.page.goto(newOpenedTab.url());
2170
- yield this.page.waitForLoadState("domcontentloaded");
2171
- yield this._waitForSettledDom();
2172
- }
2173
- yield Promise.race([
2174
- this.page.waitForLoadState("networkidle"),
2175
- new Promise((resolve) => setTimeout(resolve, 5e3))
2176
- ]).catch((e) => {
2177
- this.log({
2178
- category: "action",
2179
- message: `Network idle timeout hit`,
2180
- level: 1
2181
- });
2182
- });
2183
- this.log({
2184
- category: "action",
2185
- message: `Finished waiting for (possible) page navigation`,
2186
- level: 1
2187
- });
2188
- if (this.page.url() !== initialUrl) {
2189
- this.log({
2190
- category: "action",
2191
- message: `New page detected with URL: ${this.page.url()}`,
2192
- level: 1
2193
- });
2194
- }
2195
- }
2196
- } else {
2197
- this.log({
2198
- category: "action",
2199
- message: `Chosen method ${method} is invalid`,
2200
- level: 1
2201
- });
2202
- if (retries < 2) {
2203
- return this._act({
2204
- action,
2205
- steps,
2206
- modelName: model,
2207
- useVision,
2208
- verifierUseVision,
2209
- retries: retries + 1,
2210
- chunksSeen,
2211
- requestId
2212
- });
2213
- } else {
2214
- if (this.enableCaching) {
2215
- this.llmProvider.cleanRequestCache(requestId);
2216
- }
2217
- return {
2218
- success: false,
2219
- message: `Internal error: Chosen method ${method} is invalid`,
2220
- action
2221
- };
2222
- }
2223
- }
2224
- let newSteps = steps + (!steps.endsWith("\n") ? "\n" : "") + `## Step: ${response.step}
2225
- Element: ${elementText}
2226
- Action: ${response.method}
2227
- Reasoning: ${response.why}
2228
- `;
2229
- if (urlChangeString) {
2230
- newSteps += ` Result (Important): ${urlChangeString}
2231
-
2232
- `;
2233
- }
2234
- let actionComplete = false;
2235
- if (response.completed) {
2236
- this.log({
2237
- category: "action",
2238
- message: `Action marked as completed, Verifying if this is true...`,
2239
- level: 1
2240
- });
2241
- let domElements = void 0;
2242
- let fullpageScreenshot = void 0;
2243
- if (verifierUseVision) {
2244
- try {
2245
- const screenshotService = new ScreenshotService(
2246
- this.page,
2247
- selectorMap,
2248
- this.verbose
2249
- );
2250
- fullpageScreenshot = yield screenshotService.getScreenshot(
2251
- true,
2252
- 15
2253
- );
2254
- } catch (e) {
2255
- this.log({
2256
- category: "action",
2257
- message: `Error getting full page screenshot: ${e.message}
2258
- . Trying again...`,
2259
- level: 1
2260
- });
2261
- const screenshotService = new ScreenshotService(
2262
- this.page,
2263
- selectorMap,
2264
- this.verbose
2265
- );
2266
- fullpageScreenshot = yield screenshotService.getScreenshot(
2267
- true,
2268
- 15
2269
- );
2270
- }
2271
- } else {
2272
- ({ outputString: domElements } = yield this.page.evaluate(() => {
2273
- return window.processAllOfDom();
2274
- }));
2275
- }
2276
- actionComplete = yield verifyActCompletion({
2277
- goal: action,
2278
- steps: newSteps,
2279
- llmProvider: this.llmProvider,
2280
- modelName: model,
2281
- screenshot: fullpageScreenshot,
2282
- domElements,
2283
- logger: this.logger,
2284
- requestId
2285
- });
2286
- this.log({
2287
- category: "action",
2288
- message: `Action completion verification result: ${actionComplete}`,
2289
- level: 1
2290
- });
2291
- }
2292
- if (!actionComplete) {
2293
- this.log({
2294
- category: "action",
2295
- message: `Continuing to next action step`,
2296
- level: 1
2297
- });
2298
- return this._act({
2299
- action,
2300
- steps: newSteps,
2301
- modelName,
2302
- chunksSeen,
2303
- useVision,
2304
- verifierUseVision,
2305
- requestId
2306
- });
2307
- } else {
2308
- this.log({
2309
- category: "action",
2310
- message: `Action completed successfully`,
2311
- level: 1
2312
- });
2313
- yield this._recordAction(action, response.step);
2314
- return {
2315
- success: true,
2316
- message: `Action completed successfully: ${steps}${response.step}`,
2317
- action
2318
- };
2319
- }
2320
- } catch (error) {
2321
- this.log({
2322
- category: "action",
2323
- message: `Error performing action (Retries: ${retries}): ${error.message}
2324
- Trace: ${error.stack}`,
2325
- level: 1
2326
- });
2327
- if (retries < 2) {
2328
- return this._act({
2329
- action,
2330
- steps,
2331
- modelName,
2332
- useVision,
2333
- verifierUseVision,
2334
- retries: retries + 1,
2335
- chunksSeen,
2336
- requestId
2337
- });
2338
- }
2339
- yield this._recordAction(action, "");
2340
- if (this.enableCaching) {
2341
- this.llmProvider.cleanRequestCache(requestId);
2342
- }
2343
- return {
2344
- success: false,
2345
- message: `Error performing action: ${error.message}`,
2346
- action
2347
- };
2348
- }
3048
+ yield this._recordObservation(instruction, elementsWithSelectors);
3049
+ return elementsWithSelectors;
2349
3050
  });
2350
3051
  }
2351
3052
  act(_0) {
2352
3053
  return __async(this, arguments, function* ({
2353
3054
  action,
2354
3055
  modelName,
2355
- useVision = "fallback"
3056
+ useVision = "fallback",
3057
+ variables = {},
3058
+ domSettleTimeoutMs
2356
3059
  }) {
2357
3060
  useVision = useVision != null ? useVision : "fallback";
2358
3061
  const requestId = Math.random().toString(36).substring(2);
@@ -2360,22 +3063,26 @@ Trace: ${error.stack}`,
2360
3063
  category: "act",
2361
3064
  message: `Running act with action: ${action}, requestId: ${requestId}`
2362
3065
  });
2363
- return this._act({
3066
+ if (variables) {
3067
+ this.variables = __spreadValues(__spreadValues({}, this.variables), variables);
3068
+ }
3069
+ return this.actHandler.act({
2364
3070
  action,
2365
3071
  modelName,
2366
3072
  chunksSeen: [],
2367
3073
  useVision,
2368
3074
  verifierUseVision: useVision !== false,
2369
- requestId
3075
+ requestId,
3076
+ variables,
3077
+ previousSelectors: [],
3078
+ skipActionCacheForThisStep: false,
3079
+ domSettleTimeoutMs
2370
3080
  }).catch((e) => {
2371
3081
  this.logger({
2372
3082
  category: "act",
2373
3083
  message: `Error acting: ${e.message}
2374
3084
  Trace: ${e.stack}`
2375
3085
  });
2376
- if (this.enableCaching) {
2377
- this.llmProvider.cleanRequestCache(requestId);
2378
- }
2379
3086
  return {
2380
3087
  success: false,
2381
3088
  message: `Internal error: Error acting: ${e.message}`,
@@ -2388,7 +3095,8 @@ Trace: ${e.stack}`
2388
3095
  return __async(this, arguments, function* ({
2389
3096
  instruction,
2390
3097
  schema,
2391
- modelName
3098
+ modelName,
3099
+ domSettleTimeoutMs
2392
3100
  }) {
2393
3101
  const requestId = Math.random().toString(36).substring(2);
2394
3102
  this.logger({
@@ -2399,7 +3107,8 @@ Trace: ${e.stack}`
2399
3107
  instruction,
2400
3108
  schema,
2401
3109
  modelName,
2402
- requestId
3110
+ requestId,
3111
+ domSettleTimeoutMs
2403
3112
  }).catch((e) => {
2404
3113
  this.logger({
2405
3114
  category: "extract",
@@ -2426,7 +3135,8 @@ Trace: ${e.stack}`
2426
3135
  modelName: options == null ? void 0 : options.modelName,
2427
3136
  useVision: (_b = options == null ? void 0 : options.useVision) != null ? _b : false,
2428
3137
  fullPage: false,
2429
- requestId
3138
+ requestId,
3139
+ domSettleTimeoutMs: options == null ? void 0 : options.domSettleTimeoutMs
2430
3140
  }).catch((e) => {
2431
3141
  this.logger({
2432
3142
  category: "observe",