@browserbasehq/stagehand 1.0.3 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -101,6 +101,7 @@ import { z } from "zod";
101
101
 
102
102
  const stagehand = new Stagehand({
103
103
  env: "BROWSERBASE",
104
+ enableCaching: true,
104
105
  });
105
106
  ```
106
107
 
@@ -137,6 +138,7 @@ This constructor is used to create an instance of Stagehand.
137
138
  - `2`: LLM-client level logging (most granular)
138
139
  - `debugDom`: a `boolean` that draws bounding boxes around elements presented to the LLM during automation.
139
140
  - `domSettleTimeoutMs`: an `integer` that specifies the timeout in milliseconds for waiting for the DOM to settle. Defaults to 30000 (30 seconds).
141
+ - `enableCaching`: a `boolean` that enables caching of LLM responses. When set to `true`, the LLM requests will be cached on disk and reused for identical requests. Defaults to `false`.
140
142
 
141
143
  - **Returns:**
142
144
 
@@ -278,7 +280,6 @@ Stagehand currently supports the following models from OpenAI and Anthropic:
278
280
 
279
281
  These models can be specified when initializing the `Stagehand` instance or when calling methods like `act()` and `extract()`.
280
282
 
281
-
282
283
  ## How It Works
283
284
 
284
285
  The SDK has two major phases:
@@ -342,12 +343,14 @@ const productInfo = await stagehand.extract({
342
343
  - **Break down complex tasks into smaller, atomic steps**
343
344
 
344
345
  Instead of combining actions:
346
+
345
347
  ```javascript
346
348
  // Avoid this
347
349
  await stagehand.act({ action: "log in and purchase the first item" });
348
350
  ```
349
351
 
350
352
  Split them into individual steps:
353
+
351
354
  ```javascript
352
355
  await stagehand.act({ action: "click the login button" });
353
356
  // ...additional steps to log in...
@@ -385,11 +388,10 @@ await stagehand.act({ action: "fill out the form and submit it" });
385
388
  await stagehand.act({ action: "book the cheapest flight available" });
386
389
  ```
387
390
 
388
- By following these guidelines, you'll increase the reliability and effectiveness of your web automations with Stagehand. Remember, Stagehand excels at executing precise, well-defined actions so keeping your instructions atomic will lead to the best outcomes.
391
+ By following these guidelines, you'll increase the reliability and effectiveness of your web automations with Stagehand. Remember, Stagehand excels at executing precise, well-defined actions so keeping your instructions atomic will lead to the best outcomes.
389
392
 
390
393
  We leave the agentic behaviour to higher-level agentic systems which can use Stagehand as a tool.
391
394
 
392
-
393
395
  ## Roadmap
394
396
 
395
397
  At a high level, we're focused on improving reliability, speed, and cost in that order of priority.
@@ -464,7 +466,7 @@ Stagehand uses [tsup](https://github.com/egoist/tsup) to build the SDK and vanil
464
466
 
465
467
  ## Acknowledgements
466
468
 
467
- This project heavily relies on [Playwright](https://playwright.dev/) as a resilient backbone to automate the web. It also would not be possible without the awesome techniques and discoveries made by [tarsier](https://github.com/reworkd/tarsier), and [fuji-web](https://github.com/fuji-web).
469
+ This project heavily relies on [Playwright](https://playwright.dev/) as a resilient backbone to automate the web. It also would not be possible without the awesome techniques and discoveries made by [tarsier](https://github.com/reworkd/tarsier), and [fuji-web](https://github.com/normal-computing/fuji-web).
468
470
 
469
471
  [Jeremy Press](https://x.com/jeremypress) wrote the original MVP of Stagehand and continues to be a major ally to the project.
470
472
 
package/dist/index.d.ts CHANGED
@@ -1,5 +1,6 @@
1
1
  import { Page, BrowserContext } from '@playwright/test';
2
2
  import { z } from 'zod';
3
+ import { Browserbase } from '@browserbasehq/sdk';
3
4
 
4
5
  interface ChatMessage {
5
6
  role: "system" | "user" | "assistant";
@@ -40,21 +41,25 @@ type AvailableModel = "gpt-4o" | "gpt-4o-mini" | "gpt-4o-2024-08-06" | "claude-3
40
41
  declare class LLMProvider {
41
42
  private modelToProviderMap;
42
43
  private logger;
44
+ private enableCaching;
45
+ private cache;
43
46
  constructor(logger: (message: {
44
47
  category?: string;
45
48
  message: string;
46
- }) => void);
47
- getClient(modelName: AvailableModel): LLMClient;
49
+ }) => void, enableCaching: boolean);
50
+ cleanRequestCache(requestId: string): void;
51
+ getClient(modelName: AvailableModel, requestId: string): LLMClient;
48
52
  }
49
53
 
50
54
  declare class Stagehand {
51
55
  private llmProvider;
52
56
  private observations;
53
57
  private actions;
54
- private id;
55
58
  page: Page;
56
59
  context: BrowserContext;
57
60
  private env;
61
+ private apiKey;
62
+ private projectId;
58
63
  private verbose;
59
64
  private debugDom;
60
65
  private defaultModelName;
@@ -62,8 +67,13 @@ declare class Stagehand {
62
67
  private logger;
63
68
  private externalLogger?;
64
69
  private domSettleTimeoutMs;
65
- constructor({ env, verbose, debugDom, llmProvider, headless, logger, domSettleTimeoutMs, }?: {
70
+ private browserBaseSessionCreateParams?;
71
+ private enableCaching;
72
+ private browserbaseResumeSessionID?;
73
+ constructor({ env, apiKey, projectId, verbose, debugDom, llmProvider, headless, logger, browserBaseSessionCreateParams, domSettleTimeoutMs, enableCaching, browserbaseResumeSessionID, }?: {
66
74
  env: "LOCAL" | "BROWSERBASE";
75
+ apiKey?: string;
76
+ projectId?: string;
67
77
  verbose?: 0 | 1 | 2;
68
78
  debugDom?: boolean;
69
79
  llmProvider?: LLMProvider;
@@ -74,6 +84,9 @@ declare class Stagehand {
74
84
  level?: 0 | 1 | 2;
75
85
  }) => void;
76
86
  domSettleTimeoutMs?: number;
87
+ browserBaseSessionCreateParams?: Browserbase.Sessions.SessionCreateParams;
88
+ enableCaching?: boolean;
89
+ browserbaseResumeSessionID?: string;
77
90
  });
78
91
  init({ modelName, }?: {
79
92
  modelName?: AvailableModel;
package/dist/index.js CHANGED
@@ -84,6 +84,7 @@ module.exports = __toCommonJS(lib_exports);
84
84
  var import_test = require("@playwright/test");
85
85
  var import_crypto = __toESM(require("crypto"));
86
86
  var import_fs2 = __toESM(require("fs"));
87
+ var import_sdk2 = require("@browserbasehq/sdk");
87
88
 
88
89
  // lib/prompt.ts
89
90
  var actSystemPrompt = `
@@ -334,6 +335,7 @@ var modelsWithVision = [
334
335
  "gpt-4o-mini",
335
336
  "claude-3-5-sonnet-latest",
336
337
  "claude-3-5-sonnet-20240620",
338
+ "claude-3-5-sonnet-20241022",
337
339
  "gpt-4o-2024-08-06"
338
340
  ];
339
341
  var AnnotatedScreenshotText = "This is a screenshot of the current page state with the elements annotated on it. Each element id is annotated with a number to the top left of it. Duplicate annotations at the same location are under each other vertically.";
@@ -347,9 +349,10 @@ function verifyActCompletion(_0) {
347
349
  modelName,
348
350
  screenshot,
349
351
  domElements,
350
- logger
352
+ logger,
353
+ requestId
351
354
  }) {
352
- const llmClient = llmProvider.getClient(modelName);
355
+ const llmClient = llmProvider.getClient(modelName, requestId);
353
356
  const messages = [
354
357
  buildVerifyActCompletionSystemPrompt(),
355
358
  buildVerifyActCompletionUserPrompt(goal, steps, domElements)
@@ -398,9 +401,10 @@ function act(_0) {
398
401
  modelName,
399
402
  screenshot,
400
403
  retries = 0,
401
- logger
404
+ logger,
405
+ requestId
402
406
  }) {
403
- const llmClient = llmProvider.getClient(modelName);
407
+ const llmClient = llmProvider.getClient(modelName, requestId);
404
408
  const messages = [
405
409
  buildActSystemPrompt(),
406
410
  buildActUserPrompt(action, steps, domElements)
@@ -437,7 +441,8 @@ function act(_0) {
437
441
  llmProvider,
438
442
  modelName,
439
443
  retries: retries + 1,
440
- logger
444
+ logger,
445
+ requestId
441
446
  });
442
447
  }
443
448
  });
@@ -452,9 +457,10 @@ function extract(_0) {
452
457
  llmProvider,
453
458
  modelName,
454
459
  chunksSeen,
455
- chunksTotal
460
+ chunksTotal,
461
+ requestId
456
462
  }) {
457
- const llmClient = llmProvider.getClient(modelName);
463
+ const llmClient = llmProvider.getClient(modelName, requestId);
458
464
  const extractionResponse = yield llmClient.createChatCompletion({
459
465
  model: modelName,
460
466
  messages: [
@@ -527,7 +533,8 @@ function observe(_0) {
527
533
  domElements,
528
534
  llmProvider,
529
535
  modelName,
530
- image
536
+ image,
537
+ requestId
531
538
  }) {
532
539
  const observeSchema = import_zod.z.object({
533
540
  elements: import_zod.z.array(
@@ -539,7 +546,7 @@ function observe(_0) {
539
546
  })
540
547
  ).describe("an array of elements that match the instruction")
541
548
  });
542
- const llmClient = llmProvider.getClient(modelName);
549
+ const llmClient = llmProvider.getClient(modelName, requestId);
543
550
  const observationResponse = yield llmClient.createChatCompletion({
544
551
  model: modelName,
545
552
  messages: [
@@ -567,12 +574,31 @@ function observe(_0) {
567
574
  var import_openai = __toESM(require("openai"));
568
575
  var import_zod2 = require("openai/helpers/zod");
569
576
  var OpenAIClient = class {
570
- constructor(logger) {
577
+ constructor(logger, enableCaching = false, cache, requestId) {
571
578
  this.client = new import_openai.default();
572
579
  this.logger = logger;
580
+ this.requestId = requestId;
581
+ this.cache = cache;
582
+ this.enableCaching = enableCaching;
573
583
  }
574
584
  createChatCompletion(options) {
575
585
  return __async(this, null, function* () {
586
+ const cacheOptions = {
587
+ model: options.model,
588
+ messages: options.messages,
589
+ temperature: options.temperature,
590
+ top_p: options.top_p,
591
+ frequency_penalty: options.frequency_penalty,
592
+ presence_penalty: options.presence_penalty,
593
+ image: options.image,
594
+ response_model: options.response_model
595
+ };
596
+ if (this.enableCaching) {
597
+ const cachedResponse = yield this.cache.get(cacheOptions, this.requestId);
598
+ if (cachedResponse) {
599
+ return cachedResponse;
600
+ }
601
+ }
576
602
  if (options.image) {
577
603
  const screenshotMessage = {
578
604
  role: "user",
@@ -602,8 +628,18 @@ var OpenAIClient = class {
602
628
  if (response_model) {
603
629
  const extractedData = response.choices[0].message.content;
604
630
  const parsedData = JSON.parse(extractedData);
631
+ if (this.enableCaching) {
632
+ this.cache.set(
633
+ cacheOptions,
634
+ __spreadValues({}, parsedData),
635
+ this.requestId
636
+ );
637
+ }
605
638
  return __spreadValues({}, parsedData);
606
639
  }
640
+ if (this.enableCaching) {
641
+ this.cache.set(cacheOptions, response, this.requestId);
642
+ }
607
643
  return response;
608
644
  });
609
645
  }
@@ -613,16 +649,33 @@ var OpenAIClient = class {
613
649
  var import_sdk = __toESM(require("@anthropic-ai/sdk"));
614
650
  var import_zod_to_json_schema = require("zod-to-json-schema");
615
651
  var AnthropicClient = class {
616
- constructor(logger) {
652
+ constructor(logger, enableCaching = false, cache, requestId) {
617
653
  this.client = new import_sdk.default({
618
654
  apiKey: process.env.ANTHROPIC_API_KEY
619
- // Make sure to set this environment variable
620
655
  });
621
656
  this.logger = logger;
657
+ this.cache = cache;
658
+ this.enableCaching = enableCaching;
659
+ this.requestId = requestId;
622
660
  }
623
661
  createChatCompletion(options) {
624
662
  return __async(this, null, function* () {
625
663
  var _a, _b, _c, _d, _e, _f, _g;
664
+ const cacheOptions = {
665
+ model: options.model,
666
+ messages: options.messages,
667
+ temperature: options.temperature,
668
+ image: options.image,
669
+ response_model: options.response_model,
670
+ tools: options.tools,
671
+ retries: options.retries
672
+ };
673
+ if (this.enableCaching) {
674
+ const cachedResponse = yield this.cache.get(cacheOptions, this.requestId);
675
+ if (cachedResponse) {
676
+ return cachedResponse;
677
+ }
678
+ }
626
679
  const systemMessage = options.messages.find((msg) => msg.role === "system");
627
680
  const userMessages = options.messages.filter(
628
681
  (msg) => msg.role !== "system"
@@ -724,26 +777,318 @@ var AnthropicClient = class {
724
777
  if (options.response_model) {
725
778
  const toolUse = response.content.find((c) => c.type === "tool_use");
726
779
  if (toolUse && "input" in toolUse) {
727
- return toolUse.input;
780
+ const result = toolUse.input;
781
+ if (this.enableCaching) {
782
+ this.cache.set(cacheOptions, result, this.requestId);
783
+ }
784
+ return result;
728
785
  } else {
729
- if (!options.retries || options.retries < 2) {
786
+ if (!options.retries || options.retries < 5) {
730
787
  return this.createChatCompletion(__spreadProps(__spreadValues({}, options), {
731
788
  retries: ((_g = options.retries) != null ? _g : 0) + 1
732
789
  }));
733
790
  }
734
791
  throw new Error(
735
- "Extraction failed: No tool use with input in response"
792
+ "Create Chat Completion Failed: No tool use with input in response"
736
793
  );
737
794
  }
738
795
  }
796
+ if (this.enableCaching) {
797
+ this.cache.set(cacheOptions, transformedResponse, this.requestId);
798
+ }
739
799
  return transformedResponse;
740
800
  });
741
801
  }
742
802
  };
743
803
 
804
+ // lib/llm/LLMCache.ts
805
+ var fs = __toESM(require("fs"));
806
+ var path = __toESM(require("path"));
807
+ var crypto = __toESM(require("crypto"));
808
+ var LLMCache = class {
809
+ constructor(logger, cacheDir = path.join(process.cwd(), "tmp", ".cache"), cacheFile = "llm_calls.json") {
810
+ this.CACHE_MAX_AGE_MS = 7 * 24 * 60 * 60 * 1e3;
811
+ // 1 week in milliseconds
812
+ this.CLEANUP_PROBABILITY = 0.01;
813
+ // 1% chance
814
+ this.LOCK_TIMEOUT_MS = 1e3;
815
+ this.lock_acquired = false;
816
+ this.count_lock_acquire_failures = 0;
817
+ this.request_id_to_used_hashes = {};
818
+ this.logger = logger;
819
+ this.cacheDir = cacheDir;
820
+ this.cacheFile = path.join(cacheDir, cacheFile);
821
+ this.lockFile = path.join(cacheDir, "llm_cache.lock");
822
+ this.ensureCacheDirectory();
823
+ this.setupProcessHandlers();
824
+ }
825
+ setupProcessHandlers() {
826
+ const releaseLockAndExit = () => {
827
+ this.releaseLock();
828
+ process.exit();
829
+ };
830
+ process.on("exit", releaseLockAndExit);
831
+ process.on("SIGINT", releaseLockAndExit);
832
+ process.on("SIGTERM", releaseLockAndExit);
833
+ process.on("uncaughtException", (err) => {
834
+ this.logger({
835
+ category: "llm_cache",
836
+ message: `Uncaught exception: ${err}`,
837
+ level: 2
838
+ });
839
+ if (this.lock_acquired) {
840
+ releaseLockAndExit();
841
+ }
842
+ });
843
+ }
844
+ ensureCacheDirectory() {
845
+ if (!fs.existsSync(this.cacheDir)) {
846
+ fs.mkdirSync(this.cacheDir, { recursive: true });
847
+ }
848
+ }
849
+ createHash(data) {
850
+ const hash = crypto.createHash("sha256");
851
+ return hash.update(JSON.stringify(data)).digest("hex");
852
+ }
853
+ sleep(ms) {
854
+ return new Promise((resolve) => setTimeout(resolve, ms));
855
+ }
856
+ acquireLock() {
857
+ return __async(this, null, function* () {
858
+ const startTime = Date.now();
859
+ while (Date.now() - startTime < this.LOCK_TIMEOUT_MS) {
860
+ try {
861
+ if (fs.existsSync(this.lockFile)) {
862
+ const lockAge = Date.now() - fs.statSync(this.lockFile).mtimeMs;
863
+ if (lockAge > this.LOCK_TIMEOUT_MS) {
864
+ fs.unlinkSync(this.lockFile);
865
+ }
866
+ }
867
+ fs.writeFileSync(this.lockFile, process.pid.toString(), { flag: "wx" });
868
+ this.count_lock_acquire_failures = 0;
869
+ this.lock_acquired = true;
870
+ return true;
871
+ } catch (error) {
872
+ yield this.sleep(5);
873
+ }
874
+ }
875
+ this.logger({
876
+ category: "llm_cache",
877
+ message: "Failed to acquire lock after timeout",
878
+ level: 2
879
+ });
880
+ this.count_lock_acquire_failures++;
881
+ if (this.count_lock_acquire_failures >= 3) {
882
+ this.logger({
883
+ category: "llm_cache",
884
+ message: "Failed to acquire lock 3 times in a row. Releasing lock manually.",
885
+ level: 1
886
+ });
887
+ this.releaseLock();
888
+ }
889
+ return false;
890
+ });
891
+ }
892
+ releaseLock() {
893
+ try {
894
+ if (fs.existsSync(this.lockFile)) {
895
+ fs.unlinkSync(this.lockFile);
896
+ }
897
+ this.lock_acquired = false;
898
+ } catch (error) {
899
+ this.logger({
900
+ category: "llm_cache",
901
+ message: `Error releasing lock: ${error}`,
902
+ level: 2
903
+ });
904
+ }
905
+ }
906
+ readCache() {
907
+ if (fs.existsSync(this.cacheFile)) {
908
+ return JSON.parse(fs.readFileSync(this.cacheFile, "utf-8"));
909
+ }
910
+ return {};
911
+ }
912
+ writeCache(cache) {
913
+ try {
914
+ if (Math.random() < this.CLEANUP_PROBABILITY) {
915
+ this.cleanupStaleEntries(cache);
916
+ }
917
+ fs.writeFileSync(this.cacheFile, JSON.stringify(cache, null, 2));
918
+ } finally {
919
+ this.releaseLock();
920
+ }
921
+ }
922
+ cleanupStaleEntries(cache) {
923
+ if (!this.acquireLock()) {
924
+ this.logger({
925
+ category: "llm_cache",
926
+ message: "Failed to acquire lock for cleaning up cache",
927
+ level: 2
928
+ });
929
+ return;
930
+ }
931
+ try {
932
+ const now = Date.now();
933
+ let entriesRemoved = 0;
934
+ for (const [hash, entry] of Object.entries(cache)) {
935
+ if (now - entry.timestamp > this.CACHE_MAX_AGE_MS) {
936
+ delete cache[hash];
937
+ entriesRemoved++;
938
+ }
939
+ }
940
+ if (entriesRemoved > 0) {
941
+ this.logger({
942
+ category: "llm_cache",
943
+ message: `Cleaned up ${entriesRemoved} stale cache entries`,
944
+ level: 1
945
+ });
946
+ }
947
+ } catch (error) {
948
+ this.logger({
949
+ category: "llm_cache",
950
+ message: `Error cleaning up stale cache entries: ${error}`,
951
+ level: 1
952
+ });
953
+ } finally {
954
+ this.releaseLock();
955
+ }
956
+ }
957
+ resetCache() {
958
+ if (!this.acquireLock()) {
959
+ this.logger({
960
+ category: "llm_cache",
961
+ message: "Failed to acquire lock for resetting cache",
962
+ level: 2
963
+ });
964
+ return;
965
+ }
966
+ try {
967
+ this.ensureCacheDirectory();
968
+ fs.writeFileSync(this.cacheFile, "{}");
969
+ } finally {
970
+ this.releaseLock();
971
+ }
972
+ }
973
+ get(options, requestId) {
974
+ return __async(this, null, function* () {
975
+ var _a, _b;
976
+ if (!(yield this.acquireLock())) {
977
+ this.logger({
978
+ category: "llm_cache",
979
+ message: "Failed to acquire lock for getting cache",
980
+ level: 2
981
+ });
982
+ return null;
983
+ }
984
+ try {
985
+ const hash = this.createHash(options);
986
+ const cache = this.readCache();
987
+ if (cache[hash]) {
988
+ this.logger({
989
+ category: "llm_cache",
990
+ message: "Cache hit",
991
+ level: 1
992
+ });
993
+ (_b = (_a = this.request_id_to_used_hashes)[requestId]) != null ? _b : _a[requestId] = [];
994
+ this.request_id_to_used_hashes[requestId].push(hash);
995
+ return cache[hash].response;
996
+ }
997
+ return null;
998
+ } catch (error) {
999
+ this.logger({
1000
+ category: "llm_cache",
1001
+ message: `Error getting cache: ${error}. Resetting cache.`,
1002
+ level: 1
1003
+ });
1004
+ this.resetCache();
1005
+ return null;
1006
+ } finally {
1007
+ this.releaseLock();
1008
+ }
1009
+ });
1010
+ }
1011
+ deleteCacheForRequestId(requestId) {
1012
+ return __async(this, null, function* () {
1013
+ var _a;
1014
+ if (!(yield this.acquireLock())) {
1015
+ this.logger({
1016
+ category: "llm_cache",
1017
+ message: "Failed to acquire lock for deleting cache",
1018
+ level: 2
1019
+ });
1020
+ return;
1021
+ }
1022
+ try {
1023
+ const cache = this.readCache();
1024
+ let entriesRemoved = [];
1025
+ for (const hash of (_a = this.request_id_to_used_hashes[requestId]) != null ? _a : []) {
1026
+ if (cache[hash]) {
1027
+ entriesRemoved.push(cache[hash]);
1028
+ delete cache[hash];
1029
+ }
1030
+ }
1031
+ this.logger({
1032
+ category: "llm_cache",
1033
+ message: `Deleted ${entriesRemoved.length} cache entries for requestId ${requestId}`,
1034
+ level: 1
1035
+ });
1036
+ this.writeCache(cache);
1037
+ } catch (exception) {
1038
+ this.logger({
1039
+ category: "llm_cache",
1040
+ message: `Error deleting cache for requestId ${requestId}: ${exception}`,
1041
+ level: 1
1042
+ });
1043
+ } finally {
1044
+ this.releaseLock();
1045
+ }
1046
+ });
1047
+ }
1048
+ set(options, response, requestId) {
1049
+ return __async(this, null, function* () {
1050
+ var _a, _b;
1051
+ if (!(yield this.acquireLock())) {
1052
+ this.logger({
1053
+ category: "llm_cache",
1054
+ message: "Failed to acquire lock for setting cache",
1055
+ level: 2
1056
+ });
1057
+ return;
1058
+ }
1059
+ try {
1060
+ const hash = this.createHash(options);
1061
+ const cache = this.readCache();
1062
+ cache[hash] = {
1063
+ response,
1064
+ timestamp: Date.now(),
1065
+ requestId
1066
+ };
1067
+ this.writeCache(cache);
1068
+ (_b = (_a = this.request_id_to_used_hashes)[requestId]) != null ? _b : _a[requestId] = [];
1069
+ this.request_id_to_used_hashes[requestId].push(hash);
1070
+ this.logger({
1071
+ category: "llm_cache",
1072
+ message: "Cache miss - saved new response",
1073
+ level: 1
1074
+ });
1075
+ } catch (error) {
1076
+ this.logger({
1077
+ category: "llm_cache",
1078
+ message: `Error setting cache: ${error}. Resetting cache.`,
1079
+ level: 1
1080
+ });
1081
+ this.resetCache();
1082
+ } finally {
1083
+ this.releaseLock();
1084
+ }
1085
+ });
1086
+ }
1087
+ };
1088
+
744
1089
  // lib/llm/LLMProvider.ts
745
1090
  var LLMProvider = class {
746
- constructor(logger) {
1091
+ constructor(logger, enableCaching) {
747
1092
  this.modelToProviderMap = {
748
1093
  "gpt-4o": "openai",
749
1094
  "gpt-4o-mini": "openai",
@@ -753,17 +1098,36 @@ var LLMProvider = class {
753
1098
  "claude-3-5-sonnet-20241022": "anthropic"
754
1099
  };
755
1100
  this.logger = logger;
1101
+ this.enableCaching = enableCaching;
1102
+ this.cache = new LLMCache(logger);
1103
+ }
1104
+ cleanRequestCache(requestId) {
1105
+ this.logger({
1106
+ category: "llm_cache",
1107
+ message: `Cleaning up cache for requestId: ${requestId}`
1108
+ });
1109
+ this.cache.deleteCacheForRequestId(requestId);
756
1110
  }
757
- getClient(modelName) {
1111
+ getClient(modelName, requestId) {
758
1112
  const provider = this.modelToProviderMap[modelName];
759
1113
  if (!provider) {
760
1114
  throw new Error(`Unsupported model: ${modelName}`);
761
1115
  }
762
1116
  switch (provider) {
763
1117
  case "openai":
764
- return new OpenAIClient(this.logger);
1118
+ return new OpenAIClient(
1119
+ this.logger,
1120
+ this.enableCaching,
1121
+ this.cache,
1122
+ requestId
1123
+ );
765
1124
  case "anthropic":
766
- return new AnthropicClient(this.logger);
1125
+ return new AnthropicClient(
1126
+ this.logger,
1127
+ this.enableCaching,
1128
+ this.cache,
1129
+ requestId
1130
+ );
767
1131
  default:
768
1132
  throw new Error(`Unsupported provider: ${provider}`);
769
1133
  }
@@ -773,55 +1137,6 @@ var LLMProvider = class {
773
1137
  // lib/index.ts
774
1138
  var import_path2 = __toESM(require("path"));
775
1139
 
776
- // lib/browserbase.ts
777
- var Browserbase = class {
778
- createSession() {
779
- return __async(this, null, function* () {
780
- if (!process.env.BROWSERBASE_API_KEY || !process.env.BROWSERBASE_PROJECT_ID) {
781
- throw new Error(
782
- "BROWSERBASE_API_KEY and BROWSERBASE_PROJECT_ID must be set"
783
- );
784
- }
785
- const response = yield fetch(`https://www.browserbase.com/v1/sessions`, {
786
- method: "POST",
787
- headers: {
788
- "x-bb-api-key": `${process.env.BROWSERBASE_API_KEY}`,
789
- "Content-Type": "application/json"
790
- },
791
- body: JSON.stringify({
792
- projectId: process.env.BROWSERBASE_PROJECT_ID
793
- })
794
- });
795
- const json = yield response.json();
796
- if (json.error) {
797
- throw new Error(json.error);
798
- }
799
- return {
800
- sessionId: json.id,
801
- connectUrl: json.connectUrl
802
- };
803
- });
804
- }
805
- retrieveDebugConnectionURL(sessionId) {
806
- return __async(this, null, function* () {
807
- if (!process.env.BROWSERBASE_API_KEY) {
808
- throw new Error("BROWSERBASE_API_KEY must be set");
809
- }
810
- const response = yield fetch(
811
- `https://www.browserbase.com/v1/sessions/${sessionId}/debug`,
812
- {
813
- method: "GET",
814
- headers: {
815
- "x-bb-api-key": `${process.env.BROWSERBASE_API_KEY}`
816
- }
817
- }
818
- );
819
- const json = yield response.json();
820
- return json.debuggerFullscreenUrl;
821
- });
822
- }
823
- };
824
-
825
1140
  // lib/vision.ts
826
1141
  var import_fs = __toESM(require("fs"));
827
1142
  var import_path = __toESM(require("path"));
@@ -1004,40 +1319,85 @@ var ScreenshotService = class _ScreenshotService {
1004
1319
 
1005
1320
  // lib/index.ts
1006
1321
  require("dotenv").config({ path: ".env" });
1007
- function getBrowser(env = "LOCAL", headless = false, logger) {
1322
+ function getBrowser(apiKey, projectId, env = "LOCAL", headless = false, logger, browserbaseSessionCreateParams, browserbaseResumeSessionID) {
1008
1323
  return __async(this, null, function* () {
1009
- if (env === "BROWSERBASE" && !process.env.BROWSERBASE_API_KEY) {
1010
- logger({
1011
- category: "Init",
1012
- message: "BROWSERBASE_API_KEY is required to use BROWSERBASE env. Defaulting to LOCAL.",
1013
- level: 0
1014
- });
1015
- env = "LOCAL";
1016
- }
1017
- if (env === "BROWSERBASE" && !process.env.BROWSERBASE_PROJECT_ID) {
1018
- logger({
1019
- category: "Init",
1020
- message: "BROWSERBASE_PROJECT_ID is required to use BROWSERBASE env. Defaulting to LOCAL.",
1021
- level: 0
1022
- });
1023
- env = "LOCAL";
1324
+ if (env === "BROWSERBASE") {
1325
+ if (!apiKey) {
1326
+ logger({
1327
+ category: "Init",
1328
+ message: "BROWSERBASE_API_KEY is required to use BROWSERBASE env. Defaulting to LOCAL.",
1329
+ level: 0
1330
+ });
1331
+ env = "LOCAL";
1332
+ }
1333
+ if (!projectId) {
1334
+ logger({
1335
+ category: "Init",
1336
+ message: "BROWSERBASE_PROJECT_ID is required for some Browserbase features that may not work without it.",
1337
+ level: 1
1338
+ });
1339
+ }
1024
1340
  }
1025
1341
  if (env === "BROWSERBASE") {
1342
+ if (!apiKey) {
1343
+ throw new Error("BROWSERBASE_API_KEY is required.");
1344
+ }
1026
1345
  let debugUrl = void 0;
1027
1346
  let sessionUrl = void 0;
1028
- logger({
1029
- category: "Init",
1030
- message: "Connecting you to Browserbase...",
1031
- level: 0
1347
+ let sessionId;
1348
+ let connectUrl;
1349
+ const browserbase = new import_sdk2.Browserbase({
1350
+ apiKey
1032
1351
  });
1033
- const browserbase = new Browserbase();
1034
- const { sessionId, connectUrl } = yield browserbase.createSession();
1352
+ if (browserbaseResumeSessionID) {
1353
+ try {
1354
+ const sessionStatus = yield browserbase.sessions.retrieve(
1355
+ browserbaseResumeSessionID
1356
+ );
1357
+ if (sessionStatus.status !== "RUNNING") {
1358
+ throw new Error(
1359
+ `Session ${browserbaseResumeSessionID} is not running (status: ${sessionStatus.status})`
1360
+ );
1361
+ }
1362
+ sessionId = browserbaseResumeSessionID;
1363
+ connectUrl = `wss://connect.browserbase.com?apiKey=${apiKey}&sessionId=${sessionId}`;
1364
+ logger({
1365
+ category: "Init",
1366
+ message: "Resuming existing Browserbase session...",
1367
+ level: 0
1368
+ });
1369
+ } catch (error) {
1370
+ logger({
1371
+ category: "Init",
1372
+ message: `Failed to resume session ${browserbaseResumeSessionID}: ${error.message}`,
1373
+ level: 0
1374
+ });
1375
+ throw error;
1376
+ }
1377
+ } else {
1378
+ logger({
1379
+ category: "Init",
1380
+ message: "Creating new Browserbase session...",
1381
+ level: 0
1382
+ });
1383
+ if (!projectId) {
1384
+ throw new Error(
1385
+ "BROWSERBASE_PROJECT_ID is required for new Browserbase sessions."
1386
+ );
1387
+ }
1388
+ const session = yield browserbase.sessions.create(__spreadValues({
1389
+ projectId
1390
+ }, browserbaseSessionCreateParams));
1391
+ sessionId = session.id;
1392
+ connectUrl = session.connectUrl;
1393
+ }
1035
1394
  const browser = yield import_test.chromium.connectOverCDP(connectUrl);
1036
- debugUrl = yield browserbase.retrieveDebugConnectionURL(sessionId);
1395
+ const { debuggerUrl } = yield browserbase.sessions.debug(sessionId);
1396
+ debugUrl = debuggerUrl;
1037
1397
  sessionUrl = `https://www.browserbase.com/sessions/${sessionId}`;
1038
1398
  logger({
1039
1399
  category: "Init",
1040
- message: `Browserbase session started.
1400
+ message: `Browserbase session ${browserbaseResumeSessionID ? "resumed" : "started"}.
1041
1401
 
1042
1402
  Session Url: ${sessionUrl}
1043
1403
 
@@ -1124,12 +1484,17 @@ function applyStealthScripts(context) {
1124
1484
  var Stagehand = class {
1125
1485
  constructor({
1126
1486
  env,
1127
- verbose = 0,
1128
- debugDom = false,
1487
+ apiKey,
1488
+ projectId,
1489
+ verbose,
1490
+ debugDom,
1129
1491
  llmProvider,
1130
- headless = false,
1492
+ headless,
1131
1493
  logger,
1132
- domSettleTimeoutMs = 6e4
1494
+ browserBaseSessionCreateParams,
1495
+ domSettleTimeoutMs,
1496
+ enableCaching,
1497
+ browserbaseResumeSessionID
1133
1498
  } = {
1134
1499
  env: "BROWSERBASE"
1135
1500
  }) {
@@ -1138,24 +1503,33 @@ var Stagehand = class {
1138
1503
  this.is_processing_browserbase_logs = false;
1139
1504
  this.externalLogger = logger;
1140
1505
  this.logger = this.log.bind(this);
1141
- this.llmProvider = llmProvider || new LLMProvider(this.logger);
1506
+ this.enableCaching = enableCaching != null ? enableCaching : false;
1507
+ this.llmProvider = llmProvider || new LLMProvider(this.logger, this.enableCaching);
1142
1508
  this.env = env;
1143
1509
  this.observations = {};
1510
+ this.apiKey = apiKey;
1511
+ this.projectId = projectId;
1144
1512
  this.actions = {};
1145
- this.verbose = verbose;
1146
- this.debugDom = debugDom;
1513
+ this.verbose = verbose != null ? verbose : 0;
1514
+ this.debugDom = debugDom != null ? debugDom : false;
1147
1515
  this.defaultModelName = "gpt-4o";
1148
- this.headless = headless;
1149
- this.domSettleTimeoutMs = domSettleTimeoutMs;
1516
+ this.domSettleTimeoutMs = domSettleTimeoutMs != null ? domSettleTimeoutMs : 6e4;
1517
+ this.headless = headless != null ? headless : false;
1518
+ this.browserBaseSessionCreateParams = browserBaseSessionCreateParams;
1519
+ this.browserbaseResumeSessionID = browserbaseResumeSessionID;
1150
1520
  }
1151
1521
  init() {
1152
1522
  return __async(this, arguments, function* ({
1153
1523
  modelName = "gpt-4o"
1154
1524
  } = {}) {
1155
1525
  const { context, debugUrl, sessionUrl } = yield getBrowser(
1526
+ this.apiKey,
1527
+ this.projectId,
1156
1528
  this.env,
1157
1529
  this.headless,
1158
- this.logger
1530
+ this.logger,
1531
+ this.browserBaseSessionCreateParams,
1532
+ this.browserbaseResumeSessionID
1159
1533
  ).catch((e) => {
1160
1534
  console.error("Error in init:", e);
1161
1535
  return { context: void 0, debugUrl: void 0, sessionUrl: void 0 };
@@ -1271,11 +1645,9 @@ var Stagehand = class {
1271
1645
  return __async(this, null, function* () {
1272
1646
  try {
1273
1647
  const timeout = timeoutMs != null ? timeoutMs : this.domSettleTimeoutMs;
1274
- const timeoutPromise = new Promise((resolve) => {
1275
- setTimeout(() => {
1276
- console.warn(
1277
- `[stagehand:dom] DOM settle timeout of ${timeout}ms exceeded, continuing anyway`
1278
- );
1648
+ let timeoutHandle;
1649
+ const timeoutPromise = new Promise((resolve, reject) => {
1650
+ timeoutHandle = setTimeout(() => {
1279
1651
  this.log({
1280
1652
  category: "dom",
1281
1653
  message: `DOM settle timeout of ${timeout}ms exceeded, continuing anyway`,
@@ -1284,16 +1656,12 @@ var Stagehand = class {
1284
1656
  resolve();
1285
1657
  }, timeout);
1286
1658
  });
1287
- yield Promise.race([
1288
- (() => __async(this, null, function* () {
1289
- yield this.page.waitForSelector("body");
1290
- yield this.page.waitForLoadState("domcontentloaded");
1291
- yield this.page.evaluate(() => {
1659
+ try {
1660
+ yield Promise.race([
1661
+ this.page.evaluate(() => {
1292
1662
  return new Promise((resolve) => {
1293
1663
  if (typeof window.waitForDomSettle === "function") {
1294
- window.waitForDomSettle().then(() => {
1295
- resolve();
1296
- });
1664
+ window.waitForDomSettle().then(resolve);
1297
1665
  } else {
1298
1666
  console.warn(
1299
1667
  "waitForDomSettle is not defined, considering DOM as settled"
@@ -1301,10 +1669,14 @@ var Stagehand = class {
1301
1669
  resolve();
1302
1670
  }
1303
1671
  });
1304
- });
1305
- }))(),
1306
- timeoutPromise
1307
- ]);
1672
+ }),
1673
+ this.page.waitForLoadState("domcontentloaded"),
1674
+ this.page.waitForSelector("body"),
1675
+ timeoutPromise
1676
+ ]);
1677
+ } finally {
1678
+ clearTimeout(timeoutHandle);
1679
+ }
1308
1680
  } catch (e) {
1309
1681
  this.log({
1310
1682
  category: "dom",
@@ -1374,7 +1746,8 @@ Trace: ${e.stack}`,
1374
1746
  progress = "",
1375
1747
  content = {},
1376
1748
  chunksSeen = [],
1377
- modelName
1749
+ modelName,
1750
+ requestId
1378
1751
  }) {
1379
1752
  this.log({
1380
1753
  category: "extraction",
@@ -1401,7 +1774,8 @@ Trace: ${e.stack}`,
1401
1774
  schema,
1402
1775
  modelName: modelName || this.defaultModelName,
1403
1776
  chunksSeen: chunksSeen.length,
1404
- chunksTotal: chunks.length
1777
+ chunksTotal: chunks.length,
1778
+ requestId
1405
1779
  });
1406
1780
  const _a = extractionResponse, {
1407
1781
  metadata: { progress: newProgress, completed }
@@ -1445,7 +1819,8 @@ Trace: ${e.stack}`,
1445
1819
  instruction,
1446
1820
  useVision,
1447
1821
  fullPage,
1448
- modelName
1822
+ modelName,
1823
+ requestId
1449
1824
  }) {
1450
1825
  if (!instruction) {
1451
1826
  instruction = `Find elements that can be used for any future actions in the page. These may be navigation links, related pages, section/subsection links, buttons, or other interactive elements. Be comprehensive: if there are multiple elements that may be relevant for future actions, return all of them.`;
@@ -1485,7 +1860,8 @@ Trace: ${e.stack}`,
1485
1860
  domElements: outputString,
1486
1861
  llmProvider: this.llmProvider,
1487
1862
  modelName: modelName || this.defaultModelName,
1488
- image: annotatedScreenshot
1863
+ image: annotatedScreenshot,
1864
+ requestId
1489
1865
  });
1490
1866
  const elementsWithSelectors = observationResponse.elements.map(
1491
1867
  (element) => {
@@ -1514,7 +1890,8 @@ Trace: ${e.stack}`,
1514
1890
  modelName,
1515
1891
  useVision,
1516
1892
  verifierUseVision,
1517
- retries = 0
1893
+ retries = 0,
1894
+ requestId
1518
1895
  }) {
1519
1896
  var _a;
1520
1897
  const model = modelName != null ? modelName : this.defaultModelName;
@@ -1574,7 +1951,8 @@ Trace: ${e.stack}`,
1574
1951
  llmProvider: this.llmProvider,
1575
1952
  modelName: model,
1576
1953
  screenshot: annotatedScreenshot,
1577
- logger: this.logger
1954
+ logger: this.logger,
1955
+ requestId
1578
1956
  });
1579
1957
  this.log({
1580
1958
  category: "action",
@@ -1596,7 +1974,8 @@ Trace: ${e.stack}`,
1596
1974
  chunksSeen,
1597
1975
  modelName,
1598
1976
  useVision,
1599
- verifierUseVision
1977
+ verifierUseVision,
1978
+ requestId
1600
1979
  });
1601
1980
  } else if (useVision === "fallback") {
1602
1981
  this.log({
@@ -1611,9 +1990,13 @@ Trace: ${e.stack}`,
1611
1990
  chunksSeen,
1612
1991
  modelName,
1613
1992
  useVision: true,
1614
- verifierUseVision
1993
+ verifierUseVision,
1994
+ requestId
1615
1995
  });
1616
1996
  } else {
1997
+ if (this.enableCaching) {
1998
+ this.llmProvider.cleanRequestCache(requestId);
1999
+ }
1617
2000
  return {
1618
2001
  success: false,
1619
2002
  message: `Action was not able to be completed.`,
@@ -1670,7 +2053,8 @@ Trace: ${e.stack}`,
1670
2053
  useVision,
1671
2054
  verifierUseVision,
1672
2055
  retries: retries + 1,
1673
- chunksSeen
2056
+ chunksSeen,
2057
+ requestId
1674
2058
  });
1675
2059
  }
1676
2060
  }
@@ -1699,7 +2083,8 @@ Trace: ${e.stack}`,
1699
2083
  useVision,
1700
2084
  verifierUseVision,
1701
2085
  retries: retries + 1,
1702
- chunksSeen
2086
+ chunksSeen,
2087
+ requestId
1703
2088
  });
1704
2089
  }
1705
2090
  }
@@ -1722,7 +2107,8 @@ Trace: ${e.stack}`,
1722
2107
  useVision,
1723
2108
  verifierUseVision,
1724
2109
  retries: retries + 1,
1725
- chunksSeen
2110
+ chunksSeen,
2111
+ requestId
1726
2112
  });
1727
2113
  }
1728
2114
  }
@@ -1751,7 +2137,8 @@ Trace: ${e.stack}`,
1751
2137
  useVision,
1752
2138
  verifierUseVision,
1753
2139
  retries: retries + 1,
1754
- chunksSeen
2140
+ chunksSeen,
2141
+ requestId
1755
2142
  });
1756
2143
  }
1757
2144
  }
@@ -1820,9 +2207,13 @@ Trace: ${e.stack}`,
1820
2207
  useVision,
1821
2208
  verifierUseVision,
1822
2209
  retries: retries + 1,
1823
- chunksSeen
2210
+ chunksSeen,
2211
+ requestId
1824
2212
  });
1825
2213
  } else {
2214
+ if (this.enableCaching) {
2215
+ this.llmProvider.cleanRequestCache(requestId);
2216
+ }
1826
2217
  return {
1827
2218
  success: false,
1828
2219
  message: `Internal error: Chosen method ${method} is invalid`,
@@ -1889,7 +2280,8 @@ Trace: ${e.stack}`,
1889
2280
  modelName: model,
1890
2281
  screenshot: fullpageScreenshot,
1891
2282
  domElements,
1892
- logger: this.logger
2283
+ logger: this.logger,
2284
+ requestId
1893
2285
  });
1894
2286
  this.log({
1895
2287
  category: "action",
@@ -1909,7 +2301,8 @@ Trace: ${e.stack}`,
1909
2301
  modelName,
1910
2302
  chunksSeen,
1911
2303
  useVision,
1912
- verifierUseVision
2304
+ verifierUseVision,
2305
+ requestId
1913
2306
  });
1914
2307
  } else {
1915
2308
  this.log({
@@ -1939,10 +2332,14 @@ Trace: ${error.stack}`,
1939
2332
  useVision,
1940
2333
  verifierUseVision,
1941
2334
  retries: retries + 1,
1942
- chunksSeen
2335
+ chunksSeen,
2336
+ requestId
1943
2337
  });
1944
2338
  }
1945
2339
  yield this._recordAction(action, "");
2340
+ if (this.enableCaching) {
2341
+ this.llmProvider.cleanRequestCache(requestId);
2342
+ }
1946
2343
  return {
1947
2344
  success: false,
1948
2345
  message: `Error performing action: ${error.message}`,
@@ -1958,12 +2355,32 @@ Trace: ${error.stack}`,
1958
2355
  useVision = "fallback"
1959
2356
  }) {
1960
2357
  useVision = useVision != null ? useVision : "fallback";
2358
+ const requestId = Math.random().toString(36).substring(2);
2359
+ this.logger({
2360
+ category: "act",
2361
+ message: `Running act with action: ${action}, requestId: ${requestId}`
2362
+ });
1961
2363
  return this._act({
1962
2364
  action,
1963
2365
  modelName,
1964
2366
  chunksSeen: [],
1965
2367
  useVision,
1966
- verifierUseVision: useVision !== false
2368
+ verifierUseVision: useVision !== false,
2369
+ requestId
2370
+ }).catch((e) => {
2371
+ this.logger({
2372
+ category: "act",
2373
+ message: `Error acting: ${e.message}
2374
+ Trace: ${e.stack}`
2375
+ });
2376
+ if (this.enableCaching) {
2377
+ this.llmProvider.cleanRequestCache(requestId);
2378
+ }
2379
+ return {
2380
+ success: false,
2381
+ message: `Internal error: Error acting: ${e.message}`,
2382
+ action
2383
+ };
1967
2384
  });
1968
2385
  });
1969
2386
  }
@@ -1973,21 +2390,53 @@ Trace: ${error.stack}`,
1973
2390
  schema,
1974
2391
  modelName
1975
2392
  }) {
2393
+ const requestId = Math.random().toString(36).substring(2);
2394
+ this.logger({
2395
+ category: "extract",
2396
+ message: `Running extract with instruction: ${instruction}, requestId: ${requestId}`
2397
+ });
1976
2398
  return this._extract({
1977
2399
  instruction,
1978
2400
  schema,
1979
- modelName
2401
+ modelName,
2402
+ requestId
2403
+ }).catch((e) => {
2404
+ this.logger({
2405
+ category: "extract",
2406
+ message: `Internal error: Error extracting: ${e.message}
2407
+ Trace: ${e.stack}`
2408
+ });
2409
+ if (this.enableCaching) {
2410
+ this.llmProvider.cleanRequestCache(requestId);
2411
+ }
2412
+ throw e;
1980
2413
  });
1981
2414
  });
1982
2415
  }
1983
2416
  observe(options) {
1984
2417
  return __async(this, null, function* () {
1985
2418
  var _a, _b;
2419
+ const requestId = Math.random().toString(36).substring(2);
2420
+ this.logger({
2421
+ category: "observe",
2422
+ message: `Running observe with instruction: ${options == null ? void 0 : options.instruction}, requestId: ${requestId}`
2423
+ });
1986
2424
  return this._observe({
1987
2425
  instruction: (_a = options == null ? void 0 : options.instruction) != null ? _a : "Find actions that can be performed on this page.",
1988
2426
  modelName: options == null ? void 0 : options.modelName,
1989
2427
  useVision: (_b = options == null ? void 0 : options.useVision) != null ? _b : false,
1990
- fullPage: false
2428
+ fullPage: false,
2429
+ requestId
2430
+ }).catch((e) => {
2431
+ this.logger({
2432
+ category: "observe",
2433
+ message: `Error observing: ${e.message}
2434
+ Trace: ${e.stack}`
2435
+ });
2436
+ if (this.enableCaching) {
2437
+ this.llmProvider.cleanRequestCache(requestId);
2438
+ }
2439
+ throw e;
1991
2440
  });
1992
2441
  });
1993
2442
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@browserbasehq/stagehand",
3
- "version": "1.0.3",
3
+ "version": "1.1.0",
4
4
  "description": "An AI web browsing framework focused on simplicity and extensibility.",
5
5
  "main": "./dist/index.js",
6
6
  "module": "./dist/index.js",
@@ -50,6 +50,7 @@
50
50
  },
51
51
  "dependencies": {
52
52
  "@anthropic-ai/sdk": "^0.27.3",
53
+ "@browserbasehq/sdk": "^2.0.0",
53
54
  "anthropic": "^0.0.0",
54
55
  "anthropic-ai": "^0.0.10",
55
56
  "sharp": "^0.33.5",
@@ -1,2 +0,0 @@
1
- (() => {
2
- })();