@browserbasehq/orca 3.0.0-preview.1 → 3.0.0-preview.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (159) hide show
  1. package/dist/index.d.ts +759 -593
  2. package/dist/index.js +25560 -24375
  3. package/package.json +35 -74
  4. package/README.md +0 -165
  5. package/dist/lib/StagehandContext.d.ts +0 -25
  6. package/dist/lib/StagehandPage.d.ts +0 -103
  7. package/dist/lib/a11y/utils.d.ts +0 -144
  8. package/dist/lib/agent/AgentClient.d.ts +0 -20
  9. package/dist/lib/agent/AgentProvider.d.ts +0 -19
  10. package/dist/lib/agent/AnthropicCUAClient.d.ts +0 -56
  11. package/dist/lib/agent/GoogleCUAClient.d.ts +0 -63
  12. package/dist/lib/agent/OpenAICUAClient.d.ts +0 -65
  13. package/dist/lib/agent/StagehandAgent.d.ts +0 -15
  14. package/dist/lib/agent/tools/act.d.ts +0 -59
  15. package/dist/lib/agent/tools/ariaTree.d.ts +0 -11
  16. package/dist/lib/agent/tools/close.d.ts +0 -22
  17. package/dist/lib/agent/tools/extract.d.ts +0 -38
  18. package/dist/lib/agent/tools/fillform.d.ts +0 -37
  19. package/dist/lib/agent/tools/goto.d.ts +0 -29
  20. package/dist/lib/agent/tools/index.d.ts +0 -257
  21. package/dist/lib/agent/tools/navback.d.ts +0 -17
  22. package/dist/lib/agent/tools/screenshot.d.ts +0 -13
  23. package/dist/lib/agent/tools/scroll.d.ts +0 -23
  24. package/dist/lib/agent/tools/wait.d.ts +0 -18
  25. package/dist/lib/agent/utils/cuaKeyMapping.d.ts +0 -10
  26. package/dist/lib/agent/utils/imageCompression.d.ts +0 -53
  27. package/dist/lib/agent/utils/messageProcessing.d.ts +0 -13
  28. package/dist/lib/api.d.ts +0 -23
  29. package/dist/lib/browserbaseDefaults.d.ts +0 -9
  30. package/dist/lib/cache/ActionCache.d.ts +0 -62
  31. package/dist/lib/cache/BaseCache.d.ts +0 -66
  32. package/dist/lib/cache/LLMCache.d.ts +0 -22
  33. package/dist/lib/cache.d.ts +0 -29
  34. package/dist/lib/dom/build/scriptContent.d.ts +0 -1
  35. package/dist/lib/dom/elementCheckUtils.d.ts +0 -2
  36. package/dist/lib/dom/genDomScripts.d.ts +0 -1
  37. package/dist/lib/dom/index.d.ts +0 -2
  38. package/dist/lib/dom/process.d.ts +0 -17
  39. package/dist/lib/dom/utils.d.ts +0 -7
  40. package/dist/lib/dom/xpathUtils.d.ts +0 -14
  41. package/dist/lib/handlers/actHandler.d.ts +0 -33
  42. package/dist/lib/handlers/cuaAgentHandler.d.ts +0 -58
  43. package/dist/lib/handlers/extractHandler.d.ts +0 -54
  44. package/dist/lib/handlers/handlerUtils/actHandlerUtils.d.ts +0 -21
  45. package/dist/lib/handlers/observeHandler.d.ts +0 -40
  46. package/dist/lib/handlers/stagehandAgentHandler.d.ts +0 -27
  47. package/dist/lib/index.d.ts +0 -94
  48. package/dist/lib/inference.d.ts +0 -71
  49. package/dist/lib/inferenceLogUtils.d.ts +0 -12
  50. package/dist/lib/llm/AnthropicClient.d.ts +0 -21
  51. package/dist/lib/llm/CerebrasClient.d.ts +0 -22
  52. package/dist/lib/llm/GoogleClient.d.ts +0 -24
  53. package/dist/lib/llm/GroqClient.d.ts +0 -22
  54. package/dist/lib/llm/LLMClient.d.ts +0 -99
  55. package/dist/lib/llm/LLMProvider.d.ts +0 -13
  56. package/dist/lib/llm/OpenAIClient.d.ts +0 -20
  57. package/dist/lib/llm/aisdk.d.ts +0 -20
  58. package/dist/lib/logger.d.ts +0 -54
  59. package/dist/lib/mcp/connection.d.ts +0 -11
  60. package/dist/lib/mcp/utils.d.ts +0 -3
  61. package/dist/lib/prompt.d.ts +0 -12
  62. package/dist/lib/utils.d.ts +0 -65
  63. package/dist/lib/v3/agent/AgentClient.d.ts +0 -18
  64. package/dist/lib/v3/agent/AgentProvider.d.ts +0 -18
  65. package/dist/lib/v3/agent/AnthropicCUAClient.d.ts +0 -55
  66. package/dist/lib/v3/agent/OpenAICUAClient.d.ts +0 -64
  67. package/dist/lib/v3/agent/StagehandAgent.d.ts +0 -15
  68. package/dist/lib/v3/agent/tools/index.d.ts +0 -229
  69. package/dist/lib/v3/agent/tools/v3-act.d.ts +0 -29
  70. package/dist/lib/v3/agent/tools/v3-ariaTree.d.ts +0 -11
  71. package/dist/lib/v3/agent/tools/v3-close.d.ts +0 -24
  72. package/dist/lib/v3/agent/tools/v3-extract.d.ts +0 -38
  73. package/dist/lib/v3/agent/tools/v3-fillform.d.ts +0 -37
  74. package/dist/lib/v3/agent/tools/v3-goto.d.ts +0 -29
  75. package/dist/lib/v3/agent/tools/v3-navback.d.ts +0 -17
  76. package/dist/lib/v3/agent/tools/v3-screenshot.d.ts +0 -13
  77. package/dist/lib/v3/agent/tools/v3-scroll.d.ts +0 -23
  78. package/dist/lib/v3/agent/tools/v3-wait.d.ts +0 -19
  79. package/dist/lib/v3/agent/utils/cuaKeyMapping.d.ts +0 -10
  80. package/dist/lib/v3/agent/utils/imageCompression.d.ts +0 -18
  81. package/dist/lib/v3/agent/utils/messageProcessing.d.ts +0 -13
  82. package/dist/lib/v3/dom/build/scriptV3Content.d.ts +0 -1
  83. package/dist/lib/v3/dom/genDomScripts.d.ts +0 -1
  84. package/dist/lib/v3/dom/index.d.ts +0 -1
  85. package/dist/lib/v3/dom/piercer.entry.d.ts +0 -1
  86. package/dist/lib/v3/dom/piercer.runtime.d.ts +0 -25
  87. package/dist/lib/v3/handlers/actHandler.d.ts +0 -18
  88. package/dist/lib/v3/handlers/extractHandler.d.ts +0 -29
  89. package/dist/lib/v3/handlers/handlerUtils/actHandlerUtils.d.ts +0 -18
  90. package/dist/lib/v3/handlers/observeHandler.d.ts +0 -15
  91. package/dist/lib/v3/handlers/v3AgentHandler.d.ts +0 -17
  92. package/dist/lib/v3/handlers/v3CuaAgentHandler.d.ts +0 -26
  93. package/dist/lib/v3/index.d.ts +0 -10
  94. package/dist/lib/v3/launch/browserbase.d.ts +0 -8
  95. package/dist/lib/v3/launch/local.d.ts +0 -13
  96. package/dist/lib/v3/llm/AnthropicClient.d.ts +0 -16
  97. package/dist/lib/v3/llm/CerebrasClient.d.ts +0 -17
  98. package/dist/lib/v3/llm/GoogleClient.d.ts +0 -19
  99. package/dist/lib/v3/llm/GroqClient.d.ts +0 -17
  100. package/dist/lib/v3/llm/LLMClient.d.ts +0 -99
  101. package/dist/lib/v3/llm/LLMProvider.d.ts +0 -10
  102. package/dist/lib/v3/llm/OpenAIClient.d.ts +0 -15
  103. package/dist/lib/v3/llm/aisdk.d.ts +0 -15
  104. package/dist/lib/v3/logger.d.ts +0 -48
  105. package/dist/lib/v3/mcp/connection.d.ts +0 -11
  106. package/dist/lib/v3/mcp/utils.d.ts +0 -3
  107. package/dist/lib/v3/tests/default-page-tracking.spec.d.ts +0 -1
  108. package/dist/lib/v3/tests/downloads.spec.d.ts +0 -1
  109. package/dist/lib/v3/tests/perform-understudy-method.spec.d.ts +0 -1
  110. package/dist/lib/v3/tests/shadow-iframe.spec.d.ts +0 -1
  111. package/dist/lib/v3/tests/timeouts.spec.d.ts +0 -1
  112. package/dist/lib/v3/tests/v3.bb.config.d.ts +0 -4
  113. package/dist/lib/v3/tests/v3.config.d.ts +0 -4
  114. package/dist/lib/v3/tests/v3.playwright.config.d.ts +0 -2
  115. package/dist/lib/v3/tests/xpath-for-location-deep.spec.d.ts +0 -1
  116. package/dist/lib/v3/types/act.d.ts +0 -10
  117. package/dist/lib/v3/types/agent.d.ts +0 -132
  118. package/dist/lib/v3/types/api.d.ts +0 -40
  119. package/dist/lib/v3/types/cache.d.ts +0 -71
  120. package/dist/lib/v3/types/context.d.ts +0 -2
  121. package/dist/lib/v3/types/evals.d.ts +0 -71
  122. package/dist/lib/v3/types/evaluator.d.ts +0 -40
  123. package/dist/lib/v3/types/llm.d.ts +0 -11
  124. package/dist/lib/v3/types/log.d.ts +0 -23
  125. package/dist/lib/v3/types/model.d.ts +0 -20
  126. package/dist/lib/v3/types/playwright.d.ts +0 -6
  127. package/dist/lib/v3/types/stagehand.d.ts +0 -113
  128. package/dist/lib/v3/types/stagehandApiErrors.d.ts +0 -18
  129. package/dist/lib/v3/types/stagehandErrors.d.ts +0 -104
  130. package/dist/lib/v3/types.d.ts +0 -176
  131. package/dist/lib/v3/understudy/a11y/snapshot.d.ts +0 -71
  132. package/dist/lib/v3/understudy/cdp.d.ts +0 -58
  133. package/dist/lib/v3/understudy/context.d.ts +0 -120
  134. package/dist/lib/v3/understudy/deepLocator.d.ts +0 -69
  135. package/dist/lib/v3/understudy/executionContextRegistry.d.ts +0 -15
  136. package/dist/lib/v3/understudy/frame.d.ts +0 -63
  137. package/dist/lib/v3/understudy/frameLocator.d.ts +0 -46
  138. package/dist/lib/v3/understudy/frameRegistry.d.ts +0 -100
  139. package/dist/lib/v3/understudy/locator.d.ts +0 -196
  140. package/dist/lib/v3/understudy/page.d.ts +0 -241
  141. package/dist/lib/v3/understudy/piercer.d.ts +0 -4
  142. package/dist/lib/v3/v3.d.ts +0 -158
  143. package/dist/lib/version.d.ts +0 -5
  144. package/dist/stagehand.config.d.ts +0 -3
  145. package/dist/types/act.d.ts +0 -50
  146. package/dist/types/agent.d.ts +0 -143
  147. package/dist/types/api.d.ts +0 -40
  148. package/dist/types/browser.d.ts +0 -10
  149. package/dist/types/context.d.ts +0 -117
  150. package/dist/types/evals.d.ts +0 -94
  151. package/dist/types/evaluator.d.ts +0 -40
  152. package/dist/types/llm.d.ts +0 -11
  153. package/dist/types/log.d.ts +0 -23
  154. package/dist/types/model.d.ts +0 -17
  155. package/dist/types/page.d.ts +0 -38
  156. package/dist/types/playwright.d.ts +0 -12
  157. package/dist/types/stagehand.d.ts +0 -330
  158. package/dist/types/stagehandApiErrors.d.ts +0 -18
  159. package/dist/types/stagehandErrors.d.ts +0 -104
package/package.json CHANGED
@@ -1,37 +1,10 @@
1
1
  {
2
2
  "name": "@browserbasehq/orca",
3
- "version": "3.0.0-preview.1",
3
+ "version": "3.0.0-preview.3",
4
4
  "description": "An AI web browsing framework focused on simplicity and extensibility.",
5
5
  "main": "./dist/index.js",
6
6
  "module": "./dist/index.js",
7
7
  "types": "./dist/index.d.ts",
8
- "bin": {
9
- "evals": "./dist/evals/cli.js"
10
- },
11
- "scripts": {
12
- "example": "pnpm --filter @browserbasehq/stagehand-examples run start",
13
- "format": "prettier --write .",
14
- "prettier": "prettier --check .",
15
- "prettier:fix": "prettier --write .",
16
- "eslint": "eslint .",
17
- "cache:clear": "rm -rf .cache",
18
- "docs": "pnpm --filter @browserbasehq/stagehand-docs run dev",
19
- "evals": "tsx scripts/run-evals.ts",
20
- "evals:legacy": "tsx scripts/run-evals.ts",
21
- "e2e": "pnpm run build && cd evals/deterministic && playwright test --config=e2e.playwright.config.ts",
22
- "e2e:bb": "pnpm run build && cd evals/deterministic && playwright test --config=bb.playwright.config.ts",
23
- "e2e:local": "pnpm run build && cd evals/deterministic && playwright test --config=local.playwright.config.ts",
24
- "build-v3-dom-scripts": "tsx lib/v3/dom/genDomScripts.ts",
25
- "build-types": "tsc --emitDeclarationOnly --outDir dist",
26
- "build-js": "tsup --entry.index lib/v3/index.ts --dts",
27
- "build:cli": "tsup evals/cli.ts --outDir dist/evals --format cjs && cp evals/evals.config.json dist/evals/ && chmod +x dist/evals/cli.js && npm link",
28
- "build": "pnpm run lint && pnpm run gen-version && pnpm run build-v3-dom-scripts && pnpm run build-js && pnpm run build-types",
29
- "gen-version": "tsx scripts/gen-version.ts",
30
- "prepare": "pnpm run build",
31
- "lint": "pnpm run prettier:fix && pnpm run eslint",
32
- "release": "pnpm run build && changeset publish",
33
- "release-canary": "pnpm run build && changeset version --snapshot && changeset publish --tag alpha"
34
- },
35
8
  "files": [
36
9
  "dist/index.js",
37
10
  "dist/index.d.ts",
@@ -39,43 +12,15 @@
39
12
  "dist/types",
40
13
  "dist/stagehand.config.d.ts"
41
14
  ],
42
- "keywords": [],
15
+ "keywords": [
16
+ "ai",
17
+ "browser",
18
+ "automation",
19
+ "web-scraping",
20
+ "testing"
21
+ ],
43
22
  "author": "Browserbase",
44
23
  "license": "MIT",
45
- "devDependencies": {
46
- "@changesets/changelog-github": "^0.5.0",
47
- "@changesets/cli": "^2.27.9",
48
- "@eslint/js": "^9.16.0",
49
- "@langchain/core": "^0.3.40",
50
- "@langchain/openai": "^0.4.4",
51
- "playwright-core": "^1.54.1",
52
- "puppeteer-core": "^22.8.0",
53
- "chrome-launcher": "^1.2.0",
54
- "patchright-core": "^1.55.2",
55
- "@playwright/test": "^1.42.1",
56
- "@types/adm-zip": "^0.5.7",
57
- "@types/cheerio": "^0.22.35",
58
- "@types/express": "^4.17.21",
59
- "@types/node": "^20.11.30",
60
- "@types/ws": "^8.5.13",
61
- "adm-zip": "^0.5.16",
62
- "autoevals": "^0.0.64",
63
- "braintrust": "^0.0.171",
64
- "chalk": "^5.4.1",
65
- "cheerio": "^1.0.0",
66
- "chromium-bidi": "^0.10.0",
67
- "esbuild": "^0.21.4",
68
- "eslint": "^9.16.0",
69
- "express": "^4.21.0",
70
- "globals": "^15.13.0",
71
- "multer": "^1.4.5-lts.1",
72
- "prettier": "^3.2.5",
73
- "string-comparison": "^1.3.0",
74
- "tsup": "^8.2.1",
75
- "tsx": "^4.10.5",
76
- "typescript": "^5.2.2",
77
- "typescript-eslint": "^8.17.0"
78
- },
79
24
  "peerDependencies": {
80
25
  "deepmerge": "^4.3.1",
81
26
  "dotenv": "^16.4.5",
@@ -84,7 +29,8 @@
84
29
  "dependencies": {
85
30
  "@anthropic-ai/sdk": "0.39.0",
86
31
  "@browserbasehq/sdk": "^2.4.0",
87
- "@google/genai": "^0.8.0",
32
+ "@google/genai": "^1.22.0",
33
+ "@langchain/openai": "^0.4.4",
88
34
  "@modelcontextprotocol/sdk": "^1.17.2",
89
35
  "ai": "^4.3.9",
90
36
  "devtools-protocol": "^0.0.1464554",
@@ -108,23 +54,38 @@
108
54
  "@ai-sdk/perplexity": "^1.1.7",
109
55
  "@ai-sdk/togetherai": "^0.2.6",
110
56
  "@ai-sdk/xai": "^1.2.15",
57
+ "@langchain/core": "^0.3.40",
58
+ "playwright-core": "^1.54.1",
59
+ "puppeteer-core": "^22.8.0",
60
+ "chrome-launcher": "^1.2.0",
61
+ "patchright-core": "^1.55.2",
111
62
  "ollama-ai-provider": "^1.2.0"
112
63
  },
113
- "directories": {
114
- "doc": "docs",
115
- "example": "examples",
116
- "lib": "lib"
64
+ "devDependencies": {
65
+ "@playwright/test": "^1.42.1",
66
+ "typescript": "^5.2.2",
67
+ "tsup": "^8.2.1",
68
+ "tsx": "^4.10.5",
69
+ "prettier": "^3.2.5",
70
+ "eslint": "^9.16.0"
117
71
  },
118
72
  "repository": {
119
73
  "type": "git",
120
- "url": "git+https://github.com/browserbase/stagehand.git"
74
+ "url": "git+https://github.com/browserbase/stagehand.git",
75
+ "directory": "packages/core"
121
76
  },
122
77
  "bugs": {
123
78
  "url": "https://github.com/browserbase/stagehand/issues"
124
79
  },
125
80
  "homepage": "https://stagehand.dev",
126
- "overrides": {
127
- "whatwg-url": "^14.0.0"
128
- },
129
- "packageManager": "pnpm@9.15.0+sha512.76e2379760a4328ec4415815bcd6628dee727af3779aaa4c914e3944156c4299921a89f976381ee107d41f12cfa4b66681ca9c718f0668fa0831ed4c6d8ba56c"
130
- }
81
+ "scripts": {
82
+ "gen-version": "tsx scripts/gen-version.ts",
83
+ "build-dom-scripts": "tsx lib/v3/dom/genDomScripts.ts && tsx lib/v3/dom/genLocatorScripts.ts",
84
+ "build-js": "tsup --entry.index lib/v3/index.ts --dts",
85
+ "typecheck": "tsc --noEmit",
86
+ "build": "pnpm run gen-version && pnpm run build-dom-scripts && pnpm run build-js && pnpm run typecheck",
87
+ "example": "node --import tsx -e \"const args=process.argv.slice(1).filter(a=>a!=='--'); const [p]=args; const n=(p||'example').replace(/^\\.\\//,'').replace(/\\.ts$/i,''); import(new URL(require('node:path').resolve('examples', n + '.ts'), 'file:'));\" --",
88
+ "lint": "cd ../.. && prettier --check packages/core && cd packages/core && eslint .",
89
+ "format": "prettier --write ."
90
+ }
91
+ }
package/README.md DELETED
@@ -1,165 +0,0 @@
1
- <div id="toc" align="center" style="margin-bottom: 0;">
2
- <ul style="list-style: none; margin: 0; padding: 0;">
3
- <a href="https://stagehand.dev">
4
- <picture>
5
- <source media="(prefers-color-scheme: dark)" srcset="media/dark_logo.png" />
6
- <img alt="Stagehand" src="media/light_logo.png" width="200" style="margin-right: 30px;" />
7
- </picture>
8
- </a>
9
- </ul>
10
- </div>
11
- <p align="center">
12
- <strong>The AI Browser Automation Framework</strong><br>
13
- <a href="https://docs.stagehand.dev">Read the Docs</a>
14
- </p>
15
-
16
- <p align="center">
17
- <a href="https://github.com/browserbase/stagehand/tree/main?tab=MIT-1-ov-file#MIT-1-ov-file">
18
- <picture>
19
- <source media="(prefers-color-scheme: dark)" srcset="media/dark_license.svg" />
20
- <img alt="MIT License" src="media/light_license.svg" />
21
- </picture>
22
- </a>
23
- <a href="https://join.slack.com/t/stagehand-dev/shared_invite/zt-38khc8iv5-T2acb50_0OILUaX7lxeBOg">
24
- <picture>
25
- <source media="(prefers-color-scheme: dark)" srcset="media/dark_slack.svg" />
26
- <img alt="Slack Community" src="media/light_slack.svg" />
27
- </picture>
28
- </a>
29
- </p>
30
-
31
- <p align="center">
32
- <a href="https://trendshift.io/repositories/12122" target="_blank"><img src="https://trendshift.io/api/badge/repositories/12122" alt="browserbase%2Fstagehand | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
33
- </p>
34
-
35
- <p align="center">
36
- If you're looking for the Python implementation, you can find it
37
- <a href="https://github.com/browserbase/stagehand-python"> here</a>
38
- </p>
39
-
40
- <div align="center" style="display: flex; align-items: center; justify-content: center; gap: 4px; margin-bottom: 0;">
41
- <b>Vibe code</b>
42
- <span style="font-size: 1.05em;"> Stagehand with </span>
43
- <a href="https://director.ai" style="display: flex; align-items: center;">
44
- <span>Director</span>
45
- </a>
46
- <span> </span>
47
- <picture>
48
- <img alt="Director" src="media/director_icon.svg" width="25" />
49
- </picture>
50
- </div>
51
-
52
- ## Why Stagehand?
53
-
54
- Most existing browser automation tools either require you to write low-level code in a framework like Selenium, Playwright, or Puppeteer, or use high-level agents that can be unpredictable in production. By letting developers choose what to write in code vs. natural language, Stagehand is the natural choice for browser automations in production.
55
-
56
- 1. **Choose when to write code vs. natural language**: use AI when you want to navigate unfamiliar pages, and use code ([Playwright](https://playwright.dev/)) when you know exactly what you want to do.
57
-
58
- 2. **Preview and cache actions**: Stagehand lets you preview AI actions before running them, and also helps you easily cache repeatable actions to save time and tokens.
59
-
60
- 3. **Computer use models with one line of code**: Stagehand lets you integrate SOTA computer use models from OpenAI and Anthropic into the browser with one line of code.
61
-
62
- ## Example
63
-
64
- Here's how to build a sample browser automation with Stagehand:
65
-
66
- <div align="center">
67
- <div style="max-width:300px;">
68
- <img src="/media/github_demo.gif" alt="See Stagehand in Action">
69
- </div>
70
- </div>
71
-
72
- ```typescript
73
- // Use Playwright functions on the page object
74
- const page = stagehand.page;
75
- await page.goto("https://github.com/browserbase");
76
-
77
- // Use act() to execute individual actions
78
- await page.act("click on the stagehand repo");
79
-
80
- // Use Computer Use agents for larger actions
81
- const agent = stagehand.agent({
82
- provider: "openai",
83
- model: "computer-use-preview",
84
- });
85
- await agent.execute("Get to the latest PR");
86
-
87
- // Use extract() to read data from the page
88
- const { author, title } = await page.extract({
89
- instruction: "extract the author and title of the PR",
90
- schema: z.object({
91
- author: z.string().describe("The username of the PR author"),
92
- title: z.string().describe("The title of the PR"),
93
- }),
94
- });
95
- ```
96
-
97
- ## Documentation
98
-
99
- Visit [docs.stagehand.dev](https://docs.stagehand.dev) to view the full documentation.
100
-
101
- ## Getting Started
102
-
103
- Start with Stagehand with one line of code, or check out our [Quickstart Guide](https://docs.stagehand.dev/get_started/quickstart) for more information:
104
-
105
- ```bash
106
- npx create-browser-app
107
- ```
108
-
109
- <div align="center">
110
- <a href="https://www.loom.com/share/f5107f86d8c94fa0a8b4b1e89740f7a7">
111
- <p>Watch Anirudh demo create-browser-app to create a Stagehand project!</p>
112
- </a>
113
- <a href="https://www.loom.com/share/f5107f86d8c94fa0a8b4b1e89740f7a7">
114
- <img style="max-width:300px;" src="https://cdn.loom.com/sessions/thumbnails/f5107f86d8c94fa0a8b4b1e89740f7a7-ec3f428b6775ceeb-full-play.gif">
115
- </a>
116
- </div>
117
-
118
- ### Build and Run from Source
119
-
120
- ```bash
121
- git clone https://github.com/browserbase/stagehand.git
122
- cd stagehand
123
- pnpm install
124
- pnpm playwright install
125
- pnpm run build
126
- pnpm run example # run the blank script at ./examples/example.ts
127
- pnpm run example 2048 # run the 2048 example at ./examples/2048.ts
128
- pnpm run evals -man # see evaluation suite options
129
- ```
130
-
131
- Stagehand is best when you have an API key for an LLM provider and Browserbase credentials. To add these to your project, run:
132
-
133
- ```bash
134
- cp .env.example .env
135
- nano .env # Edit the .env file to add API keys
136
- ```
137
-
138
- ## Contributing
139
-
140
- > [!NOTE]
141
- > We highly value contributions to Stagehand! For questions or support, please join our [Slack community](https://join.slack.com/t/stagehand-dev/shared_invite/zt-38khc8iv5-T2acb50_0OILUaX7lxeBOg).
142
-
143
- At a high level, we're focused on improving reliability, speed, and cost in that order of priority. If you're interested in contributing, we strongly recommend reaching out to [Miguel Gonzalez](https://x.com/miguel_gonzf) or [Paul Klein](https://x.com/pk_iv) in our [Slack community](https://join.slack.com/t/stagehand-dev/shared_invite/zt-38khc8iv5-T2acb50_0OILUaX7lxeBOg) before starting to ensure that your contribution aligns with our goals.
144
-
145
- For more information, please see our [Contributing Guide](https://docs.stagehand.dev/examples/contributing).
146
-
147
- ## Acknowledgements
148
-
149
- This project heavily relies on [Playwright](https://playwright.dev/) as a resilient backbone to automate the web. It also would not be possible without the awesome techniques and discoveries made by [tarsier](https://github.com/reworkd/tarsier), [gemini-zod](https://github.com/jbeoris/gemini-zod), and [fuji-web](https://github.com/normal-computing/fuji-web).
150
-
151
- We'd like to thank the following people for their major contributions to Stagehand:
152
- - [Paul Klein](https://github.com/pkiv)
153
- - [Anirudh Kamath](https://github.com/kamath)
154
- - [Sean McGuire](https://github.com/seanmcguire12)
155
- - [Miguel Gonzalez](https://github.com/miguelg719)
156
- - [Sameel Arif](https://github.com/sameelarif)
157
- - [Filip Michalsky](https://github.com/filip-michalsky)
158
- - [Jeremy Press](https://x.com/jeremypress)
159
- - [Navid Pour](https://github.com/navidpour)
160
-
161
- ## License
162
-
163
- Licensed under the MIT License.
164
-
165
- Copyright 2025 Browserbase, Inc.
@@ -1,25 +0,0 @@
1
- import type { BrowserContext as PlaywrightContext, Page as PlaywrightPage } from "playwright";
2
- import { Stagehand } from "./index";
3
- import { StagehandPage } from "./StagehandPage";
4
- import { EnhancedContext } from "../types/context";
5
- export declare class StagehandContext {
6
- private readonly stagehand;
7
- private readonly intContext;
8
- private pageMap;
9
- private activeStagehandPage;
10
- private readonly frameIdMap;
11
- private constructor();
12
- private createStagehandPage;
13
- static init(context: PlaywrightContext, stagehand: Stagehand): Promise<StagehandContext>;
14
- get frameIdLookup(): ReadonlyMap<string, StagehandPage>;
15
- registerFrameId(frameId: string, page: StagehandPage): void;
16
- unregisterFrameId(frameId: string): void;
17
- getStagehandPageByFrameId(frameId: string): StagehandPage | undefined;
18
- get context(): EnhancedContext;
19
- getStagehandPage(page: PlaywrightPage): Promise<StagehandPage>;
20
- getStagehandPages(): Promise<StagehandPage[]>;
21
- setActivePage(page: StagehandPage): void;
22
- getActivePage(): StagehandPage | null;
23
- private handleNewPlaywrightPage;
24
- private attachFrameNavigatedListener;
25
- }
@@ -1,103 +0,0 @@
1
- import type { CDPSession, Page as PlaywrightPage, Frame } from "playwright";
2
- import { z } from "zod/v3";
3
- import { Page, defaultExtractSchema } from "../types/page";
4
- import { ExtractOptions, ExtractResult, ObserveOptions, ObserveResult } from "../types/stagehand";
5
- import { StagehandAPI } from "./api";
6
- import { ActOptions, ActResult, Stagehand } from "./index";
7
- import { LLMClient } from "./llm/LLMClient";
8
- import { StagehandContext } from "./StagehandContext";
9
- import { EncodedId, EnhancedContext } from "../types/context";
10
- export declare class StagehandPage {
11
- private stagehand;
12
- private rawPage;
13
- private intPage;
14
- private intContext;
15
- private actHandler;
16
- private extractHandler;
17
- private observeHandler;
18
- private llmClient;
19
- private cdpClient;
20
- private api;
21
- private userProvidedInstructions?;
22
- private waitForCaptchaSolves;
23
- private initialized;
24
- private readonly cdpClients;
25
- private fidOrdinals;
26
- private rootFrameId;
27
- get frameId(): string;
28
- updateRootFrameId(newId: string): void;
29
- constructor(page: PlaywrightPage, stagehand: Stagehand, context: StagehandContext, llmClient: LLMClient, userProvidedInstructions?: string, api?: StagehandAPI, waitForCaptchaSolves?: boolean);
30
- ordinalForFrameId(fid: string | undefined): number;
31
- encodeWithFrameId(fid: string | undefined, backendId: number): EncodedId;
32
- resetFrameOrdinals(): void;
33
- private ensureStagehandScript;
34
- /** Register the custom selector engine that pierces open/closed shadow roots. */
35
- private ensureStagehandSelectorEngine;
36
- /**
37
- * Waits for a captcha to be solved when using Browserbase environment.
38
- *
39
- * @param timeoutMs - Optional timeout in milliseconds. If provided, the promise will reject if the captcha solving hasn't started within the given time.
40
- * @throws StagehandEnvironmentError if called in a LOCAL environment
41
- * @throws CaptchaTimeoutError if the timeout is reached before captcha solving starts
42
- * @returns Promise that resolves when the captcha is solved
43
- */
44
- waitForCaptchaSolve(timeoutMs?: number): Promise<void>;
45
- init(): Promise<StagehandPage>;
46
- get page(): Page;
47
- get context(): EnhancedContext;
48
- /**
49
- * `_waitForSettledDom` waits until the DOM is settled, and therefore is
50
- * ready for actions to be taken.
51
- *
52
- * **Definition of "settled"**
53
- * • No in-flight network requests (except WebSocket / Server-Sent-Events).
54
- * • That idle state lasts for at least **500 ms** (the "quiet-window").
55
- *
56
- * **How it works**
57
- * 1. Subscribes to CDP Network and Page events for the main target and all
58
- * out-of-process iframes (via `Target.setAutoAttach { flatten:true }`).
59
- * 2. Every time `Network.requestWillBeSent` fires, the request ID is added
60
- * to an **`inflight`** `Set`.
61
- * 3. When the request finishes—`loadingFinished`, `loadingFailed`,
62
- * `requestServedFromCache`, or a *data:* response—the request ID is
63
- * removed.
64
- * 4. *Document* requests are also mapped **frameId → requestId**; when
65
- * `Page.frameStoppedLoading` fires the corresponding Document request is
66
- * removed immediately (covers iframes whose network events never close).
67
- * 5. A **stalled-request sweep timer** runs every 500 ms. If a *Document*
68
- * request has been open for ≥ 2 s it is forcibly removed; this prevents
69
- * ad/analytics iframes from blocking the wait forever.
70
- * 6. When `inflight` becomes empty the helper starts a 500 ms timer.
71
- * If no new request appears before the timer fires, the promise
72
- * resolves → **DOM is considered settled**.
73
- * 7. A global guard (`timeoutMs` or `stagehand.domSettleTimeoutMs`,
74
- * default ≈ 30 s) ensures we always resolve; if it fires we log how many
75
- * requests were still outstanding.
76
- *
77
- * @param timeoutMs – Optional hard cap (ms). Defaults to
78
- * `this.stagehand.domSettleTimeoutMs`.
79
- */
80
- _waitForSettledDom(timeoutMs?: number): Promise<void>;
81
- act(actionOrOptions: string | ActOptions | ObserveResult): Promise<ActResult>;
82
- extract<T extends z.AnyZodObject = typeof defaultExtractSchema>(instructionOrOptions?: string | ExtractOptions<T>): Promise<ExtractResult<T>>;
83
- observe(instructionOrOptions?: string | ObserveOptions): Promise<ObserveResult[]>;
84
- /**
85
- * Get or create a CDP session for the given target.
86
- * @param target The Page or (OOPIF) Frame you want to talk to.
87
- */
88
- getCDPClient(target?: PlaywrightPage | Frame): Promise<CDPSession>;
89
- /**
90
- * Send a CDP command to the chosen DevTools target.
91
- *
92
- * @param method Any valid CDP method, e.g. `"DOM.getDocument"`.
93
- * @param params Command parameters (optional).
94
- * @param target A `Page` or OOPIF `Frame`. Defaults to the main page.
95
- *
96
- * @typeParam T Expected result shape (defaults to `unknown`).
97
- */
98
- sendCDP<T = unknown>(method: string, params?: Record<string, unknown>, target?: PlaywrightPage | Frame): Promise<T>;
99
- /** Enable a CDP domain (e.g. `"Network"` or `"DOM"`) on the chosen target. */
100
- enableCDP(domain: string, target?: PlaywrightPage | Frame): Promise<void>;
101
- /** Disable a CDP domain on the chosen target. */
102
- disableCDP(domain: string, target?: PlaywrightPage | Frame): Promise<void>;
103
- }
@@ -1,144 +0,0 @@
1
- import { AccessibilityNode, TreeResult, BackendIdMaps, CombinedA11yResult, EncodedId } from "../../types/context";
2
- import { StagehandPage } from "../StagehandPage";
3
- import { LogLine } from "../../types/log";
4
- import { Frame } from "playwright";
5
- /**
6
- * Clean a string by removing private-use unicode characters, normalizing whitespace,
7
- * and trimming the result.
8
- *
9
- * @param input - The text to clean, potentially containing PUA and NBSP characters.
10
- * @returns A cleaned string with PUA characters removed, NBSP variants collapsed,
11
- * consecutive spaces merged, and leading/trailing whitespace trimmed.
12
- */
13
- export declare function cleanText(input: string): string;
14
- /**
15
- * Generate a human-readable, indented outline of an accessibility node tree.
16
- *
17
- * @param node - The accessibility node to format, optionally with an encodedId.
18
- * @param level - The current depth level for indentation (used internally).
19
- * @returns A string representation of the node and its descendants, with one node per line.
20
- */
21
- export declare function formatSimplifiedTree(node: AccessibilityNode & {
22
- encodedId?: EncodedId;
23
- }, level?: number): string;
24
- /**
25
- * Build mappings from CDP backendNodeIds to HTML tag names and relative XPaths.
26
- *
27
- * @param experimental - Whether to use experimental behaviour.
28
- * @param sp - The StagehandPage wrapper for Playwright and CDP calls.
29
- * @param targetFrame - Optional Playwright.Frame whose DOM subtree to map; defaults to main frame.
30
- * @returns A Promise resolving to BackendIdMaps containing tagNameMap and xpathMap.
31
- */
32
- export declare function buildBackendIdMaps(experimental: boolean, sp: StagehandPage, targetFrame?: Frame): Promise<BackendIdMaps>;
33
- /**
34
- * Convert a flat array of AccessibilityNodes into a cleaned, hierarchical tree.
35
- * Nodes are pruned, structural wrappers removed, and each kept node is stamped
36
- * with its EncodedId for later lookup or subtree injection.
37
- *
38
- * @param nodes - Raw flat list of AX nodes retrieved via CDP.
39
- * @param tagNameMap - Mapping of EncodedId to HTML tag names for structural decisions.
40
- * @param logger - Optional function for logging diagnostic messages.
41
- * @param xpathMap - Optional mapping of EncodedId to relative XPath for element lookup.
42
- * @returns A Promise resolving to a TreeResult with cleaned tree, simplified text outline,
43
- * iframe list, URL map, and inherited xpathMap.
44
- */
45
- export declare function buildHierarchicalTree(nodes: AccessibilityNode[], tagNameMap: Record<EncodedId, string>, logger?: (l: LogLine) => void, xpathMap?: Record<EncodedId, string>): Promise<TreeResult>;
46
- /**
47
- * Resolve the CDP frame identifier for a Playwright Frame, handling same-process and OOPIF.
48
- *
49
- * @param sp - The StagehandPage instance for issuing CDP commands.
50
- * @param frame - The target Playwright.Frame; undefined or main frame yields undefined.
51
- * @returns A Promise resolving to the CDP frameId string, or undefined for main document.
52
- */
53
- export declare function getCDPFrameId(sp: StagehandPage, frame?: Frame): Promise<string | undefined>;
54
- /**
55
- * Retrieve and build a cleaned accessibility tree for a document or specific iframe.
56
- * Prunes, formats, and optionally filters by XPath, including scrollable role decoration.
57
- *
58
- * @param stagehandPage - The StagehandPage instance for Playwright and CDP interaction.
59
- * @param logger - Logging function for diagnostics and performance metrics.
60
- * @param selector - Optional XPath to filter the AX tree to a specific subtree.
61
- * @param targetFrame - Optional Playwright.Frame to scope the AX tree retrieval.
62
- * @returns A Promise resolving to a TreeResult with the hierarchical AX tree and related metadata.
63
- */
64
- export declare function getAccessibilityTree(experimental: boolean, stagehandPage: StagehandPage, logger: (log: LogLine) => void, selector?: string, targetFrame?: Frame): Promise<TreeResult>;
65
- /**
66
- * Get the backendNodeId of the iframe element that contains a given Playwright.Frame.
67
- *
68
- * @param sp - The StagehandPage instance for issuing CDP commands.
69
- * @param frame - The Playwright.Frame whose host iframe element to locate.
70
- * @returns A Promise resolving to the backendNodeId of the iframe element, or null if not applicable.
71
- */
72
- export declare function getFrameRootBackendNodeId(sp: StagehandPage, frame: Frame | undefined): Promise<number | null>;
73
- /**
74
- * Compute the absolute XPath for the iframe element hosting a given Playwright.Frame.
75
- *
76
- * @param frame - The Playwright.Frame whose iframe element to locate.
77
- * @returns A Promise resolving to the XPath of the iframe element, or "/" if no frame provided.
78
- */
79
- export declare function getFrameRootXpathWithShadow(frame: Frame | undefined): Promise<string>;
80
- export declare function getFrameRootXpath(frame: Frame | undefined): Promise<string>;
81
- /**
82
- * Inject simplified subtree outlines into the main frame outline for nested iframes.
83
- * Walks the main tree text, looks for EncodedId labels, and inserts matching subtrees.
84
- *
85
- * @param tree - The indented AX outline of the main frame.
86
- * @param idToTree - Map of EncodedId to subtree outlines for nested frames.
87
- * @returns A single combined text outline with iframe subtrees injected.
88
- */
89
- export declare function injectSubtrees(tree: string, idToTree: Map<EncodedId, string>): string;
90
- /**
91
- * Retrieve and merge accessibility trees for the main document and nested iframes.
92
- * Walks through frame chains if a root XPath is provided, then stitches subtree outlines.
93
- *
94
- * @param stagehandPage - The StagehandPage instance for Playwright and CDP interaction.
95
- * @param logger - Logging function for diagnostics and performance.
96
- * @param rootXPath - Optional absolute XPath to focus the crawl on a subtree across frames.
97
- * @returns A Promise resolving to CombinedA11yResult with combined tree text, xpath map, and URL map.
98
- */
99
- export declare function getAccessibilityTreeWithFrames(experimental: boolean, stagehandPage: StagehandPage, logger: (l: LogLine) => void, rootXPath?: string): Promise<CombinedA11yResult>;
100
- /**
101
- * `findScrollableElementIds` is a function that identifies elements in
102
- * the browser that are deemed "scrollable". At a high level, it does the
103
- * following:
104
- * - Calls the browser-side `window.getScrollableElementXpaths()` function,
105
- * which returns a list of XPaths for scrollable containers.
106
- * - Iterates over the returned list of XPaths, locating each element in the DOM
107
- * using `stagehandPage.sendCDP(...)`
108
- * - During each iteration, we call `Runtime.evaluate` to run `document.evaluate(...)`
109
- * with each XPath, obtaining a `RemoteObject` reference if it exists.
110
- * - Then, for each valid object reference, we call `DOM.describeNode` to retrieve
111
- * the element’s `backendNodeId`.
112
- * - Collects all resulting `backendNodeId`s in a Set and returns them.
113
- *
114
- * @param stagehandPage - A StagehandPage instance with built-in CDP helpers.
115
- * @returns A Promise that resolves to a Set of unique `backendNodeId`s corresponding
116
- * to scrollable elements in the DOM.
117
- */
118
- export declare function findScrollableElementIds(stagehandPage: StagehandPage, targetFrame?: Frame): Promise<Set<number>>;
119
- /**
120
- * Resolve an XPath to a Chrome-DevTools-Protocol (CDP) remote-object ID.
121
- *
122
- * @param page A StagehandPage (or Playwright.Page with .sendCDP)
123
- * @param xpath An absolute or relative XPath
124
- * @returns The remote objectId for the matched node, or null
125
- */
126
- export declare function resolveObjectIdForXPath(page: StagehandPage, xpath: string, targetFrame?: Frame): Promise<string | null>;
127
- /**
128
- * Resolve a chain of iframe frames from an absolute XPath, returning the frame sequence and inner XPath.
129
- *
130
- * This helper walks an XPath expression containing iframe steps (e.g., '/html/body/iframe[2]/...'),
131
- * descending into each matching iframe element to build a frame chain, and returns the leftover
132
- * XPath segment to evaluate within the context of the last iframe.
133
- *
134
- * @param sp - The StagehandPage instance for evaluating XPath and locating frames.
135
- * @param absPath - An absolute XPath expression starting with '/', potentially including iframe steps.
136
- * @returns An object containing:
137
- * frames: Array of Frame objects representing each iframe in the chain.
138
- * rest: The remaining XPath string to evaluate inside the final iframe.
139
- * @throws Error if an iframe cannot be found or the final XPath cannot be resolved.
140
- */
141
- export declare function resolveFrameChain(sp: StagehandPage, absPath: string): Promise<{
142
- frames: Frame[];
143
- rest: string;
144
- }>;
@@ -1,20 +0,0 @@
1
- import { AgentAction, AgentResult, AgentType, AgentExecutionOptions } from "@/types/agent";
2
- import { ToolSet } from "ai/dist";
3
- /**
4
- * Abstract base class for agent clients
5
- * This provides a common interface for all agent implementations
6
- */
7
- export declare abstract class AgentClient {
8
- type: AgentType;
9
- modelName: string;
10
- clientOptions: Record<string, unknown>;
11
- userProvidedInstructions?: string;
12
- constructor(type: AgentType, modelName: string, userProvidedInstructions?: string);
13
- abstract execute(options: AgentExecutionOptions): Promise<AgentResult>;
14
- abstract captureScreenshot(options?: Record<string, unknown>): Promise<unknown>;
15
- abstract setViewport(width: number, height: number): void;
16
- abstract setCurrentUrl(url: string): void;
17
- abstract setScreenshotProvider(provider: () => Promise<string>): void;
18
- abstract setActionHandler(handler: (action: AgentAction) => Promise<void>): void;
19
- abstract setTools(tools: ToolSet): void;
20
- }
@@ -1,19 +0,0 @@
1
- import { AgentType } from "@/types/agent";
2
- import { LogLine } from "@/types/log";
3
- import { ToolSet } from "ai/dist";
4
- import { AgentClient } from "./AgentClient";
5
- export declare const modelToAgentProviderMap: Record<string, AgentType>;
6
- /**
7
- * Provider for agent clients
8
- * This class is responsible for creating the appropriate agent client
9
- * based on the provider type
10
- */
11
- export declare class AgentProvider {
12
- private logger;
13
- /**
14
- * Create a new agent provider
15
- */
16
- constructor(logger: (message: LogLine) => void);
17
- getClient(modelName: string, clientOptions?: Record<string, unknown>, userProvidedInstructions?: string, tools?: ToolSet): AgentClient;
18
- static getAgentProvider(modelName: string): AgentType;
19
- }