@browserbasehq/orca 3.0.0-preview.0 → 3.0.0-preview.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. package/dist/index.d.ts +761 -593
  2. package/dist/index.js +25775 -24553
  3. package/package.json +35 -74
  4. package/README.md +0 -165
  5. package/dist/lib/api.d.ts +0 -23
  6. package/dist/lib/dom/build/scriptContent.d.ts +0 -1
  7. package/dist/lib/inference.d.ts +0 -71
  8. package/dist/lib/inferenceLogUtils.d.ts +0 -12
  9. package/dist/lib/logger.d.ts +0 -54
  10. package/dist/lib/prompt.d.ts +0 -12
  11. package/dist/lib/utils.d.ts +0 -65
  12. package/dist/lib/v3/agent/AgentClient.d.ts +0 -18
  13. package/dist/lib/v3/agent/AgentProvider.d.ts +0 -18
  14. package/dist/lib/v3/agent/AnthropicCUAClient.d.ts +0 -55
  15. package/dist/lib/v3/agent/OpenAICUAClient.d.ts +0 -64
  16. package/dist/lib/v3/agent/StagehandAgent.d.ts +0 -15
  17. package/dist/lib/v3/agent/tools/index.d.ts +0 -229
  18. package/dist/lib/v3/agent/tools/v3-act.d.ts +0 -29
  19. package/dist/lib/v3/agent/tools/v3-ariaTree.d.ts +0 -11
  20. package/dist/lib/v3/agent/tools/v3-close.d.ts +0 -24
  21. package/dist/lib/v3/agent/tools/v3-extract.d.ts +0 -38
  22. package/dist/lib/v3/agent/tools/v3-fillform.d.ts +0 -37
  23. package/dist/lib/v3/agent/tools/v3-goto.d.ts +0 -29
  24. package/dist/lib/v3/agent/tools/v3-navback.d.ts +0 -17
  25. package/dist/lib/v3/agent/tools/v3-screenshot.d.ts +0 -13
  26. package/dist/lib/v3/agent/tools/v3-scroll.d.ts +0 -23
  27. package/dist/lib/v3/agent/tools/v3-wait.d.ts +0 -19
  28. package/dist/lib/v3/agent/utils/cuaKeyMapping.d.ts +0 -10
  29. package/dist/lib/v3/agent/utils/imageCompression.d.ts +0 -18
  30. package/dist/lib/v3/agent/utils/messageProcessing.d.ts +0 -13
  31. package/dist/lib/v3/dom/build/scriptV3Content.d.ts +0 -1
  32. package/dist/lib/v3/dom/genDomScripts.d.ts +0 -1
  33. package/dist/lib/v3/dom/index.d.ts +0 -1
  34. package/dist/lib/v3/dom/piercer.entry.d.ts +0 -1
  35. package/dist/lib/v3/dom/piercer.runtime.d.ts +0 -25
  36. package/dist/lib/v3/handlers/actHandler.d.ts +0 -18
  37. package/dist/lib/v3/handlers/extractHandler.d.ts +0 -29
  38. package/dist/lib/v3/handlers/handlerUtils/actHandlerUtils.d.ts +0 -18
  39. package/dist/lib/v3/handlers/observeHandler.d.ts +0 -15
  40. package/dist/lib/v3/handlers/v3AgentHandler.d.ts +0 -17
  41. package/dist/lib/v3/handlers/v3CuaAgentHandler.d.ts +0 -26
  42. package/dist/lib/v3/index.d.ts +0 -10
  43. package/dist/lib/v3/launch/browserbase.d.ts +0 -8
  44. package/dist/lib/v3/launch/local.d.ts +0 -13
  45. package/dist/lib/v3/llm/AnthropicClient.d.ts +0 -16
  46. package/dist/lib/v3/llm/CerebrasClient.d.ts +0 -17
  47. package/dist/lib/v3/llm/GoogleClient.d.ts +0 -19
  48. package/dist/lib/v3/llm/GroqClient.d.ts +0 -17
  49. package/dist/lib/v3/llm/LLMClient.d.ts +0 -99
  50. package/dist/lib/v3/llm/LLMProvider.d.ts +0 -10
  51. package/dist/lib/v3/llm/OpenAIClient.d.ts +0 -15
  52. package/dist/lib/v3/llm/aisdk.d.ts +0 -15
  53. package/dist/lib/v3/logger.d.ts +0 -48
  54. package/dist/lib/v3/mcp/connection.d.ts +0 -11
  55. package/dist/lib/v3/mcp/utils.d.ts +0 -3
  56. package/dist/lib/v3/tests/default-page-tracking.spec.d.ts +0 -1
  57. package/dist/lib/v3/tests/perform-understudy-method.spec.d.ts +0 -1
  58. package/dist/lib/v3/tests/shadow-iframe.spec.d.ts +0 -1
  59. package/dist/lib/v3/tests/timeouts.spec.d.ts +0 -1
  60. package/dist/lib/v3/tests/v3.config.d.ts +0 -4
  61. package/dist/lib/v3/tests/v3.playwright.config.d.ts +0 -2
  62. package/dist/lib/v3/tests/xpath-for-location-deep.spec.d.ts +0 -1
  63. package/dist/lib/v3/types/act.d.ts +0 -10
  64. package/dist/lib/v3/types/agent.d.ts +0 -132
  65. package/dist/lib/v3/types/api.d.ts +0 -40
  66. package/dist/lib/v3/types/cache.d.ts +0 -71
  67. package/dist/lib/v3/types/context.d.ts +0 -2
  68. package/dist/lib/v3/types/evals.d.ts +0 -71
  69. package/dist/lib/v3/types/evaluator.d.ts +0 -40
  70. package/dist/lib/v3/types/llm.d.ts +0 -11
  71. package/dist/lib/v3/types/log.d.ts +0 -23
  72. package/dist/lib/v3/types/model.d.ts +0 -20
  73. package/dist/lib/v3/types/playwright.d.ts +0 -6
  74. package/dist/lib/v3/types/stagehand.d.ts +0 -113
  75. package/dist/lib/v3/types/stagehandApiErrors.d.ts +0 -18
  76. package/dist/lib/v3/types/stagehandErrors.d.ts +0 -104
  77. package/dist/lib/v3/types.d.ts +0 -176
  78. package/dist/lib/v3/understudy/a11y/snapshot.d.ts +0 -71
  79. package/dist/lib/v3/understudy/cdp.d.ts +0 -58
  80. package/dist/lib/v3/understudy/context.d.ts +0 -120
  81. package/dist/lib/v3/understudy/deepLocator.d.ts +0 -69
  82. package/dist/lib/v3/understudy/executionContextRegistry.d.ts +0 -15
  83. package/dist/lib/v3/understudy/frame.d.ts +0 -63
  84. package/dist/lib/v3/understudy/frameLocator.d.ts +0 -46
  85. package/dist/lib/v3/understudy/frameRegistry.d.ts +0 -100
  86. package/dist/lib/v3/understudy/locator.d.ts +0 -196
  87. package/dist/lib/v3/understudy/page.d.ts +0 -241
  88. package/dist/lib/v3/understudy/piercer.d.ts +0 -4
  89. package/dist/lib/v3/v3.d.ts +0 -156
  90. package/dist/lib/version.d.ts +0 -5
package/package.json CHANGED
@@ -1,37 +1,10 @@
1
1
  {
2
2
  "name": "@browserbasehq/orca",
3
- "version": "3.0.0-preview.0",
3
+ "version": "3.0.0-preview.3",
4
4
  "description": "An AI web browsing framework focused on simplicity and extensibility.",
5
5
  "main": "./dist/index.js",
6
6
  "module": "./dist/index.js",
7
7
  "types": "./dist/index.d.ts",
8
- "bin": {
9
- "evals": "./dist/evals/cli.js"
10
- },
11
- "scripts": {
12
- "example": "pnpm --filter @browserbasehq/stagehand-examples run start",
13
- "format": "prettier --write .",
14
- "prettier": "prettier --check .",
15
- "prettier:fix": "prettier --write .",
16
- "eslint": "eslint .",
17
- "cache:clear": "rm -rf .cache",
18
- "docs": "pnpm --filter @browserbasehq/stagehand-docs run dev",
19
- "evals": "tsx scripts/run-evals.ts",
20
- "evals:legacy": "tsx scripts/run-evals.ts",
21
- "e2e": "pnpm run build && cd evals/deterministic && playwright test --config=e2e.playwright.config.ts",
22
- "e2e:bb": "pnpm run build && cd evals/deterministic && playwright test --config=bb.playwright.config.ts",
23
- "e2e:local": "pnpm run build && cd evals/deterministic && playwright test --config=local.playwright.config.ts",
24
- "build-v3-dom-scripts": "tsx lib/v3/dom/genDomScripts.ts",
25
- "build-types": "tsc --emitDeclarationOnly --outDir dist",
26
- "build-js": "tsup --entry.index lib/v3/index.ts --dts",
27
- "build:cli": "tsup evals/cli.ts --outDir dist/evals --format cjs && cp evals/evals.config.json dist/evals/ && chmod +x dist/evals/cli.js && npm link",
28
- "build": "pnpm run lint && pnpm run gen-version && pnpm run build-v3-dom-scripts && pnpm run build-js && pnpm run build-types",
29
- "gen-version": "tsx scripts/gen-version.ts",
30
- "prepare": "pnpm run build",
31
- "lint": "pnpm run prettier:fix && pnpm run eslint",
32
- "release": "pnpm run build && changeset publish",
33
- "release-canary": "pnpm run build && changeset version --snapshot && changeset publish --tag alpha"
34
- },
35
8
  "files": [
36
9
  "dist/index.js",
37
10
  "dist/index.d.ts",
@@ -39,43 +12,15 @@
39
12
  "dist/types",
40
13
  "dist/stagehand.config.d.ts"
41
14
  ],
42
- "keywords": [],
15
+ "keywords": [
16
+ "ai",
17
+ "browser",
18
+ "automation",
19
+ "web-scraping",
20
+ "testing"
21
+ ],
43
22
  "author": "Browserbase",
44
23
  "license": "MIT",
45
- "devDependencies": {
46
- "@changesets/changelog-github": "^0.5.0",
47
- "@changesets/cli": "^2.27.9",
48
- "@eslint/js": "^9.16.0",
49
- "@langchain/core": "^0.3.40",
50
- "@langchain/openai": "^0.4.4",
51
- "playwright-core": "^1.54.1",
52
- "puppeteer-core": "^22.8.0",
53
- "chrome-launcher": "^1.2.0",
54
- "patchright-core": "^1.55.2",
55
- "@playwright/test": "^1.42.1",
56
- "@types/adm-zip": "^0.5.7",
57
- "@types/cheerio": "^0.22.35",
58
- "@types/express": "^4.17.21",
59
- "@types/node": "^20.11.30",
60
- "@types/ws": "^8.5.13",
61
- "adm-zip": "^0.5.16",
62
- "autoevals": "^0.0.64",
63
- "braintrust": "^0.0.171",
64
- "chalk": "^5.4.1",
65
- "cheerio": "^1.0.0",
66
- "chromium-bidi": "^0.10.0",
67
- "esbuild": "^0.21.4",
68
- "eslint": "^9.16.0",
69
- "express": "^4.21.0",
70
- "globals": "^15.13.0",
71
- "multer": "^1.4.5-lts.1",
72
- "prettier": "^3.2.5",
73
- "string-comparison": "^1.3.0",
74
- "tsup": "^8.2.1",
75
- "tsx": "^4.10.5",
76
- "typescript": "^5.2.2",
77
- "typescript-eslint": "^8.17.0"
78
- },
79
24
  "peerDependencies": {
80
25
  "deepmerge": "^4.3.1",
81
26
  "dotenv": "^16.4.5",
@@ -84,7 +29,8 @@
84
29
  "dependencies": {
85
30
  "@anthropic-ai/sdk": "0.39.0",
86
31
  "@browserbasehq/sdk": "^2.4.0",
87
- "@google/genai": "^0.8.0",
32
+ "@google/genai": "^1.22.0",
33
+ "@langchain/openai": "^0.4.4",
88
34
  "@modelcontextprotocol/sdk": "^1.17.2",
89
35
  "ai": "^4.3.9",
90
36
  "devtools-protocol": "^0.0.1464554",
@@ -108,23 +54,38 @@
108
54
  "@ai-sdk/perplexity": "^1.1.7",
109
55
  "@ai-sdk/togetherai": "^0.2.6",
110
56
  "@ai-sdk/xai": "^1.2.15",
57
+ "@langchain/core": "^0.3.40",
58
+ "playwright-core": "^1.54.1",
59
+ "puppeteer-core": "^22.8.0",
60
+ "chrome-launcher": "^1.2.0",
61
+ "patchright-core": "^1.55.2",
111
62
  "ollama-ai-provider": "^1.2.0"
112
63
  },
113
- "directories": {
114
- "doc": "docs",
115
- "example": "examples",
116
- "lib": "lib"
64
+ "devDependencies": {
65
+ "@playwright/test": "^1.42.1",
66
+ "typescript": "^5.2.2",
67
+ "tsup": "^8.2.1",
68
+ "tsx": "^4.10.5",
69
+ "prettier": "^3.2.5",
70
+ "eslint": "^9.16.0"
117
71
  },
118
72
  "repository": {
119
73
  "type": "git",
120
- "url": "git+https://github.com/browserbase/stagehand.git"
74
+ "url": "git+https://github.com/browserbase/stagehand.git",
75
+ "directory": "packages/core"
121
76
  },
122
77
  "bugs": {
123
78
  "url": "https://github.com/browserbase/stagehand/issues"
124
79
  },
125
80
  "homepage": "https://stagehand.dev",
126
- "overrides": {
127
- "whatwg-url": "^14.0.0"
128
- },
129
- "packageManager": "pnpm@9.15.0+sha512.76e2379760a4328ec4415815bcd6628dee727af3779aaa4c914e3944156c4299921a89f976381ee107d41f12cfa4b66681ca9c718f0668fa0831ed4c6d8ba56c"
130
- }
81
+ "scripts": {
82
+ "gen-version": "tsx scripts/gen-version.ts",
83
+ "build-dom-scripts": "tsx lib/v3/dom/genDomScripts.ts && tsx lib/v3/dom/genLocatorScripts.ts",
84
+ "build-js": "tsup --entry.index lib/v3/index.ts --dts",
85
+ "typecheck": "tsc --noEmit",
86
+ "build": "pnpm run gen-version && pnpm run build-dom-scripts && pnpm run build-js && pnpm run typecheck",
87
+ "example": "node --import tsx -e \"const args=process.argv.slice(1).filter(a=>a!=='--'); const [p]=args; const n=(p||'example').replace(/^\\.\\//,'').replace(/\\.ts$/i,''); import(new URL(require('node:path').resolve('examples', n + '.ts'), 'file:'));\" --",
88
+ "lint": "cd ../.. && prettier --check packages/core && cd packages/core && eslint .",
89
+ "format": "prettier --write ."
90
+ }
91
+ }
package/README.md DELETED
@@ -1,165 +0,0 @@
1
- <div id="toc" align="center" style="margin-bottom: 0;">
2
- <ul style="list-style: none; margin: 0; padding: 0;">
3
- <a href="https://stagehand.dev">
4
- <picture>
5
- <source media="(prefers-color-scheme: dark)" srcset="media/dark_logo.png" />
6
- <img alt="Stagehand" src="media/light_logo.png" width="200" style="margin-right: 30px;" />
7
- </picture>
8
- </a>
9
- </ul>
10
- </div>
11
- <p align="center">
12
- <strong>The AI Browser Automation Framework</strong><br>
13
- <a href="https://docs.stagehand.dev">Read the Docs</a>
14
- </p>
15
-
16
- <p align="center">
17
- <a href="https://github.com/browserbase/stagehand/tree/main?tab=MIT-1-ov-file#MIT-1-ov-file">
18
- <picture>
19
- <source media="(prefers-color-scheme: dark)" srcset="media/dark_license.svg" />
20
- <img alt="MIT License" src="media/light_license.svg" />
21
- </picture>
22
- </a>
23
- <a href="https://join.slack.com/t/stagehand-dev/shared_invite/zt-38khc8iv5-T2acb50_0OILUaX7lxeBOg">
24
- <picture>
25
- <source media="(prefers-color-scheme: dark)" srcset="media/dark_slack.svg" />
26
- <img alt="Slack Community" src="media/light_slack.svg" />
27
- </picture>
28
- </a>
29
- </p>
30
-
31
- <p align="center">
32
- <a href="https://trendshift.io/repositories/12122" target="_blank"><img src="https://trendshift.io/api/badge/repositories/12122" alt="browserbase%2Fstagehand | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
33
- </p>
34
-
35
- <p align="center">
36
- If you're looking for the Python implementation, you can find it
37
- <a href="https://github.com/browserbase/stagehand-python"> here</a>
38
- </p>
39
-
40
- <div align="center" style="display: flex; align-items: center; justify-content: center; gap: 4px; margin-bottom: 0;">
41
- <b>Vibe code</b>
42
- <span style="font-size: 1.05em;"> Stagehand with </span>
43
- <a href="https://director.ai" style="display: flex; align-items: center;">
44
- <span>Director</span>
45
- </a>
46
- <span> </span>
47
- <picture>
48
- <img alt="Director" src="media/director_icon.svg" width="25" />
49
- </picture>
50
- </div>
51
-
52
- ## Why Stagehand?
53
-
54
- Most existing browser automation tools either require you to write low-level code in a framework like Selenium, Playwright, or Puppeteer, or use high-level agents that can be unpredictable in production. By letting developers choose what to write in code vs. natural language, Stagehand is the natural choice for browser automations in production.
55
-
56
- 1. **Choose when to write code vs. natural language**: use AI when you want to navigate unfamiliar pages, and use code ([Playwright](https://playwright.dev/)) when you know exactly what you want to do.
57
-
58
- 2. **Preview and cache actions**: Stagehand lets you preview AI actions before running them, and also helps you easily cache repeatable actions to save time and tokens.
59
-
60
- 3. **Computer use models with one line of code**: Stagehand lets you integrate SOTA computer use models from OpenAI and Anthropic into the browser with one line of code.
61
-
62
- ## Example
63
-
64
- Here's how to build a sample browser automation with Stagehand:
65
-
66
- <div align="center">
67
- <div style="max-width:300px;">
68
- <img src="/media/github_demo.gif" alt="See Stagehand in Action">
69
- </div>
70
- </div>
71
-
72
- ```typescript
73
- // Use Playwright functions on the page object
74
- const page = stagehand.page;
75
- await page.goto("https://github.com/browserbase");
76
-
77
- // Use act() to execute individual actions
78
- await page.act("click on the stagehand repo");
79
-
80
- // Use Computer Use agents for larger actions
81
- const agent = stagehand.agent({
82
- provider: "openai",
83
- model: "computer-use-preview",
84
- });
85
- await agent.execute("Get to the latest PR");
86
-
87
- // Use extract() to read data from the page
88
- const { author, title } = await page.extract({
89
- instruction: "extract the author and title of the PR",
90
- schema: z.object({
91
- author: z.string().describe("The username of the PR author"),
92
- title: z.string().describe("The title of the PR"),
93
- }),
94
- });
95
- ```
96
-
97
- ## Documentation
98
-
99
- Visit [docs.stagehand.dev](https://docs.stagehand.dev) to view the full documentation.
100
-
101
- ## Getting Started
102
-
103
- Start with Stagehand with one line of code, or check out our [Quickstart Guide](https://docs.stagehand.dev/get_started/quickstart) for more information:
104
-
105
- ```bash
106
- npx create-browser-app
107
- ```
108
-
109
- <div align="center">
110
- <a href="https://www.loom.com/share/f5107f86d8c94fa0a8b4b1e89740f7a7">
111
- <p>Watch Anirudh demo create-browser-app to create a Stagehand project!</p>
112
- </a>
113
- <a href="https://www.loom.com/share/f5107f86d8c94fa0a8b4b1e89740f7a7">
114
- <img style="max-width:300px;" src="https://cdn.loom.com/sessions/thumbnails/f5107f86d8c94fa0a8b4b1e89740f7a7-ec3f428b6775ceeb-full-play.gif">
115
- </a>
116
- </div>
117
-
118
- ### Build and Run from Source
119
-
120
- ```bash
121
- git clone https://github.com/browserbase/stagehand.git
122
- cd stagehand
123
- pnpm install
124
- pnpm playwright install
125
- pnpm run build
126
- pnpm run example # run the blank script at ./examples/example.ts
127
- pnpm run example 2048 # run the 2048 example at ./examples/2048.ts
128
- pnpm run evals -man # see evaluation suite options
129
- ```
130
-
131
- Stagehand is best when you have an API key for an LLM provider and Browserbase credentials. To add these to your project, run:
132
-
133
- ```bash
134
- cp .env.example .env
135
- nano .env # Edit the .env file to add API keys
136
- ```
137
-
138
- ## Contributing
139
-
140
- > [!NOTE]
141
- > We highly value contributions to Stagehand! For questions or support, please join our [Slack community](https://join.slack.com/t/stagehand-dev/shared_invite/zt-38khc8iv5-T2acb50_0OILUaX7lxeBOg).
142
-
143
- At a high level, we're focused on improving reliability, speed, and cost in that order of priority. If you're interested in contributing, we strongly recommend reaching out to [Miguel Gonzalez](https://x.com/miguel_gonzf) or [Paul Klein](https://x.com/pk_iv) in our [Slack community](https://join.slack.com/t/stagehand-dev/shared_invite/zt-38khc8iv5-T2acb50_0OILUaX7lxeBOg) before starting to ensure that your contribution aligns with our goals.
144
-
145
- For more information, please see our [Contributing Guide](https://docs.stagehand.dev/examples/contributing).
146
-
147
- ## Acknowledgements
148
-
149
- This project heavily relies on [Playwright](https://playwright.dev/) as a resilient backbone to automate the web. It also would not be possible without the awesome techniques and discoveries made by [tarsier](https://github.com/reworkd/tarsier), [gemini-zod](https://github.com/jbeoris/gemini-zod), and [fuji-web](https://github.com/normal-computing/fuji-web).
150
-
151
- We'd like to thank the following people for their major contributions to Stagehand:
152
- - [Paul Klein](https://github.com/pkiv)
153
- - [Anirudh Kamath](https://github.com/kamath)
154
- - [Sean McGuire](https://github.com/seanmcguire12)
155
- - [Miguel Gonzalez](https://github.com/miguelg719)
156
- - [Sameel Arif](https://github.com/sameelarif)
157
- - [Filip Michalsky](https://github.com/filip-michalsky)
158
- - [Jeremy Press](https://x.com/jeremypress)
159
- - [Navid Pour](https://github.com/navidpour)
160
-
161
- ## License
162
-
163
- Licensed under the MIT License.
164
-
165
- Copyright 2025 Browserbase, Inc.
package/dist/lib/api.d.ts DELETED
@@ -1,23 +0,0 @@
1
- import { z } from "zod/v3";
2
- import { StagehandAPIConstructorParams, StartSessionParams, StartSessionResult } from "./v3/types/api";
3
- import { GotoOptions } from "./v3/types/playwright";
4
- import { ActOptions, ActResult, AgentConfig, ExtractOptions, ExtractResult, ObserveOptions, Action } from "./v3/types/stagehand";
5
- import { AgentExecuteOptions, AgentResult } from "@/lib/v3/types/agent";
6
- export declare class StagehandAPI {
7
- private apiKey;
8
- private projectId;
9
- private sessionId?;
10
- private modelApiKey;
11
- private logger;
12
- private fetchWithCookies;
13
- constructor({ apiKey, projectId, logger }: StagehandAPIConstructorParams);
14
- init({ modelName, modelApiKey, domSettleTimeoutMs, verbose, debugDom, systemPrompt, selfHeal, waitForCaptchaSolves, actionTimeoutMs, browserbaseSessionCreateParams, browserbaseSessionID, }: StartSessionParams): Promise<StartSessionResult>;
15
- act(options: ActOptions | Action): Promise<ActResult>;
16
- extract<T extends z.AnyZodObject>(options: ExtractOptions<T>): Promise<ExtractResult<T>>;
17
- observe(options?: ObserveOptions): Promise<Action[]>;
18
- goto(url: string, options?: GotoOptions): Promise<void>;
19
- agentExecute(agentConfig: AgentConfig, executeOptions: AgentExecuteOptions): Promise<AgentResult>;
20
- end(): Promise<Response>;
21
- private execute;
22
- private request;
23
- }
@@ -1 +0,0 @@
1
- export declare const scriptContent = "(() => {\n // lib/dom/elementCheckUtils.ts\n function isElementNode(node) {\n return node.nodeType === Node.ELEMENT_NODE;\n }\n function isTextNode(node) {\n return node.nodeType === Node.TEXT_NODE && Boolean(node.textContent?.trim());\n }\n\n // lib/dom/xpathUtils.ts\n function getParentElement(node) {\n return isElementNode(node) ? node.parentElement : node.parentNode;\n }\n function getCombinations(attributes, size) {\n const results = [];\n function helper(start, combo) {\n if (combo.length === size) {\n results.push([...combo]);\n return;\n }\n for (let i = start; i < attributes.length; i++) {\n combo.push(attributes[i]);\n helper(i + 1, combo);\n combo.pop();\n }\n }\n helper(0, []);\n return results;\n }\n function isXPathFirstResultElement(xpath, target) {\n try {\n const result = document.evaluate(\n xpath,\n document.documentElement,\n null,\n XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,\n null\n );\n return result.snapshotItem(0) === target;\n } catch (error) {\n console.warn(`Invalid XPath expression: ${xpath}`, error);\n return false;\n }\n }\n function escapeXPathString(value) {\n if (value.includes(\"'\")) {\n if (value.includes('\"')) {\n return \"concat(\" + value.split(/('+)/).map((part) => {\n if (part === \"'\") {\n return `\"'\"`;\n } else if (part.startsWith(\"'\") && part.endsWith(\"'\")) {\n return `\"${part}\"`;\n } else {\n return `'${part}'`;\n }\n }).join(\",\") + \")\";\n } else {\n return `\"${value}\"`;\n }\n } else {\n return `'${value}'`;\n }\n }\n async function generateXPathsForElement(element) {\n if (!element) return [];\n const [complexXPath, standardXPath, idBasedXPath] = await Promise.all([\n generateComplexXPath(element),\n generateStandardXPath(element),\n generatedIdBasedXPath(element)\n ]);\n return [standardXPath, ...idBasedXPath ? [idBasedXPath] : [], complexXPath];\n }\n async function generateComplexXPath(element) {\n const parts = [];\n let currentElement = element;\n while (currentElement && (isTextNode(currentElement) || isElementNode(currentElement))) {\n if (isElementNode(currentElement)) {\n const el = currentElement;\n let selector = el.tagName.toLowerCase();\n const attributePriority = [\n \"data-qa\",\n \"data-component\",\n \"data-role\",\n \"role\",\n \"aria-role\",\n \"type\",\n \"name\",\n \"aria-label\",\n \"placeholder\",\n \"title\",\n \"alt\"\n ];\n const attributes = attributePriority.map((attr) => {\n let value = el.getAttribute(attr);\n if (attr === \"href-full\" && value) {\n value = el.getAttribute(\"href\");\n }\n return value ? { attr: attr === \"href-full\" ? \"href\" : attr, value } : null;\n }).filter((attr) => attr !== null);\n let uniqueSelector = \"\";\n for (let i = 1; i <= attributes.length; i++) {\n const combinations = getCombinations(attributes, i);\n for (const combo of combinations) {\n const conditions = combo.map((a) => `@${a.attr}=${escapeXPathString(a.value)}`).join(\" and \");\n const xpath2 = `//${selector}[${conditions}]`;\n if (isXPathFirstResultElement(xpath2, el)) {\n uniqueSelector = xpath2;\n break;\n }\n }\n if (uniqueSelector) break;\n }\n if (uniqueSelector) {\n parts.unshift(uniqueSelector.replace(\"//\", \"\"));\n break;\n } else {\n const parent = getParentElement(el);\n if (parent) {\n const siblings = Array.from(parent.children).filter(\n (sibling) => sibling.tagName === el.tagName\n );\n const index = siblings.indexOf(el) + 1;\n selector += siblings.length > 1 ? `[${index}]` : \"\";\n }\n parts.unshift(selector);\n }\n }\n currentElement = getParentElement(currentElement);\n }\n const xpath = \"//\" + parts.join(\"/\");\n return xpath;\n }\n async function generateStandardXPath(element) {\n const parts = [];\n while (element && (isTextNode(element) || isElementNode(element))) {\n let index = 0;\n let hasSameTypeSiblings = false;\n const siblings = element.parentElement ? Array.from(element.parentElement.childNodes) : [];\n for (let i = 0; i < siblings.length; i++) {\n const sibling = siblings[i];\n if (sibling.nodeType === element.nodeType && sibling.nodeName === element.nodeName) {\n index = index + 1;\n hasSameTypeSiblings = true;\n if (sibling.isSameNode(element)) {\n break;\n }\n }\n }\n if (element.nodeName !== \"#text\") {\n const tagName = element.nodeName.toLowerCase();\n const pathIndex = hasSameTypeSiblings ? `[${index}]` : \"\";\n parts.unshift(`${tagName}${pathIndex}`);\n }\n element = element.parentElement;\n }\n return parts.length ? `/${parts.join(\"/\")}` : \"\";\n }\n async function generatedIdBasedXPath(element) {\n if (isElementNode(element) && element.id) {\n return `//*[@id='${element.id}']`;\n }\n return null;\n }\n\n // types/stagehandErrors.ts\n var StagehandError = class extends Error {\n constructor(message) {\n super(message);\n this.name = this.constructor.name;\n }\n };\n var StagehandDomProcessError = class extends StagehandError {\n constructor(message) {\n super(`Error Processing Dom: ${message}`);\n }\n };\n\n // lib/dom/utils.ts\n function canElementScroll(elem) {\n if (typeof elem.scrollTo !== \"function\") {\n console.warn(\"canElementScroll: .scrollTo is not a function.\");\n return false;\n }\n try {\n const originalTop = elem.scrollTop;\n elem.scrollTo({\n top: originalTop + 100,\n left: 0,\n behavior: \"instant\"\n });\n if (elem.scrollTop === originalTop) {\n throw new StagehandDomProcessError(\"scrollTop did not change\");\n }\n elem.scrollTo({\n top: originalTop,\n left: 0,\n behavior: \"instant\"\n });\n return true;\n } catch (error) {\n console.warn(\"canElementScroll error:\", error.message || error);\n return false;\n }\n }\n function getNodeFromXpath(xpath) {\n return document.evaluate(\n xpath,\n document.documentElement,\n null,\n XPathResult.FIRST_ORDERED_NODE_TYPE,\n null\n ).singleNodeValue;\n }\n function waitForElementScrollEnd(element, idleMs = 100) {\n return new Promise((resolve) => {\n let scrollEndTimer;\n const handleScroll = () => {\n clearTimeout(scrollEndTimer);\n scrollEndTimer = window.setTimeout(() => {\n element.removeEventListener(\"scroll\", handleScroll);\n resolve();\n }, idleMs);\n };\n element.addEventListener(\"scroll\", handleScroll, { passive: true });\n handleScroll();\n });\n }\n\n // lib/dom/process.ts\n function getScrollableElements(topN) {\n const docEl = document.documentElement;\n const scrollableElements = [docEl];\n const allElements = document.querySelectorAll(\"*\");\n for (const elem of allElements) {\n const style = window.getComputedStyle(elem);\n const overflowY = style.overflowY;\n const isPotentiallyScrollable = overflowY === \"auto\" || overflowY === \"scroll\" || overflowY === \"overlay\";\n if (isPotentiallyScrollable) {\n const candidateScrollDiff = elem.scrollHeight - elem.clientHeight;\n if (candidateScrollDiff > 0 && canElementScroll(elem)) {\n scrollableElements.push(elem);\n }\n }\n }\n scrollableElements.sort((a, b) => b.scrollHeight - a.scrollHeight);\n if (topN !== void 0) {\n return scrollableElements.slice(0, topN);\n }\n return scrollableElements;\n }\n async function getScrollableElementXpaths(topN) {\n const scrollableElems = getScrollableElements(topN);\n const xpaths = [];\n for (const elem of scrollableElems) {\n const allXPaths = await generateXPathsForElement(elem);\n const firstXPath = allXPaths?.[0] || \"\";\n xpaths.push(firstXPath);\n }\n return xpaths;\n }\n (() => {\n const closedRoots = /* @__PURE__ */ new WeakMap();\n const nativeAttachShadow = Element.prototype.attachShadow;\n Element.prototype.attachShadow = function(init) {\n const root = nativeAttachShadow.call(this, init);\n if (init.mode === \"closed\") closedRoots.set(this, root);\n return root;\n };\n const backdoor = {\n getClosedRoot: (host) => closedRoots.get(host),\n queryClosed: (host, selector) => {\n const root = closedRoots.get(host);\n return root ? Array.from(root.querySelectorAll(selector)) : [];\n },\n xpathClosed: (host, xp) => {\n const root = closedRoots.get(host);\n if (!root) return [];\n const it = document.evaluate(\n xp,\n root,\n null,\n XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,\n null\n );\n const out = [];\n for (let i = 0; i < it.snapshotLength; ++i) {\n const n = it.snapshotItem(i);\n if (n) out.push(n);\n }\n return out;\n }\n };\n if (!(\"__stagehand__\" in window)) {\n Object.defineProperty(window, \"__stagehand__\", {\n value: backdoor,\n enumerable: false,\n writable: false,\n configurable: false\n });\n }\n })();\n window.getScrollableElementXpaths = getScrollableElementXpaths;\n window.getNodeFromXpath = getNodeFromXpath;\n window.waitForElementScrollEnd = waitForElementScrollEnd;\n})();\n";
@@ -1,71 +0,0 @@
1
- import { z } from "zod/v3";
2
- import { LogLine } from "./v3/types/log";
3
- import { LLMClient } from "./v3/llm/LLMClient";
4
- /** Simple usage shape if your LLM returns usage tokens. */
5
- interface LLMUsage {
6
- prompt_tokens: number;
7
- completion_tokens: number;
8
- total_tokens: number;
9
- }
10
- /**
11
- * For calls that use a schema: the LLMClient may return { data: T; usage?: LLMUsage }
12
- */
13
- export interface LLMParsedResponse<T> {
14
- data: T;
15
- usage?: LLMUsage;
16
- }
17
- export declare function extract({ instruction, domElements, schema, llmClient, logger, userProvidedInstructions, logInferenceToFile, }: {
18
- instruction: string;
19
- domElements: string;
20
- schema: z.ZodObject<z.ZodRawShape>;
21
- llmClient: LLMClient;
22
- userProvidedInstructions?: string;
23
- logger: (message: LogLine) => void;
24
- logInferenceToFile?: boolean;
25
- }): Promise<{
26
- metadata: {
27
- completed: boolean;
28
- progress: string;
29
- };
30
- prompt_tokens: number;
31
- completion_tokens: number;
32
- inference_time_ms: number;
33
- }>;
34
- export declare function observe({ instruction, domElements, llmClient, userProvidedInstructions, logger, logInferenceToFile, }: {
35
- instruction: string;
36
- domElements: string;
37
- llmClient: LLMClient;
38
- userProvidedInstructions?: string;
39
- logger: (message: LogLine) => void;
40
- logInferenceToFile?: boolean;
41
- }): Promise<{
42
- elements: {
43
- elementId: string;
44
- description: string;
45
- method: string;
46
- arguments: string[];
47
- }[];
48
- prompt_tokens: number;
49
- completion_tokens: number;
50
- inference_time_ms: number;
51
- }>;
52
- export declare function act({ instruction, domElements, llmClient, userProvidedInstructions, logger, logInferenceToFile, }: {
53
- instruction: string;
54
- domElements: string;
55
- llmClient: LLMClient;
56
- userProvidedInstructions?: string;
57
- logger: (message: LogLine) => void;
58
- logInferenceToFile?: boolean;
59
- }): Promise<{
60
- element: {
61
- elementId: string;
62
- description: string;
63
- method: string;
64
- arguments: string[];
65
- };
66
- prompt_tokens: number;
67
- completion_tokens: number;
68
- inference_time_ms: number;
69
- twoStep: boolean;
70
- }>;
71
- export {};
@@ -1,12 +0,0 @@
1
- /**
2
- * Appends a new entry to the act_summary.json file, then writes the file back out.
3
- */
4
- export declare function appendSummary<T>(inferenceType: string, entry: T): void;
5
- /**
6
- * Writes `data` as JSON into a file in `directory`, using a prefix plus timestamp.
7
- * Returns both the file name and the timestamp used, so you can log them.
8
- */
9
- export declare function writeTimestampedTxtFile(directory: string, prefix: string, data: unknown): {
10
- fileName: string;
11
- timestamp: string;
12
- };
@@ -1,54 +0,0 @@
1
- import pino from "pino";
2
- import { LogLine } from "./v3/types/log";
3
- export interface LoggerOptions {
4
- pretty?: boolean;
5
- level?: pino.Level;
6
- destination?: pino.DestinationStream;
7
- usePino?: boolean;
8
- }
9
- /**
10
- * Creates a configured Pino logger instance
11
- */
12
- export declare function createLogger(options?: LoggerOptions): import("pino/pino").Logger<never, boolean>;
13
- /**
14
- * StagehandLogger class that wraps Pino for our specific needs
15
- */
16
- export declare class StagehandLogger {
17
- /**
18
- * We maintain a single shared Pino instance when `usePino` is enabled.
19
- * This prevents spawning a new worker thread for every Stagehand instance
20
- * (which happens when `pino-pretty` transport is used), eliminating the
21
- * memory/RSS growth observed when many Stagehand objects are created and
22
- * disposed within the same process (e.g. a request-per-instance API).
23
- */
24
- private static sharedPinoLogger;
25
- private logger?;
26
- private verbose;
27
- private externalLogger?;
28
- private usePino;
29
- private isTest;
30
- constructor(options?: LoggerOptions, externalLogger?: (logLine: LogLine) => void);
31
- /**
32
- * Set the verbosity level
33
- */
34
- setVerbosity(level: 0 | 1 | 2): void;
35
- /**
36
- * Log a message using our LogLine format
37
- */
38
- log(logLine: LogLine): void;
39
- /**
40
- * Helper to format auxiliary data for structured logging
41
- */
42
- private formatAuxiliaryData;
43
- /**
44
- * Convenience methods for different log levels
45
- */
46
- error(message: string, data?: Record<string, unknown>): void;
47
- warn(message: string, data?: Record<string, unknown>): void;
48
- info(message: string, data?: Record<string, unknown>): void;
49
- debug(message: string, data?: Record<string, unknown>): void;
50
- /**
51
- * Convert a plain object to our auxiliary format
52
- */
53
- private convertToAuxiliary;
54
- }
@@ -1,12 +0,0 @@
1
- import { ChatMessage } from "./v3/llm/LLMClient";
2
- export declare function buildUserInstructionsString(userProvidedInstructions?: string): string;
3
- export declare function buildExtractSystemPrompt(isUsingPrintExtractedDataTool?: boolean, userProvidedInstructions?: string): ChatMessage;
4
- export declare function buildExtractUserPrompt(instruction: string, domElements: string, isUsingPrintExtractedDataTool?: boolean): ChatMessage;
5
- export declare function buildMetadataSystemPrompt(): ChatMessage;
6
- export declare function buildMetadataPrompt(instruction: string, extractionResponse: object): ChatMessage;
7
- export declare function buildObserveSystemPrompt(userProvidedInstructions?: string): ChatMessage;
8
- export declare function buildObserveUserMessage(instruction: string, domElements: string): ChatMessage;
9
- export declare function buildActSystemPrompt(userProvidedInstructions?: string): ChatMessage;
10
- export declare function buildActPrompt(action: string, supportedActions: string[], variables?: Record<string, string>): string;
11
- export declare function buildStepTwoPrompt(originalUserAction: string, previousAction: string, supportedActions: string[], variables?: Record<string, string>): string;
12
- export declare function buildOperatorSystemPrompt(goal: string): ChatMessage;
@@ -1,65 +0,0 @@
1
- import { Schema } from "@google/genai";
2
- import { z, ZodTypeAny } from "zod/v3";
3
- import { LogLine } from "./v3/types/log";
4
- import { ModelProvider } from "./v3/types/model";
5
- import { ZodPathSegments } from "./v3/types/stagehand";
6
- export declare function validateZodSchema(schema: z.ZodTypeAny, data: unknown): boolean;
7
- /**
8
- * Detects if the code is running in the Bun runtime environment.
9
- * @returns {boolean} True if running in Bun, false otherwise.
10
- */
11
- export declare function isRunningInBun(): boolean;
12
- export declare function toGeminiSchema(zodSchema: z.ZodTypeAny): Schema;
13
- export declare function getZodType(schema: z.ZodTypeAny): string;
14
- /**
15
- * Recursively traverses a given Zod schema, scanning for any fields of type `z.string().url()`.
16
- * For each such field, it replaces the `z.string().url()` with `z.number()`.
17
- *
18
- * This function is used internally by higher-level utilities (e.g., transforming entire object schemas)
19
- * and handles nested objects, arrays, unions, intersections, optionals.
20
- *
21
- * @param schema - The Zod schema to transform.
22
- * @param currentPath - An array of string/number keys representing the current schema path (used internally for recursion).
23
- * @returns A two-element tuple:
24
- * 1. The updated Zod schema, with any `.url()` fields replaced by `z.number()`.
25
- * 2. An array of {@link ZodPathSegments} objects representing each replaced field, including the path segments.
26
- */
27
- export declare function transformSchema(schema: z.ZodTypeAny, currentPath: Array<string | number>): [z.ZodTypeAny, ZodPathSegments[]];
28
- /**
29
- * Once we get the final extracted object that has numeric IDs in place of URLs,
30
- * use `injectUrls` to walk the object and replace numeric IDs
31
- * with the real URL strings from idToUrlMapping. The `path` may include `*`
32
- * for array indices (indicating "all items in the array").
33
- */
34
- export declare function injectUrls(obj: unknown, path: Array<string | number>, idToUrlMapping: Record<string, string>): void;
35
- /**
36
- * Mapping from LLM provider names to their corresponding environment variable names for API keys.
37
- */
38
- export declare const providerEnvVarMap: Partial<Record<ModelProvider | string, string>>;
39
- /**
40
- * Loads an API key for a provider, checking environment variables.
41
- * @param provider The name of the provider (e.g., 'openai', 'anthropic')
42
- * @param logger Optional logger for info/error messages
43
- * @returns The API key if found, undefined otherwise
44
- */
45
- export declare function loadApiKeyFromEnv(provider: string | undefined, logger: (logLine: LogLine) => void): string | undefined;
46
- export declare function trimTrailingTextNode(path: string | undefined): string | undefined;
47
- export interface JsonSchemaProperty {
48
- type: string;
49
- enum?: unknown[];
50
- items?: JsonSchemaProperty;
51
- properties?: Record<string, JsonSchemaProperty>;
52
- required?: string[];
53
- minimum?: number;
54
- maximum?: number;
55
- description?: string;
56
- }
57
- export interface JsonSchema extends JsonSchemaProperty {
58
- type: string;
59
- }
60
- /**
61
- * Converts a JSON Schema object to a Zod schema
62
- * @param schema The JSON Schema object to convert
63
- * @returns A Zod schema equivalent to the input JSON Schema
64
- */
65
- export declare function jsonSchemaToZod(schema: JsonSchema): ZodTypeAny;
@@ -1,18 +0,0 @@
1
- import { AgentAction, AgentResult, AgentType, AgentExecutionOptions } from "../types/agent";
2
- /**
3
- * Abstract base class for agent clients
4
- * This provides a common interface for all agent implementations
5
- */
6
- export declare abstract class AgentClient {
7
- type: AgentType;
8
- modelName: string;
9
- clientOptions: Record<string, unknown>;
10
- userProvidedInstructions?: string;
11
- constructor(type: AgentType, modelName: string, userProvidedInstructions?: string);
12
- abstract execute(options: AgentExecutionOptions): Promise<AgentResult>;
13
- abstract captureScreenshot(options?: Record<string, unknown>): Promise<unknown>;
14
- abstract setViewport(width: number, height: number): void;
15
- abstract setCurrentUrl(url: string): void;
16
- abstract setScreenshotProvider(provider: () => Promise<string>): void;
17
- abstract setActionHandler(handler: (action: AgentAction) => Promise<void>): void;
18
- }
@@ -1,18 +0,0 @@
1
- import { AgentType } from "../types/agent";
2
- import { LogLine } from "../types/log";
3
- import { ToolSet } from "ai/dist";
4
- import { AgentClient } from "./AgentClient";
5
- /**
6
- * Provider for agent clients
7
- * This class is responsible for creating the appropriate agent client
8
- * based on the provider type
9
- */
10
- export declare class AgentProvider {
11
- private logger;
12
- /**
13
- * Create a new agent provider
14
- */
15
- constructor(logger: (message: LogLine) => void);
16
- getClient(modelName: string, clientOptions?: Record<string, unknown>, userProvidedInstructions?: string, tools?: ToolSet): AgentClient;
17
- static getAgentProvider(modelName: string): AgentType;
18
- }