@browserbasehq/stagehand 2.5.3-alpha-4994eabfa8016347ec001b40ceadb0c905403470 → 4.0.0-alpha-49bc5b68e4fbeb28a120b301b51c31a36db7d922

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (286) hide show
  1. package/dist/index.d.ts +1367 -751
  2. package/dist/index.js +37087 -22062
  3. package/package.json +54 -78
  4. package/README.md +0 -165
  5. package/dist/evals/args.d.ts +0 -15
  6. package/dist/evals/cli.d.ts +0 -2
  7. package/dist/evals/core/summary.d.ts +0 -2
  8. package/dist/evals/datasets/osworld/adapter.d.ts +0 -22
  9. package/dist/evals/datasets/osworld/index.d.ts +0 -9
  10. package/dist/evals/datasets/osworld/types.d.ts +0 -54
  11. package/dist/evals/deterministic/bb.playwright.config.d.ts +0 -5
  12. package/dist/evals/deterministic/e2e.playwright.config.d.ts +0 -5
  13. package/dist/evals/deterministic/local.playwright.config.d.ts +0 -5
  14. package/dist/evals/deterministic/stagehand.config.d.ts +0 -3
  15. package/dist/evals/deterministic/tests/BrowserContext/addInitScript.test.d.ts +0 -1
  16. package/dist/evals/deterministic/tests/BrowserContext/cookies.test.d.ts +0 -1
  17. package/dist/evals/deterministic/tests/BrowserContext/multiPage.test.d.ts +0 -1
  18. package/dist/evals/deterministic/tests/BrowserContext/page.test.d.ts +0 -1
  19. package/dist/evals/deterministic/tests/BrowserContext/routing.test.d.ts +0 -1
  20. package/dist/evals/deterministic/tests/Errors/apiKeyError.test.d.ts +0 -1
  21. package/dist/evals/deterministic/tests/browserbase/contexts.test.d.ts +0 -1
  22. package/dist/evals/deterministic/tests/browserbase/downloads.test.d.ts +0 -1
  23. package/dist/evals/deterministic/tests/browserbase/sessions.test.d.ts +0 -1
  24. package/dist/evals/deterministic/tests/browserbase/uploads.test.d.ts +0 -1
  25. package/dist/evals/deterministic/tests/local/create.test.d.ts +0 -1
  26. package/dist/evals/deterministic/tests/local/downloads.test.d.ts +0 -1
  27. package/dist/evals/deterministic/tests/page/addInitScript.test.d.ts +0 -1
  28. package/dist/evals/deterministic/tests/page/addRemoveLocatorHandler.test.d.ts +0 -1
  29. package/dist/evals/deterministic/tests/page/addTags.test.d.ts +0 -1
  30. package/dist/evals/deterministic/tests/page/bringToFront.test.d.ts +0 -1
  31. package/dist/evals/deterministic/tests/page/content.test.d.ts +0 -1
  32. package/dist/evals/deterministic/tests/page/evaluate.test.d.ts +0 -1
  33. package/dist/evals/deterministic/tests/page/expose.test.d.ts +0 -1
  34. package/dist/evals/deterministic/tests/page/frames.test.d.ts +0 -1
  35. package/dist/evals/deterministic/tests/page/getBy.test.d.ts +0 -1
  36. package/dist/evals/deterministic/tests/page/livePageProxy.test.d.ts +0 -1
  37. package/dist/evals/deterministic/tests/page/navigation.test.d.ts +0 -1
  38. package/dist/evals/deterministic/tests/page/on.test.d.ts +0 -1
  39. package/dist/evals/deterministic/tests/page/pageContext.test.d.ts +0 -1
  40. package/dist/evals/deterministic/tests/page/reload.test.d.ts +0 -1
  41. package/dist/evals/deterministic/tests/page/waitFor.test.d.ts +0 -1
  42. package/dist/evals/env.d.ts +0 -13
  43. package/dist/evals/evaluator.d.ts +0 -29
  44. package/dist/evals/index.eval.d.ts +0 -1
  45. package/dist/evals/initStagehand.d.ts +0 -37
  46. package/dist/evals/llm_clients/hn_aisdk.d.ts +0 -2
  47. package/dist/evals/llm_clients/hn_customOpenAI.d.ts +0 -2
  48. package/dist/evals/llm_clients/hn_langchain.d.ts +0 -2
  49. package/dist/evals/logger.d.ts +0 -50
  50. package/dist/evals/scoring.d.ts +0 -25
  51. package/dist/evals/suites/gaia.d.ts +0 -2
  52. package/dist/evals/suites/onlineMind2Web.d.ts +0 -2
  53. package/dist/evals/suites/osworld.d.ts +0 -2
  54. package/dist/evals/suites/webbench.d.ts +0 -2
  55. package/dist/evals/suites/webvoyager.d.ts +0 -2
  56. package/dist/evals/taskConfig.d.ts +0 -33
  57. package/dist/evals/tasks/agent/all_recipes.d.ts +0 -2
  58. package/dist/evals/tasks/agent/apple_trade_in.d.ts +0 -2
  59. package/dist/evals/tasks/agent/apple_tv.d.ts +0 -2
  60. package/dist/evals/tasks/agent/arxiv_gpt_report.d.ts +0 -2
  61. package/dist/evals/tasks/agent/gaia.d.ts +0 -9
  62. package/dist/evals/tasks/agent/github.d.ts +0 -2
  63. package/dist/evals/tasks/agent/github_react_version.d.ts +0 -2
  64. package/dist/evals/tasks/agent/google_flights.d.ts +0 -2
  65. package/dist/evals/tasks/agent/google_maps.d.ts +0 -2
  66. package/dist/evals/tasks/agent/google_maps_2.d.ts +0 -2
  67. package/dist/evals/tasks/agent/google_maps_3.d.ts +0 -2
  68. package/dist/evals/tasks/agent/google_shopping.d.ts +0 -2
  69. package/dist/evals/tasks/agent/hotel_booking.d.ts +0 -2
  70. package/dist/evals/tasks/agent/hugging_face.d.ts +0 -2
  71. package/dist/evals/tasks/agent/iframe_form.d.ts +0 -2
  72. package/dist/evals/tasks/agent/iframe_form_multiple.d.ts +0 -2
  73. package/dist/evals/tasks/agent/kayak.d.ts +0 -2
  74. package/dist/evals/tasks/agent/kith.d.ts +0 -2
  75. package/dist/evals/tasks/agent/nba_trades.d.ts +0 -2
  76. package/dist/evals/tasks/agent/onlineMind2Web.d.ts +0 -2
  77. package/dist/evals/tasks/agent/osworld.d.ts +0 -2
  78. package/dist/evals/tasks/agent/sf_library_card.d.ts +0 -2
  79. package/dist/evals/tasks/agent/sf_library_card_multiple.d.ts +0 -2
  80. package/dist/evals/tasks/agent/sign_in.d.ts +0 -2
  81. package/dist/evals/tasks/agent/steam_games.d.ts +0 -2
  82. package/dist/evals/tasks/agent/trivago.d.ts +0 -2
  83. package/dist/evals/tasks/agent/ubereats.d.ts +0 -2
  84. package/dist/evals/tasks/agent/webbench.d.ts +0 -2
  85. package/dist/evals/tasks/agent/webvoyager.d.ts +0 -2
  86. package/dist/evals/tasks/agent/youtube.d.ts +0 -2
  87. package/dist/evals/tasks/allrecipes.d.ts +0 -2
  88. package/dist/evals/tasks/amazon_add_to_cart.d.ts +0 -2
  89. package/dist/evals/tasks/apple.d.ts +0 -2
  90. package/dist/evals/tasks/arxiv.d.ts +0 -2
  91. package/dist/evals/tasks/bidnet.d.ts +0 -2
  92. package/dist/evals/tasks/checkboxes.d.ts +0 -2
  93. package/dist/evals/tasks/combination_sauce.d.ts +0 -2
  94. package/dist/evals/tasks/costar.d.ts +0 -2
  95. package/dist/evals/tasks/csr_in_oopif.d.ts +0 -2
  96. package/dist/evals/tasks/csr_in_spif.d.ts +0 -2
  97. package/dist/evals/tasks/custom_dropdown.d.ts +0 -2
  98. package/dist/evals/tasks/dropdown.d.ts +0 -2
  99. package/dist/evals/tasks/expect_act_timeout.d.ts +0 -2
  100. package/dist/evals/tasks/extract_aigrant_companies.d.ts +0 -2
  101. package/dist/evals/tasks/extract_aigrant_targeted.d.ts +0 -2
  102. package/dist/evals/tasks/extract_aigrant_targeted_2.d.ts +0 -2
  103. package/dist/evals/tasks/extract_apartments.d.ts +0 -2
  104. package/dist/evals/tasks/extract_area_codes.d.ts +0 -2
  105. package/dist/evals/tasks/extract_baptist_health.d.ts +0 -2
  106. package/dist/evals/tasks/extract_capacitor_info.d.ts +0 -2
  107. package/dist/evals/tasks/extract_collaborators.d.ts +0 -2
  108. package/dist/evals/tasks/extract_csa.d.ts +0 -2
  109. package/dist/evals/tasks/extract_geniusee.d.ts +0 -2
  110. package/dist/evals/tasks/extract_geniusee_2.d.ts +0 -2
  111. package/dist/evals/tasks/extract_github_commits.d.ts +0 -2
  112. package/dist/evals/tasks/extract_github_stars.d.ts +0 -2
  113. package/dist/evals/tasks/extract_hamilton_weather.d.ts +0 -2
  114. package/dist/evals/tasks/extract_jfk_links.d.ts +0 -2
  115. package/dist/evals/tasks/extract_jstor_news.d.ts +0 -2
  116. package/dist/evals/tasks/extract_memorial_healthcare.d.ts +0 -2
  117. package/dist/evals/tasks/extract_nhl_stats.d.ts +0 -2
  118. package/dist/evals/tasks/extract_partners.d.ts +0 -2
  119. package/dist/evals/tasks/extract_press_releases.d.ts +0 -2
  120. package/dist/evals/tasks/extract_professional_info.d.ts +0 -2
  121. package/dist/evals/tasks/extract_public_notices.d.ts +0 -2
  122. package/dist/evals/tasks/extract_recipe.d.ts +0 -2
  123. package/dist/evals/tasks/extract_regulations_table.d.ts +0 -2
  124. package/dist/evals/tasks/extract_repo_name.d.ts +0 -2
  125. package/dist/evals/tasks/extract_resistor_info.d.ts +0 -2
  126. package/dist/evals/tasks/extract_rockauto.d.ts +0 -2
  127. package/dist/evals/tasks/extract_single_link.d.ts +0 -2
  128. package/dist/evals/tasks/extract_snowshoeing_destinations.d.ts +0 -2
  129. package/dist/evals/tasks/extract_staff_members.d.ts +0 -2
  130. package/dist/evals/tasks/extract_zillow.d.ts +0 -2
  131. package/dist/evals/tasks/google_flights.d.ts +0 -11
  132. package/dist/evals/tasks/heal_custom_dropdown.d.ts +0 -2
  133. package/dist/evals/tasks/heal_scroll_50.d.ts +0 -2
  134. package/dist/evals/tasks/heal_simple_google_search.d.ts +0 -2
  135. package/dist/evals/tasks/hidden_input_dropdown.d.ts +0 -2
  136. package/dist/evals/tasks/history.d.ts +0 -2
  137. package/dist/evals/tasks/homedepot.d.ts +0 -2
  138. package/dist/evals/tasks/iframe_form_filling.d.ts +0 -2
  139. package/dist/evals/tasks/iframe_hn.d.ts +0 -2
  140. package/dist/evals/tasks/iframe_same_proc.d.ts +0 -2
  141. package/dist/evals/tasks/iframe_scroll.d.ts +0 -2
  142. package/dist/evals/tasks/iframes_nested.d.ts +0 -2
  143. package/dist/evals/tasks/imdb_movie_details.d.ts +0 -2
  144. package/dist/evals/tasks/instructions.d.ts +0 -2
  145. package/dist/evals/tasks/ionwave.d.ts +0 -2
  146. package/dist/evals/tasks/ionwave_observe.d.ts +0 -2
  147. package/dist/evals/tasks/login.d.ts +0 -2
  148. package/dist/evals/tasks/multi_tab.d.ts +0 -2
  149. package/dist/evals/tasks/namespace_xpath.d.ts +0 -2
  150. package/dist/evals/tasks/nested_iframes_2.d.ts +0 -2
  151. package/dist/evals/tasks/nextChunk.d.ts +0 -2
  152. package/dist/evals/tasks/no_js_click.d.ts +0 -2
  153. package/dist/evals/tasks/nonsense_action.d.ts +0 -2
  154. package/dist/evals/tasks/observe_amazon_add_to_cart.d.ts +0 -2
  155. package/dist/evals/tasks/observe_github.d.ts +0 -2
  156. package/dist/evals/tasks/observe_iframes1.d.ts +0 -2
  157. package/dist/evals/tasks/observe_iframes2.d.ts +0 -2
  158. package/dist/evals/tasks/observe_simple_google_search.d.ts +0 -2
  159. package/dist/evals/tasks/observe_taxes.d.ts +0 -2
  160. package/dist/evals/tasks/observe_vantechjournal.d.ts +0 -2
  161. package/dist/evals/tasks/observe_yc_startup.d.ts +0 -2
  162. package/dist/evals/tasks/oopif_in_csr.d.ts +0 -2
  163. package/dist/evals/tasks/oopif_in_osr.d.ts +0 -2
  164. package/dist/evals/tasks/os_dropdown.d.ts +0 -2
  165. package/dist/evals/tasks/osr_in_oopif.d.ts +0 -2
  166. package/dist/evals/tasks/osr_in_spif.d.ts +0 -2
  167. package/dist/evals/tasks/panamcs.d.ts +0 -2
  168. package/dist/evals/tasks/peeler_complex.d.ts +0 -2
  169. package/dist/evals/tasks/peeler_simple.d.ts +0 -2
  170. package/dist/evals/tasks/prevChunk.d.ts +0 -2
  171. package/dist/evals/tasks/radio_btn.d.ts +0 -2
  172. package/dist/evals/tasks/rakuten_jp.d.ts +0 -2
  173. package/dist/evals/tasks/sciquest.d.ts +0 -2
  174. package/dist/evals/tasks/screenshot_cdp_toggle.d.ts +0 -9
  175. package/dist/evals/tasks/scroll_50.d.ts +0 -2
  176. package/dist/evals/tasks/scroll_75.d.ts +0 -2
  177. package/dist/evals/tasks/shadow_dom.d.ts +0 -2
  178. package/dist/evals/tasks/simple_google_search.d.ts +0 -2
  179. package/dist/evals/tasks/spif_in_csr.d.ts +0 -2
  180. package/dist/evals/tasks/spif_in_osr.d.ts +0 -2
  181. package/dist/evals/tasks/stock_x.d.ts +0 -2
  182. package/dist/evals/tasks/tab_handling.d.ts +0 -2
  183. package/dist/evals/tasks/ted_talk.d.ts +0 -2
  184. package/dist/evals/tasks/vanta_h.d.ts +0 -2
  185. package/dist/evals/tasks/vantechjournal.d.ts +0 -2
  186. package/dist/evals/tasks/wichita.d.ts +0 -2
  187. package/dist/evals/tasks/wikipedia.d.ts +0 -2
  188. package/dist/evals/utils/ScreenshotCollector.d.ts +0 -34
  189. package/dist/evals/utils/imageUtils.d.ts +0 -1
  190. package/dist/evals/utils.d.ts +0 -61
  191. package/dist/examples/2048.d.ts +0 -1
  192. package/dist/examples/actionable_observe_example.d.ts +0 -14
  193. package/dist/examples/cua-example.d.ts +0 -1
  194. package/dist/examples/custom_client_aisdk.d.ts +0 -1
  195. package/dist/examples/custom_client_langchain.d.ts +0 -1
  196. package/dist/examples/custom_client_openai.d.ts +0 -1
  197. package/dist/examples/example.d.ts +0 -1
  198. package/dist/examples/external_clients/aisdk.d.ts +0 -20
  199. package/dist/examples/external_clients/customOpenAI.d.ts +0 -18
  200. package/dist/examples/external_clients/langchain.d.ts +0 -9
  201. package/dist/examples/form_filling_sensible.d.ts +0 -1
  202. package/dist/examples/google_enter.d.ts +0 -6
  203. package/dist/examples/instructions.d.ts +0 -1
  204. package/dist/examples/integrations/exa.d.ts +0 -1
  205. package/dist/examples/integrations/supabase.d.ts +0 -1
  206. package/dist/examples/operator-example.d.ts +0 -8
  207. package/dist/examples/parameterizeApiKey.d.ts +0 -1
  208. package/dist/examples/popup.d.ts +0 -6
  209. package/dist/examples/wordle.d.ts +0 -1
  210. package/dist/lib/StagehandContext.d.ts +0 -25
  211. package/dist/lib/StagehandPage.d.ts +0 -103
  212. package/dist/lib/a11y/utils.d.ts +0 -144
  213. package/dist/lib/agent/AgentClient.d.ts +0 -20
  214. package/dist/lib/agent/AgentProvider.d.ts +0 -19
  215. package/dist/lib/agent/AnthropicCUAClient.d.ts +0 -56
  216. package/dist/lib/agent/GoogleCUAClient.d.ts +0 -63
  217. package/dist/lib/agent/OpenAICUAClient.d.ts +0 -65
  218. package/dist/lib/agent/StagehandAgent.d.ts +0 -15
  219. package/dist/lib/agent/tools/act.d.ts +0 -59
  220. package/dist/lib/agent/tools/ariaTree.d.ts +0 -11
  221. package/dist/lib/agent/tools/close.d.ts +0 -22
  222. package/dist/lib/agent/tools/extract.d.ts +0 -38
  223. package/dist/lib/agent/tools/fillform.d.ts +0 -37
  224. package/dist/lib/agent/tools/goto.d.ts +0 -29
  225. package/dist/lib/agent/tools/index.d.ts +0 -257
  226. package/dist/lib/agent/tools/navback.d.ts +0 -17
  227. package/dist/lib/agent/tools/screenshot.d.ts +0 -13
  228. package/dist/lib/agent/tools/scroll.d.ts +0 -23
  229. package/dist/lib/agent/tools/wait.d.ts +0 -18
  230. package/dist/lib/agent/utils/cuaKeyMapping.d.ts +0 -10
  231. package/dist/lib/agent/utils/imageCompression.d.ts +0 -53
  232. package/dist/lib/agent/utils/messageProcessing.d.ts +0 -13
  233. package/dist/lib/api.d.ts +0 -23
  234. package/dist/lib/browserbaseDefaults.d.ts +0 -9
  235. package/dist/lib/cache/ActionCache.d.ts +0 -62
  236. package/dist/lib/cache/BaseCache.d.ts +0 -66
  237. package/dist/lib/cache/LLMCache.d.ts +0 -22
  238. package/dist/lib/cache.d.ts +0 -29
  239. package/dist/lib/dom/build/scriptContent.d.ts +0 -1
  240. package/dist/lib/dom/elementCheckUtils.d.ts +0 -2
  241. package/dist/lib/dom/genDomScripts.d.ts +0 -1
  242. package/dist/lib/dom/index.d.ts +0 -2
  243. package/dist/lib/dom/process.d.ts +0 -17
  244. package/dist/lib/dom/utils.d.ts +0 -7
  245. package/dist/lib/dom/xpathUtils.d.ts +0 -14
  246. package/dist/lib/handlers/actHandler.d.ts +0 -33
  247. package/dist/lib/handlers/cuaAgentHandler.d.ts +0 -58
  248. package/dist/lib/handlers/extractHandler.d.ts +0 -54
  249. package/dist/lib/handlers/handlerUtils/actHandlerUtils.d.ts +0 -21
  250. package/dist/lib/handlers/observeHandler.d.ts +0 -40
  251. package/dist/lib/handlers/stagehandAgentHandler.d.ts +0 -27
  252. package/dist/lib/index.d.ts +0 -94
  253. package/dist/lib/inference.d.ts +0 -61
  254. package/dist/lib/inferenceLogUtils.d.ts +0 -12
  255. package/dist/lib/llm/AnthropicClient.d.ts +0 -21
  256. package/dist/lib/llm/CerebrasClient.d.ts +0 -22
  257. package/dist/lib/llm/GoogleClient.d.ts +0 -24
  258. package/dist/lib/llm/GroqClient.d.ts +0 -22
  259. package/dist/lib/llm/LLMClient.d.ts +0 -99
  260. package/dist/lib/llm/LLMProvider.d.ts +0 -13
  261. package/dist/lib/llm/OpenAIClient.d.ts +0 -20
  262. package/dist/lib/llm/aisdk.d.ts +0 -20
  263. package/dist/lib/logger.d.ts +0 -54
  264. package/dist/lib/mcp/connection.d.ts +0 -11
  265. package/dist/lib/mcp/utils.d.ts +0 -3
  266. package/dist/lib/prompt.d.ts +0 -14
  267. package/dist/lib/utils.d.ts +0 -68
  268. package/dist/lib/version.d.ts +0 -5
  269. package/dist/scripts/gen-version.d.ts +0 -1
  270. package/dist/scripts/run-evals.d.ts +0 -1
  271. package/dist/stagehand.config.d.ts +0 -3
  272. package/dist/types/act.d.ts +0 -50
  273. package/dist/types/agent.d.ts +0 -144
  274. package/dist/types/api.d.ts +0 -40
  275. package/dist/types/browser.d.ts +0 -10
  276. package/dist/types/context.d.ts +0 -117
  277. package/dist/types/evals.d.ts +0 -95
  278. package/dist/types/evaluator.d.ts +0 -40
  279. package/dist/types/llm.d.ts +0 -11
  280. package/dist/types/log.d.ts +0 -23
  281. package/dist/types/model.d.ts +0 -17
  282. package/dist/types/page.d.ts +0 -38
  283. package/dist/types/playwright.d.ts +0 -12
  284. package/dist/types/stagehand.d.ts +0 -330
  285. package/dist/types/stagehandApiErrors.d.ts +0 -18
  286. package/dist/types/stagehandErrors.d.ts +0 -104
@@ -1,37 +0,0 @@
1
- /**
2
- * This file provides a function to initialize a Stagehand instance for use in evaluations.
3
- * It configures the Stagehand environment and sets default options based on the current environment
4
- * (e.g., local or BROWSERBASE), caching preferences, and verbosity. It also establishes a logger for
5
- * capturing logs emitted by Stagehand.
6
- *
7
- * We create a central config object (`StagehandConfig`) that defines all parameters for Stagehand.
8
- *
9
- * The `initStagehand` function takes the model name, an optional DOM settling timeout, and an EvalLogger,
10
- * then uses these to override some default values before creating and initializing the Stagehand instance.
11
- */
12
- import { ConstructorParams, LLMClient } from "@browserbasehq/stagehand";
13
- import { EvalLogger } from "./logger";
14
- import type { StagehandInitResult } from "@/types/evals";
15
- import { AvailableModel } from "@browserbasehq/stagehand";
16
- /**
17
- * Initializes a Stagehand instance for a given model:
18
- * - modelName: The model to use (overrides default in StagehandConfig)
19
- * - domSettleTimeoutMs: Optional timeout for DOM settling operations
20
- * - logger: An EvalLogger instance for capturing logs
21
- *
22
- * Returns:
23
- * - stagehand: The initialized Stagehand instance
24
- * - logger: The provided logger, associated with the Stagehand instance
25
- * - initResponse: Any response data returned by Stagehand initialization
26
- */
27
- export declare const initStagehand: ({ llmClient, modelClientOptions, domSettleTimeoutMs, logger, configOverrides, actTimeoutMs, modelName, }: {
28
- llmClient?: LLMClient;
29
- modelClientOptions?: {
30
- apiKey: string;
31
- };
32
- domSettleTimeoutMs?: number;
33
- logger: EvalLogger;
34
- configOverrides?: Partial<ConstructorParams>;
35
- actTimeoutMs?: number;
36
- modelName: AvailableModel;
37
- }) => Promise<StagehandInitResult>;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const hn_aisdk: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const hn_customOpenAI: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const hn_langchain: EvalFunction;
@@ -1,50 +0,0 @@
1
- import { LogLineEval } from "@/types/evals";
2
- import { Stagehand, LogLine } from "@browserbasehq/stagehand";
3
- /**
4
- * EvalLogger:
5
- * A logger class used during evaluations to capture and print log lines.
6
- *
7
- * Capabilities:
8
- * - Maintains an internal array of log lines (EvalLogger.logs) for later retrieval.
9
- * - Can be initialized with a Stagehand instance to provide consistent logging.
10
- * - Supports logging at different levels (info, error, warn).
11
- * - Each log line is converted to a string and printed to console for immediate feedback.
12
- * - Also keeps a structured version of the logs that can be returned for analysis or
13
- * included in evaluation output.
14
- */
15
- export declare class EvalLogger {
16
- private logs;
17
- stagehand?: Stagehand;
18
- constructor();
19
- /**
20
- * init:
21
- * Associates this logger with a given Stagehand instance.
22
- * This allows the logger to provide additional context if needed.
23
- */
24
- init(stagehand: Stagehand): void;
25
- /**
26
- * log:
27
- * Logs a message at the default (info) level.
28
- * Uses `logLineToString` to produce a readable output on the console,
29
- * and then stores the parsed log line in `this.logs`.
30
- */
31
- log(logLine: LogLine): void;
32
- /**
33
- * error:
34
- * Logs an error message with `console.error` and stores it.
35
- * Useful for capturing and differentiating error-level logs.
36
- */
37
- error(logLine: LogLine): void;
38
- /**
39
- * warn:
40
- * Logs a warning message with `console.warn` and stores it.
41
- * Helps differentiate warnings from regular info logs.
42
- */
43
- warn(logLine: LogLine): void;
44
- /**
45
- * getLogs:
46
- * Retrieves the array of stored log lines.
47
- * Useful for returning logs after a task completes, for analysis or debugging.
48
- */
49
- getLogs(): LogLineEval[];
50
- }
@@ -1,25 +0,0 @@
1
- /**
2
- * This file implements scoring functions needed by braintrust.
3
- */
4
- import { EvalArgs, EvalInput, EvalResult } from "@/types/evals";
5
- /**
6
- * Scoring function: exactMatch
7
- * Given the arguments (including input, output, and expected result),
8
- * this returns a score of 1 if the result matches the expectation, and 0 otherwise.
9
- *
10
- * If "expected" is true, it checks if the output indicates success.
11
- * If "expected" is a boolean or an object with _success flag,
12
- * it checks if output is exactly that success condition.
13
- */
14
- export declare function exactMatch(args: EvalArgs<EvalInput, boolean | {
15
- _success: boolean;
16
- }, unknown>): EvalResult;
17
- /**
18
- * Scoring function: errorMatch
19
- * Determines if an error occurred in the task.
20
- * Scores 1 if an error is found, otherwise 0.
21
- */
22
- export declare function errorMatch(args: EvalArgs<EvalInput, boolean | {
23
- _success: boolean;
24
- error?: unknown;
25
- }, unknown>): EvalResult;
@@ -1,2 +0,0 @@
1
- import type { Testcase } from "@/types/evals";
2
- export declare const buildGAIATestcases: (models: string[]) => Testcase[];
@@ -1,2 +0,0 @@
1
- import type { Testcase } from "@/types/evals";
2
- export declare const buildOnlineMind2WebTestcases: (models: string[]) => Testcase[];
@@ -1,2 +0,0 @@
1
- import type { Testcase } from "@/types/evals";
2
- export declare const buildOSWorldTestcases: (models: string[]) => Testcase[];
@@ -1,2 +0,0 @@
1
- import type { Testcase } from "@/types/evals";
2
- export declare const buildWebBenchTestcases: (models: string[]) => Testcase[];
@@ -1,2 +0,0 @@
1
- import type { Testcase } from "@/types/evals";
2
- export declare const buildWebVoyagerTestcases: (models: string[]) => Testcase[];
@@ -1,33 +0,0 @@
1
- /**
2
- * This file is responsible for:
3
- * - Loading and parsing the `evals.config.json` file, which defines tasks (evaluations) and their associated categories.
4
- * - Building a lookup structure (`tasksByName`) to map each task name to its categories.
5
- * - Filtering tasks based on command-line arguments (e.g., `filterByEvalName`) and ensuring that requested tasks exist.
6
- * - Determining which models to use for evaluations, depending on the category and environment variables.
7
- * - Validating that the chosen models are supported.
8
- *
9
- * The exported objects (`tasksByName`, `MODELS`, `config`) are used by the main evaluation script and other modules
10
- * to know which tasks and models are available, and to configure the evaluations accordingly.
11
- */
12
- import { AvailableModel } from "@browserbasehq/stagehand";
13
- /**
14
- * The `tasksConfig` defines all tasks from the config file. Each task has a name and categories.
15
- * We create a mapping `tasksByName` from task name to its categories for quick lookup.
16
- */
17
- type TaskConfig = {
18
- name: string;
19
- categories: string[];
20
- };
21
- declare const tasksConfig: TaskConfig[];
22
- declare const tasksByName: Record<string, {
23
- categories: string[];
24
- }>;
25
- /**
26
- * getModelList:
27
- * Returns a list of models to be used for the given category.
28
- * If category is "experimental", it merges DEFAULT_EVAL_MODELS and EXPERIMENTAL_EVAL_MODELS.
29
- * Otherwise, returns DEFAULT_EVAL_MODELS filtered by provider if specified.
30
- */
31
- declare const getModelList: (category?: string) => string[];
32
- declare const MODELS: AvailableModel[];
33
- export { tasksByName, MODELS, tasksConfig, getModelList };
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const all_recipes: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const apple_trade_in: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const apple_tv: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const arxiv_gpt_report: EvalFunction;
@@ -1,9 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- /**
3
- * Data-driven GAIA agent eval
4
- * - Expects per-test params injected via eval runner: { id, level, web, ques }
5
- * - Starts at `web`, runs the agent with `ques` as instruction
6
- * - Requires the agent to output a final answer in the form: "Final Answer: <value>"
7
- * - Marks success if such an answer string is present (exact matching against dataset can be layered later)
8
- */
9
- export declare const gaia: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const github: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const github_react_version: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const google_flights: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const google_maps: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const google_maps_2: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const google_maps_3: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const google_shopping: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const hotel_booking: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const hugging_face: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const iframe_form: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const iframe_form_multiple: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const kayak: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const kith: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const nba_trades: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const onlineMind2Web: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const osworld: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const sf_library_card: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const sf_library_card_multiple: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const sign_in: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const steam_games: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const trivago: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const ubereats: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const webbench: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const webvoyager: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const youtube: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const allrecipes: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const amazon_add_to_cart: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const apple: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const arxiv: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const bidnet: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const checkboxes: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const combination_sauce: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const costar: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const csr_in_oopif: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const csr_in_spif: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const custom_dropdown: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const dropdown: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const expect_act_timeout: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const extract_aigrant_companies: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const extract_aigrant_targeted: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const extract_aigrant_targeted_2: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "../../types/evals";
2
- export declare const extract_apartments: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const extract_area_codes: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const extract_baptist_health: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const extract_capacitor_info: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const extract_collaborators: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const extract_csa: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const extract_geniusee: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const extract_geniusee_2: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const extract_github_commits: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const extract_github_stars: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const extract_hamilton_weather: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const extract_jfk_links: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const extract_jstor_news: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const extract_memorial_healthcare: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const extract_nhl_stats: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const extract_partners: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const extract_press_releases: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const extract_professional_info: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const extract_public_notices: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const extract_recipe: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const extract_regulations_table: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const extract_repo_name: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const extract_resistor_info: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const extract_rockauto: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const extract_single_link: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const extract_snowshoeing_destinations: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const extract_staff_members: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "../../types/evals";
2
- export declare const extract_zillow: EvalFunction;
@@ -1,11 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- /**
3
- * This eval attempts to click on an element that should not pass the playwright actionability check
4
- * which happens by default if you call locator.click (more information here:
5
- * https://playwright.dev/docs/actionability)
6
- *
7
- * If this eval passes, it means that we have correctly set {force: true} in performPlaywrightMethod,
8
- * and the click was successful even though the target element (found by the xpath) did not
9
- * pass the actionability check.
10
- */
11
- export declare const google_flights: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const heal_custom_dropdown: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const heal_scroll_50: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const heal_simple_google_search: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const hidden_input_dropdown: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const history: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const homedepot: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const iframe_form_filling: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const iframe_hn: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const iframe_same_proc: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const iframe_scroll: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const iframes_nested: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const imdb_movie_details: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const instructions: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const ionwave: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const ionwave_observe: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const login: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const multi_tab: EvalFunction;
@@ -1,2 +0,0 @@
1
- import { EvalFunction } from "@/types/evals";
2
- export declare const namespace_xpath: EvalFunction;