@browserbasehq/stagehand 2.5.3-alpha-4994eabfa8016347ec001b40ceadb0c905403470 → 4.0.0-alpha-49bc5b68e4fbeb28a120b301b51c31a36db7d922
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +1367 -751
- package/dist/index.js +37087 -22062
- package/package.json +54 -78
- package/README.md +0 -165
- package/dist/evals/args.d.ts +0 -15
- package/dist/evals/cli.d.ts +0 -2
- package/dist/evals/core/summary.d.ts +0 -2
- package/dist/evals/datasets/osworld/adapter.d.ts +0 -22
- package/dist/evals/datasets/osworld/index.d.ts +0 -9
- package/dist/evals/datasets/osworld/types.d.ts +0 -54
- package/dist/evals/deterministic/bb.playwright.config.d.ts +0 -5
- package/dist/evals/deterministic/e2e.playwright.config.d.ts +0 -5
- package/dist/evals/deterministic/local.playwright.config.d.ts +0 -5
- package/dist/evals/deterministic/stagehand.config.d.ts +0 -3
- package/dist/evals/deterministic/tests/BrowserContext/addInitScript.test.d.ts +0 -1
- package/dist/evals/deterministic/tests/BrowserContext/cookies.test.d.ts +0 -1
- package/dist/evals/deterministic/tests/BrowserContext/multiPage.test.d.ts +0 -1
- package/dist/evals/deterministic/tests/BrowserContext/page.test.d.ts +0 -1
- package/dist/evals/deterministic/tests/BrowserContext/routing.test.d.ts +0 -1
- package/dist/evals/deterministic/tests/Errors/apiKeyError.test.d.ts +0 -1
- package/dist/evals/deterministic/tests/browserbase/contexts.test.d.ts +0 -1
- package/dist/evals/deterministic/tests/browserbase/downloads.test.d.ts +0 -1
- package/dist/evals/deterministic/tests/browserbase/sessions.test.d.ts +0 -1
- package/dist/evals/deterministic/tests/browserbase/uploads.test.d.ts +0 -1
- package/dist/evals/deterministic/tests/local/create.test.d.ts +0 -1
- package/dist/evals/deterministic/tests/local/downloads.test.d.ts +0 -1
- package/dist/evals/deterministic/tests/page/addInitScript.test.d.ts +0 -1
- package/dist/evals/deterministic/tests/page/addRemoveLocatorHandler.test.d.ts +0 -1
- package/dist/evals/deterministic/tests/page/addTags.test.d.ts +0 -1
- package/dist/evals/deterministic/tests/page/bringToFront.test.d.ts +0 -1
- package/dist/evals/deterministic/tests/page/content.test.d.ts +0 -1
- package/dist/evals/deterministic/tests/page/evaluate.test.d.ts +0 -1
- package/dist/evals/deterministic/tests/page/expose.test.d.ts +0 -1
- package/dist/evals/deterministic/tests/page/frames.test.d.ts +0 -1
- package/dist/evals/deterministic/tests/page/getBy.test.d.ts +0 -1
- package/dist/evals/deterministic/tests/page/livePageProxy.test.d.ts +0 -1
- package/dist/evals/deterministic/tests/page/navigation.test.d.ts +0 -1
- package/dist/evals/deterministic/tests/page/on.test.d.ts +0 -1
- package/dist/evals/deterministic/tests/page/pageContext.test.d.ts +0 -1
- package/dist/evals/deterministic/tests/page/reload.test.d.ts +0 -1
- package/dist/evals/deterministic/tests/page/waitFor.test.d.ts +0 -1
- package/dist/evals/env.d.ts +0 -13
- package/dist/evals/evaluator.d.ts +0 -29
- package/dist/evals/index.eval.d.ts +0 -1
- package/dist/evals/initStagehand.d.ts +0 -37
- package/dist/evals/llm_clients/hn_aisdk.d.ts +0 -2
- package/dist/evals/llm_clients/hn_customOpenAI.d.ts +0 -2
- package/dist/evals/llm_clients/hn_langchain.d.ts +0 -2
- package/dist/evals/logger.d.ts +0 -50
- package/dist/evals/scoring.d.ts +0 -25
- package/dist/evals/suites/gaia.d.ts +0 -2
- package/dist/evals/suites/onlineMind2Web.d.ts +0 -2
- package/dist/evals/suites/osworld.d.ts +0 -2
- package/dist/evals/suites/webbench.d.ts +0 -2
- package/dist/evals/suites/webvoyager.d.ts +0 -2
- package/dist/evals/taskConfig.d.ts +0 -33
- package/dist/evals/tasks/agent/all_recipes.d.ts +0 -2
- package/dist/evals/tasks/agent/apple_trade_in.d.ts +0 -2
- package/dist/evals/tasks/agent/apple_tv.d.ts +0 -2
- package/dist/evals/tasks/agent/arxiv_gpt_report.d.ts +0 -2
- package/dist/evals/tasks/agent/gaia.d.ts +0 -9
- package/dist/evals/tasks/agent/github.d.ts +0 -2
- package/dist/evals/tasks/agent/github_react_version.d.ts +0 -2
- package/dist/evals/tasks/agent/google_flights.d.ts +0 -2
- package/dist/evals/tasks/agent/google_maps.d.ts +0 -2
- package/dist/evals/tasks/agent/google_maps_2.d.ts +0 -2
- package/dist/evals/tasks/agent/google_maps_3.d.ts +0 -2
- package/dist/evals/tasks/agent/google_shopping.d.ts +0 -2
- package/dist/evals/tasks/agent/hotel_booking.d.ts +0 -2
- package/dist/evals/tasks/agent/hugging_face.d.ts +0 -2
- package/dist/evals/tasks/agent/iframe_form.d.ts +0 -2
- package/dist/evals/tasks/agent/iframe_form_multiple.d.ts +0 -2
- package/dist/evals/tasks/agent/kayak.d.ts +0 -2
- package/dist/evals/tasks/agent/kith.d.ts +0 -2
- package/dist/evals/tasks/agent/nba_trades.d.ts +0 -2
- package/dist/evals/tasks/agent/onlineMind2Web.d.ts +0 -2
- package/dist/evals/tasks/agent/osworld.d.ts +0 -2
- package/dist/evals/tasks/agent/sf_library_card.d.ts +0 -2
- package/dist/evals/tasks/agent/sf_library_card_multiple.d.ts +0 -2
- package/dist/evals/tasks/agent/sign_in.d.ts +0 -2
- package/dist/evals/tasks/agent/steam_games.d.ts +0 -2
- package/dist/evals/tasks/agent/trivago.d.ts +0 -2
- package/dist/evals/tasks/agent/ubereats.d.ts +0 -2
- package/dist/evals/tasks/agent/webbench.d.ts +0 -2
- package/dist/evals/tasks/agent/webvoyager.d.ts +0 -2
- package/dist/evals/tasks/agent/youtube.d.ts +0 -2
- package/dist/evals/tasks/allrecipes.d.ts +0 -2
- package/dist/evals/tasks/amazon_add_to_cart.d.ts +0 -2
- package/dist/evals/tasks/apple.d.ts +0 -2
- package/dist/evals/tasks/arxiv.d.ts +0 -2
- package/dist/evals/tasks/bidnet.d.ts +0 -2
- package/dist/evals/tasks/checkboxes.d.ts +0 -2
- package/dist/evals/tasks/combination_sauce.d.ts +0 -2
- package/dist/evals/tasks/costar.d.ts +0 -2
- package/dist/evals/tasks/csr_in_oopif.d.ts +0 -2
- package/dist/evals/tasks/csr_in_spif.d.ts +0 -2
- package/dist/evals/tasks/custom_dropdown.d.ts +0 -2
- package/dist/evals/tasks/dropdown.d.ts +0 -2
- package/dist/evals/tasks/expect_act_timeout.d.ts +0 -2
- package/dist/evals/tasks/extract_aigrant_companies.d.ts +0 -2
- package/dist/evals/tasks/extract_aigrant_targeted.d.ts +0 -2
- package/dist/evals/tasks/extract_aigrant_targeted_2.d.ts +0 -2
- package/dist/evals/tasks/extract_apartments.d.ts +0 -2
- package/dist/evals/tasks/extract_area_codes.d.ts +0 -2
- package/dist/evals/tasks/extract_baptist_health.d.ts +0 -2
- package/dist/evals/tasks/extract_capacitor_info.d.ts +0 -2
- package/dist/evals/tasks/extract_collaborators.d.ts +0 -2
- package/dist/evals/tasks/extract_csa.d.ts +0 -2
- package/dist/evals/tasks/extract_geniusee.d.ts +0 -2
- package/dist/evals/tasks/extract_geniusee_2.d.ts +0 -2
- package/dist/evals/tasks/extract_github_commits.d.ts +0 -2
- package/dist/evals/tasks/extract_github_stars.d.ts +0 -2
- package/dist/evals/tasks/extract_hamilton_weather.d.ts +0 -2
- package/dist/evals/tasks/extract_jfk_links.d.ts +0 -2
- package/dist/evals/tasks/extract_jstor_news.d.ts +0 -2
- package/dist/evals/tasks/extract_memorial_healthcare.d.ts +0 -2
- package/dist/evals/tasks/extract_nhl_stats.d.ts +0 -2
- package/dist/evals/tasks/extract_partners.d.ts +0 -2
- package/dist/evals/tasks/extract_press_releases.d.ts +0 -2
- package/dist/evals/tasks/extract_professional_info.d.ts +0 -2
- package/dist/evals/tasks/extract_public_notices.d.ts +0 -2
- package/dist/evals/tasks/extract_recipe.d.ts +0 -2
- package/dist/evals/tasks/extract_regulations_table.d.ts +0 -2
- package/dist/evals/tasks/extract_repo_name.d.ts +0 -2
- package/dist/evals/tasks/extract_resistor_info.d.ts +0 -2
- package/dist/evals/tasks/extract_rockauto.d.ts +0 -2
- package/dist/evals/tasks/extract_single_link.d.ts +0 -2
- package/dist/evals/tasks/extract_snowshoeing_destinations.d.ts +0 -2
- package/dist/evals/tasks/extract_staff_members.d.ts +0 -2
- package/dist/evals/tasks/extract_zillow.d.ts +0 -2
- package/dist/evals/tasks/google_flights.d.ts +0 -11
- package/dist/evals/tasks/heal_custom_dropdown.d.ts +0 -2
- package/dist/evals/tasks/heal_scroll_50.d.ts +0 -2
- package/dist/evals/tasks/heal_simple_google_search.d.ts +0 -2
- package/dist/evals/tasks/hidden_input_dropdown.d.ts +0 -2
- package/dist/evals/tasks/history.d.ts +0 -2
- package/dist/evals/tasks/homedepot.d.ts +0 -2
- package/dist/evals/tasks/iframe_form_filling.d.ts +0 -2
- package/dist/evals/tasks/iframe_hn.d.ts +0 -2
- package/dist/evals/tasks/iframe_same_proc.d.ts +0 -2
- package/dist/evals/tasks/iframe_scroll.d.ts +0 -2
- package/dist/evals/tasks/iframes_nested.d.ts +0 -2
- package/dist/evals/tasks/imdb_movie_details.d.ts +0 -2
- package/dist/evals/tasks/instructions.d.ts +0 -2
- package/dist/evals/tasks/ionwave.d.ts +0 -2
- package/dist/evals/tasks/ionwave_observe.d.ts +0 -2
- package/dist/evals/tasks/login.d.ts +0 -2
- package/dist/evals/tasks/multi_tab.d.ts +0 -2
- package/dist/evals/tasks/namespace_xpath.d.ts +0 -2
- package/dist/evals/tasks/nested_iframes_2.d.ts +0 -2
- package/dist/evals/tasks/nextChunk.d.ts +0 -2
- package/dist/evals/tasks/no_js_click.d.ts +0 -2
- package/dist/evals/tasks/nonsense_action.d.ts +0 -2
- package/dist/evals/tasks/observe_amazon_add_to_cart.d.ts +0 -2
- package/dist/evals/tasks/observe_github.d.ts +0 -2
- package/dist/evals/tasks/observe_iframes1.d.ts +0 -2
- package/dist/evals/tasks/observe_iframes2.d.ts +0 -2
- package/dist/evals/tasks/observe_simple_google_search.d.ts +0 -2
- package/dist/evals/tasks/observe_taxes.d.ts +0 -2
- package/dist/evals/tasks/observe_vantechjournal.d.ts +0 -2
- package/dist/evals/tasks/observe_yc_startup.d.ts +0 -2
- package/dist/evals/tasks/oopif_in_csr.d.ts +0 -2
- package/dist/evals/tasks/oopif_in_osr.d.ts +0 -2
- package/dist/evals/tasks/os_dropdown.d.ts +0 -2
- package/dist/evals/tasks/osr_in_oopif.d.ts +0 -2
- package/dist/evals/tasks/osr_in_spif.d.ts +0 -2
- package/dist/evals/tasks/panamcs.d.ts +0 -2
- package/dist/evals/tasks/peeler_complex.d.ts +0 -2
- package/dist/evals/tasks/peeler_simple.d.ts +0 -2
- package/dist/evals/tasks/prevChunk.d.ts +0 -2
- package/dist/evals/tasks/radio_btn.d.ts +0 -2
- package/dist/evals/tasks/rakuten_jp.d.ts +0 -2
- package/dist/evals/tasks/sciquest.d.ts +0 -2
- package/dist/evals/tasks/screenshot_cdp_toggle.d.ts +0 -9
- package/dist/evals/tasks/scroll_50.d.ts +0 -2
- package/dist/evals/tasks/scroll_75.d.ts +0 -2
- package/dist/evals/tasks/shadow_dom.d.ts +0 -2
- package/dist/evals/tasks/simple_google_search.d.ts +0 -2
- package/dist/evals/tasks/spif_in_csr.d.ts +0 -2
- package/dist/evals/tasks/spif_in_osr.d.ts +0 -2
- package/dist/evals/tasks/stock_x.d.ts +0 -2
- package/dist/evals/tasks/tab_handling.d.ts +0 -2
- package/dist/evals/tasks/ted_talk.d.ts +0 -2
- package/dist/evals/tasks/vanta_h.d.ts +0 -2
- package/dist/evals/tasks/vantechjournal.d.ts +0 -2
- package/dist/evals/tasks/wichita.d.ts +0 -2
- package/dist/evals/tasks/wikipedia.d.ts +0 -2
- package/dist/evals/utils/ScreenshotCollector.d.ts +0 -34
- package/dist/evals/utils/imageUtils.d.ts +0 -1
- package/dist/evals/utils.d.ts +0 -61
- package/dist/examples/2048.d.ts +0 -1
- package/dist/examples/actionable_observe_example.d.ts +0 -14
- package/dist/examples/cua-example.d.ts +0 -1
- package/dist/examples/custom_client_aisdk.d.ts +0 -1
- package/dist/examples/custom_client_langchain.d.ts +0 -1
- package/dist/examples/custom_client_openai.d.ts +0 -1
- package/dist/examples/example.d.ts +0 -1
- package/dist/examples/external_clients/aisdk.d.ts +0 -20
- package/dist/examples/external_clients/customOpenAI.d.ts +0 -18
- package/dist/examples/external_clients/langchain.d.ts +0 -9
- package/dist/examples/form_filling_sensible.d.ts +0 -1
- package/dist/examples/google_enter.d.ts +0 -6
- package/dist/examples/instructions.d.ts +0 -1
- package/dist/examples/integrations/exa.d.ts +0 -1
- package/dist/examples/integrations/supabase.d.ts +0 -1
- package/dist/examples/operator-example.d.ts +0 -8
- package/dist/examples/parameterizeApiKey.d.ts +0 -1
- package/dist/examples/popup.d.ts +0 -6
- package/dist/examples/wordle.d.ts +0 -1
- package/dist/lib/StagehandContext.d.ts +0 -25
- package/dist/lib/StagehandPage.d.ts +0 -103
- package/dist/lib/a11y/utils.d.ts +0 -144
- package/dist/lib/agent/AgentClient.d.ts +0 -20
- package/dist/lib/agent/AgentProvider.d.ts +0 -19
- package/dist/lib/agent/AnthropicCUAClient.d.ts +0 -56
- package/dist/lib/agent/GoogleCUAClient.d.ts +0 -63
- package/dist/lib/agent/OpenAICUAClient.d.ts +0 -65
- package/dist/lib/agent/StagehandAgent.d.ts +0 -15
- package/dist/lib/agent/tools/act.d.ts +0 -59
- package/dist/lib/agent/tools/ariaTree.d.ts +0 -11
- package/dist/lib/agent/tools/close.d.ts +0 -22
- package/dist/lib/agent/tools/extract.d.ts +0 -38
- package/dist/lib/agent/tools/fillform.d.ts +0 -37
- package/dist/lib/agent/tools/goto.d.ts +0 -29
- package/dist/lib/agent/tools/index.d.ts +0 -257
- package/dist/lib/agent/tools/navback.d.ts +0 -17
- package/dist/lib/agent/tools/screenshot.d.ts +0 -13
- package/dist/lib/agent/tools/scroll.d.ts +0 -23
- package/dist/lib/agent/tools/wait.d.ts +0 -18
- package/dist/lib/agent/utils/cuaKeyMapping.d.ts +0 -10
- package/dist/lib/agent/utils/imageCompression.d.ts +0 -53
- package/dist/lib/agent/utils/messageProcessing.d.ts +0 -13
- package/dist/lib/api.d.ts +0 -23
- package/dist/lib/browserbaseDefaults.d.ts +0 -9
- package/dist/lib/cache/ActionCache.d.ts +0 -62
- package/dist/lib/cache/BaseCache.d.ts +0 -66
- package/dist/lib/cache/LLMCache.d.ts +0 -22
- package/dist/lib/cache.d.ts +0 -29
- package/dist/lib/dom/build/scriptContent.d.ts +0 -1
- package/dist/lib/dom/elementCheckUtils.d.ts +0 -2
- package/dist/lib/dom/genDomScripts.d.ts +0 -1
- package/dist/lib/dom/index.d.ts +0 -2
- package/dist/lib/dom/process.d.ts +0 -17
- package/dist/lib/dom/utils.d.ts +0 -7
- package/dist/lib/dom/xpathUtils.d.ts +0 -14
- package/dist/lib/handlers/actHandler.d.ts +0 -33
- package/dist/lib/handlers/cuaAgentHandler.d.ts +0 -58
- package/dist/lib/handlers/extractHandler.d.ts +0 -54
- package/dist/lib/handlers/handlerUtils/actHandlerUtils.d.ts +0 -21
- package/dist/lib/handlers/observeHandler.d.ts +0 -40
- package/dist/lib/handlers/stagehandAgentHandler.d.ts +0 -27
- package/dist/lib/index.d.ts +0 -94
- package/dist/lib/inference.d.ts +0 -61
- package/dist/lib/inferenceLogUtils.d.ts +0 -12
- package/dist/lib/llm/AnthropicClient.d.ts +0 -21
- package/dist/lib/llm/CerebrasClient.d.ts +0 -22
- package/dist/lib/llm/GoogleClient.d.ts +0 -24
- package/dist/lib/llm/GroqClient.d.ts +0 -22
- package/dist/lib/llm/LLMClient.d.ts +0 -99
- package/dist/lib/llm/LLMProvider.d.ts +0 -13
- package/dist/lib/llm/OpenAIClient.d.ts +0 -20
- package/dist/lib/llm/aisdk.d.ts +0 -20
- package/dist/lib/logger.d.ts +0 -54
- package/dist/lib/mcp/connection.d.ts +0 -11
- package/dist/lib/mcp/utils.d.ts +0 -3
- package/dist/lib/prompt.d.ts +0 -14
- package/dist/lib/utils.d.ts +0 -68
- package/dist/lib/version.d.ts +0 -5
- package/dist/scripts/gen-version.d.ts +0 -1
- package/dist/scripts/run-evals.d.ts +0 -1
- package/dist/stagehand.config.d.ts +0 -3
- package/dist/types/act.d.ts +0 -50
- package/dist/types/agent.d.ts +0 -144
- package/dist/types/api.d.ts +0 -40
- package/dist/types/browser.d.ts +0 -10
- package/dist/types/context.d.ts +0 -117
- package/dist/types/evals.d.ts +0 -95
- package/dist/types/evaluator.d.ts +0 -40
- package/dist/types/llm.d.ts +0 -11
- package/dist/types/log.d.ts +0 -23
- package/dist/types/model.d.ts +0 -17
- package/dist/types/page.d.ts +0 -38
- package/dist/types/playwright.d.ts +0 -12
- package/dist/types/stagehand.d.ts +0 -330
- package/dist/types/stagehandApiErrors.d.ts +0 -18
- package/dist/types/stagehandErrors.d.ts +0 -104
|
@@ -1,37 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* This file provides a function to initialize a Stagehand instance for use in evaluations.
|
|
3
|
-
* It configures the Stagehand environment and sets default options based on the current environment
|
|
4
|
-
* (e.g., local or BROWSERBASE), caching preferences, and verbosity. It also establishes a logger for
|
|
5
|
-
* capturing logs emitted by Stagehand.
|
|
6
|
-
*
|
|
7
|
-
* We create a central config object (`StagehandConfig`) that defines all parameters for Stagehand.
|
|
8
|
-
*
|
|
9
|
-
* The `initStagehand` function takes the model name, an optional DOM settling timeout, and an EvalLogger,
|
|
10
|
-
* then uses these to override some default values before creating and initializing the Stagehand instance.
|
|
11
|
-
*/
|
|
12
|
-
import { ConstructorParams, LLMClient } from "@browserbasehq/stagehand";
|
|
13
|
-
import { EvalLogger } from "./logger";
|
|
14
|
-
import type { StagehandInitResult } from "@/types/evals";
|
|
15
|
-
import { AvailableModel } from "@browserbasehq/stagehand";
|
|
16
|
-
/**
|
|
17
|
-
* Initializes a Stagehand instance for a given model:
|
|
18
|
-
* - modelName: The model to use (overrides default in StagehandConfig)
|
|
19
|
-
* - domSettleTimeoutMs: Optional timeout for DOM settling operations
|
|
20
|
-
* - logger: An EvalLogger instance for capturing logs
|
|
21
|
-
*
|
|
22
|
-
* Returns:
|
|
23
|
-
* - stagehand: The initialized Stagehand instance
|
|
24
|
-
* - logger: The provided logger, associated with the Stagehand instance
|
|
25
|
-
* - initResponse: Any response data returned by Stagehand initialization
|
|
26
|
-
*/
|
|
27
|
-
export declare const initStagehand: ({ llmClient, modelClientOptions, domSettleTimeoutMs, logger, configOverrides, actTimeoutMs, modelName, }: {
|
|
28
|
-
llmClient?: LLMClient;
|
|
29
|
-
modelClientOptions?: {
|
|
30
|
-
apiKey: string;
|
|
31
|
-
};
|
|
32
|
-
domSettleTimeoutMs?: number;
|
|
33
|
-
logger: EvalLogger;
|
|
34
|
-
configOverrides?: Partial<ConstructorParams>;
|
|
35
|
-
actTimeoutMs?: number;
|
|
36
|
-
modelName: AvailableModel;
|
|
37
|
-
}) => Promise<StagehandInitResult>;
|
package/dist/evals/logger.d.ts
DELETED
|
@@ -1,50 +0,0 @@
|
|
|
1
|
-
import { LogLineEval } from "@/types/evals";
|
|
2
|
-
import { Stagehand, LogLine } from "@browserbasehq/stagehand";
|
|
3
|
-
/**
|
|
4
|
-
* EvalLogger:
|
|
5
|
-
* A logger class used during evaluations to capture and print log lines.
|
|
6
|
-
*
|
|
7
|
-
* Capabilities:
|
|
8
|
-
* - Maintains an internal array of log lines (EvalLogger.logs) for later retrieval.
|
|
9
|
-
* - Can be initialized with a Stagehand instance to provide consistent logging.
|
|
10
|
-
* - Supports logging at different levels (info, error, warn).
|
|
11
|
-
* - Each log line is converted to a string and printed to console for immediate feedback.
|
|
12
|
-
* - Also keeps a structured version of the logs that can be returned for analysis or
|
|
13
|
-
* included in evaluation output.
|
|
14
|
-
*/
|
|
15
|
-
export declare class EvalLogger {
|
|
16
|
-
private logs;
|
|
17
|
-
stagehand?: Stagehand;
|
|
18
|
-
constructor();
|
|
19
|
-
/**
|
|
20
|
-
* init:
|
|
21
|
-
* Associates this logger with a given Stagehand instance.
|
|
22
|
-
* This allows the logger to provide additional context if needed.
|
|
23
|
-
*/
|
|
24
|
-
init(stagehand: Stagehand): void;
|
|
25
|
-
/**
|
|
26
|
-
* log:
|
|
27
|
-
* Logs a message at the default (info) level.
|
|
28
|
-
* Uses `logLineToString` to produce a readable output on the console,
|
|
29
|
-
* and then stores the parsed log line in `this.logs`.
|
|
30
|
-
*/
|
|
31
|
-
log(logLine: LogLine): void;
|
|
32
|
-
/**
|
|
33
|
-
* error:
|
|
34
|
-
* Logs an error message with `console.error` and stores it.
|
|
35
|
-
* Useful for capturing and differentiating error-level logs.
|
|
36
|
-
*/
|
|
37
|
-
error(logLine: LogLine): void;
|
|
38
|
-
/**
|
|
39
|
-
* warn:
|
|
40
|
-
* Logs a warning message with `console.warn` and stores it.
|
|
41
|
-
* Helps differentiate warnings from regular info logs.
|
|
42
|
-
*/
|
|
43
|
-
warn(logLine: LogLine): void;
|
|
44
|
-
/**
|
|
45
|
-
* getLogs:
|
|
46
|
-
* Retrieves the array of stored log lines.
|
|
47
|
-
* Useful for returning logs after a task completes, for analysis or debugging.
|
|
48
|
-
*/
|
|
49
|
-
getLogs(): LogLineEval[];
|
|
50
|
-
}
|
package/dist/evals/scoring.d.ts
DELETED
|
@@ -1,25 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* This file implements scoring functions needed by braintrust.
|
|
3
|
-
*/
|
|
4
|
-
import { EvalArgs, EvalInput, EvalResult } from "@/types/evals";
|
|
5
|
-
/**
|
|
6
|
-
* Scoring function: exactMatch
|
|
7
|
-
* Given the arguments (including input, output, and expected result),
|
|
8
|
-
* this returns a score of 1 if the result matches the expectation, and 0 otherwise.
|
|
9
|
-
*
|
|
10
|
-
* If "expected" is true, it checks if the output indicates success.
|
|
11
|
-
* If "expected" is a boolean or an object with _success flag,
|
|
12
|
-
* it checks if output is exactly that success condition.
|
|
13
|
-
*/
|
|
14
|
-
export declare function exactMatch(args: EvalArgs<EvalInput, boolean | {
|
|
15
|
-
_success: boolean;
|
|
16
|
-
}, unknown>): EvalResult;
|
|
17
|
-
/**
|
|
18
|
-
* Scoring function: errorMatch
|
|
19
|
-
* Determines if an error occurred in the task.
|
|
20
|
-
* Scores 1 if an error is found, otherwise 0.
|
|
21
|
-
*/
|
|
22
|
-
export declare function errorMatch(args: EvalArgs<EvalInput, boolean | {
|
|
23
|
-
_success: boolean;
|
|
24
|
-
error?: unknown;
|
|
25
|
-
}, unknown>): EvalResult;
|
|
@@ -1,33 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* This file is responsible for:
|
|
3
|
-
* - Loading and parsing the `evals.config.json` file, which defines tasks (evaluations) and their associated categories.
|
|
4
|
-
* - Building a lookup structure (`tasksByName`) to map each task name to its categories.
|
|
5
|
-
* - Filtering tasks based on command-line arguments (e.g., `filterByEvalName`) and ensuring that requested tasks exist.
|
|
6
|
-
* - Determining which models to use for evaluations, depending on the category and environment variables.
|
|
7
|
-
* - Validating that the chosen models are supported.
|
|
8
|
-
*
|
|
9
|
-
* The exported objects (`tasksByName`, `MODELS`, `config`) are used by the main evaluation script and other modules
|
|
10
|
-
* to know which tasks and models are available, and to configure the evaluations accordingly.
|
|
11
|
-
*/
|
|
12
|
-
import { AvailableModel } from "@browserbasehq/stagehand";
|
|
13
|
-
/**
|
|
14
|
-
* The `tasksConfig` defines all tasks from the config file. Each task has a name and categories.
|
|
15
|
-
* We create a mapping `tasksByName` from task name to its categories for quick lookup.
|
|
16
|
-
*/
|
|
17
|
-
type TaskConfig = {
|
|
18
|
-
name: string;
|
|
19
|
-
categories: string[];
|
|
20
|
-
};
|
|
21
|
-
declare const tasksConfig: TaskConfig[];
|
|
22
|
-
declare const tasksByName: Record<string, {
|
|
23
|
-
categories: string[];
|
|
24
|
-
}>;
|
|
25
|
-
/**
|
|
26
|
-
* getModelList:
|
|
27
|
-
* Returns a list of models to be used for the given category.
|
|
28
|
-
* If category is "experimental", it merges DEFAULT_EVAL_MODELS and EXPERIMENTAL_EVAL_MODELS.
|
|
29
|
-
* Otherwise, returns DEFAULT_EVAL_MODELS filtered by provider if specified.
|
|
30
|
-
*/
|
|
31
|
-
declare const getModelList: (category?: string) => string[];
|
|
32
|
-
declare const MODELS: AvailableModel[];
|
|
33
|
-
export { tasksByName, MODELS, tasksConfig, getModelList };
|
|
@@ -1,9 +0,0 @@
|
|
|
1
|
-
import { EvalFunction } from "@/types/evals";
|
|
2
|
-
/**
|
|
3
|
-
* Data-driven GAIA agent eval
|
|
4
|
-
* - Expects per-test params injected via eval runner: { id, level, web, ques }
|
|
5
|
-
* - Starts at `web`, runs the agent with `ques` as instruction
|
|
6
|
-
* - Requires the agent to output a final answer in the form: "Final Answer: <value>"
|
|
7
|
-
* - Marks success if such an answer string is present (exact matching against dataset can be layered later)
|
|
8
|
-
*/
|
|
9
|
-
export declare const gaia: EvalFunction;
|
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
import { EvalFunction } from "@/types/evals";
|
|
2
|
-
/**
|
|
3
|
-
* This eval attempts to click on an element that should not pass the playwright actionability check
|
|
4
|
-
* which happens by default if you call locator.click (more information here:
|
|
5
|
-
* https://playwright.dev/docs/actionability)
|
|
6
|
-
*
|
|
7
|
-
* If this eval passes, it means that we have correctly set {force: true} in performPlaywrightMethod,
|
|
8
|
-
* and the click was successful even though the target element (found by the xpath) did not
|
|
9
|
-
* pass the actionability check.
|
|
10
|
-
*/
|
|
11
|
-
export declare const google_flights: EvalFunction;
|