@browserbasehq/orca 3.0.0-preview.0 → 3.0.0-preview.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +2 -0
- package/dist/index.js +43 -6
- package/dist/lib/StagehandContext.d.ts +25 -0
- package/dist/lib/StagehandPage.d.ts +103 -0
- package/dist/lib/a11y/utils.d.ts +144 -0
- package/dist/lib/agent/AgentClient.d.ts +20 -0
- package/dist/lib/agent/AgentProvider.d.ts +19 -0
- package/dist/lib/agent/AnthropicCUAClient.d.ts +56 -0
- package/dist/lib/agent/GoogleCUAClient.d.ts +63 -0
- package/dist/lib/agent/OpenAICUAClient.d.ts +65 -0
- package/dist/lib/agent/StagehandAgent.d.ts +15 -0
- package/dist/lib/agent/tools/act.d.ts +59 -0
- package/dist/lib/agent/tools/ariaTree.d.ts +11 -0
- package/dist/lib/agent/tools/close.d.ts +22 -0
- package/dist/lib/agent/tools/extract.d.ts +38 -0
- package/dist/lib/agent/tools/fillform.d.ts +37 -0
- package/dist/lib/agent/tools/goto.d.ts +29 -0
- package/dist/lib/agent/tools/index.d.ts +257 -0
- package/dist/lib/agent/tools/navback.d.ts +17 -0
- package/dist/lib/agent/tools/screenshot.d.ts +13 -0
- package/dist/lib/agent/tools/scroll.d.ts +23 -0
- package/dist/lib/agent/tools/wait.d.ts +18 -0
- package/dist/lib/agent/utils/cuaKeyMapping.d.ts +10 -0
- package/dist/lib/agent/utils/imageCompression.d.ts +53 -0
- package/dist/lib/agent/utils/messageProcessing.d.ts +13 -0
- package/dist/lib/browserbaseDefaults.d.ts +9 -0
- package/dist/lib/cache/ActionCache.d.ts +62 -0
- package/dist/lib/cache/BaseCache.d.ts +66 -0
- package/dist/lib/cache/LLMCache.d.ts +22 -0
- package/dist/lib/cache.d.ts +29 -0
- package/dist/lib/dom/elementCheckUtils.d.ts +2 -0
- package/dist/lib/dom/genDomScripts.d.ts +1 -0
- package/dist/lib/dom/index.d.ts +2 -0
- package/dist/lib/dom/process.d.ts +17 -0
- package/dist/lib/dom/utils.d.ts +7 -0
- package/dist/lib/dom/xpathUtils.d.ts +14 -0
- package/dist/lib/handlers/actHandler.d.ts +33 -0
- package/dist/lib/handlers/cuaAgentHandler.d.ts +58 -0
- package/dist/lib/handlers/extractHandler.d.ts +54 -0
- package/dist/lib/handlers/handlerUtils/actHandlerUtils.d.ts +21 -0
- package/dist/lib/handlers/observeHandler.d.ts +40 -0
- package/dist/lib/handlers/stagehandAgentHandler.d.ts +27 -0
- package/dist/lib/index.d.ts +94 -0
- package/dist/lib/llm/AnthropicClient.d.ts +21 -0
- package/dist/lib/llm/CerebrasClient.d.ts +22 -0
- package/dist/lib/llm/GoogleClient.d.ts +24 -0
- package/dist/lib/llm/GroqClient.d.ts +22 -0
- package/dist/lib/llm/LLMClient.d.ts +99 -0
- package/dist/lib/llm/LLMProvider.d.ts +13 -0
- package/dist/lib/llm/OpenAIClient.d.ts +20 -0
- package/dist/lib/llm/aisdk.d.ts +20 -0
- package/dist/lib/mcp/connection.d.ts +11 -0
- package/dist/lib/mcp/utils.d.ts +3 -0
- package/dist/lib/v3/tests/downloads.spec.d.ts +1 -0
- package/dist/lib/v3/tests/v3.bb.config.d.ts +4 -0
- package/dist/lib/v3/v3.d.ts +2 -0
- package/dist/lib/version.d.ts +1 -1
- package/dist/stagehand.config.d.ts +3 -0
- package/dist/types/act.d.ts +50 -0
- package/dist/types/agent.d.ts +143 -0
- package/dist/types/api.d.ts +40 -0
- package/dist/types/browser.d.ts +10 -0
- package/dist/types/context.d.ts +117 -0
- package/dist/types/evals.d.ts +94 -0
- package/dist/types/evaluator.d.ts +40 -0
- package/dist/types/llm.d.ts +11 -0
- package/dist/types/log.d.ts +23 -0
- package/dist/types/model.d.ts +17 -0
- package/dist/types/page.d.ts +38 -0
- package/dist/types/playwright.d.ts +12 -0
- package/dist/types/stagehand.d.ts +330 -0
- package/dist/types/stagehandApiErrors.d.ts +18 -0
- package/dist/types/stagehandErrors.d.ts +104 -0
- package/package.json +1 -1
package/dist/index.d.ts
CHANGED
|
@@ -1355,6 +1355,7 @@ declare class V3 {
|
|
|
1355
1355
|
private llmProvider;
|
|
1356
1356
|
private readonly domSettleTimeoutMs?;
|
|
1357
1357
|
private _isClosing;
|
|
1358
|
+
browserbaseSessionId?: string;
|
|
1358
1359
|
private _onCdpClosed;
|
|
1359
1360
|
readonly experimental: boolean;
|
|
1360
1361
|
readonly logInferenceToFile: boolean;
|
|
@@ -1397,6 +1398,7 @@ declare class V3 {
|
|
|
1397
1398
|
init(): Promise<void>;
|
|
1398
1399
|
/** Apply post-connect local browser options that require CDP. */
|
|
1399
1400
|
private _applyPostConnectLocalOptions;
|
|
1401
|
+
private _ensureBrowserbaseDownloadsEnabled;
|
|
1400
1402
|
/**
|
|
1401
1403
|
* Run an "act" instruction through the ActHandler.
|
|
1402
1404
|
*
|
package/dist/index.js
CHANGED
|
@@ -1253,7 +1253,7 @@ function decorateRoles(nodes, opts) {
|
|
|
1253
1253
|
const domIsScrollable = encodedId ? opts.scrollableMap[encodedId] === true : false;
|
|
1254
1254
|
const tag = encodedId ? opts.tagNameMap[encodedId] : void 0;
|
|
1255
1255
|
const isHtmlElement = tag === "html";
|
|
1256
|
-
if (domIsScrollable || isHtmlElement) {
|
|
1256
|
+
if ((domIsScrollable || isHtmlElement) && tag !== "#document") {
|
|
1257
1257
|
const tagLabel = tag && tag.startsWith("#") ? tag.slice(1) : tag;
|
|
1258
1258
|
role = tagLabel ? `scrollable, ${tagLabel}` : `scrollable${role ? `, ${role}` : ""}`;
|
|
1259
1259
|
}
|
|
@@ -6660,7 +6660,7 @@ init_deepLocator();
|
|
|
6660
6660
|
init_logger2();
|
|
6661
6661
|
|
|
6662
6662
|
// lib/version.ts
|
|
6663
|
-
var STAGEHAND_VERSION = "3.0.0-preview.0";
|
|
6663
|
+
var STAGEHAND_VERSION = "3.0.0-preview.1";
|
|
6664
6664
|
|
|
6665
6665
|
// lib/v3/types/stagehandErrors.ts
|
|
6666
6666
|
var StagehandError = class extends Error {
|
|
@@ -7738,12 +7738,30 @@ function transformSchema(schema, currentPath) {
|
|
|
7738
7738
|
return [schema, []];
|
|
7739
7739
|
}
|
|
7740
7740
|
function injectUrls(obj, path5, idToUrlMapping) {
|
|
7741
|
-
var _a15;
|
|
7741
|
+
var _a15, _b;
|
|
7742
7742
|
if (path5.length === 0) return;
|
|
7743
|
+
const toId = (value) => {
|
|
7744
|
+
if (typeof value === "number") {
|
|
7745
|
+
return String(value);
|
|
7746
|
+
}
|
|
7747
|
+
if (typeof value === "string" && ID_PATTERN.test(value)) {
|
|
7748
|
+
return value;
|
|
7749
|
+
}
|
|
7750
|
+
return void 0;
|
|
7751
|
+
};
|
|
7743
7752
|
const [key, ...rest] = path5;
|
|
7744
7753
|
if (key === "*") {
|
|
7745
7754
|
if (Array.isArray(obj)) {
|
|
7746
|
-
|
|
7755
|
+
if (rest.length === 0) {
|
|
7756
|
+
for (let i = 0; i < obj.length; i += 1) {
|
|
7757
|
+
const id = toId(obj[i]);
|
|
7758
|
+
if (id !== void 0) {
|
|
7759
|
+
obj[i] = (_a15 = idToUrlMapping[id]) != null ? _a15 : "";
|
|
7760
|
+
}
|
|
7761
|
+
}
|
|
7762
|
+
} else {
|
|
7763
|
+
for (const item of obj) injectUrls(item, rest, idToUrlMapping);
|
|
7764
|
+
}
|
|
7747
7765
|
}
|
|
7748
7766
|
return;
|
|
7749
7767
|
}
|
|
@@ -7751,9 +7769,9 @@ function injectUrls(obj, path5, idToUrlMapping) {
|
|
|
7751
7769
|
const record = obj;
|
|
7752
7770
|
if (path5.length === 1) {
|
|
7753
7771
|
const fieldValue = record[key];
|
|
7754
|
-
const id =
|
|
7772
|
+
const id = toId(fieldValue);
|
|
7755
7773
|
if (id !== void 0) {
|
|
7756
|
-
record[key] = (
|
|
7774
|
+
record[key] = (_b = idToUrlMapping[id]) != null ? _b : "";
|
|
7757
7775
|
}
|
|
7758
7776
|
} else {
|
|
7759
7777
|
injectUrls(record[key], rest, idToUrlMapping);
|
|
@@ -27455,6 +27473,7 @@ var _V3 = class _V3 {
|
|
|
27455
27473
|
createdTempProfile: createdTemp,
|
|
27456
27474
|
preserveUserDataDir: !!lbo.preserveUserDataDir
|
|
27457
27475
|
};
|
|
27476
|
+
this.browserbaseSessionId = void 0;
|
|
27458
27477
|
yield this._applyPostConnectLocalOptions(lbo);
|
|
27459
27478
|
return;
|
|
27460
27479
|
}
|
|
@@ -27472,6 +27491,8 @@ var _V3 = class _V3 {
|
|
|
27472
27491
|
});
|
|
27473
27492
|
this.ctx.conn.onTransportClosed(this._onCdpClosed);
|
|
27474
27493
|
this.state = { kind: "BROWSERBASE", sessionId, ws, bb };
|
|
27494
|
+
this.browserbaseSessionId = sessionId;
|
|
27495
|
+
yield this._ensureBrowserbaseDownloadsEnabled();
|
|
27475
27496
|
try {
|
|
27476
27497
|
const resumed = !!this.opts.browserbaseSessionID;
|
|
27477
27498
|
let debugUrl;
|
|
@@ -27527,6 +27548,21 @@ var _V3 = class _V3 {
|
|
|
27527
27548
|
}
|
|
27528
27549
|
});
|
|
27529
27550
|
}
|
|
27551
|
+
_ensureBrowserbaseDownloadsEnabled() {
|
|
27552
|
+
return __async(this, null, function* () {
|
|
27553
|
+
var _a15;
|
|
27554
|
+
const conn = (_a15 = this.ctx) == null ? void 0 : _a15.conn;
|
|
27555
|
+
if (!conn) return;
|
|
27556
|
+
try {
|
|
27557
|
+
yield conn.send("Browser.setDownloadBehavior", {
|
|
27558
|
+
behavior: "allow",
|
|
27559
|
+
downloadPath: "downloads",
|
|
27560
|
+
eventsEnabled: true
|
|
27561
|
+
});
|
|
27562
|
+
} catch (e) {
|
|
27563
|
+
}
|
|
27564
|
+
});
|
|
27565
|
+
}
|
|
27530
27566
|
act(input, options) {
|
|
27531
27567
|
return __async(this, null, function* () {
|
|
27532
27568
|
return yield withInstanceLogContext(this.instanceId, () => __async(this, null, function* () {
|
|
@@ -27829,6 +27865,7 @@ var _V3 = class _V3 {
|
|
|
27829
27865
|
this.state = { kind: "UNINITIALIZED" };
|
|
27830
27866
|
this.ctx = null;
|
|
27831
27867
|
this._isClosing = false;
|
|
27868
|
+
this.browserbaseSessionId = void 0;
|
|
27832
27869
|
try {
|
|
27833
27870
|
unbindInstanceLogger(this.instanceId);
|
|
27834
27871
|
} catch (e) {
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import type { BrowserContext as PlaywrightContext, Page as PlaywrightPage } from "playwright";
|
|
2
|
+
import { Stagehand } from "./index";
|
|
3
|
+
import { StagehandPage } from "./StagehandPage";
|
|
4
|
+
import { EnhancedContext } from "../types/context";
|
|
5
|
+
export declare class StagehandContext {
|
|
6
|
+
private readonly stagehand;
|
|
7
|
+
private readonly intContext;
|
|
8
|
+
private pageMap;
|
|
9
|
+
private activeStagehandPage;
|
|
10
|
+
private readonly frameIdMap;
|
|
11
|
+
private constructor();
|
|
12
|
+
private createStagehandPage;
|
|
13
|
+
static init(context: PlaywrightContext, stagehand: Stagehand): Promise<StagehandContext>;
|
|
14
|
+
get frameIdLookup(): ReadonlyMap<string, StagehandPage>;
|
|
15
|
+
registerFrameId(frameId: string, page: StagehandPage): void;
|
|
16
|
+
unregisterFrameId(frameId: string): void;
|
|
17
|
+
getStagehandPageByFrameId(frameId: string): StagehandPage | undefined;
|
|
18
|
+
get context(): EnhancedContext;
|
|
19
|
+
getStagehandPage(page: PlaywrightPage): Promise<StagehandPage>;
|
|
20
|
+
getStagehandPages(): Promise<StagehandPage[]>;
|
|
21
|
+
setActivePage(page: StagehandPage): void;
|
|
22
|
+
getActivePage(): StagehandPage | null;
|
|
23
|
+
private handleNewPlaywrightPage;
|
|
24
|
+
private attachFrameNavigatedListener;
|
|
25
|
+
}
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
import type { CDPSession, Page as PlaywrightPage, Frame } from "playwright";
|
|
2
|
+
import { z } from "zod/v3";
|
|
3
|
+
import { Page, defaultExtractSchema } from "../types/page";
|
|
4
|
+
import { ExtractOptions, ExtractResult, ObserveOptions, ObserveResult } from "../types/stagehand";
|
|
5
|
+
import { StagehandAPI } from "./api";
|
|
6
|
+
import { ActOptions, ActResult, Stagehand } from "./index";
|
|
7
|
+
import { LLMClient } from "./llm/LLMClient";
|
|
8
|
+
import { StagehandContext } from "./StagehandContext";
|
|
9
|
+
import { EncodedId, EnhancedContext } from "../types/context";
|
|
10
|
+
export declare class StagehandPage {
|
|
11
|
+
private stagehand;
|
|
12
|
+
private rawPage;
|
|
13
|
+
private intPage;
|
|
14
|
+
private intContext;
|
|
15
|
+
private actHandler;
|
|
16
|
+
private extractHandler;
|
|
17
|
+
private observeHandler;
|
|
18
|
+
private llmClient;
|
|
19
|
+
private cdpClient;
|
|
20
|
+
private api;
|
|
21
|
+
private userProvidedInstructions?;
|
|
22
|
+
private waitForCaptchaSolves;
|
|
23
|
+
private initialized;
|
|
24
|
+
private readonly cdpClients;
|
|
25
|
+
private fidOrdinals;
|
|
26
|
+
private rootFrameId;
|
|
27
|
+
get frameId(): string;
|
|
28
|
+
updateRootFrameId(newId: string): void;
|
|
29
|
+
constructor(page: PlaywrightPage, stagehand: Stagehand, context: StagehandContext, llmClient: LLMClient, userProvidedInstructions?: string, api?: StagehandAPI, waitForCaptchaSolves?: boolean);
|
|
30
|
+
ordinalForFrameId(fid: string | undefined): number;
|
|
31
|
+
encodeWithFrameId(fid: string | undefined, backendId: number): EncodedId;
|
|
32
|
+
resetFrameOrdinals(): void;
|
|
33
|
+
private ensureStagehandScript;
|
|
34
|
+
/** Register the custom selector engine that pierces open/closed shadow roots. */
|
|
35
|
+
private ensureStagehandSelectorEngine;
|
|
36
|
+
/**
|
|
37
|
+
* Waits for a captcha to be solved when using Browserbase environment.
|
|
38
|
+
*
|
|
39
|
+
* @param timeoutMs - Optional timeout in milliseconds. If provided, the promise will reject if the captcha solving hasn't started within the given time.
|
|
40
|
+
* @throws StagehandEnvironmentError if called in a LOCAL environment
|
|
41
|
+
* @throws CaptchaTimeoutError if the timeout is reached before captcha solving starts
|
|
42
|
+
* @returns Promise that resolves when the captcha is solved
|
|
43
|
+
*/
|
|
44
|
+
waitForCaptchaSolve(timeoutMs?: number): Promise<void>;
|
|
45
|
+
init(): Promise<StagehandPage>;
|
|
46
|
+
get page(): Page;
|
|
47
|
+
get context(): EnhancedContext;
|
|
48
|
+
/**
|
|
49
|
+
* `_waitForSettledDom` waits until the DOM is settled, and therefore is
|
|
50
|
+
* ready for actions to be taken.
|
|
51
|
+
*
|
|
52
|
+
* **Definition of "settled"**
|
|
53
|
+
* • No in-flight network requests (except WebSocket / Server-Sent-Events).
|
|
54
|
+
* • That idle state lasts for at least **500 ms** (the "quiet-window").
|
|
55
|
+
*
|
|
56
|
+
* **How it works**
|
|
57
|
+
* 1. Subscribes to CDP Network and Page events for the main target and all
|
|
58
|
+
* out-of-process iframes (via `Target.setAutoAttach { flatten:true }`).
|
|
59
|
+
* 2. Every time `Network.requestWillBeSent` fires, the request ID is added
|
|
60
|
+
* to an **`inflight`** `Set`.
|
|
61
|
+
* 3. When the request finishes—`loadingFinished`, `loadingFailed`,
|
|
62
|
+
* `requestServedFromCache`, or a *data:* response—the request ID is
|
|
63
|
+
* removed.
|
|
64
|
+
* 4. *Document* requests are also mapped **frameId → requestId**; when
|
|
65
|
+
* `Page.frameStoppedLoading` fires the corresponding Document request is
|
|
66
|
+
* removed immediately (covers iframes whose network events never close).
|
|
67
|
+
* 5. A **stalled-request sweep timer** runs every 500 ms. If a *Document*
|
|
68
|
+
* request has been open for ≥ 2 s it is forcibly removed; this prevents
|
|
69
|
+
* ad/analytics iframes from blocking the wait forever.
|
|
70
|
+
* 6. When `inflight` becomes empty the helper starts a 500 ms timer.
|
|
71
|
+
* If no new request appears before the timer fires, the promise
|
|
72
|
+
* resolves → **DOM is considered settled**.
|
|
73
|
+
* 7. A global guard (`timeoutMs` or `stagehand.domSettleTimeoutMs`,
|
|
74
|
+
* default ≈ 30 s) ensures we always resolve; if it fires we log how many
|
|
75
|
+
* requests were still outstanding.
|
|
76
|
+
*
|
|
77
|
+
* @param timeoutMs – Optional hard cap (ms). Defaults to
|
|
78
|
+
* `this.stagehand.domSettleTimeoutMs`.
|
|
79
|
+
*/
|
|
80
|
+
_waitForSettledDom(timeoutMs?: number): Promise<void>;
|
|
81
|
+
act(actionOrOptions: string | ActOptions | ObserveResult): Promise<ActResult>;
|
|
82
|
+
extract<T extends z.AnyZodObject = typeof defaultExtractSchema>(instructionOrOptions?: string | ExtractOptions<T>): Promise<ExtractResult<T>>;
|
|
83
|
+
observe(instructionOrOptions?: string | ObserveOptions): Promise<ObserveResult[]>;
|
|
84
|
+
/**
|
|
85
|
+
* Get or create a CDP session for the given target.
|
|
86
|
+
* @param target The Page or (OOPIF) Frame you want to talk to.
|
|
87
|
+
*/
|
|
88
|
+
getCDPClient(target?: PlaywrightPage | Frame): Promise<CDPSession>;
|
|
89
|
+
/**
|
|
90
|
+
* Send a CDP command to the chosen DevTools target.
|
|
91
|
+
*
|
|
92
|
+
* @param method Any valid CDP method, e.g. `"DOM.getDocument"`.
|
|
93
|
+
* @param params Command parameters (optional).
|
|
94
|
+
* @param target A `Page` or OOPIF `Frame`. Defaults to the main page.
|
|
95
|
+
*
|
|
96
|
+
* @typeParam T Expected result shape (defaults to `unknown`).
|
|
97
|
+
*/
|
|
98
|
+
sendCDP<T = unknown>(method: string, params?: Record<string, unknown>, target?: PlaywrightPage | Frame): Promise<T>;
|
|
99
|
+
/** Enable a CDP domain (e.g. `"Network"` or `"DOM"`) on the chosen target. */
|
|
100
|
+
enableCDP(domain: string, target?: PlaywrightPage | Frame): Promise<void>;
|
|
101
|
+
/** Disable a CDP domain on the chosen target. */
|
|
102
|
+
disableCDP(domain: string, target?: PlaywrightPage | Frame): Promise<void>;
|
|
103
|
+
}
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
import { AccessibilityNode, TreeResult, BackendIdMaps, CombinedA11yResult, EncodedId } from "../../types/context";
|
|
2
|
+
import { StagehandPage } from "../StagehandPage";
|
|
3
|
+
import { LogLine } from "../../types/log";
|
|
4
|
+
import { Frame } from "playwright";
|
|
5
|
+
/**
|
|
6
|
+
* Clean a string by removing private-use unicode characters, normalizing whitespace,
|
|
7
|
+
* and trimming the result.
|
|
8
|
+
*
|
|
9
|
+
* @param input - The text to clean, potentially containing PUA and NBSP characters.
|
|
10
|
+
* @returns A cleaned string with PUA characters removed, NBSP variants collapsed,
|
|
11
|
+
* consecutive spaces merged, and leading/trailing whitespace trimmed.
|
|
12
|
+
*/
|
|
13
|
+
export declare function cleanText(input: string): string;
|
|
14
|
+
/**
|
|
15
|
+
* Generate a human-readable, indented outline of an accessibility node tree.
|
|
16
|
+
*
|
|
17
|
+
* @param node - The accessibility node to format, optionally with an encodedId.
|
|
18
|
+
* @param level - The current depth level for indentation (used internally).
|
|
19
|
+
* @returns A string representation of the node and its descendants, with one node per line.
|
|
20
|
+
*/
|
|
21
|
+
export declare function formatSimplifiedTree(node: AccessibilityNode & {
|
|
22
|
+
encodedId?: EncodedId;
|
|
23
|
+
}, level?: number): string;
|
|
24
|
+
/**
|
|
25
|
+
* Build mappings from CDP backendNodeIds to HTML tag names and relative XPaths.
|
|
26
|
+
*
|
|
27
|
+
* @param experimental - Whether to use experimental behaviour.
|
|
28
|
+
* @param sp - The StagehandPage wrapper for Playwright and CDP calls.
|
|
29
|
+
* @param targetFrame - Optional Playwright.Frame whose DOM subtree to map; defaults to main frame.
|
|
30
|
+
* @returns A Promise resolving to BackendIdMaps containing tagNameMap and xpathMap.
|
|
31
|
+
*/
|
|
32
|
+
export declare function buildBackendIdMaps(experimental: boolean, sp: StagehandPage, targetFrame?: Frame): Promise<BackendIdMaps>;
|
|
33
|
+
/**
|
|
34
|
+
* Convert a flat array of AccessibilityNodes into a cleaned, hierarchical tree.
|
|
35
|
+
* Nodes are pruned, structural wrappers removed, and each kept node is stamped
|
|
36
|
+
* with its EncodedId for later lookup or subtree injection.
|
|
37
|
+
*
|
|
38
|
+
* @param nodes - Raw flat list of AX nodes retrieved via CDP.
|
|
39
|
+
* @param tagNameMap - Mapping of EncodedId to HTML tag names for structural decisions.
|
|
40
|
+
* @param logger - Optional function for logging diagnostic messages.
|
|
41
|
+
* @param xpathMap - Optional mapping of EncodedId to relative XPath for element lookup.
|
|
42
|
+
* @returns A Promise resolving to a TreeResult with cleaned tree, simplified text outline,
|
|
43
|
+
* iframe list, URL map, and inherited xpathMap.
|
|
44
|
+
*/
|
|
45
|
+
export declare function buildHierarchicalTree(nodes: AccessibilityNode[], tagNameMap: Record<EncodedId, string>, logger?: (l: LogLine) => void, xpathMap?: Record<EncodedId, string>): Promise<TreeResult>;
|
|
46
|
+
/**
|
|
47
|
+
* Resolve the CDP frame identifier for a Playwright Frame, handling same-process and OOPIF.
|
|
48
|
+
*
|
|
49
|
+
* @param sp - The StagehandPage instance for issuing CDP commands.
|
|
50
|
+
* @param frame - The target Playwright.Frame; undefined or main frame yields undefined.
|
|
51
|
+
* @returns A Promise resolving to the CDP frameId string, or undefined for main document.
|
|
52
|
+
*/
|
|
53
|
+
export declare function getCDPFrameId(sp: StagehandPage, frame?: Frame): Promise<string | undefined>;
|
|
54
|
+
/**
|
|
55
|
+
* Retrieve and build a cleaned accessibility tree for a document or specific iframe.
|
|
56
|
+
* Prunes, formats, and optionally filters by XPath, including scrollable role decoration.
|
|
57
|
+
*
|
|
58
|
+
* @param stagehandPage - The StagehandPage instance for Playwright and CDP interaction.
|
|
59
|
+
* @param logger - Logging function for diagnostics and performance metrics.
|
|
60
|
+
* @param selector - Optional XPath to filter the AX tree to a specific subtree.
|
|
61
|
+
* @param targetFrame - Optional Playwright.Frame to scope the AX tree retrieval.
|
|
62
|
+
* @returns A Promise resolving to a TreeResult with the hierarchical AX tree and related metadata.
|
|
63
|
+
*/
|
|
64
|
+
export declare function getAccessibilityTree(experimental: boolean, stagehandPage: StagehandPage, logger: (log: LogLine) => void, selector?: string, targetFrame?: Frame): Promise<TreeResult>;
|
|
65
|
+
/**
|
|
66
|
+
* Get the backendNodeId of the iframe element that contains a given Playwright.Frame.
|
|
67
|
+
*
|
|
68
|
+
* @param sp - The StagehandPage instance for issuing CDP commands.
|
|
69
|
+
* @param frame - The Playwright.Frame whose host iframe element to locate.
|
|
70
|
+
* @returns A Promise resolving to the backendNodeId of the iframe element, or null if not applicable.
|
|
71
|
+
*/
|
|
72
|
+
export declare function getFrameRootBackendNodeId(sp: StagehandPage, frame: Frame | undefined): Promise<number | null>;
|
|
73
|
+
/**
|
|
74
|
+
* Compute the absolute XPath for the iframe element hosting a given Playwright.Frame.
|
|
75
|
+
*
|
|
76
|
+
* @param frame - The Playwright.Frame whose iframe element to locate.
|
|
77
|
+
* @returns A Promise resolving to the XPath of the iframe element, or "/" if no frame provided.
|
|
78
|
+
*/
|
|
79
|
+
export declare function getFrameRootXpathWithShadow(frame: Frame | undefined): Promise<string>;
|
|
80
|
+
export declare function getFrameRootXpath(frame: Frame | undefined): Promise<string>;
|
|
81
|
+
/**
|
|
82
|
+
* Inject simplified subtree outlines into the main frame outline for nested iframes.
|
|
83
|
+
* Walks the main tree text, looks for EncodedId labels, and inserts matching subtrees.
|
|
84
|
+
*
|
|
85
|
+
* @param tree - The indented AX outline of the main frame.
|
|
86
|
+
* @param idToTree - Map of EncodedId to subtree outlines for nested frames.
|
|
87
|
+
* @returns A single combined text outline with iframe subtrees injected.
|
|
88
|
+
*/
|
|
89
|
+
export declare function injectSubtrees(tree: string, idToTree: Map<EncodedId, string>): string;
|
|
90
|
+
/**
|
|
91
|
+
* Retrieve and merge accessibility trees for the main document and nested iframes.
|
|
92
|
+
* Walks through frame chains if a root XPath is provided, then stitches subtree outlines.
|
|
93
|
+
*
|
|
94
|
+
* @param stagehandPage - The StagehandPage instance for Playwright and CDP interaction.
|
|
95
|
+
* @param logger - Logging function for diagnostics and performance.
|
|
96
|
+
* @param rootXPath - Optional absolute XPath to focus the crawl on a subtree across frames.
|
|
97
|
+
* @returns A Promise resolving to CombinedA11yResult with combined tree text, xpath map, and URL map.
|
|
98
|
+
*/
|
|
99
|
+
export declare function getAccessibilityTreeWithFrames(experimental: boolean, stagehandPage: StagehandPage, logger: (l: LogLine) => void, rootXPath?: string): Promise<CombinedA11yResult>;
|
|
100
|
+
/**
|
|
101
|
+
* `findScrollableElementIds` is a function that identifies elements in
|
|
102
|
+
* the browser that are deemed "scrollable". At a high level, it does the
|
|
103
|
+
* following:
|
|
104
|
+
* - Calls the browser-side `window.getScrollableElementXpaths()` function,
|
|
105
|
+
* which returns a list of XPaths for scrollable containers.
|
|
106
|
+
* - Iterates over the returned list of XPaths, locating each element in the DOM
|
|
107
|
+
* using `stagehandPage.sendCDP(...)`
|
|
108
|
+
* - During each iteration, we call `Runtime.evaluate` to run `document.evaluate(...)`
|
|
109
|
+
* with each XPath, obtaining a `RemoteObject` reference if it exists.
|
|
110
|
+
* - Then, for each valid object reference, we call `DOM.describeNode` to retrieve
|
|
111
|
+
* the element’s `backendNodeId`.
|
|
112
|
+
* - Collects all resulting `backendNodeId`s in a Set and returns them.
|
|
113
|
+
*
|
|
114
|
+
* @param stagehandPage - A StagehandPage instance with built-in CDP helpers.
|
|
115
|
+
* @returns A Promise that resolves to a Set of unique `backendNodeId`s corresponding
|
|
116
|
+
* to scrollable elements in the DOM.
|
|
117
|
+
*/
|
|
118
|
+
export declare function findScrollableElementIds(stagehandPage: StagehandPage, targetFrame?: Frame): Promise<Set<number>>;
|
|
119
|
+
/**
|
|
120
|
+
* Resolve an XPath to a Chrome-DevTools-Protocol (CDP) remote-object ID.
|
|
121
|
+
*
|
|
122
|
+
* @param page A StagehandPage (or Playwright.Page with .sendCDP)
|
|
123
|
+
* @param xpath An absolute or relative XPath
|
|
124
|
+
* @returns The remote objectId for the matched node, or null
|
|
125
|
+
*/
|
|
126
|
+
export declare function resolveObjectIdForXPath(page: StagehandPage, xpath: string, targetFrame?: Frame): Promise<string | null>;
|
|
127
|
+
/**
|
|
128
|
+
* Resolve a chain of iframe frames from an absolute XPath, returning the frame sequence and inner XPath.
|
|
129
|
+
*
|
|
130
|
+
* This helper walks an XPath expression containing iframe steps (e.g., '/html/body/iframe[2]/...'),
|
|
131
|
+
* descending into each matching iframe element to build a frame chain, and returns the leftover
|
|
132
|
+
* XPath segment to evaluate within the context of the last iframe.
|
|
133
|
+
*
|
|
134
|
+
* @param sp - The StagehandPage instance for evaluating XPath and locating frames.
|
|
135
|
+
* @param absPath - An absolute XPath expression starting with '/', potentially including iframe steps.
|
|
136
|
+
* @returns An object containing:
|
|
137
|
+
* frames: Array of Frame objects representing each iframe in the chain.
|
|
138
|
+
* rest: The remaining XPath string to evaluate inside the final iframe.
|
|
139
|
+
* @throws Error if an iframe cannot be found or the final XPath cannot be resolved.
|
|
140
|
+
*/
|
|
141
|
+
export declare function resolveFrameChain(sp: StagehandPage, absPath: string): Promise<{
|
|
142
|
+
frames: Frame[];
|
|
143
|
+
rest: string;
|
|
144
|
+
}>;
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import { AgentAction, AgentResult, AgentType, AgentExecutionOptions } from "@/types/agent";
|
|
2
|
+
import { ToolSet } from "ai/dist";
|
|
3
|
+
/**
|
|
4
|
+
* Abstract base class for agent clients
|
|
5
|
+
* This provides a common interface for all agent implementations
|
|
6
|
+
*/
|
|
7
|
+
export declare abstract class AgentClient {
|
|
8
|
+
type: AgentType;
|
|
9
|
+
modelName: string;
|
|
10
|
+
clientOptions: Record<string, unknown>;
|
|
11
|
+
userProvidedInstructions?: string;
|
|
12
|
+
constructor(type: AgentType, modelName: string, userProvidedInstructions?: string);
|
|
13
|
+
abstract execute(options: AgentExecutionOptions): Promise<AgentResult>;
|
|
14
|
+
abstract captureScreenshot(options?: Record<string, unknown>): Promise<unknown>;
|
|
15
|
+
abstract setViewport(width: number, height: number): void;
|
|
16
|
+
abstract setCurrentUrl(url: string): void;
|
|
17
|
+
abstract setScreenshotProvider(provider: () => Promise<string>): void;
|
|
18
|
+
abstract setActionHandler(handler: (action: AgentAction) => Promise<void>): void;
|
|
19
|
+
abstract setTools(tools: ToolSet): void;
|
|
20
|
+
}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import { AgentType } from "@/types/agent";
|
|
2
|
+
import { LogLine } from "@/types/log";
|
|
3
|
+
import { ToolSet } from "ai/dist";
|
|
4
|
+
import { AgentClient } from "./AgentClient";
|
|
5
|
+
export declare const modelToAgentProviderMap: Record<string, AgentType>;
|
|
6
|
+
/**
|
|
7
|
+
* Provider for agent clients
|
|
8
|
+
* This class is responsible for creating the appropriate agent client
|
|
9
|
+
* based on the provider type
|
|
10
|
+
*/
|
|
11
|
+
export declare class AgentProvider {
|
|
12
|
+
private logger;
|
|
13
|
+
/**
|
|
14
|
+
* Create a new agent provider
|
|
15
|
+
*/
|
|
16
|
+
constructor(logger: (message: LogLine) => void);
|
|
17
|
+
getClient(modelName: string, clientOptions?: Record<string, unknown>, userProvidedInstructions?: string, tools?: ToolSet): AgentClient;
|
|
18
|
+
static getAgentProvider(modelName: string): AgentType;
|
|
19
|
+
}
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import { AgentAction, AgentExecutionOptions, AgentResult, AgentType, AnthropicContentBlock, AnthropicMessage, AnthropicToolResult, ToolUseItem } from "@/types/agent";
|
|
2
|
+
import { LogLine } from "@/types/log";
|
|
3
|
+
import { ToolSet } from "ai";
|
|
4
|
+
import { AgentClient } from "./AgentClient";
|
|
5
|
+
export type ResponseInputItem = AnthropicMessage | AnthropicToolResult;
|
|
6
|
+
/**
|
|
7
|
+
* Client for Anthropic's Computer Use API
|
|
8
|
+
* This implementation uses the official Anthropic Messages API for Computer Use
|
|
9
|
+
*/
|
|
10
|
+
export declare class AnthropicCUAClient extends AgentClient {
|
|
11
|
+
private apiKey;
|
|
12
|
+
private baseURL?;
|
|
13
|
+
private client;
|
|
14
|
+
lastMessageId?: string;
|
|
15
|
+
private currentViewport;
|
|
16
|
+
private currentUrl?;
|
|
17
|
+
private screenshotProvider?;
|
|
18
|
+
private actionHandler?;
|
|
19
|
+
private thinkingBudget;
|
|
20
|
+
private tools?;
|
|
21
|
+
constructor(type: AgentType, modelName: string, userProvidedInstructions?: string, clientOptions?: Record<string, unknown>, tools?: ToolSet);
|
|
22
|
+
setViewport(width: number, height: number): void;
|
|
23
|
+
setCurrentUrl(url: string): void;
|
|
24
|
+
setScreenshotProvider(provider: () => Promise<string>): void;
|
|
25
|
+
setActionHandler(handler: (action: AgentAction) => Promise<void>): void;
|
|
26
|
+
setTools(tools: ToolSet): void;
|
|
27
|
+
/**
|
|
28
|
+
* Execute a task with the Anthropic CUA
|
|
29
|
+
* This is the main entry point for the agent
|
|
30
|
+
* @implements AgentClient.execute
|
|
31
|
+
*/
|
|
32
|
+
execute(executionOptions: AgentExecutionOptions): Promise<AgentResult>;
|
|
33
|
+
executeStep(inputItems: ResponseInputItem[], logger: (message: LogLine) => void): Promise<{
|
|
34
|
+
actions: AgentAction[];
|
|
35
|
+
message: string;
|
|
36
|
+
completed: boolean;
|
|
37
|
+
nextInputItems: ResponseInputItem[];
|
|
38
|
+
usage: {
|
|
39
|
+
input_tokens: number;
|
|
40
|
+
output_tokens: number;
|
|
41
|
+
inference_time_ms: number;
|
|
42
|
+
};
|
|
43
|
+
}>;
|
|
44
|
+
private createInitialInputItems;
|
|
45
|
+
getAction(inputItems: ResponseInputItem[]): Promise<{
|
|
46
|
+
content: AnthropicContentBlock[];
|
|
47
|
+
id: string;
|
|
48
|
+
usage: Record<string, number>;
|
|
49
|
+
}>;
|
|
50
|
+
takeAction(toolUseItems: ToolUseItem[], logger: (message: LogLine) => void): Promise<AnthropicToolResult[]>;
|
|
51
|
+
private convertToolUseToAction;
|
|
52
|
+
captureScreenshot(options?: {
|
|
53
|
+
base64Image?: string;
|
|
54
|
+
currentUrl?: string;
|
|
55
|
+
}): Promise<string>;
|
|
56
|
+
}
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
import { LogLine } from "../../types/log";
|
|
2
|
+
import { AgentAction, AgentResult, AgentType, AgentExecutionOptions } from "@/types/agent";
|
|
3
|
+
import { AgentClient } from "./AgentClient";
|
|
4
|
+
/**
 * Agent client for Google's Computer Use Assistant API.
 * Backed by the Google Generative AI SDK for Computer Use.
 */
export declare class GoogleCUAClient extends AgentClient {
    // Internal state. Private member types are erased in emitted declarations,
    // so only the names (and optionality) are visible here.
    private apiKey;
    private client;
    private currentViewport;
    private currentUrl?;
    private screenshotProvider?;
    private actionHandler?;
    private history;
    private environment;
    private generateContentConfig;

    /**
     * @param type - Agent type this client is created for.
     * @param modelName - Name of the model to use.
     * @param userProvidedInstructions - Optional caller-supplied instructions.
     * @param clientOptions - Optional free-form client options.
     */
    constructor(
        type: AgentType,
        modelName: string,
        userProvidedInstructions?: string,
        clientOptions?: Record<string, unknown>
    );

    // ---- Configuration ----

    /** Records the viewport size as a width/height pair. */
    setViewport(width: number, height: number): void;

    /** Records the current page URL. */
    setCurrentUrl(url: string): void;

    /** Registers the async callback that supplies a screenshot as a string. */
    setScreenshotProvider(provider: () => Promise<string>): void;

    /** Registers the async callback invoked with each agent action. */
    setActionHandler(handler: (action: AgentAction) => Promise<void>): void;

    /** Declared without parameters on this client. */
    setTools(): void;

    // ---- Execution ----

    /**
     * Execute a task with the Google CUA.
     * Main entry point for the agent.
     * @implements AgentClient.execute
     */
    execute(executionOptions: AgentExecutionOptions): Promise<AgentResult>;

    /**
     * Execute a single step of the agent.
     *
     * @param logger - Sink for log lines.
     * @returns The step's actions, message, completion flag and token/timing usage.
     */
    executeStep(
        logger: (message: LogLine) => void
    ): Promise<{
        actions: AgentAction[];
        message: string;
        completed: boolean;
        usage: {
            input_tokens: number;
            output_tokens: number;
            inference_time_ms: number;
        };
    }>;

    /**
     * Resolves to a string for the captured screenshot.
     * NOTE(review): presumably encoded image data; confirm against the implementation.
     */
    captureScreenshot(options?: {
        base64Image?: string;
        currentUrl?: string;
    }): Promise<string>;

    // ---- Private helpers ----

    /** Initialize conversation history with the initial instruction. */
    private initializeHistory;

    /** Process the response from Google's API. */
    private processResponse;

    /** Convert a Google function call to a Stagehand action. */
    private convertFunctionCallToAction;

    /** Normalize coordinates from Google's 0-1000 range to actual viewport dimensions. */
    private normalizeCoordinates;
}
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
import { LogLine } from "../../types/log";
|
|
2
|
+
import { AgentAction, AgentResult, AgentType, AgentExecutionOptions, ResponseInputItem, ResponseItem } from "@/types/agent";
|
|
3
|
+
import { AgentClient } from "./AgentClient";
|
|
4
|
+
import { ToolSet } from "ai/dist";
|
|
5
|
+
/**
 * Agent client for OpenAI's Computer Use Assistant API.
 * Backed by the official OpenAI Responses API for Computer Use.
 */
export declare class OpenAICUAClient extends AgentClient {
    // Internal state. Private member types are erased in emitted declarations,
    // so only the names (and optionality) are visible here.
    private apiKey;
    private organization?;
    private baseURL;
    private client;
    private currentViewport;
    private currentUrl?;
    private screenshotProvider?;
    private actionHandler?;
    private reasoningItems;
    private environment;
    private tools?;

    /** Publicly readable/writable response id; may be unset. */
    lastResponseId?: string;

    /**
     * @param type - Agent type this client is created for.
     * @param modelName - Name of the model to use.
     * @param userProvidedInstructions - Optional caller-supplied instructions.
     * @param clientOptions - Optional free-form client options.
     * @param tools - Optional tool set.
     */
    constructor(
        type: AgentType,
        modelName: string,
        userProvidedInstructions?: string,
        clientOptions?: Record<string, unknown>,
        tools?: ToolSet
    );

    // ---- Configuration ----

    /** Records the viewport size as a width/height pair. */
    setViewport(width: number, height: number): void;

    /** Records the current page URL. */
    setCurrentUrl(url: string): void;

    /** Registers the async callback that supplies a screenshot as a string. */
    setScreenshotProvider(provider: () => Promise<string>): void;

    /** Registers the async callback invoked with each agent action. */
    setActionHandler(handler: (action: AgentAction) => Promise<void>): void;

    /** Supplies the tool set for this client. */
    setTools(tools: ToolSet): void;

    // ---- Execution ----

    /**
     * Execute a task with the OpenAI CUA.
     * Main entry point for the agent.
     * @implements AgentClient.execute
     */
    execute(executionOptions: AgentExecutionOptions): Promise<AgentResult>;

    /**
     * Execute a single step of the agent.
     * Coordinates the flow: Request → Get Action → Execute Action.
     *
     * @param inputItems - Input items for this step.
     * @param previousResponseId - Id of the preceding response, or `undefined`.
     * @param logger - Sink for log lines.
     * @returns The step's actions, message, completion flag, the input items
     *   for the next step, the response id and token/timing usage.
     */
    executeStep(
        inputItems: ResponseInputItem[],
        previousResponseId: string | undefined,
        logger: (message: LogLine) => void
    ): Promise<{
        actions: AgentAction[];
        message: string;
        completed: boolean;
        nextInputItems: ResponseInputItem[];
        responseId: string;
        usage: {
            input_tokens: number;
            output_tokens: number;
            inference_time_ms: number;
        };
    }>;

    /**
     * Resolves to the response output items, the response id and a usage map.
     *
     * @param inputItems - Input items to send.
     * @param previousResponseId - Optional id of the preceding response.
     */
    getAction(
        inputItems: ResponseInputItem[],
        previousResponseId?: string
    ): Promise<{
        output: ResponseItem[];
        responseId: string;
        usage: Record<string, number>;
    }>;

    /**
     * Takes response output items and resolves to response input items.
     *
     * @param output - Output items from a response.
     * @param logger - Sink for log lines.
     */
    takeAction(
        output: ResponseItem[],
        logger: (message: LogLine) => void
    ): Promise<ResponseInputItem[]>;

    /**
     * Resolves to a string for the captured screenshot.
     * NOTE(review): presumably encoded image data; confirm against the implementation.
     */
    captureScreenshot(options?: {
        base64Image?: string;
        currentUrl?: string;
    }): Promise<string>;

    // ---- Private helpers (signatures erased in the declaration) ----
    private isComputerCallItem;
    private isFunctionCallItem;
    private createInitialInputItems;
    private convertComputerCallToAction;
    private convertFunctionCallToAction;
}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import { LogLine } from "@/types/log";
|
|
2
|
+
import { AgentExecuteOptions, AgentResult } from "@/types/agent";
|
|
3
|
+
import { AgentClient } from "./AgentClient";
|
|
4
|
+
/**
 * Main interface for agent operations in Stagehand.
 * Exposes the methods used to execute tasks with an agent.
 */
export declare class StagehandAgent {
    // Private member types are erased in emitted declarations.
    private client;
    private logger;

    /**
     * @param client - Agent client to use.
     * @param logger - Sink for log lines.
     */
    constructor(
        client: AgentClient,
        logger: (message: LogLine) => void
    );

    /**
     * Runs a task described by either an options object or a plain
     * instruction string.
     *
     * @param optionsOrInstruction - Execution options, or the instruction text.
     * @returns A promise of the agent result.
     */
    execute(
        optionsOrInstruction: AgentExecuteOptions | string
    ): Promise<AgentResult>;

    /** Returns the model name as a string. */
    getModelName(): string;

    /** Returns the agent type as a string. */
    getAgentType(): string;
}
|