@browserbasehq/stagehand 1.4.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,179 @@
1
+ import { LLMProvider } from "../llm/LLMProvider";
2
+ import { Stagehand } from "../index";
3
+ import { z } from "zod";
4
+ import { LogLine } from "../../types/log";
5
+ import { extract } from "../inference";
6
+ import { LLMClient } from "../llm/LLMClient";
7
+
8
/**
 * Runs structured extraction against the Stagehand page one DOM chunk at a
 * time.
 *
 * Each call to {@link StagehandExtractHandler.extract} reads one chunk through
 * the in-page `window.processDom` function, sends it to the `extract`
 * inference call, and then either returns the result or recurses for the next
 * chunk.
 */
export class StagehandExtractHandler {
  private readonly stagehand: Stagehand;

  private readonly logger: (logLine: LogLine) => void;
  private readonly waitForSettledDom: (
    domSettleTimeoutMs?: number,
  ) => Promise<void>;
  private readonly startDomDebug: () => Promise<void>;
  private readonly cleanupDomDebug: () => Promise<void>;
  private readonly llmProvider: LLMProvider;
  private readonly llmClient: LLMClient;
  private readonly verbose: 0 | 1 | 2;

  /**
   * Stores the collaborators used by {@link StagehandExtractHandler.extract}.
   *
   * The `logger` parameter is typed with `LogLine`, matching the field it is
   * assigned to and the observe handler's constructor.
   */
  constructor({
    stagehand,
    logger,
    waitForSettledDom,
    startDomDebug,
    cleanupDomDebug,
    llmProvider,
    llmClient,
    verbose,
  }: {
    stagehand: Stagehand;
    logger: (logLine: LogLine) => void;
    waitForSettledDom: (domSettleTimeoutMs?: number) => Promise<void>;
    startDomDebug: () => Promise<void>;
    cleanupDomDebug: () => Promise<void>;
    llmProvider: LLMProvider;
    llmClient: LLMClient;
    verbose: 0 | 1 | 2;
  }) {
    this.stagehand = stagehand;
    this.logger = logger;
    this.waitForSettledDom = waitForSettledDom;
    this.startDomDebug = startDomDebug;
    this.cleanupDomDebug = cleanupDomDebug;
    this.llmProvider = llmProvider;
    this.llmClient = llmClient;
    this.verbose = verbose;
  }

  /**
   * Extracts data matching `schema` from the page, chunk by chunk.
   *
   * @param instruction - Natural-language description of what to extract.
   * @param schema - Zod object schema forwarded to the inference call.
   * @param progress - Progress text carried over from the previous chunk.
   * @param content - Content extracted so far; forwarded as
   *   `previouslyExtractedContent`.
   * @param chunksSeen - Chunks already processed. This array is not mutated;
   *   an extended copy is passed to the recursive call.
   * @param llmClient - Client used for the inference call.
   * @param requestId - Optional identifier forwarded to every inference call,
   *   including the ones made for follow-up chunks.
   * @param domSettleTimeoutMs - Optional timeout forwarded to
   *   `waitForSettledDom`.
   * @returns The extraction response with its `metadata` field removed.
   */
  public async extract<T extends z.AnyZodObject>({
    instruction,
    schema,
    progress = "",
    content = {},
    chunksSeen = [],
    llmClient,
    requestId,
    domSettleTimeoutMs,
  }: {
    instruction: string;
    schema: T;
    progress?: string;
    content?: z.infer<T>;
    chunksSeen?: Array<number>;
    llmClient: LLMClient;
    requestId?: string;
    domSettleTimeoutMs?: number;
  }): Promise<z.infer<T>> {
    this.logger({
      category: "extraction",
      message: "starting extraction",
      level: 1,
      auxiliary: {
        instruction: {
          value: instruction,
          type: "string",
        },
      },
    });

    await this.waitForSettledDom(domSettleTimeoutMs);

    const { chunk, chunks, extractionResponse } = await this.extractNextChunk({
      instruction,
      schema,
      progress,
      content,
      chunksSeen,
      llmClient,
      requestId,
    });

    const {
      metadata: { progress: newProgress, completed },
      ...output
    } = extractionResponse;

    this.logger({
      category: "extraction",
      message: "received extraction response",
      auxiliary: {
        extraction_response: {
          value: JSON.stringify(extractionResponse),
          type: "object",
        },
      },
    });

    // Build a new list instead of pushing so the caller's array is untouched.
    const updatedChunksSeen = [...chunksSeen, chunk];

    if (completed || updatedChunksSeen.length === chunks.length) {
      this.logger({
        category: "extraction",
        message: "got response",
        auxiliary: {
          extraction_response: {
            value: JSON.stringify(extractionResponse),
            type: "object",
          },
        },
      });

      return output;
    }

    this.logger({
      category: "extraction",
      message: "continuing extraction",
      auxiliary: {
        extraction_response: {
          value: JSON.stringify(extractionResponse),
          type: "object",
        },
      },
    });
    await this.waitForSettledDom(domSettleTimeoutMs);
    return this.extract({
      instruction,
      schema,
      progress: newProgress,
      content: output,
      chunksSeen: updatedChunksSeen,
      llmClient,
      // Keep the same request id so follow-up chunks stay correlated.
      requestId,
      domSettleTimeoutMs,
    });
  }

  /**
   * Reads the next DOM chunk from the page and runs the inference call on it.
   *
   * `startDomDebug` is awaited first; `cleanupDomDebug` is awaited in a
   * `finally` block so it also runs when page evaluation or inference throws.
   */
  private async extractNextChunk<T extends z.AnyZodObject>({
    instruction,
    schema,
    progress,
    content,
    chunksSeen,
    llmClient,
    requestId,
  }: {
    instruction: string;
    schema: T;
    progress: string;
    content: z.infer<T>;
    chunksSeen: Array<number>;
    llmClient: LLMClient;
    requestId?: string;
  }) {
    await this.startDomDebug();
    try {
      const { outputString, chunk, chunks } =
        await this.stagehand.page.evaluate(
          (seen?: number[]) => window.processDom(seen ?? []),
          chunksSeen,
        );

      this.logger({
        category: "extraction",
        message: "received output from processDom.",
        auxiliary: {
          chunk: {
            value: chunk.toString(),
            type: "integer",
          },
          chunks_left: {
            // Computed before the current chunk is counted as seen.
            value: (chunks.length - chunksSeen.length).toString(),
            type: "integer",
          },
          chunks_total: {
            value: chunks.length.toString(),
            type: "integer",
          },
        },
      });

      const extractionResponse = await extract({
        instruction,
        progress,
        previouslyExtractedContent: content,
        domElements: outputString,
        schema,
        llmClient,
        chunksSeen: chunksSeen.length,
        chunksTotal: chunks.length,
        requestId,
      });

      return { chunk, chunks, extractionResponse };
    } finally {
      await this.cleanupDomDebug();
    }
  }
}
@@ -0,0 +1,170 @@
1
+ import { LogLine } from "../../types/log";
2
+ import { Stagehand } from "../index";
3
+ import { observe } from "../inference";
4
+ import { LLMClient } from "../llm/LLMClient";
5
+ import { LLMProvider } from "../llm/LLMProvider";
6
+ import { generateId } from "../utils";
7
+ import { ScreenshotService } from "../vision";
8
+
9
/**
 * Finds candidate elements on the Stagehand page by sending a DOM snapshot
 * (or an annotated screenshot) to the `observe` inference call, and keeps a
 * record of each result keyed by an id derived from the instruction.
 */
export class StagehandObserveHandler {
  private readonly stagehand: Stagehand;
  private readonly logger: (logLine: LogLine) => void;
  private readonly waitForSettledDom: (
    domSettleTimeoutMs?: number,
  ) => Promise<void>;
  private readonly startDomDebug: () => Promise<void>;
  private readonly cleanupDomDebug: () => Promise<void>;
  private readonly llmProvider: LLMProvider;
  private readonly verbose: 0 | 1 | 2;
  private readonly llmClient: LLMClient;
  // Past observation results, keyed by the id returned from generateId().
  private observations: {
    [key: string]: {
      result: { selector: string; description: string }[];
      instruction: string;
    };
  };

  /** Stores the collaborators and starts with an empty observation record. */
  constructor({
    stagehand,
    logger,
    waitForSettledDom,
    startDomDebug,
    cleanupDomDebug,
    llmProvider,
    verbose,
    llmClient,
  }: {
    stagehand: Stagehand;
    logger: (logLine: LogLine) => void;
    waitForSettledDom: (domSettleTimeoutMs?: number) => Promise<void>;
    startDomDebug: () => Promise<void>;
    cleanupDomDebug: () => Promise<void>;
    llmProvider: LLMProvider;
    verbose: 0 | 1 | 2;
    llmClient: LLMClient;
  }) {
    this.stagehand = stagehand;
    this.logger = logger;
    this.waitForSettledDom = waitForSettledDom;
    this.startDomDebug = startDomDebug;
    this.cleanupDomDebug = cleanupDomDebug;
    this.llmProvider = llmProvider;
    this.verbose = verbose;
    this.llmClient = llmClient;
    this.observations = {};
  }

  /**
   * Stores an observation result under an id generated from the instruction.
   *
   * @returns The id the result was stored under.
   */
  private async _recordObservation(
    instruction: string,
    result: { selector: string; description: string }[],
  ): Promise<string> {
    const id = generateId(instruction);

    this.observations[id] = { result, instruction };

    return id;
  }

  /**
   * Observes the page and returns the elements the model selected.
   *
   * @param instruction - What to look for. When empty, a generic "find
   *   elements usable for future actions" instruction is used instead.
   * @param useVision - When `true` and the client supports vision, an
   *   annotated screenshot is sent in place of the DOM text.
   * @param fullPage - Selects `window.processAllOfDom()` over
   *   `window.processDom([])` and is forwarded to the screenshot service.
   * @param llmClient - Client used for the inference call.
   * @param requestId - Optional identifier forwarded to the inference call.
   * @param domSettleTimeoutMs - Optional timeout forwarded to
   *   `waitForSettledDom`.
   * @returns The selected elements, each with an `xpath=`-prefixed selector.
   *   Elements whose `elementId` has no selector-map entry are logged and
   *   omitted.
   */
  public async observe({
    instruction,
    useVision,
    fullPage,
    llmClient,
    requestId,
    domSettleTimeoutMs,
  }: {
    instruction: string;
    useVision: boolean;
    fullPage: boolean;
    llmClient: LLMClient;
    requestId?: string;
    domSettleTimeoutMs?: number;
  }): Promise<{ selector: string; description: string }[]> {
    if (!instruction) {
      instruction = `Find elements that can be used for any future actions in the page. These may be navigation links, related pages, section/subsection links, buttons, or other interactive elements. Be comprehensive: if there are multiple elements that may be relevant for future actions, return all of them.`;
    }
    this.logger({
      category: "observation",
      message: "starting observation",
      level: 1,
      auxiliary: {
        instruction: {
          value: instruction,
          type: "string",
        },
      },
    });

    await this.waitForSettledDom(domSettleTimeoutMs);

    const elementsWithSelectors = await this.findElements({
      instruction,
      useVision,
      fullPage,
      llmClient,
      requestId,
    });

    this.logger({
      category: "observation",
      message: "found elements",
      level: 1,
      auxiliary: {
        elements: {
          value: JSON.stringify(elementsWithSelectors),
          type: "object",
        },
      },
    });

    await this._recordObservation(instruction, elementsWithSelectors);
    return elementsWithSelectors;
  }

  /**
   * Snapshots the page, runs the inference call and maps the returned element
   * ids to selectors.
   *
   * `startDomDebug` is awaited first; `cleanupDomDebug` is awaited in a
   * `finally` block so it also runs when any step in between throws.
   */
  private async findElements({
    instruction,
    useVision,
    fullPage,
    llmClient,
    requestId,
  }: {
    instruction: string;
    useVision: boolean;
    fullPage: boolean;
    llmClient: LLMClient;
    requestId?: string;
  }): Promise<{ selector: string; description: string }[]> {
    await this.startDomDebug();
    try {
      const { outputString, selectorMap } = await this.stagehand.page.evaluate(
        (wholePage: boolean) =>
          wholePage ? window.processAllOfDom() : window.processDom([]),
        fullPage,
      );

      let domElements = outputString;
      let annotatedScreenshot: Buffer | undefined;
      if (useVision === true) {
        if (!llmClient.hasVision) {
          this.logger({
            category: "observation",
            message: "Model does not support vision. Skipping vision processing.",
            level: 1,
            auxiliary: {
              model: {
                value: llmClient.modelName,
                type: "string",
              },
            },
          });
        } else {
          const screenshotService = new ScreenshotService(
            this.stagehand.page,
            selectorMap,
            this.verbose,
            this.logger,
          );

          annotatedScreenshot =
            await screenshotService.getAnnotatedScreenshot(fullPage);
          // With an image attached, the DOM text is replaced by a pointer to it.
          domElements = "n/a. use the image to find the elements.";
        }
      }

      const observationResponse = await observe({
        instruction,
        domElements,
        llmClient,
        image: annotatedScreenshot,
        requestId,
      });

      const elementsWithSelectors: {
        selector: string;
        description: string;
      }[] = [];
      for (const element of observationResponse.elements) {
        const { elementId, ...rest } = element;
        const xpaths = selectorMap[elementId];

        // The model can return an id that is not in the selector map; skip it
        // rather than failing the whole observation with a TypeError.
        if (!xpaths || xpaths.length === 0) {
          this.logger({
            category: "observation",
            message: "skipping element with no matching selector",
            level: 1,
            auxiliary: {
              elementId: {
                value: String(elementId),
                type: "string",
              },
            },
          });
          continue;
        }

        elementsWithSelectors.push({
          ...rest,
          selector: `xpath=${xpaths[0]}`,
        });
      }

      return elementsWithSelectors;
    } finally {
      await this.cleanupDomDebug();
    }
  }
}