@browserbasehq/stagehand 1.4.0 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/cache/ActionCache.ts +158 -0
- package/lib/cache/BaseCache.ts +553 -0
- package/lib/cache/LLMCache.ts +48 -0
- package/lib/cache.ts +99 -0
- package/lib/dom/build/index.js +626 -0
- package/lib/dom/build/scriptContent.ts +1 -0
- package/lib/dom/debug.ts +147 -0
- package/lib/dom/genDomScripts.ts +29 -0
- package/lib/dom/global.d.ts +25 -0
- package/lib/dom/index.ts +3 -0
- package/lib/dom/process.ts +441 -0
- package/lib/dom/utils.ts +17 -0
- package/lib/dom/xpathUtils.ts +246 -0
- package/lib/handlers/actHandler.ts +1421 -0
- package/lib/handlers/extractHandler.ts +179 -0
- package/lib/handlers/observeHandler.ts +170 -0
- package/lib/index.ts +900 -0
- package/lib/inference.ts +324 -0
- package/lib/llm/AnthropicClient.ts +314 -0
- package/lib/llm/LLMClient.ts +66 -0
- package/lib/llm/LLMProvider.ts +81 -0
- package/lib/llm/OpenAIClient.ts +206 -0
- package/lib/prompt.ts +341 -0
- package/lib/utils.ts +16 -0
- package/lib/vision.ts +299 -0
- package/package.json +3 -3
package/lib/vision.ts
ADDED
|
@@ -0,0 +1,299 @@
|
|
|
1
|
+
import { Page } from "@playwright/test";
|
|
2
|
+
import { exec } from "child_process";
|
|
3
|
+
import fs from "fs";
|
|
4
|
+
import path from "path";
|
|
5
|
+
import sharp from "sharp";
|
|
6
|
+
import { LogLine } from "../types/log";
|
|
7
|
+
import { logLineToString } from "./utils";
|
|
8
|
+
|
|
9
|
+
/** Rectangle drawn around an annotated element, tagged with its selector-map id. */
type AnnotationBox = {
  x: number;
  y: number;
  width: number;
  height: number;
  id: string;
};

/** A 2D point; used for label-badge centres and for scroll offsets. */
type NumberPosition = {
  x: number;
  y: number;
};

/**
 * Captures screenshots of a Playwright page and, optionally, overlays a red
 * outline plus a numbered badge for every entry in the selector map.
 */
export class ScreenshotService {
  /** Radius, in pixels, of the circular badge that carries an element's id. */
  private static readonly LABEL_RADIUS = 12;

  private page: Page;
  private selectorMap: Record<number, string[]>;
  private annotationBoxes: AnnotationBox[] = [];
  private numberPositions: NumberPosition[] = [];
  private isDebugEnabled: boolean;
  private verbose: 0 | 1 | 2;
  private externalLogger?: (logLine: LogLine) => void;

  /**
   * @param page - Playwright page to capture.
   * @param selectorMap - Element id mapped to candidate XPath selectors.
   * @param verbose - Console verbosity; lines with `level <= verbose` are printed.
   * @param externalLogger - Optional sink that receives every log line.
   * @param isDebugEnabled - When true, annotated screenshots are written to
   *   disk and opened in the platform image viewer.
   */
  constructor(
    page: Page,
    selectorMap: Record<number, string[]>,
    verbose: 0 | 1 | 2,
    externalLogger?: (logLine: LogLine) => void,
    isDebugEnabled: boolean = false,
  ) {
    this.page = page;
    this.selectorMap = selectorMap;
    this.isDebugEnabled = isDebugEnabled;
    this.verbose = verbose;
    this.externalLogger = externalLogger;
  }

  /**
   * Prints the line to the console when its level is within the configured
   * verbosity, and always forwards it to the external logger if one was given.
   */
  log(logLine: LogLine) {
    if (this.verbose >= logLine.level) {
      console.log(logLineToString(logLine));
    }
    if (this.externalLogger) {
      this.externalLogger(logLine);
    }
  }

  /**
   * Captures the page as a JPEG.
   *
   * @param fullpage - Capture the whole scrollable page instead of the viewport.
   * @param quality - Optional JPEG quality, 0-100 inclusive.
   * @returns The encoded image.
   * @throws Error if `quality` is supplied but is not a number within 0-100.
   */
  async getScreenshot(
    fullpage: boolean = true,
    quality?: number,
  ): Promise<Buffer> {
    // Written as a negated range test so that NaN is rejected too; a plain
    // truthiness check would let NaN (and skip 0) straight through.
    if (quality !== undefined && !(quality >= 0 && quality <= 100)) {
      throw new Error("quality must be between 0 and 100");
    }

    return await this.page.screenshot({
      fullPage: fullpage,
      quality,
      type: "jpeg",
    });
  }

  /**
   * Computes `width * height` of an encoded image.
   *
   * @param screenshot - Encoded image buffer.
   * @returns The number of pixels in the image.
   * @throws Error if the image metadata carries no usable dimensions.
   */
  async getScreenshotPixelCount(screenshot: Buffer): Promise<number> {
    const image = sharp(screenshot);
    const metadata = await image.metadata();

    if (!metadata.width || !metadata.height) {
      this.log({
        category: "screenshotService",
        message: "Unable to determine image dimensions.",
        level: 0,
        auxiliary: {
          width: {
            value: metadata.width?.toString() ?? "undefined",
            type: "string", // might be undefined
          },
          height: {
            value: metadata.height?.toString() ?? "undefined",
            type: "string", // might be undefined
          },
        },
      });
      throw new Error("Unable to determine image dimensions.");
    }

    const pixelCount = metadata.width * metadata.height;
    this.log({
      category: "screenshotService",
      message: "got screenshot pixel count",
      level: 1,
      auxiliary: {
        pixelCount: {
          value: pixelCount.toString(),
          type: "integer",
        },
      },
    });
    return pixelCount;
  }

  /**
   * Captures the page and draws an outline and numbered badge for every
   * element in the selector map that can be located.
   *
   * @param fullpage - Capture the whole scrollable page instead of the viewport.
   * @returns The annotated image.
   * @throws Error if the captured image has no usable dimensions.
   */
  async getAnnotatedScreenshot(fullpage: boolean): Promise<Buffer> {
    this.annotationBoxes = [];
    this.numberPositions = [];

    const screenshot = await this.getScreenshot(fullpage);
    const image = sharp(screenshot);

    const { width, height } = await image.metadata();
    if (!width || !height) {
      // Without dimensions the overlay would be emitted with width="undefined".
      throw new Error("Unable to determine image dimensions.");
    }

    this.log({
      category: "screenshotService",
      message: "annotating screenshot",
      level: 2,
      auxiliary: {
        selectorMap: {
          value: JSON.stringify(this.selectorMap),
          type: "object",
        },
      },
    });

    // Bounding boxes are viewport-relative. A full-page image has its origin
    // at the document origin, so the scroll offset must be added; a viewport
    // image shares the viewport origin, so no shift applies. The offset is
    // read once here rather than once per element.
    const origin: NumberPosition = fullpage
      ? await this.getScrollPosition()
      : { x: 0, y: 0 };

    const svgAnnotations = await Promise.all(
      Object.entries(this.selectorMap).map(([id, selectors]) =>
        this.createElementAnnotation(id, selectors, origin),
      ),
    );

    // The overlay is composited at top/left 0, so all positioning is carried
    // by the element coordinates themselves.
    const svg = `
      <svg width="${width}" height="${height}" xmlns="http://www.w3.org/2000/svg">
        ${svgAnnotations.join("")}
      </svg>
    `;

    const annotatedScreenshot = await image
      .composite([{ input: Buffer.from(svg), top: 0, left: 0 }])
      .toBuffer();

    if (this.isDebugEnabled) {
      await this.saveAndOpenScreenshot(annotatedScreenshot);
    }

    return annotatedScreenshot;
  }

  /** Reads the page's current scroll offset. */
  private async getScrollPosition(): Promise<NumberPosition> {
    return await this.page.evaluate(() => ({
      x: window.scrollX,
      y: window.scrollY,
    }));
  }

  /**
   * Builds the SVG fragment (outline, badge, id text) for one element.
   *
   * All candidate selectors are probed in parallel; the first one, in
   * selector order, that yields a bounding box is used.
   *
   * @param id - Element id shown inside the badge.
   * @param selectors - Candidate XPath selectors for the element.
   * @param scrollOffset - Offset added to the viewport-relative bounding box.
   *   When omitted, the page's current scroll position is queried and used.
   * @returns The SVG fragment, or an empty string if no selector resolved to
   *   a box or any other error occurred (the failure is logged at level 1).
   */
  private async createElementAnnotation(
    id: string,
    selectors: string[],
    scrollOffset?: NumberPosition,
  ): Promise<string> {
    try {
      const candidateBoxes = await Promise.all(
        selectors.map(async (selector) => {
          try {
            return await this.page
              .locator(`xpath=${selector}`)
              .first()
              .boundingBox({ timeout: 5_000 });
          } catch {
            // A selector that fails or times out is simply not a candidate.
            return null;
          }
        }),
      );

      const box = candidateBoxes.find((candidate) => candidate !== null);
      if (!box) {
        throw new Error(`Unable to create annotation for element ${id}`);
      }

      const offset = scrollOffset ?? (await this.getScrollPosition());

      const adjustedBox: AnnotationBox = {
        x: box.x + offset.x,
        y: box.y + offset.y,
        width: box.width,
        height: box.height,
        id,
      };

      this.annotationBoxes.push(adjustedBox);

      const numberPosition = this.findNonOverlappingNumberPosition(adjustedBox);
      const circleRadius = ScreenshotService.LABEL_RADIUS;

      return `
        <rect x="${adjustedBox.x}" y="${adjustedBox.y}" width="${adjustedBox.width}" height="${adjustedBox.height}"
              fill="none" stroke="red" stroke-width="2" />
        <circle cx="${numberPosition.x}" cy="${numberPosition.y}" r="${circleRadius}" fill="white" stroke="red" stroke-width="2" />
        <text x="${numberPosition.x}" y="${numberPosition.y}" fill="red" font-size="16" font-weight="bold"
              text-anchor="middle" dominant-baseline="central">
          ${id}
        </text>
      `;
    } catch (error: unknown) {
      // Thrown values are not guaranteed to be Error instances.
      const failure = error instanceof Error ? error : new Error(String(error));
      this.log({
        category: "screenshotService",
        message: "warning: failed to create annotation for element",
        level: 1,
        auxiliary: {
          element_id: {
            value: id,
            type: "string",
          },
          error: {
            value: failure.message,
            type: "string",
          },
          trace: {
            value: failure.stack ?? "",
            type: "string",
          },
        },
      });
      return "";
    }
  }

  /**
   * Picks a centre for an element's badge and records it.
   *
   * The badge starts one radius up and left of the box's top-left corner,
   * clamped so the circle is not cut off by the top or left image edge. It is
   * then nudged downwards in 5px steps (at most 10 times) while it collides
   * with a previously placed badge; after the last attempt the position is
   * accepted even if it still collides.
   */
  private findNonOverlappingNumberPosition(box: AnnotationBox): NumberPosition {
    const circleRadius = ScreenshotService.LABEL_RADIUS;
    const position: NumberPosition = {
      x: Math.max(circleRadius, box.x - circleRadius),
      y: Math.max(circleRadius, box.y - circleRadius),
    };

    const maxAttempts = 10;
    const offset = 5;

    for (
      let attempts = 0;
      attempts < maxAttempts && this.isNumberOverlapping(position);
      attempts++
    ) {
      position.y += offset;
    }

    this.numberPositions.push(position);
    return position;
  }

  /** True when a badge centred at `position` would intersect a placed badge. */
  private isNumberOverlapping(position: NumberPosition): boolean {
    // Two equal circles intersect when their centres are closer than one diameter.
    const minDistance = ScreenshotService.LABEL_RADIUS * 2;
    return this.numberPositions.some(
      (existingPosition) =>
        Math.hypot(
          position.x - existingPosition.x,
          position.y - existingPosition.y,
        ) < minDistance,
    );
  }

  /**
   * Chooses a file extension from the buffer's leading magic bytes so the
   * saved file's name matches its contents. Falls back to "png".
   */
  private static detectImageExtension(image: Buffer): string {
    const isJpeg =
      image.length >= 3 &&
      image[0] === 0xff &&
      image[1] === 0xd8 &&
      image[2] === 0xff;
    return isJpeg ? "jpg" : "png";
  }

  /**
   * Builds the shell command that opens `filename` in the default viewer,
   * quoting the path so spaces and shell metacharacters are taken literally.
   */
  private static buildOpenCommand(filename: string, platform: string): string {
    if (platform === "win32") {
      // `start` treats its first quoted argument as the window title, so an
      // empty title must precede the quoted path.
      return `start "" "${filename}"`;
    }
    // POSIX single quotes: close the quote, emit an escaped quote, reopen.
    const quoted = `'${filename.replace(/'/g, `'\\''`)}'`;
    return platform === "darwin" ? `open ${quoted}` : `xdg-open ${quoted}`;
  }

  /**
   * Writes the image to `<cwd>/screenshots/screenshot-<timestamp>.<ext>` and
   * asks the operating system to open it. Failure to launch the viewer is
   * logged rather than thrown.
   *
   * @param screenshot - Encoded image to persist.
   */
  async saveAndOpenScreenshot(screenshot: Buffer): Promise<void> {
    const screenshotDir = path.join(process.cwd(), "screenshots");
    // recursive: true makes this a no-op when the directory already exists.
    await fs.promises.mkdir(screenshotDir, { recursive: true });

    const timestamp = new Date().toISOString().replace(/[:.]/g, "-");
    const extension = ScreenshotService.detectImageExtension(screenshot);
    const filename = path.join(
      screenshotDir,
      `screenshot-${timestamp}.${extension}`,
    );

    await fs.promises.writeFile(filename, screenshot);
    this.log({
      category: "screenshotService",
      message: "screenshot saved",
      level: 1,
      auxiliary: {
        filename: {
          value: filename,
          type: "string",
        },
      },
    });

    // Open the screenshot with the default image viewer
    const command = ScreenshotService.buildOpenCommand(
      filename,
      process.platform,
    );
    exec(command, (error) => {
      if (error) {
        this.log({
          category: "screenshotService",
          message: "warning: failed to open screenshot in image viewer",
          level: 1,
          auxiliary: {
            filename: {
              value: filename,
              type: "string",
            },
            error: {
              value: error.message,
              type: "string",
            },
          },
        });
      }
    });
  }
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@browserbasehq/stagehand",
|
|
3
|
-
"version": "1.4.0",
|
|
3
|
+
"version": "1.5.0",
|
|
4
4
|
"description": "An AI web browsing framework focused on simplicity and extensibility.",
|
|
5
5
|
"main": "./dist/index.js",
|
|
6
6
|
"module": "./dist/index.js",
|
|
@@ -16,12 +16,12 @@
|
|
|
16
16
|
"build-types": "tsc --emitDeclarationOnly --outDir dist",
|
|
17
17
|
"build-js": "tsup lib/index.ts --dts",
|
|
18
18
|
"build": "npm run build-dom-scripts && npm run build-js && npm run build-types",
|
|
19
|
-
"postinstall": "npm run build",
|
|
20
19
|
"release": "npm run build && changeset publish",
|
|
21
20
|
"release-canary": "npm run build && changeset version --snapshot && changeset publish --tag alpha"
|
|
22
21
|
},
|
|
23
22
|
"files": [
|
|
24
|
-
"dist/**"
|
|
23
|
+
"dist/**",
|
|
24
|
+
"lib/**"
|
|
25
25
|
],
|
|
26
26
|
"keywords": [],
|
|
27
27
|
"author": "Paul Klein IV",
|