@browserbasehq/stagehand 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +475 -0
- package/dist/dom/build/debug.js +115 -0
- package/dist/dom/build/index.js +474 -0
- package/dist/dom/build/process.js +342 -0
- package/dist/dom/build/types.js +2 -0
- package/dist/dom/build/utils.js +19 -0
- package/dist/index.js +1970 -0
- package/package.json +70 -0
package/dist/index.js
ADDED
|
@@ -0,0 +1,1970 @@
|
|
|
1
|
+
// Bundler runtime helpers: short aliases for Object built-ins plus the small
// utilities the rest of the bundle uses for object spread/rest and for
// CommonJS <-> ES module interop.
var __create = Object.create;
var __defProp = Object.defineProperty;
var __defProps = Object.defineProperties;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropDescs = Object.getOwnPropertyDescriptors;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __getOwnPropSymbols = Object.getOwnPropertySymbols;
var __getProtoOf = Object.getPrototypeOf;
var __hasOwnProp = Object.prototype.hasOwnProperty;
var __propIsEnum = Object.prototype.propertyIsEnumerable;

// Sets obj[key]; when the key is already reachable on obj (own or inherited)
// it is (re)defined as a plain enumerable/configurable/writable data property.
var __defNormalProp = (obj, key, value) => {
  if (key in obj) {
    return __defProp(obj, key, { enumerable: true, configurable: true, writable: true, value });
  }
  return obj[key] = value;
};

// Copies own enumerable string- and symbol-keyed properties of `b` onto `a`
// and returns `a`. A falsy `b` is treated as an empty object.
var __spreadValues = (a, b) => {
  if (!b) {
    b = {};
  }
  for (var key in b) {
    if (__hasOwnProp.call(b, key)) {
      __defNormalProp(a, key, b[key]);
    }
  }
  if (__getOwnPropSymbols) {
    for (var sym of __getOwnPropSymbols(b)) {
      if (__propIsEnum.call(b, sym)) {
        __defNormalProp(a, sym, b[sym]);
      }
    }
  }
  return a;
};

// Defines every own property descriptor of `b` on `a` and returns `a`.
var __spreadProps = (a, b) => {
  return __defProps(a, __getOwnPropDescs(b));
};

// Returns a new object holding the own enumerable properties of `source`
// whose keys are not listed in `exclude`.
var __objRest = (source, exclude) => {
  var target = {};
  for (var key in source) {
    if (!__hasOwnProp.call(source, key)) continue;
    if (exclude.indexOf(key) >= 0) continue;
    target[key] = source[key];
  }
  if (source != null && __getOwnPropSymbols) {
    for (var sym of __getOwnPropSymbols(source)) {
      if (exclude.indexOf(sym) >= 0) continue;
      if (!__propIsEnum.call(source, sym)) continue;
      target[sym] = source[sym];
    }
  }
  return target;
};

// Installs each entry of `all` (name -> getter function) on `target` as an
// enumerable accessor property.
var __export = (target, all) => {
  for (var name in all) {
    __defProp(target, name, { get: all[name], enumerable: true });
  }
};

// Mirrors the own properties of `from` onto `to` as live getters, skipping
// keys `to` already owns and the single key named by `except`.
var __copyProps = (to, from, except, desc) => {
  var copyable = from && typeof from === "object" || typeof from === "function";
  if (!copyable) {
    return to;
  }
  for (let key of __getOwnPropNames(from)) {
    if (__hasOwnProp.call(to, key) || key === except) continue;
    desc = __getOwnPropDesc(from, key);
    __defProp(to, key, { get: () => from[key], enumerable: !desc || desc.enumerable });
  }
  return to;
};

// Wraps a required module in an ESM-shaped namespace object.
var __toESM = (mod, isNodeMode, target) => {
  target = mod != null ? __create(__getProtoOf(mod)) : {};
  // If the importer is in node compatibility mode or this is not an ESM
  // file that has been converted to a CommonJS file using a Babel-
  // compatible transform (i.e. "__esModule" has not been set), then set
  // "default" to the CommonJS "module.exports" for node compatibility.
  if (isNodeMode || !mod || !mod.__esModule) {
    __defProp(target, "default", { value: mod, enumerable: true });
  }
  return __copyProps(target, mod);
};

// Produces the CommonJS export object for an ESM namespace: a fresh object
// flagged with "__esModule" that forwards every export of `mod`.
var __toCommonJS = (mod) => {
  var flagged = __defProp({}, "__esModule", { value: true });
  return __copyProps(flagged, mod);
};
|
|
57
|
+
// Runs a generator function as an async function: every yielded value is
// awaited and fed back into the generator (rejections are thrown into it),
// and the returned promise settles with the generator's return value or
// with the first error that escapes it.
var __async = (__this, __arguments, generator) =>
  new Promise((resolve, reject) => {
    const iterator = generator.apply(__this, __arguments);
    // Advances the generator with "next" or "throw"; anything thrown while
    // advancing rejects the outer promise.
    const drive = (method, input) => {
      try {
        const state = iterator[method](input);
        if (state.done) {
          resolve(state.value);
        } else {
          Promise.resolve(state.value).then(onFulfilled, onRejected);
        }
      } catch (e) {
        reject(e);
      }
    };
    const onFulfilled = (value) => {
      drive("next", value);
    };
    const onRejected = (reason) => {
      drive("throw", reason);
    };
    drive("next");
  });
|
|
77
|
+
|
|
78
|
+
// lib/index.ts
|
|
79
|
+
var lib_exports = {};
|
|
80
|
+
__export(lib_exports, {
|
|
81
|
+
Stagehand: () => Stagehand
|
|
82
|
+
});
|
|
83
|
+
module.exports = __toCommonJS(lib_exports);
|
|
84
|
+
var import_test = require("@playwright/test");
|
|
85
|
+
var import_crypto = __toESM(require("crypto"));
|
|
86
|
+
var import_fs2 = __toESM(require("fs"));
|
|
87
|
+
|
|
88
|
+
// lib/prompt.ts
|
|
89
|
+
// System prompt for the "act" loop. Built line by line; the leading and
// trailing empty entries give the string its opening and closing newline.
var actSystemPrompt = [
  "",
  "# Instructions",
  "You are a browser automation assistant. Your job is to accomplish the user's goal across multiple model calls.",
  "",
  "You are given:",
  "1. the user's overall goal",
  "2. the steps that you've taken so far",
  "3. a list of active DOM elements in this chunk to consider to get closer to the goal. ",
  "",
  "You have 2 tools that you can call: doAction, and skipSection. Do action only performs Playwright actions. Do not perform any other actions.",
  "",
  "Note: If there is a popup on the page for cookies or advertising that has nothing to do with the goal, try to close it first before proceeding. As this can block the goal from being completed.",
  "",
  "Also, verify if the goal has been accomplished already. Do this by checking if the goal has been accomplished based on the previous steps completed, the current page DOM elements and the current page URL / starting page URL. If it has, set completed to true and finish the task.",
  "",
  "Do exactly what the user's goal is. Do not exceed the scope of the goal.",
  ""
].join("\n");
|
|
106
|
+
// System prompt used when asking the model whether an "act" goal is done.
var verifyActCompletionSystemPrompt = `
You are a browser automation assistant. The job has given you a goal and a list of steps that have been taken so far. Your job is to determine if the user's goal has been completed based on the provided information.

# Input
You will receive:
1. The user's goal: A clear description of what the user wants to achieve.
2. Steps taken so far: A list of actions that have been performed up to this point.
3. An image of the current page

# Your Task
Analyze the provided information to determine if the user's goal has been fully completed.

# Output
Return a boolean value:
- true: If the goal has been definitively completed based on the steps taken and the current page.
- false: If the goal has not been completed or if there's any uncertainty about its completion.

# Important Considerations
- False positives are okay. False negatives are not okay.
- Look for evidence of errors on the page or something having gone wrong in completing the goal. If one does not exist, return true.
`;

/**
 * Wraps the completion-verification prompt in a chat message.
 * @returns {{role: string, content: string}} A "system" message.
 */
function buildVerifyActCompletionSystemPrompt() {
  const message = { role: "system" };
  message.content = verifyActCompletionSystemPrompt;
  return message;
}
|
|
133
|
+
/**
 * Builds the user message for the completion check.
 * @param {*} goal - The goal text, interpolated under "# My Goal".
 * @param {*} [steps="None"] - Steps taken so far.
 * @param {*} [domElements] - When truthy, appended as an extra section.
 * @returns {{role: string, content: string}} A "user" message.
 */
function buildVerifyActCompletionUserPrompt(goal, steps = "None", domElements) {
  const sections = [
    `\n# My Goal\n${goal}\n\n# Steps You've Taken So Far\n${steps}\n`
  ];
  if (domElements) {
    sections.push(`\n# Active DOM Elements on the current page\n${domElements}\n`);
  }
  return { role: "user", content: sections.join("") };
}
|
|
152
|
+
/**
 * Wraps the module-level `actSystemPrompt` text in a chat message.
 * @returns {{role: string, content: string}} A "system" message.
 */
function buildActSystemPrompt() {
  const message = { role: "system" };
  message.content = actSystemPrompt;
  return message;
}
|
|
158
|
+
/**
 * Builds the user message for one "act" step.
 * @param {*} action - The goal text, interpolated under "# My Goal".
 * @param {*} [steps="None"] - Steps taken so far.
 * @param {*} domElements - DOM listing; interpolated as-is, even when undefined.
 * @returns {{role: string, content: string}} A "user" message.
 */
function buildActUserPrompt(action, steps = "None", domElements) {
  const content =
    `\n# My Goal\n${action}\n` +
    `\n# Steps You've Taken So Far\n${steps}\n` +
    `\n# Current Active Dom Elements\n${domElements}\n`;
  return { role: "user", content };
}
|
|
174
|
+
// Tool definitions offered to the model during "act": doAction performs a
// Playwright call on a numbered element, skipSection declines the current chunk.
var actTools = (() => {
  // Schema fragment for a described string property.
  const text = (description) => ({ type: "string", description });
  // Wraps a name/description/parameter schema as a function-type tool entry.
  const tool = (name, description, parameters) => ({
    type: "function",
    function: { name, description, parameters }
  });

  const doAction = tool(
    "doAction",
    "execute the next playwright step that directly accomplishes the goal",
    {
      type: "object",
      required: ["method", "element", "args", "step", "completed"],
      properties: {
        method: text("The playwright function to call."),
        element: {
          type: "number",
          description: "The element number to act on"
        },
        args: {
          type: "array",
          description: "The required arguments",
          items: text("The argument to pass to the function")
        },
        step: text("human readable description of the step that is taken in the past tense. Please be very detailed."),
        why: text("why is this step taken? how does it advance the goal?"),
        completed: {
          type: "boolean",
          description: "true if the goal should be accomplished after this step"
        }
      }
    }
  );

  const skipSection = tool(
    "skipSection",
    "skips this area of the webpage because the current goal cannot be accomplished here",
    {
      type: "object",
      properties: {
        reason: text("reason that no action is taken")
      }
    }
  );

  return [doAction, skipSection];
})();
|
|
233
|
+
// System prompt for the extraction call.
var extractSystemPrompt = `You are extracting content on behalf of a user. You will be given:
1. An instruction
2. A list of DOM elements to extract from

Print the exact text from the DOM elements with all symbols, characters, and endlines as is.
Print null or an empty string if no new information is found.

ONLY print the content using the print_extracted_data tool provided.
ONLY print the content using the print_extracted_data tool provided.
`;

/**
 * Builds the extraction system message, with every run of whitespace in the
 * prompt collapsed to a single space.
 * @returns {{role: string, content: string}} A "system" message.
 */
function buildExtractSystemPrompt() {
  const flattened = extractSystemPrompt.split(/\s+/).join(" ");
  return { role: "system", content: flattened };
}
|
|
250
|
+
/**
 * Builds the user message for an extraction call.
 * @param {*} instruction - What to extract.
 * @param {*} domElements - DOM listing to extract from.
 * @returns {{role: string, content: string}} A "user" message.
 */
function buildExtractUserPrompt(instruction, domElements) {
  const lines = [
    `Instruction: ${instruction}`,
    `DOM: ${domElements}`,
    "",
    "ONLY print the content using the print_extracted_data tool provided.",
    "ONLY print the content using the print_extracted_data tool provided."
  ];
  return { role: "user", content: lines.join("\n") };
}
|
|
260
|
+
// System prompt for merging newly extracted content into earlier results.
var refineSystemPrompt = `You are tasked with refining and filtering information for the final output based on newly extracted and previously extracted content. Your responsibilities are:
1. Remove exact duplicates for elements in arrays and objects.
2. For text fields, append or update relevant text if the new content is an extension, replacement, or continuation.
3. For non-text fields (e.g., numbers, booleans), update with new values if they differ.
4. Add any completely new fields or objects.

Return the updated content that includes both the previous content and the new, non-duplicate, or extended information.`;

/**
 * Wraps the refinement prompt in a chat message.
 * @returns {{role: string, content: string}} A "system" message.
 */
function buildRefineSystemPrompt() {
  const message = { role: "system" };
  message.content = refineSystemPrompt;
  return message;
}
|
|
273
|
+
/**
 * Builds the user message for the refinement call. Both content arguments
 * are rendered as 2-space-indented JSON.
 * @param {*} instruction - The extraction instruction.
 * @param {*} previouslyExtractedContent - Result accumulated so far.
 * @param {*} newlyExtractedContent - Result of the latest extraction.
 * @returns {{role: string, content: string}} A "user" message.
 */
function buildRefineUserPrompt(instruction, previouslyExtractedContent, newlyExtractedContent) {
  const pretty = (value) => JSON.stringify(value, null, 2);
  const lines = [
    `Instruction: ${instruction}`,
    `Previously extracted content: ${pretty(previouslyExtractedContent)}`,
    `Newly extracted content: ${pretty(newlyExtractedContent)}`,
    "Refined content:"
  ];
  return { role: "user", content: lines.join("\n") };
}
|
|
282
|
+
// System prompt for judging whether an extraction task is finished.
var metadataSystemPrompt = `You are an AI assistant tasked with evaluating the progress and completion status of an extraction task.
Analyze the extraction response and determine if the task is completed or if more information is needed.

Strictly abide by the following criteria:
1. If you are certain that the instruction is completed, set the completion status to true, even if there are still chunks left.
2. If there could still be more information to extract and there are still chunks left, set the completion status to false.`;

/**
 * Wraps the metadata prompt in a chat message.
 * @returns {{role: string, content: string}} A "system" message.
 */
function buildMetadataSystemPrompt() {
  const message = { role: "system" };
  message.content = metadataSystemPrompt;
  return message;
}
|
|
294
|
+
/**
 * Builds the user message for the metadata (progress/completion) call.
 * @param {*} instruction - The extraction instruction.
 * @param {*} extractionResponse - Extracted content, rendered as indented JSON.
 * @param {*} chunksSeen - Number of chunks processed so far.
 * @param {*} chunksTotal - Total number of chunks.
 * @returns {{role: string, content: string}} A "user" message.
 */
function buildMetadataPrompt(instruction, extractionResponse, chunksSeen, chunksTotal) {
  const lines = [
    `Instruction: ${instruction}`,
    `Extracted content: ${JSON.stringify(extractionResponse, null, 2)}`,
    `Chunks seen: ${chunksSeen}`,
    `Chunks total: ${chunksTotal}`
  ];
  return { role: "user", content: lines.join("\n") };
}
|
|
303
|
+
// System prompt for the "observe" call.
var observeSystemPrompt = `
You are helping the user automate the browser by finding elements based on what the user wants to observe in the page.
You will be given:
1. a instruction of elements to observe
2. a numbered list of possible elements or an annotated image of the page

Return an array of elements that match the instruction.
`;

/**
 * Builds the observe system message, with every run of whitespace in the
 * prompt collapsed to a single space.
 * @returns {{role: string, content: string}} A "system" message.
 */
function buildObserveSystemPrompt() {
  const flattened = observeSystemPrompt.split(/\s+/).join(" ");
  return { role: "system", content: flattened };
}
|
|
318
|
+
/**
 * Builds the user message for an observe call.
 * @param {*} instruction - What to look for on the page.
 * @param {*} domElements - DOM listing to search.
 * @returns {{role: string, content: string}} A "user" message.
 */
function buildObserveUserMessage(instruction, domElements) {
  const content = [`instruction: ${instruction}`, `DOM: ${domElements}`].join("\n");
  return { role: "user", content };
}
|
|
325
|
+
|
|
326
|
+
// lib/inference.ts
|
|
327
|
+
var import_zod = require("zod");
|
|
328
|
+
|
|
329
|
+
// lib/llm/LLMClient.ts
|
|
330
|
+
// Model names treated as able to accept image input.
// "claude-3-5-sonnet-20241022" is included so that every Claude 3.5 Sonnet
// id registered in LLMProvider's model map is covered, not only the older
// snapshot and the "-latest" alias.
var modelsWithVision = [
  "gpt-4o",
  "gpt-4o-mini",
  "claude-3-5-sonnet-latest",
  "claude-3-5-sonnet-20240620",
  "claude-3-5-sonnet-20241022",
  "gpt-4o-2024-08-06"
];
// Caption sent alongside annotated screenshots.
var AnnotatedScreenshotText = "This is a screenshot of the current page state with the elements annotated on it. Each element id is annotated with a number to the top left of it. Duplicate annotations at the same location are under each other vertically.";
|
|
338
|
+
|
|
339
|
+
// lib/inference.ts
|
|
340
|
+
/**
 * Asks the model whether the goal has been completed.
 *
 * Takes a single options object with `goal`, `steps`, `llmProvider`,
 * `modelName`, `screenshot` (optional image buffer), `domElements`
 * (optional) and `logger`.
 *
 * @returns {Promise<*>} The `completed` field of the model's structured
 *   reply, or `false` when the reply is not an object or lacks that field
 *   (both cases are logged under the "VerifyAct" category).
 */
function verifyActCompletion(_0) {
  return __async(this, arguments, function* ({
    goal,
    steps,
    llmProvider,
    modelName,
    screenshot,
    domElements,
    logger
  }) {
    const llmClient = llmProvider.getClient(modelName);
    const systemMessage = buildVerifyActCompletionSystemPrompt();
    const userMessage = buildVerifyActCompletionUserPrompt(goal, steps, domElements);

    // The image key is always present on the request; it is undefined
    // when no screenshot was supplied.
    let image = void 0;
    if (screenshot) {
      image = {
        buffer: screenshot,
        description: "This is a screenshot of the whole visible page."
      };
    }

    const verificationSchema = import_zod.z.object({
      completed: import_zod.z.boolean().describe("true if the goal is accomplished")
    });

    const response = yield llmClient.createChatCompletion({
      model: modelName,
      messages: [systemMessage, userMessage],
      temperature: 0.1,
      top_p: 1,
      frequency_penalty: 0,
      presence_penalty: 0,
      image,
      response_model: {
        name: "Verification",
        schema: verificationSchema
      }
    });

    const report = (message) => {
      logger({ category: "VerifyAct", message });
    };

    const isObject = response && typeof response === "object";
    if (!isObject) {
      report("Unexpected response format: " + JSON.stringify(response));
      return false;
    }
    if (response.completed === void 0) {
      report("Missing 'completed' field in response");
      return false;
    }
    return response.completed;
  });
}
|
|
390
|
+
/**
 * Asks the model for the next browser action toward `action`.
 *
 * Takes a single options object with `action`, `domElements`, `steps`,
 * `llmProvider`, `modelName`, `screenshot` (optional image buffer),
 * `retries` (attempts already made, default 0) and `logger`.
 *
 * @returns {Promise<object|null>} The parsed arguments of the first tool
 *   call; `null` when the model chose "skipSection" or when it produced no
 *   tool call on the initial attempt and on two retries.
 * @throws {SyntaxError} If the tool-call arguments are not valid JSON.
 */
function act(_0) {
  return __async(this, arguments, function* ({
    action,
    domElements,
    steps,
    llmProvider,
    modelName,
    screenshot,
    retries = 0,
    logger
  }) {
    const llmClient = llmProvider.getClient(modelName);
    const messages = [
      buildActSystemPrompt(),
      buildActUserPrompt(action, steps, domElements)
    ];
    const response = yield llmClient.createChatCompletion({
      model: modelName,
      messages,
      temperature: 0.1,
      top_p: 1,
      frequency_penalty: 0,
      presence_penalty: 0,
      tool_choice: "auto",
      tools: actTools,
      image: screenshot ? { buffer: screenshot, description: AnnotatedScreenshotText } : void 0
    });
    const toolCalls = response.choices[0].message.tool_calls;
    if (toolCalls && toolCalls.length > 0) {
      // Only the first tool call is honoured.
      if (toolCalls[0].function.name === "skipSection") {
        return null;
      }
      return JSON.parse(toolCalls[0].function.arguments);
    }
    if (retries >= 2) {
      logger({
        category: "Act",
        message: "No tool calls found in response"
      });
      return null;
    }
    // Retry with the same inputs. The screenshot is forwarded too, so a
    // retry sees the same annotated image as the first attempt.
    return act({
      action,
      domElements,
      steps,
      llmProvider,
      modelName,
      screenshot,
      retries: retries + 1,
      logger
    });
  });
}
|
|
443
|
+
/**
 * Runs one extraction round as three sequential model calls: extract from
 * the DOM chunk, merge with previously extracted content, then judge
 * progress/completion.
 *
 * Takes a single options object with `instruction`,
 * `previouslyExtractedContent`, `domElements`, `schema`, `llmProvider`,
 * `modelName`, `chunksSeen` and `chunksTotal`. A `progress` field is
 * accepted but not read by this function.
 *
 * @returns {Promise<object>} The refined extraction, with the metadata reply
 *   attached as its `metadata` property.
 */
function extract(_0) {
  return __async(this, arguments, function* ({
    instruction,
    progress,
    previouslyExtractedContent,
    domElements,
    schema,
    llmProvider,
    modelName,
    chunksSeen,
    chunksTotal
  }) {
    const llmClient = llmProvider.getClient(modelName);
    // All three calls share the same model and sampling settings.
    const complete = (messages, response_model) => llmClient.createChatCompletion({
      model: modelName,
      messages,
      response_model,
      temperature: 0.1,
      top_p: 1,
      frequency_penalty: 0,
      presence_penalty: 0
    });

    const extractionResponse = yield complete(
      [
        buildExtractSystemPrompt(),
        buildExtractUserPrompt(instruction, domElements)
      ],
      { schema, name: "Extraction" }
    );

    const refinedResponse = yield complete(
      [
        buildRefineSystemPrompt(),
        buildRefineUserPrompt(instruction, previouslyExtractedContent, extractionResponse)
      ],
      { schema, name: "RefinedExtraction" }
    );

    const metadataSchema = import_zod.z.object({
      progress: import_zod.z.string().describe(
        "progress of what has been extracted so far, as concise as possible"
      ),
      completed: import_zod.z.boolean().describe(
        "true if the goal is now accomplished. Use this conservatively, only when you are sure that the goal has been completed."
      )
    });

    const metadataResponse = yield complete(
      [
        buildMetadataSystemPrompt(),
        buildMetadataPrompt(instruction, refinedResponse, chunksSeen, chunksTotal)
      ],
      { name: "Metadata", schema: metadataSchema }
    );

    refinedResponse.metadata = metadataResponse;
    return refinedResponse;
  });
}
|
|
522
|
+
/**
 * Asks the model which page elements match an instruction.
 *
 * Takes a single options object with `instruction`, `domElements`,
 * `llmProvider`, `modelName` and `image` (optional annotated screenshot
 * buffer).
 *
 * @returns {Promise<object>} The model's structured reply, requested with a
 *   schema of `{ elements: [{ elementId, description }] }`.
 * @throws {Error} If the client returns a falsy response.
 */
function observe(_0) {
  return __async(this, arguments, function* ({
    instruction,
    domElements,
    llmProvider,
    modelName,
    image
  }) {
    const z = import_zod.z;
    const elementSchema = z.object({
      elementId: z.number().describe("the number of the element"),
      description: z.string().describe(
        "a description of the element and what it is relevant for"
      )
    });
    const observeSchema = z.object({
      elements: z.array(elementSchema).describe("an array of elements that match the instruction")
    });

    const llmClient = llmProvider.getClient(modelName);
    const request = {
      model: modelName,
      messages: [
        buildObserveSystemPrompt(),
        buildObserveUserMessage(instruction, domElements)
      ],
      image: image ? { buffer: image, description: AnnotatedScreenshotText } : void 0,
      response_model: {
        schema: observeSchema,
        name: "Observation"
      },
      temperature: 0.1,
      top_p: 1,
      frequency_penalty: 0,
      presence_penalty: 0
    };
    const observationResponse = yield llmClient.createChatCompletion(request);
    if (observationResponse) {
      return observationResponse;
    }
    throw new Error("no response when finding a selector");
  });
}
|
|
563
|
+
|
|
564
|
+
// lib/llm/OpenAIClient.ts
|
|
565
|
+
var import_openai = __toESM(require("openai"));
|
|
566
|
+
var import_zod2 = require("openai/helpers/zod");
|
|
567
|
+
/**
 * Chat-completion client backed by the OpenAI SDK.
 */
var OpenAIClient = class {
  /**
   * @param {Function} logger - Stored on the instance as `this.logger`.
   */
  constructor(logger) {
    this.client = new import_openai.default();
    this.logger = logger;
  }
  /**
   * Sends a chat completion request.
   *
   * `options.image` ({ buffer, description? }), when present, is appended to
   * the conversation as an extra user message; `options.response_model`
   * ({ schema, name }), when present, requests structured output. Neither
   * key is forwarded to the SDK, and the caller's `options` object is left
   * untouched.
   *
   * @param {object} options - Request options; all remaining keys are passed
   *   to `chat.completions.create` unchanged.
   * @returns {Promise<object>} With `response_model`: a shallow copy of the
   *   object parsed from the first choice's message content. Otherwise the
   *   raw SDK response.
   * @throws {SyntaxError} If `response_model` is set and the content is not
   *   valid JSON.
   */
  createChatCompletion(options) {
    return __async(this, null, function* () {
      // openAiOptions is a private copy, so it can be adjusted freely.
      const _a = options, { image, response_model } = _a, openAiOptions = __objRest(_a, ["image", "response_model"]);
      if (image) {
        const screenshotMessage = {
          role: "user",
          content: [
            {
              type: "image_url",
              image_url: {
                url: `data:image/jpeg;base64,${image.buffer.toString("base64")}`
              }
            },
            ...image.description ? [{ type: "text", text: image.description }] : []
          ]
        };
        // Extend the copy's message list instead of reassigning
        // options.messages on the caller's object.
        openAiOptions.messages = [...options.messages, screenshotMessage];
      }
      let responseFormat = void 0;
      if (response_model) {
        responseFormat = (0, import_zod2.zodResponseFormat)(
          response_model.schema,
          response_model.name
        );
      }
      const response = yield this.client.chat.completions.create(__spreadProps(__spreadValues({}, openAiOptions), {
        response_format: responseFormat
      }));
      if (response_model) {
        const extractedData = response.choices[0].message.content;
        const parsedData = JSON.parse(extractedData);
        return __spreadValues({}, parsedData);
      }
      return response;
    });
  }
};
|
|
609
|
+
|
|
610
|
+
// lib/llm/AnthropicClient.ts
|
|
611
|
+
var import_sdk = __toESM(require("@anthropic-ai/sdk"));
|
|
612
|
+
var import_zod_to_json_schema = require("zod-to-json-schema");
|
|
613
|
+
/**
 * Chat-completion client backed by the Anthropic SDK. It accepts the same
 * OpenAI-style options as OpenAIClient and reshapes the Anthropic reply
 * into an OpenAI-style chat completion object.
 */
var AnthropicClient = class {
  /**
   * @param {Function} logger - Called with `{ category, message }` records.
   */
  constructor(logger) {
    this.client = new import_sdk.default({
      apiKey: process.env.ANTHROPIC_API_KEY
      // Make sure to set this environment variable
    });
    this.logger = logger;
  }
  /**
   * Sends a messages request.
   *
   * The first "system" message becomes the request's `system` field and all
   * non-system messages are sent as the conversation. `options.image`
   * ({ buffer, description? }), when present, is appended as a final user
   * message. `options.tools` (OpenAI function-tool format) are converted to
   * Anthropic tools. `options.response_model` ({ schema, name }) adds a
   * "print_extracted_data" tool built from the schema. The caller's
   * `options` object is not modified.
   *
   * @param {object} options - Request options.
   * @returns {Promise<object>} With `response_model`: the `input` of the
   *   first tool_use block. Otherwise an OpenAI-style completion object.
   * @throws {Error} If `response_model` is set and no tool_use block is
   *   returned on the initial attempt and on two retries.
   */
  createChatCompletion(options) {
    return __async(this, null, function* () {
      const systemMessage = options.messages.find((msg) => msg.role === "system");
      // filter() returns a new array, so it can be extended below without
      // touching options.messages.
      const userMessages = options.messages.filter(
        (msg) => msg.role !== "system"
      );
      if (options.image) {
        const screenshotMessage = {
          role: "user",
          content: [
            {
              type: "image",
              source: {
                type: "base64",
                media_type: "image/jpeg",
                data: options.image.buffer.toString("base64")
              }
            },
            ...options.image.description ? [{ type: "text", text: options.image.description }] : []
          ]
        };
        // Append to the list that is actually sent. Appending to
        // options.messages after the filter above would leave the image out
        // of the request, and would duplicate it when a retry re-enters with
        // the same options.
        userMessages.push(screenshotMessage);
      }
      let anthropicTools = options.tools == null ? void 0 : options.tools.map((tool) => {
        if (tool.type === "function") {
          return {
            name: tool.function.name,
            description: tool.function.description,
            input_schema: {
              type: "object",
              properties: tool.function.parameters.properties,
              required: tool.function.parameters.required
            }
          };
        }
        return tool;
      });
      let toolDefinition;
      if (options.response_model) {
        const jsonSchema = (0, import_zod_to_json_schema.zodToJsonSchema)(options.response_model.schema);
        // Prefer a "MySchema" entry under definitions when one exists,
        // otherwise read the top-level schema.
        const namedSchema = jsonSchema.definitions == null ? void 0 : jsonSchema.definitions.MySchema;
        const schemaProperties = (namedSchema == null ? void 0 : namedSchema.properties) || jsonSchema.properties;
        const schemaRequired = (namedSchema == null ? void 0 : namedSchema.required) || jsonSchema.required;
        toolDefinition = {
          name: "print_extracted_data",
          description: "Prints the extracted data based on the provided schema.",
          input_schema: {
            type: "object",
            properties: schemaProperties,
            required: schemaRequired
          }
        };
      }
      if (toolDefinition) {
        anthropicTools = anthropicTools != null ? anthropicTools : [];
        anthropicTools.push(toolDefinition);
      }
      const response = yield this.client.messages.create({
        model: options.model,
        max_tokens: options.max_tokens || 1500,
        messages: userMessages.map((msg) => ({
          role: msg.role,
          content: msg.content
        })),
        tools: anthropicTools,
        system: systemMessage == null ? void 0 : systemMessage.content,
        temperature: options.temperature
      });
      const textBlock = response.content.find((c) => c.type === "text");
      const transformedResponse = {
        id: response.id,
        object: "chat.completion",
        created: Date.now(),
        model: response.model,
        choices: [
          {
            index: 0,
            message: {
              role: "assistant",
              content: (textBlock == null ? void 0 : textBlock.text) || null,
              tool_calls: response.content.filter((c) => c.type === "tool_use").map((toolUse) => ({
                id: toolUse.id,
                type: "function",
                function: {
                  name: toolUse.name,
                  arguments: JSON.stringify(toolUse.input)
                }
              }))
            },
            finish_reason: response.stop_reason
          }
        ],
        usage: {
          prompt_tokens: response.usage.input_tokens,
          completion_tokens: response.usage.output_tokens,
          total_tokens: response.usage.input_tokens + response.usage.output_tokens
        }
      };
      this.logger({
        category: "Anthropic",
        message: "Transformed response: " + JSON.stringify(transformedResponse)
      });
      if (options.response_model) {
        const toolUse = response.content.find((c) => c.type === "tool_use");
        if (toolUse && "input" in toolUse) {
          return toolUse.input;
        }
        if (!options.retries || options.retries < 2) {
          return this.createChatCompletion(__spreadProps(__spreadValues({}, options), {
            retries: (options.retries != null ? options.retries : 0) + 1
          }));
        }
        throw new Error(
          "Extraction failed: No tool use with input in response"
        );
      }
      return transformedResponse;
    });
  }
};
|
|
741
|
+
|
|
742
|
+
// lib/llm/LLMProvider.ts
|
|
743
|
+
/**
 * Maps model names to the provider that serves them and creates a matching
 * client on request.
 */
var LLMProvider = class {
  /**
   * @param {Function} logger - Passed to every client this provider creates.
   */
  constructor(logger) {
    const openai = "openai";
    const anthropic = "anthropic";
    this.modelToProviderMap = {
      "gpt-4o": openai,
      "gpt-4o-mini": openai,
      "gpt-4o-2024-08-06": openai,
      "claude-3-5-sonnet-latest": anthropic,
      "claude-3-5-sonnet-20240620": anthropic,
      "claude-3-5-sonnet-20241022": anthropic
    };
    this.logger = logger;
  }
  /**
   * Creates a new client for the given model; a fresh instance is returned
   * on every call.
   * @param {string} modelName - A key of `modelToProviderMap`.
   * @returns {object} A new OpenAIClient or AnthropicClient.
   * @throws {Error} If the model has no mapped provider, or the mapped
   *   provider is neither "openai" nor "anthropic".
   */
  getClient(modelName) {
    const provider = this.modelToProviderMap[modelName];
    if (!provider) {
      throw new Error(`Unsupported model: ${modelName}`);
    }
    if (provider === "openai") {
      return new OpenAIClient(this.logger);
    }
    if (provider === "anthropic") {
      return new AnthropicClient(this.logger);
    }
    throw new Error(`Unsupported provider: ${provider}`);
  }
};
|
|
770
|
+
|
|
771
|
+
// lib/index.ts
|
|
772
|
+
var import_path2 = __toESM(require("path"));
|
|
773
|
+
|
|
774
|
+
// lib/browserbase.ts
|
|
775
|
+
/**
 * Minimal client for the Browserbase sessions HTTP API. Credentials are
 * read from the BROWSERBASE_API_KEY and BROWSERBASE_PROJECT_ID environment
 * variables.
 */
var Browserbase = class {
  /**
   * Creates a new session for the configured project.
   * @returns {Promise<{sessionId: *, connectUrl: *}>} The `id` and
   *   `connectUrl` fields of the JSON reply.
   * @throws {Error} If either environment variable is unset, or the reply
   *   carries an `error` field.
   */
  createSession() {
    return __async(this, null, function* () {
      const apiKey = process.env.BROWSERBASE_API_KEY;
      const projectId = process.env.BROWSERBASE_PROJECT_ID;
      if (!apiKey || !projectId) {
        throw new Error(
          "BROWSERBASE_API_KEY and BROWSERBASE_PROJECT_ID must be set"
        );
      }
      const request = {
        method: "POST",
        headers: {
          "x-bb-api-key": `${apiKey}`,
          "Content-Type": "application/json"
        },
        body: JSON.stringify({ projectId })
      };
      const response = yield fetch(`https://www.browserbase.com/v1/sessions`, request);
      const json = yield response.json();
      if (json.error) {
        throw new Error(json.error);
      }
      const { id: sessionId, connectUrl } = json;
      return { sessionId, connectUrl };
    });
  }
  /**
   * Fetches the debug endpoint of a session.
   * @param {*} sessionId - Interpolated into the request path.
   * @returns {Promise<*>} The `debuggerFullscreenUrl` field of the JSON reply.
   * @throws {Error} If BROWSERBASE_API_KEY is unset.
   */
  retrieveDebugConnectionURL(sessionId) {
    return __async(this, null, function* () {
      const apiKey = process.env.BROWSERBASE_API_KEY;
      if (!apiKey) {
        throw new Error("BROWSERBASE_API_KEY must be set");
      }
      const endpoint = `https://www.browserbase.com/v1/sessions/${sessionId}/debug`;
      const response = yield fetch(endpoint, {
        method: "GET",
        headers: {
          "x-bb-api-key": `${apiKey}`
        }
      });
      const json = yield response.json();
      return json.debuggerFullscreenUrl;
    });
  }
};
|
|
822
|
+
|
|
823
|
+
// lib/vision.ts
|
|
824
|
+
var import_fs = __toESM(require("fs"));
|
|
825
|
+
var import_path = __toESM(require("path"));
|
|
826
|
+
var import_sharp = __toESM(require("sharp"));
|
|
827
|
+
var import_child_process = require("child_process");
|
|
828
|
+
// Takes page screenshots and overlays a red box plus a numbered marker for
// every entry of a selector map (element id -> xpath).
var ScreenshotService = class _ScreenshotService {
  /**
   * @param page Page object used for screenshots, locators and evaluate().
   * @param selectorMap Object mapping element ids to xpath selectors.
   * @param verbose Verbosity threshold; log() prints messages whose level is <= this value.
   * @param isDebugEnabled When true, annotated screenshots are also saved to disk and opened.
   */
  constructor(page, selectorMap, verbose, isDebugEnabled = false) {
    this.annotationBoxes = [];
    this.numberPositions = [];
    this.page = page;
    this.selectorMap = selectorMap;
    this.isDebugEnabled = isDebugEnabled;
    this.verbose = verbose;
  }
  /**
   * Print a message to the console when this.verbose >= level (level defaults to 1).
   */
  log({
    category,
    message,
    level = 1
  }) {
    if (this.verbose >= level) {
      const categoryString = category ? `:${category}` : "";
      console.log(`[stagehand${categoryString}] ${message}`);
    }
  }
  /**
   * Capture a JPEG screenshot of the page.
   *
   * @param fullpage Forwarded as the `fullPage` screenshot option (default true).
   * @param quality Optional JPEG quality, 0-100.
   * @throws {Error} when quality is outside 0-100.
   */
  getScreenshot(fullpage = true, quality) {
    return __async(this, null, function* () {
      if (quality && (quality < 0 || quality > 100)) {
        throw new Error("quality must be between 0 and 100");
      }
      return yield this.page.screenshot({
        fullPage: fullpage,
        quality,
        type: "jpeg"
      });
    });
  }
  /**
   * Return width * height of the given image buffer.
   *
   * @throws {Error} when the image metadata has no width or height.
   */
  getScreenshotPixelCount(screenshot) {
    return __async(this, null, function* () {
      const image = (0, import_sharp.default)(screenshot);
      const metadata = yield image.metadata();
      if (!metadata.width || !metadata.height) {
        this.log({
          category: "Error",
          message: "Unable to determine image dimensions.",
          level: 0
        });
        throw new Error("Unable to determine image dimensions.");
      }
      const pixelCount = metadata.width * metadata.height;
      this.log({
        category: "Info",
        message: `Screenshot pixel count: ${pixelCount}`,
        level: 1
      });
      return pixelCount;
    });
  }
  /**
   * Capture a screenshot and composite an SVG layer with one annotation per
   * selector-map entry on top of it.
   *
   * @param fullpage Forwarded to getScreenshot().
   * @returns Promise of the annotated image buffer.
   */
  getAnnotatedScreenshot(fullpage) {
    return __async(this, null, function* () {
      // Reset per-run layout state used for marker placement.
      this.annotationBoxes = [];
      this.numberPositions = [];
      const screenshot = yield this.getScreenshot(fullpage);
      const image = (0, import_sharp.default)(screenshot);
      const { width, height } = yield image.metadata();
      const svgAnnotations = yield Promise.all(
        Object.entries(this.selectorMap).map(
          ([id, selector]) => this.createElementAnnotation(id, selector)
        )
      );
      const scrollPosition = yield this.page.evaluate(() => {
        return {
          scrollX: window.scrollX,
          scrollY: window.scrollY
        };
      });
      const svg = [
        "",
        `<svg width="${width}" height="${height}" xmlns="http://www.w3.org/2000/svg" style="position:absolute;left:${-scrollPosition.scrollX}px;top:${-scrollPosition.scrollY}px;">`,
        `${svgAnnotations.join("")}`,
        "</svg>",
        ""
      ].join("\n");
      const annotatedScreenshot = yield image.composite([{ input: Buffer.from(svg), top: 0, left: 0 }]).toBuffer();
      if (this.isDebugEnabled) {
        yield _ScreenshotService.saveAndOpenScreenshot(annotatedScreenshot);
      }
      return annotatedScreenshot;
    });
  }
  /**
   * Build the SVG fragment (box, circle, id label) for one element.
   *
   * @returns Promise of the SVG markup, or "" when the element has no
   *   bounding box or any step fails (failures are logged at level 0).
   */
  createElementAnnotation(id, selector) {
    return __async(this, null, function* () {
      try {
        const element = yield this.page.locator(`xpath=${selector}`).first();
        const box = yield element.boundingBox();
        if (!box) {
          this.log({
            category: "Debug",
            message: `No bounding box for element ${id}`,
            level: 2
          });
          return "";
        }
        const scrollPosition = yield this.page.evaluate(() => ({
          scrollX: window.scrollX,
          scrollY: window.scrollY
        }));
        // Shift the box by the current scroll offset.
        const adjustedBox = {
          x: box.x + scrollPosition.scrollX,
          y: box.y + scrollPosition.scrollY,
          width: box.width,
          height: box.height,
          id
        };
        this.annotationBoxes.push(adjustedBox);
        const numberPosition = this.findNonOverlappingNumberPosition(adjustedBox);
        const circleRadius = 12;
        return [
          "",
          `<rect x="${adjustedBox.x}" y="${adjustedBox.y}" width="${adjustedBox.width}" height="${adjustedBox.height}"`,
          `fill="none" stroke="red" stroke-width="2" />`,
          `<circle cx="${numberPosition.x}" cy="${numberPosition.y}" r="${circleRadius}" fill="white" stroke="red" stroke-width="2" />`,
          `<text x="${numberPosition.x}" y="${numberPosition.y}" fill="red" font-size="16" font-weight="bold"`,
          `text-anchor="middle" dominant-baseline="central">`,
          `${id}`,
          "</text>",
          ""
        ].join("\n");
      } catch (error) {
        this.log({
          category: "Error",
          message: `Failed to create annotation for element ${id}: ${error}`,
          level: 0
        });
        return "";
      }
    });
  }
  /**
   * Pick a marker centre for a box: start at the box's top-left corner offset
   * by one radius, then move down 5px at a time (at most 10 times) while the
   * marker overlaps a previously placed one. The result is remembered.
   */
  findNonOverlappingNumberPosition(box) {
    const circleRadius = 12;
    let position = {
      x: box.x - circleRadius,
      y: box.y - circleRadius
    };
    let attempts = 0;
    const maxAttempts = 10;
    const offset = 5;
    while (this.isNumberOverlapping(position) && attempts < maxAttempts) {
      position.y += offset;
      attempts++;
    }
    this.numberPositions.push(position);
    return position;
  }
  /**
   * True when the position is closer than two radii (24px) to any marker
   * already placed.
   */
  isNumberOverlapping(position) {
    const circleRadius = 12;
    return this.numberPositions.some(
      (existingPosition) => Math.sqrt(
        Math.pow(position.x - existingPosition.x, 2) + Math.pow(position.y - existingPosition.y, 2)
      ) < circleRadius * 2
    );
  }
  /**
   * Write the buffer to ./screenshots/screenshot-<timestamp>.png under the
   * current working directory and ask the OS to open it.
   */
  static saveAndOpenScreenshot(screenshot) {
    return __async(this, null, function* () {
      const screenshotDir = import_path.default.join(process.cwd(), "screenshots");
      // recursive: true makes this a no-op when the directory already exists,
      // avoiding the exists-then-create race.
      import_fs.default.mkdirSync(screenshotDir, { recursive: true });
      const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
      // NOTE(review): getScreenshot() requests JPEG data, so the .png extension
      // may not match the bytes written; confirm before renaming.
      const filename = import_path.default.join(screenshotDir, `screenshot-${timestamp}.png`);
      import_fs.default.writeFileSync(filename, screenshot);
      console.log(`Screenshot saved to: ${filename}`);
      // The path is quoted because it goes through a shell: an unquoted working
      // directory containing spaces or metacharacters would break the command.
      let command;
      if (process.platform === "win32") {
        // `start` treats its first quoted argument as a window title.
        command = `start "" "${filename}"`;
      } else {
        const opener = process.platform === "darwin" ? "open" : "xdg-open";
        command = `${opener} '${filename.replace(/'/g, "'\\''")}'`;
      }
      (0, import_child_process.exec)(command);
    });
  }
};
|
|
1002
|
+
|
|
1003
|
+
// lib/index.ts
|
|
1004
|
+
require("dotenv").config({ path: ".env" });
|
|
1005
|
+
/**
 * Obtain a browser context, either from a remote Browserbase session or from
 * a locally launched persistent Chromium context.
 *
 * @param env "BROWSERBASE" or "LOCAL" (default). Falls back to "LOCAL", with a
 *   level-0 log message, when a required BROWSERBASE_* variable is missing.
 * @param headless Whether the local browser is launched headless.
 * @param logger Function receiving { category, message, level } objects.
 * @returns Promise of { browser, context, debugUrl, sessionUrl } for
 *   Browserbase, or { context } for a local browser.
 */
function getBrowser(env = "LOCAL", headless = false, logger) {
  return __async(this, null, function* () {
    if (env === "BROWSERBASE" && !process.env.BROWSERBASE_API_KEY) {
      logger({
        category: "Init",
        message: "BROWSERBASE_API_KEY is required to use BROWSERBASE env. Defaulting to LOCAL.",
        level: 0
      });
      env = "LOCAL";
    }
    if (env === "BROWSERBASE" && !process.env.BROWSERBASE_PROJECT_ID) {
      logger({
        category: "Init",
        message: "BROWSERBASE_PROJECT_ID is required to use BROWSERBASE env. Defaulting to LOCAL.",
        level: 0
      });
      env = "LOCAL";
    }
    if (env === "BROWSERBASE") {
      logger({
        category: "Init",
        message: "Connecting you to Browserbase...",
        level: 0
      });
      const browserbase = new Browserbase();
      const { sessionId, connectUrl } = yield browserbase.createSession();
      const browser = yield import_test.chromium.connectOverCDP(connectUrl);
      const debugUrl = yield browserbase.retrieveDebugConnectionURL(sessionId);
      const sessionUrl = `https://www.browserbase.com/sessions/${sessionId}`;
      logger({
        category: "Init",
        message: `Browserbase session started.\n\nSession Url: ${sessionUrl}\n\nLive debug accessible here: ${debugUrl}.`,
        level: 0
      });
      const context = browser.contexts()[0];
      return { browser, context, debugUrl, sessionUrl };
    }
    logger({
      category: "Init",
      message: `Launching local browser in ${headless ? "headless" : "headed"} mode`,
      level: 0
    });
    // Use the platform temp directory and path.join instead of a hard-coded
    // "/tmp" and "/" separators, so this also works on hosts without /tmp.
    const os = require("os");
    const joinPath = import_path2.default.join;
    const tmpDir = import_fs2.default.mkdtempSync(joinPath(os.tmpdir(), "pwtest"));
    const userDataDir = joinPath(tmpDir, "userdir");
    const profileDir = joinPath(userDataDir, "Default");
    import_fs2.default.mkdirSync(profileDir, { recursive: true });
    // Seed the fresh profile with a Preferences file before launch.
    const defaultPreferences = {
      plugins: {
        always_open_pdf_externally: true
      }
    };
    import_fs2.default.writeFileSync(
      joinPath(profileDir, "Preferences"),
      JSON.stringify(defaultPreferences)
    );
    const downloadsPath = joinPath(process.cwd(), "downloads");
    import_fs2.default.mkdirSync(downloadsPath, { recursive: true });
    const context = yield import_test.chromium.launchPersistentContext(
      userDataDir,
      {
        acceptDownloads: true,
        headless,
        viewport: {
          width: 1250,
          height: 800
        },
        locale: "en-US",
        timezoneId: "America/New_York",
        deviceScaleFactor: 1,
        args: [
          "--enable-webgl",
          "--use-gl=swiftshader",
          "--enable-accelerated-2d-canvas",
          "--disable-blink-features=AutomationControlled",
          "--disable-web-security"
        ],
        bypassCSP: true,
        // NOTE(review): the profile directory is already the first argument;
        // kept as in the original, confirm whether this option has any effect.
        userDataDir: "./user_data"
      }
    );
    logger({
      category: "Init",
      message: "Local browser started successfully."
    });
    yield applyStealthScripts(context);
    return { context };
  });
}
|
|
1098
|
+
/**
 * Register an init script on the context that overrides several
 * automation-revealing browser properties.
 *
 * The callback passed to addInitScript must stay self-contained: it may only
 * reference names it defines itself or page globals.
 *
 * @param context Object exposing addInitScript(fn).
 */
function applyStealthScripts(context) {
  return __async(this, null, function* () {
    yield context.addInitScript(() => {
      Object.defineProperty(navigator, "webdriver", {
        get: () => void 0
      });
      Object.defineProperty(navigator, "languages", {
        get: () => ["en-US", "en"]
      });
      Object.defineProperty(navigator, "plugins", {
        get: () => [1, 2, 3, 4, 5]
      });
      delete window.__playwright;
      delete window.__pw_manual;
      delete window.__PW_inspect;
      Object.defineProperty(navigator, "headless", {
        get: () => false
      });
      const permissions = window.navigator.permissions;
      const originalQuery = permissions.query;
      // Invoke the saved method with its original receiver; calling the bare
      // function reference loses `this` and throws "Illegal invocation" for
      // every non-notification query.
      permissions.query = (parameters) => parameters.name === "notifications" ? Promise.resolve({
        state: Notification.permission
      }) : originalQuery.call(permissions, parameters);
    });
  });
}
|
|
1123
|
+
var Stagehand = class {
|
|
1124
|
+
constructor({
|
|
1125
|
+
env,
|
|
1126
|
+
verbose = 0,
|
|
1127
|
+
debugDom = false,
|
|
1128
|
+
llmProvider,
|
|
1129
|
+
headless = false,
|
|
1130
|
+
logger,
|
|
1131
|
+
domSettleTimeoutMs = 6e4
|
|
1132
|
+
} = {
|
|
1133
|
+
env: "BROWSERBASE"
|
|
1134
|
+
}) {
|
|
1135
|
+
// Logging
|
|
1136
|
+
this.pending_logs_to_send_to_browserbase = [];
|
|
1137
|
+
this.is_processing_browserbase_logs = false;
|
|
1138
|
+
this.externalLogger = logger;
|
|
1139
|
+
this.logger = this.log.bind(this);
|
|
1140
|
+
this.llmProvider = llmProvider || new LLMProvider(this.logger);
|
|
1141
|
+
this.env = env;
|
|
1142
|
+
this.observations = {};
|
|
1143
|
+
this.actions = {};
|
|
1144
|
+
this.verbose = verbose;
|
|
1145
|
+
this.debugDom = debugDom;
|
|
1146
|
+
this.defaultModelName = "gpt-4o";
|
|
1147
|
+
this.headless = headless;
|
|
1148
|
+
this.domSettleTimeoutMs = domSettleTimeoutMs;
|
|
1149
|
+
}
|
|
1150
|
+
init() {
|
|
1151
|
+
return __async(this, arguments, function* ({
|
|
1152
|
+
modelName = "gpt-4o"
|
|
1153
|
+
} = {}) {
|
|
1154
|
+
const { context, debugUrl, sessionUrl } = yield getBrowser(
|
|
1155
|
+
this.env,
|
|
1156
|
+
this.headless,
|
|
1157
|
+
this.logger
|
|
1158
|
+
).catch((e) => {
|
|
1159
|
+
console.error("Error in init:", e);
|
|
1160
|
+
return { context: void 0, debugUrl: void 0, sessionUrl: void 0 };
|
|
1161
|
+
});
|
|
1162
|
+
this.context = context;
|
|
1163
|
+
this.page = context.pages()[0];
|
|
1164
|
+
this.defaultModelName = modelName;
|
|
1165
|
+
const originalGoto = this.page.goto.bind(this.page);
|
|
1166
|
+
this.page.goto = (url, options) => __async(this, null, function* () {
|
|
1167
|
+
const result = yield originalGoto(url, options);
|
|
1168
|
+
yield this.page.waitForLoadState("domcontentloaded");
|
|
1169
|
+
yield this._waitForSettledDom();
|
|
1170
|
+
return result;
|
|
1171
|
+
});
|
|
1172
|
+
if (this.headless) {
|
|
1173
|
+
yield this.page.setViewportSize({ width: 1280, height: 720 });
|
|
1174
|
+
}
|
|
1175
|
+
yield this.page.addInitScript({
|
|
1176
|
+
path: import_path2.default.join(__dirname, "..", "dist", "dom", "build", "process.js")
|
|
1177
|
+
});
|
|
1178
|
+
yield this.page.addInitScript({
|
|
1179
|
+
path: import_path2.default.join(__dirname, "..", "dist", "dom", "build", "utils.js")
|
|
1180
|
+
});
|
|
1181
|
+
yield this.page.addInitScript({
|
|
1182
|
+
path: import_path2.default.join(__dirname, "..", "dist", "dom", "build", "debug.js")
|
|
1183
|
+
});
|
|
1184
|
+
return { debugUrl, sessionUrl };
|
|
1185
|
+
});
|
|
1186
|
+
}
|
|
1187
|
+
log({
|
|
1188
|
+
message,
|
|
1189
|
+
category,
|
|
1190
|
+
level
|
|
1191
|
+
}) {
|
|
1192
|
+
const logObj = { category, message, level };
|
|
1193
|
+
logObj.level = logObj.level || 1;
|
|
1194
|
+
if (this.externalLogger) {
|
|
1195
|
+
this.externalLogger(logObj);
|
|
1196
|
+
} else {
|
|
1197
|
+
const categoryString = logObj.category ? `:${logObj.category}` : "";
|
|
1198
|
+
const logMessage = `[stagehand${categoryString}] ${logObj.message}`;
|
|
1199
|
+
console.log(logMessage);
|
|
1200
|
+
}
|
|
1201
|
+
this.pending_logs_to_send_to_browserbase.push(__spreadProps(__spreadValues({}, logObj), {
|
|
1202
|
+
id: Math.random().toString(36).substring(2, 15)
|
|
1203
|
+
}));
|
|
1204
|
+
this._run_browserbase_log_processing_cycle();
|
|
1205
|
+
}
|
|
1206
|
+
_run_browserbase_log_processing_cycle() {
|
|
1207
|
+
return __async(this, null, function* () {
|
|
1208
|
+
if (this.is_processing_browserbase_logs) {
|
|
1209
|
+
return;
|
|
1210
|
+
}
|
|
1211
|
+
this.is_processing_browserbase_logs = true;
|
|
1212
|
+
const pending_logs = [...this.pending_logs_to_send_to_browserbase];
|
|
1213
|
+
for (const logObj of pending_logs) {
|
|
1214
|
+
yield this._log_to_browserbase(logObj);
|
|
1215
|
+
}
|
|
1216
|
+
this.is_processing_browserbase_logs = false;
|
|
1217
|
+
});
|
|
1218
|
+
}
|
|
1219
|
+
_log_to_browserbase(logObj) {
|
|
1220
|
+
return __async(this, null, function* () {
|
|
1221
|
+
logObj.level = logObj.level || 1;
|
|
1222
|
+
if (!this.page) {
|
|
1223
|
+
return;
|
|
1224
|
+
}
|
|
1225
|
+
if (this.verbose >= logObj.level) {
|
|
1226
|
+
yield this.page.evaluate((logObj2) => {
|
|
1227
|
+
const logMessage = `[stagehand${logObj2.category ? `:${logObj2.category}` : ""}] ${logObj2.message}`;
|
|
1228
|
+
if (logObj2.message.toLowerCase().includes("trace") || logObj2.message.toLowerCase().includes("error:")) {
|
|
1229
|
+
console.error(logMessage);
|
|
1230
|
+
} else {
|
|
1231
|
+
console.log(logMessage);
|
|
1232
|
+
}
|
|
1233
|
+
}, logObj).then(() => {
|
|
1234
|
+
this.pending_logs_to_send_to_browserbase = this.pending_logs_to_send_to_browserbase.filter(
|
|
1235
|
+
(log) => log.id !== logObj.id
|
|
1236
|
+
);
|
|
1237
|
+
}).catch((e) => {
|
|
1238
|
+
});
|
|
1239
|
+
}
|
|
1240
|
+
});
|
|
1241
|
+
}
|
|
1242
|
+
_waitForSettledDom(timeoutMs) {
|
|
1243
|
+
return __async(this, null, function* () {
|
|
1244
|
+
try {
|
|
1245
|
+
const timeout = timeoutMs != null ? timeoutMs : this.domSettleTimeoutMs;
|
|
1246
|
+
const timeoutPromise = new Promise((resolve) => {
|
|
1247
|
+
setTimeout(() => {
|
|
1248
|
+
console.warn(
|
|
1249
|
+
`[stagehand:dom] DOM settle timeout of ${timeout}ms exceeded, continuing anyway`
|
|
1250
|
+
);
|
|
1251
|
+
this.log({
|
|
1252
|
+
category: "dom",
|
|
1253
|
+
message: `DOM settle timeout of ${timeout}ms exceeded, continuing anyway`,
|
|
1254
|
+
level: 1
|
|
1255
|
+
});
|
|
1256
|
+
resolve();
|
|
1257
|
+
}, timeout);
|
|
1258
|
+
});
|
|
1259
|
+
yield Promise.race([
|
|
1260
|
+
(() => __async(this, null, function* () {
|
|
1261
|
+
yield this.page.waitForSelector("body");
|
|
1262
|
+
yield this.page.waitForLoadState("domcontentloaded");
|
|
1263
|
+
yield this.page.evaluate(() => {
|
|
1264
|
+
return new Promise((resolve) => {
|
|
1265
|
+
if (typeof window.waitForDomSettle === "function") {
|
|
1266
|
+
window.waitForDomSettle().then(() => {
|
|
1267
|
+
resolve();
|
|
1268
|
+
});
|
|
1269
|
+
} else {
|
|
1270
|
+
console.warn(
|
|
1271
|
+
"waitForDomSettle is not defined, considering DOM as settled"
|
|
1272
|
+
);
|
|
1273
|
+
resolve();
|
|
1274
|
+
}
|
|
1275
|
+
});
|
|
1276
|
+
});
|
|
1277
|
+
}))(),
|
|
1278
|
+
timeoutPromise
|
|
1279
|
+
]);
|
|
1280
|
+
} catch (e) {
|
|
1281
|
+
this.log({
|
|
1282
|
+
category: "dom",
|
|
1283
|
+
message: `Error in waitForSettledDom: ${e.message}
|
|
1284
|
+
Trace: ${e.stack}`,
|
|
1285
|
+
level: 1
|
|
1286
|
+
});
|
|
1287
|
+
}
|
|
1288
|
+
});
|
|
1289
|
+
}
|
|
1290
|
+
startDomDebug() {
|
|
1291
|
+
return __async(this, null, function* () {
|
|
1292
|
+
try {
|
|
1293
|
+
yield this.page.evaluate(() => {
|
|
1294
|
+
if (typeof window.debugDom === "function") {
|
|
1295
|
+
window.debugDom();
|
|
1296
|
+
} else {
|
|
1297
|
+
this.log({
|
|
1298
|
+
category: "dom",
|
|
1299
|
+
message: "debugDom is not defined",
|
|
1300
|
+
level: 1
|
|
1301
|
+
});
|
|
1302
|
+
}
|
|
1303
|
+
}).catch(() => {
|
|
1304
|
+
});
|
|
1305
|
+
} catch (e) {
|
|
1306
|
+
this.log({
|
|
1307
|
+
category: "dom",
|
|
1308
|
+
message: `Error in startDomDebug: ${e.message}
|
|
1309
|
+
Trace: ${e.stack}`,
|
|
1310
|
+
level: 1
|
|
1311
|
+
});
|
|
1312
|
+
}
|
|
1313
|
+
});
|
|
1314
|
+
}
|
|
1315
|
+
cleanupDomDebug() {
|
|
1316
|
+
return __async(this, null, function* () {
|
|
1317
|
+
if (this.debugDom) {
|
|
1318
|
+
yield this.page.evaluate(() => window.cleanupDebug()).catch(() => {
|
|
1319
|
+
});
|
|
1320
|
+
}
|
|
1321
|
+
});
|
|
1322
|
+
}
|
|
1323
|
+
// Recording
|
|
1324
|
+
_generateId(operation) {
|
|
1325
|
+
return import_crypto.default.createHash("sha256").update(operation).digest("hex");
|
|
1326
|
+
}
|
|
1327
|
+
_recordObservation(instruction, result) {
|
|
1328
|
+
return __async(this, null, function* () {
|
|
1329
|
+
const id = this._generateId(instruction);
|
|
1330
|
+
this.observations[id] = { result, instruction };
|
|
1331
|
+
return id;
|
|
1332
|
+
});
|
|
1333
|
+
}
|
|
1334
|
+
_recordAction(action, result) {
|
|
1335
|
+
return __async(this, null, function* () {
|
|
1336
|
+
const id = this._generateId(action);
|
|
1337
|
+
this.actions[id] = { result, action };
|
|
1338
|
+
return id;
|
|
1339
|
+
});
|
|
1340
|
+
}
|
|
1341
|
+
// Main methods
|
|
1342
|
+
_extract(_0) {
|
|
1343
|
+
return __async(this, arguments, function* ({
|
|
1344
|
+
instruction,
|
|
1345
|
+
schema,
|
|
1346
|
+
progress = "",
|
|
1347
|
+
content = {},
|
|
1348
|
+
chunksSeen = [],
|
|
1349
|
+
modelName
|
|
1350
|
+
}) {
|
|
1351
|
+
this.log({
|
|
1352
|
+
category: "extraction",
|
|
1353
|
+
message: `starting extraction '${instruction}'`,
|
|
1354
|
+
level: 1
|
|
1355
|
+
});
|
|
1356
|
+
yield this._waitForSettledDom();
|
|
1357
|
+
yield this.startDomDebug();
|
|
1358
|
+
const { outputString, chunk, chunks } = yield this.page.evaluate(
|
|
1359
|
+
(chunksSeen2) => window.processDom(chunksSeen2 != null ? chunksSeen2 : []),
|
|
1360
|
+
chunksSeen
|
|
1361
|
+
);
|
|
1362
|
+
this.log({
|
|
1363
|
+
category: "extraction",
|
|
1364
|
+
message: `received output from processDom. Current chunk index: ${chunk}, Number of chunks left: ${chunks.length - chunksSeen.length}`,
|
|
1365
|
+
level: 1
|
|
1366
|
+
});
|
|
1367
|
+
const extractionResponse = yield extract({
|
|
1368
|
+
instruction,
|
|
1369
|
+
progress,
|
|
1370
|
+
previouslyExtractedContent: content,
|
|
1371
|
+
domElements: outputString,
|
|
1372
|
+
llmProvider: this.llmProvider,
|
|
1373
|
+
schema,
|
|
1374
|
+
modelName: modelName || this.defaultModelName,
|
|
1375
|
+
chunksSeen: chunksSeen.length,
|
|
1376
|
+
chunksTotal: chunks.length
|
|
1377
|
+
});
|
|
1378
|
+
const _a = extractionResponse, {
|
|
1379
|
+
metadata: { progress: newProgress, completed }
|
|
1380
|
+
} = _a, output = __objRest(_a, [
|
|
1381
|
+
"metadata"
|
|
1382
|
+
]);
|
|
1383
|
+
yield this.cleanupDomDebug();
|
|
1384
|
+
this.log({
|
|
1385
|
+
category: "extraction",
|
|
1386
|
+
message: `received extraction response: ${JSON.stringify(extractionResponse)}`,
|
|
1387
|
+
level: 1
|
|
1388
|
+
});
|
|
1389
|
+
chunksSeen.push(chunk);
|
|
1390
|
+
if (completed || chunksSeen.length === chunks.length) {
|
|
1391
|
+
this.log({
|
|
1392
|
+
category: "extraction",
|
|
1393
|
+
message: `response: ${JSON.stringify(extractionResponse)}`,
|
|
1394
|
+
level: 1
|
|
1395
|
+
});
|
|
1396
|
+
return output;
|
|
1397
|
+
} else {
|
|
1398
|
+
this.log({
|
|
1399
|
+
category: "extraction",
|
|
1400
|
+
message: `continuing extraction, progress: '${newProgress}'`,
|
|
1401
|
+
level: 1
|
|
1402
|
+
});
|
|
1403
|
+
yield this._waitForSettledDom();
|
|
1404
|
+
return this._extract({
|
|
1405
|
+
instruction,
|
|
1406
|
+
schema,
|
|
1407
|
+
progress: newProgress,
|
|
1408
|
+
content: output,
|
|
1409
|
+
chunksSeen,
|
|
1410
|
+
modelName
|
|
1411
|
+
});
|
|
1412
|
+
}
|
|
1413
|
+
});
|
|
1414
|
+
}
|
|
1415
|
+
_observe(_0) {
|
|
1416
|
+
return __async(this, arguments, function* ({
|
|
1417
|
+
instruction,
|
|
1418
|
+
useVision,
|
|
1419
|
+
fullPage,
|
|
1420
|
+
modelName
|
|
1421
|
+
}) {
|
|
1422
|
+
if (!instruction) {
|
|
1423
|
+
instruction = `Find elements that can be used for any future actions in the page. These may be navigation links, related pages, section/subsection links, buttons, or other interactive elements. Be comprehensive: if there are multiple elements that may be relevant for future actions, return all of them.`;
|
|
1424
|
+
}
|
|
1425
|
+
const model = modelName != null ? modelName : this.defaultModelName;
|
|
1426
|
+
this.log({
|
|
1427
|
+
category: "observation",
|
|
1428
|
+
message: `starting observation: ${instruction}`,
|
|
1429
|
+
level: 1
|
|
1430
|
+
});
|
|
1431
|
+
yield this._waitForSettledDom();
|
|
1432
|
+
yield this.startDomDebug();
|
|
1433
|
+
let { outputString, selectorMap } = yield this.page.evaluate(
|
|
1434
|
+
(fullPage2) => fullPage2 ? window.processAllOfDom() : window.processDom([]),
|
|
1435
|
+
fullPage
|
|
1436
|
+
);
|
|
1437
|
+
let annotatedScreenshot;
|
|
1438
|
+
if (useVision === true) {
|
|
1439
|
+
if (!modelsWithVision.includes(model)) {
|
|
1440
|
+
this.log({
|
|
1441
|
+
category: "observation",
|
|
1442
|
+
message: `${model} does not support vision. Skipping vision processing.`,
|
|
1443
|
+
level: 1
|
|
1444
|
+
});
|
|
1445
|
+
} else {
|
|
1446
|
+
const screenshotService = new ScreenshotService(
|
|
1447
|
+
this.page,
|
|
1448
|
+
selectorMap,
|
|
1449
|
+
this.verbose
|
|
1450
|
+
);
|
|
1451
|
+
annotatedScreenshot = yield screenshotService.getAnnotatedScreenshot(fullPage);
|
|
1452
|
+
outputString = "n/a. use the image to find the elements.";
|
|
1453
|
+
}
|
|
1454
|
+
}
|
|
1455
|
+
const observationResponse = yield observe({
|
|
1456
|
+
instruction,
|
|
1457
|
+
domElements: outputString,
|
|
1458
|
+
llmProvider: this.llmProvider,
|
|
1459
|
+
modelName: modelName || this.defaultModelName,
|
|
1460
|
+
image: annotatedScreenshot
|
|
1461
|
+
});
|
|
1462
|
+
const elementsWithSelectors = observationResponse.elements.map(
|
|
1463
|
+
(element) => {
|
|
1464
|
+
const _a = element, { elementId } = _a, rest = __objRest(_a, ["elementId"]);
|
|
1465
|
+
return __spreadProps(__spreadValues({}, rest), {
|
|
1466
|
+
selector: `xpath=${selectorMap[elementId]}`
|
|
1467
|
+
});
|
|
1468
|
+
}
|
|
1469
|
+
);
|
|
1470
|
+
yield this.cleanupDomDebug();
|
|
1471
|
+
this._recordObservation(instruction, elementsWithSelectors);
|
|
1472
|
+
this.log({
|
|
1473
|
+
category: "observation",
|
|
1474
|
+
message: `found element ${JSON.stringify(elementsWithSelectors)}`,
|
|
1475
|
+
level: 1
|
|
1476
|
+
});
|
|
1477
|
+
yield this._recordObservation(instruction, elementsWithSelectors);
|
|
1478
|
+
return elementsWithSelectors;
|
|
1479
|
+
});
|
|
1480
|
+
}
|
|
1481
|
+
_act(_0) {
|
|
1482
|
+
return __async(this, arguments, function* ({
|
|
1483
|
+
action,
|
|
1484
|
+
steps = "",
|
|
1485
|
+
chunksSeen,
|
|
1486
|
+
modelName,
|
|
1487
|
+
useVision,
|
|
1488
|
+
verifierUseVision,
|
|
1489
|
+
retries = 0
|
|
1490
|
+
}) {
|
|
1491
|
+
var _a;
|
|
1492
|
+
const model = modelName != null ? modelName : this.defaultModelName;
|
|
1493
|
+
if (!modelsWithVision.includes(model) && (useVision !== false || verifierUseVision)) {
|
|
1494
|
+
this.log({
|
|
1495
|
+
category: "action",
|
|
1496
|
+
message: `${model} does not support vision, but useVision was set to ${useVision}. Defaulting to false.`,
|
|
1497
|
+
level: 1
|
|
1498
|
+
});
|
|
1499
|
+
useVision = false;
|
|
1500
|
+
verifierUseVision = false;
|
|
1501
|
+
}
|
|
1502
|
+
this.log({
|
|
1503
|
+
category: "action",
|
|
1504
|
+
message: `Running / Continuing action: ${action} on page: ${this.page.url()}`,
|
|
1505
|
+
level: 2
|
|
1506
|
+
});
|
|
1507
|
+
yield this._waitForSettledDom();
|
|
1508
|
+
yield this.startDomDebug();
|
|
1509
|
+
this.log({
|
|
1510
|
+
category: "action",
|
|
1511
|
+
message: `Processing DOM...`,
|
|
1512
|
+
level: 2
|
|
1513
|
+
});
|
|
1514
|
+
const { outputString, selectorMap, chunk, chunks } = yield this.page.evaluate(
|
|
1515
|
+
({ chunksSeen: chunksSeen2 }) => {
|
|
1516
|
+
return window.processDom(chunksSeen2);
|
|
1517
|
+
},
|
|
1518
|
+
{ chunksSeen }
|
|
1519
|
+
);
|
|
1520
|
+
this.log({
|
|
1521
|
+
category: "action",
|
|
1522
|
+
message: `Looking at chunk ${chunk}. Chunks left: ${chunks.length - chunksSeen.length}`,
|
|
1523
|
+
level: 1
|
|
1524
|
+
});
|
|
1525
|
+
let annotatedScreenshot;
|
|
1526
|
+
if (useVision === true) {
|
|
1527
|
+
if (!modelsWithVision.includes(model)) {
|
|
1528
|
+
this.log({
|
|
1529
|
+
category: "action",
|
|
1530
|
+
message: `${model} does not support vision. Skipping vision processing.`,
|
|
1531
|
+
level: 1
|
|
1532
|
+
});
|
|
1533
|
+
} else {
|
|
1534
|
+
const screenshotService = new ScreenshotService(
|
|
1535
|
+
this.page,
|
|
1536
|
+
selectorMap,
|
|
1537
|
+
this.verbose
|
|
1538
|
+
);
|
|
1539
|
+
annotatedScreenshot = yield screenshotService.getAnnotatedScreenshot(false);
|
|
1540
|
+
}
|
|
1541
|
+
}
|
|
1542
|
+
const response = yield act({
|
|
1543
|
+
action,
|
|
1544
|
+
domElements: outputString,
|
|
1545
|
+
steps,
|
|
1546
|
+
llmProvider: this.llmProvider,
|
|
1547
|
+
modelName: model,
|
|
1548
|
+
screenshot: annotatedScreenshot,
|
|
1549
|
+
logger: this.logger
|
|
1550
|
+
});
|
|
1551
|
+
this.log({
|
|
1552
|
+
category: "action",
|
|
1553
|
+
message: `Received response from LLM: ${JSON.stringify(response)}`,
|
|
1554
|
+
level: 1
|
|
1555
|
+
});
|
|
1556
|
+
yield this.cleanupDomDebug();
|
|
1557
|
+
if (!response) {
|
|
1558
|
+
if (chunksSeen.length + 1 < chunks.length) {
|
|
1559
|
+
chunksSeen.push(chunk);
|
|
1560
|
+
this.log({
|
|
1561
|
+
category: "action",
|
|
1562
|
+
message: `No action found in current chunk. Chunks seen: ${chunksSeen.length}.`,
|
|
1563
|
+
level: 1
|
|
1564
|
+
});
|
|
1565
|
+
return this._act({
|
|
1566
|
+
action,
|
|
1567
|
+
steps: steps + (!steps.endsWith("\n") ? "\n" : "") + "## Step: Scrolled to another section\n",
|
|
1568
|
+
chunksSeen,
|
|
1569
|
+
modelName,
|
|
1570
|
+
useVision,
|
|
1571
|
+
verifierUseVision
|
|
1572
|
+
});
|
|
1573
|
+
} else if (useVision === "fallback") {
|
|
1574
|
+
this.log({
|
|
1575
|
+
category: "action",
|
|
1576
|
+
message: `Switching to vision-based processing`,
|
|
1577
|
+
level: 1
|
|
1578
|
+
});
|
|
1579
|
+
yield this.page.evaluate(() => window.scrollToHeight(0));
|
|
1580
|
+
return yield this._act({
|
|
1581
|
+
action,
|
|
1582
|
+
steps,
|
|
1583
|
+
chunksSeen,
|
|
1584
|
+
modelName,
|
|
1585
|
+
useVision: true,
|
|
1586
|
+
verifierUseVision
|
|
1587
|
+
});
|
|
1588
|
+
} else {
|
|
1589
|
+
return {
|
|
1590
|
+
success: false,
|
|
1591
|
+
message: `Action was not able to be completed.`,
|
|
1592
|
+
action
|
|
1593
|
+
};
|
|
1594
|
+
}
|
|
1595
|
+
}
|
|
1596
|
+
const elementId = response["element"];
|
|
1597
|
+
const xpath = selectorMap[elementId];
|
|
1598
|
+
const method = response["method"];
|
|
1599
|
+
const args = response["args"];
|
|
1600
|
+
const elementLines = outputString.split("\n");
|
|
1601
|
+
const elementText = ((_a = elementLines.find((line) => line.startsWith(`${elementId}:`))) == null ? void 0 : _a.split(":")[1]) || "Element not found";
|
|
1602
|
+
this.log({
|
|
1603
|
+
category: "action",
|
|
1604
|
+
message: `Executing method: ${method} on element: ${elementId} (xpath: ${xpath}) with args: ${JSON.stringify(
|
|
1605
|
+
args
|
|
1606
|
+
)}`,
|
|
1607
|
+
level: 1
|
|
1608
|
+
});
|
|
1609
|
+
let urlChangeString = "";
|
|
1610
|
+
const locator = this.page.locator(`xpath=${xpath}`).first();
|
|
1611
|
+
try {
|
|
1612
|
+
const initialUrl = this.page.url();
|
|
1613
|
+
if (method === "scrollIntoView") {
|
|
1614
|
+
this.log({
|
|
1615
|
+
category: "action",
|
|
1616
|
+
message: `Scrolling element into view`,
|
|
1617
|
+
level: 2
|
|
1618
|
+
});
|
|
1619
|
+
try {
|
|
1620
|
+
yield locator.evaluate((element) => {
|
|
1621
|
+
element.scrollIntoView({ behavior: "smooth", block: "center" });
|
|
1622
|
+
}).catch((e) => {
|
|
1623
|
+
this.log({
|
|
1624
|
+
category: "action",
|
|
1625
|
+
message: `Error scrolling element into view: ${e.message}
|
|
1626
|
+
Trace: ${e.stack}`,
|
|
1627
|
+
level: 1
|
|
1628
|
+
});
|
|
1629
|
+
});
|
|
1630
|
+
} catch (e) {
|
|
1631
|
+
this.log({
|
|
1632
|
+
category: "action",
|
|
1633
|
+
message: `Error scrolling element into view (Retries ${retries}): ${e.message}
|
|
1634
|
+
Trace: ${e.stack}`,
|
|
1635
|
+
level: 1
|
|
1636
|
+
});
|
|
1637
|
+
if (retries < 2) {
|
|
1638
|
+
return this._act({
|
|
1639
|
+
action,
|
|
1640
|
+
steps,
|
|
1641
|
+
modelName,
|
|
1642
|
+
useVision,
|
|
1643
|
+
verifierUseVision,
|
|
1644
|
+
retries: retries + 1,
|
|
1645
|
+
chunksSeen
|
|
1646
|
+
});
|
|
1647
|
+
}
|
|
1648
|
+
}
|
|
1649
|
+
} else if (method === "fill" || method === "type") {
|
|
1650
|
+
try {
|
|
1651
|
+
yield locator.fill("");
|
|
1652
|
+
yield locator.click();
|
|
1653
|
+
const text = args[0];
|
|
1654
|
+
for (const char of text) {
|
|
1655
|
+
yield this.page.keyboard.type(char, {
|
|
1656
|
+
delay: Math.random() * 50 + 25
|
|
1657
|
+
});
|
|
1658
|
+
}
|
|
1659
|
+
} catch (e) {
|
|
1660
|
+
this.log({
|
|
1661
|
+
category: "action",
|
|
1662
|
+
message: `Error filling element (Retries ${retries}): ${e.message}
|
|
1663
|
+
Trace: ${e.stack}`,
|
|
1664
|
+
level: 1
|
|
1665
|
+
});
|
|
1666
|
+
if (retries < 2) {
|
|
1667
|
+
return this._act({
|
|
1668
|
+
action,
|
|
1669
|
+
steps,
|
|
1670
|
+
modelName,
|
|
1671
|
+
useVision,
|
|
1672
|
+
verifierUseVision,
|
|
1673
|
+
retries: retries + 1,
|
|
1674
|
+
chunksSeen
|
|
1675
|
+
});
|
|
1676
|
+
}
|
|
1677
|
+
}
|
|
1678
|
+
} else if (method === "press") {
|
|
1679
|
+
try {
|
|
1680
|
+
const key = args[0];
|
|
1681
|
+
yield this.page.keyboard.press(key);
|
|
1682
|
+
} catch (e) {
|
|
1683
|
+
this.log({
|
|
1684
|
+
category: "action",
|
|
1685
|
+
message: `Error pressing key (Retries ${retries}): ${e.message}
|
|
1686
|
+
Trace: ${e.stack}`,
|
|
1687
|
+
level: 1
|
|
1688
|
+
});
|
|
1689
|
+
if (retries < 2) {
|
|
1690
|
+
return this._act({
|
|
1691
|
+
action,
|
|
1692
|
+
steps,
|
|
1693
|
+
modelName,
|
|
1694
|
+
useVision,
|
|
1695
|
+
verifierUseVision,
|
|
1696
|
+
retries: retries + 1,
|
|
1697
|
+
chunksSeen
|
|
1698
|
+
});
|
|
1699
|
+
}
|
|
1700
|
+
}
|
|
1701
|
+
} else if (typeof locator[method] === "function") {
|
|
1702
|
+
this.log({
|
|
1703
|
+
category: "action",
|
|
1704
|
+
message: `Page URL before action: ${this.page.url()}`,
|
|
1705
|
+
level: 2
|
|
1706
|
+
});
|
|
1707
|
+
try {
|
|
1708
|
+
yield locator[method](...args);
|
|
1709
|
+
} catch (e) {
|
|
1710
|
+
this.log({
|
|
1711
|
+
category: "action",
|
|
1712
|
+
message: `Error performing method ${method} with args ${JSON.stringify(
|
|
1713
|
+
args
|
|
1714
|
+
)} (Retries: ${retries}): ${e.message}
|
|
1715
|
+
Trace: ${e.stack}`,
|
|
1716
|
+
level: 1
|
|
1717
|
+
});
|
|
1718
|
+
if (retries < 2) {
|
|
1719
|
+
return this._act({
|
|
1720
|
+
action,
|
|
1721
|
+
steps,
|
|
1722
|
+
modelName,
|
|
1723
|
+
useVision,
|
|
1724
|
+
verifierUseVision,
|
|
1725
|
+
retries: retries + 1,
|
|
1726
|
+
chunksSeen
|
|
1727
|
+
});
|
|
1728
|
+
}
|
|
1729
|
+
}
|
|
1730
|
+
if (method === "click") {
|
|
1731
|
+
this.log({
|
|
1732
|
+
category: "action",
|
|
1733
|
+
message: `Clicking element, checking for page navigation`,
|
|
1734
|
+
level: 1
|
|
1735
|
+
});
|
|
1736
|
+
const newOpenedTab = yield Promise.race([
|
|
1737
|
+
new Promise((resolve) => {
|
|
1738
|
+
this.context.once("page", (page) => resolve(page));
|
|
1739
|
+
setTimeout(() => resolve(null), 1500);
|
|
1740
|
+
})
|
|
1741
|
+
]);
|
|
1742
|
+
this.log({
|
|
1743
|
+
category: "action",
|
|
1744
|
+
message: `Clicked element, ${newOpenedTab ? "opened a new tab" : "no new tabs opened"}`,
|
|
1745
|
+
level: 1
|
|
1746
|
+
});
|
|
1747
|
+
if (newOpenedTab) {
|
|
1748
|
+
this.log({
|
|
1749
|
+
category: "action",
|
|
1750
|
+
message: `New page detected (new tab) with URL: ${newOpenedTab.url()}`,
|
|
1751
|
+
level: 1
|
|
1752
|
+
});
|
|
1753
|
+
yield newOpenedTab.close();
|
|
1754
|
+
yield this.page.goto(newOpenedTab.url());
|
|
1755
|
+
yield this.page.waitForLoadState("domcontentloaded");
|
|
1756
|
+
yield this._waitForSettledDom();
|
|
1757
|
+
}
|
|
1758
|
+
yield Promise.race([
|
|
1759
|
+
this.page.waitForLoadState("networkidle"),
|
|
1760
|
+
new Promise((resolve) => setTimeout(resolve, 5e3))
|
|
1761
|
+
]).catch((e) => {
|
|
1762
|
+
this.log({
|
|
1763
|
+
category: "action",
|
|
1764
|
+
message: `Network idle timeout hit`,
|
|
1765
|
+
level: 1
|
|
1766
|
+
});
|
|
1767
|
+
});
|
|
1768
|
+
this.log({
|
|
1769
|
+
category: "action",
|
|
1770
|
+
message: `Finished waiting for (possible) page navigation`,
|
|
1771
|
+
level: 1
|
|
1772
|
+
});
|
|
1773
|
+
if (this.page.url() !== initialUrl) {
|
|
1774
|
+
this.log({
|
|
1775
|
+
category: "action",
|
|
1776
|
+
message: `New page detected with URL: ${this.page.url()}`,
|
|
1777
|
+
level: 1
|
|
1778
|
+
});
|
|
1779
|
+
}
|
|
1780
|
+
}
|
|
1781
|
+
} else {
|
|
1782
|
+
this.log({
|
|
1783
|
+
category: "action",
|
|
1784
|
+
message: `Chosen method ${method} is invalid`,
|
|
1785
|
+
level: 1
|
|
1786
|
+
});
|
|
1787
|
+
if (retries < 2) {
|
|
1788
|
+
return this._act({
|
|
1789
|
+
action,
|
|
1790
|
+
steps,
|
|
1791
|
+
modelName: model,
|
|
1792
|
+
useVision,
|
|
1793
|
+
verifierUseVision,
|
|
1794
|
+
retries: retries + 1,
|
|
1795
|
+
chunksSeen
|
|
1796
|
+
});
|
|
1797
|
+
} else {
|
|
1798
|
+
return {
|
|
1799
|
+
success: false,
|
|
1800
|
+
message: `Internal error: Chosen method ${method} is invalid`,
|
|
1801
|
+
action
|
|
1802
|
+
};
|
|
1803
|
+
}
|
|
1804
|
+
}
|
|
1805
|
+
let newSteps = steps + (!steps.endsWith("\n") ? "\n" : "") + `## Step: ${response.step}
|
|
1806
|
+
Element: ${elementText}
|
|
1807
|
+
Action: ${response.method}
|
|
1808
|
+
Reasoning: ${response.why}
|
|
1809
|
+
`;
|
|
1810
|
+
if (urlChangeString) {
|
|
1811
|
+
newSteps += ` Result (Important): ${urlChangeString}
|
|
1812
|
+
|
|
1813
|
+
`;
|
|
1814
|
+
}
|
|
1815
|
+
let actionComplete = false;
|
|
1816
|
+
if (response.completed) {
|
|
1817
|
+
this.log({
|
|
1818
|
+
category: "action",
|
|
1819
|
+
message: `Action marked as completed, Verifying if this is true...`,
|
|
1820
|
+
level: 1
|
|
1821
|
+
});
|
|
1822
|
+
let domElements = void 0;
|
|
1823
|
+
let fullpageScreenshot = void 0;
|
|
1824
|
+
if (verifierUseVision) {
|
|
1825
|
+
try {
|
|
1826
|
+
const screenshotService = new ScreenshotService(
|
|
1827
|
+
this.page,
|
|
1828
|
+
selectorMap,
|
|
1829
|
+
this.verbose
|
|
1830
|
+
);
|
|
1831
|
+
fullpageScreenshot = yield screenshotService.getScreenshot(
|
|
1832
|
+
true,
|
|
1833
|
+
15
|
|
1834
|
+
);
|
|
1835
|
+
} catch (e) {
|
|
1836
|
+
this.log({
|
|
1837
|
+
category: "action",
|
|
1838
|
+
message: `Error getting full page screenshot: ${e.message}
|
|
1839
|
+
. Trying again...`,
|
|
1840
|
+
level: 1
|
|
1841
|
+
});
|
|
1842
|
+
const screenshotService = new ScreenshotService(
|
|
1843
|
+
this.page,
|
|
1844
|
+
selectorMap,
|
|
1845
|
+
this.verbose
|
|
1846
|
+
);
|
|
1847
|
+
fullpageScreenshot = yield screenshotService.getScreenshot(
|
|
1848
|
+
true,
|
|
1849
|
+
15
|
|
1850
|
+
);
|
|
1851
|
+
}
|
|
1852
|
+
} else {
|
|
1853
|
+
({ outputString: domElements } = yield this.page.evaluate(() => {
|
|
1854
|
+
return window.processAllOfDom();
|
|
1855
|
+
}));
|
|
1856
|
+
}
|
|
1857
|
+
actionComplete = yield verifyActCompletion({
|
|
1858
|
+
goal: action,
|
|
1859
|
+
steps: newSteps,
|
|
1860
|
+
llmProvider: this.llmProvider,
|
|
1861
|
+
modelName: model,
|
|
1862
|
+
screenshot: fullpageScreenshot,
|
|
1863
|
+
domElements,
|
|
1864
|
+
logger: this.logger
|
|
1865
|
+
});
|
|
1866
|
+
this.log({
|
|
1867
|
+
category: "action",
|
|
1868
|
+
message: `Action completion verification result: ${actionComplete}`,
|
|
1869
|
+
level: 1
|
|
1870
|
+
});
|
|
1871
|
+
}
|
|
1872
|
+
if (!actionComplete) {
|
|
1873
|
+
this.log({
|
|
1874
|
+
category: "action",
|
|
1875
|
+
message: `Continuing to next action step`,
|
|
1876
|
+
level: 1
|
|
1877
|
+
});
|
|
1878
|
+
return this._act({
|
|
1879
|
+
action,
|
|
1880
|
+
steps: newSteps,
|
|
1881
|
+
modelName,
|
|
1882
|
+
chunksSeen,
|
|
1883
|
+
useVision,
|
|
1884
|
+
verifierUseVision
|
|
1885
|
+
});
|
|
1886
|
+
} else {
|
|
1887
|
+
this.log({
|
|
1888
|
+
category: "action",
|
|
1889
|
+
message: `Action completed successfully`,
|
|
1890
|
+
level: 1
|
|
1891
|
+
});
|
|
1892
|
+
yield this._recordAction(action, response.step);
|
|
1893
|
+
return {
|
|
1894
|
+
success: true,
|
|
1895
|
+
message: `Action completed successfully: ${steps}${response.step}`,
|
|
1896
|
+
action
|
|
1897
|
+
};
|
|
1898
|
+
}
|
|
1899
|
+
} catch (error) {
|
|
1900
|
+
this.log({
|
|
1901
|
+
category: "action",
|
|
1902
|
+
message: `Error performing action (Retries: ${retries}): ${error.message}
|
|
1903
|
+
Trace: ${error.stack}`,
|
|
1904
|
+
level: 1
|
|
1905
|
+
});
|
|
1906
|
+
if (retries < 2) {
|
|
1907
|
+
return this._act({
|
|
1908
|
+
action,
|
|
1909
|
+
steps,
|
|
1910
|
+
modelName,
|
|
1911
|
+
useVision,
|
|
1912
|
+
verifierUseVision,
|
|
1913
|
+
retries: retries + 1,
|
|
1914
|
+
chunksSeen
|
|
1915
|
+
});
|
|
1916
|
+
}
|
|
1917
|
+
yield this._recordAction(action, "");
|
|
1918
|
+
return {
|
|
1919
|
+
success: false,
|
|
1920
|
+
message: `Error performing action: ${error.message}`,
|
|
1921
|
+
action
|
|
1922
|
+
};
|
|
1923
|
+
}
|
|
1924
|
+
});
|
|
1925
|
+
}
|
|
1926
|
+
act(_0) {
|
|
1927
|
+
return __async(this, arguments, function* ({
|
|
1928
|
+
action,
|
|
1929
|
+
modelName,
|
|
1930
|
+
useVision = "fallback"
|
|
1931
|
+
}) {
|
|
1932
|
+
useVision = useVision != null ? useVision : "fallback";
|
|
1933
|
+
return this._act({
|
|
1934
|
+
action,
|
|
1935
|
+
modelName,
|
|
1936
|
+
chunksSeen: [],
|
|
1937
|
+
useVision,
|
|
1938
|
+
verifierUseVision: useVision !== false
|
|
1939
|
+
});
|
|
1940
|
+
});
|
|
1941
|
+
}
|
|
1942
|
+
extract(_0) {
|
|
1943
|
+
return __async(this, arguments, function* ({
|
|
1944
|
+
instruction,
|
|
1945
|
+
schema,
|
|
1946
|
+
modelName
|
|
1947
|
+
}) {
|
|
1948
|
+
return this._extract({
|
|
1949
|
+
instruction,
|
|
1950
|
+
schema,
|
|
1951
|
+
modelName
|
|
1952
|
+
});
|
|
1953
|
+
});
|
|
1954
|
+
}
|
|
1955
|
+
observe(options) {
|
|
1956
|
+
return __async(this, null, function* () {
|
|
1957
|
+
var _a, _b;
|
|
1958
|
+
return this._observe({
|
|
1959
|
+
instruction: (_a = options == null ? void 0 : options.instruction) != null ? _a : "Find actions that can be performed on this page.",
|
|
1960
|
+
modelName: options == null ? void 0 : options.modelName,
|
|
1961
|
+
useVision: (_b = options == null ? void 0 : options.useVision) != null ? _b : false,
|
|
1962
|
+
fullPage: false
|
|
1963
|
+
});
|
|
1964
|
+
});
|
|
1965
|
+
}
|
|
1966
|
+
};
|
|
1967
|
+
// Annotate the CommonJS export names for ESM import in node:
// The leading `0 &&` short-circuits, so the assignment below never executes at
// runtime; the statement is kept purely as a static listing of the export
// names (here only `Stagehand`). Do not reformat or "simplify" it.
|
|
1968
|
+
0 && (module.exports = {
|
|
1969
|
+
Stagehand
|
|
1970
|
+
});
|