@browserbasehq/stagehand 1.0.3 → 1.1.0-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. package/README.md +10 -5
  2. package/dist/evals/index.eval.js +1075 -0
  3. package/dist/evals/index.eval.js.map +1 -0
  4. package/dist/evals/playground.js +112 -0
  5. package/dist/evals/playground.js.map +1 -0
  6. package/dist/evals/utils.js +52 -0
  7. package/dist/evals/utils.js.map +1 -0
  8. package/dist/examples/2048.js +108 -0
  9. package/dist/examples/2048.js.map +1 -0
  10. package/dist/examples/debugUrl.js +35 -0
  11. package/dist/examples/debugUrl.js.map +1 -0
  12. package/dist/examples/example.js +37 -0
  13. package/dist/examples/example.js.map +1 -0
  14. package/dist/index.d.ts +22 -6
  15. package/dist/index.js +629 -152
  16. package/dist/lib/browserbase.js +56 -0
  17. package/dist/lib/browserbase.js.map +1 -0
  18. package/dist/lib/cache.js +78 -0
  19. package/dist/lib/cache.js.map +1 -0
  20. package/dist/lib/dom/debug.js +119 -0
  21. package/dist/lib/dom/debug.js.map +1 -0
  22. package/dist/lib/dom/index.js +20 -0
  23. package/dist/lib/dom/index.js.map +1 -0
  24. package/dist/lib/dom/process.js +396 -0
  25. package/dist/lib/dom/process.js.map +1 -0
  26. package/dist/lib/dom/utils.js +28 -0
  27. package/dist/lib/dom/utils.js.map +1 -0
  28. package/dist/lib/index.js +978 -0
  29. package/dist/lib/index.js.map +1 -0
  30. package/dist/lib/inference.js +226 -0
  31. package/dist/lib/inference.js.map +1 -0
  32. package/dist/lib/llm/AnthropicClient.js +150 -0
  33. package/dist/lib/llm/AnthropicClient.js.map +1 -0
  34. package/dist/lib/llm/LLMClient.js +12 -0
  35. package/dist/lib/llm/LLMClient.js.map +1 -0
  36. package/dist/lib/llm/LLMProvider.js +34 -0
  37. package/dist/lib/llm/LLMProvider.js.map +1 -0
  38. package/dist/lib/llm/OpenAIClient.js +69 -0
  39. package/dist/lib/llm/OpenAIClient.js.map +1 -0
  40. package/dist/lib/prompt.js +288 -0
  41. package/dist/lib/prompt.js.map +1 -0
  42. package/dist/lib/vision.js +194 -0
  43. package/dist/lib/vision.js.map +1 -0
  44. package/package.json +2 -1
@@ -0,0 +1,1075 @@
1
+ "use strict";
2
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) { // compiler-emitted async helper: runs `generator` to completion and returns a promise built with P (Promise when P is falsy)
3
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); } // wraps any non-P yielded value in a resolved P
4
+ return new (P || (P = Promise))(function (resolve, reject) {
5
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } } // resumes the generator with the settled value
6
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } } // throws the rejection reason into the generator
7
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); } // resolves on completion, otherwise waits on the yielded value
8
+ step((generator = generator.apply(thisArg, _arguments || [])).next()); // instantiates the generator and starts it
9
+ });
10
+ };
11
+ var __importDefault = (this && this.__importDefault) || function (mod) { // compiler-emitted interop helper for default imports
12
+ return (mod && mod.__esModule) ? mod : { "default": mod }; // modules not flagged __esModule are wrapped so `.default` yields the module itself
13
+ };
14
+ var _a;
15
+ Object.defineProperty(exports, "__esModule", { value: true });
16
+ const braintrust_1 = require("braintrust");
17
+ const lib_1 = require("../lib");
18
+ const zod_1 = require("zod");
19
+ const process_1 = __importDefault(require("process"));
20
+ const utils_1 = require("./utils");
21
+ const env = ((_a = process_1.default.env.EVAL_ENV) === null || _a === void 0 ? void 0 : _a.toLowerCase()) === "browserbase" // EVAL_ENV is compared case-insensitively; unset or any other value selects "LOCAL"
22
+ ? "BROWSERBASE"
23
+ : "LOCAL";
24
+ const expedia = () => __awaiter(void 0, void 0, void 0, function* () { // Eval: drives a flight search on expedia.com and passes when the final URL is under /Checkout/
25
+ const logger = new utils_1.EvalLogger();
26
+ const stagehand = new lib_1.Stagehand({
27
+ env,
28
+ headless: false,
29
+ verbose: 2,
30
+ debugDom: true,
31
+ logger: (message) => {
32
+ logger.log(message.message);
33
+ },
34
+ });
35
+ logger.init(stagehand);
36
+ const { debugUrl, sessionUrl } = yield stagehand.init();
37
+ try {
38
+ yield stagehand.page.goto("https://www.expedia.com/flights");
39
+ yield stagehand.act({
40
+ action: "find round-trip flights from San Francisco (SFO) to Toronto (YYZ) for Jan 1, 2025 (up to one to two weeks)",
41
+ });
42
+ yield stagehand.act({ action: "Go to the first non-stop flight" });
43
+ yield stagehand.act({ action: "select the cheapest flight" });
44
+ yield stagehand.act({ action: "click on the first non-stop flight" });
45
+ yield stagehand.act({
46
+ action: "Take me to the checkout page",
47
+ });
48
+ const url = yield stagehand.page.url();
49
+ return {
50
+ _success: url.startsWith("https://www.expedia.com/Checkout/"), // pass criterion: landed on a checkout URL
51
+ logs: logger.getLogs(),
52
+ debugUrl,
53
+ sessionUrl,
54
+ };
55
+ }
56
+ catch (error) {
57
+ logger.error(`Error in expedia function: ${JSON.stringify(error, null, 2)}. Trace: ${error.stack}`); // NOTE(review): JSON.stringify of an Error instance yields "{}" because message/stack are non-enumerable; only the stack carries detail here
58
+ return {
59
+ _success: false,
60
+ error: JSON.parse(JSON.stringify(error, null, 2)), // JSON round-trip keeps only the error's enumerable, serializable fields
61
+ debugUrl,
62
+ sessionUrl,
63
+ logs: logger.getLogs(),
64
+ };
65
+ }
66
+ finally {
67
+ yield stagehand.context.close().catch(() => { }); // best-effort cleanup: close failures are ignored
68
+ }
69
+ });
70
+ const vanta = () => __awaiter(void 0, void 0, void 0, function* () {
71
+ const logger = new utils_1.EvalLogger();
72
+ const stagehand = new lib_1.Stagehand({
73
+ env,
74
+ headless: process_1.default.env.HEADLESS !== "false",
75
+ logger: (message) => {
76
+ logger.log(message);
77
+ },
78
+ verbose: 2,
79
+ });
80
+ logger.init(stagehand);
81
+ const { debugUrl, sessionUrl } = yield stagehand.init();
82
+ yield stagehand.page.goto("https://www.vanta.com/");
83
+ const observations = yield stagehand.observe();
84
+ if (observations.length === 0) {
85
+ yield stagehand.context.close();
86
+ return {
87
+ _success: false,
88
+ observations,
89
+ debugUrl,
90
+ sessionUrl,
91
+ logs: logger.getLogs(),
92
+ };
93
+ }
94
+ const expectedLocator = `body > div.page-wrapper > div.nav_component > div.nav_element.w-nav > div.padding-global > div > div > nav > div.nav_cta-wrapper.is-new > a.nav_cta-button-desktop.is-smaller.w-button`;
95
+ const expectedResult = yield stagehand.page
96
+ .locator(expectedLocator)
97
+ .first()
98
+ .innerHTML();
99
+ let foundMatch = false;
100
+ for (const observation of observations) {
101
+ try {
102
+ const observationResult = yield stagehand.page
103
+ .locator(observation.selector)
104
+ .first()
105
+ .innerHTML();
106
+ if (observationResult === expectedResult) {
107
+ foundMatch = true;
108
+ break;
109
+ }
110
+ }
111
+ catch (error) {
112
+ console.warn(`Failed to check observation with selector ${observation.selector}:`, error.message);
113
+ continue;
114
+ }
115
+ }
116
+ yield stagehand.context.close();
117
+ return {
118
+ _success: foundMatch,
119
+ expected: expectedResult,
120
+ observations,
121
+ debugUrl,
122
+ sessionUrl,
123
+ logs: logger.getLogs(),
124
+ };
125
+ });
126
+ const vanta_h = () => __awaiter(void 0, void 0, void 0, function* () { // Eval: asks observe() for a button that should not exist on vanta.com; passes when nothing is returned
127
+ const logger = new utils_1.EvalLogger();
128
+ const stagehand = new lib_1.Stagehand({
129
+ env,
130
+ headless: process_1.default.env.HEADLESS !== "false",
131
+ logger: (message) => {
132
+ logger.log(message.message);
133
+ },
134
+ verbose: 2,
135
+ });
136
+ logger.init(stagehand);
137
+ const { debugUrl, sessionUrl } = yield stagehand.init();
138
+ yield stagehand.page.goto("https://www.vanta.com/"); // NOTE(review): no try/finally in this eval, so the context is not closed if goto or observe throws
139
+ const observations = yield stagehand.observe({
140
+ instruction: "find the buy now button",
141
+ });
142
+ yield stagehand.context.close();
143
+ // we should have no saved observation since the element shouldn't exist
144
+ return {
145
+ _success: observations.length === 0,
146
+ observations,
147
+ debugUrl,
148
+ sessionUrl,
149
+ logs: logger.getLogs(),
150
+ };
151
+ });
152
+ const simple_google_search = () => __awaiter(void 0, void 0, void 0, function* () { // Eval: performs a Google search through act() and passes when the resulting URL starts with the expected search URL
153
+ const logger = new utils_1.EvalLogger();
154
+ const stagehand = new lib_1.Stagehand({
155
+ env,
156
+ headless: process_1.default.env.HEADLESS !== "false",
157
+ logger: (message) => {
158
+ logger.log(message.message);
159
+ },
160
+ verbose: 2,
161
+ });
162
+ logger.init(stagehand);
163
+ const { debugUrl, sessionUrl } = yield stagehand.init();
164
+ yield stagehand.page.goto("https://www.google.com"); // NOTE(review): no try/finally in this eval, so the context is not closed if a step throws
165
+ yield stagehand.act({
166
+ action: 'Search for "OpenAI"',
167
+ });
168
+ const expectedUrl = "https://www.google.com/search?q=OpenAI";
169
+ const currentUrl = yield stagehand.page.url();
170
+ yield stagehand.context.close();
171
+ return {
172
+ _success: currentUrl.startsWith(expectedUrl), // prefix match, so extra query parameters after q=OpenAI are tolerated
173
+ currentUrl,
174
+ debugUrl,
175
+ sessionUrl,
176
+ logs: logger.getLogs(),
177
+ };
178
+ });
179
+ const peeler_simple = () => __awaiter(void 0, void 0, void 0, function* () { // Eval: opens a local HTML fixture, adds the peeler to the cart, and passes when the success message is visible
180
+ const logger = new utils_1.EvalLogger();
181
+ const stagehand = new lib_1.Stagehand({
182
+ env: "LOCAL", // hard-coded rather than using the module-level env; the page below is a file:// URL
183
+ headless: process_1.default.env.HEADLESS !== "false",
184
+ logger: (message) => {
185
+ logger.log(message.message);
186
+ },
187
+ verbose: 2,
188
+ });
189
+ logger.init(stagehand);
190
+ const { debugUrl, sessionUrl } = yield stagehand.init();
191
+ yield stagehand.page.goto(`file://${process_1.default.cwd()}/evals/assets/peeler.html`); // path is resolved against the current working directory
192
+ yield stagehand.act({ action: "add the peeler to cart" });
193
+ const successMessageLocator = stagehand.page.locator('text="Congratulations, you have 1 A in your cart"');
194
+ const isVisible = yield successMessageLocator.isVisible();
195
+ yield stagehand.context.close(); // NOTE(review): not reached if an earlier step throws (no try/finally)
196
+ return {
197
+ _success: isVisible,
198
+ debugUrl,
199
+ sessionUrl,
200
+ logs: logger.getLogs(),
201
+ };
202
+ });
203
+ const peeler_complex = () => __awaiter(void 0, void 0, void 0, function* () {
204
+ const logger = new utils_1.EvalLogger();
205
+ const stagehand = new lib_1.Stagehand({
206
+ env,
207
+ verbose: 2,
208
+ headless: process_1.default.env.HEADLESS !== "false",
209
+ logger: (message) => {
210
+ logger.log(message.message);
211
+ },
212
+ });
213
+ logger.init(stagehand);
214
+ const { debugUrl, sessionUrl } = yield stagehand.init();
215
+ try {
216
+ yield stagehand.page.goto(`https://chefstoys.com/`, { timeout: 60000 });
217
+ yield stagehand.act({
218
+ action: "search for peelers",
219
+ });
220
+ yield stagehand.act({
221
+ action: 'click on the first "OXO" brand peeler',
222
+ });
223
+ const { price } = yield stagehand.extract({
224
+ instruction: "get the price of the peeler",
225
+ schema: zod_1.z.object({ price: zod_1.z.number().nullable() }),
226
+ modelName: "gpt-4o-2024-08-06",
227
+ });
228
+ return {
229
+ _success: price === 11.99,
230
+ price,
231
+ debugUrl,
232
+ sessionUrl,
233
+ logs: logger.getLogs(),
234
+ };
235
+ }
236
+ catch (error) {
237
+ const errorMessage = JSON.parse(JSON.stringify(error, null, 2));
238
+ const errorStack = errorMessage.stack;
239
+ const fullError = `Error in peeler_complex function: ${errorMessage.message} Trace: ${errorStack}`;
240
+ logger.error(fullError);
241
+ return {
242
+ _success: false,
243
+ error: JSON.parse(JSON.stringify(error, null, 2)),
244
+ debugUrl,
245
+ sessionUrl,
246
+ logs: logger.getLogs(),
247
+ };
248
+ }
249
+ finally {
250
+ yield stagehand.context.close();
251
+ }
252
+ });
253
+ const homedepot = () => __awaiter(void 0, void 0, void 0, function* () {
254
+ const logger = new utils_1.EvalLogger();
255
+ const stagehand = new lib_1.Stagehand({
256
+ env,
257
+ verbose: 2,
258
+ headless: process_1.default.env.HEADLESS !== "false",
259
+ logger: (message) => {
260
+ logger.log(message.message);
261
+ },
262
+ });
263
+ logger.init(stagehand);
264
+ const { debugUrl, sessionUrl } = yield stagehand.init();
265
+ try {
266
+ yield stagehand.page.goto("https://www.homedepot.com/");
267
+ yield stagehand.act({ action: "search for gas grills" });
268
+ yield stagehand.act({ action: "click on the best selling gas grill" });
269
+ yield stagehand.act({ action: "click on the Product Details" });
270
+ yield stagehand.act({ action: "find the Primary Burner BTU" });
271
+ const productSpecs = yield stagehand.extract({
272
+ instruction: "Extract the Primary exact Burner BTU of the product",
273
+ schema: zod_1.z.object({
274
+ productSpecs: zod_1.z
275
+ .array(zod_1.z.object({
276
+ burnerBTU: zod_1.z.string().describe("Primary Burner BTU exact value"),
277
+ }))
278
+ .describe("Gas grill Primary Burner BTU exact value"),
279
+ }),
280
+ modelName: "gpt-4o-2024-08-06",
281
+ });
282
+ logger.log(`The gas grill primary burner BTU is: ${productSpecs}`);
283
+ if (!productSpecs ||
284
+ !productSpecs.productSpecs ||
285
+ productSpecs.productSpecs.length !== 1) {
286
+ return {
287
+ _success: false,
288
+ productSpecs,
289
+ debugUrl,
290
+ sessionUrl,
291
+ logs: logger.getLogs(),
292
+ };
293
+ }
294
+ if ((productSpecs.productSpecs[0].burnerBTU.match(/0/g) || []).length == 4 &&
295
+ (productSpecs.productSpecs[0].burnerBTU.match(/4/g) || []).length === 1) {
296
+ return {
297
+ _success: true,
298
+ productSpecs,
299
+ debugUrl,
300
+ sessionUrl,
301
+ logs: logger.getLogs(),
302
+ };
303
+ }
304
+ else {
305
+ return {
306
+ _success: false,
307
+ productSpecs,
308
+ debugUrl,
309
+ sessionUrl,
310
+ logs: logger.getLogs(),
311
+ };
312
+ }
313
+ }
314
+ catch (error) {
315
+ logger.error(`Error in homedepot function: ${JSON.stringify(error, null, 2)}, Trace: ${error.stack}`);
316
+ return {
317
+ _success: false,
318
+ error: JSON.parse(JSON.stringify(error, null, 2)),
319
+ debugUrl,
320
+ sessionUrl,
321
+ logs: logger.getLogs(),
322
+ };
323
+ }
324
+ finally {
325
+ yield stagehand.context.close().catch(() => { });
326
+ }
327
+ });
328
+ const extract_github_stars = () => __awaiter(void 0, void 0, void 0, function* () { // Eval: extracts the star count of facebook/react and compares it with the value read from the page's star counter element
329
+ const logger = new utils_1.EvalLogger();
330
+ const stagehand = new lib_1.Stagehand({
331
+ env,
332
+ verbose: 2,
333
+ headless: process_1.default.env.HEADLESS !== "false",
334
+ logger: (message) => {
335
+ logger.log(message.message);
336
+ },
337
+ });
338
+ logger.init(stagehand);
339
+ const { debugUrl, sessionUrl } = yield stagehand.init();
340
+ try {
341
+ yield stagehand.page.goto("https://github.com/facebook/react");
342
+ const { stars } = yield stagehand.extract({
343
+ instruction: "Extract the number of stars for the project",
344
+ schema: zod_1.z.object({
345
+ stars: zod_1.z.number().describe("the number of stars for the project"),
346
+ }),
347
+ modelName: "gpt-4o-2024-08-06",
348
+ });
349
+ const expectedStarsString = yield stagehand.page
350
+ .locator("#repo-stars-counter-star")
351
+ .first()
352
+ .innerHTML();
353
+ const expectedStars = expectedStarsString.toLowerCase().endsWith('k') // a trailing "k" is treated as a thousands suffix
354
+ ? parseFloat(expectedStarsString.slice(0, -1)) * 1000
355
+ : parseFloat(expectedStarsString);
356
+ yield stagehand.context.close().catch(() => { });
357
+ return {
358
+ _success: stars === expectedStars, // NOTE(review): exact equality against the abbreviated counter; an unabbreviated extracted count would not match
359
+ stars,
360
+ debugUrl,
361
+ sessionUrl,
362
+ logs: logger.getLogs(),
363
+ };
364
+ }
365
+ catch (error) {
366
+ console.error("Error or timeout occurred:", error);
367
+ yield stagehand.context.close().catch(() => { }); // best-effort cleanup: close failures are ignored
368
+ return {
369
+ _success: false,
370
+ error: JSON.parse(JSON.stringify(error, null, 2)), // JSON round-trip keeps only the error's enumerable, serializable fields
371
+ debugUrl,
372
+ sessionUrl,
373
+ logs: logger.getLogs(),
374
+ };
375
+ }
376
+ });
377
+ const extract_collaborators_from_github_repository = () => __awaiter(void 0, void 0, void 0, function* () { // Eval: extracts contributors of facebook/react and passes when exactly 20 are returned
378
+ const logger = new utils_1.EvalLogger();
379
+ const stagehand = new lib_1.Stagehand({
380
+ env,
381
+ verbose: 2,
382
+ headless: process_1.default.env.HEADLESS !== "false",
383
+ logger: (message) => {
384
+ logger.log(message.message);
385
+ },
386
+ });
387
+ logger.init(stagehand);
388
+ const { debugUrl, sessionUrl } = yield stagehand.init();
389
+ try {
390
+ yield stagehand.page.goto("https://github.com/facebook/react");
391
+ yield stagehand.act({
392
+ action: "find the contributors section",
393
+ });
394
+ const { contributors } = yield stagehand.extract({
395
+ instruction: "Extract top 20 contributors of this repository",
396
+ schema: zod_1.z.object({
397
+ contributors: zod_1.z.array(zod_1.z.object({
398
+ github_username: zod_1.z
399
+ .string()
400
+ .describe("the github username of the contributor"),
401
+ information: zod_1.z.string().describe("number of commits contributed"),
402
+ })),
403
+ }),
404
+ modelName: "gpt-4o-2024-08-06",
405
+ });
406
+ console.log("Extracted collaborators:", contributors);
407
+ yield stagehand.context.close().catch(() => { });
408
+ return {
409
+ _success: contributors.length === 20, // pass criterion: count only, the contents are not validated
410
+ contributors,
411
+ debugUrl,
412
+ sessionUrl,
413
+ logs: logger.getLogs(),
414
+ };
415
+ }
416
+ catch (error) {
417
+ console.error("Error or timeout occurred:", error);
418
+ yield stagehand.context.close().catch(() => { }); // best-effort cleanup: close failures are ignored
419
+ return {
420
+ _success: false,
421
+ error: JSON.parse(JSON.stringify(error, null, 2)), // JSON round-trip keeps only the error's enumerable, serializable fields
422
+ debugUrl,
423
+ sessionUrl,
424
+ logs: logger.getLogs(),
425
+ };
426
+ }
427
+ });
428
+ const extract_last_twenty_github_commits = () => __awaiter(void 0, void 0, void 0, function* () {
429
+ const logger = new utils_1.EvalLogger();
430
+ const stagehand = new lib_1.Stagehand({
431
+ env,
432
+ verbose: 2,
433
+ headless: process_1.default.env.HEADLESS !== "false",
434
+ logger: (message) => {
435
+ logger.log(message.message);
436
+ },
437
+ });
438
+ logger.init(stagehand);
439
+ const { debugUrl, sessionUrl } = yield stagehand.init();
440
+ try {
441
+ yield stagehand.page.goto("https://github.com/facebook/react");
442
+ yield stagehand.act({
443
+ action: "find commit history, generally described by the number of commits",
444
+ });
445
+ const { commits } = yield stagehand.extract({
446
+ instruction: "Extract last 20 commits",
447
+ schema: zod_1.z.object({
448
+ commits: zod_1.z.array(zod_1.z.object({
449
+ commit_message: zod_1.z.string(),
450
+ commit_url: zod_1.z.string(),
451
+ commit_hash: zod_1.z.string(),
452
+ })),
453
+ }),
454
+ modelName: "gpt-4o-2024-08-06",
455
+ });
456
+ logger.log(`Extracted commits: ${commits}`);
457
+ yield stagehand.context.close().catch(() => { });
458
+ return {
459
+ _success: commits.length === 20,
460
+ commits,
461
+ debugUrl,
462
+ sessionUrl,
463
+ logs: logger.getLogs(),
464
+ };
465
+ }
466
+ catch (error) {
467
+ console.error("Error or timeout occurred:", error);
468
+ yield stagehand.context.close().catch(() => { });
469
+ return {
470
+ _success: false,
471
+ error: JSON.parse(JSON.stringify(error, null, 2)),
472
+ debugUrl,
473
+ sessionUrl,
474
+ logs: logger.getLogs(),
475
+ };
476
+ }
477
+ });
478
+ const wikipedia = () => __awaiter(void 0, void 0, void 0, function* () { // Eval: follows the "hit and run" link from the Baseball article and passes when the resulting URL matches exactly
479
+ const logger = new utils_1.EvalLogger();
480
+ const stagehand = new lib_1.Stagehand({
481
+ env,
482
+ verbose: 2,
483
+ headless: process_1.default.env.HEADLESS !== "false",
484
+ logger: (message) => {
485
+ logger.log(message.message);
486
+ },
487
+ });
488
+ logger.init(stagehand);
489
+ const { debugUrl, sessionUrl } = yield stagehand.init();
490
+ yield stagehand.page.goto(`https://en.wikipedia.org/wiki/Baseball`); // NOTE(review): no try/finally in this eval, so the context is not closed if goto or act throws
491
+ yield stagehand.act({
492
+ action: 'click the "hit and run" link in this article',
493
+ });
494
+ const url = "https://en.wikipedia.org/wiki/Hit_and_run_(baseball)";
495
+ const currentUrl = yield stagehand.page.url();
496
+ yield stagehand.context.close().catch(() => { }); // best-effort cleanup: close failures are ignored
497
+ return {
498
+ _success: currentUrl === url, // exact match, unlike the prefix checks used by other evals
499
+ expected: url,
500
+ actual: currentUrl,
501
+ debugUrl,
502
+ sessionUrl,
503
+ logs: logger.getLogs(),
504
+ };
505
+ });
506
+ // Validate that the action is not found on the page
507
+ const nonsense_action = () => __awaiter(void 0, void 0, void 0, function* () { // Eval: requests an action with no matching element and passes when act() returns the exact expected failure object
508
+ const logger = new utils_1.EvalLogger();
509
+ const stagehand = new lib_1.Stagehand({
510
+ env: "LOCAL", // hard-coded rather than using the module-level env
511
+ verbose: 2,
512
+ debugDom: true,
513
+ headless: true,
514
+ logger: (message) => {
515
+ logger.log(message.message);
516
+ },
517
+ });
518
+ logger.init(stagehand);
519
+ const { debugUrl, sessionUrl } = yield stagehand.init();
520
+ try {
521
+ yield stagehand.page.goto("https://www.homedepot.com/");
522
+ const result = yield stagehand.act({
523
+ action: "click on the first banana",
524
+ });
525
+ console.log("result", result);
526
+ // Assert the output
527
+ const expectedResult = {
528
+ success: false,
529
+ message: "Action not found on the current page after checking all chunks.",
530
+ action: "click on the first banana",
531
+ };
532
+ const isResultCorrect = JSON.stringify(result) === JSON.stringify(expectedResult); // string comparison: sensitive to key order and to any extra fields on result
533
+ return {
534
+ _success: isResultCorrect,
535
+ debugUrl,
536
+ sessionUrl,
537
+ logs: logger.getLogs(),
538
+ };
539
+ }
540
+ catch (error) {
541
+ console.error(`Error in nonsense_action function: ${error.message}`);
542
+ return {
543
+ _success: false,
544
+ error: JSON.parse(JSON.stringify(error, null, 2)), // JSON round-trip keeps only the error's enumerable, serializable fields
545
+ debugUrl,
546
+ sessionUrl,
547
+ logs: logger.getLogs(),
548
+ };
549
+ }
550
+ finally {
551
+ yield stagehand.context.close(); // always release the browser context
552
+ }
553
+ });
554
+ const costar = () => __awaiter(void 0, void 0, void 0, function* () {
555
+ const logger = new utils_1.EvalLogger();
556
+ const stagehand = new lib_1.Stagehand({
557
+ env,
558
+ verbose: 2,
559
+ debugDom: true,
560
+ headless: process_1.default.env.HEADLESS !== "false",
561
+ logger: (message) => {
562
+ logger.log(message.message);
563
+ },
564
+ });
565
+ logger.init(stagehand);
566
+ const { debugUrl, sessionUrl } = yield stagehand.init();
567
+ // TODO: fix this eval - does not work in headless mode
568
+ try {
569
+ yield Promise.race([
570
+ stagehand.page.goto("https://www.costar.com/"),
571
+ new Promise((_, reject) => setTimeout(() => reject(new Error("Navigation timeout")), 30000)),
572
+ ]);
573
+ yield stagehand.act({ action: "click on the first article" });
574
+ yield stagehand.act({
575
+ action: "click on the learn more button for the first job",
576
+ });
577
+ const articleTitle = yield stagehand.extract({
578
+ instruction: "extract the title of the article",
579
+ schema: zod_1.z.object({
580
+ title: zod_1.z.string().describe("the title of the article").nullable(),
581
+ }),
582
+ modelName: "gpt-4o-2024-08-06",
583
+ });
584
+ logger.log(`articleTitle: ${articleTitle}`);
585
+ // Check if the title is more than 5 characters
586
+ const isTitleValid = articleTitle.title !== null && articleTitle.title.length > 5;
587
+ yield stagehand.context.close();
588
+ return {
589
+ title: articleTitle.title,
590
+ _success: isTitleValid,
591
+ debugUrl,
592
+ sessionUrl,
593
+ logs: logger.getLogs(),
594
+ };
595
+ }
596
+ catch (error) {
597
+ logger.error(`Error in costar function: ${error.message}`);
598
+ return {
599
+ title: null,
600
+ _success: false,
601
+ debugUrl,
602
+ sessionUrl,
603
+ logs: logger.getLogs(),
604
+ };
605
+ }
606
+ finally {
607
+ yield stagehand.context.close();
608
+ }
609
+ });
610
+ const google_jobs = () => __awaiter(void 0, void 0, void 0, function* () {
611
+ const logger = new utils_1.EvalLogger();
612
+ const stagehand = new lib_1.Stagehand({
613
+ env,
614
+ verbose: 2,
615
+ debugDom: true,
616
+ headless: process_1.default.env.HEADLESS !== "false",
617
+ logger: (message) => {
618
+ logger.log(message.message);
619
+ },
620
+ });
621
+ logger.init(stagehand);
622
+ const { debugUrl, sessionUrl } = yield stagehand.init();
623
+ try {
624
+ yield stagehand.page.goto("https://www.google.com/");
625
+ yield stagehand.act({ action: "click on the about page" });
626
+ yield stagehand.act({ action: "click on the careers page" });
627
+ yield stagehand.act({ action: "input data scientist into role" });
628
+ yield stagehand.act({ action: "input new york city into location" });
629
+ yield stagehand.act({ action: "click on the search button" });
630
+ // NOTE: "click on the first Learn More button" is not working - the span for learn more is not clickable and the a href is after it
631
+ yield stagehand.act({ action: "click on the first job link" });
632
+ const jobDetails = yield stagehand.extract({
633
+ instruction: "Extract the following details from the job posting: application deadline, minimum qualifications (degree and years of experience), and preferred qualifications (degree and years of experience)",
634
+ schema: zod_1.z.object({
635
+ applicationDeadline: zod_1.z
636
+ .string()
637
+ .describe("The date until which the application window will be open")
638
+ .nullable(),
639
+ minimumQualifications: zod_1.z.object({
640
+ degree: zod_1.z.string().describe("The minimum required degree").nullable(),
641
+ yearsOfExperience: zod_1.z
642
+ .number()
643
+ .describe("The minimum required years of experience")
644
+ .nullable(),
645
+ }),
646
+ preferredQualifications: zod_1.z.object({
647
+ degree: zod_1.z.string().describe("The preferred degree").nullable(),
648
+ yearsOfExperience: zod_1.z
649
+ .number()
650
+ .describe("The preferred years of experience")
651
+ .nullable(),
652
+ }),
653
+ }),
654
+ modelName: "gpt-4o-2024-08-06",
655
+ });
656
+ logger.log(`Job Details: ${jobDetails}`);
657
+ const isJobDetailsValid = jobDetails &&
658
+ Object.values(jobDetails).every((value) => value !== null &&
659
+ value !== undefined &&
660
+ (typeof value !== "object" ||
661
+ Object.values(value).every((v) => v !== null &&
662
+ v !== undefined &&
663
+ (typeof v === "number" || typeof v === "string"))));
664
+ logger.log(`Job Details valid: ${isJobDetailsValid}`);
665
+ return {
666
+ _success: isJobDetailsValid,
667
+ jobDetails,
668
+ debugUrl,
669
+ sessionUrl,
670
+ logs: logger.getLogs(),
671
+ };
672
+ }
673
+ catch (error) {
674
+ logger.error(`Error in google_jobs function: ${error.message}. Trace: ${error.stack}`);
675
+ return {
676
+ _success: false,
677
+ debugUrl,
678
+ sessionUrl,
679
+ error: JSON.parse(JSON.stringify(error, null, 2)),
680
+ logs: logger.getLogs(),
681
+ };
682
+ }
683
+ finally {
684
+ yield stagehand.context.close();
685
+ }
686
+ });
687
+ const extract_partners = () => __awaiter(void 0, void 0, void 0, function* () { // Eval: navigates ramp.com to its partners page, extracts partner names, and passes when all expected partner categories are found
688
+ const logger = new utils_1.EvalLogger();
689
+ const stagehand = new lib_1.Stagehand({
690
+ env,
691
+ verbose: 2,
692
+ debugDom: true,
693
+ headless: process_1.default.env.HEADLESS !== "false",
694
+ logger: (message) => {
695
+ logger.log(message.message);
696
+ },
697
+ });
698
+ logger.init(stagehand);
699
+ const { debugUrl, sessionUrl } = yield stagehand.init({
700
+ modelName: "gpt-4o",
701
+ });
702
+ try {
703
+ yield stagehand.page.goto("https://ramp.com");
704
+ yield stagehand.act({
705
+ action: "Close the popup.",
706
+ });
707
+ yield stagehand.act({
708
+ action: "Scroll down to the bottom of the page.",
709
+ });
710
+ yield stagehand.act({
711
+ action: "Click on the link or button that leads to the partners page. If it's in a dropdown or hidden section, first interact with the element to reveal it, then click the link.",
712
+ });
713
+ const partners = yield stagehand.extract({
714
+ instruction: `
715
+ Extract the names of all partner companies mentioned on this page.
716
+ These could be inside text, links, or images representing partner companies.
717
+ If no specific partner names are found, look for any sections or categories of partners mentioned.
718
+ Also, check for any text that explains why partner names might not be listed, if applicable.
719
+ `,
720
+ schema: zod_1.z.object({
721
+ partners: zod_1.z.array(zod_1.z.object({
722
+ name: zod_1.z
723
+ .string()
724
+ .describe("The name of the partner company or category of partners"),
725
+ })),
726
+ explanation: zod_1.z
727
+ .string()
728
+ .optional()
729
+ .describe("Any explanation about partner listing or absence thereof"),
730
+ }),
731
+ });
732
+ const expectedPartners = [
733
+ "Accounting Partners",
734
+ "Private Equity & Venture Capital Partners",
735
+ "Services Partners",
736
+ "Affiliates",
737
+ ];
738
+ if (partners.explanation) {
739
+ logger.log(`Explanation: ${partners.explanation}`);
740
+ }
741
+ const foundPartners = partners.partners.map((partner) => partner.name.toLowerCase()); // lower-cased so the comparison below is case-insensitive
742
+ const allExpectedPartnersFound = expectedPartners.every((partner) => foundPartners.includes(partner.toLowerCase())); // every expected name must appear as an exact (case-insensitive) entry
743
+ logger.log(`All expected partners found: ${allExpectedPartnersFound}`);
744
+ logger.log(`Expected: ${expectedPartners}`);
745
+ logger.log(`Found: ${foundPartners}`);
746
+ return {
747
+ _success: allExpectedPartnersFound,
748
+ partners,
749
+ debugUrl,
750
+ sessionUrl,
751
+ logs: logger.getLogs(),
752
+ };
753
+ }
754
+ catch (error) {
755
+ logger.error(`Error in extractPartners function: ${error.message}. Trace: ${error.stack}`);
756
+ return {
757
+ _success: false,
758
+ debugUrl,
759
+ sessionUrl,
760
+ error: JSON.parse(JSON.stringify(error, null, 2)), // JSON round-trip keeps only the error's enumerable, serializable fields
761
+ logs: logger.getLogs(),
762
+ };
763
+ }
764
+ finally {
765
+ yield stagehand.context.close().catch(() => { }); // best-effort cleanup: close failures are ignored
766
+ }
767
+ });
768
+ const laroche_form = () => __awaiter(void 0, void 0, void 0, function* () { // Eval: fills parts of a sample-request form on laroche-posay.us; currently passes whenever the actions complete without throwing
769
+ const logger = new utils_1.EvalLogger();
770
+ const stagehand = new lib_1.Stagehand({
771
+ env,
772
+ verbose: 2,
773
+ debugDom: true,
774
+ headless: process_1.default.env.HEADLESS !== "false",
775
+ logger: (message) => {
776
+ logger.log(message.message);
777
+ },
778
+ });
779
+ logger.init(stagehand);
780
+ const { debugUrl, sessionUrl } = yield stagehand.init({
781
+ modelName: "gpt-4o",
782
+ });
783
+ try {
784
+ yield stagehand.page.goto("https://www.laroche-posay.us/offers/anthelios-melt-in-milk-sunscreen-sample.html");
785
+ yield stagehand.act({ action: "close the privacy policy popup" });
786
+ // Wait for possible navigation
787
+ yield stagehand.page
788
+ .waitForNavigation({ waitUntil: "domcontentloaded", timeout: 10000 })
789
+ .catch(() => { }); // a timeout here is expected when no navigation happens, so it is ignored
790
+ yield stagehand.act({ action: "fill the last name field" });
791
+ yield stagehand.act({ action: "fill address 1 field" });
792
+ yield stagehand.act({ action: "select a state" });
793
+ yield stagehand.act({ action: "select a skin type" });
794
+ // TODO - finish this eval once we have a way to extract form data from children iframes
795
+ // const formData = await stagehand.extract({
796
+ // instruction: "Extract the filled form data",
797
+ // schema: z.object({
798
+ // firstName: z.string(),
799
+ // lastName: z.string(),
800
+ // email: z.string(),
801
+ // phone: z.string(),
802
+ // zipCode: z.string(),
803
+ // interestedIn: z.string(),
804
+ // startTerm: z.string(),
805
+ // programOfInterest: z.string(),
806
+ // }),
807
+ // modelName: "gpt-4o",
808
+ // });
809
+ // console.log("Extracted form data:", formData);
810
+ // const isFormDataValid =
811
+ // formData.firstName === "John" &&
812
+ // formData.lastName === "Doe" &&
813
+ // formData.email === "john.doe@example.com" &&
814
+ // formData.phone === "1234567890" &&
815
+ // formData.zipCode === "12345" &&
816
+ return {
817
+ _success: true, // unconditional: the form contents are not verified (see TODO above)
818
+ logs: logger.getLogs(),
819
+ debugUrl,
820
+ sessionUrl,
821
+ };
822
+ }
823
+ catch (error) {
824
+ logger.error(`Error in LarocheForm function: ${error.message}. Trace: ${error.stack}`);
825
+ return {
826
+ _success: false,
827
+ error: error.message, // only the message text is returned here, unlike the JSON round-trip used by other evals
828
+ debugUrl,
829
+ sessionUrl,
830
+ logs: logger.getLogs(),
831
+ };
832
+ }
833
+ finally {
834
+ yield stagehand.context.close().catch(() => { }); // best-effort cleanup: close failures are ignored
835
+ }
836
+ });
837
/**
 * Eval: search arXiv for web-agent papers, extract two results, then visit
 * each paper page and extract a structured summary of its abstract.
 *
 * The run passes only when exactly two papers were summarised and each of
 * them has a non-empty `problem` and `methodology`.
 *
 * @returns {Promise<{_success: boolean, logs: *, debugUrl: *, sessionUrl: *, papers?: Array<Object>, error?: string}>}
 *   Always resolves (errors raised inside the try block are converted into a
 *   failed result); every failure result carries an `error` description.
 */
const arxiv = () => __awaiter(void 0, void 0, void 0, function* () {
    const modelName = "gpt-4o-2024-08-06";
    const expectedPaperCount = 2;
    const logger = new utils_1.EvalLogger();
    const stagehand = new lib_1.Stagehand({
        env,
        verbose: 2,
        debugDom: true,
        // Headless unless HEADLESS is explicitly set to the string "false".
        headless: process_1.default.env.HEADLESS !== "false",
        logger: (message) => {
            logger.log(message.message);
        },
    });
    logger.init(stagehand);
    const { debugUrl, sessionUrl } = yield stagehand.init({ modelName });
    // Every failure path reports the same envelope; logs are read at call time
    // so they include everything logged up to the failure.
    const fail = (error) => ({
        _success: false,
        error,
        logs: logger.getLogs(),
        debugUrl,
        sessionUrl,
    });
    const papers = [];
    try {
        yield stagehand.page.goto("https://arxiv.org/search/");
        yield stagehand.act({
            action: "search for the recent papers about web agents with multimodal models",
        });
        const paper_links = yield stagehand.extract({
            instruction: "extract the titles and links for two papers",
            schema: zod_1.z.object({
                papers: zod_1.z
                    .array(zod_1.z.object({
                    title: zod_1.z.string().describe("the title of the paper"),
                    link: zod_1.z.string().describe("the link to the paper").nullable(),
                }))
                    .describe("list of papers"),
            }),
            modelName,
        });
        if (!paper_links ||
            !paper_links.papers ||
            paper_links.papers.length === 0) {
            return fail("No paper links extracted");
        }
        for (const paper of paper_links.papers) {
            // The schema allows a null link; such entries cannot be visited.
            if (!paper.link) {
                continue;
            }
            yield stagehand.page.goto(paper.link);
            const abstract = yield stagehand.extract({
                instruction: "extract details of the paper from the abstract",
                schema: zod_1.z.object({
                    category: zod_1.z
                        .string()
                        .describe("the category of the paper. one of {'Benchmark', 'Dataset', 'Model', 'Framework', 'System', 'Other'}"),
                    problem: zod_1.z
                        .string()
                        .describe("summarize the problem that the paper is trying to solve in one sentence")
                        .nullable(),
                    methodology: zod_1.z
                        .string()
                        .describe("summarize the methodology of the paper in one sentence")
                        .nullable(),
                    results: zod_1.z
                        .string()
                        .describe("summarize the results of the paper in one sentence")
                        .nullable(),
                    conclusion: zod_1.z
                        .string()
                        .describe("summarize the conclusion of the paper in one sentence")
                        .nullable(),
                    code: zod_1.z
                        .string()
                        .describe("if provided, extract only the link to the code repository, without additional text. this is often optional and not always provided.")
                        .nullable(),
                }),
                modelName,
            });
            papers.push({
                title: paper.title,
                link: paper.link,
                category: abstract.category,
                problem: abstract.problem,
                methodology: abstract.methodology,
                results: abstract.results,
                conclusion: abstract.conclusion,
                code: abstract.code,
            });
        }
        if (papers.length === 0) {
            return fail("No paper details extracted");
        }
        logger.log(JSON.stringify(papers, null, 2));
        // Assert that exactly the expected number of papers was summarised.
        if (papers.length !== expectedPaperCount) {
            logger.log(`Expected ${expectedPaperCount} papers, but got ${papers.length}`);
            return fail("Incorrect number of papers extracted");
        }
        // Ensure that every paper has a problem and methodology.
        for (const paper of papers) {
            if (!paper.problem || !paper.methodology) {
                logger.log(`Paper "${paper.title}" is missing problem or methodology`);
                return fail("Incomplete paper information");
            }
        }
        return {
            _success: true,
            papers,
            logs: logger.getLogs(),
            debugUrl,
            sessionUrl,
        };
    }
    catch (error) {
        logger.error(`Error in arxiv function: ${error.message}. Trace: ${error.stack}`);
        // Surface the message in the result as well, not only in the logs.
        return fail(error.message);
    }
    finally {
        // Best-effort cleanup: a failure to close the context must not mask the eval result.
        yield stagehand.context.close().catch(() => { });
    }
});
978
// Registry of eval functions keyed by eval name. The Eval task callback looks
// up `tasks[input.name]`, so each key must match the `name` used in a testcase.
const tasks = {
    vanta, vanta_h,
    peeler_simple, peeler_complex,
    wikipedia,
    simple_google_search,
    extract_github_stars,
    extract_collaborators_from_github_repository,
    extract_last_twenty_github_commits,
    costar,
    google_jobs,
    homedepot,
    extract_partners,
    laroche_form,
    arxiv,
    expedia,
};
996
/**
 * Scorer that compares a task's output with the expected value.
 *
 * When `expected` is absent (null/undefined) or `true`, the task passes if its
 * output is exactly `true` or an object whose `_success` is exactly `true`.
 * Otherwise the output must be strictly equal to `expected`.
 *
 * @param {{input: {name: string}, output: *, expected?: *}} args
 * @returns {{name: string, score: boolean}}
 */
const exactMatch = (args) => {
    const output = args.output;
    // Interpolating a result object directly would print "[object Object]".
    const shown = output !== null && typeof output === "object"
        ? `{ _success: ${String(output._success)} }`
        : String(output);
    console.log(`Task "${args.input.name}" returned: ${shown}`);
    const expected = args.expected === null || args.expected === undefined
        ? true
        : args.expected;
    if (expected === true) {
        // Strict comparison: truthy-but-not-true values such as 1 or "1" are not a pass.
        const succeeded = output === true ||
            (output !== null && output !== undefined && output._success === true);
        return {
            name: "Exact match",
            score: succeeded,
        };
    }
    return {
        name: "Exact match",
        score: output === expected,
    };
};
1011
// Dataset handed to the Eval runner: one `{ input: { name } }` record per eval
// to execute, in this order. Each name is resolved against `tasks` at run time.
const testcases = [
    "vanta",
    "vanta_h",
    "peeler_simple",
    "wikipedia",
    "peeler_complex",
    "simple_google_search",
    "extract_github_stars",
    "extract_collaborators_from_github_repository",
    "extract_last_twenty_github_commits",
    "google_jobs",
    "homedepot",
    "extract_partners",
    "laroche_form",
    "arxiv",
    // "expedia",
].map((name) => ({ input: { name } }));
1046
// Registers the "stagehand" experiment: every testcase is dispatched to the
// eval function of the same name in `tasks` and scored with `exactMatch`.
(0, braintrust_1.Eval)("stagehand", {
    data: () => {
        return testcases;
    },
    task: (input) => __awaiter(void 0, void 0, void 0, function* () {
        try {
            // Handle predefined tasks
            const result = yield tasks[input.name](input);
            // Evals return result objects, which are truthy even on failure, so
            // the pass/fail signal has to come from `_success` (or a bare `true`).
            const passed = result === true ||
                (result !== null && result !== undefined && result._success === true);
            if (passed) {
                console.log(`✅ ${input.name}: Passed`);
            }
            else {
                console.log(`❌ ${input.name}: Failed`);
            }
            return result;
        }
        catch (error) {
            console.error(`❌ ${input.name}: Error - ${error}`);
            // Error instances serialise to `{}` through JSON (message and stack
            // are non-enumerable), so report the message explicitly.
            let details;
            if (error instanceof Error) {
                details = error.message;
            }
            else {
                try {
                    details = JSON.parse(JSON.stringify(error));
                }
                catch (serializationError) {
                    // e.g. `undefined`, a BigInt, or a cyclic object was thrown.
                    details = String(error);
                }
            }
            return {
                _success: false,
                error: details,
            };
        }
    }),
    scores: [exactMatch],
    maxConcurrency: 5,
    // trialCount: 3,
});
1075
+ //# sourceMappingURL=index.eval.js.map