@intuned/browser-dev 0.1.5-dev.1 → 0.1.6-dev.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. package/RELEASE.md +11 -9
  2. package/dist/ai/export.d.ts +1 -7
  3. package/dist/ai/extractStructuredData.js +1 -1
  4. package/dist/ai/extractStructuredDataUsingAi.js +23 -2
  5. package/dist/ai/extractionHelpers/validateSchema.js +34 -2
  6. package/dist/ai/index.d.ts +1 -7
  7. package/dist/ai/tests/testExtractStructuredData.spec.js +150 -18
  8. package/dist/ai/tests/testIsPageLoaded.spec.js +1 -1
  9. package/dist/ai/types/models.js +2 -5
  10. package/dist/ai/validators.js +1 -1
  11. package/dist/common/aiModelsValidations.js +2 -4
  12. package/dist/helpers/downloadFile.js +1 -3
  13. package/dist/helpers/frame_utils/checkFrameAllowsAsyncScripts.js +20 -0
  14. package/dist/helpers/frame_utils/findAllIframes.js +4 -1
  15. package/dist/helpers/frame_utils/tests/testFindAllIframes.spec.js +43 -0
  16. package/dist/helpers/tests/testDownloadFile.spec.js +3 -4
  17. package/dist/helpers/tests/testResolveUrl.spec.js +4 -4
  18. package/dist/helpers/tests/testValidateDataUsingSchema.spec.js +1041 -4
  19. package/dist/helpers/tests/testWithDomSettledWait.spec.js +23 -0
  20. package/dist/helpers/tests/testWithNetworkIdleWait.spec.js +1 -1
  21. package/dist/helpers/uploadFileToS3.js +6 -0
  22. package/dist/helpers/utils/getS3Client.js +2 -2
  23. package/dist/helpers/validateDataUsingSchema.js +93 -7
  24. package/dist/helpers/waitForDomSettled.js +17 -8
  25. package/dist/optimized-extractors/listExtractionHelpers/__tests__/testArrayExtractorFromLocator.spec.js +1 -1
  26. package/dist/optimized-extractors/objectExtractionHelpers/__tests__/testObjectExtractorFromLocator.spec.js +1 -1
  27. package/dist/optimized-extractors/objectExtractionHelpers/__tests__/testObjectExtractorFromPage.spec.js +1 -1
  28. package/dist/optimized-extractors/types/aiModelsValidation.js +1 -3
  29. package/package.json +4 -3
package/RELEASE.md CHANGED
@@ -4,13 +4,13 @@
4
4
 
5
5
  ```bash
6
6
  # From typescript-sdk directory
7
- bash scripts/release.sh prod 1.2.3
7
+ yarn release prod 1.2.3
8
8
  ```
9
9
 
10
10
  ## Usage
11
11
 
12
12
  ```bash
13
- bash scripts/release.sh {prod|dev} version [webapp-repo-path]
13
+ yarn release {prod|dev} version [webapp-repo-path]
14
14
  ```
15
15
 
16
16
  **Arguments:**
@@ -23,13 +23,13 @@ bash scripts/release.sh {prod|dev} version [webapp-repo-path]
23
23
 
24
24
  ```bash
25
25
  # Publish to npm (prod)
26
- bash scripts/release.sh prod 1.2.3
26
+ yarn release prod 1.2.3
27
27
 
28
28
  # Publish dev version to npm
29
- bash scripts/release.sh dev 1.2.3-beta
29
+ yarn release dev 1.2.3-beta
30
30
 
31
31
  # Custom WebApp path
32
- bash scripts/release.sh prod 1.2.3 /path/to/WebApp
32
+ yarn release prod 1.2.3 /path/to/WebApp
33
33
  ```
34
34
 
35
35
  ## Repo Structure
@@ -48,13 +48,15 @@ intuned-master/
48
48
 
49
49
  1. ✅ Builds project (`yarn build`, type checks)
50
50
  2. ✅ Updates version in `package.json`
51
- 3. ✅ Adjusts package name for dev (`intuned-browser-dev`)
51
+ 3. ✅ Adjusts package name for dev (`@intuned/browser-dev`)
52
52
  4. ✅ Publishes to npm
53
- 5. ✅ Updates all TypeScript templates in WebApp (`playwright_v1/`)
54
- 6. Regenerates template files in WebApp
53
+ 5. ✅ For prod only:
54
+ - Updates all TypeScript templates in WebApp (`playwright_v1/`)
55
+ - Regenerates template files in WebApp
56
+ - Updates `packageVersions.json` (`typescript.sdk.version`)
55
57
 
56
58
  ## Prerequisites
57
59
 
58
60
  - Node.js & Yarn installed
59
61
  - npm credentials configured (`npm login`)
60
- - WebApp repo cloned (if updating templates)
62
+ - WebApp repo cloned (for prod releases)
@@ -260,7 +260,7 @@ export type JsonSchema =
260
260
  * const article = await extractStructuredData({
261
261
  * source: articleContainer,
262
262
  * strategy: "MARKDOWN",
263
- * model: "claude-3-5-sonnet-20240620",
263
+ * model: "claude-3-7-sonnet-latest",
264
264
  * dataSchema: {
265
265
  * type: "object",
266
266
  * properties: {
@@ -376,9 +376,6 @@ export declare function extractStructuredData(options: {
376
376
  type SUPPORTED_CLAUDE_MODELS =
377
377
  | "claude-3-5-haiku-20241022"
378
378
  | "claude-3-5-haiku-latest"
379
- | "claude-3-5-sonnet-20240620"
380
- | "claude-3-5-sonnet-20241022"
381
- | "claude-3-5-sonnet-latest"
382
379
  | "claude-3-7-sonnet-20250219"
383
380
  | "claude-3-7-sonnet-latest"
384
381
  | "claude-3-haiku-20240307"
@@ -509,9 +506,6 @@ type SUPPORTED_OPENAI_MODELS =
509
506
  * **Supported Anthropic (Claude) Models:**
510
507
  * "claude-3-5-haiku-20241022"
511
508
  * "claude-3-5-haiku-latest"
512
- * "claude-3-5-sonnet-20240620"
513
- * "claude-3-5-sonnet-20241022"
514
- * "claude-3-5-sonnet-latest"
515
509
  * "claude-3-7-sonnet-20250219"
516
510
  * "claude-3-7-sonnet-latest"
517
511
  * "claude-3-haiku-20240307"
@@ -302,7 +302,7 @@ const extractStructuredDataFromContent = async options => {
302
302
  options: {
303
303
  prompt: options.prompt,
304
304
  images,
305
- jsonSchema: options.dataSchema,
305
+ jsonSchema: parsingResult.data.dataSchema,
306
306
  content: texts.join("\n"),
307
307
  enableDomMatching: false,
308
308
  apiKey: options.apiKey,
@@ -34,6 +34,12 @@ async function extractStructuredDataUsingAi(input) {
34
34
  });
35
35
  const gatewayModel = await gateway.getModel();
36
36
  const tools = (0, _tools.getTools)(toolName, jsonSchema);
37
+ if (tools.isErr()) {
38
+ _Logger.logger.error("Schema validation failed", {
39
+ error: tools.error
40
+ });
41
+ return (0, _neverthrow.err)(Errors.invalidExtractionResult(`Invalid JSON schema: ${tools.error.context}`));
42
+ }
37
43
  const messages = (0, _prompt.getMessages)({
38
44
  prompt,
39
45
  content,
@@ -49,7 +55,7 @@ async function extractStructuredDataUsingAi(input) {
49
55
  result = await (0, _ai.generateText)({
50
56
  model: gatewayModel,
51
57
  messages: messagesHistory,
52
- tools: tools.isOk() ? tools.value : {},
58
+ tools: tools.value,
53
59
  toolChoice: "required",
54
60
  maxRetries
55
61
  });
@@ -60,7 +66,22 @@ async function extractStructuredDataUsingAi(input) {
60
66
  var _result$usage;
61
67
  accumulatedTokens += ((_result$usage = result.usage) === null || _result$usage === void 0 ? void 0 : _result$usage.totalTokens) ?? 0;
62
68
  }
63
- const toolCall = result.toolCalls[0] ?? null;
69
+ if (!result.toolCalls || result.toolCalls.length === 0) {
70
+ var _result$text;
71
+ _Logger.logger.error("AI did not return any tool calls", {
72
+ responseText: result.text,
73
+ model
74
+ });
75
+ return (0, _neverthrow.err)(Errors.invalidExtractionResult(`AI model did not return any tool calls. This might be due to invalid schema or content. Response: ${((_result$text = result.text) === null || _result$text === void 0 ? void 0 : _result$text.substring(0, 200)) || "empty"}`));
76
+ }
77
+ const toolCall = result.toolCalls[0];
78
+ if (!toolCall || !toolCall.input) {
79
+ _Logger.logger.error("Tool call missing input data", {
80
+ toolCall,
81
+ model
82
+ });
83
+ return (0, _neverthrow.err)(Errors.invalidExtractionResult(`AI tool call is missing input data. Tool call name: ${(toolCall === null || toolCall === void 0 ? void 0 : toolCall.toolName) || "unknown"}`));
84
+ }
64
85
  let extractedData = toolCall.input;
65
86
  const isArray = jsonSchema.type === "array";
66
87
  if (isArray && extractedData.extracted_data) {
@@ -17,13 +17,45 @@ function validateJSONSchema(schema) {
17
17
  if (!schema || typeof schema !== "object") {
18
18
  return (0, _neverthrow.err)(errors.invalidJsonSchema("Schema must be an object"));
19
19
  }
20
+ if (schema.anyOf || schema.oneOf) {
21
+ const unionSchemas = schema.anyOf || schema.oneOf;
22
+ if (!Array.isArray(unionSchemas)) {
23
+ return (0, _neverthrow.err)(errors.invalidJsonSchema("anyOf/oneOf must be an array of schemas"));
24
+ }
25
+ for (const subSchema of unionSchemas) {
26
+ if (subSchema.type === "null") {
27
+ continue;
28
+ }
29
+ const subValidation = validateJSONSchema(subSchema);
30
+ if (subValidation.isErr()) {
31
+ return subValidation;
32
+ }
33
+ }
34
+ return (0, _neverthrow.ok)(schema);
35
+ }
20
36
  if (!schema.type) {
21
37
  return (0, _neverthrow.err)(errors.invalidJsonSchema("Schema must have a 'type' property"));
22
38
  }
23
- const validTypes = ["string", "number", "integer", "boolean", "array", "object"];
24
- if (!validTypes.includes(schema.type)) {
39
+ const validTypes = ["string", "number", "integer", "boolean", "array", "object", "null"];
40
+ if (Array.isArray(schema.type)) {
41
+ const invalidTypes = schema.type.filter(t => !validTypes.includes(t));
42
+ if (invalidTypes.length > 0) {
43
+ return (0, _neverthrow.err)(errors.invalidJsonSchema(`Invalid schema types: ${invalidTypes.join(", ")}`));
44
+ }
45
+ const nonNullTypes = schema.type.filter(t => t !== "null");
46
+ if (nonNullTypes.length === 0) {
47
+ return (0, _neverthrow.ok)(schema);
48
+ }
49
+ schema = {
50
+ ...schema,
51
+ type: nonNullTypes[0]
52
+ };
53
+ } else if (!validTypes.includes(schema.type)) {
25
54
  return (0, _neverthrow.err)(errors.invalidJsonSchema(`Invalid schema type: ${schema.type}`));
26
55
  }
56
+ if (schema.type === "null") {
57
+ return (0, _neverthrow.ok)(schema);
58
+ }
27
59
  if (schema.type === "array") {
28
60
  if (!schema.items) {
29
61
  return (0, _neverthrow.err)(errors.invalidJsonSchema("Array schema must have 'items' property"));
@@ -260,7 +260,7 @@ export type JsonSchema =
260
260
  * const article = await extractStructuredData({
261
261
  * source: articleContainer,
262
262
  * strategy: "MARKDOWN",
263
- * model: "claude-3-5-sonnet-20240620",
263
+ * model: "claude-3-7-sonnet-latest",
264
264
  * dataSchema: {
265
265
  * type: "object",
266
266
  * properties: {
@@ -376,9 +376,6 @@ export declare function extractStructuredData(options: {
376
376
  type SUPPORTED_CLAUDE_MODELS =
377
377
  | "claude-3-5-haiku-20241022"
378
378
  | "claude-3-5-haiku-latest"
379
- | "claude-3-5-sonnet-20240620"
380
- | "claude-3-5-sonnet-20241022"
381
- | "claude-3-5-sonnet-latest"
382
379
  | "claude-3-7-sonnet-20250219"
383
380
  | "claude-3-7-sonnet-latest"
384
381
  | "claude-3-haiku-20240307"
@@ -509,9 +506,6 @@ type SUPPORTED_OPENAI_MODELS =
509
506
  * **Supported Anthropic (Claude) Models:**
510
507
  * "claude-3-5-haiku-20241022"
511
508
  * "claude-3-5-haiku-latest"
512
- * "claude-3-5-sonnet-20240620"
513
- * "claude-3-5-sonnet-20241022"
514
- * "claude-3-5-sonnet-latest"
515
509
  * "claude-3-7-sonnet-20250219"
516
510
  * "claude-3-7-sonnet-latest"
517
511
  * "claude-3-haiku-20240307"
@@ -2,7 +2,8 @@
2
2
 
3
3
  var _extendedTest = require("../../common/extendedTest");
4
4
  var _ = require("..");
5
- var _playwrightCore = require("playwright-core");
5
+ var _helpers = require("../../helpers");
6
+ var _playwright = require("playwright");
6
7
  var _dotenv = require("dotenv");
7
8
  var _zod = require("zod");
8
9
  (0, _dotenv.config)();
@@ -130,7 +131,7 @@ _extendedTest.describe.skip("Extract data from page tests", () => {
130
131
  imageStrategy: "Extract article metadata including title, author, date, read time, and tags"
131
132
  };
132
133
  (0, _extendedTest.beforeAll)(async () => {
133
- browser = await _playwrightCore.chromium.launch({
134
+ browser = await _playwright.chromium.launch({
134
135
  headless: true
135
136
  });
136
137
  });
@@ -222,7 +223,7 @@ _extendedTest.describe.skip("Extract data from page tests", () => {
222
223
  prompt: getPromptVariation(sharedPrompts.imageStrategy),
223
224
  enableDomMatching: false,
224
225
  strategy: "IMAGE",
225
- model: "claude-3-5-sonnet-20240620",
226
+ model: "claude-3-7-sonnet-latest",
226
227
  apiKey: process.env.ANTHROPIC_API_KEY
227
228
  });
228
229
  (0, _extendedTest.expect)(data).toHaveProperty("title", "The Future of AI in 2024");
@@ -266,7 +267,7 @@ _extendedTest.describe.skip("Extract data from page tests", () => {
266
267
  prompt: getPromptVariation(sharedPrompts.markdownStrategy),
267
268
  enableDomMatching: false,
268
269
  strategy: "MARKDOWN",
269
- model: "claude-3-5-sonnet-20240620",
270
+ model: "claude-3-7-sonnet-latest",
270
271
  apiKey: process.env.ANTHROPIC_API_KEY
271
272
  });
272
273
  (0, _extendedTest.expect)(data).toHaveProperty("title", "The Future of AI in 2024");
@@ -295,7 +296,7 @@ _extendedTest.describe.skip("Extract data from page tests", () => {
295
296
  prompt: getPromptVariation(sharedPrompts.arrayStrings),
296
297
  enableDomMatching: false,
297
298
  strategy: "HTML",
298
- model: "claude-3-5-sonnet-20240620",
299
+ model: "claude-3-7-sonnet-latest",
299
300
  apiKey: process.env.ANTHROPIC_API_KEY
300
301
  });
301
302
  (0, _extendedTest.expect)(Array.isArray(data)).toBe(true);
@@ -331,7 +332,7 @@ _extendedTest.describe.skip("Extract data from page tests", () => {
331
332
  prompt: getPromptVariation(sharedPrompts.arrayObjects),
332
333
  enableDomMatching: false,
333
334
  strategy: "HTML",
334
- model: "claude-3-5-sonnet-20240620",
335
+ model: "claude-3-7-sonnet-latest",
335
336
  apiKey: process.env.ANTHROPIC_API_KEY
336
337
  });
337
338
  (0, _extendedTest.expect)(Array.isArray(data)).toBe(true);
@@ -392,7 +393,7 @@ _extendedTest.describe.skip("Extract data from page tests", () => {
392
393
  prompt: getPromptVariation(sharedPrompts.objectNested),
393
394
  enableDomMatching: false,
394
395
  strategy: "HTML",
395
- model: "claude-3-5-sonnet-20240620",
396
+ model: "claude-3-7-sonnet-latest",
396
397
  apiKey: process.env.ANTHROPIC_API_KEY
397
398
  });
398
399
  (0, _extendedTest.expect)(data.user.name).toBe("Sarah Wilson");
@@ -426,7 +427,7 @@ _extendedTest.describe.skip("Extract data from page tests", () => {
426
427
  prompt: getPromptVariation(sharedPrompts.objectConstraints),
427
428
  enableDomMatching: false,
428
429
  strategy: "HTML",
429
- model: "claude-3-5-sonnet-20240620",
430
+ model: "claude-3-7-sonnet-latest",
430
431
  apiKey: process.env.ANTHROPIC_API_KEY
431
432
  });
432
433
  (0, _extendedTest.expect)(data.title.length).toBeGreaterThanOrEqual(10);
@@ -462,7 +463,7 @@ _extendedTest.describe.skip("Extract data from page tests", () => {
462
463
  prompt: getPromptVariation(sharedPrompts.domMatching),
463
464
  enableDomMatching: true,
464
465
  strategy: "HTML",
465
- model: "claude-3-5-sonnet-20240620"
466
+ model: "claude-3-7-sonnet-latest"
466
467
  })).rejects.toThrow("For DOM matching, all types of the extraction fields must be STRINGS");
467
468
  });
468
469
  });
@@ -487,7 +488,7 @@ _extendedTest.describe.skip("Extract data from page tests", () => {
487
488
  prompt: getPromptVariation(sharedPrompts.cachingBasic),
488
489
  enableDomMatching: true,
489
490
  strategy: "HTML",
490
- model: "claude-3-5-sonnet-20240620",
491
+ model: "claude-3-7-sonnet-latest",
491
492
  apiKey: process.env.ANTHROPIC_API_KEY
492
493
  });
493
494
  const secondResult = await (0, _.extractStructuredData)({
@@ -496,7 +497,7 @@ _extendedTest.describe.skip("Extract data from page tests", () => {
496
497
  prompt: getPromptVariation(sharedPrompts.cachingBasic),
497
498
  enableDomMatching: true,
498
499
  strategy: "HTML",
499
- model: "claude-3-5-sonnet-20240620",
500
+ model: "claude-3-7-sonnet-latest",
500
501
  apiKey: process.env.ANTHROPIC_API_KEY
501
502
  });
502
503
  (0, _extendedTest.expect)(secondResult).toEqual(firstResult);
@@ -521,7 +522,7 @@ _extendedTest.describe.skip("Extract data from page tests", () => {
521
522
  prompt: getPromptVariation(sharedPrompts.cachingBasic),
522
523
  enableDomMatching: true,
523
524
  strategy: "HTML",
524
- model: "claude-3-5-sonnet-20240620",
525
+ model: "claude-3-7-sonnet-latest",
525
526
  apiKey: process.env.ANTHROPIC_API_KEY
526
527
  });
527
528
  const modifiedTemplate = productListTemplate.replace("Water Resistant", "DOM HAS CHANGED BUT NOT THE PRICE OR TITLE");
@@ -532,7 +533,7 @@ _extendedTest.describe.skip("Extract data from page tests", () => {
532
533
  prompt: getPromptVariation(sharedPrompts.cachingBasic),
533
534
  enableDomMatching: true,
534
535
  strategy: "HTML",
535
- model: "claude-3-5-sonnet-20240620",
536
+ model: "claude-3-7-sonnet-latest",
536
537
  apiKey: process.env.ANTHROPIC_API_KEY
537
538
  });
538
539
  (0, _extendedTest.expect)(secondResult).toEqual(firstResult);
@@ -559,7 +560,7 @@ _extendedTest.describe.skip("Extract data from page tests", () => {
559
560
  prompt: getPromptVariation(sharedPrompts.cachingBasic),
560
561
  enableDomMatching: true,
561
562
  strategy: "HTML",
562
- model: "claude-3-5-sonnet-20240620",
563
+ model: "claude-3-7-sonnet-latest",
563
564
  apiKey: process.env.ANTHROPIC_API_KEY
564
565
  });
565
566
  const modifiedTemplate = productListTemplate.replace("iPhone 14 Pro", "iPhone 15 Pro").replace("$999", "$1099");
@@ -570,7 +571,7 @@ _extendedTest.describe.skip("Extract data from page tests", () => {
570
571
  prompt: getPromptVariation(sharedPrompts.cachingBasic),
571
572
  enableDomMatching: true,
572
573
  strategy: "HTML",
573
- model: "claude-3-5-sonnet-20240620",
574
+ model: "claude-3-7-sonnet-latest",
574
575
  apiKey: process.env.ANTHROPIC_API_KEY
575
576
  });
576
577
  (0, _extendedTest.expect)(secondResult).not.toEqual(firstResult);
@@ -599,7 +600,7 @@ _extendedTest.describe.skip("Extract data from page tests", () => {
599
600
  prompt: getPromptVariation(sharedPrompts.cachingBasic),
600
601
  enableDomMatching: false,
601
602
  strategy: "HTML",
602
- model: "claude-3-5-sonnet-20240620",
603
+ model: "claude-3-7-sonnet-latest",
603
604
  apiKey: process.env.ANTHROPIC_API_KEY
604
605
  });
605
606
  const modifiedTemplate = productListTemplate.replace("iPhone 14 Pro", "iPhone 15 Pro").replace("$999", "$1099");
@@ -610,7 +611,7 @@ _extendedTest.describe.skip("Extract data from page tests", () => {
610
611
  prompt: getPromptVariation(sharedPrompts.cachingBasic),
611
612
  enableDomMatching: false,
612
613
  strategy: "HTML",
613
- model: "claude-3-5-sonnet-20240620",
614
+ model: "claude-3-7-sonnet-latest",
614
615
  apiKey: process.env.ANTHROPIC_API_KEY
615
616
  });
616
617
  (0, _extendedTest.expect)(secondResult).not.toEqual(firstResult);
@@ -634,7 +635,7 @@ _extendedTest.describe.skip("Extract data from page tests", () => {
634
635
  prompt: "Extract prsoduct information including title, price, stock status, and rating",
635
636
  enableDomMatching: true,
636
637
  strategy: "HTML",
637
- model: "claude-3-5-sonnet-20240620",
638
+ model: "claude-3-7-sonnet-latest",
638
639
  apiKey: process.env.ANTHROPIC_API_KEY
639
640
  });
640
641
  (0, _extendedTest.expect)(data).toHaveProperty("title", "iPhone 14 Pro");
@@ -642,5 +643,136 @@ _extendedTest.describe.skip("Extract data from page tests", () => {
642
643
  (0, _extendedTest.expect)(data).toHaveProperty("stock", "In Stock");
643
644
  (0, _extendedTest.expect)(data).toHaveProperty("rating", "4.5");
644
645
  });
646
+ (0, _extendedTest.test)("should extract book details from real website using Zod schema", async () => {
647
+ const BookSchema = _zod.z.object({
648
+ name: _zod.z.string().describe("Book title"),
649
+ price: _zod.z.string().describe("Book price"),
650
+ description: _zod.z.string().nullable().describe("Book description"),
651
+ in_stock: _zod.z.boolean().describe("Stock availability"),
652
+ rating: _zod.z.string().nullable().describe("Book rating")
653
+ });
654
+ await (0, _helpers.goToUrl)({
655
+ page,
656
+ url: "https://books.toscrape.com/catalogue/a-light-in-the-attic_1000/index.html"
657
+ });
658
+ const product = await (0, _.extractStructuredData)({
659
+ source: page,
660
+ strategy: "HTML",
661
+ model: "claude-3-7-sonnet-latest",
662
+ dataSchema: BookSchema,
663
+ prompt: "Extract book details from this page",
664
+ apiKey: process.env.ANTHROPIC_API_KEY,
665
+ enableCache: false,
666
+ maxRetries: 3
667
+ });
668
+ console.log(`Found product: ${product.name} - ${product.price}`);
669
+ (0, _extendedTest.expect)(product).toHaveProperty("name");
670
+ (0, _extendedTest.expect)(product).toHaveProperty("price");
671
+ (0, _extendedTest.expect)(typeof product.name).toBe("string");
672
+ (0, _extendedTest.expect)(typeof product.price).toBe("string");
673
+ (0, _extendedTest.expect)(typeof product.in_stock).toBe("boolean");
674
+ (0, _extendedTest.expect)(product.name).toBe("A Light in the Attic");
675
+ (0, _extendedTest.expect)(product.price).toMatch(/£\d+\.\d+/);
676
+ });
677
+ });
678
+ (0, _extendedTest.describe)("Content-based Extraction (without Page/DOM)", () => {
679
+ (0, _extendedTest.test)("should extract from text content using Zod schema", async () => {
680
+ const PersonSchema = _zod.z.object({
681
+ name: _zod.z.string().describe("Person's full name"),
682
+ age: _zod.z.number().describe("Person's age in years"),
683
+ occupation: _zod.z.string().describe("Person's job title"),
684
+ company: _zod.z.string().describe("Company name").optional()
685
+ });
686
+ const textContent = {
687
+ type: "text",
688
+ data: "John Doe, age 30, works as a Software Engineer at Tech Corp"
689
+ };
690
+ const person = await (0, _.extractStructuredData)({
691
+ content: textContent,
692
+ model: "claude-3-7-sonnet-latest",
693
+ dataSchema: PersonSchema,
694
+ prompt: "Extract person information from the text",
695
+ apiKey: process.env.ANTHROPIC_API_KEY,
696
+ enableCache: false
697
+ });
698
+ (0, _extendedTest.expect)(person).toHaveProperty("name", "John Doe");
699
+ (0, _extendedTest.expect)(person).toHaveProperty("age", 30);
700
+ (0, _extendedTest.expect)(person).toHaveProperty("occupation", "Software Engineer");
701
+ (0, _extendedTest.expect)(person).toHaveProperty("company", "Tech Corp");
702
+ });
703
+ (0, _extendedTest.test)("should extract from multiple text items", async () => {
704
+ const CompanySchema = _zod.z.object({
705
+ companyName: _zod.z.string().describe("Company name"),
706
+ employees: _zod.z.number().describe("Number of employees"),
707
+ founded: _zod.z.number().describe("Year founded"),
708
+ industry: _zod.z.string().describe("Industry sector")
709
+ });
710
+ const textContent = {
711
+ type: "text",
712
+ data: "Tech Corp was founded in 2010 and now employs 500 people in the software industry."
713
+ };
714
+ const company = await (0, _.extractStructuredData)({
715
+ content: textContent,
716
+ model: "claude-3-7-sonnet-latest",
717
+ dataSchema: CompanySchema,
718
+ prompt: "Extract company information",
719
+ apiKey: process.env.ANTHROPIC_API_KEY
720
+ });
721
+ (0, _extendedTest.expect)(company).toHaveProperty("companyName", "Tech Corp");
722
+ (0, _extendedTest.expect)(company).toHaveProperty("employees", 500);
723
+ (0, _extendedTest.expect)(company).toHaveProperty("founded", 2010);
724
+ (0, _extendedTest.expect)(company).toHaveProperty("industry");
725
+ });
726
+ (0, _extendedTest.test)("should extract array from text content", async () => {
727
+ const SkillsSchema = _zod.z.array(_zod.z.string()).describe("List of skills");
728
+ const textContent = {
729
+ type: "text",
730
+ data: "Sarah has skills in JavaScript, Python, React, Node.js, and Docker"
731
+ };
732
+ const skills = await (0, _.extractStructuredData)({
733
+ content: textContent,
734
+ model: "claude-3-7-sonnet-latest",
735
+ dataSchema: SkillsSchema,
736
+ prompt: "Extract all the skills mentioned",
737
+ apiKey: process.env.ANTHROPIC_API_KEY
738
+ });
739
+ (0, _extendedTest.expect)(Array.isArray(skills)).toBe(true);
740
+ (0, _extendedTest.expect)(skills.length).toBeGreaterThanOrEqual(3);
741
+ (0, _extendedTest.expect)(skills).toContain("JavaScript");
742
+ (0, _extendedTest.expect)(skills).toContain("Python");
743
+ (0, _extendedTest.expect)(skills).toContain("React");
744
+ });
745
+ (0, _extendedTest.test)("should extract complex nested object from text", async () => {
746
+ const EventSchema = _zod.z.object({
747
+ eventName: _zod.z.string().describe("Name of the event"),
748
+ date: _zod.z.string().describe("Event date"),
749
+ location: _zod.z.object({
750
+ city: _zod.z.string().describe("City name"),
751
+ venue: _zod.z.string().describe("Venue name")
752
+ }),
753
+ attendees: _zod.z.array(_zod.z.object({
754
+ name: _zod.z.string(),
755
+ role: _zod.z.string()
756
+ }))
757
+ });
758
+ const textContent = {
759
+ type: "text",
760
+ data: `The Tech Summit 2024 will be held on March 20th at the Grand Convention Center in San Francisco.
761
+ Confirmed speakers include: Dr. Jane Smith (Keynote Speaker) and Mike Johnson (CTO Panel).`
762
+ };
763
+ const event = await (0, _.extractStructuredData)({
764
+ content: textContent,
765
+ model: "claude-3-7-sonnet-latest",
766
+ dataSchema: EventSchema,
767
+ prompt: "Extract event details including location and attendees",
768
+ apiKey: process.env.ANTHROPIC_API_KEY
769
+ });
770
+ (0, _extendedTest.expect)(event).toHaveProperty("eventName");
771
+ (0, _extendedTest.expect)(event).toHaveProperty("date");
772
+ (0, _extendedTest.expect)(event.location).toHaveProperty("city", "San Francisco");
773
+ (0, _extendedTest.expect)(event.location).toHaveProperty("venue");
774
+ (0, _extendedTest.expect)(Array.isArray(event.attendees)).toBe(true);
775
+ (0, _extendedTest.expect)(event.attendees.length).toBeGreaterThanOrEqual(2);
776
+ });
645
777
  });
646
778
  });
@@ -216,7 +216,7 @@ _extendedTest.describe.skip("isPageLoaded Tests", () => {
216
216
  const result = await (0, _isPageLoaded.isPageLoaded)({
217
217
  page,
218
218
  model: "gpt-4o-2024-05-13",
219
- apiKey: apiKey,
219
+ apiKey,
220
220
  timeoutInMs: 10000
221
221
  });
222
222
  (0, _extendedTest.expect)(result).toBe(false);
@@ -5,14 +5,12 @@ Object.defineProperty(exports, "__esModule", {
5
5
  });
6
6
  exports.SUPPORTED_VISION_MODELS = exports.SUPPORTED_TEXT_MODELS = exports.SUPPORTED_GPT_MODELS = exports.SUPPORTED_GOOGLE_MODELS = exports.SUPPORTED_CLAUDE_MODELS = exports.MODELS_MAPPINGS = exports.MAX_TOKENS_OVERRIDES = exports.GPT_MODELS_MAPPINGS = exports.GOOGLE_MODELS_MAPPINGS = exports.CLAUDE_VISION_SUPPORTED_MODELS = exports.CLAUDE_ONLY_TEXT_MODELS = exports.CLAUDE_MODELS_MAPPINGS = void 0;
7
7
  const CLAUDE_ONLY_TEXT_MODELS = exports.CLAUDE_ONLY_TEXT_MODELS = ["claude-3-5-haiku", "claude-3-5-haiku-20241022"];
8
- const CLAUDE_VISION_SUPPORTED_MODELS = exports.CLAUDE_VISION_SUPPORTED_MODELS = ["claude-3.5-sonnet", "claude-3-5-sonnet-20240620", "claude-3-5-sonnet-20241022"];
8
+ const CLAUDE_VISION_SUPPORTED_MODELS = exports.CLAUDE_VISION_SUPPORTED_MODELS = ["claude-3.7-sonnet-latest"];
9
9
  const SUPPORTED_CLAUDE_MODELS = exports.SUPPORTED_CLAUDE_MODELS = [...CLAUDE_ONLY_TEXT_MODELS, ...CLAUDE_VISION_SUPPORTED_MODELS];
10
10
  const CLAUDE_MODELS_MAPPINGS = exports.CLAUDE_MODELS_MAPPINGS = {
11
11
  "claude-3-haiku": "claude-3-haiku-20240307",
12
12
  "claude-3-5-haiku": "claude-3-5-haiku-20241022",
13
- "claude-3-opus": "claude-3-opus-20240229",
14
- "claude-3-sonnet": "claude-3-sonnet-20240229",
15
- "claude-3.5-sonnet": "claude-3-5-sonnet-20241022"
13
+ "claude-3-opus": "claude-3-opus-20240229"
16
14
  };
17
15
  const GPT_ONLY_TEXT_GPT_MODELS = ["gpt3.5-turbo", "gpt-3.5-turbo-0125"];
18
16
  const GPT_VISION_SUPPORTED_MODELS = ["gpt4-turbo", "gpt-4-turbo-2024-04-09", "gpt-4o", "gpt-4o-2024-05-13", "gpt-4o-mini", "gpt-4o-mini-2024-07-18"];
@@ -37,7 +35,6 @@ const MODELS_MAPPINGS = exports.MODELS_MAPPINGS = {
37
35
  ...GOOGLE_MODELS_MAPPINGS
38
36
  };
39
37
  const MAX_TOKENS_OVERRIDES = exports.MAX_TOKENS_OVERRIDES = {
40
- "claude-3-5-sonnet-20240620": 8192,
41
38
  "gemini-1.5-pro-002": 8192,
42
39
  "gemini-1.5-flash-8b-002": 8192,
43
40
  "gemini-1.5-flash-002": 8192,
@@ -71,7 +71,7 @@ const jsonSchemaCustomValidation = _zod.z.any().transform(value => {
71
71
  const message = e.message.replace("schema is invalid: ", "").split(", ")[0].replace("data/", "").replaceAll("/", ".");
72
72
  ctx.addIssue({
73
73
  code: _zod.z.ZodIssueCode.custom,
74
- message: message
74
+ message
75
75
  });
76
76
  }
77
77
  }).refine(v => {
@@ -4,9 +4,8 @@ Object.defineProperty(exports, "__esModule", {
4
4
  value: true
5
5
  });
6
6
  exports.SUPPPORTED_GPT_MODELS = exports.SUPPPORTED_CLAUDE_MODELS = exports.SUPPORTED_MODELS = exports.MODELS_MAPPINGS = exports.MAX_TOKENS_OVERRIDES = exports.GPT_MODELS = exports.GOOGLE_MODELS = exports.CLAUDE_VISION_SUPPORTED_MODELS = exports.CLAUDE_MODELS_MAPPINGS = exports.CLAUDE_MODELS = void 0;
7
- const CLAUDE_MODELS = exports.CLAUDE_MODELS = ["claude-opus-4-20250514", "claude-sonnet-4-20250514", "claude-3-7-sonnet-20250219", "claude-3-5-sonnet-20241022", "claude-3-5-sonnet-20240620", "claude-3-5-haiku-20241022", "claude-3-opus-20240229", "claude-3-sonnet-20240229", "claude-3-haiku-20240307"];
7
+ const CLAUDE_MODELS = exports.CLAUDE_MODELS = ["claude-opus-4-20250514", "claude-sonnet-4-20250514", "claude-3-7-sonnet-20250219", "claude-3-5-haiku-20241022", "claude-3-opus-20240229", "claude-3-sonnet-20240229", "claude-3-haiku-20240307"];
8
8
  const MAX_TOKENS_OVERRIDES = exports.MAX_TOKENS_OVERRIDES = {
9
- "claude-3-5-sonnet-20240620": 8192,
10
9
  "gemini-1.5-pro-002": 8192,
11
10
  "gemini-1.5-flash-8b-002": 8192,
12
11
  "gemini-1.5-flash-002": 8192,
@@ -18,7 +17,6 @@ const CLAUDE_MODELS_MAPPINGS = exports.CLAUDE_MODELS_MAPPINGS = {
18
17
  "claude-3-5-haiku": "claude-3-5-haiku-20241022",
19
18
  "claude-3-opus": "claude-3-opus-20240229",
20
19
  "claude-3-sonnet": "claude-3-sonnet-20240229",
21
- "claude-3.5-sonnet": "claude-3-5-sonnet-20241022",
22
20
  "claude-4-sonnet": "claude-sonnet-4-20250514",
23
21
  "claude-4-opus": "claude-opus-4-20250514"
24
22
  };
@@ -27,6 +25,6 @@ const GOOGLE_MODELS = exports.GOOGLE_MODELS = ["gemini-2.5-pro", "gemini-2.5-fla
27
25
  const MODELS_MAPPINGS = exports.MODELS_MAPPINGS = {
28
26
  ...CLAUDE_MODELS_MAPPINGS
29
27
  };
30
- const SUPPPORTED_CLAUDE_MODELS = exports.SUPPPORTED_CLAUDE_MODELS = ["claude-3-5-haiku-20241022", "claude-3-5-haiku-latest", "claude-3-5-sonnet-20240620", "claude-3-5-sonnet-20241022", "claude-3-5-sonnet-latest", "claude-3-7-sonnet-20250219", "claude-3-7-sonnet-latest", "claude-3-haiku-20240307", "claude-4-opus-20250514", "claude-4-sonnet-20250514", "claude-opus-4-1", "claude-opus-4-1-20250805", "claude-opus-4-20250514", "claude-sonnet-4-20250514"];
28
+ const SUPPPORTED_CLAUDE_MODELS = exports.SUPPPORTED_CLAUDE_MODELS = ["claude-3-5-haiku-20241022", "claude-3-5-haiku-latest", "claude-3-7-sonnet-20250219", "claude-3-7-sonnet-latest", "claude-3-haiku-20240307", "claude-4-opus-20250514", "claude-4-sonnet-20250514", "claude-opus-4-1", "claude-opus-4-1-20250805", "claude-opus-4-20250514", "claude-sonnet-4-20250514"];
31
29
  const SUPPPORTED_GPT_MODELS = exports.SUPPPORTED_GPT_MODELS = ["gpt-3.5-turbo", "gpt-3.5-turbo-0125", "gpt-3.5-turbo-0301", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-1106", "gpt-3.5-turbo-16k", "gpt-3.5-turbo-16k-0613", "gpt-3.5-turbo-instruct", "gpt-3.5-turbo-instruct-0914", "gpt-4", "gpt-4-0314", "gpt-4-0613", "gpt-4-32k", "gpt-4-32k-0314", "gpt-4-32k-0613", "gpt-4-turbo", "gpt-4-turbo-2024-04-09", "gpt-4.1", "gpt-4.1-2025-04-14", "gpt-4.1-mini", "gpt-4.1-mini-2025-04-14", "gpt-4.1-nano", "gpt-4.1-nano-2025-04-14", "gpt-4o", "gpt-4o-2024-05-13", "gpt-4o-2024-08-06", "gpt-4o-2024-11-20", "gpt-4o-mini", "gpt-4o-mini-2024-07-18", "gpt-5", "gpt-5-2025-08-07", "gpt-5-chat", "gpt-5-chat-latest", "gpt-5-mini", "gpt-5-mini-2025-08-07", "gpt-5-nano", "gpt-5-nano-2025-08-07", "o1", "o1-2024-12-17", "o1-mini", "o1-mini-2024-09-12", "o1-pro", "o1-pro-2025-03-19", "o3", "o3-2025-04-16", "o3-deep-research", "o3-deep-research-2025-06-26", "o3-mini", "o3-mini-2025-01-31", "o3-pro", "o3-pro-2025-06-10", "o4-mini", "o4-mini-2025-04-16", "o4-mini-deep-research", "o4-mini-deep-research-2025-06-26"];
32
30
  const SUPPORTED_MODELS = exports.SUPPORTED_MODELS = [...SUPPPORTED_CLAUDE_MODELS, ...SUPPPORTED_GPT_MODELS];
@@ -88,9 +88,7 @@ const downloadFile = async input => {
88
88
  console.error(`Download was cancelled for URL: ${absoluteUrl}`);
89
89
  });
90
90
  }
91
- } catch (error) {
92
- console.error("Error during download:", error);
93
- }
91
+ } catch (error) {}
94
92
  } else if (isCallableTrigger(trigger)) {
95
93
  action = await trigger(page);
96
94
  try {
@@ -0,0 +1,20 @@
1
+ "use strict";
2
+
3
+ Object.defineProperty(exports, "__esModule", {
4
+ value: true
5
+ });
6
+ exports.checkFrameAllowsAsyncScripts = checkFrameAllowsAsyncScripts;
7
+ var _Logger = require("../../common/Logger");
8
+ async function checkFrameAllowsAsyncScripts(iframeElement) {
9
+ try {
10
+ const sandboxValue = await iframeElement.evaluate(element => element.getAttribute("sandbox"));
11
+ if (sandboxValue === null) {
12
+ return true;
13
+ }
14
+ const sandboxTokens = sandboxValue.trim().split(/\s+/);
15
+ return sandboxTokens.includes("allow-scripts");
16
+ } catch (error) {
17
+ _Logger.logger.warn(`Error checking iframe sandbox attribute: ${error}`);
18
+ return true;
19
+ }
20
+ }
@@ -6,6 +6,7 @@ Object.defineProperty(exports, "__esModule", {
6
6
  exports.findAllIframes = findAllIframes;
7
7
  exports.findAllIframesList = findAllIframesList;
8
8
  var _Logger = require("../../common/Logger");
9
+ var _checkFrameAllowsAsyncScripts = require("./checkFrameAllowsAsyncScripts");
9
10
  var _constants = require("./constants");
10
11
  async function findAllIframes(root, iframeTimeoutMs = 10000) {
11
12
  const processed = new Set();
@@ -44,10 +45,12 @@ async function processFrameRecursive(root, processedRoots, iframeTimeoutMs) {
44
45
  _Logger.logger.error(`Could not access content_frame for iframe: ${iframeElement}`);
45
46
  return null;
46
47
  }
48
+ const allowsAsyncScripts = await (0, _checkFrameAllowsAsyncScripts.checkFrameAllowsAsyncScripts)(iframeElement);
47
49
  const nestedIframes = await processFrameRecursive(contentFrame, processedRoots, iframeTimeoutMs);
48
50
  return {
49
51
  frame: contentFrame,
50
- nestedIframes: nestedIframes
52
+ nestedIframes,
53
+ allowsAsyncScripts
51
54
  };
52
55
  };
53
56
  const iframeNode = await Promise.race([processSingleIframe(i), new Promise((_, reject) => setTimeout(() => reject(new Error("Timeout")), iframeTimeoutMs))]);