@intuned/browser-dev 0.1.5-dev.0 → 0.1.6-dev.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/RELEASE.md +11 -9
- package/dist/ai/export.d.ts +1 -7
- package/dist/ai/extractStructuredData.js +1 -1
- package/dist/ai/extractStructuredDataUsingAi.js +23 -2
- package/dist/ai/extractionHelpers/validateSchema.js +34 -2
- package/dist/ai/index.d.ts +1 -7
- package/dist/ai/tests/testExtractStructuredData.spec.js +150 -18
- package/dist/ai/tests/testIsPageLoaded.spec.js +1 -1
- package/dist/ai/types/models.js +2 -5
- package/dist/ai/validators.js +1 -1
- package/dist/common/aiModelsValidations.js +2 -4
- package/dist/helpers/downloadFile.js +1 -3
- package/dist/helpers/frame_utils/checkFrameAllowsAsyncScripts.js +20 -0
- package/dist/helpers/frame_utils/constants.js +8 -0
- package/dist/helpers/frame_utils/findAllIframes.js +82 -0
- package/dist/helpers/frame_utils/getContainerFrame.js +22 -0
- package/dist/helpers/frame_utils/index.js +44 -0
- package/dist/helpers/frame_utils/tests/testFindAllIframes.spec.js +213 -0
- package/dist/helpers/tests/testDownloadFile.spec.js +3 -4
- package/dist/helpers/tests/testResolveUrl.spec.js +4 -4
- package/dist/helpers/tests/testValidateDataUsingSchema.spec.js +1041 -4
- package/dist/helpers/tests/testWithDomSettledWait.spec.js +142 -0
- package/dist/helpers/tests/testWithNetworkIdleWait.spec.js +1 -1
- package/dist/helpers/uploadFileToS3.js +6 -0
- package/dist/helpers/utils/getS3Client.js +2 -2
- package/dist/helpers/validateDataUsingSchema.js +93 -7
- package/dist/helpers/waitForDomSettled.js +66 -40
- package/dist/optimized-extractors/listExtractionHelpers/__tests__/testArrayExtractorFromLocator.spec.js +1 -1
- package/dist/optimized-extractors/listExtractionHelpers/__tests__/testArrayExtractorFromPage.spec.js +271 -2
- package/dist/optimized-extractors/objectExtractionHelpers/__tests__/testObjectExtractorFromLocator.spec.js +1 -1
- package/dist/optimized-extractors/objectExtractionHelpers/__tests__/testObjectExtractorFromPage.spec.js +1 -1
- package/dist/optimized-extractors/types/aiModelsValidation.js +1 -3
- package/package.json +5 -4
package/RELEASE.md
CHANGED
|
@@ -4,13 +4,13 @@
|
|
|
4
4
|
|
|
5
5
|
```bash
|
|
6
6
|
# From typescript-sdk directory
|
|
7
|
-
|
|
7
|
+
yarn release prod 1.2.3
|
|
8
8
|
```
|
|
9
9
|
|
|
10
10
|
## Usage
|
|
11
11
|
|
|
12
12
|
```bash
|
|
13
|
-
|
|
13
|
+
yarn release {prod|dev} version [webapp-repo-path]
|
|
14
14
|
```
|
|
15
15
|
|
|
16
16
|
**Arguments:**
|
|
@@ -23,13 +23,13 @@ bash scripts/release.sh {prod|dev} version [webapp-repo-path]
|
|
|
23
23
|
|
|
24
24
|
```bash
|
|
25
25
|
# Publish to npm (prod)
|
|
26
|
-
|
|
26
|
+
yarn release prod 1.2.3
|
|
27
27
|
|
|
28
28
|
# Publish dev version to npm
|
|
29
|
-
|
|
29
|
+
yarn release dev 1.2.3-beta
|
|
30
30
|
|
|
31
31
|
# Custom WebApp path
|
|
32
|
-
|
|
32
|
+
yarn release prod 1.2.3 /path/to/WebApp
|
|
33
33
|
```
|
|
34
34
|
|
|
35
35
|
## Repo Structure
|
|
@@ -48,13 +48,15 @@ intuned-master/
|
|
|
48
48
|
|
|
49
49
|
1. ✅ Builds project (`yarn build`, type checks)
|
|
50
50
|
2. ✅ Updates version in `package.json`
|
|
51
|
-
3. ✅ Adjusts package name for dev (
|
|
51
|
+
3. ✅ Adjusts package name for dev (`@intuned/browser-dev`)
|
|
52
52
|
4. ✅ Publishes to npm
|
|
53
|
-
5. ✅
|
|
54
|
-
|
|
53
|
+
5. ✅ For prod only:
|
|
54
|
+
- Updates all TypeScript templates in WebApp (`playwright_v1/`)
|
|
55
|
+
- Regenerates template files in WebApp
|
|
56
|
+
- Updates `packageVersions.json` (`typescript.sdk.version`)
|
|
55
57
|
|
|
56
58
|
## Prerequisites
|
|
57
59
|
|
|
58
60
|
- Node.js & Yarn installed
|
|
59
61
|
- npm credentials configured (`npm login`)
|
|
60
|
-
- WebApp repo cloned (
|
|
62
|
+
- WebApp repo cloned (for prod releases)
|
package/dist/ai/export.d.ts
CHANGED
|
@@ -260,7 +260,7 @@ export type JsonSchema =
|
|
|
260
260
|
* const article = await extractStructuredData({
|
|
261
261
|
* source: articleContainer,
|
|
262
262
|
* strategy: "MARKDOWN",
|
|
263
|
-
* model: "claude-3-
|
|
263
|
+
* model: "claude-3-7-sonnet-latest",
|
|
264
264
|
* dataSchema: {
|
|
265
265
|
* type: "object",
|
|
266
266
|
* properties: {
|
|
@@ -376,9 +376,6 @@ export declare function extractStructuredData(options: {
|
|
|
376
376
|
type SUPPORTED_CLAUDE_MODELS =
|
|
377
377
|
| "claude-3-5-haiku-20241022"
|
|
378
378
|
| "claude-3-5-haiku-latest"
|
|
379
|
-
| "claude-3-5-sonnet-20240620"
|
|
380
|
-
| "claude-3-5-sonnet-20241022"
|
|
381
|
-
| "claude-3-5-sonnet-latest"
|
|
382
379
|
| "claude-3-7-sonnet-20250219"
|
|
383
380
|
| "claude-3-7-sonnet-latest"
|
|
384
381
|
| "claude-3-haiku-20240307"
|
|
@@ -509,9 +506,6 @@ type SUPPORTED_OPENAI_MODELS =
|
|
|
509
506
|
* **Supported Anthropic (Claude) Models:**
|
|
510
507
|
* "claude-3-5-haiku-20241022"
|
|
511
508
|
* "claude-3-5-haiku-latest"
|
|
512
|
-
* "claude-3-5-sonnet-20240620"
|
|
513
|
-
* "claude-3-5-sonnet-20241022"
|
|
514
|
-
* "claude-3-5-sonnet-latest"
|
|
515
509
|
* "claude-3-7-sonnet-20250219"
|
|
516
510
|
* "claude-3-7-sonnet-latest"
|
|
517
511
|
* "claude-3-haiku-20240307"
|
|
@@ -302,7 +302,7 @@ const extractStructuredDataFromContent = async options => {
|
|
|
302
302
|
options: {
|
|
303
303
|
prompt: options.prompt,
|
|
304
304
|
images,
|
|
305
|
-
jsonSchema:
|
|
305
|
+
jsonSchema: parsingResult.data.dataSchema,
|
|
306
306
|
content: texts.join("\n"),
|
|
307
307
|
enableDomMatching: false,
|
|
308
308
|
apiKey: options.apiKey,
|
|
@@ -34,6 +34,12 @@ async function extractStructuredDataUsingAi(input) {
|
|
|
34
34
|
});
|
|
35
35
|
const gatewayModel = await gateway.getModel();
|
|
36
36
|
const tools = (0, _tools.getTools)(toolName, jsonSchema);
|
|
37
|
+
if (tools.isErr()) {
|
|
38
|
+
_Logger.logger.error("Schema validation failed", {
|
|
39
|
+
error: tools.error
|
|
40
|
+
});
|
|
41
|
+
return (0, _neverthrow.err)(Errors.invalidExtractionResult(`Invalid JSON schema: ${tools.error.context}`));
|
|
42
|
+
}
|
|
37
43
|
const messages = (0, _prompt.getMessages)({
|
|
38
44
|
prompt,
|
|
39
45
|
content,
|
|
@@ -49,7 +55,7 @@ async function extractStructuredDataUsingAi(input) {
|
|
|
49
55
|
result = await (0, _ai.generateText)({
|
|
50
56
|
model: gatewayModel,
|
|
51
57
|
messages: messagesHistory,
|
|
52
|
-
tools: tools.
|
|
58
|
+
tools: tools.value,
|
|
53
59
|
toolChoice: "required",
|
|
54
60
|
maxRetries
|
|
55
61
|
});
|
|
@@ -60,7 +66,22 @@ async function extractStructuredDataUsingAi(input) {
|
|
|
60
66
|
var _result$usage;
|
|
61
67
|
accumulatedTokens += ((_result$usage = result.usage) === null || _result$usage === void 0 ? void 0 : _result$usage.totalTokens) ?? 0;
|
|
62
68
|
}
|
|
63
|
-
|
|
69
|
+
if (!result.toolCalls || result.toolCalls.length === 0) {
|
|
70
|
+
var _result$text;
|
|
71
|
+
_Logger.logger.error("AI did not return any tool calls", {
|
|
72
|
+
responseText: result.text,
|
|
73
|
+
model
|
|
74
|
+
});
|
|
75
|
+
return (0, _neverthrow.err)(Errors.invalidExtractionResult(`AI model did not return any tool calls. This might be due to invalid schema or content. Response: ${((_result$text = result.text) === null || _result$text === void 0 ? void 0 : _result$text.substring(0, 200)) || "empty"}`));
|
|
76
|
+
}
|
|
77
|
+
const toolCall = result.toolCalls[0];
|
|
78
|
+
if (!toolCall || !toolCall.input) {
|
|
79
|
+
_Logger.logger.error("Tool call missing input data", {
|
|
80
|
+
toolCall,
|
|
81
|
+
model
|
|
82
|
+
});
|
|
83
|
+
return (0, _neverthrow.err)(Errors.invalidExtractionResult(`AI tool call is missing input data. Tool call name: ${(toolCall === null || toolCall === void 0 ? void 0 : toolCall.toolName) || "unknown"}`));
|
|
84
|
+
}
|
|
64
85
|
let extractedData = toolCall.input;
|
|
65
86
|
const isArray = jsonSchema.type === "array";
|
|
66
87
|
if (isArray && extractedData.extracted_data) {
|
|
@@ -17,13 +17,45 @@ function validateJSONSchema(schema) {
|
|
|
17
17
|
if (!schema || typeof schema !== "object") {
|
|
18
18
|
return (0, _neverthrow.err)(errors.invalidJsonSchema("Schema must be an object"));
|
|
19
19
|
}
|
|
20
|
+
if (schema.anyOf || schema.oneOf) {
|
|
21
|
+
const unionSchemas = schema.anyOf || schema.oneOf;
|
|
22
|
+
if (!Array.isArray(unionSchemas)) {
|
|
23
|
+
return (0, _neverthrow.err)(errors.invalidJsonSchema("anyOf/oneOf must be an array of schemas"));
|
|
24
|
+
}
|
|
25
|
+
for (const subSchema of unionSchemas) {
|
|
26
|
+
if (subSchema.type === "null") {
|
|
27
|
+
continue;
|
|
28
|
+
}
|
|
29
|
+
const subValidation = validateJSONSchema(subSchema);
|
|
30
|
+
if (subValidation.isErr()) {
|
|
31
|
+
return subValidation;
|
|
32
|
+
}
|
|
33
|
+
}
|
|
34
|
+
return (0, _neverthrow.ok)(schema);
|
|
35
|
+
}
|
|
20
36
|
if (!schema.type) {
|
|
21
37
|
return (0, _neverthrow.err)(errors.invalidJsonSchema("Schema must have a 'type' property"));
|
|
22
38
|
}
|
|
23
|
-
const validTypes = ["string", "number", "integer", "boolean", "array", "object"];
|
|
24
|
-
if (
|
|
39
|
+
const validTypes = ["string", "number", "integer", "boolean", "array", "object", "null"];
|
|
40
|
+
if (Array.isArray(schema.type)) {
|
|
41
|
+
const invalidTypes = schema.type.filter(t => !validTypes.includes(t));
|
|
42
|
+
if (invalidTypes.length > 0) {
|
|
43
|
+
return (0, _neverthrow.err)(errors.invalidJsonSchema(`Invalid schema types: ${invalidTypes.join(", ")}`));
|
|
44
|
+
}
|
|
45
|
+
const nonNullTypes = schema.type.filter(t => t !== "null");
|
|
46
|
+
if (nonNullTypes.length === 0) {
|
|
47
|
+
return (0, _neverthrow.ok)(schema);
|
|
48
|
+
}
|
|
49
|
+
schema = {
|
|
50
|
+
...schema,
|
|
51
|
+
type: nonNullTypes[0]
|
|
52
|
+
};
|
|
53
|
+
} else if (!validTypes.includes(schema.type)) {
|
|
25
54
|
return (0, _neverthrow.err)(errors.invalidJsonSchema(`Invalid schema type: ${schema.type}`));
|
|
26
55
|
}
|
|
56
|
+
if (schema.type === "null") {
|
|
57
|
+
return (0, _neverthrow.ok)(schema);
|
|
58
|
+
}
|
|
27
59
|
if (schema.type === "array") {
|
|
28
60
|
if (!schema.items) {
|
|
29
61
|
return (0, _neverthrow.err)(errors.invalidJsonSchema("Array schema must have 'items' property"));
|
package/dist/ai/index.d.ts
CHANGED
|
@@ -260,7 +260,7 @@ export type JsonSchema =
|
|
|
260
260
|
* const article = await extractStructuredData({
|
|
261
261
|
* source: articleContainer,
|
|
262
262
|
* strategy: "MARKDOWN",
|
|
263
|
-
* model: "claude-3-
|
|
263
|
+
* model: "claude-3-7-sonnet-latest",
|
|
264
264
|
* dataSchema: {
|
|
265
265
|
* type: "object",
|
|
266
266
|
* properties: {
|
|
@@ -376,9 +376,6 @@ export declare function extractStructuredData(options: {
|
|
|
376
376
|
type SUPPORTED_CLAUDE_MODELS =
|
|
377
377
|
| "claude-3-5-haiku-20241022"
|
|
378
378
|
| "claude-3-5-haiku-latest"
|
|
379
|
-
| "claude-3-5-sonnet-20240620"
|
|
380
|
-
| "claude-3-5-sonnet-20241022"
|
|
381
|
-
| "claude-3-5-sonnet-latest"
|
|
382
379
|
| "claude-3-7-sonnet-20250219"
|
|
383
380
|
| "claude-3-7-sonnet-latest"
|
|
384
381
|
| "claude-3-haiku-20240307"
|
|
@@ -509,9 +506,6 @@ type SUPPORTED_OPENAI_MODELS =
|
|
|
509
506
|
* **Supported Anthropic (Claude) Models:**
|
|
510
507
|
* "claude-3-5-haiku-20241022"
|
|
511
508
|
* "claude-3-5-haiku-latest"
|
|
512
|
-
* "claude-3-5-sonnet-20240620"
|
|
513
|
-
* "claude-3-5-sonnet-20241022"
|
|
514
|
-
* "claude-3-5-sonnet-latest"
|
|
515
509
|
* "claude-3-7-sonnet-20250219"
|
|
516
510
|
* "claude-3-7-sonnet-latest"
|
|
517
511
|
* "claude-3-haiku-20240307"
|
|
@@ -2,7 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
var _extendedTest = require("../../common/extendedTest");
|
|
4
4
|
var _ = require("..");
|
|
5
|
-
var
|
|
5
|
+
var _helpers = require("../../helpers");
|
|
6
|
+
var _playwright = require("playwright");
|
|
6
7
|
var _dotenv = require("dotenv");
|
|
7
8
|
var _zod = require("zod");
|
|
8
9
|
(0, _dotenv.config)();
|
|
@@ -130,7 +131,7 @@ _extendedTest.describe.skip("Extract data from page tests", () => {
|
|
|
130
131
|
imageStrategy: "Extract article metadata including title, author, date, read time, and tags"
|
|
131
132
|
};
|
|
132
133
|
(0, _extendedTest.beforeAll)(async () => {
|
|
133
|
-
browser = await
|
|
134
|
+
browser = await _playwright.chromium.launch({
|
|
134
135
|
headless: true
|
|
135
136
|
});
|
|
136
137
|
});
|
|
@@ -222,7 +223,7 @@ _extendedTest.describe.skip("Extract data from page tests", () => {
|
|
|
222
223
|
prompt: getPromptVariation(sharedPrompts.imageStrategy),
|
|
223
224
|
enableDomMatching: false,
|
|
224
225
|
strategy: "IMAGE",
|
|
225
|
-
model: "claude-3-
|
|
226
|
+
model: "claude-3-7-sonnet-latest",
|
|
226
227
|
apiKey: process.env.ANTHROPIC_API_KEY
|
|
227
228
|
});
|
|
228
229
|
(0, _extendedTest.expect)(data).toHaveProperty("title", "The Future of AI in 2024");
|
|
@@ -266,7 +267,7 @@ _extendedTest.describe.skip("Extract data from page tests", () => {
|
|
|
266
267
|
prompt: getPromptVariation(sharedPrompts.markdownStrategy),
|
|
267
268
|
enableDomMatching: false,
|
|
268
269
|
strategy: "MARKDOWN",
|
|
269
|
-
model: "claude-3-
|
|
270
|
+
model: "claude-3-7-sonnet-latest",
|
|
270
271
|
apiKey: process.env.ANTHROPIC_API_KEY
|
|
271
272
|
});
|
|
272
273
|
(0, _extendedTest.expect)(data).toHaveProperty("title", "The Future of AI in 2024");
|
|
@@ -295,7 +296,7 @@ _extendedTest.describe.skip("Extract data from page tests", () => {
|
|
|
295
296
|
prompt: getPromptVariation(sharedPrompts.arrayStrings),
|
|
296
297
|
enableDomMatching: false,
|
|
297
298
|
strategy: "HTML",
|
|
298
|
-
model: "claude-3-
|
|
299
|
+
model: "claude-3-7-sonnet-latest",
|
|
299
300
|
apiKey: process.env.ANTHROPIC_API_KEY
|
|
300
301
|
});
|
|
301
302
|
(0, _extendedTest.expect)(Array.isArray(data)).toBe(true);
|
|
@@ -331,7 +332,7 @@ _extendedTest.describe.skip("Extract data from page tests", () => {
|
|
|
331
332
|
prompt: getPromptVariation(sharedPrompts.arrayObjects),
|
|
332
333
|
enableDomMatching: false,
|
|
333
334
|
strategy: "HTML",
|
|
334
|
-
model: "claude-3-
|
|
335
|
+
model: "claude-3-7-sonnet-latest",
|
|
335
336
|
apiKey: process.env.ANTHROPIC_API_KEY
|
|
336
337
|
});
|
|
337
338
|
(0, _extendedTest.expect)(Array.isArray(data)).toBe(true);
|
|
@@ -392,7 +393,7 @@ _extendedTest.describe.skip("Extract data from page tests", () => {
|
|
|
392
393
|
prompt: getPromptVariation(sharedPrompts.objectNested),
|
|
393
394
|
enableDomMatching: false,
|
|
394
395
|
strategy: "HTML",
|
|
395
|
-
model: "claude-3-
|
|
396
|
+
model: "claude-3-7-sonnet-latest",
|
|
396
397
|
apiKey: process.env.ANTHROPIC_API_KEY
|
|
397
398
|
});
|
|
398
399
|
(0, _extendedTest.expect)(data.user.name).toBe("Sarah Wilson");
|
|
@@ -426,7 +427,7 @@ _extendedTest.describe.skip("Extract data from page tests", () => {
|
|
|
426
427
|
prompt: getPromptVariation(sharedPrompts.objectConstraints),
|
|
427
428
|
enableDomMatching: false,
|
|
428
429
|
strategy: "HTML",
|
|
429
|
-
model: "claude-3-
|
|
430
|
+
model: "claude-3-7-sonnet-latest",
|
|
430
431
|
apiKey: process.env.ANTHROPIC_API_KEY
|
|
431
432
|
});
|
|
432
433
|
(0, _extendedTest.expect)(data.title.length).toBeGreaterThanOrEqual(10);
|
|
@@ -462,7 +463,7 @@ _extendedTest.describe.skip("Extract data from page tests", () => {
|
|
|
462
463
|
prompt: getPromptVariation(sharedPrompts.domMatching),
|
|
463
464
|
enableDomMatching: true,
|
|
464
465
|
strategy: "HTML",
|
|
465
|
-
model: "claude-3-
|
|
466
|
+
model: "claude-3-7-sonnet-latest"
|
|
466
467
|
})).rejects.toThrow("For DOM matching, all types of the extraction fields must be STRINGS");
|
|
467
468
|
});
|
|
468
469
|
});
|
|
@@ -487,7 +488,7 @@ _extendedTest.describe.skip("Extract data from page tests", () => {
|
|
|
487
488
|
prompt: getPromptVariation(sharedPrompts.cachingBasic),
|
|
488
489
|
enableDomMatching: true,
|
|
489
490
|
strategy: "HTML",
|
|
490
|
-
model: "claude-3-
|
|
491
|
+
model: "claude-3-7-sonnet-latest",
|
|
491
492
|
apiKey: process.env.ANTHROPIC_API_KEY
|
|
492
493
|
});
|
|
493
494
|
const secondResult = await (0, _.extractStructuredData)({
|
|
@@ -496,7 +497,7 @@ _extendedTest.describe.skip("Extract data from page tests", () => {
|
|
|
496
497
|
prompt: getPromptVariation(sharedPrompts.cachingBasic),
|
|
497
498
|
enableDomMatching: true,
|
|
498
499
|
strategy: "HTML",
|
|
499
|
-
model: "claude-3-
|
|
500
|
+
model: "claude-3-7-sonnet-latest",
|
|
500
501
|
apiKey: process.env.ANTHROPIC_API_KEY
|
|
501
502
|
});
|
|
502
503
|
(0, _extendedTest.expect)(secondResult).toEqual(firstResult);
|
|
@@ -521,7 +522,7 @@ _extendedTest.describe.skip("Extract data from page tests", () => {
|
|
|
521
522
|
prompt: getPromptVariation(sharedPrompts.cachingBasic),
|
|
522
523
|
enableDomMatching: true,
|
|
523
524
|
strategy: "HTML",
|
|
524
|
-
model: "claude-3-
|
|
525
|
+
model: "claude-3-7-sonnet-latest",
|
|
525
526
|
apiKey: process.env.ANTHROPIC_API_KEY
|
|
526
527
|
});
|
|
527
528
|
const modifiedTemplate = productListTemplate.replace("Water Resistant", "DOM HAS CHANGED BUT NOT THE PRICE OR TITLE");
|
|
@@ -532,7 +533,7 @@ _extendedTest.describe.skip("Extract data from page tests", () => {
|
|
|
532
533
|
prompt: getPromptVariation(sharedPrompts.cachingBasic),
|
|
533
534
|
enableDomMatching: true,
|
|
534
535
|
strategy: "HTML",
|
|
535
|
-
model: "claude-3-
|
|
536
|
+
model: "claude-3-7-sonnet-latest",
|
|
536
537
|
apiKey: process.env.ANTHROPIC_API_KEY
|
|
537
538
|
});
|
|
538
539
|
(0, _extendedTest.expect)(secondResult).toEqual(firstResult);
|
|
@@ -559,7 +560,7 @@ _extendedTest.describe.skip("Extract data from page tests", () => {
|
|
|
559
560
|
prompt: getPromptVariation(sharedPrompts.cachingBasic),
|
|
560
561
|
enableDomMatching: true,
|
|
561
562
|
strategy: "HTML",
|
|
562
|
-
model: "claude-3-
|
|
563
|
+
model: "claude-3-7-sonnet-latest",
|
|
563
564
|
apiKey: process.env.ANTHROPIC_API_KEY
|
|
564
565
|
});
|
|
565
566
|
const modifiedTemplate = productListTemplate.replace("iPhone 14 Pro", "iPhone 15 Pro").replace("$999", "$1099");
|
|
@@ -570,7 +571,7 @@ _extendedTest.describe.skip("Extract data from page tests", () => {
|
|
|
570
571
|
prompt: getPromptVariation(sharedPrompts.cachingBasic),
|
|
571
572
|
enableDomMatching: true,
|
|
572
573
|
strategy: "HTML",
|
|
573
|
-
model: "claude-3-
|
|
574
|
+
model: "claude-3-7-sonnet-latest",
|
|
574
575
|
apiKey: process.env.ANTHROPIC_API_KEY
|
|
575
576
|
});
|
|
576
577
|
(0, _extendedTest.expect)(secondResult).not.toEqual(firstResult);
|
|
@@ -599,7 +600,7 @@ _extendedTest.describe.skip("Extract data from page tests", () => {
|
|
|
599
600
|
prompt: getPromptVariation(sharedPrompts.cachingBasic),
|
|
600
601
|
enableDomMatching: false,
|
|
601
602
|
strategy: "HTML",
|
|
602
|
-
model: "claude-3-
|
|
603
|
+
model: "claude-3-7-sonnet-latest",
|
|
603
604
|
apiKey: process.env.ANTHROPIC_API_KEY
|
|
604
605
|
});
|
|
605
606
|
const modifiedTemplate = productListTemplate.replace("iPhone 14 Pro", "iPhone 15 Pro").replace("$999", "$1099");
|
|
@@ -610,7 +611,7 @@ _extendedTest.describe.skip("Extract data from page tests", () => {
|
|
|
610
611
|
prompt: getPromptVariation(sharedPrompts.cachingBasic),
|
|
611
612
|
enableDomMatching: false,
|
|
612
613
|
strategy: "HTML",
|
|
613
|
-
model: "claude-3-
|
|
614
|
+
model: "claude-3-7-sonnet-latest",
|
|
614
615
|
apiKey: process.env.ANTHROPIC_API_KEY
|
|
615
616
|
});
|
|
616
617
|
(0, _extendedTest.expect)(secondResult).not.toEqual(firstResult);
|
|
@@ -634,7 +635,7 @@ _extendedTest.describe.skip("Extract data from page tests", () => {
|
|
|
634
635
|
prompt: "Extract prsoduct information including title, price, stock status, and rating",
|
|
635
636
|
enableDomMatching: true,
|
|
636
637
|
strategy: "HTML",
|
|
637
|
-
model: "claude-3-
|
|
638
|
+
model: "claude-3-7-sonnet-latest",
|
|
638
639
|
apiKey: process.env.ANTHROPIC_API_KEY
|
|
639
640
|
});
|
|
640
641
|
(0, _extendedTest.expect)(data).toHaveProperty("title", "iPhone 14 Pro");
|
|
@@ -642,5 +643,136 @@ _extendedTest.describe.skip("Extract data from page tests", () => {
|
|
|
642
643
|
(0, _extendedTest.expect)(data).toHaveProperty("stock", "In Stock");
|
|
643
644
|
(0, _extendedTest.expect)(data).toHaveProperty("rating", "4.5");
|
|
644
645
|
});
|
|
646
|
+
(0, _extendedTest.test)("should extract book details from real website using Zod schema", async () => {
|
|
647
|
+
const BookSchema = _zod.z.object({
|
|
648
|
+
name: _zod.z.string().describe("Book title"),
|
|
649
|
+
price: _zod.z.string().describe("Book price"),
|
|
650
|
+
description: _zod.z.string().nullable().describe("Book description"),
|
|
651
|
+
in_stock: _zod.z.boolean().describe("Stock availability"),
|
|
652
|
+
rating: _zod.z.string().nullable().describe("Book rating")
|
|
653
|
+
});
|
|
654
|
+
await (0, _helpers.goToUrl)({
|
|
655
|
+
page,
|
|
656
|
+
url: "https://books.toscrape.com/catalogue/a-light-in-the-attic_1000/index.html"
|
|
657
|
+
});
|
|
658
|
+
const product = await (0, _.extractStructuredData)({
|
|
659
|
+
source: page,
|
|
660
|
+
strategy: "HTML",
|
|
661
|
+
model: "claude-3-7-sonnet-latest",
|
|
662
|
+
dataSchema: BookSchema,
|
|
663
|
+
prompt: "Extract book details from this page",
|
|
664
|
+
apiKey: process.env.ANTHROPIC_API_KEY,
|
|
665
|
+
enableCache: false,
|
|
666
|
+
maxRetries: 3
|
|
667
|
+
});
|
|
668
|
+
console.log(`Found product: ${product.name} - ${product.price}`);
|
|
669
|
+
(0, _extendedTest.expect)(product).toHaveProperty("name");
|
|
670
|
+
(0, _extendedTest.expect)(product).toHaveProperty("price");
|
|
671
|
+
(0, _extendedTest.expect)(typeof product.name).toBe("string");
|
|
672
|
+
(0, _extendedTest.expect)(typeof product.price).toBe("string");
|
|
673
|
+
(0, _extendedTest.expect)(typeof product.in_stock).toBe("boolean");
|
|
674
|
+
(0, _extendedTest.expect)(product.name).toBe("A Light in the Attic");
|
|
675
|
+
(0, _extendedTest.expect)(product.price).toMatch(/£\d+\.\d+/);
|
|
676
|
+
});
|
|
677
|
+
});
|
|
678
|
+
(0, _extendedTest.describe)("Content-based Extraction (without Page/DOM)", () => {
|
|
679
|
+
(0, _extendedTest.test)("should extract from text content using Zod schema", async () => {
|
|
680
|
+
const PersonSchema = _zod.z.object({
|
|
681
|
+
name: _zod.z.string().describe("Person's full name"),
|
|
682
|
+
age: _zod.z.number().describe("Person's age in years"),
|
|
683
|
+
occupation: _zod.z.string().describe("Person's job title"),
|
|
684
|
+
company: _zod.z.string().describe("Company name").optional()
|
|
685
|
+
});
|
|
686
|
+
const textContent = {
|
|
687
|
+
type: "text",
|
|
688
|
+
data: "John Doe, age 30, works as a Software Engineer at Tech Corp"
|
|
689
|
+
};
|
|
690
|
+
const person = await (0, _.extractStructuredData)({
|
|
691
|
+
content: textContent,
|
|
692
|
+
model: "claude-3-7-sonnet-latest",
|
|
693
|
+
dataSchema: PersonSchema,
|
|
694
|
+
prompt: "Extract person information from the text",
|
|
695
|
+
apiKey: process.env.ANTHROPIC_API_KEY,
|
|
696
|
+
enableCache: false
|
|
697
|
+
});
|
|
698
|
+
(0, _extendedTest.expect)(person).toHaveProperty("name", "John Doe");
|
|
699
|
+
(0, _extendedTest.expect)(person).toHaveProperty("age", 30);
|
|
700
|
+
(0, _extendedTest.expect)(person).toHaveProperty("occupation", "Software Engineer");
|
|
701
|
+
(0, _extendedTest.expect)(person).toHaveProperty("company", "Tech Corp");
|
|
702
|
+
});
|
|
703
|
+
(0, _extendedTest.test)("should extract from multiple text items", async () => {
|
|
704
|
+
const CompanySchema = _zod.z.object({
|
|
705
|
+
companyName: _zod.z.string().describe("Company name"),
|
|
706
|
+
employees: _zod.z.number().describe("Number of employees"),
|
|
707
|
+
founded: _zod.z.number().describe("Year founded"),
|
|
708
|
+
industry: _zod.z.string().describe("Industry sector")
|
|
709
|
+
});
|
|
710
|
+
const textContent = {
|
|
711
|
+
type: "text",
|
|
712
|
+
data: "Tech Corp was founded in 2010 and now employs 500 people in the software industry."
|
|
713
|
+
};
|
|
714
|
+
const company = await (0, _.extractStructuredData)({
|
|
715
|
+
content: textContent,
|
|
716
|
+
model: "claude-3-7-sonnet-latest",
|
|
717
|
+
dataSchema: CompanySchema,
|
|
718
|
+
prompt: "Extract company information",
|
|
719
|
+
apiKey: process.env.ANTHROPIC_API_KEY
|
|
720
|
+
});
|
|
721
|
+
(0, _extendedTest.expect)(company).toHaveProperty("companyName", "Tech Corp");
|
|
722
|
+
(0, _extendedTest.expect)(company).toHaveProperty("employees", 500);
|
|
723
|
+
(0, _extendedTest.expect)(company).toHaveProperty("founded", 2010);
|
|
724
|
+
(0, _extendedTest.expect)(company).toHaveProperty("industry");
|
|
725
|
+
});
|
|
726
|
+
(0, _extendedTest.test)("should extract array from text content", async () => {
|
|
727
|
+
const SkillsSchema = _zod.z.array(_zod.z.string()).describe("List of skills");
|
|
728
|
+
const textContent = {
|
|
729
|
+
type: "text",
|
|
730
|
+
data: "Sarah has skills in JavaScript, Python, React, Node.js, and Docker"
|
|
731
|
+
};
|
|
732
|
+
const skills = await (0, _.extractStructuredData)({
|
|
733
|
+
content: textContent,
|
|
734
|
+
model: "claude-3-7-sonnet-latest",
|
|
735
|
+
dataSchema: SkillsSchema,
|
|
736
|
+
prompt: "Extract all the skills mentioned",
|
|
737
|
+
apiKey: process.env.ANTHROPIC_API_KEY
|
|
738
|
+
});
|
|
739
|
+
(0, _extendedTest.expect)(Array.isArray(skills)).toBe(true);
|
|
740
|
+
(0, _extendedTest.expect)(skills.length).toBeGreaterThanOrEqual(3);
|
|
741
|
+
(0, _extendedTest.expect)(skills).toContain("JavaScript");
|
|
742
|
+
(0, _extendedTest.expect)(skills).toContain("Python");
|
|
743
|
+
(0, _extendedTest.expect)(skills).toContain("React");
|
|
744
|
+
});
|
|
745
|
+
(0, _extendedTest.test)("should extract complex nested object from text", async () => {
|
|
746
|
+
const EventSchema = _zod.z.object({
|
|
747
|
+
eventName: _zod.z.string().describe("Name of the event"),
|
|
748
|
+
date: _zod.z.string().describe("Event date"),
|
|
749
|
+
location: _zod.z.object({
|
|
750
|
+
city: _zod.z.string().describe("City name"),
|
|
751
|
+
venue: _zod.z.string().describe("Venue name")
|
|
752
|
+
}),
|
|
753
|
+
attendees: _zod.z.array(_zod.z.object({
|
|
754
|
+
name: _zod.z.string(),
|
|
755
|
+
role: _zod.z.string()
|
|
756
|
+
}))
|
|
757
|
+
});
|
|
758
|
+
const textContent = {
|
|
759
|
+
type: "text",
|
|
760
|
+
data: `The Tech Summit 2024 will be held on March 20th at the Grand Convention Center in San Francisco.
|
|
761
|
+
Confirmed speakers include: Dr. Jane Smith (Keynote Speaker) and Mike Johnson (CTO Panel).`
|
|
762
|
+
};
|
|
763
|
+
const event = await (0, _.extractStructuredData)({
|
|
764
|
+
content: textContent,
|
|
765
|
+
model: "claude-3-7-sonnet-latest",
|
|
766
|
+
dataSchema: EventSchema,
|
|
767
|
+
prompt: "Extract event details including location and attendees",
|
|
768
|
+
apiKey: process.env.ANTHROPIC_API_KEY
|
|
769
|
+
});
|
|
770
|
+
(0, _extendedTest.expect)(event).toHaveProperty("eventName");
|
|
771
|
+
(0, _extendedTest.expect)(event).toHaveProperty("date");
|
|
772
|
+
(0, _extendedTest.expect)(event.location).toHaveProperty("city", "San Francisco");
|
|
773
|
+
(0, _extendedTest.expect)(event.location).toHaveProperty("venue");
|
|
774
|
+
(0, _extendedTest.expect)(Array.isArray(event.attendees)).toBe(true);
|
|
775
|
+
(0, _extendedTest.expect)(event.attendees.length).toBeGreaterThanOrEqual(2);
|
|
776
|
+
});
|
|
645
777
|
});
|
|
646
778
|
});
|
|
@@ -216,7 +216,7 @@ _extendedTest.describe.skip("isPageLoaded Tests", () => {
|
|
|
216
216
|
const result = await (0, _isPageLoaded.isPageLoaded)({
|
|
217
217
|
page,
|
|
218
218
|
model: "gpt-4o-2024-05-13",
|
|
219
|
-
apiKey
|
|
219
|
+
apiKey,
|
|
220
220
|
timeoutInMs: 10000
|
|
221
221
|
});
|
|
222
222
|
(0, _extendedTest.expect)(result).toBe(false);
|
package/dist/ai/types/models.js
CHANGED
|
@@ -5,14 +5,12 @@ Object.defineProperty(exports, "__esModule", {
|
|
|
5
5
|
});
|
|
6
6
|
exports.SUPPORTED_VISION_MODELS = exports.SUPPORTED_TEXT_MODELS = exports.SUPPORTED_GPT_MODELS = exports.SUPPORTED_GOOGLE_MODELS = exports.SUPPORTED_CLAUDE_MODELS = exports.MODELS_MAPPINGS = exports.MAX_TOKENS_OVERRIDES = exports.GPT_MODELS_MAPPINGS = exports.GOOGLE_MODELS_MAPPINGS = exports.CLAUDE_VISION_SUPPORTED_MODELS = exports.CLAUDE_ONLY_TEXT_MODELS = exports.CLAUDE_MODELS_MAPPINGS = void 0;
|
|
7
7
|
const CLAUDE_ONLY_TEXT_MODELS = exports.CLAUDE_ONLY_TEXT_MODELS = ["claude-3-5-haiku", "claude-3-5-haiku-20241022"];
|
|
8
|
-
const CLAUDE_VISION_SUPPORTED_MODELS = exports.CLAUDE_VISION_SUPPORTED_MODELS = ["claude-3.
|
|
8
|
+
const CLAUDE_VISION_SUPPORTED_MODELS = exports.CLAUDE_VISION_SUPPORTED_MODELS = ["claude-3.7-sonnet-latest"];
|
|
9
9
|
const SUPPORTED_CLAUDE_MODELS = exports.SUPPORTED_CLAUDE_MODELS = [...CLAUDE_ONLY_TEXT_MODELS, ...CLAUDE_VISION_SUPPORTED_MODELS];
|
|
10
10
|
const CLAUDE_MODELS_MAPPINGS = exports.CLAUDE_MODELS_MAPPINGS = {
|
|
11
11
|
"claude-3-haiku": "claude-3-haiku-20240307",
|
|
12
12
|
"claude-3-5-haiku": "claude-3-5-haiku-20241022",
|
|
13
|
-
"claude-3-opus": "claude-3-opus-20240229"
|
|
14
|
-
"claude-3-sonnet": "claude-3-sonnet-20240229",
|
|
15
|
-
"claude-3.5-sonnet": "claude-3-5-sonnet-20241022"
|
|
13
|
+
"claude-3-opus": "claude-3-opus-20240229"
|
|
16
14
|
};
|
|
17
15
|
const GPT_ONLY_TEXT_GPT_MODELS = ["gpt3.5-turbo", "gpt-3.5-turbo-0125"];
|
|
18
16
|
const GPT_VISION_SUPPORTED_MODELS = ["gpt4-turbo", "gpt-4-turbo-2024-04-09", "gpt-4o", "gpt-4o-2024-05-13", "gpt-4o-mini", "gpt-4o-mini-2024-07-18"];
|
|
@@ -37,7 +35,6 @@ const MODELS_MAPPINGS = exports.MODELS_MAPPINGS = {
|
|
|
37
35
|
...GOOGLE_MODELS_MAPPINGS
|
|
38
36
|
};
|
|
39
37
|
const MAX_TOKENS_OVERRIDES = exports.MAX_TOKENS_OVERRIDES = {
|
|
40
|
-
"claude-3-5-sonnet-20240620": 8192,
|
|
41
38
|
"gemini-1.5-pro-002": 8192,
|
|
42
39
|
"gemini-1.5-flash-8b-002": 8192,
|
|
43
40
|
"gemini-1.5-flash-002": 8192,
|
package/dist/ai/validators.js
CHANGED
|
@@ -71,7 +71,7 @@ const jsonSchemaCustomValidation = _zod.z.any().transform(value => {
|
|
|
71
71
|
const message = e.message.replace("schema is invalid: ", "").split(", ")[0].replace("data/", "").replaceAll("/", ".");
|
|
72
72
|
ctx.addIssue({
|
|
73
73
|
code: _zod.z.ZodIssueCode.custom,
|
|
74
|
-
message
|
|
74
|
+
message
|
|
75
75
|
});
|
|
76
76
|
}
|
|
77
77
|
}).refine(v => {
|
|
@@ -4,9 +4,8 @@ Object.defineProperty(exports, "__esModule", {
|
|
|
4
4
|
value: true
|
|
5
5
|
});
|
|
6
6
|
exports.SUPPPORTED_GPT_MODELS = exports.SUPPPORTED_CLAUDE_MODELS = exports.SUPPORTED_MODELS = exports.MODELS_MAPPINGS = exports.MAX_TOKENS_OVERRIDES = exports.GPT_MODELS = exports.GOOGLE_MODELS = exports.CLAUDE_VISION_SUPPORTED_MODELS = exports.CLAUDE_MODELS_MAPPINGS = exports.CLAUDE_MODELS = void 0;
|
|
7
|
-
const CLAUDE_MODELS = exports.CLAUDE_MODELS = ["claude-opus-4-20250514", "claude-sonnet-4-20250514", "claude-3-7-sonnet-20250219", "claude-3-5-
|
|
7
|
+
const CLAUDE_MODELS = exports.CLAUDE_MODELS = ["claude-opus-4-20250514", "claude-sonnet-4-20250514", "claude-3-7-sonnet-20250219", "claude-3-5-haiku-20241022", "claude-3-opus-20240229", "claude-3-sonnet-20240229", "claude-3-haiku-20240307"];
|
|
8
8
|
const MAX_TOKENS_OVERRIDES = exports.MAX_TOKENS_OVERRIDES = {
|
|
9
|
-
"claude-3-5-sonnet-20240620": 8192,
|
|
10
9
|
"gemini-1.5-pro-002": 8192,
|
|
11
10
|
"gemini-1.5-flash-8b-002": 8192,
|
|
12
11
|
"gemini-1.5-flash-002": 8192,
|
|
@@ -18,7 +17,6 @@ const CLAUDE_MODELS_MAPPINGS = exports.CLAUDE_MODELS_MAPPINGS = {
|
|
|
18
17
|
"claude-3-5-haiku": "claude-3-5-haiku-20241022",
|
|
19
18
|
"claude-3-opus": "claude-3-opus-20240229",
|
|
20
19
|
"claude-3-sonnet": "claude-3-sonnet-20240229",
|
|
21
|
-
"claude-3.5-sonnet": "claude-3-5-sonnet-20241022",
|
|
22
20
|
"claude-4-sonnet": "claude-sonnet-4-20250514",
|
|
23
21
|
"claude-4-opus": "claude-opus-4-20250514"
|
|
24
22
|
};
|
|
@@ -27,6 +25,6 @@ const GOOGLE_MODELS = exports.GOOGLE_MODELS = ["gemini-2.5-pro", "gemini-2.5-fla
|
|
|
27
25
|
const MODELS_MAPPINGS = exports.MODELS_MAPPINGS = {
|
|
28
26
|
...CLAUDE_MODELS_MAPPINGS
|
|
29
27
|
};
|
|
30
|
-
const SUPPPORTED_CLAUDE_MODELS = exports.SUPPPORTED_CLAUDE_MODELS = ["claude-3-5-haiku-20241022", "claude-3-5-haiku-latest", "claude-3-
|
|
28
|
+
const SUPPPORTED_CLAUDE_MODELS = exports.SUPPPORTED_CLAUDE_MODELS = ["claude-3-5-haiku-20241022", "claude-3-5-haiku-latest", "claude-3-7-sonnet-20250219", "claude-3-7-sonnet-latest", "claude-3-haiku-20240307", "claude-4-opus-20250514", "claude-4-sonnet-20250514", "claude-opus-4-1", "claude-opus-4-1-20250805", "claude-opus-4-20250514", "claude-sonnet-4-20250514"];
|
|
31
29
|
const SUPPPORTED_GPT_MODELS = exports.SUPPPORTED_GPT_MODELS = ["gpt-3.5-turbo", "gpt-3.5-turbo-0125", "gpt-3.5-turbo-0301", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-1106", "gpt-3.5-turbo-16k", "gpt-3.5-turbo-16k-0613", "gpt-3.5-turbo-instruct", "gpt-3.5-turbo-instruct-0914", "gpt-4", "gpt-4-0314", "gpt-4-0613", "gpt-4-32k", "gpt-4-32k-0314", "gpt-4-32k-0613", "gpt-4-turbo", "gpt-4-turbo-2024-04-09", "gpt-4.1", "gpt-4.1-2025-04-14", "gpt-4.1-mini", "gpt-4.1-mini-2025-04-14", "gpt-4.1-nano", "gpt-4.1-nano-2025-04-14", "gpt-4o", "gpt-4o-2024-05-13", "gpt-4o-2024-08-06", "gpt-4o-2024-11-20", "gpt-4o-mini", "gpt-4o-mini-2024-07-18", "gpt-5", "gpt-5-2025-08-07", "gpt-5-chat", "gpt-5-chat-latest", "gpt-5-mini", "gpt-5-mini-2025-08-07", "gpt-5-nano", "gpt-5-nano-2025-08-07", "o1", "o1-2024-12-17", "o1-mini", "o1-mini-2024-09-12", "o1-pro", "o1-pro-2025-03-19", "o3", "o3-2025-04-16", "o3-deep-research", "o3-deep-research-2025-06-26", "o3-mini", "o3-mini-2025-01-31", "o3-pro", "o3-pro-2025-06-10", "o4-mini", "o4-mini-2025-04-16", "o4-mini-deep-research", "o4-mini-deep-research-2025-06-26"];
|
|
32
30
|
const SUPPORTED_MODELS = exports.SUPPORTED_MODELS = [...SUPPPORTED_CLAUDE_MODELS, ...SUPPPORTED_GPT_MODELS];
|
|
@@ -88,9 +88,7 @@ const downloadFile = async input => {
|
|
|
88
88
|
console.error(`Download was cancelled for URL: ${absoluteUrl}`);
|
|
89
89
|
});
|
|
90
90
|
}
|
|
91
|
-
} catch (error) {
|
|
92
|
-
console.error("Error during download:", error);
|
|
93
|
-
}
|
|
91
|
+
} catch (error) {}
|
|
94
92
|
} else if (isCallableTrigger(trigger)) {
|
|
95
93
|
action = await trigger(page);
|
|
96
94
|
try {
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, "__esModule", {
|
|
4
|
+
value: true
|
|
5
|
+
});
|
|
6
|
+
exports.checkFrameAllowsAsyncScripts = checkFrameAllowsAsyncScripts;
|
|
7
|
+
var _Logger = require("../../common/Logger");
|
|
8
|
+
async function checkFrameAllowsAsyncScripts(iframeElement) {
|
|
9
|
+
try {
|
|
10
|
+
const sandboxValue = await iframeElement.evaluate(element => element.getAttribute("sandbox"));
|
|
11
|
+
if (sandboxValue === null) {
|
|
12
|
+
return true;
|
|
13
|
+
}
|
|
14
|
+
const sandboxTokens = sandboxValue.trim().split(/\s+/);
|
|
15
|
+
return sandboxTokens.includes("allow-scripts");
|
|
16
|
+
} catch (error) {
|
|
17
|
+
_Logger.logger.warn(`Error checking iframe sandbox attribute: ${error}`);
|
|
18
|
+
return true;
|
|
19
|
+
}
|
|
20
|
+
}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, "__esModule", {
|
|
4
|
+
value: true
|
|
5
|
+
});
|
|
6
|
+
exports.IFRAME_TAGS = exports.ALL_IFRAMES_CSS_SELECTOR = void 0;
|
|
7
|
+
const IFRAME_TAGS = exports.IFRAME_TAGS = ["iframe", "frame"];
|
|
8
|
+
const ALL_IFRAMES_CSS_SELECTOR = exports.ALL_IFRAMES_CSS_SELECTOR = IFRAME_TAGS.join(", ");
|