@intuned/browser-dev 0.1.5-dev.1 → 0.1.6-dev.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/RELEASE.md +11 -9
- package/dist/ai/export.d.ts +1 -7
- package/dist/ai/extractStructuredData.js +1 -1
- package/dist/ai/extractStructuredDataUsingAi.js +23 -2
- package/dist/ai/extractionHelpers/validateSchema.js +34 -2
- package/dist/ai/index.d.ts +1 -7
- package/dist/ai/tests/testExtractStructuredData.spec.js +150 -18
- package/dist/ai/tests/testIsPageLoaded.spec.js +1 -1
- package/dist/ai/types/models.js +2 -5
- package/dist/ai/validators.js +1 -1
- package/dist/common/aiModelsValidations.js +2 -4
- package/dist/helpers/clickUntilExhausted.js +35 -38
- package/dist/helpers/downloadFile.js +1 -3
- package/dist/helpers/frame_utils/checkFrameAllowsAsyncScripts.js +20 -0
- package/dist/helpers/frame_utils/findAllIframes.js +4 -1
- package/dist/helpers/frame_utils/tests/testFindAllIframes.spec.js +43 -0
- package/dist/helpers/tests/testDownloadFile.spec.js +3 -4
- package/dist/helpers/tests/testResolveUrl.spec.js +4 -4
- package/dist/helpers/tests/testValidateDataUsingSchema.spec.js +1041 -4
- package/dist/helpers/tests/testWithDomSettledWait.spec.js +23 -0
- package/dist/helpers/tests/testWithNetworkIdleWait.spec.js +1 -1
- package/dist/helpers/uploadFileToS3.js +6 -0
- package/dist/helpers/utils/getS3Client.js +2 -2
- package/dist/helpers/validateDataUsingSchema.js +93 -7
- package/dist/helpers/waitForDomSettled.js +17 -8
- package/dist/optimized-extractors/listExtractionHelpers/__tests__/testArrayExtractorFromLocator.spec.js +1 -1
- package/dist/optimized-extractors/objectExtractionHelpers/__tests__/testObjectExtractorFromLocator.spec.js +1 -1
- package/dist/optimized-extractors/objectExtractionHelpers/__tests__/testObjectExtractorFromPage.spec.js +1 -1
- package/dist/optimized-extractors/types/aiModelsValidation.js +1 -3
- package/package.json +4 -3
|
@@ -280,4 +280,27 @@ var _waitForDomSettled = require("../waitForDomSettled");
|
|
|
280
280
|
(0, _extendedTest.expect)(content).toBe(`Inner dynamic ${i + 1}`);
|
|
281
281
|
}
|
|
282
282
|
});
|
|
283
|
+
(0, _extendedTest.test)("should wait for DOM to settle with sandboxed iframe", async () => {
|
|
284
|
+
await page.goto(`
|
|
285
|
+
data:text/html,
|
|
286
|
+
<html>
|
|
287
|
+
<body>
|
|
288
|
+
<h1>Main Page</h1>
|
|
289
|
+
<iframe id="sandboxed-iframe"
|
|
290
|
+
sandbox="allow-same-origin"
|
|
291
|
+
srcdoc="<html><body><h2>Sandboxed Iframe</h2><div id='content'>Initial content</div></body></html>"
|
|
292
|
+
width="400"
|
|
293
|
+
height="300">
|
|
294
|
+
</iframe>
|
|
295
|
+
</body>
|
|
296
|
+
</html>
|
|
297
|
+
`);
|
|
298
|
+
await page.waitForTimeout(500);
|
|
299
|
+
const settled = await (0, _waitForDomSettled.waitForDomSettled)({
|
|
300
|
+
source: page,
|
|
301
|
+
settleDurationMs: 500,
|
|
302
|
+
timeoutInMs: 5000
|
|
303
|
+
});
|
|
304
|
+
(0, _extendedTest.expect)(settled).toBe(true);
|
|
305
|
+
});
|
|
283
306
|
});
|
|
@@ -103,7 +103,7 @@ _extendedTest.describe.skip("TestWaitForNetworkIdle", () => {
|
|
|
103
103
|
await page.goto("data:text/html,<html><body><h1>Direct Call</h1></body></html>");
|
|
104
104
|
return "direct call complete";
|
|
105
105
|
}, {
|
|
106
|
-
page
|
|
106
|
+
page,
|
|
107
107
|
timeoutInMs: 10000,
|
|
108
108
|
maxInflightRequests: 0
|
|
109
109
|
});
|
|
@@ -42,6 +42,9 @@ const uploadFileToS3 = async input => {
|
|
|
42
42
|
fileNameOverride,
|
|
43
43
|
contentType
|
|
44
44
|
} = input;
|
|
45
|
+
if (!(0, _utils.isDownload)(file) && !Buffer.isBuffer(file)) {
|
|
46
|
+
throw new Error("Invalid file type, Supported types are Download and Buffer");
|
|
47
|
+
}
|
|
45
48
|
const bucketName = (configs === null || configs === void 0 ? void 0 : configs.bucket) ?? process.env.AWS_BUCKET ?? process.env.INTUNED_S3_BUCKET ?? undefined;
|
|
46
49
|
const region = (configs === null || configs === void 0 ? void 0 : configs.region) ?? process.env.AWS_REGION ?? process.env.INTUNED_S3_REGION ?? undefined;
|
|
47
50
|
const endpoint = (configs === null || configs === void 0 ? void 0 : configs.endpoint) ?? process.env.AWS_ENDPOINT_URL ?? process.env.INTUNED_S3_ENDPOINT_URL;
|
|
@@ -52,6 +55,9 @@ const uploadFileToS3 = async input => {
|
|
|
52
55
|
console.log("Uploaded file successfully");
|
|
53
56
|
if (isDownloadedFile) {
|
|
54
57
|
return new _Attachment.Attachment(`${(0, _uuid.v4)()}/${file.suggestedFilename()}`, `${(0, _uuid.v4)()}/${file.suggestedFilename()}`, "testing_bucket", "testing_region", file.suggestedFilename() || "downloaded_file", endpoint, "document");
|
|
58
|
+
} else {
|
|
59
|
+
const suggestedFileName = (0, _uuid.v4)();
|
|
60
|
+
return new _Attachment.Attachment(suggestedFileName, suggestedFileName, "testing_bucket", "testing_region", suggestedFileName, endpoint, "document");
|
|
55
61
|
}
|
|
56
62
|
}
|
|
57
63
|
let suggestedFileName;
|
|
@@ -12,8 +12,8 @@ function getS3Client(endpoint, region) {
|
|
|
12
12
|
s3ClientInstance = new _clientS.S3Client({
|
|
13
13
|
region: resolvedRegion,
|
|
14
14
|
credentials: {
|
|
15
|
-
accessKeyId: process.env.AWS_ACCESS_KEY_ID || "",
|
|
16
|
-
secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY || ""
|
|
15
|
+
accessKeyId: process.env.AWS_ACCESS_KEY_ID || process.env.INTUNED_S3_ACCESS_KEY_ID || "",
|
|
16
|
+
secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY || process.env.INTUNED_S3_SECRET_ACCESS_KEY || ""
|
|
17
17
|
},
|
|
18
18
|
endpoint: endpoint || undefined
|
|
19
19
|
});
|
|
@@ -12,7 +12,9 @@ var _Attachment = require("./types/Attachment");
|
|
|
12
12
|
function _interopRequireDefault(e) { return e && e.__esModule ? e : { default: e }; }
|
|
13
13
|
const ajv = new _ajv.default({
|
|
14
14
|
strict: false,
|
|
15
|
-
removeAdditional: false
|
|
15
|
+
removeAdditional: false,
|
|
16
|
+
allErrors: true,
|
|
17
|
+
verbose: true
|
|
16
18
|
});
|
|
17
19
|
function injectAttachmentType(schema) {
|
|
18
20
|
const schemaCopy = JSON.parse(JSON.stringify(schema));
|
|
@@ -32,10 +34,35 @@ function injectAttachmentType(schema) {
|
|
|
32
34
|
return node.map(item => replaceAttachmentType(item));
|
|
33
35
|
}
|
|
34
36
|
const result = {};
|
|
35
|
-
|
|
37
|
+
const nodeType = node.type;
|
|
38
|
+
if (typeof nodeType === "string" && nodeType.toLowerCase() === "attachment") {
|
|
36
39
|
return {
|
|
37
40
|
$ref: "#/$defs/attachment"
|
|
38
41
|
};
|
|
42
|
+
} else if (Array.isArray(nodeType)) {
|
|
43
|
+
const hasAttachment = nodeType.some(t => typeof t === "string" && t.toLowerCase() === "attachment");
|
|
44
|
+
if (hasAttachment) {
|
|
45
|
+
const oneOfSchemas = [];
|
|
46
|
+
for (const typeItem of nodeType) {
|
|
47
|
+
if (typeof typeItem === "string" && typeItem.toLowerCase() === "attachment") {
|
|
48
|
+
oneOfSchemas.push({
|
|
49
|
+
$ref: "#/$defs/attachment"
|
|
50
|
+
});
|
|
51
|
+
} else {
|
|
52
|
+
oneOfSchemas.push({
|
|
53
|
+
type: typeItem
|
|
54
|
+
});
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
const newNode = {};
|
|
58
|
+
for (const key in node) {
|
|
59
|
+
if (key !== "type") {
|
|
60
|
+
newNode[key] = replaceAttachmentType(node[key]);
|
|
61
|
+
}
|
|
62
|
+
}
|
|
63
|
+
newNode.oneOf = oneOfSchemas;
|
|
64
|
+
return newNode;
|
|
65
|
+
}
|
|
39
66
|
}
|
|
40
67
|
for (const key in node) {
|
|
41
68
|
result[key] = replaceAttachmentType(node[key]);
|
|
@@ -44,19 +71,78 @@ function injectAttachmentType(schema) {
|
|
|
44
71
|
}
|
|
45
72
|
return replaceAttachmentType(schemaCopy);
|
|
46
73
|
}
|
|
74
|
+
function resolveSchema(schema, rootSchema) {
|
|
75
|
+
if (typeof schema !== "object" || schema === null) {
|
|
76
|
+
return schema;
|
|
77
|
+
}
|
|
78
|
+
if ("$ref" in schema) {
|
|
79
|
+
const refPath = schema.$ref;
|
|
80
|
+
if (refPath.startsWith("#/definitions/")) {
|
|
81
|
+
const defName = refPath.split("/").pop();
|
|
82
|
+
const definitions = rootSchema.definitions || {};
|
|
83
|
+
return definitions[defName] || schema;
|
|
84
|
+
} else if (refPath.startsWith("#/$defs/")) {
|
|
85
|
+
const defName = refPath.split("/").pop();
|
|
86
|
+
const defs = rootSchema.$defs || {};
|
|
87
|
+
return defs[defName] || schema;
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
for (const combinator of ["oneOf", "anyOf"]) {
|
|
91
|
+
if (combinator in schema) {
|
|
92
|
+
for (const option of schema[combinator]) {
|
|
93
|
+
const resolved = resolveSchema(option, rootSchema);
|
|
94
|
+
if ((resolved === null || resolved === void 0 ? void 0 : resolved.type) === "object") {
|
|
95
|
+
return resolved;
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
}
|
|
100
|
+
return schema;
|
|
101
|
+
}
|
|
102
|
+
function removeNoneFromOptionalFields(data, schema, rootSchema) {
|
|
103
|
+
if (typeof data === "object" && data !== null && !Array.isArray(data) && typeof schema === "object" && schema !== null) {
|
|
104
|
+
const resolvedSchema = resolveSchema(schema, rootSchema);
|
|
105
|
+
const requiredFields = new Set(resolvedSchema.required || []);
|
|
106
|
+
const properties = resolvedSchema.properties || {};
|
|
107
|
+
const cleaned = {};
|
|
108
|
+
for (const key in data) {
|
|
109
|
+
const value = data[key];
|
|
110
|
+
const fieldSchema = properties[key] || {};
|
|
111
|
+
if (requiredFields.has(key)) {
|
|
112
|
+
cleaned[key] = removeNoneFromOptionalFields(value, fieldSchema, rootSchema);
|
|
113
|
+
} else if (value !== null && value !== undefined) {
|
|
114
|
+
cleaned[key] = removeNoneFromOptionalFields(value, fieldSchema, rootSchema);
|
|
115
|
+
}
|
|
116
|
+
}
|
|
117
|
+
return cleaned;
|
|
118
|
+
} else if (Array.isArray(data)) {
|
|
119
|
+
const itemsSchema = (schema === null || schema === void 0 ? void 0 : schema.items) || {};
|
|
120
|
+
return data.filter(item => item !== null && item !== undefined).map(item => removeNoneFromOptionalFields(item, itemsSchema, rootSchema));
|
|
121
|
+
} else {
|
|
122
|
+
return data;
|
|
123
|
+
}
|
|
124
|
+
}
|
|
47
125
|
const validateDataUsingSchema = input => {
|
|
48
126
|
const {
|
|
49
127
|
data,
|
|
50
128
|
schema
|
|
51
129
|
} = input;
|
|
52
130
|
const processedSchema = injectAttachmentType(schema);
|
|
131
|
+
const cleanedData = removeNoneFromOptionalFields(data, processedSchema, processedSchema);
|
|
53
132
|
const validate = ajv.compile(processedSchema);
|
|
54
|
-
const valid = validate(data);
|
|
133
|
+
const valid = validate(cleanedData);
|
|
55
134
|
if (!valid) {
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
135
|
+
const errors = validate.errors || [];
|
|
136
|
+
if (errors.length > 0) {
|
|
137
|
+
const errorMessages = [];
|
|
138
|
+
for (const err of errors) {
|
|
139
|
+
errorMessages.push(` - ${err.instancePath || "root"}: ${err.message}`);
|
|
140
|
+
}
|
|
141
|
+
const fullMessage = `Validation failed with ${errors.length} error(s):\n${errorMessages.join("\n")}`;
|
|
142
|
+
throw new _types.ValidationError(fullMessage, data);
|
|
143
|
+
} else {
|
|
144
|
+
throw new _types.ValidationError("Validation failed: Unknown validation error", data);
|
|
145
|
+
}
|
|
60
146
|
}
|
|
61
147
|
};
|
|
62
148
|
exports.validateDataUsingSchema = validateDataUsingSchema;
|
|
@@ -83,17 +83,26 @@ const waitForDomSettled = async options => {
|
|
|
83
83
|
return false;
|
|
84
84
|
}
|
|
85
85
|
const allIframes = await (0, _findAllIframes.findAllIframesList)(frame);
|
|
86
|
+
let hasRestrictedIframes = false;
|
|
86
87
|
for (const iframeNode of allIframes) {
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
88
|
+
if (iframeNode.allowsAsyncScripts) {
|
|
89
|
+
const iframeElementHandle = await iframeNode.frame.evaluateHandle("document.documentElement");
|
|
90
|
+
const iframeResult = await iframeElementHandle.evaluate(jsCode, {
|
|
91
|
+
settleDurationMsFloored,
|
|
92
|
+
timeoutMs
|
|
93
|
+
});
|
|
94
|
+
await iframeElementHandle.dispose();
|
|
95
|
+
if (!iframeResult) {
|
|
96
|
+
return false;
|
|
97
|
+
}
|
|
98
|
+
} else {
|
|
99
|
+
hasRestrictedIframes = true;
|
|
95
100
|
}
|
|
96
101
|
}
|
|
102
|
+
if (hasRestrictedIframes) {
|
|
103
|
+
_Logger.logger.debug(`Waiting ${2 * settleDurationMs}ms for iframe(s) that do not allow async scripts to settle`);
|
|
104
|
+
await new Promise(resolve => setTimeout(resolve, 2 * settleDurationMs));
|
|
105
|
+
}
|
|
97
106
|
return true;
|
|
98
107
|
} catch (error) {
|
|
99
108
|
_Logger.logger.warn(`DOM settlement detection failed: ${error}`);
|
|
@@ -72,7 +72,7 @@ _extendedTest.describe.skip("Array Extractor Caching Tests", () => {
|
|
|
72
72
|
model: "claude-3-5-sonnet-20240620",
|
|
73
73
|
type: "HTML"
|
|
74
74
|
},
|
|
75
|
-
variantKey
|
|
75
|
+
variantKey,
|
|
76
76
|
apiKey: process.env.ANTHROPIC_API_KEY
|
|
77
77
|
};
|
|
78
78
|
await page.setContent(productListTemplate);
|
|
@@ -57,7 +57,7 @@ _extendedTest.describe.skip("Object Extractor Caching Tests", () => {
|
|
|
57
57
|
model: "claude-3-5-sonnet-20240620",
|
|
58
58
|
type: "HTML"
|
|
59
59
|
},
|
|
60
|
-
variantKey
|
|
60
|
+
variantKey,
|
|
61
61
|
apiKey: process.env.ANTHROPIC_API_KEY
|
|
62
62
|
};
|
|
63
63
|
await page.setContent(productTemplate);
|
|
@@ -5,12 +5,11 @@ Object.defineProperty(exports, "__esModule", {
|
|
|
5
5
|
});
|
|
6
6
|
exports.SUPPORTED_VISION_MODELS = exports.SUPPORTED_TEXT_MODELS = exports.SUPPORTED_GPT_MODELS = exports.SUPPORTED_GOOGLE_MODELS = exports.SUPPORTED_CLAUDE_MODELS = exports.MODELS_MAPPINGS = exports.MAX_TOKENS_OVERRIDES = exports.GPT_MODELS_MAPPINGS = exports.GOOGLE_MODELS_MAPPINGS = exports.CLAUDE_VISION_SUPPORTED_MODELS = exports.CLAUDE_ONLY_TEXT_MODELS = exports.CLAUDE_MODELS_MAPPINGS = void 0;
|
|
7
7
|
const CLAUDE_ONLY_TEXT_MODELS = exports.CLAUDE_ONLY_TEXT_MODELS = ["claude-3-5-haiku", "claude-3-5-haiku-20241022"];
|
|
8
|
-
const CLAUDE_VISION_SUPPORTED_MODELS = exports.CLAUDE_VISION_SUPPORTED_MODELS = ["claude-3-haiku", "claude-3-haiku-20240307", "claude-
|
|
8
|
+
const CLAUDE_VISION_SUPPORTED_MODELS = exports.CLAUDE_VISION_SUPPORTED_MODELS = ["claude-3-haiku", "claude-3-haiku-20240307", "claude-opus-4", "claude-opus-4-20250514", "claude-sonnet-4", "claude-sonnet-4-20250514"];
|
|
9
9
|
const SUPPORTED_CLAUDE_MODELS = exports.SUPPORTED_CLAUDE_MODELS = [...CLAUDE_ONLY_TEXT_MODELS, ...CLAUDE_VISION_SUPPORTED_MODELS];
|
|
10
10
|
const CLAUDE_MODELS_MAPPINGS = exports.CLAUDE_MODELS_MAPPINGS = {
|
|
11
11
|
"claude-3-haiku": "claude-3-haiku-20240307",
|
|
12
12
|
"claude-3-5-haiku": "claude-3-5-haiku-20241022",
|
|
13
|
-
"claude-3.5-sonnet": "claude-3-5-sonnet-20241022",
|
|
14
13
|
"claude-opus-4": "claude-opus-4-20250514",
|
|
15
14
|
"claude-sonnet-4": "claude-sonnet-4-20250514"
|
|
16
15
|
};
|
|
@@ -37,7 +36,6 @@ const MODELS_MAPPINGS = exports.MODELS_MAPPINGS = {
|
|
|
37
36
|
...GOOGLE_MODELS_MAPPINGS
|
|
38
37
|
};
|
|
39
38
|
const MAX_TOKENS_OVERRIDES = exports.MAX_TOKENS_OVERRIDES = {
|
|
40
|
-
"claude-3-5-sonnet-20240620": 8192,
|
|
41
39
|
"gemini-1.5-pro-002": 8192,
|
|
42
40
|
"gemini-1.5-flash-8b-002": 8192,
|
|
43
41
|
"gemini-1.5-flash-002": 8192,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@intuned/browser-dev",
|
|
3
|
-
"version": "0.1.5-dev.1",
|
|
3
|
+
"version": "0.1.6-dev.1",
|
|
4
4
|
"description": "runner package for intuned functions",
|
|
5
5
|
"types": "./dist/index.d.ts",
|
|
6
6
|
"typesVersions": {
|
|
@@ -48,7 +48,8 @@
|
|
|
48
48
|
"generate-docs": "npx tsx ./scripts/generate-docs.ts",
|
|
49
49
|
"generate-all-docs": "npx tsx ./scripts/generate-docs.ts ./src/helpers/export.d.ts ./generated-docs/helpers && npx tsx ./scripts/generate-docs.ts ./src/ai/export.d.ts ./generated-docs/ai && npx tsx ./scripts/generate-docs.ts ./src/optimized-extractors/export.d.ts ./generated-docs/optimized-extractors",
|
|
50
50
|
"build-browser-scripts": "rollup -c ./src/common/browserScripts/rollup.config.mjs",
|
|
51
|
-
"copy-dts": "copyfiles -u 1 \"src/**/*.d.ts\" dist"
|
|
51
|
+
"copy-dts": "copyfiles -u 1 \"src/**/*.d.ts\" dist",
|
|
52
|
+
"release": "npx tsx ./scripts/release.ts"
|
|
52
53
|
},
|
|
53
54
|
"dependencies": {
|
|
54
55
|
"@ai-sdk/anthropic": "2.0.1",
|
|
@@ -122,4 +123,4 @@
|
|
|
122
123
|
"optional": true
|
|
123
124
|
}
|
|
124
125
|
}
|
|
125
|
-
}
|
|
126
|
+
}
|