@intuned/browser-dev 0.1.5-dev.1 → 0.1.6-dev.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/RELEASE.md +11 -9
- package/dist/ai/export.d.ts +1 -7
- package/dist/ai/extractStructuredData.js +1 -1
- package/dist/ai/extractStructuredDataUsingAi.js +23 -2
- package/dist/ai/extractionHelpers/validateSchema.js +34 -2
- package/dist/ai/index.d.ts +1 -7
- package/dist/ai/tests/testExtractStructuredData.spec.js +150 -18
- package/dist/ai/tests/testIsPageLoaded.spec.js +1 -1
- package/dist/ai/types/models.js +2 -5
- package/dist/ai/validators.js +1 -1
- package/dist/common/aiModelsValidations.js +2 -4
- package/dist/helpers/clickUntilExhausted.js +35 -38
- package/dist/helpers/downloadFile.js +1 -3
- package/dist/helpers/frame_utils/checkFrameAllowsAsyncScripts.js +20 -0
- package/dist/helpers/frame_utils/findAllIframes.js +4 -1
- package/dist/helpers/frame_utils/tests/testFindAllIframes.spec.js +43 -0
- package/dist/helpers/tests/testDownloadFile.spec.js +3 -4
- package/dist/helpers/tests/testResolveUrl.spec.js +4 -4
- package/dist/helpers/tests/testValidateDataUsingSchema.spec.js +1041 -4
- package/dist/helpers/tests/testWithDomSettledWait.spec.js +23 -0
- package/dist/helpers/tests/testWithNetworkIdleWait.spec.js +1 -1
- package/dist/helpers/uploadFileToS3.js +6 -0
- package/dist/helpers/utils/getS3Client.js +2 -2
- package/dist/helpers/validateDataUsingSchema.js +93 -7
- package/dist/helpers/waitForDomSettled.js +17 -8
- package/dist/optimized-extractors/listExtractionHelpers/__tests__/testArrayExtractorFromLocator.spec.js +1 -1
- package/dist/optimized-extractors/objectExtractionHelpers/__tests__/testObjectExtractorFromLocator.spec.js +1 -1
- package/dist/optimized-extractors/objectExtractionHelpers/__tests__/testObjectExtractorFromPage.spec.js +1 -1
- package/dist/optimized-extractors/types/aiModelsValidation.js +1 -3
- package/package.json +4 -3
|
@@ -20,12 +20,7 @@ const clickButtonAndWaitInternal = async (page, buttonLocator, clickDelay) => {
|
|
|
20
20
|
});
|
|
21
21
|
await new Promise(resolve => setTimeout(resolve, clickDelay * 1000));
|
|
22
22
|
};
|
|
23
|
-
const clickButtonAndWait = async input => {
|
|
24
|
-
const {
|
|
25
|
-
page,
|
|
26
|
-
buttonLocator,
|
|
27
|
-
clickDelay = 0.5
|
|
28
|
-
} = input;
|
|
23
|
+
const clickButtonWithNetworkWait = async (page, buttonLocator, clickDelay) => {
|
|
29
24
|
await (0, _withNetworkSettledWait.withNetworkSettledWait)(async () => {
|
|
30
25
|
await clickButtonAndWaitInternal(page, buttonLocator, clickDelay);
|
|
31
26
|
}, {
|
|
@@ -34,6 +29,14 @@ const clickButtonAndWait = async input => {
|
|
|
34
29
|
timeoutInMs: 10000
|
|
35
30
|
});
|
|
36
31
|
};
|
|
32
|
+
const clickButtonAndWait = async input => {
|
|
33
|
+
const {
|
|
34
|
+
page,
|
|
35
|
+
buttonLocator,
|
|
36
|
+
clickDelay = 0.5
|
|
37
|
+
} = input;
|
|
38
|
+
await clickButtonWithNetworkWait(page, buttonLocator, clickDelay);
|
|
39
|
+
};
|
|
37
40
|
exports.clickButtonAndWait = clickButtonAndWait;
|
|
38
41
|
const clickUntilExhausted = async input => {
|
|
39
42
|
const {
|
|
@@ -45,41 +48,35 @@ const clickUntilExhausted = async input => {
|
|
|
45
48
|
clickDelay = 0.5,
|
|
46
49
|
noChangeThreshold = 0
|
|
47
50
|
} = input;
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
51
|
+
let prevState = null;
|
|
52
|
+
if (containerLocator) {
|
|
53
|
+
prevState = await getContainerState(containerLocator);
|
|
54
|
+
_Logger.logger.info(`Initial container state: ${prevState}`);
|
|
55
|
+
}
|
|
56
|
+
const buttonCount = await buttonLocator.count();
|
|
57
|
+
_Logger.logger.info(`Button matches: ${buttonCount}`);
|
|
58
|
+
for (let i = 0; i < maxClicks; i++) {
|
|
59
|
+
heartbeat === null || heartbeat === void 0 || heartbeat();
|
|
60
|
+
const isVisible = await buttonLocator.isVisible();
|
|
61
|
+
if (!isVisible) {
|
|
62
|
+
_Logger.logger.info("Button not visible, stopping.");
|
|
63
|
+
break;
|
|
53
64
|
}
|
|
54
|
-
const
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
}
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
_Logger.logger.info("Button not enabled, stopping.");
|
|
65
|
+
const isEnabled = await buttonLocator.isEnabled();
|
|
66
|
+
if (!isEnabled) {
|
|
67
|
+
_Logger.logger.info("Button not enabled, stopping.");
|
|
68
|
+
break;
|
|
69
|
+
}
|
|
70
|
+
await clickButtonWithNetworkWait(page, buttonLocator, clickDelay);
|
|
71
|
+
if (containerLocator) {
|
|
72
|
+
const currentState = await getContainerState(containerLocator);
|
|
73
|
+
_Logger.logger.info(`Current container state: ${currentState}`);
|
|
74
|
+
if (prevState !== null && currentState - prevState <= noChangeThreshold) {
|
|
75
|
+
_Logger.logger.info(`No significant change in container state: ${currentState} (previous: ${prevState})`);
|
|
66
76
|
break;
|
|
67
77
|
}
|
|
68
|
-
|
|
69
|
-
if (containerLocator) {
|
|
70
|
-
const currentState = await getContainerState(containerLocator);
|
|
71
|
-
_Logger.logger.info(`Current container state: ${currentState}`);
|
|
72
|
-
if (prevState !== null && currentState - prevState <= noChangeThreshold) {
|
|
73
|
-
_Logger.logger.info(`No significant change in container state: ${currentState} (previous: ${prevState})`);
|
|
74
|
-
break;
|
|
75
|
-
}
|
|
76
|
-
prevState = currentState;
|
|
77
|
-
}
|
|
78
|
+
prevState = currentState;
|
|
78
79
|
}
|
|
79
|
-
}
|
|
80
|
-
page,
|
|
81
|
-
maxInflightRequests: 0,
|
|
82
|
-
timeoutInMs: 30000
|
|
83
|
-
});
|
|
80
|
+
}
|
|
84
81
|
};
|
|
85
82
|
exports.clickUntilExhausted = clickUntilExhausted;
|
|
@@ -88,9 +88,7 @@ const downloadFile = async input => {
|
|
|
88
88
|
console.error(`Download was cancelled for URL: ${absoluteUrl}`);
|
|
89
89
|
});
|
|
90
90
|
}
|
|
91
|
-
} catch (error) {
|
|
92
|
-
console.error("Error during download:", error);
|
|
93
|
-
}
|
|
91
|
+
} catch (error) {}
|
|
94
92
|
} else if (isCallableTrigger(trigger)) {
|
|
95
93
|
action = await trigger(page);
|
|
96
94
|
try {
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, "__esModule", {
|
|
4
|
+
value: true
|
|
5
|
+
});
|
|
6
|
+
exports.checkFrameAllowsAsyncScripts = checkFrameAllowsAsyncScripts;
|
|
7
|
+
var _Logger = require("../../common/Logger");
|
|
8
|
+
async function checkFrameAllowsAsyncScripts(iframeElement) {
|
|
9
|
+
try {
|
|
10
|
+
const sandboxValue = await iframeElement.evaluate(element => element.getAttribute("sandbox"));
|
|
11
|
+
if (sandboxValue === null) {
|
|
12
|
+
return true;
|
|
13
|
+
}
|
|
14
|
+
const sandboxTokens = sandboxValue.trim().split(/\s+/);
|
|
15
|
+
return sandboxTokens.includes("allow-scripts");
|
|
16
|
+
} catch (error) {
|
|
17
|
+
_Logger.logger.warn(`Error checking iframe sandbox attribute: ${error}`);
|
|
18
|
+
return true;
|
|
19
|
+
}
|
|
20
|
+
}
|
|
@@ -6,6 +6,7 @@ Object.defineProperty(exports, "__esModule", {
|
|
|
6
6
|
exports.findAllIframes = findAllIframes;
|
|
7
7
|
exports.findAllIframesList = findAllIframesList;
|
|
8
8
|
var _Logger = require("../../common/Logger");
|
|
9
|
+
var _checkFrameAllowsAsyncScripts = require("./checkFrameAllowsAsyncScripts");
|
|
9
10
|
var _constants = require("./constants");
|
|
10
11
|
async function findAllIframes(root, iframeTimeoutMs = 10000) {
|
|
11
12
|
const processed = new Set();
|
|
@@ -44,10 +45,12 @@ async function processFrameRecursive(root, processedRoots, iframeTimeoutMs) {
|
|
|
44
45
|
_Logger.logger.error(`Could not access content_frame for iframe: ${iframeElement}`);
|
|
45
46
|
return null;
|
|
46
47
|
}
|
|
48
|
+
const allowsAsyncScripts = await (0, _checkFrameAllowsAsyncScripts.checkFrameAllowsAsyncScripts)(iframeElement);
|
|
47
49
|
const nestedIframes = await processFrameRecursive(contentFrame, processedRoots, iframeTimeoutMs);
|
|
48
50
|
return {
|
|
49
51
|
frame: contentFrame,
|
|
50
|
-
nestedIframes
|
|
52
|
+
nestedIframes,
|
|
53
|
+
allowsAsyncScripts
|
|
51
54
|
};
|
|
52
55
|
};
|
|
53
56
|
const iframeNode = await Promise.race([processSingleIframe(i), new Promise((_, reject) => setTimeout(() => reject(new Error("Timeout")), iframeTimeoutMs))]);
|
|
@@ -167,4 +167,47 @@ var _findAllIframes = require("../findAllIframes");
|
|
|
167
167
|
(0, _extendedTest.expect)(node.frame).toBeDefined();
|
|
168
168
|
}
|
|
169
169
|
});
|
|
170
|
+
(0, _extendedTest.test)("should correctly identify sandboxed iframes with allowsAsyncScripts flag", async () => {
|
|
171
|
+
var _sandboxedNode, _normalNode;
|
|
172
|
+
await page.goto(`data:text/html,
|
|
173
|
+
<html>
|
|
174
|
+
<body>
|
|
175
|
+
<h1>Main Content</h1>
|
|
176
|
+
<iframe id="sandboxed-iframe"
|
|
177
|
+
sandbox="allow-same-origin"
|
|
178
|
+
srcdoc="<html><body><h2>Sandboxed Iframe</h2></body></html>"
|
|
179
|
+
width="300"
|
|
180
|
+
height="200">
|
|
181
|
+
</iframe>
|
|
182
|
+
<iframe id="normal-iframe"
|
|
183
|
+
srcdoc="<html><body><h2>Normal Iframe</h2></body></html>"
|
|
184
|
+
width="300"
|
|
185
|
+
height="200">
|
|
186
|
+
</iframe>
|
|
187
|
+
</body>
|
|
188
|
+
</html>`, {
|
|
189
|
+
waitUntil: "domcontentloaded"
|
|
190
|
+
});
|
|
191
|
+
const iframeNodes = await (0, _findAllIframes.findAllIframes)(page);
|
|
192
|
+
(0, _extendedTest.expect)(iframeNodes.length).toBe(2);
|
|
193
|
+
let sandboxedNode;
|
|
194
|
+
let normalNode;
|
|
195
|
+
for (const node of iframeNodes) {
|
|
196
|
+
try {
|
|
197
|
+
const iframeElement = await node.frame.frameElement();
|
|
198
|
+
const iframeId = await iframeElement.getAttribute("id");
|
|
199
|
+
if (iframeId === "sandboxed-iframe") {
|
|
200
|
+
sandboxedNode = node;
|
|
201
|
+
} else if (iframeId === "normal-iframe") {
|
|
202
|
+
normalNode = node;
|
|
203
|
+
}
|
|
204
|
+
} catch (error) {
|
|
205
|
+
continue;
|
|
206
|
+
}
|
|
207
|
+
}
|
|
208
|
+
(0, _extendedTest.expect)(sandboxedNode).toBeDefined();
|
|
209
|
+
(0, _extendedTest.expect)((_sandboxedNode = sandboxedNode) === null || _sandboxedNode === void 0 ? void 0 : _sandboxedNode.allowsAsyncScripts).toBe(false);
|
|
210
|
+
(0, _extendedTest.expect)(normalNode).toBeDefined();
|
|
211
|
+
(0, _extendedTest.expect)((_normalNode = normalNode) === null || _normalNode === void 0 ? void 0 : _normalNode.allowsAsyncScripts).toBe(true);
|
|
212
|
+
});
|
|
170
213
|
});
|
|
@@ -66,14 +66,14 @@ async function createUserDirWithPreferences() {
|
|
|
66
66
|
await (0, _promises.writeFile)(preferencesPath, JSON.stringify(preferences));
|
|
67
67
|
return (0, _path.resolve)(userDir);
|
|
68
68
|
}
|
|
69
|
-
|
|
69
|
+
_extendedTest.describe.skip("TestNotInGeneration", () => {
|
|
70
70
|
let context;
|
|
71
71
|
let page;
|
|
72
72
|
(0, _extendedTest.beforeAll)(async () => {
|
|
73
73
|
const dir = await createUserDirWithPreferences();
|
|
74
74
|
process.env.MODE = "";
|
|
75
75
|
context = await _playwrightCore.chromium.launchPersistentContext(dir, {
|
|
76
|
-
headless:
|
|
76
|
+
headless: true,
|
|
77
77
|
args: ["--no-first-run", "--disable-sync", "--disable-translate", "--disable-features=TranslateUI", "--disable-features=NetworkService", "--lang=en", "--disable-blink-features=AutomationControlled"],
|
|
78
78
|
acceptDownloads: true
|
|
79
79
|
});
|
|
@@ -105,7 +105,6 @@ async function createUserDirWithPreferences() {
|
|
|
105
105
|
});
|
|
106
106
|
const path = await downloadedImage.path();
|
|
107
107
|
(0, _extendedTest.expect)(path).toBeDefined();
|
|
108
|
-
(0, _extendedTest.expect)(String(path)).toContain("/var/folders/");
|
|
109
108
|
});
|
|
110
109
|
(0, _extendedTest.test)("should upload image to s3", async () => {
|
|
111
110
|
await page.setContent(content);
|
|
@@ -163,7 +162,7 @@ async function createUserDirWithPreferences() {
|
|
|
163
162
|
process.env.MODE = "generate_code";
|
|
164
163
|
const dir = await createUserDirWithPreferences();
|
|
165
164
|
context = await _playwrightCore.chromium.launchPersistentContext(dir, {
|
|
166
|
-
headless:
|
|
165
|
+
headless: true,
|
|
167
166
|
userAgent: "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36",
|
|
168
167
|
args: ["--no-first-run", "--disable-sync", "--disable-translate", "--disable-features=TranslateUI", "--disable-features=NetworkService", "--lang=en", "--disable-blink-features=AutomationControlled"],
|
|
169
168
|
acceptDownloads: true
|
|
@@ -81,7 +81,7 @@ var _playwright = require("playwright");
|
|
|
81
81
|
await page.goto("https://mhtml-viewer.com/some/path");
|
|
82
82
|
const result = await (0, _.resolveUrl)({
|
|
83
83
|
url: "/new/path",
|
|
84
|
-
page
|
|
84
|
+
page
|
|
85
85
|
});
|
|
86
86
|
(0, _extendedTest.expect)(result).toBe("https://mhtml-viewer.com/new/path");
|
|
87
87
|
} finally {
|
|
@@ -98,7 +98,7 @@ var _playwright = require("playwright");
|
|
|
98
98
|
const fullUrl = "https://other-site.com/different/path";
|
|
99
99
|
const result = await (0, _.resolveUrl)({
|
|
100
100
|
url: fullUrl,
|
|
101
|
-
page
|
|
101
|
+
page
|
|
102
102
|
});
|
|
103
103
|
(0, _extendedTest.expect)(result).toBe(fullUrl);
|
|
104
104
|
} finally {
|
|
@@ -114,7 +114,7 @@ var _playwright = require("playwright");
|
|
|
114
114
|
await page.goto("https://mhtml-viewer.com/folder/page?param=value#section");
|
|
115
115
|
const result = await (0, _.resolveUrl)({
|
|
116
116
|
url: "/new/path",
|
|
117
|
-
page
|
|
117
|
+
page
|
|
118
118
|
});
|
|
119
119
|
(0, _extendedTest.expect)(result).toBe("https://mhtml-viewer.com/new/path");
|
|
120
120
|
} finally {
|
|
@@ -295,7 +295,7 @@ var _playwright = require("playwright");
|
|
|
295
295
|
const invalidParams = {
|
|
296
296
|
url: "/test",
|
|
297
297
|
baseUrl: "https://example.com",
|
|
298
|
-
page
|
|
298
|
+
page
|
|
299
299
|
};
|
|
300
300
|
await (0, _extendedTest.expect)(async () => {
|
|
301
301
|
await (0, _.resolveUrl)(invalidParams);
|