@intuned/browser-dev 0.1.7-dev.0 → 0.1.9-dev.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +85 -143
- package/dist/ai/export.d.ts +292 -144
- package/dist/ai/extractStructuredDataUsingAi.js +24 -1
- package/dist/ai/index.d.ts +292 -144
- package/dist/ai/tests/testExtractStructuredData.spec.js +2 -2
- package/dist/common/Logger/index.js +2 -2
- package/dist/helpers/export.d.ts +703 -577
- package/dist/helpers/gotoUrl.js +50 -51
- package/dist/helpers/index.d.ts +703 -577
- package/dist/helpers/tests/testClickUntilExhausted.spec.js +2 -1
- package/dist/helpers/withNetworkSettledWait.js +2 -7
- package/dist/optimized-extractors/export.d.ts +17 -18
- package/dist/optimized-extractors/index.d.ts +17 -18
- package/how-to-generate-docs.md +40 -28
- package/package.json +2 -3
- package/generated-docs/ai/functions/extractStructuredData.mdx +0 -255
- package/generated-docs/ai/functions/isPageLoaded.mdx +0 -89
- package/generated-docs/ai/interfaces/ArraySchema.mdx +0 -36
- package/generated-docs/ai/interfaces/BasicSchema.mdx +0 -14
- package/generated-docs/ai/interfaces/BooleanSchema.mdx +0 -28
- package/generated-docs/ai/interfaces/ImageBufferContentItem.mdx +0 -16
- package/generated-docs/ai/interfaces/ImageUrlContentItem.mdx +0 -16
- package/generated-docs/ai/interfaces/NumberSchema.mdx +0 -35
- package/generated-docs/ai/interfaces/ObjectSchema.mdx +0 -39
- package/generated-docs/ai/interfaces/StringSchema.mdx +0 -35
- package/generated-docs/ai/interfaces/TextContentItem.mdx +0 -14
- package/generated-docs/ai/type-aliases/ContentItem.mdx +0 -12
- package/generated-docs/ai/type-aliases/JsonSchema.mdx +0 -47
- package/generated-docs/ai/type-aliases/SUPPORTED_MODELS.mdx +0 -85
- package/generated-docs/helpers/functions/clickButtonAndWait.mdx +0 -63
- package/generated-docs/helpers/functions/clickUntilExhausted.mdx +0 -112
- package/generated-docs/helpers/functions/downloadFile.mdx +0 -99
- package/generated-docs/helpers/functions/extractMarkdown.mdx +0 -56
- package/generated-docs/helpers/functions/filterEmptyValues.mdx +0 -51
- package/generated-docs/helpers/functions/goToUrl.mdx +0 -124
- package/generated-docs/helpers/functions/processDate.mdx +0 -55
- package/generated-docs/helpers/functions/resolveUrl.mdx +0 -165
- package/generated-docs/helpers/functions/sanitizeHtml.mdx +0 -113
- package/generated-docs/helpers/functions/saveFileToS3.mdx +0 -127
- package/generated-docs/helpers/functions/scrollToLoadContent.mdx +0 -83
- package/generated-docs/helpers/functions/uploadFileToS3.mdx +0 -121
- package/generated-docs/helpers/functions/validateDataUsingSchema.mdx +0 -90
- package/generated-docs/helpers/functions/waitForDomSettled.mdx +0 -91
- package/generated-docs/helpers/functions/withNetworkSettledWait.mdx +0 -76
- package/generated-docs/helpers/interfaces/Attachment.mdx +0 -56
- package/generated-docs/helpers/interfaces/S3Configs.mdx +0 -52
- package/generated-docs/helpers/interfaces/SanitizeHtmlOptions.mdx +0 -22
- package/generated-docs/helpers/type-aliases/AttachmentType.mdx +0 -10
- package/generated-docs/helpers/type-aliases/FileType.mdx +0 -61
- package/generated-docs/helpers/type-aliases/Trigger.mdx +0 -62
package/dist/helpers/gotoUrl.js
CHANGED
|
@@ -9,7 +9,7 @@ var _asyncRetry = _interopRequireDefault(require("async-retry"));
|
|
|
9
9
|
var _playwright = require("playwright");
|
|
10
10
|
var _isPageLoaded = require("../ai/isPageLoaded");
|
|
11
11
|
var _Logger = require("../common/Logger");
|
|
12
|
-
var
|
|
12
|
+
var _withNetworkSettledWait = require("./withNetworkSettledWait");
|
|
13
13
|
function _interopRequireDefault(e) { return e && e.__esModule ? e : { default: e }; }
|
|
14
14
|
const DEFAULT_PLAYWRIGHT_TIMEOUT = 30000;
|
|
15
15
|
const TIMEOUT_PADDING = 3000;
|
|
@@ -17,7 +17,7 @@ const goToUrl = async input => {
|
|
|
17
17
|
const {
|
|
18
18
|
page,
|
|
19
19
|
url,
|
|
20
|
-
throwOnTimeout =
|
|
20
|
+
throwOnTimeout = false,
|
|
21
21
|
waitForLoadingStateUsingAi = false,
|
|
22
22
|
retries = 3,
|
|
23
23
|
model = "gpt-5-mini-2025-08-07",
|
|
@@ -28,63 +28,62 @@ const goToUrl = async input => {
|
|
|
28
28
|
const timeoutInMs = getPageGotoTimeout(page, {
|
|
29
29
|
timeoutInMs: input.timeoutInMs
|
|
30
30
|
});
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
31
|
+
return await (0, _withNetworkSettledWait.withNetworkSettledWait)(async () => {
|
|
32
|
+
let responseOrTimeout;
|
|
33
|
+
try {
|
|
34
|
+
await (0, _asyncRetry.default)(async () => {
|
|
35
|
+
const promises = [page.goto(url, {
|
|
36
|
+
waitUntil: waitForLoadState,
|
|
37
|
+
timeout: timeoutInMs !== undefined ? timeoutInMs : undefined
|
|
38
|
+
})];
|
|
39
|
+
if (timeoutInMs !== undefined) {
|
|
40
|
+
promises.push((0, _promises.setTimeout)(timeoutInMs + TIMEOUT_PADDING, timeoutSymbol));
|
|
41
|
+
}
|
|
42
|
+
responseOrTimeout = await Promise.race(promises);
|
|
43
|
+
if (responseOrTimeout === timeoutSymbol) {
|
|
44
|
+
throw new _playwright.errors.TimeoutError("Page.goto timed out but did not throw an error. Consider using a proxy.\n" + `(URL: ${url}, timeout: ${timeoutInMs}ms)`);
|
|
45
|
+
}
|
|
46
|
+
}, {
|
|
47
|
+
retries,
|
|
48
|
+
factor: 2,
|
|
49
|
+
minTimeout: 1000
|
|
50
|
+
});
|
|
51
|
+
} catch (error) {
|
|
52
|
+
if (!throwOnTimeout) {
|
|
53
|
+
return;
|
|
44
54
|
}
|
|
55
|
+
throw error;
|
|
56
|
+
}
|
|
57
|
+
if (!waitForLoadingStateUsingAi) {
|
|
58
|
+
return responseOrTimeout;
|
|
59
|
+
}
|
|
60
|
+
for (let i = 0; i < retries; i++) {
|
|
61
|
+
let isLoaded = false;
|
|
45
62
|
try {
|
|
46
|
-
await
|
|
47
|
-
|
|
63
|
+
isLoaded = await (0, _isPageLoaded.isPageLoaded)({
|
|
64
|
+
page,
|
|
65
|
+
timeoutInMs,
|
|
66
|
+
model,
|
|
67
|
+
apiKey: apiKey ? apiKey : undefined
|
|
48
68
|
});
|
|
69
|
+
if (isLoaded === true) {
|
|
70
|
+
return;
|
|
71
|
+
}
|
|
49
72
|
} catch (error) {
|
|
50
|
-
_Logger.logger.
|
|
73
|
+
_Logger.logger.debug(`Failed to check if page is loaded: ${url}. Error: ${error}`);
|
|
74
|
+
isLoaded = false;
|
|
51
75
|
}
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
factor: 2,
|
|
55
|
-
minTimeout: 1000
|
|
56
|
-
});
|
|
57
|
-
} catch (error) {
|
|
58
|
-
if (!throwOnTimeout) {
|
|
59
|
-
return;
|
|
60
|
-
}
|
|
61
|
-
throw error;
|
|
62
|
-
}
|
|
63
|
-
if (!waitForLoadingStateUsingAi && !(0, _utils.isGenerateCodeMode)()) {
|
|
64
|
-
return responseOrTimeout;
|
|
65
|
-
}
|
|
66
|
-
for (let i = 0; i < retries; i++) {
|
|
67
|
-
try {
|
|
68
|
-
const isLoaded = await (0, _isPageLoaded.isPageLoaded)({
|
|
69
|
-
page,
|
|
70
|
-
timeoutInMs,
|
|
71
|
-
model,
|
|
72
|
-
apiKey: apiKey ? apiKey : undefined
|
|
73
|
-
});
|
|
74
|
-
if (isLoaded === true) {
|
|
76
|
+
if (i === retries - 1) {
|
|
77
|
+
_Logger.logger.warn("Page never loaded, url: " + url);
|
|
75
78
|
return;
|
|
76
79
|
}
|
|
77
|
-
|
|
78
|
-
_Logger.logger.error(`Error in AI page load detection for URL: ${url}. Error: ${error}`);
|
|
79
|
-
_Logger.logger.warn("AI page load detection failed. Continuing without AI verification.");
|
|
80
|
-
return;
|
|
80
|
+
await (0, _promises.setTimeout)(5000);
|
|
81
81
|
}
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
}
|
|
82
|
+
}, {
|
|
83
|
+
page,
|
|
84
|
+
maxInflightRequests: 0,
|
|
85
|
+
timeoutInMs: 30000
|
|
86
|
+
});
|
|
88
87
|
};
|
|
89
88
|
exports.goToUrl = goToUrl;
|
|
90
89
|
function getPageGotoTimeout(page, options) {
|