@intuned/browser-dev 0.1.9-dev.0 → 0.1.10-dev.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ai/extractStructuredData.js +21 -27
- package/dist/ai/tests/testCreateMatchesMapping.spec.js +216 -0
- package/dist/ai/tests/testExtractStructuredData.spec.js +346 -0
- package/dist/ai/tests/testExtractStructuredDataDomMatchingIframes.spec.js +459 -0
- package/dist/ai/tests/testExtractStructuredDataUnit.spec.js +375 -0
- package/dist/ai/tests/testMatching.spec.js +342 -0
- package/dist/ai/tests/testValidateMatchesMapping.spec.js +265 -0
- package/dist/common/extendedTest.js +38 -30
- package/dist/common/frame_utils/frameTree.js +116 -0
- package/dist/common/frame_utils/getContentWithNestedIframes.js +13 -0
- package/dist/common/frame_utils/index.js +95 -0
- package/dist/common/frame_utils/stitchIframe.js +105 -0
- package/dist/{helpers → common}/frame_utils/tests/testFindAllIframes.spec.js +24 -15
- package/dist/common/frame_utils/tests/testGetContentWithNestedIframes.spec.js +241 -0
- package/dist/common/frame_utils/utils.js +91 -0
- package/dist/common/getSimplifiedHtml.js +20 -20
- package/dist/common/matching/matching.js +91 -16
- package/dist/common/tests/matching.test.js +225 -0
- package/dist/common/tests/testGetSimplifiedHtml.spec.js +324 -0
- package/dist/helpers/extractMarkdown.js +16 -7
- package/dist/helpers/tests/testExtractMarkdown.spec.js +29 -0
- package/dist/helpers/waitForDomSettled.js +4 -4
- package/dist/types/intuned-runtime.d.ts +6 -32
- package/package.json +1 -1
- package/dist/helpers/frame_utils/constants.js +0 -8
- package/dist/helpers/frame_utils/findAllIframes.js +0 -82
- package/dist/helpers/frame_utils/index.js +0 -44
- /package/dist/{helpers → common}/frame_utils/checkFrameAllowsAsyncScripts.js +0 -0
- /package/dist/{helpers → common}/frame_utils/getContainerFrame.js +0 -0
|
@@ -47,26 +47,26 @@ Object.defineProperty(exports, "vi", {
|
|
|
47
47
|
return _vitest.vi;
|
|
48
48
|
}
|
|
49
49
|
});
|
|
50
|
+
var _playwright = require("playwright");
|
|
50
51
|
var dotenv = _interopRequireWildcard(require("dotenv"));
|
|
51
52
|
var _vitest = require("vitest");
|
|
52
53
|
var _nanoid = require("nanoid");
|
|
53
54
|
var _extractionHelpers = require("./extractionHelpers");
|
|
55
|
+
var _path = _interopRequireDefault(require("path"));
|
|
54
56
|
var _script = require("./script");
|
|
57
|
+
var _os = _interopRequireDefault(require("os"));
|
|
58
|
+
// Module interop helper (appears compiler-generated): returns `e` unchanged when it is
// truthy and has a truthy `__esModule` flag; otherwise wraps it as `{ default: e }` so
// callers can always read `.default`.
function _interopRequireDefault(e) { return e && e.__esModule ? e : { default: e }; }
|
|
55
59
|
// Module interop helper (appears compiler-generated) used for namespace-style requires.
// On first call it creates two WeakMap caches (when WeakMap exists), replaces itself with
// the inner function, and immediately invokes that function.
// Inner function behaviour, for module `e` and flag `t`:
//   - when `t` is falsy and `e` has a truthy `__esModule`, `e` is returned as-is;
//   - otherwise a prototype-less object `{ default: e }` is built; for null or
//     non-object/non-function `e` that object is returned immediately;
//   - the cache selected by `t` is consulted/populated (keyed by `e`) when available;
//   - every own enumerable-by-`for...in` key of `e` except "default" is copied, re-defining
//     the property descriptor when it has a getter or setter, plain assignment otherwise.
function _interopRequireWildcard(e, t) { if ("function" == typeof WeakMap) var r = new WeakMap(), n = new WeakMap(); return (_interopRequireWildcard = function (e, t) { if (!t && e && e.__esModule) return e; var o, i, f = { __proto__: null, default: e }; if (null === e || "object" != typeof e && "function" != typeof e) return f; if (o = t ? n : r) { if (o.has(e)) return o.get(e); o.set(e, f); } for (const t in e) "default" !== t && {}.hasOwnProperty.call(e, t) && ((i = (o = Object.defineProperty) && Object.getOwnPropertyDescriptor(e, t)) && (i.get || i.set) ? o(f, t, i) : f[t] = e[t]); return f; })(e, t); }
|
|
56
60
|
dotenv.config();
|
|
57
61
|
const loadRuntime = async () => {
|
|
58
62
|
try {
|
|
59
63
|
const runtime = await Promise.resolve().then(() => _interopRequireWildcard(require("@intuned/runtime")));
|
|
60
64
|
return {
|
|
61
|
-
|
|
62
|
-
runWithContext: runtime.runWithContext,
|
|
63
|
-
getDownloadDirectoryPath: runtime.getDownloadDirectoryPath
|
|
65
|
+
runWithContext: runtime.runWithContext
|
|
64
66
|
};
|
|
65
67
|
} catch {
|
|
66
68
|
return {
|
|
67
|
-
|
|
68
|
-
runWithContext: null,
|
|
69
|
-
getDownloadDirectoryPath: null
|
|
69
|
+
runWithContext: null
|
|
70
70
|
};
|
|
71
71
|
}
|
|
72
72
|
};
|
|
@@ -80,49 +80,57 @@ const withPlaywright = (name, testFn, {
|
|
|
80
80
|
}) => {
|
|
81
81
|
return async () => {
|
|
82
82
|
const runtime = await loadRuntime();
|
|
83
|
-
if (!runtime.runWithContext
|
|
83
|
+
if (!runtime.runWithContext) {
|
|
84
84
|
throw new Error("@intuned/runtime is required for running tests. Please install it as a dependency.");
|
|
85
85
|
}
|
|
86
|
+
const downloadsPath = _path.default.join(_os.default.tmpdir(), `downloads-${(0, _nanoid.nanoid)()}`);
|
|
86
87
|
return runtime.runWithContext({
|
|
87
88
|
runId: runId ?? (0, _nanoid.nanoid)(),
|
|
88
89
|
extendedPayloads: [],
|
|
89
90
|
runEnvironment: "IDE",
|
|
90
91
|
jobId,
|
|
91
92
|
jobRunId,
|
|
92
|
-
queueId
|
|
93
|
+
queueId,
|
|
94
|
+
headless: headless ?? true,
|
|
95
|
+
downloadsPath
|
|
93
96
|
}, async () => {
|
|
94
|
-
const
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
console.error("Error reading browser scripts:", error);
|
|
109
|
-
}
|
|
110
|
-
if (trace) {
|
|
111
|
-
await context.tracing.start({
|
|
112
|
-
screenshots: true
|
|
113
|
-
});
|
|
97
|
+
const browser = await _playwright.chromium.launch({
|
|
98
|
+
headless: headless ?? true
|
|
99
|
+
});
|
|
100
|
+
const context = await browser.newContext({
|
|
101
|
+
acceptDownloads: true
|
|
102
|
+
});
|
|
103
|
+
const page = await context.newPage();
|
|
104
|
+
context.setDefaultTimeout(10000);
|
|
105
|
+
try {
|
|
106
|
+
await context.addInitScript({
|
|
107
|
+
content: _script.BROWSER_SCRIPT
|
|
108
|
+
});
|
|
109
|
+
for (const p of context.pages()) {
|
|
110
|
+
await p.evaluate(_script.BROWSER_SCRIPT);
|
|
114
111
|
}
|
|
112
|
+
} catch (error) {
|
|
113
|
+
console.error("Error reading browser scripts:", error);
|
|
114
|
+
}
|
|
115
|
+
if (trace) {
|
|
116
|
+
await context.tracing.start({
|
|
117
|
+
screenshots: true
|
|
118
|
+
});
|
|
119
|
+
}
|
|
120
|
+
try {
|
|
115
121
|
await testFn({
|
|
116
122
|
context: context,
|
|
117
123
|
page
|
|
118
124
|
});
|
|
125
|
+
} finally {
|
|
119
126
|
if (trace) {
|
|
120
127
|
await context.tracing.stop({
|
|
121
128
|
path: `./reports/traces/${name}.zip`
|
|
122
129
|
});
|
|
123
130
|
}
|
|
124
|
-
|
|
125
|
-
|
|
131
|
+
await context.close();
|
|
132
|
+
await browser.close();
|
|
133
|
+
}
|
|
126
134
|
});
|
|
127
135
|
};
|
|
128
136
|
};
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, "__esModule", {
|
|
4
|
+
value: true
|
|
5
|
+
});
|
|
6
|
+
exports.FrameTree = exports.FrameNode = void 0;
|
|
7
|
+
var _Logger = require("../Logger");
|
|
8
|
+
var _checkFrameAllowsAsyncScripts = require("./checkFrameAllowsAsyncScripts");
|
|
9
|
+
var _utils = require("./utils");
|
|
10
|
+
/**
 * One iframe entry in a frame tree: the frame, the element hosting it, whether it
 * allows async scripts, and the child nodes discovered inside it.
 */
class FrameNode {
  /**
   * @param {object} frame - Frame object; must expose `isDetached()`.
   * @param {object} frameElement - Handle of the element that hosts the frame.
   * @param {boolean} allowsAsyncScripts - Result of the async-script check for this iframe.
   * @param {Iterable<FrameNode>} nestedIframes - Child nodes found inside this frame.
   */
  constructor(frame, frameElement, allowsAsyncScripts, nestedIframes) {
    this.frame = frame;
    this.frameElement = frameElement;
    this.allowsAsyncScripts = allowsAsyncScripts;
    this._nestedIframes = nestedIframes;
  }

  /**
   * Lazily yields the direct child nodes whose frame is still attached.
   * Detached children are skipped with a warning.
   *
   * @returns {Generator<FrameNode>}
   */
  *nestedIframes() {
    for (const node of this._nestedIframes) {
      if (node.frame.isDetached()) {
        // Interpolating the frame object itself usually renders as "[object Object]";
        // log its URL when the frame exposes one so the warning is actionable.
        const label = typeof node.frame.url === "function" ? node.frame.url() : String(node.frame);
        _Logger.logger.warn(`Detached frame skipped: ${label}`);
        continue;
      }
      yield node;
    }
  }
}
|
|
27
|
+
exports.FrameNode = FrameNode;
|
|
28
|
+
/**
 * The iframe hierarchy found under a root (e.g. a page or frame), exposed through
 * lazy generators that skip frames that have since been detached.
 */
class FrameTree {
  /**
   * @param {object} root - The object the tree was built from.
   * @param {Iterable<object>} tree - Top-level nodes found directly under `root`.
   */
  constructor(root, tree) {
    this.root = root;
    this._tree = tree;
  }

  /**
   * Yields the top-level nodes whose frame is still attached.
   * Detached ones are skipped with a warning.
   *
   * @returns {Generator<object>}
   */
  *tree() {
    for (const node of this._tree) {
      if (node.frame.isDetached()) {
        // Interpolating the frame object itself usually renders as "[object Object]";
        // log its URL when the frame exposes one so the warning is actionable.
        const label = typeof node.frame.url === "function" ? node.frame.url() : String(node.frame);
        _Logger.logger.warn(`Detached frame skipped: ${label}`);
        continue;
      }
      yield node;
    }
  }

  /**
   * Yields every attached node in the tree, depth-first: each top-level node is
   * followed by all of its descendants before the next top-level node.
   *
   * @returns {Generator<object>}
   */
  *nodes() {
    for (const node of this.tree()) {
      yield node;
      yield* this._traverseNested(node);
    }
  }

  /**
   * Depth-first walk over the descendants of `node` (the node itself is not yielded).
   *
   * @param {object} node - Node exposing a `nestedIframes()` generator.
   * @returns {Generator<object>}
   */
  *_traverseNested(node) {
    for (const child of node.nestedIframes()) {
      yield child;
      yield* this._traverseNested(child);
    }
  }

  /**
   * Builds a tree by recursively discovering iframes under `root`.
   *
   * @param {object} root - Starting point handed to `processFrameRecursive`.
   * @param {number} [iframeTimeout=10.0] - Timeout forwarded to discovery
   *   (discovery multiplies it by 1000 for `setTimeout`, i.e. it is in seconds).
   * @param {boolean} [skipUseless=true] - Forwarded to iframe discovery unchanged.
   * @returns {Promise<FrameTree>}
   */
  static async fromRoot(root, iframeTimeout = 10.0, skipUseless = true) {
    // Shared across the whole recursion so the same frame is never processed twice.
    const processed = new Set();
    const tree = await processFrameRecursive(root, processed, iframeTimeout, skipUseless);
    return new FrameTree(root, tree);
  }
}
|
|
60
|
+
exports.FrameTree = FrameTree;
|
|
61
|
+
/**
 * Races `work` against a timer that rejects with `Error("Timeout")` after `seconds`,
 * and always clears the timer once the race settles so no timer outlives the call.
 *
 * @param {Promise<*>|*} work - The operation to wait for.
 * @param {number} seconds - Time budget in seconds.
 * @returns {Promise<*>} Settles like `work`, or rejects on timeout.
 */
function raceAgainstTimeout(work, seconds) {
  let timer;
  const deadline = new Promise((_, reject) => {
    timer = setTimeout(() => reject(new Error("Timeout")), seconds * 1000);
  });
  return Promise.race([work, deadline]).finally(() => clearTimeout(timer));
}

/**
 * Finds the iframes directly under `root` and builds a node for each one
 * (each node in turn carries its own nested iframes).
 *
 * Failures are logged and skipped rather than thrown: a failure to list the iframe
 * elements yields `[]`, and a failure on one iframe only drops that iframe.
 *
 * @param {object} root - Where to look for iframes. When it exposes a
 *   `frameElement()` function it is treated as a frame and tracked in `processedFrames`.
 * @param {Set<object>} processedFrames - Frames already visited; mutated.
 * @param {number} iframeTimeout - Per-step time budget in seconds.
 * @param {boolean} skipUseless - Forwarded to `findTopLevelIframeElements`.
 * @returns {Promise<Array<object>>} Nodes for the iframes that were processed.
 */
async function processFrameRecursive(root, processedFrames, iframeTimeout, skipUseless) {
  // Duck-type check: only frame-like roots are de-duplicated.
  const isFrame = "frameElement" in root && typeof root.frameElement === "function";
  if (isFrame && processedFrames.has(root)) {
    return [];
  }
  if (isFrame) {
    processedFrames.add(root);
  }
  const iframeNodes = [];
  try {
    let iframeHandles;
    try {
      iframeHandles = await raceAgainstTimeout((0, _utils.findTopLevelIframeElements)(root, skipUseless), iframeTimeout);
    } catch (error) {
      _Logger.logger.error("Timeout finding iframe elements, skipping");
      return [];
    }
    if (!iframeHandles || iframeHandles.length === 0) {
      return [];
    }
    _Logger.logger.debug(`Found ${iframeHandles.length} iframe elements to process`);
    // Sequential on purpose: processing order determines the order of the result.
    for (const iframeHandle of iframeHandles) {
      try {
        const iframeNode = await raceAgainstTimeout(processSingleIframe(iframeHandle, processedFrames, iframeTimeout, skipUseless), iframeTimeout);
        if (iframeNode !== null) {
          iframeNodes.push(iframeNode);
        }
      } catch (error) {
        _Logger.logger.error("Timeout processing iframe, skipping");
      }
    }
  } catch (error) {
    _Logger.logger.error(`Error processing frames in context: ${error}`);
  }
  return iframeNodes;
}
|
|
98
|
+
/**
 * Builds the tree node for one iframe element: resolves its content frame, checks
 * whether it allows async scripts, and recursively collects its nested iframes.
 *
 * @param {object} iframeHandle - Element handle exposing `contentFrame()`.
 * @param {Set<object>} processedFrames - Frames already visited; passed on to the recursion.
 * @param {number} iframeTimeout - Time budget in seconds for resolving the content frame
 *   (also forwarded to the recursion).
 * @param {boolean} skipUseless - Forwarded to the recursion unchanged.
 * @returns {Promise<FrameNode|null>} The node, or `null` when the content frame cannot
 *   be resolved in time, is missing, or was already processed.
 */
async function processSingleIframe(iframeHandle, processedFrames, iframeTimeout, skipUseless) {
  let contentFrame;
  let timer;
  try {
    const pendingFrame = iframeHandle.contentFrame();
    const deadline = new Promise((_, reject) => {
      timer = setTimeout(() => reject(new Error("Timeout accessing contentFrame")), iframeTimeout * 1000);
    });
    contentFrame = await Promise.race([pendingFrame, deadline]);
  } catch (error) {
    _Logger.logger.error("Timeout or error accessing content_frame for iframe, skipping");
    return null;
  } finally {
    // Without this the timer stays alive for the full timeout after every call.
    clearTimeout(timer);
  }
  if (!contentFrame) {
    _Logger.logger.error("Could not access content_frame for iframe");
    return null;
  }
  if (processedFrames.has(contentFrame)) {
    return null;
  }
  const allowsAsyncScripts = await (0, _checkFrameAllowsAsyncScripts.checkFrameAllowsAsyncScripts)(iframeHandle);
  const nestedIframes = await processFrameRecursive(contentFrame, processedFrames, iframeTimeout, skipUseless);
  return new FrameNode(contentFrame, iframeHandle, allowsAsyncScripts, nestedIframes);
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, "__esModule", {
|
|
4
|
+
value: true
|
|
5
|
+
});
|
|
6
|
+
exports.getContentWithNestedIframes = getContentWithNestedIframes;
|
|
7
|
+
var _frameTree = require("./frameTree");
|
|
8
|
+
var _stitchIframe = require("./stitchIframe");
|
|
9
|
+
/**
 * Returns the HTML of `root` as a string with the content of its (nested) iframes
 * stitched in.
 *
 * @param {object} root - Starting point handed to `FrameTree.fromRoot`.
 * @param {number} [iframeTimeout=10.0] - Timeout forwarded to frame discovery.
 * @param {Function} [htmlContentExtractor] - Optional extractor forwarded to
 *   `stitchIframeContents`.
 * @returns {Promise<string>} `toString()` of the stitched document.
 */
async function getContentWithNestedIframes(root, iframeTimeout = 10.0, htmlContentExtractor) {
  // Always asks discovery to skip "useless" iframes.
  const skipUselessIframes = true;
  const frameHierarchy = await _frameTree.FrameTree.fromRoot(root, iframeTimeout, skipUselessIframes);
  const stitchedDocument = await (0, _stitchIframe.stitchIframeContents)(frameHierarchy, htmlContentExtractor);
  return stitchedDocument.toString();
}
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, "__esModule", {
|
|
4
|
+
value: true
|
|
5
|
+
});
|
|
6
|
+
Object.defineProperty(exports, "ALL_IFRAMES_CSS_SELECTOR", {
|
|
7
|
+
enumerable: true,
|
|
8
|
+
get: function () {
|
|
9
|
+
return _utils.ALL_IFRAMES_CSS_SELECTOR;
|
|
10
|
+
}
|
|
11
|
+
});
|
|
12
|
+
Object.defineProperty(exports, "FrameNode", {
|
|
13
|
+
enumerable: true,
|
|
14
|
+
get: function () {
|
|
15
|
+
return _frameTree.FrameNode;
|
|
16
|
+
}
|
|
17
|
+
});
|
|
18
|
+
Object.defineProperty(exports, "FrameTree", {
|
|
19
|
+
enumerable: true,
|
|
20
|
+
get: function () {
|
|
21
|
+
return _frameTree.FrameTree;
|
|
22
|
+
}
|
|
23
|
+
});
|
|
24
|
+
Object.defineProperty(exports, "HtmlContentExtractor", {
|
|
25
|
+
enumerable: true,
|
|
26
|
+
get: function () {
|
|
27
|
+
return _getContentWithNestedIframes.HtmlContentExtractor;
|
|
28
|
+
}
|
|
29
|
+
});
|
|
30
|
+
Object.defineProperty(exports, "IFRAME_CAPTCHA_SRC_PATTERNS", {
|
|
31
|
+
enumerable: true,
|
|
32
|
+
get: function () {
|
|
33
|
+
return _utils.IFRAME_CAPTCHA_SRC_PATTERNS;
|
|
34
|
+
}
|
|
35
|
+
});
|
|
36
|
+
Object.defineProperty(exports, "IFRAME_PIXEL_SRC_DOMAINS", {
|
|
37
|
+
enumerable: true,
|
|
38
|
+
get: function () {
|
|
39
|
+
return _utils.IFRAME_PIXEL_SRC_DOMAINS;
|
|
40
|
+
}
|
|
41
|
+
});
|
|
42
|
+
Object.defineProperty(exports, "IFRAME_REPLACEMENT_TAG", {
|
|
43
|
+
enumerable: true,
|
|
44
|
+
get: function () {
|
|
45
|
+
return _utils.IFRAME_REPLACEMENT_TAG;
|
|
46
|
+
}
|
|
47
|
+
});
|
|
48
|
+
Object.defineProperty(exports, "IFRAME_SRC_ATTRS", {
|
|
49
|
+
enumerable: true,
|
|
50
|
+
get: function () {
|
|
51
|
+
return _utils.IFRAME_SRC_ATTRS;
|
|
52
|
+
}
|
|
53
|
+
});
|
|
54
|
+
Object.defineProperty(exports, "IFRAME_TAGS", {
|
|
55
|
+
enumerable: true,
|
|
56
|
+
get: function () {
|
|
57
|
+
return _utils.IFRAME_TAGS;
|
|
58
|
+
}
|
|
59
|
+
});
|
|
60
|
+
Object.defineProperty(exports, "checkFrameAllowsAsyncScripts", {
|
|
61
|
+
enumerable: true,
|
|
62
|
+
get: function () {
|
|
63
|
+
return _checkFrameAllowsAsyncScripts.checkFrameAllowsAsyncScripts;
|
|
64
|
+
}
|
|
65
|
+
});
|
|
66
|
+
Object.defineProperty(exports, "findTopLevelIframeElements", {
|
|
67
|
+
enumerable: true,
|
|
68
|
+
get: function () {
|
|
69
|
+
return _utils.findTopLevelIframeElements;
|
|
70
|
+
}
|
|
71
|
+
});
|
|
72
|
+
Object.defineProperty(exports, "getContainerFrame", {
|
|
73
|
+
enumerable: true,
|
|
74
|
+
get: function () {
|
|
75
|
+
return _getContainerFrame.getContainerFrame;
|
|
76
|
+
}
|
|
77
|
+
});
|
|
78
|
+
Object.defineProperty(exports, "getContentWithNestedIframes", {
|
|
79
|
+
enumerable: true,
|
|
80
|
+
get: function () {
|
|
81
|
+
return _getContentWithNestedIframes.getContentWithNestedIframes;
|
|
82
|
+
}
|
|
83
|
+
});
|
|
84
|
+
Object.defineProperty(exports, "stitchIframeContents", {
|
|
85
|
+
enumerable: true,
|
|
86
|
+
get: function () {
|
|
87
|
+
return _stitchIframe.stitchIframeContents;
|
|
88
|
+
}
|
|
89
|
+
});
|
|
90
|
+
var _utils = require("./utils");
|
|
91
|
+
var _frameTree = require("./frameTree");
|
|
92
|
+
var _stitchIframe = require("./stitchIframe");
|
|
93
|
+
var _getContentWithNestedIframes = require("./getContentWithNestedIframes");
|
|
94
|
+
var _getContainerFrame = require("./getContainerFrame");
|
|
95
|
+
var _checkFrameAllowsAsyncScripts = require("./checkFrameAllowsAsyncScripts");
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, "__esModule", {
|
|
4
|
+
value: true
|
|
5
|
+
});
|
|
6
|
+
exports.stitchIframeContents = stitchIframeContents;
|
|
7
|
+
var _nodeHtmlParser = require("node-html-parser");
|
|
8
|
+
var _uuid = require("uuid");
|
|
9
|
+
var _Logger = require("../Logger");
|
|
10
|
+
var _utils = require("./utils");
|
|
11
|
+
/**
 * Produces a parsed document for the tree's root in which iframe elements are
 * filled with the content of their frames.
 *
 * Steps: tag every iframe element with a temporary `data-iframe-marker` attribute,
 * extract HTML for the root and each frame, parse the root HTML, stitch each
 * top-level iframe (recursively) into it, and finally remove the temporary markers.
 *
 * @param {object} iframeTree - Tree exposing `root`, `nodes()` and `tree()`.
 * @param {Function} [extractHtml] - Async `(root) => html`. When falsy, a default is
 *   used that calls `root.content()` if available, else evaluates `el.outerHTML`.
 * @returns {Promise<object>} The parsed, stitched root document.
 */
async function stitchIframeContents(iframeTree, extractHtml) {
  async function defaultExtractHtml(root) {
    const hasContentMethod = "content" in root && typeof root.content === "function";
    if (hasContentMethod) {
      return await root.content();
    }
    return await root.evaluate(el => el.outerHTML);
  }
  const extract = extractHtml || defaultExtractHtml;

  // Phase 1: mark each iframe element so it can be located in the serialized HTML.
  const markerByFrame = new Map();
  for (const node of iframeTree.nodes()) {
    try {
      const shortId = (0, _uuid.v4)().substring(0, 8);
      const markerId = `iframe-${shortId}`;
      await node.frameElement.evaluate((el, marker) => el.setAttribute("data-iframe-marker", marker), markerId);
      markerByFrame.set(node.frame, markerId);
    } catch (error) {
      _Logger.logger.warn(`Error adding marker to iframe: ${error}`);
    }
  }

  try {
    // Phase 2: serialize the root first, then every frame.
    const rootHtml = await extract(iframeTree.root);
    const htmlByFrame = new Map();
    for (const node of iframeTree.nodes()) {
      try {
        const frameHtml = await extract(node.frame);
        htmlByFrame.set(node.frame, frameHtml);
      } catch (error) {
        _Logger.logger.warn(`Error extracting content from frame: ${error}`);
      }
    }

    // Phase 3: stitch each top-level iframe into the parsed root document.
    const stitched = (0, _nodeHtmlParser.parse)(rootHtml);
    for (const topLevelNode of iframeTree.tree()) {
      stitchIframeRecursive(stitched, topLevelNode, htmlByFrame, markerByFrame);
    }
    return stitched;
  } finally {
    // Phase 4: always strip the temporary markers from the live elements.
    for (const node of iframeTree.nodes()) {
      try {
        await node.frameElement.evaluate(el => el.removeAttribute("data-iframe-marker"));
      } catch (error) {
        _Logger.logger.warn(`Error removing marker from iframe: ${error}`);
      }
    }
  }
}
|
|
56
|
+
/**
 * Replaces the marked iframe element inside `parentSoup` with the parsed content of
 * its frame, after first stitching that frame's own nested iframes.
 *
 * Bails out with a warning (leaving `parentSoup` untouched) when the frame has no
 * extracted content, no marker, no matching element in `parentSoup`, or no `<html>`
 * element in its parsed content.
 *
 * @param {object} parentSoup - Parsed document that contains the iframe element.
 * @param {object} iframeNode - Tree node exposing `frame` and `nestedIframes()`.
 * @param {Map<object, string>} frameContents - Extracted HTML keyed by frame.
 * @param {Map<object, string>} frameToMarker - Marker id keyed by frame.
 * @returns {void}
 */
function stitchIframeRecursive(parentSoup, iframeNode, frameContents, frameToMarker) {
  const { frame } = iframeNode;

  const frameHtml = frameContents.get(frame);
  if (frameHtml === undefined) {
    _Logger.logger.warn(`No content found for frame ${iframeNode.frame}`);
    return;
  }

  const markerId = frameToMarker.get(frame);
  if (!markerId) {
    _Logger.logger.warn(`No marker found for frame ${iframeNode.frame}`);
    return;
  }

  // Try each known iframe-like tag until the marked element is found.
  let host = null;
  for (const tag of _utils.IFRAME_TAGS) {
    const candidate = parentSoup.querySelector(`${tag}[data-iframe-marker="${markerId}"]`);
    if (!candidate) {
      continue;
    }
    host = candidate;
    break;
  }
  if (!host) {
    _Logger.logger.warn(`Could not find iframe element with marker ${markerId} in soup`);
    return;
  }

  const frameSoup = (0, _nodeHtmlParser.parse)(frameHtml);
  const htmlElement = frameSoup.querySelector("html");
  if (!htmlElement) {
    _Logger.logger.warn(`No html element found in iframe ${markerId}`);
    return;
  }

  // Children first, so their content is already in place when this frame is moved.
  for (const nestedNode of iframeNode.nestedIframes()) {
    stitchIframeRecursive(frameSoup, nestedNode, frameContents, frameToMarker);
  }

  if (host.hasAttribute("data-iframe-marker")) {
    host.removeAttribute("data-iframe-marker");
  }

  // Look up the body before clearing the host, then move the frame content in.
  const bodyElement = frameSoup.querySelector("body");
  host.innerHTML = "";
  if (bodyElement) {
    // Snapshot the child list: appending moves nodes while we iterate.
    Array.from(bodyElement.childNodes).forEach(child => {
      host.appendChild(child);
    });
  } else {
    host.appendChild(htmlElement);
  }

  // Swap the tag for its configured replacement, if any.
  const originalTagName = host.rawTagName.toLowerCase();
  if (originalTagName in _utils.IFRAME_REPLACEMENT_TAG) {
    host.rawTagName = _utils.IFRAME_REPLACEMENT_TAG[originalTagName];
  }
}
|
|
@@ -2,8 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
var _extendedTest = require("../../../common/extendedTest");
|
|
4
4
|
var _playwright = require("playwright");
|
|
5
|
-
var
|
|
6
|
-
(0, _extendedTest.describe)("Test
|
|
5
|
+
var _frameTree = require("../frameTree");
|
|
6
|
+
(0, _extendedTest.describe)("Test FrameTree", () => {
|
|
7
7
|
let browser;
|
|
8
8
|
let page;
|
|
9
9
|
(0, _extendedTest.beforeAll)(async () => {
|
|
@@ -39,12 +39,13 @@ var _findAllIframes = require("../findAllIframes");
|
|
|
39
39
|
</html>`, {
|
|
40
40
|
waitUntil: "domcontentloaded"
|
|
41
41
|
});
|
|
42
|
-
const
|
|
42
|
+
const iframeTree = await _frameTree.FrameTree.fromRoot(page);
|
|
43
|
+
const iframeNodes = Array.from(iframeTree.tree());
|
|
43
44
|
(0, _extendedTest.expect)(iframeNodes.length).toBe(2);
|
|
44
45
|
(0, _extendedTest.expect)(iframeNodes[0].frame).toBeDefined();
|
|
45
46
|
(0, _extendedTest.expect)(iframeNodes[1].frame).toBeDefined();
|
|
46
|
-
(0, _extendedTest.expect)(iframeNodes[0].nestedIframes.length).toBe(0);
|
|
47
|
-
(0, _extendedTest.expect)(iframeNodes[1].nestedIframes.length).toBe(0);
|
|
47
|
+
(0, _extendedTest.expect)(Array.from(iframeNodes[0].nestedIframes()).length).toBe(0);
|
|
48
|
+
(0, _extendedTest.expect)(Array.from(iframeNodes[1].nestedIframes()).length).toBe(0);
|
|
48
49
|
});
|
|
49
50
|
(0, _extendedTest.test)("should find nested iframes", async () => {
|
|
50
51
|
await page.goto(`data:text/html,
|
|
@@ -60,10 +61,12 @@ var _findAllIframes = require("../findAllIframes");
|
|
|
60
61
|
</html>`, {
|
|
61
62
|
waitUntil: "domcontentloaded"
|
|
62
63
|
});
|
|
63
|
-
const
|
|
64
|
+
const iframeTree = await _frameTree.FrameTree.fromRoot(page);
|
|
65
|
+
const iframeNodes = Array.from(iframeTree.tree());
|
|
64
66
|
(0, _extendedTest.expect)(iframeNodes.length).toBe(1);
|
|
65
|
-
|
|
66
|
-
(0, _extendedTest.expect)(
|
|
67
|
+
const nestedIframes = Array.from(iframeNodes[0].nestedIframes());
|
|
68
|
+
(0, _extendedTest.expect)(nestedIframes.length).toBe(1);
|
|
69
|
+
(0, _extendedTest.expect)(Array.from(nestedIframes[0].nestedIframes()).length).toBe(0);
|
|
67
70
|
});
|
|
68
71
|
(0, _extendedTest.test)("should handle page with no iframes", async () => {
|
|
69
72
|
await page.goto(`data:text/html,
|
|
@@ -75,7 +78,8 @@ var _findAllIframes = require("../findAllIframes");
|
|
|
75
78
|
</html>`, {
|
|
76
79
|
waitUntil: "domcontentloaded"
|
|
77
80
|
});
|
|
78
|
-
const
|
|
81
|
+
const iframeTree = await _frameTree.FrameTree.fromRoot(page);
|
|
82
|
+
const iframeNodes = Array.from(iframeTree.tree());
|
|
79
83
|
(0, _extendedTest.expect)(iframeNodes.length).toBe(0);
|
|
80
84
|
});
|
|
81
85
|
(0, _extendedTest.test)("should handle problematic iframe sources", async () => {
|
|
@@ -104,7 +108,8 @@ var _findAllIframes = require("../findAllIframes");
|
|
|
104
108
|
timeout: 5000
|
|
105
109
|
});
|
|
106
110
|
const startTime = Date.now();
|
|
107
|
-
const
|
|
111
|
+
const iframeTree = await _frameTree.FrameTree.fromRoot(page, 2.0);
|
|
112
|
+
const iframeNodes = Array.from(iframeTree.tree());
|
|
108
113
|
const elapsedTime = Date.now() - startTime;
|
|
109
114
|
(0, _extendedTest.expect)(elapsedTime).toBeLessThan(10000);
|
|
110
115
|
(0, _extendedTest.expect)(iframeNodes.length).toBeLessThan(3);
|
|
@@ -123,7 +128,8 @@ var _findAllIframes = require("../findAllIframes");
|
|
|
123
128
|
</html>`, {
|
|
124
129
|
waitUntil: "domcontentloaded"
|
|
125
130
|
});
|
|
126
|
-
const
|
|
131
|
+
const iframeTree = await _frameTree.FrameTree.fromRoot(page);
|
|
132
|
+
const iframeNodes = Array.from(iframeTree.tree());
|
|
127
133
|
(0, _extendedTest.expect)(iframeNodes.length).toBe(1);
|
|
128
134
|
});
|
|
129
135
|
(0, _extendedTest.test)("should handle legacy frame elements", async () => {
|
|
@@ -141,13 +147,14 @@ var _findAllIframes = require("../findAllIframes");
|
|
|
141
147
|
</html>`, {
|
|
142
148
|
waitUntil: "domcontentloaded"
|
|
143
149
|
});
|
|
144
|
-
const
|
|
150
|
+
const iframeTree = await _frameTree.FrameTree.fromRoot(page);
|
|
151
|
+
const iframeNodes = Array.from(iframeTree.tree());
|
|
145
152
|
(0, _extendedTest.expect)(iframeNodes.length).toBeGreaterThanOrEqual(0);
|
|
146
153
|
for (const node of iframeNodes) {
|
|
147
154
|
(0, _extendedTest.expect)(node.frame).toBeDefined();
|
|
148
155
|
}
|
|
149
156
|
});
|
|
150
|
-
(0, _extendedTest.test)("
|
|
157
|
+
(0, _extendedTest.test)("nodes() should return flat list", async () => {
|
|
151
158
|
await page.goto(`data:text/html,
|
|
152
159
|
<html>
|
|
153
160
|
<body>
|
|
@@ -161,7 +168,8 @@ var _findAllIframes = require("../findAllIframes");
|
|
|
161
168
|
</html>`, {
|
|
162
169
|
waitUntil: "domcontentloaded"
|
|
163
170
|
});
|
|
164
|
-
const
|
|
171
|
+
const iframeTree = await _frameTree.FrameTree.fromRoot(page);
|
|
172
|
+
const iframeList = Array.from(iframeTree.nodes());
|
|
165
173
|
(0, _extendedTest.expect)(iframeList.length).toBe(2);
|
|
166
174
|
for (const node of iframeList) {
|
|
167
175
|
(0, _extendedTest.expect)(node.frame).toBeDefined();
|
|
@@ -188,7 +196,8 @@ var _findAllIframes = require("../findAllIframes");
|
|
|
188
196
|
</html>`, {
|
|
189
197
|
waitUntil: "domcontentloaded"
|
|
190
198
|
});
|
|
191
|
-
const
|
|
199
|
+
const iframeTree = await _frameTree.FrameTree.fromRoot(page);
|
|
200
|
+
const iframeNodes = Array.from(iframeTree.tree());
|
|
192
201
|
(0, _extendedTest.expect)(iframeNodes.length).toBe(2);
|
|
193
202
|
let sandboxedNode;
|
|
194
203
|
let normalNode;
|