@intuned/browser-dev 0.1.9-dev.0 → 0.1.10-dev.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ai/extractStructuredData.js +21 -27
- package/dist/ai/tests/testCreateMatchesMapping.spec.js +216 -0
- package/dist/ai/tests/testExtractStructuredData.spec.js +346 -0
- package/dist/ai/tests/testExtractStructuredDataDomMatchingIframes.spec.js +459 -0
- package/dist/ai/tests/testExtractStructuredDataUnit.spec.js +375 -0
- package/dist/ai/tests/testMatching.spec.js +342 -0
- package/dist/ai/tests/testValidateMatchesMapping.spec.js +265 -0
- package/dist/common/extendedTest.js +38 -30
- package/dist/common/frame_utils/frameTree.js +116 -0
- package/dist/common/frame_utils/getContentWithNestedIframes.js +13 -0
- package/dist/common/frame_utils/index.js +95 -0
- package/dist/common/frame_utils/stitchIframe.js +105 -0
- package/dist/{helpers → common}/frame_utils/tests/testFindAllIframes.spec.js +24 -15
- package/dist/common/frame_utils/tests/testGetContentWithNestedIframes.spec.js +241 -0
- package/dist/common/frame_utils/utils.js +91 -0
- package/dist/common/getSimplifiedHtml.js +20 -20
- package/dist/common/matching/matching.js +91 -16
- package/dist/common/tests/matching.test.js +225 -0
- package/dist/common/tests/testGetSimplifiedHtml.spec.js +324 -0
- package/dist/helpers/extractMarkdown.js +16 -7
- package/dist/helpers/tests/testExtractMarkdown.spec.js +29 -0
- package/dist/helpers/waitForDomSettled.js +4 -4
- package/dist/types/intuned-runtime.d.ts +6 -32
- package/package.json +1 -1
- package/dist/helpers/frame_utils/constants.js +0 -8
- package/dist/helpers/frame_utils/findAllIframes.js +0 -82
- package/dist/helpers/frame_utils/index.js +0 -44
- /package/dist/{helpers → common}/frame_utils/checkFrameAllowsAsyncScripts.js +0 -0
- /package/dist/{helpers → common}/frame_utils/getContainerFrame.js +0 -0
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
var _extendedTest = require("../extendedTest");
|
|
4
|
+
var _playwrightCore = require("playwright-core");
|
|
5
|
+
var _matching = require("../matching/matching");
|
|
6
|
+
/**
 * Integration suite for the text/DOM matching helpers exported by
 * ../matching/matching.
 *
 * A single headless Chromium instance is launched for the whole suite and a
 * fresh page is opened for every test, so DOM state never leaks between tests.
 */
(0, _extendedTest.describe)("Test Matching Functions", () => {
  // Destructured aliases are invoked without a receiver, which is the same
  // calling convention as the transpiled `(0, module.fn)(...)` form.
  const {
    describe,
    test,
    expect,
    beforeAll,
    afterAll,
    beforeEach,
    afterEach
  } = _extendedTest;
  const {
    normalizeSpacing,
    removePunctuationAndSpaces,
    rankMatch,
    matchStringsWithDomContent,
    createMatchesMapping,
    validateMatchesMapping
  } = _matching;

  let browser;
  let page;

  /** Builds the minimal product page shared by the mapping tests. */
  const productMarkup = (title, price) => `
    <html>
      <body>
        <div class="product">
          <h2>${title}</h2>
          <div class="price">${price}</div>
        </div>
      </body>
    </html>
  `;

  /**
   * Escapes markup so it can be embedded in a single-quoted `srcdoc`
   * attribute. `&` is escaped first so the entities produced here are not
   * re-interpreted.
   */
  const escapeForSrcdoc = html => html.replace(/&/g, "&amp;").replace(/'/g, "&#39;");

  /**
   * Waits until the iframe matched by `selector` has a rendered body. This
   * replaces a fixed sleep, which is both slower and timing-dependent.
   */
  const waitForIframeBody = selector => page.frameLocator(selector).locator("body").waitFor();

  /** Asserts that every key exists in `record` and maps to a non-empty list. */
  const expectNonEmptyEntries = (record, keys) => {
    for (const key of keys) {
      expect(key in record).toBe(true);
      expect(record[key].length).toBeGreaterThan(0);
    }
  };

  beforeAll(async () => {
    browser = await _playwrightCore.chromium.launch({
      headless: true
    });
  });
  afterAll(async () => {
    // Guard so a failed launch is reported as such instead of being masked by
    // a TypeError raised from the teardown hook.
    if (browser) {
      await browser.close();
    }
  });
  beforeEach(async () => {
    page = await browser.newPage();
  });
  afterEach(async () => {
    if (page) {
      await page.close();
      page = undefined;
    }
  });

  describe("normalizeSpacing", () => {
    test("should replace multiple spaces with single space", async () => {
      // The input must really contain a run of spaces for this test to
      // exercise the collapsing behaviour named in its title.
      const result = normalizeSpacing({
        text: "hello    world"
      });
      expect(result).toBe("hello world");
    });
    test("should replace newlines and tabs with spaces", async () => {
      const result = normalizeSpacing({
        text: "hello\nworld\tthere"
      });
      expect(result).toBe("hello world there");
    });
    test("should handle mixed whitespace", async () => {
      const result = normalizeSpacing({
        text: "  hello\n\n  world\t\tthere  "
      });
      expect(result).toBe("hello world there");
    });
    test("should handle empty string", async () => {
      const result = normalizeSpacing({
        text: ""
      });
      expect(result).toBe("");
    });
  });

  describe("removePunctuationAndSpaces", () => {
    test("should remove basic punctuation", async () => {
      const result = removePunctuationAndSpaces({
        s: "Hello, World!"
      });
      expect(result).toBe("HelloWorld");
    });
    test("should remove spaces", async () => {
      const result = removePunctuationAndSpaces({
        s: "hello world there"
      });
      expect(result).toBe("helloworldthere");
    });
  });

  describe("rankMatch", () => {
    test("should rank exact match as HIGH", async () => {
      const result = rankMatch({
        original: "Hello World",
        match: "Hello World"
      });
      expect(result).toBe("HIGH");
    });
    test("should rank case insensitive match as HIGH", async () => {
      const result = rankMatch({
        original: "Hello World",
        match: "hello world"
      });
      expect(result).toBe("HIGH");
    });
    test("should rank completely different strings as LOW", async () => {
      const result = rankMatch({
        original: "Hello World",
        match: "Goodbye Universe"
      });
      expect(result).toBe("LOW");
    });
  });

  describe("matchStringsWithDomContent", () => {
    test("should find matches in DOM content", async () => {
      await page.setContent(`
        <html>
          <body>
            <h1>Hello World</h1>
            <p>This is a test paragraph</p>
            <span>Another test element</span>
          </body>
        </html>
      `);
      const stringsToMatch = ["Hello World", "test paragraph", "Another test"];
      const matches = await matchStringsWithDomContent({
        pageObject: page,
        stringsList: stringsToMatch
      });
      // Only presence of each key is asserted here, not the number of hits.
      for (const expected of stringsToMatch) {
        expect(expected in matches).toBe(true);
      }
    });
    test("should find matches in iframe content", async () => {
      await page.setContent(`
        <html>
          <body>
            <h1>Main Page Header</h1>
            <p>Content before iframe</p>
            <iframe id="test-iframe" srcdoc="<html><body><h2>Iframe Header</h2><p>Content inside iframe</p><span>Iframe text to match</span></body></html>"></iframe>
            <p>Content after iframe</p>
          </body>
        </html>
      `);
      await page.waitForSelector("#test-iframe");
      await waitForIframeBody("#test-iframe");
      const stringsToMatch = ["Main Page Header", "Content before iframe", "Iframe Header", "Content inside iframe", "Iframe text to match", "Content after iframe"];
      const matches = await matchStringsWithDomContent({
        pageObject: page,
        stringsList: stringsToMatch
      });
      expectNonEmptyEntries(matches, stringsToMatch);
    });
  });

  describe("createMatchesMapping and validateMatchesMapping", () => {
    const extractedData = {
      title: "iPhone 14 Pro",
      price: "$999"
    };
    const extractedValues = [extractedData.title, extractedData.price];

    test("should create and validate matches mapping", async () => {
      await page.setContent(productMarkup("iPhone 14 Pro", "$999"));
      const mapping = await createMatchesMapping(page, extractedData);
      expectNonEmptyEntries(mapping, extractedValues);
      const isValid = await validateMatchesMapping(page, mapping);
      expect(isValid).toBe(true);
    });
    test("should detect when DOM changes invalidate mapping", async () => {
      await page.setContent(productMarkup("iPhone 14 Pro", "$999"));
      const mapping = await createMatchesMapping(page, extractedData);
      // Swap the page for one whose text no longer matches the mapping.
      await page.setContent(productMarkup("iPhone 15 Pro", "$1099"));
      const isValid = await validateMatchesMapping(page, mapping);
      expect(isValid).toBe(false);
    });
    test("should work with iframe content", async () => {
      const iframeContent = productMarkup("iPhone 14 Pro", "$999");
      await page.setContent(`
        <html>
          <body>
            <h1>Product Catalog</h1>
            <iframe id="product-frame" srcdoc='${escapeForSrcdoc(iframeContent)}'></iframe>
          </body>
        </html>
      `);
      await page.waitForSelector("#product-frame");
      await waitForIframeBody("#product-frame");
      const mapping = await createMatchesMapping(page, extractedData);
      expectNonEmptyEntries(mapping, extractedValues);
      const isValid = await validateMatchesMapping(page, mapping);
      expect(isValid).toBe(true);
    });
  });
});
|
|
@@ -0,0 +1,324 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
var _extendedTest = require("../extendedTest");
|
|
4
|
+
var _playwrightCore = require("playwright-core");
|
|
5
|
+
var _getSimplifiedHtml = require("../getSimplifiedHtml");
|
|
6
|
+
/**
 * Integration suite for getSimplifiedHtml (../getSimplifiedHtml).
 *
 * A single headless Chromium instance is launched for the whole suite and a
 * fresh page is opened for every test. Each test loads a small HTML fixture
 * and checks which fragments appear in the simplified output.
 */
(0, _extendedTest.describe)("Test getSimplifiedHtml", () => {
  // Destructured aliases are invoked without a receiver, which is the same
  // calling convention as the transpiled `(0, module.fn)(...)` form.
  const {
    test,
    expect,
    beforeAll,
    afterAll,
    beforeEach,
    afterEach
  } = _extendedTest;
  const {
    getSimplifiedHtml
  } = _getSimplifiedHtml;

  let browser;
  let page;

  /** Loads `markup` into the current page and simplifies the whole page. */
  const simplifyPage = async (markup, ...rest) => {
    await page.setContent(markup);
    return getSimplifiedHtml(page, ...rest);
  };

  /** Asserts that every fragment occurs in `text`. */
  const expectContainsAll = (text, fragments) => {
    for (const fragment of fragments) {
      expect(text).toContain(fragment);
    }
  };

  /** Asserts that none of the fragments occur in `text`. */
  const expectContainsNone = (text, fragments) => {
    for (const fragment of fragments) {
      expect(text).not.toContain(fragment);
    }
  };

  /**
   * Escapes markup so it can be embedded in a single-quoted `srcdoc`
   * attribute. `&` is escaped first so the entities produced here are not
   * re-interpreted.
   */
  const escapeForSrcdoc = html => html.replace(/&/g, "&amp;").replace(/'/g, "&#39;");

  beforeAll(async () => {
    browser = await _playwrightCore.chromium.launch({
      headless: true
    });
  });
  afterAll(async () => {
    // Guard so a failed launch is reported as such instead of being masked by
    // a TypeError raised from the teardown hook.
    if (browser) {
      await browser.close();
    }
  });
  beforeEach(async () => {
    page = await browser.newPage();
  });
  afterEach(async () => {
    if (page) {
      await page.close();
      page = undefined;
    }
  });

  test("should simplify HTML from a Page", async () => {
    const simplified = await simplifyPage(`
      <html>
        <head><title>Test</title></head>
        <body>
          <div id="content">
            <h1>Main Header</h1>
            <p class="description" style="color: red;">This is a description</p>
            <button id="btn">Click me</button>
          </div>
        </body>
      </html>
    `);
    expectContainsAll(simplified, ["Main Header", "This is a description", "Click me", 'id="content"', 'id="btn"']);
    expect(simplified).not.toContain('style="color: red;"');
  });

  test("should simplify HTML from a Locator", async () => {
    await page.setContent(`
      <html>
        <body>
          <div id="container">
            <h2>Container Header</h2>
            <a href="/link" class="nav-link">Link</a>
          </div>
          <div id="other">
            <p>Other content</p>
          </div>
        </body>
      </html>
    `);
    const locator = page.locator("#container");
    const simplified = await getSimplifiedHtml(locator);
    expectContainsAll(simplified, ["Container Header", "Link", 'href="/link"']);
    // Content outside the located subtree must not leak into the result.
    expect(simplified).not.toContain("Other content");
  });

  test("should preserve interactive elements", async () => {
    const simplified = await simplifyPage(`
      <html>
        <body>
          <input type="text" name="username" placeholder="Enter username" />
          <button type="submit">Submit</button>
          <select name="country">
            <option>USA</option>
          </select>
          <textarea name="comments"></textarea>
        </body>
      </html>
    `);
    expectContainsAll(simplified, ['type="text"', 'name="username"', 'placeholder="Enter username"', 'type="submit"', 'name="country"', 'name="comments"']);
  });

  test("should preserve aria-label and data attributes", async () => {
    const simplified = await simplifyPage(`
      <html>
        <body>
          <button aria-label="Close dialog" data-action="close" data-id="123">
            X
          </button>
          <div data-component="card" data-name="product-card">
            Content
          </div>
        </body>
      </html>
    `);
    expectContainsAll(simplified, ['aria-label="Close dialog"', 'data-action="close"', 'data-id="123"', 'data-component="card"', 'data-name="product-card"']);
  });

  test("should filter out invisible elements by default", async () => {
    const simplified = await simplifyPage(`
      <html>
        <body>
          <div id="visible">Visible content</div>
          <div id="hidden" style="display: none;">Hidden content</div>
          <div id="invisible" style="visibility: hidden;">Invisible content</div>
          <div id="transparent" style="opacity: 0;">Transparent content</div>
        </body>
      </html>
    `);
    expect(simplified).toContain("Visible content");
    expectContainsNone(simplified, ["Hidden content", "Invisible content", "Transparent content"]);
  });

  test("should include invisible elements when keepOnlyVisibleElements is false", async () => {
    const simplified = await simplifyPage(`
      <html>
        <body>
          <div id="visible">Visible content</div>
          <div id="hidden" style="display: none;">Hidden content</div>
        </body>
      </html>
    `, {
      keepOnlyVisibleElements: false
    });
    expectContainsAll(simplified, ["Visible content", "Hidden content"]);
  });

  test("should include onclick when shouldIncludeOnClick is true", async () => {
    await page.setContent(`
      <html>
        <body>
          <button onclick="alert('clicked')">Click</button>
        </body>
      </html>
    `);
    // Both variants are produced from the same loaded page.
    const simplifiedWithoutOnClick = await getSimplifiedHtml(page, {
      shouldIncludeOnClick: false
    });
    const simplifiedWithOnClick = await getSimplifiedHtml(page, {
      shouldIncludeOnClick: true
    });
    expect(simplifiedWithoutOnClick).not.toContain("onclick=");
    expect(simplifiedWithOnClick).toContain("onclick=");
  });

  test("should add content attribute when shouldIncludeContentAsProp is true", async () => {
    const simplifiedWithContent = await simplifyPage(`
      <html>
        <body>
          <div id="text-content">Some text content</div>
        </body>
      </html>
    `, {
      shouldIncludeContentAsProp: true
    });
    expectContainsAll(simplifiedWithContent, ["content=", "Some text content"]);
  });

  test("should preserve input values", async () => {
    await page.setContent(`
      <html>
        <body>
          <input type="text" id="username" value="john_doe" />
          <input type="email" id="email" value="" />
        </body>
      </html>
    `);
    await page.fill("#username", "jane_smith");
    const simplified = await getSimplifiedHtml(page);
    // NOTE(review): only the presence of a value attribute is checked; the
    // test does not pin whether the original or the filled value is emitted.
    expectContainsAll(simplified, ["value=", 'type="text"']);
  });

  test("should handle empty page", async () => {
    const simplified = await simplifyPage(`
      <html>
        <body></body>
      </html>
    `);
    expect(simplified).toBe("");
  });

  test("should handle complex nested structure", async () => {
    const simplified = await simplifyPage(`
      <html>
        <body>
          <div id="app">
            <header>
              <nav>
                <a href="/" id="home">Home</a>
                <a href="/about" id="about">About</a>
              </nav>
            </header>
            <main>
              <article>
                <h1>Article Title</h1>
                <p>Article content</p>
              </article>
            </main>
            <footer>
              <p>Copyright 2024</p>
            </footer>
          </div>
        </body>
      </html>
    `);
    expectContainsAll(simplified, ['id="app"', 'href="/"', 'href="/about"', "Article Title", "Article content", "Copyright 2024"]);
  });

  test("should remove unnecessary wrapper divs", async () => {
    const simplified = await simplifyPage(`
      <html>
        <body>
          <div>
            <div>
              <div>
                <button id="btn">Click</button>
              </div>
            </div>
          </div>
        </body>
      </html>
    `);
    expectContainsAll(simplified, ['id="btn"', "Click", "<button"]);
  });

  test("should work with Frame", async () => {
    const iframeContent = `
      <html>
        <body>
          <h2>Frame Content</h2>
          <a href="/link" id="frame-link">Frame Link</a>
        </body>
      </html>
    `;
    await page.setContent(`
      <html>
        <body>
          <h1>Main Page</h1>
          <iframe id="test-frame" srcdoc='${escapeForSrcdoc(iframeContent)}'></iframe>
        </body>
      </html>
    `);
    await page.waitForSelector("#test-frame");
    // Wait for the frame document itself rather than sleeping a fixed time.
    await page.frameLocator("#test-frame").locator("body").waitFor();
    // page.frame() is synchronous and returns null when nothing matches.
    const frameElement = page.frame({
      url: /about:srcdoc/
    });
    if (!frameElement) {
      throw new Error("Frame not found");
    }
    const simplified = await getSimplifiedHtml(frameElement);
    expectContainsAll(simplified, ["Frame Content", "Frame Link", 'href="/link"']);
    expect(simplified).not.toContain("Main Page");
  });

  test("should handle special characters in text", async () => {
    // The special characters are written as entities so they reach the DOM as
    // text instead of being parsed as an unknown <special> element.
    const simplified = await simplifyPage(`
      <html>
        <body>
          <div id="special">
            <p>Text with &lt;special&gt; characters &amp; symbols</p>
            <a href="/path?query=value&amp;other=123">Link with query</a>
          </div>
        </body>
      </html>
    `);
    expectContainsAll(simplified, ["special", "characters", "symbols"]);
  });

  test("should reject for non-existent locator", async () => {
    await page.setContent(`
      <html>
        <body>
          <div id="exists">Content</div>
        </body>
      </html>
    `);
    const locator = page.locator("#does-not-exist").first();
    // Shorten the timeout so the expected failure does not take 30 seconds.
    page.setDefaultTimeout(2000);
    try {
      await expect(getSimplifiedHtml(locator)).rejects.toThrow();
    } finally {
      // Restore the default even when the assertion above fails.
      page.setDefaultTimeout(30000);
    }
  });

  test("should handle role attributes", async () => {
    const simplified = await simplifyPage(`
      <html>
        <body>
          <div role="navigation">
            <a href="/">Home</a>
          </div>
          <div role="main">
            <h1>Main Content</h1>
          </div>
          <div role="complementary">
            <p>Sidebar</p>
          </div>
        </body>
      </html>
    `);
    expectContainsAll(simplified, ['role="navigation"', 'role="main"', 'role="complementary"']);
  });
});
|
|
@@ -6,24 +6,33 @@ Object.defineProperty(exports, "__esModule", {
|
|
|
6
6
|
exports.extractMarkdown = void 0;
|
|
7
7
|
var _utils = require("./utils");
|
|
8
8
|
var _locatorHelpers = require("../common/locatorHelpers");
|
|
9
|
+
var _frame_utils = require("../common/frame_utils");
|
|
9
10
|
function _interopRequireWildcard(e, t) { if ("function" == typeof WeakMap) var r = new WeakMap(), n = new WeakMap(); return (_interopRequireWildcard = function (e, t) { if (!t && e && e.__esModule) return e; var o, i, f = { __proto__: null, default: e }; if (null === e || "object" != typeof e && "function" != typeof e) return f; if (o = t ? n : r) { if (o.has(e)) return o.get(e); o.set(e, f); } for (const t in e) "default" !== t && {}.hasOwnProperty.call(e, t) && ((i = (o = Object.defineProperty) && Object.getOwnPropertyDescriptor(e, t)) && (i.get || i.set) ? o(f, t, i) : f[t] = e[t]); return f; })(e, t); }
|
|
10
11
|
const extractMarkdown = async input => {
|
|
11
|
-
var _handle;
|
|
12
12
|
const {
|
|
13
13
|
source
|
|
14
14
|
} = input;
|
|
15
15
|
const isPageSource = (0, _locatorHelpers.isPage)(source);
|
|
16
16
|
const pageObject = isPageSource ? source : source.page();
|
|
17
17
|
await (0, _utils.ensureBrowserScripts)(pageObject);
|
|
18
|
-
|
|
18
|
+
const htmlContent = await (0, _frame_utils.getContentWithNestedIframes)(source);
|
|
19
|
+
let md;
|
|
19
20
|
if (isPageSource) {
|
|
20
|
-
|
|
21
|
+
md = await pageObject.evaluate(html => {
|
|
22
|
+
const parser = new DOMParser();
|
|
23
|
+
const doc = parser.parseFromString(html, "text/html");
|
|
24
|
+
const body = doc.body || doc.documentElement;
|
|
25
|
+
return window.__INTUNED__.convertElementToMarkdown(body);
|
|
26
|
+
}, htmlContent);
|
|
21
27
|
} else {
|
|
22
|
-
|
|
28
|
+
md = await pageObject.evaluate(html => {
|
|
29
|
+
var _doc$body;
|
|
30
|
+
const parser = new DOMParser();
|
|
31
|
+
const doc = parser.parseFromString(html, "text/html");
|
|
32
|
+
const element = ((_doc$body = doc.body) === null || _doc$body === void 0 ? void 0 : _doc$body.firstElementChild) || doc.documentElement.firstElementChild || doc.documentElement;
|
|
33
|
+
return window.__INTUNED__.convertElementToMarkdown(element);
|
|
34
|
+
}, htmlContent);
|
|
23
35
|
}
|
|
24
|
-
const md = await pageObject.evaluate(element => {
|
|
25
|
-
return window.__INTUNED__.convertElementToMarkdown(element);
|
|
26
|
-
}, (_handle = handle) === null || _handle === void 0 ? void 0 : _handle.asElement());
|
|
27
36
|
const prettier = await Promise.resolve().then(() => _interopRequireWildcard(require("prettier/standalone")));
|
|
28
37
|
const parserMarkdown = await Promise.resolve().then(() => _interopRequireWildcard(require("prettier/parser-markdown")));
|
|
29
38
|
const formattedMarkdown = await prettier.format(md, {
|
|
@@ -287,4 +287,33 @@ var _ = require("..");
|
|
|
287
287
|
(0, _extendedTest.expect)(result).toContain("> Important quote here.");
|
|
288
288
|
(0, _extendedTest.expect)(result).toContain("`inline code`");
|
|
289
289
|
});
|
|
290
|
+
// Markdown extracted from a locator must include the content of an iframe
// nested inside the located element and exclude anything outside of it.
(0, _extendedTest.test)("extract markdown from locator with iframe", async ({
  page
}) => {
  const { expect } = _extendedTest;
  const { extractMarkdown } = _;

  // Fixture assembled line by line; joining with "\n" yields the same markup
  // as a multi-line template literal with a leading and trailing newline.
  const fixture = [
    "",
    "<html>",
    "<body>",
    '<div id="outside">',
    "<h1>Outside Content</h1>",
    "</div>",
    '<div id="container">',
    "<h2>Container Header</h2>",
    '<iframe id="nested-iframe" srcdoc="<html><body><h3>Nested Iframe</h3><p>Nested content</p></body></html>"></iframe>',
    "<p>After iframe</p>",
    "</div>",
    "</body>",
    "</html>",
    ""
  ].join("\n");
  await page.setContent(fixture);

  // Make sure both the iframe element and its document are ready.
  await page.waitForSelector("#nested-iframe");
  const nestedBody = page.frameLocator("#nested-iframe").locator("body");
  await nestedBody.waitFor();

  const markdown = await extractMarkdown({
    source: page.locator("#container")
  });

  const expectedFragments = ["## Container Header", "### Nested Iframe", "Nested content", "After iframe"];
  for (const fragment of expectedFragments) {
    expect(markdown).toContain(fragment);
  }
  expect(markdown).not.toContain("Outside Content");
});
|
|
290
319
|
});
|
|
@@ -6,8 +6,8 @@ Object.defineProperty(exports, "__esModule", {
|
|
|
6
6
|
exports.waitForDomSettled = void 0;
|
|
7
7
|
var _locatorHelpers = require("../common/locatorHelpers");
|
|
8
8
|
var _Logger = require("../common/Logger");
|
|
9
|
-
var
|
|
10
|
-
var _getContainerFrame = require("
|
|
9
|
+
var _frameTree = require("../common/frame_utils/frameTree");
|
|
10
|
+
var _getContainerFrame = require("../common/frame_utils/getContainerFrame");
|
|
11
11
|
const waitForDomSettled = async options => {
|
|
12
12
|
const {
|
|
13
13
|
source,
|
|
@@ -82,9 +82,9 @@ const waitForDomSettled = async options => {
|
|
|
82
82
|
if (!result) {
|
|
83
83
|
return false;
|
|
84
84
|
}
|
|
85
|
-
const
|
|
85
|
+
const iframeTree = await _frameTree.FrameTree.fromRoot(frame, 10.0, true);
|
|
86
86
|
let hasRestrictedIframes = false;
|
|
87
|
-
for (const iframeNode of
|
|
87
|
+
for (const iframeNode of iframeTree.nodes()) {
|
|
88
88
|
if (iframeNode.allowsAsyncScripts) {
|
|
89
89
|
const iframeElementHandle = await iframeNode.frame.evaluateHandle("document.documentElement");
|
|
90
90
|
const iframeResult = await iframeElementHandle.evaluate(jsCode, {
|