@intuned/browser-dev 0.1.9-dev.0 → 0.1.12-dev.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. package/dist/ai/extractStructuredData.js +21 -27
  2. package/dist/ai/tests/testCreateMatchesMapping.spec.js +216 -0
  3. package/dist/ai/tests/testExtractStructuredData.spec.js +346 -0
  4. package/dist/ai/tests/testExtractStructuredDataDomMatchingIframes.spec.js +459 -0
  5. package/dist/ai/tests/testExtractStructuredDataUnit.spec.js +375 -0
  6. package/dist/ai/tests/testMatching.spec.js +342 -0
  7. package/dist/ai/tests/testValidateMatchesMapping.spec.js +265 -0
  8. package/dist/common/extendedTest.js +38 -30
  9. package/dist/common/frame_utils/frameTree.js +116 -0
  10. package/dist/common/frame_utils/getContentWithNestedIframes.js +13 -0
  11. package/dist/common/frame_utils/index.js +95 -0
  12. package/dist/common/frame_utils/stitchIframe.js +105 -0
  13. package/dist/{helpers → common}/frame_utils/tests/testFindAllIframes.spec.js +24 -15
  14. package/dist/common/frame_utils/tests/testGetContentWithNestedIframes.spec.js +241 -0
  15. package/dist/common/frame_utils/utils.js +91 -0
  16. package/dist/common/getSimplifiedHtml.js +20 -20
  17. package/dist/common/matching/matching.js +91 -16
  18. package/dist/common/tests/matching.test.js +225 -0
  19. package/dist/common/tests/testGetSimplifiedHtml.spec.js +324 -0
  20. package/dist/helpers/export.d.ts +5 -12
  21. package/dist/helpers/extractMarkdown.js +16 -7
  22. package/dist/helpers/index.d.ts +5 -12
  23. package/dist/helpers/tests/testDownloadFile.spec.js +41 -0
  24. package/dist/helpers/tests/testExtractMarkdown.spec.js +29 -0
  25. package/dist/helpers/tests/testInjectAttachmentType.spec.js +16 -9
  26. package/dist/helpers/tests/testValidateDataUsingSchema.spec.js +111 -0
  27. package/dist/helpers/types/Attachment.js +23 -22
  28. package/dist/helpers/types/__tests__/Attachment.test.js +130 -0
  29. package/dist/helpers/validateDataUsingSchema.js +6 -2
  30. package/dist/helpers/waitForDomSettled.js +4 -4
  31. package/dist/types/intuned-runtime.d.ts +6 -32
  32. package/package.json +1 -1
  33. package/dist/helpers/frame_utils/constants.js +0 -8
  34. package/dist/helpers/frame_utils/findAllIframes.js +0 -82
  35. package/dist/helpers/frame_utils/index.js +0 -44
  36. package/dist/{helpers → common}/frame_utils/checkFrameAllowsAsyncScripts.js +0 -0
  37. package/dist/{helpers → common}/frame_utils/getContainerFrame.js +0 -0
@@ -0,0 +1,225 @@
1
+ "use strict";
2
+
3
+ var _extendedTest = require("../extendedTest");
4
+ var _playwrightCore = require("playwright-core");
5
+ var _matching = require("../matching/matching");
6
+ (0, _extendedTest.describe)("Test Matching Functions", () => {
7
+ let browser;
8
+ let page;
9
+ (0, _extendedTest.beforeAll)(async () => {
10
+ browser = await _playwrightCore.chromium.launch({
11
+ headless: true
12
+ });
13
+ });
14
+ (0, _extendedTest.afterAll)(async () => {
15
+ await browser.close();
16
+ });
17
+ (0, _extendedTest.beforeEach)(async () => {
18
+ page = await browser.newPage();
19
+ });
20
+ (0, _extendedTest.afterEach)(async () => {
21
+ await page.close();
22
+ });
23
+ (0, _extendedTest.describe)("normalizeSpacing", () => {
24
+ (0, _extendedTest.test)("should replace multiple spaces with single space", async () => {
25
+ const result = (0, _matching.normalizeSpacing)({
26
+ text: "hello world"
27
+ });
28
+ (0, _extendedTest.expect)(result).toBe("hello world");
29
+ });
30
+ (0, _extendedTest.test)("should replace newlines and tabs with spaces", async () => {
31
+ const result = (0, _matching.normalizeSpacing)({
32
+ text: "hello\nworld\tthere"
33
+ });
34
+ (0, _extendedTest.expect)(result).toBe("hello world there");
35
+ });
36
+ (0, _extendedTest.test)("should handle mixed whitespace", async () => {
37
+ const result = (0, _matching.normalizeSpacing)({
38
+ text: " hello\n\n world\t\tthere "
39
+ });
40
+ (0, _extendedTest.expect)(result).toBe("hello world there");
41
+ });
42
+ (0, _extendedTest.test)("should handle empty string", async () => {
43
+ const result = (0, _matching.normalizeSpacing)({
44
+ text: ""
45
+ });
46
+ (0, _extendedTest.expect)(result).toBe("");
47
+ });
48
+ });
49
+ (0, _extendedTest.describe)("removePunctuationAndSpaces", () => {
50
+ (0, _extendedTest.test)("should remove basic punctuation", async () => {
51
+ const result = (0, _matching.removePunctuationAndSpaces)({
52
+ s: "Hello, World!"
53
+ });
54
+ (0, _extendedTest.expect)(result).toBe("HelloWorld");
55
+ });
56
+ (0, _extendedTest.test)("should remove spaces", async () => {
57
+ const result = (0, _matching.removePunctuationAndSpaces)({
58
+ s: "hello world there"
59
+ });
60
+ (0, _extendedTest.expect)(result).toBe("helloworldthere");
61
+ });
62
+ });
63
+ (0, _extendedTest.describe)("rankMatch", () => {
64
+ (0, _extendedTest.test)("should rank exact match as HIGH", async () => {
65
+ const result = (0, _matching.rankMatch)({
66
+ original: "Hello World",
67
+ match: "Hello World"
68
+ });
69
+ (0, _extendedTest.expect)(result).toBe("HIGH");
70
+ });
71
+ (0, _extendedTest.test)("should rank case insensitive match as HIGH", async () => {
72
+ const result = (0, _matching.rankMatch)({
73
+ original: "Hello World",
74
+ match: "hello world"
75
+ });
76
+ (0, _extendedTest.expect)(result).toBe("HIGH");
77
+ });
78
+ (0, _extendedTest.test)("should rank completely different strings as LOW", async () => {
79
+ const result = (0, _matching.rankMatch)({
80
+ original: "Hello World",
81
+ match: "Goodbye Universe"
82
+ });
83
+ (0, _extendedTest.expect)(result).toBe("LOW");
84
+ });
85
+ });
86
+ (0, _extendedTest.describe)("matchStringsWithDomContent", () => {
87
+ (0, _extendedTest.test)("should find matches in DOM content", async () => {
88
+ await page.setContent(`
89
+ <html>
90
+ <body>
91
+ <h1>Hello World</h1>
92
+ <p>This is a test paragraph</p>
93
+ <span>Another test element</span>
94
+ </body>
95
+ </html>
96
+ `);
97
+ const stringsToMatch = ["Hello World", "test paragraph", "Another test"];
98
+ const matches = await (0, _matching.matchStringsWithDomContent)({
99
+ pageObject: page,
100
+ stringsList: stringsToMatch
101
+ });
102
+ (0, _extendedTest.expect)("Hello World" in matches).toBe(true);
103
+ (0, _extendedTest.expect)("test paragraph" in matches).toBe(true);
104
+ (0, _extendedTest.expect)("Another test" in matches).toBe(true);
105
+ });
106
+ (0, _extendedTest.test)("should find matches in iframe content", async () => {
107
+ await page.setContent(`
108
+ <html>
109
+ <body>
110
+ <h1>Main Page Header</h1>
111
+ <p>Content before iframe</p>
112
+ <iframe id="test-iframe" srcdoc="<html><body><h2>Iframe Header</h2><p>Content inside iframe</p><span>Iframe text to match</span></body></html>"></iframe>
113
+ <p>Content after iframe</p>
114
+ </body>
115
+ </html>
116
+ `);
117
+ await page.waitForSelector("#test-iframe");
118
+ await page.waitForTimeout(100);
119
+ const stringsToMatch = ["Main Page Header", "Content before iframe", "Iframe Header", "Content inside iframe", "Iframe text to match", "Content after iframe"];
120
+ const matches = await (0, _matching.matchStringsWithDomContent)({
121
+ pageObject: page,
122
+ stringsList: stringsToMatch
123
+ });
124
+ (0, _extendedTest.expect)("Main Page Header" in matches).toBe(true);
125
+ (0, _extendedTest.expect)(matches["Main Page Header"].length).toBeGreaterThan(0);
126
+ (0, _extendedTest.expect)("Content before iframe" in matches).toBe(true);
127
+ (0, _extendedTest.expect)(matches["Content before iframe"].length).toBeGreaterThan(0);
128
+ (0, _extendedTest.expect)("Iframe Header" in matches).toBe(true);
129
+ (0, _extendedTest.expect)(matches["Iframe Header"].length).toBeGreaterThan(0);
130
+ (0, _extendedTest.expect)("Content inside iframe" in matches).toBe(true);
131
+ (0, _extendedTest.expect)(matches["Content inside iframe"].length).toBeGreaterThan(0);
132
+ (0, _extendedTest.expect)("Iframe text to match" in matches).toBe(true);
133
+ (0, _extendedTest.expect)(matches["Iframe text to match"].length).toBeGreaterThan(0);
134
+ (0, _extendedTest.expect)("Content after iframe" in matches).toBe(true);
135
+ (0, _extendedTest.expect)(matches["Content after iframe"].length).toBeGreaterThan(0);
136
+ });
137
+ });
138
+ (0, _extendedTest.describe)("createMatchesMapping and validateMatchesMapping", () => {
139
+ (0, _extendedTest.test)("should create and validate matches mapping", async () => {
140
+ await page.setContent(`
141
+ <html>
142
+ <body>
143
+ <div class="product">
144
+ <h2>iPhone 14 Pro</h2>
145
+ <div class="price">$999</div>
146
+ </div>
147
+ </body>
148
+ </html>
149
+ `);
150
+ const extractedData = {
151
+ title: "iPhone 14 Pro",
152
+ price: "$999"
153
+ };
154
+ const mapping = await (0, _matching.createMatchesMapping)(page, extractedData);
155
+ (0, _extendedTest.expect)("iPhone 14 Pro" in mapping).toBe(true);
156
+ (0, _extendedTest.expect)("$999" in mapping).toBe(true);
157
+ (0, _extendedTest.expect)(mapping["iPhone 14 Pro"].length).toBeGreaterThan(0);
158
+ (0, _extendedTest.expect)(mapping["$999"].length).toBeGreaterThan(0);
159
+ const isValid = await (0, _matching.validateMatchesMapping)(page, mapping);
160
+ (0, _extendedTest.expect)(isValid).toBe(true);
161
+ });
162
+ (0, _extendedTest.test)("should detect when DOM changes invalidate mapping", async () => {
163
+ await page.setContent(`
164
+ <html>
165
+ <body>
166
+ <div class="product">
167
+ <h2>iPhone 14 Pro</h2>
168
+ <div class="price">$999</div>
169
+ </div>
170
+ </body>
171
+ </html>
172
+ `);
173
+ const extractedData = {
174
+ title: "iPhone 14 Pro",
175
+ price: "$999"
176
+ };
177
+ const mapping = await (0, _matching.createMatchesMapping)(page, extractedData);
178
+ await page.setContent(`
179
+ <html>
180
+ <body>
181
+ <div class="product">
182
+ <h2>iPhone 15 Pro</h2>
183
+ <div class="price">$1099</div>
184
+ </div>
185
+ </body>
186
+ </html>
187
+ `);
188
+ const isValid = await (0, _matching.validateMatchesMapping)(page, mapping);
189
+ (0, _extendedTest.expect)(isValid).toBe(false);
190
+ });
191
+ (0, _extendedTest.test)("should work with iframe content", async () => {
192
+ const iframeContent = `
193
+ <html>
194
+ <body>
195
+ <div class="product">
196
+ <h2>iPhone 14 Pro</h2>
197
+ <div class="price">$999</div>
198
+ </div>
199
+ </body>
200
+ </html>
201
+ `;
202
+ await page.setContent(`
203
+ <html>
204
+ <body>
205
+ <h1>Product Catalog</h1>
206
+ <iframe id="product-frame" srcdoc='${iframeContent.replace(/'/g, "&apos;")}'></iframe>
207
+ </body>
208
+ </html>
209
+ `);
210
+ await page.waitForSelector("#product-frame");
211
+ await page.waitForTimeout(100);
212
+ const extractedData = {
213
+ title: "iPhone 14 Pro",
214
+ price: "$999"
215
+ };
216
+ const mapping = await (0, _matching.createMatchesMapping)(page, extractedData);
217
+ (0, _extendedTest.expect)("iPhone 14 Pro" in mapping).toBe(true);
218
+ (0, _extendedTest.expect)("$999" in mapping).toBe(true);
219
+ (0, _extendedTest.expect)(mapping["iPhone 14 Pro"].length).toBeGreaterThan(0);
220
+ (0, _extendedTest.expect)(mapping["$999"].length).toBeGreaterThan(0);
221
+ const isValid = await (0, _matching.validateMatchesMapping)(page, mapping);
222
+ (0, _extendedTest.expect)(isValid).toBe(true);
223
+ });
224
+ });
225
+ });
@@ -0,0 +1,324 @@
1
+ "use strict";
2
+
3
+ var _extendedTest = require("../extendedTest");
4
+ var _playwrightCore = require("playwright-core");
5
+ var _getSimplifiedHtml = require("../getSimplifiedHtml");
6
+ (0, _extendedTest.describe)("Test getSimplifiedHtml", () => {
7
+ let browser;
8
+ let page;
9
+ (0, _extendedTest.beforeAll)(async () => {
10
+ browser = await _playwrightCore.chromium.launch({
11
+ headless: true
12
+ });
13
+ });
14
+ (0, _extendedTest.afterAll)(async () => {
15
+ await browser.close();
16
+ });
17
+ (0, _extendedTest.beforeEach)(async () => {
18
+ page = await browser.newPage();
19
+ });
20
+ (0, _extendedTest.afterEach)(async () => {
21
+ await page.close();
22
+ });
23
+ (0, _extendedTest.test)("should simplify HTML from a Page", async () => {
24
+ await page.setContent(`
25
+ <html>
26
+ <head><title>Test</title></head>
27
+ <body>
28
+ <div id="content">
29
+ <h1>Main Header</h1>
30
+ <p class="description" style="color: red;">This is a description</p>
31
+ <button id="btn">Click me</button>
32
+ </div>
33
+ </body>
34
+ </html>
35
+ `);
36
+ const simplified = await (0, _getSimplifiedHtml.getSimplifiedHtml)(page);
37
+ (0, _extendedTest.expect)(simplified).toContain("Main Header");
38
+ (0, _extendedTest.expect)(simplified).toContain("This is a description");
39
+ (0, _extendedTest.expect)(simplified).toContain("Click me");
40
+ (0, _extendedTest.expect)(simplified).toContain('id="content"');
41
+ (0, _extendedTest.expect)(simplified).toContain('id="btn"');
42
+ (0, _extendedTest.expect)(simplified).not.toContain('style="color: red;"');
43
+ });
44
+ (0, _extendedTest.test)("should simplify HTML from a Locator", async () => {
45
+ await page.setContent(`
46
+ <html>
47
+ <body>
48
+ <div id="container">
49
+ <h2>Container Header</h2>
50
+ <a href="/link" class="nav-link">Link</a>
51
+ </div>
52
+ <div id="other">
53
+ <p>Other content</p>
54
+ </div>
55
+ </body>
56
+ </html>
57
+ `);
58
+ const locator = page.locator("#container");
59
+ const simplified = await (0, _getSimplifiedHtml.getSimplifiedHtml)(locator);
60
+ (0, _extendedTest.expect)(simplified).toContain("Container Header");
61
+ (0, _extendedTest.expect)(simplified).toContain("Link");
62
+ (0, _extendedTest.expect)(simplified).toContain('href="/link"');
63
+ (0, _extendedTest.expect)(simplified).not.toContain("Other content");
64
+ });
65
+ (0, _extendedTest.test)("should preserve interactive elements", async () => {
66
+ await page.setContent(`
67
+ <html>
68
+ <body>
69
+ <input type="text" name="username" placeholder="Enter username" />
70
+ <button type="submit">Submit</button>
71
+ <select name="country">
72
+ <option>USA</option>
73
+ </select>
74
+ <textarea name="comments"></textarea>
75
+ </body>
76
+ </html>
77
+ `);
78
+ const simplified = await (0, _getSimplifiedHtml.getSimplifiedHtml)(page);
79
+ (0, _extendedTest.expect)(simplified).toContain('type="text"');
80
+ (0, _extendedTest.expect)(simplified).toContain('name="username"');
81
+ (0, _extendedTest.expect)(simplified).toContain('placeholder="Enter username"');
82
+ (0, _extendedTest.expect)(simplified).toContain('type="submit"');
83
+ (0, _extendedTest.expect)(simplified).toContain('name="country"');
84
+ (0, _extendedTest.expect)(simplified).toContain('name="comments"');
85
+ });
86
+ (0, _extendedTest.test)("should preserve aria-label and data attributes", async () => {
87
+ await page.setContent(`
88
+ <html>
89
+ <body>
90
+ <button aria-label="Close dialog" data-action="close" data-id="123">
91
+ X
92
+ </button>
93
+ <div data-component="card" data-name="product-card">
94
+ Content
95
+ </div>
96
+ </body>
97
+ </html>
98
+ `);
99
+ const simplified = await (0, _getSimplifiedHtml.getSimplifiedHtml)(page);
100
+ (0, _extendedTest.expect)(simplified).toContain('aria-label="Close dialog"');
101
+ (0, _extendedTest.expect)(simplified).toContain('data-action="close"');
102
+ (0, _extendedTest.expect)(simplified).toContain('data-id="123"');
103
+ (0, _extendedTest.expect)(simplified).toContain('data-component="card"');
104
+ (0, _extendedTest.expect)(simplified).toContain('data-name="product-card"');
105
+ });
106
+ (0, _extendedTest.test)("should filter out invisible elements by default", async () => {
107
+ await page.setContent(`
108
+ <html>
109
+ <body>
110
+ <div id="visible">Visible content</div>
111
+ <div id="hidden" style="display: none;">Hidden content</div>
112
+ <div id="invisible" style="visibility: hidden;">Invisible content</div>
113
+ <div id="transparent" style="opacity: 0;">Transparent content</div>
114
+ </body>
115
+ </html>
116
+ `);
117
+ const simplified = await (0, _getSimplifiedHtml.getSimplifiedHtml)(page);
118
+ (0, _extendedTest.expect)(simplified).toContain("Visible content");
119
+ (0, _extendedTest.expect)(simplified).not.toContain("Hidden content");
120
+ (0, _extendedTest.expect)(simplified).not.toContain("Invisible content");
121
+ (0, _extendedTest.expect)(simplified).not.toContain("Transparent content");
122
+ });
123
+ (0, _extendedTest.test)("should include invisible elements when keepOnlyVisibleElements is false", async () => {
124
+ await page.setContent(`
125
+ <html>
126
+ <body>
127
+ <div id="visible">Visible content</div>
128
+ <div id="hidden" style="display: none;">Hidden content</div>
129
+ </body>
130
+ </html>
131
+ `);
132
+ const simplified = await (0, _getSimplifiedHtml.getSimplifiedHtml)(page, {
133
+ keepOnlyVisibleElements: false
134
+ });
135
+ (0, _extendedTest.expect)(simplified).toContain("Visible content");
136
+ (0, _extendedTest.expect)(simplified).toContain("Hidden content");
137
+ });
138
+ (0, _extendedTest.test)("should include onclick when shouldIncludeOnClick is true", async () => {
139
+ await page.setContent(`
140
+ <html>
141
+ <body>
142
+ <button onclick="alert('clicked')">Click</button>
143
+ </body>
144
+ </html>
145
+ `);
146
+ const simplifiedWithoutOnClick = await (0, _getSimplifiedHtml.getSimplifiedHtml)(page, {
147
+ shouldIncludeOnClick: false
148
+ });
149
+ const simplifiedWithOnClick = await (0, _getSimplifiedHtml.getSimplifiedHtml)(page, {
150
+ shouldIncludeOnClick: true
151
+ });
152
+ (0, _extendedTest.expect)(simplifiedWithoutOnClick).not.toContain("onclick=");
153
+ (0, _extendedTest.expect)(simplifiedWithOnClick).toContain("onclick=");
154
+ });
155
+ (0, _extendedTest.test)("should add content attribute when shouldIncludeContentAsProp is true", async () => {
156
+ await page.setContent(`
157
+ <html>
158
+ <body>
159
+ <div id="text-content">Some text content</div>
160
+ </body>
161
+ </html>
162
+ `);
163
+ const simplifiedWithContent = await (0, _getSimplifiedHtml.getSimplifiedHtml)(page, {
164
+ shouldIncludeContentAsProp: true
165
+ });
166
+ (0, _extendedTest.expect)(simplifiedWithContent).toContain("content=");
167
+ (0, _extendedTest.expect)(simplifiedWithContent).toContain("Some text content");
168
+ });
169
+ (0, _extendedTest.test)("should preserve input values", async () => {
170
+ await page.setContent(`
171
+ <html>
172
+ <body>
173
+ <input type="text" id="username" value="john_doe" />
174
+ <input type="email" id="email" value="" />
175
+ </body>
176
+ </html>
177
+ `);
178
+ await page.fill("#username", "jane_smith");
179
+ const simplified = await (0, _getSimplifiedHtml.getSimplifiedHtml)(page);
180
+ (0, _extendedTest.expect)(simplified).toContain("value=");
181
+ (0, _extendedTest.expect)(simplified).toContain('type="text"');
182
+ });
183
+ (0, _extendedTest.test)("should handle empty page", async () => {
184
+ await page.setContent(`
185
+ <html>
186
+ <body></body>
187
+ </html>
188
+ `);
189
+ const simplified = await (0, _getSimplifiedHtml.getSimplifiedHtml)(page);
190
+ (0, _extendedTest.expect)(simplified).toBe("");
191
+ });
192
+ (0, _extendedTest.test)("should handle complex nested structure", async () => {
193
+ await page.setContent(`
194
+ <html>
195
+ <body>
196
+ <div id="app">
197
+ <header>
198
+ <nav>
199
+ <a href="/" id="home">Home</a>
200
+ <a href="/about" id="about">About</a>
201
+ </nav>
202
+ </header>
203
+ <main>
204
+ <article>
205
+ <h1>Article Title</h1>
206
+ <p>Article content</p>
207
+ </article>
208
+ </main>
209
+ <footer>
210
+ <p>Copyright 2024</p>
211
+ </footer>
212
+ </div>
213
+ </body>
214
+ </html>
215
+ `);
216
+ const simplified = await (0, _getSimplifiedHtml.getSimplifiedHtml)(page);
217
+ (0, _extendedTest.expect)(simplified).toContain('id="app"');
218
+ (0, _extendedTest.expect)(simplified).toContain('href="/"');
219
+ (0, _extendedTest.expect)(simplified).toContain('href="/about"');
220
+ (0, _extendedTest.expect)(simplified).toContain("Article Title");
221
+ (0, _extendedTest.expect)(simplified).toContain("Article content");
222
+ (0, _extendedTest.expect)(simplified).toContain("Copyright 2024");
223
+ });
224
+ (0, _extendedTest.test)("should remove unnecessary wrapper divs", async () => {
225
+ await page.setContent(`
226
+ <html>
227
+ <body>
228
+ <div>
229
+ <div>
230
+ <div>
231
+ <button id="btn">Click</button>
232
+ </div>
233
+ </div>
234
+ </div>
235
+ </body>
236
+ </html>
237
+ `);
238
+ const simplified = await (0, _getSimplifiedHtml.getSimplifiedHtml)(page);
239
+ (0, _extendedTest.expect)(simplified).toContain('id="btn"');
240
+ (0, _extendedTest.expect)(simplified).toContain("Click");
241
+ (0, _extendedTest.expect)(simplified).toContain("<button");
242
+ });
243
+ (0, _extendedTest.test)("should work with Frame", async () => {
244
+ const iframeContent = `
245
+ <html>
246
+ <body>
247
+ <h2>Frame Content</h2>
248
+ <a href="/link" id="frame-link">Frame Link</a>
249
+ </body>
250
+ </html>
251
+ `;
252
+ await page.setContent(`
253
+ <html>
254
+ <body>
255
+ <h1>Main Page</h1>
256
+ <iframe id="test-frame" srcdoc='${iframeContent.replace(/'/g, "&apos;")}'></iframe>
257
+ </body>
258
+ </html>
259
+ `);
260
+ await page.waitForSelector("#test-frame");
261
+ await page.waitForTimeout(100);
262
+ const frameElement = await page.frame({
263
+ url: /about:srcdoc/
264
+ });
265
+ if (!frameElement) {
266
+ throw new Error("Frame not found");
267
+ }
268
+ const simplified = await (0, _getSimplifiedHtml.getSimplifiedHtml)(frameElement);
269
+ (0, _extendedTest.expect)(simplified).toContain("Frame Content");
270
+ (0, _extendedTest.expect)(simplified).toContain("Frame Link");
271
+ (0, _extendedTest.expect)(simplified).toContain('href="/link"');
272
+ (0, _extendedTest.expect)(simplified).not.toContain("Main Page");
273
+ });
274
+ (0, _extendedTest.test)("should handle special characters in text", async () => {
275
+ await page.setContent(`
276
+ <html>
277
+ <body>
278
+ <div id="special">
279
+ <p>Text with &lt;special&gt; characters &amp; symbols</p>
280
+ <a href="/path?query=value&other=123">Link with query</a>
281
+ </div>
282
+ </body>
283
+ </html>
284
+ `);
285
+ const simplified = await (0, _getSimplifiedHtml.getSimplifiedHtml)(page);
286
+ (0, _extendedTest.expect)(simplified).toContain("special");
287
+ (0, _extendedTest.expect)(simplified).toContain("characters");
288
+ (0, _extendedTest.expect)(simplified).toContain("symbols");
289
+ });
290
+ (0, _extendedTest.test)("should return empty string for non-existent locator", async () => {
291
+ await page.setContent(`
292
+ <html>
293
+ <body>
294
+ <div id="exists">Content</div>
295
+ </body>
296
+ </html>
297
+ `);
298
+ const locator = page.locator("#does-not-exist").first();
299
+ page.setDefaultTimeout(2000);
300
+ await (0, _extendedTest.expect)((0, _getSimplifiedHtml.getSimplifiedHtml)(locator)).rejects.toThrow();
301
+ page.setDefaultTimeout(30000);
302
+ });
303
+ (0, _extendedTest.test)("should handle role attributes", async () => {
304
+ await page.setContent(`
305
+ <html>
306
+ <body>
307
+ <div role="navigation">
308
+ <a href="/">Home</a>
309
+ </div>
310
+ <div role="main">
311
+ <h1>Main Content</h1>
312
+ </div>
313
+ <div role="complementary">
314
+ <p>Sidebar</p>
315
+ </div>
316
+ </body>
317
+ </html>
318
+ `);
319
+ const simplified = await (0, _getSimplifiedHtml.getSimplifiedHtml)(page);
320
+ (0, _extendedTest.expect)(simplified).toContain('role="navigation"');
321
+ (0, _extendedTest.expect)(simplified).toContain('role="main"');
322
+ (0, _extendedTest.expect)(simplified).toContain('role="complementary"');
323
+ });
324
+ });
@@ -1044,11 +1044,11 @@ export interface Attachment {
1044
1044
  /** The S3 object key/path */
1045
1045
  key: string;
1046
1046
 
1047
- /** The S3 bucket name where the file is stored */
1048
- bucket: string;
1047
+ /** The S3 bucket name where the file is stored. Only present when using custom S3 storage. */
1048
+ bucket?: string;
1049
1049
 
1050
- /** The AWS region where the S3 bucket is located */
1051
- region: string;
1050
+ /** The AWS region where the S3 bucket is located. Only present when using custom S3 storage. */
1051
+ region?: string;
1052
1052
 
1053
1053
  /** Optional custom S3 endpoint URL. Defaults to undefined for standard AWS S3 */
1054
1054
  endpoint?: string | null;
@@ -1069,7 +1069,7 @@ export interface Attachment {
1069
1069
  /**
1070
1070
  * Converts the file metadata to a record.
1071
1071
  *
1072
- * @returns `Record<string, string>` - Record with fileName, key, bucket, region, endpoint, suggestedFileName, and fileType
1072
+ * @returns `Record<string, string>` - Record with fileName, key, suggestedFileName, fileType, and optionally bucket, region, endpoint, signedUrl
1073
1073
  */
1074
1074
  toDict(): Record<string, string>;
1075
1075
 
@@ -1080,13 +1080,6 @@ export interface Attachment {
1080
1080
  */
1081
1081
  getS3Key(): string;
1082
1082
 
1083
- /**
1084
- * Returns the file path/key within the S3 bucket.
1085
- *
1086
- * @returns `string` - The fileName property (S3 object key)
1087
- */
1088
- getFilePath(): string;
1089
-
1090
1083
  /**
1091
1084
  * Generates a presigned URL for secure, temporary access to the file.
1092
1085
  *
@@ -6,24 +6,33 @@ Object.defineProperty(exports, "__esModule", {
6
6
  exports.extractMarkdown = void 0;
7
7
  var _utils = require("./utils");
8
8
  var _locatorHelpers = require("../common/locatorHelpers");
9
+ var _frame_utils = require("../common/frame_utils");
9
10
  function _interopRequireWildcard(e, t) { if ("function" == typeof WeakMap) var r = new WeakMap(), n = new WeakMap(); return (_interopRequireWildcard = function (e, t) { if (!t && e && e.__esModule) return e; var o, i, f = { __proto__: null, default: e }; if (null === e || "object" != typeof e && "function" != typeof e) return f; if (o = t ? n : r) { if (o.has(e)) return o.get(e); o.set(e, f); } for (const t in e) "default" !== t && {}.hasOwnProperty.call(e, t) && ((i = (o = Object.defineProperty) && Object.getOwnPropertyDescriptor(e, t)) && (i.get || i.set) ? o(f, t, i) : f[t] = e[t]); return f; })(e, t); }
10
11
  const extractMarkdown = async input => {
11
- var _handle;
12
12
  const {
13
13
  source
14
14
  } = input;
15
15
  const isPageSource = (0, _locatorHelpers.isPage)(source);
16
16
  const pageObject = isPageSource ? source : source.page();
17
17
  await (0, _utils.ensureBrowserScripts)(pageObject);
18
- let handle;
18
+ const htmlContent = await (0, _frame_utils.getContentWithNestedIframes)(source);
19
+ let md;
19
20
  if (isPageSource) {
20
- handle = await source.locator("body").elementHandle();
21
+ md = await pageObject.evaluate(html => {
22
+ const parser = new DOMParser();
23
+ const doc = parser.parseFromString(html, "text/html");
24
+ const body = doc.body || doc.documentElement;
25
+ return window.__INTUNED__.convertElementToMarkdown(body);
26
+ }, htmlContent);
21
27
  } else {
22
- handle = await source.elementHandle();
28
+ md = await pageObject.evaluate(html => {
29
+ var _doc$body;
30
+ const parser = new DOMParser();
31
+ const doc = parser.parseFromString(html, "text/html");
32
+ const element = ((_doc$body = doc.body) === null || _doc$body === void 0 ? void 0 : _doc$body.firstElementChild) || doc.documentElement.firstElementChild || doc.documentElement;
33
+ return window.__INTUNED__.convertElementToMarkdown(element);
34
+ }, htmlContent);
23
35
  }
24
- const md = await pageObject.evaluate(element => {
25
- return window.__INTUNED__.convertElementToMarkdown(element);
26
- }, (_handle = handle) === null || _handle === void 0 ? void 0 : _handle.asElement());
27
36
  const prettier = await Promise.resolve().then(() => _interopRequireWildcard(require("prettier/standalone")));
28
37
  const parserMarkdown = await Promise.resolve().then(() => _interopRequireWildcard(require("prettier/parser-markdown")));
29
38
  const formattedMarkdown = await prettier.format(md, {
@@ -1044,11 +1044,11 @@ export interface Attachment {
1044
1044
  /** The S3 object key/path */
1045
1045
  key: string;
1046
1046
 
1047
- /** The S3 bucket name where the file is stored */
1048
- bucket: string;
1047
+ /** The S3 bucket name where the file is stored. Only present when using custom S3 storage. */
1048
+ bucket?: string;
1049
1049
 
1050
- /** The AWS region where the S3 bucket is located */
1051
- region: string;
1050
+ /** The AWS region where the S3 bucket is located. Only present when using custom S3 storage. */
1051
+ region?: string;
1052
1052
 
1053
1053
  /** Optional custom S3 endpoint URL. Defaults to undefined for standard AWS S3 */
1054
1054
  endpoint?: string | null;
@@ -1069,7 +1069,7 @@ export interface Attachment {
1069
1069
  /**
1070
1070
  * Converts the file metadata to a record.
1071
1071
  *
1072
- * @returns `Record<string, string>` - Record with fileName, key, bucket, region, endpoint, suggestedFileName, and fileType
1072
+ * @returns `Record<string, string>` - Record with fileName, key, suggestedFileName, fileType, and optionally bucket, region, endpoint, signedUrl
1073
1073
  */
1074
1074
  toDict(): Record<string, string>;
1075
1075
 
@@ -1080,13 +1080,6 @@ export interface Attachment {
1080
1080
  */
1081
1081
  getS3Key(): string;
1082
1082
 
1083
- /**
1084
- * Returns the file path/key within the S3 bucket.
1085
- *
1086
- * @returns `string` - The fileName property (S3 object key)
1087
- */
1088
- getFilePath(): string;
1089
-
1090
1083
  /**
1091
1084
  * Generates a presigned URL for secure, temporary access to the file.
1092
1085
  *