@intuned/browser-dev 0.1.9-dev.0 → 0.1.10-dev.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. package/dist/ai/extractStructuredData.js +21 -27
  2. package/dist/ai/tests/testCreateMatchesMapping.spec.js +216 -0
  3. package/dist/ai/tests/testExtractStructuredData.spec.js +346 -0
  4. package/dist/ai/tests/testExtractStructuredDataDomMatchingIframes.spec.js +459 -0
  5. package/dist/ai/tests/testExtractStructuredDataUnit.spec.js +375 -0
  6. package/dist/ai/tests/testMatching.spec.js +342 -0
  7. package/dist/ai/tests/testValidateMatchesMapping.spec.js +265 -0
  8. package/dist/common/extendedTest.js +38 -30
  9. package/dist/common/frame_utils/frameTree.js +116 -0
  10. package/dist/common/frame_utils/getContentWithNestedIframes.js +13 -0
  11. package/dist/common/frame_utils/index.js +95 -0
  12. package/dist/common/frame_utils/stitchIframe.js +105 -0
  13. package/dist/{helpers → common}/frame_utils/tests/testFindAllIframes.spec.js +24 -15
  14. package/dist/common/frame_utils/tests/testGetContentWithNestedIframes.spec.js +241 -0
  15. package/dist/common/frame_utils/utils.js +91 -0
  16. package/dist/common/getSimplifiedHtml.js +20 -20
  17. package/dist/common/matching/matching.js +91 -16
  18. package/dist/common/tests/matching.test.js +225 -0
  19. package/dist/common/tests/testGetSimplifiedHtml.spec.js +324 -0
  20. package/dist/helpers/extractMarkdown.js +16 -7
  21. package/dist/helpers/tests/testExtractMarkdown.spec.js +29 -0
  22. package/dist/helpers/waitForDomSettled.js +4 -4
  23. package/dist/types/intuned-runtime.d.ts +6 -32
  24. package/package.json +1 -1
  25. package/dist/helpers/frame_utils/constants.js +0 -8
  26. package/dist/helpers/frame_utils/findAllIframes.js +0 -82
  27. package/dist/helpers/frame_utils/index.js +0 -44
  28. package/dist/{helpers → common}/frame_utils/checkFrameAllowsAsyncScripts.js +0 -0
  29. package/dist/{helpers → common}/frame_utils/getContainerFrame.js +0 -0
@@ -15,7 +15,8 @@ var _getSimplifiedHtml = require("../common/getSimplifiedHtml");
15
15
  var _hashObject = require("../common/hashObject");
16
16
  var _Logger = require("../common/Logger");
17
17
  var _helpers = require("../helpers");
18
- var _xpathMapping = require("../common/xpathMapping");
18
+ var _frame_utils = require("../common/frame_utils");
19
+ var _matching = require("../common/matching/matching");
19
20
  const extractStructuredData = async options => {
20
21
  if ("content" in options && !("source" in options)) {
21
22
  return await extractStructuredDataFromContent(options);
@@ -57,11 +58,7 @@ const extractStructuredData = async options => {
57
58
  }
58
59
  let cacheKey = "";
59
60
  if (validatedData.strategy === "HTML") {
60
- const containerHandle = isPageInput ? await pageOrLocator.locator("html").elementHandle() : await pageOrLocator.elementHandle();
61
- if (!containerHandle) {
62
- throw new Error("No HTML content found in the specified region.");
63
- }
64
- const simplifiedHtml = await (0, _getSimplifiedHtml.getSimplifiedHtml)(containerHandle);
61
+ const simplifiedHtml = await (0, _frame_utils.getContentWithNestedIframes)(pageOrLocator, 10000, _getSimplifiedHtml.getSimplifiedHtml);
65
62
  if (validatedData.enableCache) {
66
63
  cacheKey = (0, _hashObject.hashObject)({
67
64
  pageUrl: pageObject.url(),
@@ -76,7 +73,7 @@ const extractStructuredData = async options => {
76
73
  }, true);
77
74
  const cachedResult = await _cache.cache.get(cacheKey);
78
75
  if (validatedData.enableDomMatching && cachedResult && cachedResult.matchesMapping) {
79
- const isValid = await (0, _xpathMapping.validateXPathMapping)(pageObject, cachedResult.matchesMapping);
76
+ const isValid = await (0, _matching.validateMatchesMapping)(pageObject, cachedResult.matchesMapping);
80
77
  if (isValid) {
81
78
  _Logger.logger.info("Returning cached result with valid DOM matching");
82
79
  return cachedResult.result;
@@ -106,9 +103,10 @@ const extractStructuredData = async options => {
106
103
  if (!validatedData.enableDomMatching) {
107
104
  await _cache.cache.set(cacheKey, result.value.result);
108
105
  } else {
106
+ const domValidationHash = await (0, _matching.createMatchesMapping)(pageObject, result.value.result);
109
107
  const resultsToCache = {
110
108
  result: result.value.result,
111
- matchesMapping: result.value.xpathMapping || {}
109
+ matchesMapping: domValidationHash
112
110
  };
113
111
  await _cache.cache.set(cacheKey, resultsToCache);
114
112
  }
@@ -132,7 +130,7 @@ const extractStructuredData = async options => {
132
130
  }, true);
133
131
  const cachedResult = await _cache.cache.get(cacheKey);
134
132
  if (validatedData.enableDomMatching && cachedResult && cachedResult.matchesMapping) {
135
- const isValid = await (0, _xpathMapping.validateXPathMapping)(pageObject, cachedResult.matchesMapping);
133
+ const isValid = await (0, _matching.validateMatchesMapping)(pageObject, cachedResult.matchesMapping);
136
134
  if (isValid) {
137
135
  _Logger.logger.info("Returning cached result with valid DOM matching");
138
136
  return cachedResult.result;
@@ -168,9 +166,10 @@ const extractStructuredData = async options => {
168
166
  if (!validatedData.enableDomMatching) {
169
167
  await _cache.cache.set(cacheKey, result.value.result);
170
168
  } else {
169
+ const domValidationHash = await (0, _matching.createMatchesMapping)(pageObject, result.value.result);
171
170
  const resultsToCache = {
172
171
  result: result.value.result,
173
- matchesMapping: result.value.xpathMapping || {}
172
+ matchesMapping: domValidationHash
174
173
  };
175
174
  await _cache.cache.set(cacheKey, resultsToCache);
176
175
  }
@@ -178,13 +177,8 @@ const extractStructuredData = async options => {
178
177
  return result.value.result;
179
178
  }
180
179
  if (validatedData.strategy === "MARKDOWN") {
181
- const containerHandle = isPageInput ? await pageOrLocator.locator("html").elementHandle() : await pageOrLocator.elementHandle();
182
- const html = await (containerHandle === null || containerHandle === void 0 ? void 0 : containerHandle.innerHTML());
183
- if (!html) {
184
- throw new Error("No HTML content found in the specified region.");
185
- }
186
180
  const markdown = await (0, _helpers.extractMarkdown)({
187
- source: pageObject
181
+ source: pageOrLocator
188
182
  });
189
183
  if (validatedData.enableCache) {
190
184
  cacheKey = (0, _hashObject.hashObject)({
@@ -200,14 +194,13 @@ const extractStructuredData = async options => {
200
194
  })
201
195
  }, true);
202
196
  const cachedResult = await _cache.cache.get(cacheKey);
203
- if (enableDomMatching && cachedResult && cachedResult.matchesMapping) {
204
- const cachedXpathMapping = cachedResult.matchesMapping;
205
- const isValid = await (0, _xpathMapping.validateXPathMapping)(pageObject, cachedXpathMapping);
197
+ if (validatedData.enableDomMatching && cachedResult && cachedResult.matchesMapping) {
198
+ const isValid = await (0, _matching.validateMatchesMapping)(pageObject, cachedResult.matchesMapping);
206
199
  if (isValid) {
207
200
  _Logger.logger.info("Returning cached result with valid DOM matching");
208
201
  return cachedResult.result;
209
202
  }
210
- } else if (cachedResult && !enableDomMatching) {
203
+ } else if (cachedResult && !validatedData.enableDomMatching) {
211
204
  _Logger.logger.info("Returning cached result");
212
205
  return cachedResult;
213
206
  }
@@ -229,15 +222,16 @@ const extractStructuredData = async options => {
229
222
  throw new Error(result.error.context);
230
223
  }
231
224
  if (validatedData.enableCache) {
232
- if (!enableDomMatching) {
225
+ if (!validatedData.enableDomMatching) {
233
226
  await _cache.cache.set(cacheKey, result.value.result);
234
- return result.value.result;
227
+ } else {
228
+ const domValidationHash = await (0, _matching.createMatchesMapping)(pageObject, result.value.result);
229
+ const resultsToCache = {
230
+ result: result.value.result,
231
+ matchesMapping: domValidationHash
232
+ };
233
+ await _cache.cache.set(cacheKey, resultsToCache);
235
234
  }
236
- const resultsToCache = {
237
- result: result.value.result,
238
- matchesMapping: result.value.xpathMapping || {}
239
- };
240
- await _cache.cache.set(cacheKey, resultsToCache);
241
235
  }
242
236
  return result.value.result;
243
237
  }
@@ -0,0 +1,216 @@
1
+ "use strict";
2
+
3
+ var _extendedTest = require("../../common/extendedTest");
4
+ var _playwright = require("playwright");
5
+ var _matching = require("../../common/matching/matching");
6
+ const SIMPLE_PRODUCT_HTML = `
7
+ <html>
8
+ <body>
9
+ <div class="product">
10
+ <h2 class="title">iPhone 14 Pro</h2>
11
+ <div class="price">$999</div>
12
+ <div class="stock">In Stock</div>
13
+ </div>
14
+ </body>
15
+ </html>
16
+ `;
17
+ const MULTIPLE_PRODUCTS_HTML = `
18
+ <html>
19
+ <body>
20
+ <div class="product" id="p1">
21
+ <h2 class="title">iPhone 14 Pro</h2>
22
+ <div class="price">$999</div>
23
+ </div>
24
+ <div class="product" id="p2">
25
+ <h2 class="title">MacBook Air M2</h2>
26
+ <div class="price">$1199</div>
27
+ </div>
28
+ </body>
29
+ </html>
30
+ `;
31
+ const PRODUCT_WITH_IFRAME_HTML = `
32
+ <html>
33
+ <body>
34
+ <div class="product">
35
+ <h2 class="title">iPhone 14 Pro</h2>
36
+ <div class="price">$999</div>
37
+ </div>
38
+ <iframe id="details-frame" srcdoc='
39
+ <html>
40
+ <body>
41
+ <div class="stock">In Stock</div>
42
+ <div class="rating">4.5 stars</div>
43
+ </body>
44
+ </html>
45
+ '></iframe>
46
+ </body>
47
+ </html>
48
+ `;
49
+ const DUPLICATE_TEXT_HTML = `
50
+ <html>
51
+ <body>
52
+ <div class="header">
53
+ <span class="price">$999</span>
54
+ </div>
55
+ <div class="product">
56
+ <h2 class="title">iPhone 14 Pro</h2>
57
+ <div class="price">$999</div>
58
+ </div>
59
+ <div class="footer">
60
+ <span class="disclaimer">Starting at $999</span>
61
+ </div>
62
+ </body>
63
+ </html>
64
+ `;
65
+ (0, _extendedTest.describe)("createMatchesMapping", () => {
66
+ let browser;
67
+ let page;
68
+ (0, _extendedTest.beforeAll)(async () => {
69
+ browser = await _playwright.chromium.launch({
70
+ headless: true
71
+ });
72
+ });
73
+ (0, _extendedTest.afterAll)(async () => {
74
+ await browser.close();
75
+ });
76
+ (0, _extendedTest.beforeEach)(async () => {
77
+ page = await browser.newPage();
78
+ });
79
+ (0, _extendedTest.afterEach)(async () => {
80
+ await page.close();
81
+ });
82
+ (0, _extendedTest.test)("should create mapping for dict data", async () => {
83
+ await page.setContent(SIMPLE_PRODUCT_HTML);
84
+ const extractedData = {
85
+ title: "iPhone 14 Pro",
86
+ price: "$999",
87
+ stock: "In Stock"
88
+ };
89
+ const mapping = await (0, _matching.createMatchesMapping)(page, extractedData);
90
+ (0, _extendedTest.expect)(mapping).toHaveProperty("iPhone 14 Pro");
91
+ (0, _extendedTest.expect)(mapping).toHaveProperty("$999");
92
+ (0, _extendedTest.expect)(mapping).toHaveProperty("In Stock");
93
+ for (const [_value, matches] of Object.entries(mapping)) {
94
+ (0, _extendedTest.expect)(Array.isArray(matches)).toBe(true);
95
+ (0, _extendedTest.expect)(matches.length).toBeGreaterThan(0);
96
+ for (const match of matches) {
97
+ (0, _extendedTest.expect)(match).toHaveProperty("xpath");
98
+ (0, _extendedTest.expect)(match).toHaveProperty("matched_value");
99
+ (0, _extendedTest.expect)(typeof match.xpath).toBe("string");
100
+ (0, _extendedTest.expect)(typeof match.matched_value).toBe("string");
101
+ }
102
+ }
103
+ (0, _extendedTest.expect)(mapping["iPhone 14 Pro"][0].matched_value).toBe("iPhone 14 Pro");
104
+ (0, _extendedTest.expect)(mapping["$999"][0].matched_value).toBe("$999");
105
+ (0, _extendedTest.expect)(mapping["In Stock"][0].matched_value).toBe("In Stock");
106
+ });
107
+ (0, _extendedTest.test)("should create mapping for list of strings", async () => {
108
+ await page.setContent(SIMPLE_PRODUCT_HTML);
109
+ const extractedData = ["iPhone 14 Pro", "$999", "In Stock"];
110
+ const mapping = await (0, _matching.createMatchesMapping)(page, extractedData);
111
+ (0, _extendedTest.expect)(Object.keys(mapping).length).toBe(3);
112
+ (0, _extendedTest.expect)(mapping).toHaveProperty("iPhone 14 Pro");
113
+ (0, _extendedTest.expect)(mapping).toHaveProperty("$999");
114
+ (0, _extendedTest.expect)(mapping).toHaveProperty("In Stock");
115
+ (0, _extendedTest.expect)(mapping["iPhone 14 Pro"][0].matched_value).toBe("iPhone 14 Pro");
116
+ (0, _extendedTest.expect)(mapping["$999"][0].matched_value).toBe("$999");
117
+ });
118
+ (0, _extendedTest.test)("should create mapping for list of dicts", async () => {
119
+ await page.setContent(MULTIPLE_PRODUCTS_HTML);
120
+ const extractedData = [{
121
+ title: "iPhone 14 Pro",
122
+ price: "$999"
123
+ }, {
124
+ title: "MacBook Air M2",
125
+ price: "$1199"
126
+ }];
127
+ const mapping = await (0, _matching.createMatchesMapping)(page, extractedData);
128
+ (0, _extendedTest.expect)(mapping).toHaveProperty("iPhone 14 Pro");
129
+ (0, _extendedTest.expect)(mapping).toHaveProperty("$999");
130
+ (0, _extendedTest.expect)(mapping).toHaveProperty("MacBook Air M2");
131
+ (0, _extendedTest.expect)(mapping).toHaveProperty("$1199");
132
+ (0, _extendedTest.expect)(mapping["iPhone 14 Pro"].length).toBeGreaterThanOrEqual(1);
133
+ (0, _extendedTest.expect)(mapping["MacBook Air M2"].length).toBeGreaterThanOrEqual(1);
134
+ (0, _extendedTest.expect)(mapping["iPhone 14 Pro"][0].matched_value).toBe("iPhone 14 Pro");
135
+ (0, _extendedTest.expect)(mapping["MacBook Air M2"][0].matched_value).toBe("MacBook Air M2");
136
+ });
137
+ (0, _extendedTest.test)("should include matches from iframe content", async () => {
138
+ await page.setContent(PRODUCT_WITH_IFRAME_HTML);
139
+ await page.waitForSelector("#details-frame");
140
+ const extractedData = {
141
+ title: "iPhone 14 Pro",
142
+ price: "$999",
143
+ stock: "In Stock",
144
+ rating: "4.5 stars"
145
+ };
146
+ const mapping = await (0, _matching.createMatchesMapping)(page, extractedData);
147
+ (0, _extendedTest.expect)(mapping).toHaveProperty("iPhone 14 Pro");
148
+ (0, _extendedTest.expect)(mapping).toHaveProperty("$999");
149
+ (0, _extendedTest.expect)(mapping).toHaveProperty("In Stock");
150
+ (0, _extendedTest.expect)(mapping).toHaveProperty("4.5 stars");
151
+ (0, _extendedTest.expect)(mapping["In Stock"].length).toBeGreaterThan(0);
152
+ (0, _extendedTest.expect)(mapping["4.5 stars"].length).toBeGreaterThan(0);
153
+ (0, _extendedTest.expect)(mapping["In Stock"][0].matched_value).toBe("In Stock");
154
+ (0, _extendedTest.expect)(mapping["4.5 stars"][0].matched_value).toBe("4.5 stars");
155
+ });
156
+ (0, _extendedTest.test)("should handle duplicate text in multiple locations", async () => {
157
+ await page.setContent(DUPLICATE_TEXT_HTML);
158
+ const extractedData = {
159
+ title: "iPhone 14 Pro",
160
+ price: "$999"
161
+ };
162
+ const mapping = await (0, _matching.createMatchesMapping)(page, extractedData);
163
+ (0, _extendedTest.expect)(mapping).toHaveProperty("$999");
164
+ const priceMatches = mapping["$999"];
165
+ (0, _extendedTest.expect)(priceMatches.length).toBeGreaterThanOrEqual(2);
166
+ const xpaths = priceMatches.map(match => match.xpath);
167
+ const uniqueXpaths = new Set(xpaths);
168
+ (0, _extendedTest.expect)(uniqueXpaths.size).toBeGreaterThanOrEqual(2);
169
+ (0, _extendedTest.expect)(priceMatches.every(match => match.matched_value === "$999")).toBe(true);
170
+ (0, _extendedTest.expect)(priceMatches.every(match => match.xpath.length > 0)).toBe(true);
171
+ });
172
+ (0, _extendedTest.test)("should handle empty extraction data", async () => {
173
+ await page.setContent(SIMPLE_PRODUCT_HTML);
174
+ let mapping = await (0, _matching.createMatchesMapping)(page, {});
175
+ (0, _extendedTest.expect)(mapping).toEqual({});
176
+ (0, _extendedTest.expect)(Object.keys(mapping).length).toBe(0);
177
+ mapping = await (0, _matching.createMatchesMapping)(page, []);
178
+ (0, _extendedTest.expect)(mapping).toEqual({});
179
+ (0, _extendedTest.expect)(Object.keys(mapping).length).toBe(0);
180
+ });
181
+ (0, _extendedTest.test)("should handle nonexistent text in DOM", async () => {
182
+ await page.setContent(SIMPLE_PRODUCT_HTML);
183
+ const extractedData = {
184
+ title: "Samsung Galaxy S24",
185
+ price: "$899"
186
+ };
187
+ const mapping = await (0, _matching.createMatchesMapping)(page, extractedData);
188
+ (0, _extendedTest.expect)(mapping).toHaveProperty("Samsung Galaxy S24");
189
+ (0, _extendedTest.expect)(mapping).toHaveProperty("$899");
190
+ (0, _extendedTest.expect)(mapping["Samsung Galaxy S24"].length).toBe(0);
191
+ (0, _extendedTest.expect)(mapping["$899"].length).toBe(0);
192
+ (0, _extendedTest.expect)(Object.keys(mapping).length).toBe(2);
193
+ (0, _extendedTest.expect)(Array.isArray(mapping["Samsung Galaxy S24"])).toBe(true);
194
+ (0, _extendedTest.expect)(Array.isArray(mapping["$899"])).toBe(true);
195
+ });
196
+ (0, _extendedTest.test)("should have consistent structure across different data types", async () => {
197
+ await page.setContent(SIMPLE_PRODUCT_HTML);
198
+ const dictData = {
199
+ title: "iPhone 14 Pro"
200
+ };
201
+ const dictMapping = await (0, _matching.createMatchesMapping)(page, dictData);
202
+ const listData = ["iPhone 14 Pro"];
203
+ const listMapping = await (0, _matching.createMatchesMapping)(page, listData);
204
+ (0, _extendedTest.expect)(dictMapping).toHaveProperty("iPhone 14 Pro");
205
+ (0, _extendedTest.expect)(listMapping).toHaveProperty("iPhone 14 Pro");
206
+ const dictMatches = dictMapping["iPhone 14 Pro"];
207
+ const listMatches = listMapping["iPhone 14 Pro"];
208
+ (0, _extendedTest.expect)(dictMatches.length).toBe(listMatches.length);
209
+ for (let i = 0; i < dictMatches.length; i++) {
210
+ (0, _extendedTest.expect)(dictMatches[i].xpath).toBe(listMatches[i].xpath);
211
+ (0, _extendedTest.expect)(dictMatches[i].matched_value).toBe(listMatches[i].matched_value);
212
+ }
213
+ (0, _extendedTest.expect)(dictMatches[0].matched_value).toBe("iPhone 14 Pro");
214
+ (0, _extendedTest.expect)(listMatches[0].matched_value).toBe("iPhone 14 Pro");
215
+ });
216
+ });