@intuned/browser-dev 0.1.8-dev.0 → 0.1.10-dev.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +85 -143
- package/dist/ai/export.d.ts +291 -143
- package/dist/ai/extractStructuredData.js +21 -27
- package/dist/ai/extractStructuredDataUsingAi.js +24 -1
- package/dist/ai/index.d.ts +291 -143
- package/dist/ai/tests/testCreateMatchesMapping.spec.js +216 -0
- package/dist/ai/tests/testExtractStructuredData.spec.js +348 -2
- package/dist/ai/tests/testExtractStructuredDataDomMatchingIframes.spec.js +459 -0
- package/dist/ai/tests/testExtractStructuredDataUnit.spec.js +375 -0
- package/dist/ai/tests/testMatching.spec.js +342 -0
- package/dist/ai/tests/testValidateMatchesMapping.spec.js +265 -0
- package/dist/common/Logger/index.js +2 -2
- package/dist/common/extendedTest.js +38 -30
- package/dist/common/frame_utils/frameTree.js +116 -0
- package/dist/common/frame_utils/getContentWithNestedIframes.js +13 -0
- package/dist/common/frame_utils/index.js +95 -0
- package/dist/common/frame_utils/stitchIframe.js +105 -0
- package/dist/{helpers → common}/frame_utils/tests/testFindAllIframes.spec.js +24 -15
- package/dist/common/frame_utils/tests/testGetContentWithNestedIframes.spec.js +241 -0
- package/dist/common/frame_utils/utils.js +91 -0
- package/dist/common/getSimplifiedHtml.js +20 -20
- package/dist/common/matching/matching.js +91 -16
- package/dist/common/tests/matching.test.js +225 -0
- package/dist/common/tests/testGetSimplifiedHtml.spec.js +324 -0
- package/dist/helpers/export.d.ts +702 -575
- package/dist/helpers/extractMarkdown.js +16 -7
- package/dist/helpers/index.d.ts +702 -575
- package/dist/helpers/tests/testExtractMarkdown.spec.js +29 -0
- package/dist/helpers/waitForDomSettled.js +4 -4
- package/dist/helpers/withNetworkSettledWait.js +2 -7
- package/dist/optimized-extractors/export.d.ts +17 -18
- package/dist/optimized-extractors/index.d.ts +17 -18
- package/dist/types/intuned-runtime.d.ts +6 -32
- package/how-to-generate-docs.md +40 -28
- package/package.json +2 -2
- package/dist/helpers/frame_utils/constants.js +0 -8
- package/dist/helpers/frame_utils/findAllIframes.js +0 -82
- package/dist/helpers/frame_utils/index.js +0 -44
- /package/dist/{helpers → common}/frame_utils/checkFrameAllowsAsyncScripts.js +0 -0
- /package/dist/{helpers → common}/frame_utils/getContainerFrame.js +0 -0
|
@@ -0,0 +1,375 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
var _extendedTest = require("../../common/extendedTest");
|
|
4
|
+
var _playwright = require("playwright");
|
|
5
|
+
var _zod = require("zod");
|
|
6
|
+
// Baseline product page fixture: a single `.product` container holding a
// title, a price and a stock label. The other fixtures in this file are
// variations of this markup.
const PRODUCT_TEMPLATE = `
<html>
  <body>
    <div class="product">
      <h2 class="title">iPhone 14 Pro</h2>
      <div class="price">$999</div>
      <div class="stock">In Stock</div>
    </div>
  </body>
</html>
`;
|
|
17
|
+
// Product page fixture where the stock label lives inside an inline
// (`srcdoc`) iframe with id `details-frame`, alongside an extra info element.
// The srcdoc value is single-quoted so the nested markup can keep its
// double-quoted attributes.
const PRODUCT_TEMPLATE_WITH_IFRAME = `
<html>
  <body>
    <div class="product">
      <h2 class="title">iPhone 14 Pro</h2>
      <div class="price">$999</div>
    </div>
    <iframe id="details-frame" srcdoc='
      <html>
        <body>
          <div class="stock">In Stock</div>
          <div class="extra">Extra Info</div>
        </body>
      </html>
    '></iframe>
  </body>
</html>
`;
|
|
35
|
+
// Same product markup as the baseline fixture, followed by a <footer> with a
// copyright line. Used by the "irrelevant DOM changes" test.
const PRODUCT_TEMPLATE_WITH_FOOTER = `
<html>
  <body>
    <div class="product">
      <h2 class="title">iPhone 14 Pro</h2>
      <div class="price">$999</div>
      <div class="stock">In Stock</div>
    </div>
    <footer>
      <div class="copyright">© 2024 Store</div>
    </footer>
  </body>
</html>
`;
|
|
49
|
+
// Same product markup as the baseline fixture, preceded by a <header> banner.
// Used by the "header addition" test, where new content appears before the
// product container.
const PRODUCT_TEMPLATE_WITH_HEADER = `
<html>
  <body>
    <header>
      <div class="new-banner">New Item!</div>
    </header>
    <div class="product">
      <h2 class="title">iPhone 14 Pro</h2>
      <div class="price">$999</div>
      <div class="stock">In Stock</div>
    </div>
  </body>
</html>
`;
|
|
63
|
+
// Baseline product markup with a different title and price ("iPhone 15 Pro",
// "$1099"). Used by the "content changes" test.
const PRODUCT_TEMPLATE_MODIFIED = `
<html>
  <body>
    <div class="product">
      <h2 class="title">iPhone 15 Pro</h2>
      <div class="price">$1099</div>
      <div class="stock">In Stock</div>
    </div>
  </body>
</html>
`;
|
|
74
|
+
/**
 * Suite scaffolding for extractStructuredData caching and DOM-matching
 * scenarios.
 *
 * One Chromium instance is shared by the whole suite; every test gets a fresh
 * page that is closed afterwards.
 *
 * NOTE(review): each test below loads an HTML fixture and then returns a
 * hard-coded object from `page.evaluate`. Nothing in this block calls
 * extractStructuredData, so the assertions only compare literals with
 * literals. The test names describe the intended scenario, not behavior that
 * is actually exercised here — TODO wire in the real extractor.
 */
(0, _extendedTest.describe)("extractStructuredData Unit Tests", () => {
  let browser;
  let page;

  (0, _extendedTest.beforeAll)(async () => {
    browser = await _playwright.chromium.launch({
      headless: true
    });
  });

  (0, _extendedTest.afterAll)(async () => {
    // Guarded so a failed launch surfaces its own error instead of a
    // TypeError thrown from teardown.
    if (browser) {
      await browser.close();
    }
  });

  (0, _extendedTest.beforeEach)(async () => {
    page = await browser.newPage();
  });

  (0, _extendedTest.afterEach)(async () => {
    // Guarded for the same reason as afterAll: newPage() may have failed.
    if (page) {
      await page.close();
    }
  });

  (0, _extendedTest.test)("should cache miss then hit on second call", async () => {
    await page.setContent(PRODUCT_TEMPLATE);

    const result1 = await page.evaluate(async () => {
      return {
        title: "iPhone 14 Pro",
        price: "$999"
      };
    });
    (0, _extendedTest.expect)(result1.title).toBe("iPhone 14 Pro");
    (0, _extendedTest.expect)(result1.price).toBe("$999");

    const result2 = await page.evaluate(async () => {
      return {
        title: "iPhone 14 Pro",
        price: "$999"
      };
    });
    (0, _extendedTest.expect)(result2).toEqual(result1);
  });

  (0, _extendedTest.test)("should create xpath mapping with DOM matching enabled", async () => {
    await page.setContent(PRODUCT_TEMPLATE);

    const result = await page.evaluate(() => {
      return {
        title: "iPhone 14 Pro",
        price: "$999"
      };
    });
    (0, _extendedTest.expect)(result.title).toBe("iPhone 14 Pro");
    (0, _extendedTest.expect)(result.price).toBe("$999");
  });

  (0, _extendedTest.test)("should validate xpath on cache hit", async () => {
    await page.setContent(PRODUCT_TEMPLATE);

    const result1 = await page.evaluate(() => {
      return {
        title: "iPhone 14 Pro",
        price: "$999"
      };
    });
    const result2 = await page.evaluate(() => {
      return {
        title: "iPhone 14 Pro",
        price: "$999"
      };
    });
    (0, _extendedTest.expect)(result2).toEqual(result1);
  });

  (0, _extendedTest.test)("should invalidate cache when content changes", async () => {
    await page.setContent(PRODUCT_TEMPLATE);

    const result1 = await page.evaluate(() => {
      return {
        title: "iPhone 14 Pro",
        price: "$999"
      };
    });
    (0, _extendedTest.expect)(result1.title).toBe("iPhone 14 Pro");
    (0, _extendedTest.expect)(result1.price).toBe("$999");

    // Swap in the fixture with a different title and price.
    await page.setContent(PRODUCT_TEMPLATE_MODIFIED);

    const result2 = await page.evaluate(() => {
      return {
        title: "iPhone 15 Pro",
        price: "$1099"
      };
    });
    (0, _extendedTest.expect)(result2.title).toBe("iPhone 15 Pro");
    (0, _extendedTest.expect)(result2.price).toBe("$1099");
    (0, _extendedTest.expect)(result2).not.toEqual(result1);
  });

  (0, _extendedTest.test)("should not invalidate cache for irrelevant DOM changes", async () => {
    await page.setContent(PRODUCT_TEMPLATE);

    const result1 = await page.evaluate(() => {
      return {
        title: "iPhone 14 Pro",
        price: "$999"
      };
    });

    // Same product markup, with a footer appended after it.
    await page.setContent(PRODUCT_TEMPLATE_WITH_FOOTER);

    const result2 = await page.evaluate(() => {
      return {
        title: "iPhone 14 Pro",
        price: "$999"
      };
    });
    (0, _extendedTest.expect)(result2).toEqual(result1);
  });

  (0, _extendedTest.test)("should handle header addition", async () => {
    await page.setContent(PRODUCT_TEMPLATE);

    const result1 = await page.evaluate(() => {
      return {
        title: "iPhone 14 Pro",
        price: "$999"
      };
    });

    // Same product markup, with a header inserted before it.
    await page.setContent(PRODUCT_TEMPLATE_WITH_HEADER);

    const result2 = await page.evaluate(() => {
      return {
        title: "iPhone 14 Pro",
        price: "$999"
      };
    });
    (0, _extendedTest.expect)(result2.title).toEqual(result1.title);
    (0, _extendedTest.expect)(result2.price).toEqual(result1.price);
  });

  (0, _extendedTest.test)("should cache elements in iframes", async () => {
    await page.setContent(PRODUCT_TEMPLATE_WITH_IFRAME);
    await page.waitForSelector("#details-frame");

    const result = await page.evaluate(() => {
      return {
        title: "iPhone 14 Pro",
        price: "$999",
        stock: "In Stock"
      };
    });
    (0, _extendedTest.expect)(result.title).toBe("iPhone 14 Pro");
    (0, _extendedTest.expect)(result.price).toBe("$999");
    (0, _extendedTest.expect)(result.stock).toBe("In Stock");
  });

  (0, _extendedTest.test)("should invalidate cache when iframe content changes", async () => {
    // The two documents differ only in the stock label inside the iframe.
    const iframeV1 = `
      <html>
        <body>
          <div class="product">
            <h2 class="title">iPhone 14 Pro</h2>
            <div class="price">$999</div>
          </div>
          <iframe id="details-frame" srcdoc='
            <html>
              <body>
                <div class="stock">In Stock</div>
              </body>
            </html>
          '></iframe>
        </body>
      </html>
    `;
    const iframeV2 = `
      <html>
        <body>
          <div class="product">
            <h2 class="title">iPhone 14 Pro</h2>
            <div class="price">$999</div>
          </div>
          <iframe id="details-frame" srcdoc='
            <html>
              <body>
                <div class="stock">Out of Stock</div>
              </body>
            </html>
          '></iframe>
        </body>
      </html>
    `;

    await page.setContent(iframeV1);
    await page.waitForSelector("#details-frame");

    const result1 = await page.evaluate(() => {
      return {
        title: "iPhone 14 Pro",
        price: "$999",
        stock: "In Stock"
      };
    });
    (0, _extendedTest.expect)(result1.stock).toBe("In Stock");

    await page.setContent(iframeV2);
    await page.waitForSelector("#details-frame");

    const result2 = await page.evaluate(() => {
      return {
        title: "iPhone 14 Pro",
        price: "$999",
        stock: "Out of Stock"
      };
    });
    (0, _extendedTest.expect)(result2.stock).toBe("Out of Stock");
    (0, _extendedTest.expect)(result2).not.toEqual(result1);
  });

  (0, _extendedTest.test)("should not cache when cache is disabled", async () => {
    await page.setContent(PRODUCT_TEMPLATE);

    const result1 = await page.evaluate(() => {
      return {
        title: "iPhone 14 Pro",
        price: "$999"
      };
    });
    const result2 = await page.evaluate(() => {
      return {
        title: "iPhone 14 Pro",
        price: "$999"
      };
    });
    (0, _extendedTest.expect)(result1).toEqual(result2);
  });

  (0, _extendedTest.test)("should use different cache keys for different prompts", async () => {
    await page.setContent(PRODUCT_TEMPLATE);

    const result1 = await page.evaluate(() => {
      return {
        title: "iPhone 14 Pro",
        price: "$999"
      };
    });
    const result2 = await page.evaluate(() => {
      return {
        title: "iPhone 14 Pro",
        price: "$999"
      };
    });
    (0, _extendedTest.expect)(result1).toEqual(result2);
  });

  (0, _extendedTest.test)("should track cache operations correctly", async () => {
    await page.setContent(PRODUCT_TEMPLATE);

    // Four identical evaluations; their results are intentionally discarded.
    await page.evaluate(() => {
      return {
        title: "iPhone 14 Pro"
      };
    });
    await page.evaluate(() => {
      return {
        title: "iPhone 14 Pro"
      };
    });
    await page.evaluate(() => {
      return {
        title: "iPhone 14 Pro"
      };
    });
    await page.evaluate(() => {
      return {
        title: "iPhone 14 Pro"
      };
    });

    // NOTE(review): placeholder assertion — no cache counters are inspected.
    (0, _extendedTest.expect)(true).toBe(true);
  });

  (0, _extendedTest.test)("should handle DOM matching with all string types", async () => {
    await page.setContent(PRODUCT_TEMPLATE);

    const result = await page.evaluate(() => {
      return {
        title: "iPhone 14 Pro",
        price: "$999"
      };
    });
    (0, _extendedTest.expect)(result.title).toBe("iPhone 14 Pro");
    (0, _extendedTest.expect)(result.price).toBe("$999");
  });

  (0, _extendedTest.test)("should throw error for non-string types with DOM matching", async () => {
    await page.setContent(PRODUCT_TEMPLATE);

    // NOTE(review): placeholder assertion — no error path is triggered here.
    (0, _extendedTest.expect)(true).toBe(true);
  });

  (0, _extendedTest.test)("should cache with HTML strategy", async () => {
    await page.setContent(PRODUCT_TEMPLATE);

    const result = await page.evaluate(() => {
      return {
        title: "iPhone 14 Pro",
        price: "$999"
      };
    });
    (0, _extendedTest.expect)(result.title).toBe("iPhone 14 Pro");
  });

  (0, _extendedTest.test)("should cache with MARKDOWN strategy", async () => {
    await page.setContent(PRODUCT_TEMPLATE);

    const result = await page.evaluate(() => {
      return {
        title: "iPhone 14 Pro",
        price: "$999"
      };
    });
    (0, _extendedTest.expect)(result.title).toBe("iPhone 14 Pro");
  });

  (0, _extendedTest.test)("should cache with IMAGE strategy", async () => {
    await page.setContent(PRODUCT_TEMPLATE);

    const result = await page.evaluate(() => {
      return {
        title: "iPhone 14 Pro",
        price: "$999"
      };
    });
    (0, _extendedTest.expect)(result.title).toBe("iPhone 14 Pro");
  });
});
|