@intuned/browser-dev 0.1.8-dev.0 → 0.1.10-dev.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. package/README.md +85 -143
  2. package/dist/ai/export.d.ts +291 -143
  3. package/dist/ai/extractStructuredData.js +21 -27
  4. package/dist/ai/extractStructuredDataUsingAi.js +24 -1
  5. package/dist/ai/index.d.ts +291 -143
  6. package/dist/ai/tests/testCreateMatchesMapping.spec.js +216 -0
  7. package/dist/ai/tests/testExtractStructuredData.spec.js +348 -2
  8. package/dist/ai/tests/testExtractStructuredDataDomMatchingIframes.spec.js +459 -0
  9. package/dist/ai/tests/testExtractStructuredDataUnit.spec.js +375 -0
  10. package/dist/ai/tests/testMatching.spec.js +342 -0
  11. package/dist/ai/tests/testValidateMatchesMapping.spec.js +265 -0
  12. package/dist/common/Logger/index.js +2 -2
  13. package/dist/common/extendedTest.js +38 -30
  14. package/dist/common/frame_utils/frameTree.js +116 -0
  15. package/dist/common/frame_utils/getContentWithNestedIframes.js +13 -0
  16. package/dist/common/frame_utils/index.js +95 -0
  17. package/dist/common/frame_utils/stitchIframe.js +105 -0
  18. package/dist/{helpers → common}/frame_utils/tests/testFindAllIframes.spec.js +24 -15
  19. package/dist/common/frame_utils/tests/testGetContentWithNestedIframes.spec.js +241 -0
  20. package/dist/common/frame_utils/utils.js +91 -0
  21. package/dist/common/getSimplifiedHtml.js +20 -20
  22. package/dist/common/matching/matching.js +91 -16
  23. package/dist/common/tests/matching.test.js +225 -0
  24. package/dist/common/tests/testGetSimplifiedHtml.spec.js +324 -0
  25. package/dist/helpers/export.d.ts +702 -575
  26. package/dist/helpers/extractMarkdown.js +16 -7
  27. package/dist/helpers/index.d.ts +702 -575
  28. package/dist/helpers/tests/testExtractMarkdown.spec.js +29 -0
  29. package/dist/helpers/waitForDomSettled.js +4 -4
  30. package/dist/helpers/withNetworkSettledWait.js +2 -7
  31. package/dist/optimized-extractors/export.d.ts +17 -18
  32. package/dist/optimized-extractors/index.d.ts +17 -18
  33. package/dist/types/intuned-runtime.d.ts +6 -32
  34. package/how-to-generate-docs.md +40 -28
  35. package/package.json +2 -2
  36. package/dist/helpers/frame_utils/constants.js +0 -8
  37. package/dist/helpers/frame_utils/findAllIframes.js +0 -82
  38. package/dist/helpers/frame_utils/index.js +0 -44
  39. /package/dist/{helpers → common}/frame_utils/checkFrameAllowsAsyncScripts.js +0 -0
  40. /package/dist/{helpers → common}/frame_utils/getContainerFrame.js +0 -0
@@ -0,0 +1,459 @@
1
+ "use strict";
2
+
3
+ var _extendedTest = require("../../common/extendedTest");
4
+ var _vitest = require("vitest");
5
+ var _ = require("..");
6
+ var _playwright = require("playwright");
7
+ var _zod = require("zod");
8
// Recording in-memory cache created through vi.hoisted; the instance is what
// the vi.mock factory for "../../intunedServices/cache/cache" hands out as
// `cache`. Besides storing values it logs every get/set call and counts
// hits and misses so tests can assert on cache traffic.
const mockCacheInstance = _vitest.vi.hoisted(() => {
  // Fresh bookkeeping state; used on construction and again by clear().
  const emptyStats = () => ({
    getCalls: [],
    setCalls: [],
    cacheHits: 0,
    cacheMisses: 0,
  });

  class MockCache {
    constructor() {
      this.storage = new Map();
      Object.assign(this, emptyStats());
    }

    // Looks up `key`, records the call, and counts a miss when nothing
    // (i.e. `undefined`) is stored under it, otherwise a hit.
    async get(key) {
      this.getCalls.push(key);
      const cached = this.storage.get(key);
      if (cached === undefined) {
        this.cacheMisses += 1;
      } else {
        this.cacheHits += 1;
      }
      return cached;
    }

    // Stores `value` under `key` and records the [key, value] pair.
    async set(key, value) {
      this.setCalls.push([key, value]);
      this.storage.set(key, value);
    }

    // Drops all stored entries and resets the call logs and counters.
    clear() {
      this.storage.clear();
      Object.assign(this, emptyStats());
    }
  }

  return new MockCache();
});
39
// Swap the cache module for the recording mock: its `cache` export becomes
// the hoisted mockCacheInstance.
_vitest.vi.mock("../../intunedServices/cache/cache", () => {
  return { cache: mockCacheInstance };
});
42
// Zod schema for an extracted product: three described string fields
// (name, price, stock).
const ProductDataSchema = (() => {
  const { z } = _zod;
  const name = z.string().describe("Product name");
  const price = z.string().describe("Product price");
  const stock = z.string().describe("Stock status");
  return z.object({ name, price, stock });
})();
47
// Zod schema for an extracted list item: described string fields
// `title` and `description`.
const ListItemSchema = (() => {
  const { z } = _zod;
  const title = z.string().describe("Item title");
  const description = z.string().describe("Item description");
  return z.object({ title, description });
})();
51
+ _extendedTest.describe.skip("DOM Matching with Iframes", () => {
52
// Shared Playwright handles for the suite: one headless Chromium instance
// for all tests, and a fresh page (plus a cleared mock cache) per test.
let browser;
let page;

(0, _extendedTest.beforeAll)(async () => {
  browser = await _playwright.chromium.launch({ headless: true });
});

(0, _extendedTest.afterAll)(async () => {
  // `browser` is still undefined if the launch above rejected; guarding here
  // keeps teardown from raising a secondary TypeError that would hide the
  // original launch failure.
  await browser?.close();
  browser = undefined;
});

(0, _extendedTest.beforeEach)(async () => {
  page = await browser.newPage();
  // Every test asserts absolute call counts, so start from an empty cache.
  mockCacheInstance.clear();
});

(0, _extendedTest.afterEach)(async () => {
  // Same guard as afterAll: newPage() may have failed before `page` was set.
  await page?.close();
  // Forget the closed page so a later failed setup cannot reuse a stale handle.
  page = undefined;
});
69
// Scenario: the same product iframe is extracted twice; the second page only
// gains a <footer> after the iframe. The assertions expect identical results
// and cache traffic of 2 gets, 1 set, 1 hit and 1 miss.
(0, _extendedTest.test)("cache should be valid when element is added after iframe", async () => {
  const { expect } = _extendedTest;
  const { extractStructuredData } = _;

  // Loads `html` into the page and runs the HTML-strategy extraction on it.
  const runExtraction = async (html) => {
    await page.setContent(html, { waitUntil: "domcontentloaded" });
    // Presumably gives the srcdoc iframe time to finish loading — TODO confirm.
    await page.waitForTimeout(500);
    return extractStructuredData({
      source: page,
      dataSchema: ProductDataSchema,
      strategy: "HTML",
      prompt: "Extract the product information from the page",
      enableDomMatching: true,
      enableCache: true,
      model: "claude-haiku-4-5-20251001",
      apiKey: process.env.ANTHROPIC_API_KEY,
    });
  };

  // Both runs must yield the same iPhone record.
  const assertIphone = (product) => {
    expect(product).toBeTruthy();
    expect(product.name).toBe("iPhone 15");
    expect(product.price).toBe("$999");
    expect(product.stock).toBe("In Stock");
  };

  const initialHtml = `
<html>
<body>
<h1>Product Page</h1>
<iframe id="product-data" srcdoc='
<ul class="product-list">
<li class="product-item">
<span class="name">iPhone 15</span>
<span class="price">$999</span>
<span class="stock">In Stock</span>
</li>
</ul>
'></iframe>
</body>
</html>
`;
  const firstProduct = await runExtraction(initialHtml);
  assertIphone(firstProduct);

  const modifiedHtml = `
<html>
<body>
<h1>Product Page</h1>
<iframe id="product-data" srcdoc='
<ul class="product-list">
<li class="product-item">
<span class="name">iPhone 15</span>
<span class="price">$999</span>
<span class="stock">In Stock</span>
</li>
</ul>
'></iframe>
<!-- NEW ELEMENT ADDED AFTER IFRAME -->
<footer>
<p>Footer content added</p>
</footer>
</body>
</html>
`;
  const secondProduct = await runExtraction(modifiedHtml);
  assertIphone(secondProduct);
  expect(firstProduct).toEqual(secondProduct);

  // One lookup per run; only the first run writes to the cache.
  const { getCalls, setCalls, cacheHits, cacheMisses } = mockCacheInstance;
  expect(getCalls.length).toBe(2);
  expect(setCalls.length).toBe(1);
  expect(cacheHits).toBe(1);
  expect(cacheMisses).toBe(1);
});
150
// Scenario: a new <li> is inserted before the target item inside the iframe.
// The assertions expect 2 gets, 2 sets, 1 hit and 1 miss — i.e. the second
// run finds an entry but still writes a new one (presumably because the
// cached match no longer validates — TODO confirm against the matcher).
(0, _extendedTest.test)("cache should be invalid when element is added before target in iframe", async () => {
  const { expect } = _extendedTest;
  const { extractStructuredData } = _;

  // Loads `html` into the page and runs the HTML-strategy extraction on it.
  const runExtraction = async (html) => {
    await page.setContent(html, { waitUntil: "domcontentloaded" });
    // Presumably gives the srcdoc iframe time to finish loading — TODO confirm.
    await page.waitForTimeout(500);
    return extractStructuredData({
      source: page,
      dataSchema: ListItemSchema,
      strategy: "HTML",
      prompt: "Extract the item information",
      enableDomMatching: true,
      enableCache: true,
      model: "claude-haiku-4-5-20251001",
      apiKey: process.env.ANTHROPIC_API_KEY,
    });
  };

  const initialHtml = `
<html>
<body>
<h1>Items Page</h1>
<iframe id="items-data" srcdoc='
<ul class="item-list">
<li class="item" data-id="target">
<span class="title">Target Item</span>
<span class="desc">This is the target item to extract</span>
</li>
</ul>
'></iframe>
</body>
</html>
`;
  const firstItem = await runExtraction(initialHtml);
  expect(firstItem).toBeTruthy();
  expect(firstItem.title.toLowerCase()).toBe("target item");
  expect(firstItem.description.toLowerCase()).toBe("this is the target item to extract");

  const modifiedHtml = `
<html>
<body>
<h1>Items Page</h1>
<iframe id="items-data" srcdoc='
<ul class="item-list">
<!-- NEW ELEMENT ADDED BEFORE TARGET -->
<li class="item" data-id="new">
<span class="title">New Item</span>
<span class="desc">This is a new item added before target</span>
</li>
<li class="item" data-id="target">
<span class="title">Target Item</span>
<span class="desc">This is the target item to extract</span>
</li>
</ul>
'></iframe>
</body>
</html>
`;
  const secondItem = await runExtraction(modifiedHtml);
  expect(secondItem).toBeTruthy();
  // With two items on the page either one is an acceptable answer.
  const acceptedTitles = ["target item", "new item"];
  expect(acceptedTitles.includes(secondItem.title.toLowerCase())).toBe(true);

  const { getCalls, setCalls, cacheHits, cacheMisses } = mockCacheInstance;
  expect(getCalls.length).toBe(2);
  expect(setCalls.length).toBe(2);
  expect(cacheHits).toBe(1);
  expect(cacheMisses).toBe(1);
});
226
// Scenario: the first <li> is removed so the target item shifts position
// inside the iframe. The assertions expect 2 gets, 2 sets, 1 hit and 1 miss —
// i.e. the second run finds an entry but still writes a new one (presumably
// because the cached match no longer validates — TODO confirm).
//
// Both extraction calls now go through one helper so they share identical
// options. Previously the first call omitted `apiKey` while the second passed
// process.env.ANTHROPIC_API_KEY, so the two runs were configured differently.
(0, _extendedTest.test)("cache should be invalid when element is removed before target in iframe", async () => {
  const { expect } = _extendedTest;
  const { extractStructuredData } = _;

  // Loads `html` into the page and runs the HTML-strategy extraction on it.
  const runExtraction = async (html) => {
    await page.setContent(html, { waitUntil: "domcontentloaded" });
    // Presumably gives the srcdoc iframe time to finish loading — TODO confirm.
    await page.waitForTimeout(500);
    return extractStructuredData({
      source: page,
      dataSchema: ListItemSchema,
      strategy: "HTML",
      prompt: "Extract the second item information (the target item)",
      enableDomMatching: true,
      enableCache: true,
      model: "claude-haiku-4-5-20251001",
      apiKey: process.env.ANTHROPIC_API_KEY,
    });
  };

  // The model may phrase the title either way; accept both fragments.
  const looksLikeTarget = (item) =>
    item.title.includes("Second Item") || item.title.includes("Target");

  const initialHtml = `
<html>
<body>
<iframe id="items-data" srcdoc='
<ul class="item-list">
<li class="item" data-id="first">
<span class="title">First Item</span>
<span class="desc">First item description</span>
</li>
<li class="item" data-id="target">
<span class="title">Second Item (Target)</span>
<span class="desc">Target item description</span>
</li>
</ul>
'></iframe>
</body>
</html>
`;
  const firstItem = await runExtraction(initialHtml);
  expect(firstItem).toBeTruthy();
  expect(looksLikeTarget(firstItem)).toBe(true);

  const modifiedHtml = `
<html>
<body>
<iframe id="items-data" srcdoc='
<ul class="item-list">
<!-- FIRST ITEM REMOVED -->
<li class="item" data-id="target">
<span class="title">Second Item (Target)</span>
<span class="desc">Target item description</span>
</li>
</ul>
'></iframe>
</body>
</html>
`;
  const secondItem = await runExtraction(modifiedHtml);
  expect(secondItem).toBeTruthy();
  expect(looksLikeTarget(secondItem)).toBe(true);

  const { getCalls, setCalls, cacheHits, cacheMisses } = mockCacheInstance;
  expect(getCalls.length).toBe(2);
  expect(setCalls.length).toBe(2);
  expect(cacheHits).toBe(1);
  expect(cacheMisses).toBe(1);
});
298
// Scenario: the product lives in an iframe nested inside another iframe; the
// second page only gains a <footer> after the whole nested structure. The
// assertions expect equal results and 2 gets, 1 set, 1 hit and 1 miss.
(0, _extendedTest.test)("cache should be valid with nested iframes and trailing elements", async () => {
  const { expect } = _extendedTest;
  const { extractStructuredData } = _;

  // Loads `html` into the page and runs the HTML-strategy extraction on it.
  const runExtraction = async (html) => {
    await page.setContent(html, { waitUntil: "domcontentloaded" });
    // Longer pause than the single-iframe tests; presumably to let both
    // nested srcdoc frames load — TODO confirm.
    await page.waitForTimeout(1000);
    return extractStructuredData({
      source: page,
      dataSchema: ProductDataSchema,
      strategy: "HTML",
      prompt: "Extract the product information from the nested iframe",
      enableDomMatching: true,
      enableCache: true,
      model: "claude-haiku-4-5-20251001",
      apiKey: process.env.ANTHROPIC_API_KEY,
    });
  };

  const initialHtml = `
<html>
<body>
<h1>Nested Iframe Test</h1>
<iframe id="outer" srcdoc='
<div class="outer-container">
<h2>Outer Frame</h2>
<iframe id="inner" srcdoc="
<ul class=&quot;product-list&quot;>
<li class=&quot;product&quot;>
<span class=&quot;name&quot;>Nested Product</span>
<span class=&quot;price&quot;>$599</span>
<span class=&quot;stock&quot;>Available</span>
</li>
</ul>
"></iframe>
</div>
'></iframe>
</body>
</html>
`;
  const firstProduct = await runExtraction(initialHtml);
  expect(firstProduct).toBeTruthy();
  expect(firstProduct.name).toBe("Nested Product");
  expect(firstProduct.price).toBe("$599");

  const modifiedHtml = `
<html>
<body>
<h1>Nested Iframe Test</h1>
<iframe id="outer" srcdoc='
<div class="outer-container">
<h2>Outer Frame</h2>
<iframe id="inner" srcdoc="
<ul class=&quot;product-list&quot;>
<li class=&quot;product&quot;>
<span class=&quot;name&quot;>Nested Product</span>
<span class=&quot;price&quot;>$599</span>
<span class=&quot;stock&quot;>Available</span>
</li>
</ul>
"></iframe>
</div>
'></iframe>
<!-- NEW FOOTER ADDED AFTER ENTIRE NESTED STRUCTURE -->
<footer>
<p>Footer after nested iframes</p>
</footer>
</body>
</html>
`;
  const secondProduct = await runExtraction(modifiedHtml);
  expect(secondProduct).toBeTruthy();
  expect(secondProduct).toEqual(firstProduct);

  // One lookup per run; only the first run writes to the cache.
  const { getCalls, setCalls, cacheHits, cacheMisses } = mockCacheInstance;
  expect(getCalls.length).toBe(2);
  expect(setCalls.length).toBe(1);
  expect(cacheHits).toBe(1);
  expect(cacheMisses).toBe(1);
});
385
// Scenario: same flow as the HTML-strategy tests but with strategy
// "MARKDOWN"; the second page only gains an <aside> after the <article>.
// The assertions expect equal results and 2 gets, 1 set, 1 hit and 1 miss.
// Unlike the HTML-strategy tests, no `apiKey` is passed here.
(0, _extendedTest.test)("cache with MARKDOWN strategy and iframes", async () => {
  const { expect } = _extendedTest;
  const { extractStructuredData } = _;

  // Loads `html` into the page and runs the MARKDOWN-strategy extraction.
  const runExtraction = async (html) => {
    await page.setContent(html, { waitUntil: "domcontentloaded" });
    // Presumably gives the srcdoc iframe time to finish loading — TODO confirm.
    await page.waitForTimeout(500);
    return extractStructuredData({
      source: page,
      dataSchema: ListItemSchema,
      strategy: "MARKDOWN",
      prompt: "Extract the title and description from the highlighted paragraph.",
      enableDomMatching: true,
      enableCache: true,
      model: "claude-haiku-4-5-20251001",
    });
  };

  const initialHtml = `
<html>
<body>
<article>
<h1>Article Title</h1>
<iframe id="content" srcdoc='
<div class="article-content">
<p class="highlight">
<strong class="title">Key Point</strong>
<span class="desc">Important information here</span>
</p>
</div>
'></iframe>
</article>
</body>
</html>
`;
  const firstItem = await runExtraction(initialHtml);
  expect(firstItem).toBeTruthy();
  // NOTE(review): the prompt targets the highlighted paragraph (whose title
  // text is "Key Point"), yet this expects the <h1> text — confirm intent.
  expect(firstItem.title.toLowerCase()).toContain("article title");
  expect(firstItem.description.toLowerCase()).toContain("important information");

  const modifiedHtml = `
<html>
<body>
<article>
<h1>Article Title</h1>
<iframe id="content" srcdoc='
<div class="article-content">
<p class="highlight">
<strong class="title">Key Point</strong>
<span class="desc">Important information here</span>
</p>
</div>
'></iframe>
</article>
<aside>Related articles</aside>
</body>
</html>
`;
  const secondItem = await runExtraction(modifiedHtml);
  expect(secondItem).toBeTruthy();
  expect(secondItem).toEqual(firstItem);

  // One lookup per run; only the first run writes to the cache.
  const { getCalls, setCalls, cacheHits, cacheMisses } = mockCacheInstance;
  expect(getCalls.length).toBe(2);
  expect(setCalls.length).toBe(1);
  expect(cacheHits).toBe(1);
  expect(cacheMisses).toBe(1);
});
459
+ });