@intuned/browser-dev 0.1.8-dev.0 → 0.1.10-dev.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. package/README.md +85 -143
  2. package/dist/ai/export.d.ts +291 -143
  3. package/dist/ai/extractStructuredData.js +21 -27
  4. package/dist/ai/extractStructuredDataUsingAi.js +24 -1
  5. package/dist/ai/index.d.ts +291 -143
  6. package/dist/ai/tests/testCreateMatchesMapping.spec.js +216 -0
  7. package/dist/ai/tests/testExtractStructuredData.spec.js +348 -2
  8. package/dist/ai/tests/testExtractStructuredDataDomMatchingIframes.spec.js +459 -0
  9. package/dist/ai/tests/testExtractStructuredDataUnit.spec.js +375 -0
  10. package/dist/ai/tests/testMatching.spec.js +342 -0
  11. package/dist/ai/tests/testValidateMatchesMapping.spec.js +265 -0
  12. package/dist/common/Logger/index.js +2 -2
  13. package/dist/common/extendedTest.js +38 -30
  14. package/dist/common/frame_utils/frameTree.js +116 -0
  15. package/dist/common/frame_utils/getContentWithNestedIframes.js +13 -0
  16. package/dist/common/frame_utils/index.js +95 -0
  17. package/dist/common/frame_utils/stitchIframe.js +105 -0
  18. package/dist/{helpers → common}/frame_utils/tests/testFindAllIframes.spec.js +24 -15
  19. package/dist/common/frame_utils/tests/testGetContentWithNestedIframes.spec.js +241 -0
  20. package/dist/common/frame_utils/utils.js +91 -0
  21. package/dist/common/getSimplifiedHtml.js +20 -20
  22. package/dist/common/matching/matching.js +91 -16
  23. package/dist/common/tests/matching.test.js +225 -0
  24. package/dist/common/tests/testGetSimplifiedHtml.spec.js +324 -0
  25. package/dist/helpers/export.d.ts +702 -575
  26. package/dist/helpers/extractMarkdown.js +16 -7
  27. package/dist/helpers/index.d.ts +702 -575
  28. package/dist/helpers/tests/testExtractMarkdown.spec.js +29 -0
  29. package/dist/helpers/waitForDomSettled.js +4 -4
  30. package/dist/helpers/withNetworkSettledWait.js +2 -7
  31. package/dist/optimized-extractors/export.d.ts +17 -18
  32. package/dist/optimized-extractors/index.d.ts +17 -18
  33. package/dist/types/intuned-runtime.d.ts +6 -32
  34. package/how-to-generate-docs.md +40 -28
  35. package/package.json +2 -2
  36. package/dist/helpers/frame_utils/constants.js +0 -8
  37. package/dist/helpers/frame_utils/findAllIframes.js +0 -82
  38. package/dist/helpers/frame_utils/index.js +0 -44
  39. /package/dist/{helpers → common}/frame_utils/checkFrameAllowsAsyncScripts.js +0 -0
  40. /package/dist/{helpers → common}/frame_utils/getContainerFrame.js +0 -0
@@ -0,0 +1,216 @@
1
+ "use strict";
2
+
3
+ var _extendedTest = require("../../common/extendedTest");
4
+ var _playwright = require("playwright");
5
+ var _matching = require("../../common/matching/matching");
6
// Fixture: a single product card whose title, price and stock each live in their own element.
+ const SIMPLE_PRODUCT_HTML = `
7
+ <html>
8
+ <body>
9
+ <div class="product">
10
+ <h2 class="title">iPhone 14 Pro</h2>
11
+ <div class="price">$999</div>
12
+ <div class="stock">In Stock</div>
13
+ </div>
14
+ </body>
15
+ </html>
16
+ `;
17
// Fixture: two product cards (#p1 and #p2), each with its own title and price element.
+ const MULTIPLE_PRODUCTS_HTML = `
18
+ <html>
19
+ <body>
20
+ <div class="product" id="p1">
21
+ <h2 class="title">iPhone 14 Pro</h2>
22
+ <div class="price">$999</div>
23
+ </div>
24
+ <div class="product" id="p2">
25
+ <h2 class="title">MacBook Air M2</h2>
26
+ <div class="price">$1199</div>
27
+ </div>
28
+ </body>
29
+ </html>
30
+ `;
31
// Fixture: title and price sit in the top-level document, while stock and rating
// exist only inside the srcdoc document of the #details-frame iframe.
+ const PRODUCT_WITH_IFRAME_HTML = `
32
+ <html>
33
+ <body>
34
+ <div class="product">
35
+ <h2 class="title">iPhone 14 Pro</h2>
36
+ <div class="price">$999</div>
37
+ </div>
38
+ <iframe id="details-frame" srcdoc='
39
+ <html>
40
+ <body>
41
+ <div class="stock">In Stock</div>
42
+ <div class="rating">4.5 stars</div>
43
+ </body>
44
+ </html>
45
+ '></iframe>
46
+ </body>
47
+ </html>
48
+ `;
49
// Fixture: "$999" is the full text of both the header <span class="price"> and the
// product <div class="price">, and also appears inside the footer text "Starting at $999".
+ const DUPLICATE_TEXT_HTML = `
50
+ <html>
51
+ <body>
52
+ <div class="header">
53
+ <span class="price">$999</span>
54
+ </div>
55
+ <div class="product">
56
+ <h2 class="title">iPhone 14 Pro</h2>
57
+ <div class="price">$999</div>
58
+ </div>
59
+ <div class="footer">
60
+ <span class="disclaimer">Starting at $999</span>
61
+ </div>
62
+ </body>
63
+ </html>
64
+ `;
/**
 * Browser-backed tests for `createMatchesMapping(page, extractedData)`.
 *
 * As exercised below, the function is expected to resolve to an object keyed by
 * each extracted string value, where every entry is an array of
 * `{ xpath, matched_value }` matches (an empty array when nothing matched).
 *
 * One Chromium instance is shared by the whole suite; every test gets a fresh page.
 */
(0, _extendedTest.describe)("createMatchesMapping", () => {
  const { test, expect, beforeAll, afterAll, beforeEach, afterEach } = _extendedTest;
  let browser;
  let page;

  /**
   * Asserts that `mapping` has every key in `keys`.
   * Keys are passed as single-element paths because a plain string such as
   * "4.5 stars" would be parsed as the dotted path ["4", "5 stars"].
   */
  const expectKeys = (mapping, keys) => {
    for (const key of keys) {
      expect(mapping).toHaveProperty([key]);
    }
  };

  /** Asserts that the first match recorded for each value echoes that value exactly. */
  const expectFirstMatchEchoes = (mapping, values) => {
    for (const value of values) {
      expect(mapping[value][0].matched_value).toBe(value);
    }
  };

  beforeAll(async () => {
    browser = await _playwright.chromium.launch({
      headless: true
    });
  });

  afterAll(async () => {
    // If launch() rejected, `browser` was never assigned; closing it here would
    // throw a TypeError that hides the original launch failure.
    if (browser) {
      await browser.close();
      browser = undefined;
    }
  });

  beforeEach(async () => {
    page = await browser.newPage();
  });

  afterEach(async () => {
    // Same reasoning as afterAll: only close a page that was actually created.
    if (page) {
      await page.close();
      page = undefined;
    }
  });

  test("should create mapping for dict data", async () => {
    await page.setContent(SIMPLE_PRODUCT_HTML);
    const extractedData = {
      title: "iPhone 14 Pro",
      price: "$999",
      stock: "In Stock"
    };
    const mapping = await (0, _matching.createMatchesMapping)(page, extractedData);
    expectKeys(mapping, ["iPhone 14 Pro", "$999", "In Stock"]);
    // Every entry must be a non-empty list of well-formed match records.
    for (const matches of Object.values(mapping)) {
      expect(Array.isArray(matches)).toBe(true);
      expect(matches.length).toBeGreaterThan(0);
      for (const match of matches) {
        expect(match).toHaveProperty("xpath");
        expect(match).toHaveProperty("matched_value");
        expect(typeof match.xpath).toBe("string");
        expect(typeof match.matched_value).toBe("string");
      }
    }
    expectFirstMatchEchoes(mapping, ["iPhone 14 Pro", "$999", "In Stock"]);
  });

  test("should create mapping for list of strings", async () => {
    await page.setContent(SIMPLE_PRODUCT_HTML);
    const extractedData = ["iPhone 14 Pro", "$999", "In Stock"];
    const mapping = await (0, _matching.createMatchesMapping)(page, extractedData);
    expect(Object.keys(mapping).length).toBe(3);
    expectKeys(mapping, ["iPhone 14 Pro", "$999", "In Stock"]);
    expectFirstMatchEchoes(mapping, ["iPhone 14 Pro", "$999"]);
  });

  test("should create mapping for list of dicts", async () => {
    await page.setContent(MULTIPLE_PRODUCTS_HTML);
    const extractedData = [{
      title: "iPhone 14 Pro",
      price: "$999"
    }, {
      title: "MacBook Air M2",
      price: "$1199"
    }];
    const mapping = await (0, _matching.createMatchesMapping)(page, extractedData);
    expectKeys(mapping, ["iPhone 14 Pro", "$999", "MacBook Air M2", "$1199"]);
    expect(mapping["iPhone 14 Pro"].length).toBeGreaterThanOrEqual(1);
    expect(mapping["MacBook Air M2"].length).toBeGreaterThanOrEqual(1);
    expectFirstMatchEchoes(mapping, ["iPhone 14 Pro", "MacBook Air M2"]);
  });

  test("should include matches from iframe content", async () => {
    await page.setContent(PRODUCT_WITH_IFRAME_HTML);
    // Waiting for the <iframe> element alone does not guarantee that its srcdoc
    // document has been parsed; wait for the last element inside the frame instead.
    await page.frameLocator("#details-frame").locator(".rating").waitFor({
      state: "attached"
    });
    const extractedData = {
      title: "iPhone 14 Pro",
      price: "$999",
      stock: "In Stock",
      rating: "4.5 stars"
    };
    const mapping = await (0, _matching.createMatchesMapping)(page, extractedData);
    expectKeys(mapping, ["iPhone 14 Pro", "$999", "In Stock", "4.5 stars"]);
    expect(mapping["In Stock"].length).toBeGreaterThan(0);
    expect(mapping["4.5 stars"].length).toBeGreaterThan(0);
    expectFirstMatchEchoes(mapping, ["In Stock", "4.5 stars"]);
  });

  test("should handle duplicate text in multiple locations", async () => {
    await page.setContent(DUPLICATE_TEXT_HTML);
    const extractedData = {
      title: "iPhone 14 Pro",
      price: "$999"
    };
    const mapping = await (0, _matching.createMatchesMapping)(page, extractedData);
    expectKeys(mapping, ["$999"]);
    const priceMatches = mapping["$999"];
    expect(priceMatches.length).toBeGreaterThanOrEqual(2);
    // The matches must point at distinct elements, not the same node twice.
    const uniqueXpaths = new Set(priceMatches.map(match => match.xpath));
    expect(uniqueXpaths.size).toBeGreaterThanOrEqual(2);
    expect(priceMatches.every(match => match.matched_value === "$999")).toBe(true);
    expect(priceMatches.every(match => match.xpath.length > 0)).toBe(true);
  });

  test("should handle empty extraction data", async () => {
    await page.setContent(SIMPLE_PRODUCT_HTML);
    for (const emptyInput of [{}, []]) {
      const mapping = await (0, _matching.createMatchesMapping)(page, emptyInput);
      expect(mapping).toEqual({});
      expect(Object.keys(mapping).length).toBe(0);
    }
  });

  test("should handle nonexistent text in DOM", async () => {
    await page.setContent(SIMPLE_PRODUCT_HTML);
    const extractedData = {
      title: "Samsung Galaxy S24",
      price: "$899"
    };
    const mapping = await (0, _matching.createMatchesMapping)(page, extractedData);
    // Values absent from the DOM still get a key, mapped to an empty array.
    expectKeys(mapping, ["Samsung Galaxy S24", "$899"]);
    expect(mapping["Samsung Galaxy S24"].length).toBe(0);
    expect(mapping["$899"].length).toBe(0);
    expect(Object.keys(mapping).length).toBe(2);
    expect(Array.isArray(mapping["Samsung Galaxy S24"])).toBe(true);
    expect(Array.isArray(mapping["$899"])).toBe(true);
  });

  test("should have consistent structure across different data types", async () => {
    await page.setContent(SIMPLE_PRODUCT_HTML);
    const dictMapping = await (0, _matching.createMatchesMapping)(page, {
      title: "iPhone 14 Pro"
    });
    const listMapping = await (0, _matching.createMatchesMapping)(page, ["iPhone 14 Pro"]);
    expectKeys(dictMapping, ["iPhone 14 Pro"]);
    expectKeys(listMapping, ["iPhone 14 Pro"]);
    const dictMatches = dictMapping["iPhone 14 Pro"];
    const listMatches = listMapping["iPhone 14 Pro"];
    expect(dictMatches.length).toBe(listMatches.length);
    dictMatches.forEach((dictMatch, index) => {
      expect(dictMatch.xpath).toBe(listMatches[index].xpath);
      expect(dictMatch.matched_value).toBe(listMatches[index].matched_value);
    });
    expect(dictMatches[0].matched_value).toBe("iPhone 14 Pro");
    expect(listMatches[0].matched_value).toBe("iPhone 14 Pro");
  });
});
@@ -267,8 +267,8 @@ _extendedTest.describe.skip("Extract data from page tests", () => {
267
267
  prompt: getPromptVariation(sharedPrompts.markdownStrategy),
268
268
  enableDomMatching: false,
269
269
  strategy: "MARKDOWN",
270
- model: "claude-3-7-sonnet-latest",
271
- apiKey: process.env.ANTHROPIC_API_KEY
270
+ model: "gpt-4o",
271
+ apiKey: process.env.OPENAI_API_KEY
272
272
  });
273
273
  (0, _extendedTest.expect)(data).toHaveProperty("title", "The Future of AI in 2024");
274
274
  (0, _extendedTest.expect)(data).toHaveProperty("author", "John Doe");
@@ -620,6 +620,352 @@ _extendedTest.describe.skip("Extract data from page tests", () => {
620
620
  });
621
621
  });
622
622
  });
/**
 * Iframe coverage for `extractStructuredData`: content that lives only inside
 * srcdoc iframes (single, nested, and alongside main-page content) must be
 * reachable with the HTML, MARKDOWN and IMAGE strategies, and results obtained
 * with DOM matching enabled must track changes to the iframe content.
 *
 * NOTE(review): `page` and `_` are provided by the enclosing suite/module, which
 * is outside this view. Every call passes ANTHROPIC_API_KEY without a `model`,
 * so presumably the library default model is used — confirm.
 */
(0, _extendedTest.describe)("Iframe Support", () => {
  const { test, expect } = _extendedTest;
  const PRODUCT_PROMPT = "Extract product title and price from anywhere on the page including iframes.";

  /**
   * Escapes HTML for use inside a single-quoted `srcdoc='…'` attribute.
   * `&` is escaped first so that entities produced for an inner (nested) iframe
   * survive the extra decoding pass performed when the outer attribute is parsed.
   */
  const toSrcdoc = html => html.replace(/&/g, "&amp;").replace(/'/g, "&apos;");

  /** Builds a fresh `{ title, price }` JSON schema so tests never share one object. */
  const makeTitlePriceSchema = () => ({
    type: "object",
    properties: {
      title: {
        type: "string"
      },
      price: {
        type: "string"
      }
    },
    required: ["title", "price"]
  });

  /**
   * Resolves once `contentSelector` is attached inside the iframe reached by
   * following `frameSelectors` from the page (outermost first). This replaces
   * fixed sleeps, which only guess at how long srcdoc iframes take to load.
   */
  const waitForFrameContent = async (frameSelectors, contentSelector) => {
    const innermost = frameSelectors.reduce((scope, frameSelector) => scope.frameLocator(frameSelector), page);
    await innermost.locator(contentSelector).waitFor({
      state: "attached"
    });
  };

  test("should extract data from content within iframes", async () => {
    const iframeContent = `
      <html>
        <body>
          <div class="product">
            <h2 class="title">iPhone 14 Pro</h2>
            <div class="price">$999</div>
            <div class="stock">In Stock</div>
          </div>
        </body>
      </html>
    `;
    const mainPageHtml = `
      <html>
        <body>
          <h1>Product Catalog</h1>
          <iframe id="product-frame" srcdoc='${toSrcdoc(iframeContent)}'></iframe>
          <p>Additional content</p>
        </body>
      </html>
    `;
    await page.setContent(mainPageHtml);
    await waitForFrameContent(["#product-frame"], ".stock");
    const result = await (0, _.extractStructuredData)({
      source: page,
      dataSchema: makeTitlePriceSchema(),
      prompt: PRODUCT_PROMPT,
      strategy: "HTML",
      apiKey: process.env.ANTHROPIC_API_KEY,
      enableDomMatching: false
    });
    expect(result.title).toBe("iPhone 14 Pro");
    expect(result.price).toBe("$999");
  });

  test("should work with nested iframes", async () => {
    const innerIframeContent = `
      <html>
        <body>
          <div class="nested-content">
            <span class="nested-title">Nested Product</span>
            <span class="nested-price">$500</span>
          </div>
        </body>
      </html>
    `;
    const outerIframeContent = `
      <html>
        <body>
          <h2>Outer Frame</h2>
          <iframe id="inner-iframe" srcdoc='${toSrcdoc(innerIframeContent)}'></iframe>
        </body>
      </html>
    `;
    const mainPageHtml = `
      <html>
        <body>
          <h1>Main Content</h1>
          <iframe id="outer-iframe" srcdoc='${toSrcdoc(outerIframeContent)}'></iframe>
        </body>
      </html>
    `;
    await page.setContent(mainPageHtml);
    await waitForFrameContent(["#outer-iframe", "#inner-iframe"], ".nested-price");
    const result = await (0, _.extractStructuredData)({
      source: page,
      dataSchema: makeTitlePriceSchema(),
      prompt: "Extract nested-title as title and nested-price as price from the page including all iframes.",
      strategy: "HTML",
      apiKey: process.env.ANTHROPIC_API_KEY,
      enableDomMatching: false
    });
    expect(result.title).toBe("Nested Product");
    expect(result.price).toBe("$500");
  });

  test("should extract data from iframes using MARKDOWN strategy", async () => {
    const iframeContent = `
      <html>
        <body>
          <div class="metadata">
            <time datetime="2024-03-15">March 15, 2024</time>
            <span class="read-time">8 min read</span>
          </div>
          <div class="tags">
            <span class="tag">AI</span>
            <span class="tag">Technology</span>
            <span class="tag">Future</span>
          </div>
        </body>
      </html>
    `;
    const mainPageHtml = `
      <html>
        <body>
          <article class="blog-post">
            <header>
              <h1>The Future of AI in 2024</h1>
              <div class="metadata">
                <span class="author">John Doe</span>
              </div>
            </header>
          </article>
          <iframe id="article-iframe" srcdoc='${toSrcdoc(iframeContent)}'></iframe>
        </body>
      </html>
    `;
    await page.setContent(mainPageHtml);
    // `:last-child` keeps the locator unique (three .tag elements exist).
    await waitForFrameContent(["#article-iframe"], ".tag:last-child");
    const result = await (0, _.extractStructuredData)({
      source: page,
      dataSchema: {
        type: "object",
        properties: {
          title: {
            type: "string"
          },
          author: {
            type: "string"
          },
          date: {
            type: "string"
          },
          readTime: {
            type: "string"
          },
          tags: {
            type: "array",
            items: {
              type: "string"
            },
            description: "Article tags - each tag must be a separate string in the array, do not combine multiple tags into one string"
          }
        },
        required: ["title", "author", "tags"]
      },
      prompt: "Extract article metadata including title, author, date, read time, and tags from both main page and iframe content. IMPORTANT: Extract each tag as a separate item in the tags array. Do not combine multiple tags into one.",
      strategy: "MARKDOWN",
      apiKey: process.env.ANTHROPIC_API_KEY,
      enableDomMatching: false
    });
    expect(result.title).toBe("The Future of AI in 2024");
    expect(result.author).toBe("John Doe");
    expect(result.date).toBe("March 15, 2024");
    expect(result.readTime).toBe("8 min read");
    for (const tag of ["AI", "Technology", "Future"]) {
      expect(result.tags).toContain(tag);
    }
    expect(result.tags.length).toBe(3);
  });

  test("should extract data from iframes using IMAGE strategy", async () => {
    const iframeContent = `
      <html>
        <body>
          <div class="profile-stats">
            <div class="stat">
              <span class="value">1,234</span>
              <span class="label">Followers</span>
            </div>
            <div class="stat">
              <span class="value">567</span>
              <span class="label">Following</span>
            </div>
          </div>
          <div class="badges">
            <span class="badge">🏆 Top Contributor</span>
            <span class="badge">✨ Trending Creator</span>
            <span class="badge">🎯 Pro User</span>
          </div>
        </body>
      </html>
    `;
    const mainPageHtml = `
      <html>
        <body>
          <div class="user-profile">
            <div class="profile-header">
              <h1 class="name">Sarah Wilson</h1>
              <div class="status">Premium Member</div>
            </div>
            <div class="profile-details">
              <div class="location">📍 San Francisco, CA</div>
            </div>
          </div>
          <iframe id="profile-iframe" srcdoc='${toSrcdoc(iframeContent)}'></iframe>
        </body>
      </html>
    `;
    await page.setContent(mainPageHtml);
    // `:last-child` keeps the locator unique (three .badge elements exist).
    await waitForFrameContent(["#profile-iframe"], ".badge:last-child");
    const result = await (0, _.extractStructuredData)({
      source: page,
      dataSchema: {
        type: "object",
        properties: {
          name: {
            type: "string"
          },
          status: {
            type: "string"
          },
          followers: {
            type: "string"
          },
          following: {
            type: "string"
          },
          location: {
            type: "string"
          },
          badges: {
            type: "array",
            items: {
              type: "string"
            }
          }
        },
        required: ["name", "status", "location"]
      },
      prompt: "Extract user profile information including name, status, follower counts, location, and badges from both main page and iframe content.",
      strategy: "IMAGE",
      apiKey: process.env.ANTHROPIC_API_KEY,
      enableDomMatching: false
    });
    expect(result.name).toBe("Sarah Wilson");
    expect(result.status).toBe("Premium Member");
    expect(result.location).toBe("San Francisco, CA");
    expect(result.followers).toBe("1,234");
    expect(result.following).toBe("567");
    expect(result.badges.length).toBe(3);
    // Badges carry emoji prefixes, so match on the joined text rather than exact items.
    const joinedBadges = result.badges.join(" ");
    for (const badge of ["Top Contributor", "Trending Creator", "Pro User"]) {
      expect(joinedBadges).toContain(badge);
    }
  });

  test("should cache correctly with iframe DOM matching", async () => {
    const iframeContent = `
      <html>
        <body>
          <div class="product">
            <h2 class="title">iPhone 14 Pro</h2>
            <div class="price">$999</div>
            <div class="stock">In Stock</div>
          </div>
        </body>
      </html>
    `;
    const mainPageHtml = `
      <html>
        <body>
          <h1>Product Catalog</h1>
          <iframe id="product-frame" srcdoc='${toSrcdoc(iframeContent)}'></iframe>
          <p>Additional content</p>
        </body>
      </html>
    `;
    const schema = makeTitlePriceSchema();

    // Loads `html`, waits for the product iframe to render, then extracts with DOM matching on.
    const loadAndExtract = async html => {
      await page.setContent(html);
      await waitForFrameContent(["#product-frame"], ".stock");
      return (0, _.extractStructuredData)({
        source: page,
        dataSchema: schema,
        prompt: PRODUCT_PROMPT,
        strategy: "HTML",
        apiKey: process.env.ANTHROPIC_API_KEY,
        enableDomMatching: true
      });
    };

    const firstResult = await loadAndExtract(mainPageHtml);

    // Changing only main-page text must not change the extracted product.
    const modifiedMainPage = mainPageHtml.replace("Additional content", "Different content");
    const secondResult = await loadAndExtract(modifiedMainPage);
    expect(secondResult).toEqual(firstResult);
    expect(secondResult.title).toBe("iPhone 14 Pro");
    expect(secondResult.price).toBe("$999");

    // Changing the iframe content itself must be reflected in the result.
    const modifiedIframeContent = iframeContent.replace("iPhone 14 Pro", "iPhone 15 Pro").replace("$999", "$1099");
    const modifiedPageWithNewIframe = `
      <html>
        <body>
          <h1>Product Catalog</h1>
          <iframe id="product-frame" srcdoc='${toSrcdoc(modifiedIframeContent)}'></iframe>
          <p>Different content</p>
        </body>
      </html>
    `;
    const thirdResult = await loadAndExtract(modifiedPageWithNewIframe);
    expect(thirdResult).not.toEqual(firstResult);
    expect(thirdResult.title).toBe("iPhone 15 Pro");
    expect(thirdResult.price).toBe("$1099");
  });
});
623
969
  (0, _extendedTest.describe)("Zod Schema Integration", () => {
624
970
  (0, _extendedTest.test)("should extract data using Zod schema directly", async () => {
625
971
  await page.setContent(productListTemplate);