@intuned/browser-dev 0.1.9-dev.0 → 0.1.10-dev.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. package/dist/ai/extractStructuredData.js +21 -27
  2. package/dist/ai/tests/testCreateMatchesMapping.spec.js +216 -0
  3. package/dist/ai/tests/testExtractStructuredData.spec.js +346 -0
  4. package/dist/ai/tests/testExtractStructuredDataDomMatchingIframes.spec.js +459 -0
  5. package/dist/ai/tests/testExtractStructuredDataUnit.spec.js +375 -0
  6. package/dist/ai/tests/testMatching.spec.js +342 -0
  7. package/dist/ai/tests/testValidateMatchesMapping.spec.js +265 -0
  8. package/dist/common/extendedTest.js +38 -30
  9. package/dist/common/frame_utils/frameTree.js +116 -0
  10. package/dist/common/frame_utils/getContentWithNestedIframes.js +13 -0
  11. package/dist/common/frame_utils/index.js +95 -0
  12. package/dist/common/frame_utils/stitchIframe.js +105 -0
  13. package/dist/{helpers → common}/frame_utils/tests/testFindAllIframes.spec.js +24 -15
  14. package/dist/common/frame_utils/tests/testGetContentWithNestedIframes.spec.js +241 -0
  15. package/dist/common/frame_utils/utils.js +91 -0
  16. package/dist/common/getSimplifiedHtml.js +20 -20
  17. package/dist/common/matching/matching.js +91 -16
  18. package/dist/common/tests/matching.test.js +225 -0
  19. package/dist/common/tests/testGetSimplifiedHtml.spec.js +324 -0
  20. package/dist/helpers/extractMarkdown.js +16 -7
  21. package/dist/helpers/tests/testExtractMarkdown.spec.js +29 -0
  22. package/dist/helpers/waitForDomSettled.js +4 -4
  23. package/dist/types/intuned-runtime.d.ts +6 -32
  24. package/package.json +1 -1
  25. package/dist/helpers/frame_utils/constants.js +0 -8
  26. package/dist/helpers/frame_utils/findAllIframes.js +0 -82
  27. package/dist/helpers/frame_utils/index.js +0 -44
  28. /package/dist/{helpers → common}/frame_utils/checkFrameAllowsAsyncScripts.js +0 -0
  29. /package/dist/{helpers → common}/frame_utils/getContainerFrame.js +0 -0
@@ -620,6 +620,352 @@ _extendedTest.describe.skip("Extract data from page tests", () => {
620
620
  });
621
621
  });
622
622
  });
623
+ (0, _extendedTest.describe)("Iframe Support", () => {
624
// Test: the product title and price exist only inside a srcdoc iframe on the
// page; extractStructuredData is run with the HTML strategy and is expected to
// return both values.
// NOTE(review): page is not declared in this test - presumably a shared fixture
// from an enclosing scope; confirm.
+ (0, _extendedTest.test)("should extract data from content within iframes", async () => {
625
+ const iframeContent = `
626
+ <html>
627
+ <body>
628
+ <div class="product">
629
+ <h2 class="title">iPhone 14 Pro</h2>
630
+ <div class="price">$999</div>
631
+ <div class="stock">In Stock</div>
632
+ </div>
633
+ </body>
634
+ </html>
635
+ `;
636
// The iframe document is embedded through a single-quoted srcdoc attribute, so
// every single quote in it is replaced with the &apos; entity first.
+ const mainPageHtml = `
637
+ <html>
638
+ <body>
639
+ <h1>Product Catalog</h1>
640
+ <iframe id="product-frame" srcdoc='${iframeContent.replace(/'/g, "&apos;")}'></iframe>
641
+ <p>Additional content</p>
642
+ </body>
643
+ </html>
644
+ `;
645
// JSON-schema style description of the expected result: two required strings.
+ const schema = {
646
+ type: "object",
647
+ properties: {
648
+ title: {
649
+ type: "string"
650
+ },
651
+ price: {
652
+ type: "string"
653
+ }
654
+ },
655
+ required: ["title", "price"]
656
+ };
657
+ await page.setContent(mainPageHtml);
658
+ await page.waitForSelector("#product-frame");
659
// NOTE(review): fixed 100 ms pause after the iframe element is found -
// presumably gives the srcdoc document time to load; confirm it is long enough
// on slow runners.
+ await page.waitForTimeout(100);
660
// DOM matching is switched off for this call; the API key is read from the
// ANTHROPIC_API_KEY environment variable.
+ const result = await (0, _.extractStructuredData)({
661
+ source: page,
662
+ dataSchema: schema,
663
+ prompt: "Extract product title and price from anywhere on the page including iframes.",
664
+ strategy: "HTML",
665
+ apiKey: process.env.ANTHROPIC_API_KEY,
666
+ enableDomMatching: false
667
+ });
668
// Both expected values appear only in the iframe markup, not in the main page.
+ (0, _extendedTest.expect)(result.title).toBe("iPhone 14 Pro");
669
+ (0, _extendedTest.expect)(result.price).toBe("$999");
670
+ });
671
// Test: the target values sit two iframe levels deep (main page -> outer iframe
// -> inner iframe); extractStructuredData with the HTML strategy is expected to
// return them.
// NOTE(review): page is not declared in this test - presumably a shared fixture
// from an enclosing scope; confirm.
+ (0, _extendedTest.test)("should work with nested iframes", async () => {
672
+ const innerIframeContent = `
673
+ <html>
674
+ <body>
675
+ <div class="nested-content">
676
+ <span class="nested-title">Nested Product</span>
677
+ <span class="nested-price">$500</span>
678
+ </div>
679
+ </body>
680
+ </html>
681
+ `;
682
// The inner document is embedded in the outer one through a single-quoted
// srcdoc attribute, with its single quotes replaced by &apos;.
+ const outerIframeContent = `
683
+ <html>
684
+ <body>
685
+ <h2>Outer Frame</h2>
686
+ <iframe id="inner-iframe" srcdoc='${innerIframeContent.replace(/'/g, "&apos;")}'></iframe>
687
+ </body>
688
+ </html>
689
+ `;
690
// The outer document already contains the single-quoted inner srcdoc; the same
// replacement turns those quotes into &apos; before it is embedded here.
+ const mainPageHtml = `
691
+ <html>
692
+ <body>
693
+ <h1>Main Content</h1>
694
+ <iframe id="outer-iframe" srcdoc='${outerIframeContent.replace(/'/g, "&apos;")}'></iframe>
695
+ </body>
696
+ </html>
697
+ `;
698
+ const schema = {
699
+ type: "object",
700
+ properties: {
701
+ title: {
702
+ type: "string"
703
+ },
704
+ price: {
705
+ type: "string"
706
+ }
707
+ },
708
+ required: ["title", "price"]
709
+ };
710
+ await page.setContent(mainPageHtml);
711
// Only the outer iframe element is awaited; the inner iframe is not waited for
// explicitly.
+ await page.waitForSelector("#outer-iframe");
712
// NOTE(review): fixed 200 ms pause (the other tests in this describe use 100) -
// presumably extra time for the second iframe level to load; confirm.
+ await page.waitForTimeout(200);
713
// The prompt maps the nested-title / nested-price class names onto the schema
// fields title / price.
+ const result = await (0, _.extractStructuredData)({
714
+ source: page,
715
+ dataSchema: schema,
716
+ prompt: "Extract nested-title as title and nested-price as price from the page including all iframes.",
717
+ strategy: "HTML",
718
+ apiKey: process.env.ANTHROPIC_API_KEY,
719
+ enableDomMatching: false
720
+ });
721
+ (0, _extendedTest.expect)(result.title).toBe("Nested Product");
722
+ (0, _extendedTest.expect)(result.price).toBe("$500");
723
+ });
724
// Test: article metadata is split between the main page (title, author) and a
// srcdoc iframe (date, read time, tags); extractStructuredData with the
// MARKDOWN strategy is expected to return all of it.
// NOTE(review): page is not declared in this test - presumably a shared fixture
// from an enclosing scope; confirm.
+ (0, _extendedTest.test)("should extract data from iframes using MARKDOWN strategy", async () => {
725
+ const iframeContent = `
726
+ <html>
727
+ <body>
728
+ <div class="metadata">
729
+ <time datetime="2024-03-15">March 15, 2024</time>
730
+ <span class="read-time">8 min read</span>
731
+ </div>
732
+ <div class="tags">
733
+ <span class="tag">AI</span>
734
+ <span class="tag">Technology</span>
735
+ <span class="tag">Future</span>
736
+ </div>
737
+ </body>
738
+ </html>
739
+ `;
740
// The iframe document is embedded through a single-quoted srcdoc attribute, so
// its single quotes are replaced with &apos; first.
+ const mainPageHtml = `
741
+ <html>
742
+ <body>
743
+ <article class="blog-post">
744
+ <header>
745
+ <h1>The Future of AI in 2024</h1>
746
+ <div class="metadata">
747
+ <span class="author">John Doe</span>
748
+ </div>
749
+ </header>
750
+ </article>
751
+ <iframe id="article-iframe" srcdoc='${iframeContent.replace(/'/g, "&apos;")}'></iframe>
752
+ </body>
753
+ </html>
754
+ `;
755
+ await page.setContent(mainPageHtml);
756
+ await page.waitForSelector("#article-iframe");
757
// NOTE(review): fixed 100 ms pause - presumably gives the srcdoc document time
// to load; confirm it is long enough on slow runners.
+ await page.waitForTimeout(100);
758
// The schema is passed inline. Only title, author and tags are listed as
// required, yet date and readTime are also asserted exactly further down.
+ const result = await (0, _.extractStructuredData)({
759
+ source: page,
760
+ dataSchema: {
761
+ type: "object",
762
+ properties: {
763
+ title: {
764
+ type: "string"
765
+ },
766
+ author: {
767
+ type: "string"
768
+ },
769
+ date: {
770
+ type: "string"
771
+ },
772
+ readTime: {
773
+ type: "string"
774
+ },
775
+ tags: {
776
+ type: "array",
777
+ items: {
778
+ type: "string"
779
+ },
780
+ description: "Article tags - each tag must be a separate string in the array, do not combine multiple tags into one string"
781
+ }
782
+ },
783
+ required: ["title", "author", "tags"]
784
+ },
785
+ prompt: "Extract article metadata including title, author, date, read time, and tags from both main page and iframe content. IMPORTANT: Extract each tag as a separate item in the tags array. Do not combine multiple tags into one.",
786
+ strategy: "MARKDOWN",
787
+ apiKey: process.env.ANTHROPIC_API_KEY,
788
+ enableDomMatching: false
789
+ });
790
// title and author come from the main page markup.
+ (0, _extendedTest.expect)(result.title).toBe("The Future of AI in 2024");
791
+ (0, _extendedTest.expect)(result.author).toBe("John Doe");
792
// date, readTime and the tags appear only in the iframe markup. The date is
// expected as the visible text, not the datetime attribute value.
+ (0, _extendedTest.expect)(result.date).toBe("March 15, 2024");
793
+ (0, _extendedTest.expect)(result.readTime).toBe("8 min read");
794
+ (0, _extendedTest.expect)(result.tags).toContain("AI");
795
+ (0, _extendedTest.expect)(result.tags).toContain("Technology");
796
+ (0, _extendedTest.expect)(result.tags).toContain("Future");
797
// Exactly three entries: the tags must not be merged or duplicated.
+ (0, _extendedTest.expect)(result.tags.length).toBe(3);
798
+ });
799
// Test: profile data is split between the main page (name, status, location)
// and a srcdoc iframe (follower counts, badges); extractStructuredData with the
// IMAGE strategy is expected to return all of it.
// NOTE(review): page is not declared in this test - presumably a shared fixture
// from an enclosing scope; confirm.
+ (0, _extendedTest.test)("should extract data from iframes using IMAGE strategy", async () => {
800
+ const iframeContent = `
801
+ <html>
802
+ <body>
803
+ <div class="profile-stats">
804
+ <div class="stat">
805
+ <span class="value">1,234</span>
806
+ <span class="label">Followers</span>
807
+ </div>
808
+ <div class="stat">
809
+ <span class="value">567</span>
810
+ <span class="label">Following</span>
811
+ </div>
812
+ </div>
813
+ <div class="badges">
814
+ <span class="badge">🏆 Top Contributor</span>
815
+ <span class="badge">✨ Trending Creator</span>
816
+ <span class="badge">🎯 Pro User</span>
817
+ </div>
818
+ </body>
819
+ </html>
820
+ `;
821
// The iframe document is embedded through a single-quoted srcdoc attribute, so
// its single quotes are replaced with &apos; first.
+ const mainPageHtml = `
822
+ <html>
823
+ <body>
824
+ <div class="user-profile">
825
+ <div class="profile-header">
826
+ <h1 class="name">Sarah Wilson</h1>
827
+ <div class="status">Premium Member</div>
828
+ </div>
829
+ <div class="profile-details">
830
+ <div class="location">📍 San Francisco, CA</div>
831
+ </div>
832
+ </div>
833
+ <iframe id="profile-iframe" srcdoc='${iframeContent.replace(/'/g, "&apos;")}'></iframe>
834
+ </body>
835
+ </html>
836
+ `;
837
+ await page.setContent(mainPageHtml);
838
+ await page.waitForSelector("#profile-iframe");
839
// NOTE(review): fixed 100 ms pause - presumably gives the srcdoc document time
// to load before the page is captured; confirm.
+ await page.waitForTimeout(100);
840
// The schema is passed inline. Only name, status and location are listed as
// required, yet followers, following and badges are also asserted further down.
+ const result = await (0, _.extractStructuredData)({
841
+ source: page,
842
+ dataSchema: {
843
+ type: "object",
844
+ properties: {
845
+ name: {
846
+ type: "string"
847
+ },
848
+ status: {
849
+ type: "string"
850
+ },
851
+ followers: {
852
+ type: "string"
853
+ },
854
+ following: {
855
+ type: "string"
856
+ },
857
+ location: {
858
+ type: "string"
859
+ },
860
+ badges: {
861
+ type: "array",
862
+ items: {
863
+ type: "string"
864
+ }
865
+ }
866
+ },
867
+ required: ["name", "status", "location"]
868
+ },
869
+ prompt: "Extract user profile information including name, status, follower counts, location, and badges from both main page and iframe content.",
870
+ strategy: "IMAGE",
871
+ apiKey: process.env.ANTHROPIC_API_KEY,
872
+ enableDomMatching: false
873
+ });
874
+ (0, _extendedTest.expect)(result.name).toBe("Sarah Wilson");
875
+ (0, _extendedTest.expect)(result.status).toBe("Premium Member");
876
// The expected location omits the pin emoji that precedes it in the markup.
+ (0, _extendedTest.expect)(result.location).toBe("San Francisco, CA");
877
// The follower counts and badges appear only in the iframe markup.
+ (0, _extendedTest.expect)(result.followers).toBe("1,234");
878
+ (0, _extendedTest.expect)(result.following).toBe("567");
879
+ (0, _extendedTest.expect)(result.badges.length).toBe(3);
880
// Badges are joined and checked by substring rather than compared exactly -
// presumably so the result may keep or drop the leading emoji; confirm.
+ (0, _extendedTest.expect)(result.badges.join(" ")).toContain("Top Contributor");
881
+ (0, _extendedTest.expect)(result.badges.join(" ")).toContain("Trending Creator");
882
+ (0, _extendedTest.expect)(result.badges.join(" ")).toContain("Pro User");
883
+ });
884
// Test: runs the same extraction three times with enableDomMatching set to true.
//   1. original page
//   2. same iframe, different main-page paragraph -> result must equal run 1
//   3. changed iframe title and price             -> result must differ from run 1
// NOTE(review): the test name speaks of caching, but nothing visible here shows
// whether run 2 is actually served from a cache - only the results are compared.
// NOTE(review): page is not declared in this test - presumably a shared fixture
// from an enclosing scope; confirm.
+ (0, _extendedTest.test)("should cache correctly with iframe DOM matching", async () => {
885
+ const iframeContent = `
886
+ <html>
887
+ <body>
888
+ <div class="product">
889
+ <h2 class="title">iPhone 14 Pro</h2>
890
+ <div class="price">$999</div>
891
+ <div class="stock">In Stock</div>
892
+ </div>
893
+ </body>
894
+ </html>
895
+ `;
896
// The iframe document is embedded through a single-quoted srcdoc attribute, so
// its single quotes are replaced with &apos; first.
+ const mainPageHtml = `
897
+ <html>
898
+ <body>
899
+ <h1>Product Catalog</h1>
900
+ <iframe id="product-frame" srcdoc='${iframeContent.replace(/'/g, "&apos;")}'></iframe>
901
+ <p>Additional content</p>
902
+ </body>
903
+ </html>
904
+ `;
905
+ const schema = {
906
+ type: "object",
907
+ properties: {
908
+ title: {
909
+ type: "string"
910
+ },
911
+ price: {
912
+ type: "string"
913
+ }
914
+ },
915
+ required: ["title", "price"]
916
+ };
917
+ await page.setContent(mainPageHtml);
918
+ await page.waitForSelector("#product-frame");
919
// NOTE(review): fixed 100 ms pause, repeated after every setContent below -
// presumably gives the srcdoc document time to load; confirm.
+ await page.waitForTimeout(100);
920
// Run 1: baseline extraction on the original page.
+ const firstResult = await (0, _.extractStructuredData)({
921
+ source: page,
922
+ dataSchema: schema,
923
+ prompt: "Extract product title and price from anywhere on the page including iframes.",
924
+ strategy: "HTML",
925
+ apiKey: process.env.ANTHROPIC_API_KEY,
926
+ enableDomMatching: true
927
+ });
928
// Run 2: only the paragraph outside the iframe changes; the iframe markup is
// the same as in run 1.
+ const modifiedMainPage = mainPageHtml.replace("Additional content", "Different content");
929
+ await page.setContent(modifiedMainPage);
930
+ await page.waitForSelector("#product-frame");
931
+ await page.waitForTimeout(100);
932
+ const secondResult = await (0, _.extractStructuredData)({
933
+ source: page,
934
+ dataSchema: schema,
935
+ prompt: "Extract product title and price from anywhere on the page including iframes.",
936
+ strategy: "HTML",
937
+ apiKey: process.env.ANTHROPIC_API_KEY,
938
+ enableDomMatching: true
939
+ });
940
// The unrelated main-page edit must not change the extracted values.
+ (0, _extendedTest.expect)(secondResult).toEqual(firstResult);
941
+ (0, _extendedTest.expect)(secondResult.title).toBe("iPhone 14 Pro");
942
+ (0, _extendedTest.expect)(secondResult.price).toBe("$999");
943
// Run 3: the title and price inside the iframe are swapped for new values. Each
// replace call is given a string pattern, so it replaces the first occurrence only.
+ const modifiedIframeContent = iframeContent.replace("iPhone 14 Pro", "iPhone 15 Pro").replace("$999", "$1099");
944
+ const modifiedPageWithNewIframe = `
945
+ <html>
946
+ <body>
947
+ <h1>Product Catalog</h1>
948
+ <iframe id="product-frame" srcdoc='${modifiedIframeContent.replace(/'/g, "&apos;")}'></iframe>
949
+ <p>Different content</p>
950
+ </body>
951
+ </html>
952
+ `;
953
+ await page.setContent(modifiedPageWithNewIframe);
954
+ await page.waitForSelector("#product-frame");
955
+ await page.waitForTimeout(100);
956
+ const thirdResult = await (0, _.extractStructuredData)({
957
+ source: page,
958
+ dataSchema: schema,
959
+ prompt: "Extract product title and price from anywhere on the page including iframes.",
960
+ strategy: "HTML",
961
+ apiKey: process.env.ANTHROPIC_API_KEY,
962
+ enableDomMatching: true
963
+ });
964
// A change inside the iframe must show up in the result rather than the values
// from run 1 being returned again.
+ (0, _extendedTest.expect)(thirdResult).not.toEqual(firstResult);
965
+ (0, _extendedTest.expect)(thirdResult.title).toBe("iPhone 15 Pro");
966
+ (0, _extendedTest.expect)(thirdResult.price).toBe("$1099");
967
+ });
968
+ });
623
969
  (0, _extendedTest.describe)("Zod Schema Integration", () => {
624
970
  (0, _extendedTest.test)("should extract data using Zod schema directly", async () => {
625
971
  await page.setContent(productListTemplate);