@intuned/browser-dev 2.2.3-test-build.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (206) hide show
  1. package/.babelrc +21 -0
  2. package/.eslintignore +10 -0
  3. package/.eslintrc.js +39 -0
  4. package/LICENSE +43 -0
  5. package/dist/ai/export.d.js +5 -0
  6. package/dist/ai/export.d.ts +641 -0
  7. package/dist/ai/extractStructuredData.js +320 -0
  8. package/dist/ai/extractStructuredDataUsingAi.js +139 -0
  9. package/dist/ai/extractionHelpers/screenshotHelpers.js +56 -0
  10. package/dist/ai/extractionHelpers/validateSchema.js +148 -0
  11. package/dist/ai/index.d.ts +641 -0
  12. package/dist/ai/index.js +19 -0
  13. package/dist/ai/isPageLoaded.js +77 -0
  14. package/dist/ai/prompt.js +39 -0
  15. package/dist/ai/tests/testCheckAllTypesAreStrings.spec.js +137 -0
  16. package/dist/ai/tests/testExtractFromContent.spec.js +372 -0
  17. package/dist/ai/tests/testExtractStructuredData.spec.js +646 -0
  18. package/dist/ai/tests/testIsPageLoaded.spec.js +277 -0
  19. package/dist/ai/tools/index.js +48 -0
  20. package/dist/ai/types/errors.js +67 -0
  21. package/dist/ai/types/models.js +45 -0
  22. package/dist/ai/types/types.js +48 -0
  23. package/dist/ai/validators.js +167 -0
  24. package/dist/common/Logger/index.js +60 -0
  25. package/dist/common/Logger/types.js +5 -0
  26. package/dist/common/SdkError.js +50 -0
  27. package/dist/common/aiModelsValidations.js +32 -0
  28. package/dist/common/browser_scripts.js +2596 -0
  29. package/dist/common/ensureBrowserScripts.js +18 -0
  30. package/dist/common/extendedTest.js +148 -0
  31. package/dist/common/extractionHelpers.js +19 -0
  32. package/dist/common/formatZodError.js +18 -0
  33. package/dist/common/fuzzySearch/fuzzySearch.test.js +250 -0
  34. package/dist/common/fuzzySearch/levenshtein-search.js +298 -0
  35. package/dist/common/fuzzySearch/utils.js +23 -0
  36. package/dist/common/getModelProvider.js +18 -0
  37. package/dist/common/getSimplifiedHtml.js +122 -0
  38. package/dist/common/hashObject.js +32 -0
  39. package/dist/common/html2markdown/convertElementToMarkdown.js +469 -0
  40. package/dist/common/html2markdown/index.js +19 -0
  41. package/dist/common/jwtTokenManager.js +18 -0
  42. package/dist/common/loadRuntime.js +16 -0
  43. package/dist/common/locatorHelpers.js +41 -0
  44. package/dist/common/matching/collectStrings.js +32 -0
  45. package/dist/common/matching/levenshtein.js +40 -0
  46. package/dist/common/matching/matching.js +317 -0
  47. package/dist/common/matching/types.js +1 -0
  48. package/dist/common/noEmpty.js +9 -0
  49. package/dist/common/saveSnapshotWithExamples.js +60 -0
  50. package/dist/common/tests/testEnsureBrowserScript.spec.js +31 -0
  51. package/dist/common/xpathMapping.js +107 -0
  52. package/dist/helpers/clickUntilExhausted.js +85 -0
  53. package/dist/helpers/downloadFile.js +125 -0
  54. package/dist/helpers/export.d.js +5 -0
  55. package/dist/helpers/export.d.ts +1220 -0
  56. package/dist/helpers/extractMarkdown.js +35 -0
  57. package/dist/helpers/filterEmptyValues.js +54 -0
  58. package/dist/helpers/gotoUrl.js +98 -0
  59. package/dist/helpers/index.d.ts +1220 -0
  60. package/dist/helpers/index.js +128 -0
  61. package/dist/helpers/processDate.js +25 -0
  62. package/dist/helpers/resolveUrl.js +64 -0
  63. package/dist/helpers/sanitizeHtml.js +74 -0
  64. package/dist/helpers/saveFileToS3.js +50 -0
  65. package/dist/helpers/scrollToLoadContent.js +57 -0
  66. package/dist/helpers/tests/extendedTest.js +130 -0
  67. package/dist/helpers/tests/testClickUntilExhausted.spec.js +387 -0
  68. package/dist/helpers/tests/testDownloadFile.spec.js +204 -0
  69. package/dist/helpers/tests/testExtractMarkdown.spec.js +290 -0
  70. package/dist/helpers/tests/testFilterEmptyValues.spec.js +151 -0
  71. package/dist/helpers/tests/testGoToUrl.spec.js +37 -0
  72. package/dist/helpers/tests/testProcessDate.spec.js +13 -0
  73. package/dist/helpers/tests/testResolveUrl.spec.js +341 -0
  74. package/dist/helpers/tests/testSanitizeHtml.spec.js +330 -0
  75. package/dist/helpers/tests/testScrollToLoadContent.spec.js +163 -0
  76. package/dist/helpers/tests/testValidateDataUsingSchema.spec.js +342 -0
  77. package/dist/helpers/tests/testWithDomSettledWait.spec.js +164 -0
  78. package/dist/helpers/tests/testWithNetworkIdleWait.spec.js +114 -0
  79. package/dist/helpers/types/Attachment.js +115 -0
  80. package/dist/helpers/types/CustomTypeRegistry.js +48 -0
  81. package/dist/helpers/types/RunEnvironment.js +18 -0
  82. package/dist/helpers/types/ValidationError.js +17 -0
  83. package/dist/helpers/types/index.js +51 -0
  84. package/dist/helpers/uploadFileToS3.js +154 -0
  85. package/dist/helpers/utils/getS3Client.js +22 -0
  86. package/dist/helpers/utils/index.js +73 -0
  87. package/dist/helpers/utils/isDownload.js +10 -0
  88. package/dist/helpers/utils/isGenerateCodeMode.js +9 -0
  89. package/dist/helpers/utils/isLocator.js +9 -0
  90. package/dist/helpers/utils/jwtTokenManager.js +18 -0
  91. package/dist/helpers/validateDataUsingSchema.js +103 -0
  92. package/dist/helpers/waitForDomSettled.js +90 -0
  93. package/dist/helpers/withNetworkSettledWait.js +91 -0
  94. package/dist/index.d.js +16 -0
  95. package/dist/index.d.ts +10 -0
  96. package/dist/index.js +16 -0
  97. package/dist/intunedServices/ApiGateway/aiApiGateway.js +99 -0
  98. package/dist/intunedServices/ApiGateway/factory.js +13 -0
  99. package/dist/intunedServices/ApiGateway/providers/Anthropic.js +26 -0
  100. package/dist/intunedServices/ApiGateway/providers/Gemini.js +29 -0
  101. package/dist/intunedServices/ApiGateway/providers/OpenAI.js +29 -0
  102. package/dist/intunedServices/ApiGateway/tests/testApiGateway.spec.js +224 -0
  103. package/dist/intunedServices/ApiGateway/types.js +11 -0
  104. package/dist/intunedServices/cache/cache.js +61 -0
  105. package/dist/intunedServices/cache/index.js +12 -0
  106. package/dist/intunedServices/cache/tests/testCache.spec.js +117 -0
  107. package/dist/optimized-extractors/common/buildExamplesPrompt.js +12 -0
  108. package/dist/optimized-extractors/common/buildImagesFromPage.js +55 -0
  109. package/dist/optimized-extractors/common/extractStructuredDataUsingClaude.js +135 -0
  110. package/dist/optimized-extractors/common/extractStructuredDataUsingGoogle.js +37 -0
  111. package/dist/optimized-extractors/common/extractStructuredDataUsingOpenAi.js +132 -0
  112. package/dist/optimized-extractors/common/extractStrucutredDataUsingAiInstance.js +122 -0
  113. package/dist/optimized-extractors/common/findTableHeaders.js +162 -0
  114. package/dist/optimized-extractors/common/index.js +55 -0
  115. package/dist/optimized-extractors/common/isTableHeaderOrFooter.js +84 -0
  116. package/dist/optimized-extractors/common/matching/matching.js +212 -0
  117. package/dist/optimized-extractors/common/matching/matching.test.js +655 -0
  118. package/dist/optimized-extractors/common/matching/types.js +18 -0
  119. package/dist/optimized-extractors/common/matching/utils.js +184 -0
  120. package/dist/optimized-extractors/common/utils.js +58 -0
  121. package/dist/optimized-extractors/export.d.js +5 -0
  122. package/dist/optimized-extractors/export.d.ts +397 -0
  123. package/dist/optimized-extractors/extractArray.js +120 -0
  124. package/dist/optimized-extractors/extractObject.js +104 -0
  125. package/dist/optimized-extractors/index.d.ts +397 -0
  126. package/dist/optimized-extractors/index.js +31 -0
  127. package/dist/optimized-extractors/listExtractionHelpers/__tests__/dynamicListExtractor.spec.js +312 -0
  128. package/dist/optimized-extractors/listExtractionHelpers/__tests__/findSetOfXpathsToCreateAnArrayExtractor.test.js +22 -0
  129. package/dist/optimized-extractors/listExtractionHelpers/__tests__/getContainerElement.test.js +21 -0
  130. package/dist/optimized-extractors/listExtractionHelpers/__tests__/partOfSameArrayXpath.test.js +42 -0
  131. package/dist/optimized-extractors/listExtractionHelpers/__tests__/verifyThatAllXpathsArePartOfSameArray.test.js +9 -0
  132. package/dist/optimized-extractors/listExtractionHelpers/dynamicListExtractor.js +152 -0
  133. package/dist/optimized-extractors/listExtractionHelpers/errors.js +46 -0
  134. package/dist/optimized-extractors/listExtractionHelpers/getListMatches.js +14 -0
  135. package/dist/optimized-extractors/listExtractionHelpers/runAiExtraction.js +240 -0
  136. package/dist/optimized-extractors/listExtractionHelpers/typesAndSchema.js +5 -0
  137. package/dist/optimized-extractors/listExtractionHelpers/utils/extractPropertiesUsingGPTFromArray.js +277 -0
  138. package/dist/optimized-extractors/listExtractionHelpers/utils/extractStructuredListUsingAi.js +44 -0
  139. package/dist/optimized-extractors/listExtractionHelpers/utils/getListContainerXpath.js +94 -0
  140. package/dist/optimized-extractors/listExtractionHelpers/utils/getRelativeContainerXpathSelector.js +20 -0
  141. package/dist/optimized-extractors/listExtractionHelpers/utils/getSimplifiedHtmlPerListItem.js +21 -0
  142. package/dist/optimized-extractors/listExtractionHelpers/utils/tablesUtils.js +48 -0
  143. package/dist/optimized-extractors/listExtractionHelpers/utils/validateOptions.js +52 -0
  144. package/dist/optimized-extractors/models/anthropicModel.js +23 -0
  145. package/dist/optimized-extractors/models/openaiModel.js +23 -0
  146. package/dist/optimized-extractors/objectExtractionHelpers/AIExtractors.js +73 -0
  147. package/dist/optimized-extractors/objectExtractionHelpers/__tests__/checksumUtils.test.js +103 -0
  148. package/dist/optimized-extractors/objectExtractionHelpers/__tests__/testObjectExtractorFromLocator.spec.js +107 -0
  149. package/dist/optimized-extractors/objectExtractionHelpers/__tests__/testObjectExtractorFromPage.spec.js +107 -0
  150. package/dist/optimized-extractors/objectExtractionHelpers/calculateObjectExampleHash.js +28 -0
  151. package/dist/optimized-extractors/objectExtractionHelpers/captureSnapshot.js +26 -0
  152. package/dist/optimized-extractors/objectExtractionHelpers/checksumUtils.js +32 -0
  153. package/dist/optimized-extractors/objectExtractionHelpers/constants.js +7 -0
  154. package/dist/optimized-extractors/objectExtractionHelpers/dynamicObjectExtractor.js +106 -0
  155. package/dist/optimized-extractors/objectExtractionHelpers/errors.js +42 -0
  156. package/dist/optimized-extractors/objectExtractionHelpers/findDomMatches.js +54 -0
  157. package/dist/optimized-extractors/objectExtractionHelpers/getSimplifiedHtml.js +122 -0
  158. package/dist/optimized-extractors/objectExtractionHelpers/typesAndSchemas.js +5 -0
  159. package/dist/optimized-extractors/objectExtractionHelpers/validateDynamicObjectExtractorOptions.js +52 -0
  160. package/dist/optimized-extractors/types/aiModelsValidation.js +45 -0
  161. package/dist/optimized-extractors/types/errors.js +42 -0
  162. package/dist/optimized-extractors/types/jsonSchema.d.js +5 -0
  163. package/dist/optimized-extractors/types/jsonSchema.d.ts +50 -0
  164. package/dist/optimized-extractors/types/types.js +5 -0
  165. package/dist/optimized-extractors/validators.js +152 -0
  166. package/dist/vite-env.d.js +1 -0
  167. package/dist/vite-env.d.ts +9 -0
  168. package/docs.md +14 -0
  169. package/generated-docs/ai/functions/extractStructuredData.mdx +255 -0
  170. package/generated-docs/ai/functions/isPageLoaded.mdx +88 -0
  171. package/generated-docs/ai/interfaces/ArraySchema.mdx +36 -0
  172. package/generated-docs/ai/interfaces/BasicSchema.mdx +14 -0
  173. package/generated-docs/ai/interfaces/BooleanSchema.mdx +28 -0
  174. package/generated-docs/ai/interfaces/ImageBufferContentItem.mdx +16 -0
  175. package/generated-docs/ai/interfaces/ImageUrlContentItem.mdx +16 -0
  176. package/generated-docs/ai/interfaces/NumberSchema.mdx +35 -0
  177. package/generated-docs/ai/interfaces/ObjectSchema.mdx +39 -0
  178. package/generated-docs/ai/interfaces/StringSchema.mdx +35 -0
  179. package/generated-docs/ai/interfaces/TextContentItem.mdx +14 -0
  180. package/generated-docs/ai/type-aliases/ContentItem.mdx +12 -0
  181. package/generated-docs/ai/type-aliases/JsonSchema.mdx +47 -0
  182. package/generated-docs/ai/type-aliases/SUPPORTED_MODELS.mdx +85 -0
  183. package/generated-docs/helpers/functions/downloadFile.mdx +99 -0
  184. package/generated-docs/helpers/functions/extractMarkdown.mdx +56 -0
  185. package/generated-docs/helpers/functions/filterEmptyValues.mdx +51 -0
  186. package/generated-docs/helpers/functions/goToUrl.mdx +124 -0
  187. package/generated-docs/helpers/functions/processDate.mdx +55 -0
  188. package/generated-docs/helpers/functions/resolveUrl.mdx +165 -0
  189. package/generated-docs/helpers/functions/sanitizeHtml.mdx +113 -0
  190. package/generated-docs/helpers/functions/saveFileToS3.mdx +127 -0
  191. package/generated-docs/helpers/functions/scrollToLoadContent.mdx +89 -0
  192. package/generated-docs/helpers/functions/uploadFileToS3.mdx +121 -0
  193. package/generated-docs/helpers/functions/validateDataUsingSchema.mdx +90 -0
  194. package/generated-docs/helpers/functions/waitForDomSettled.mdx +91 -0
  195. package/generated-docs/helpers/functions/withNetworkSettledWait.mdx +76 -0
  196. package/generated-docs/helpers/interfaces/Attachment.mdx +56 -0
  197. package/generated-docs/helpers/interfaces/S3Configs.mdx +52 -0
  198. package/generated-docs/helpers/interfaces/SanitizeHtmlOptions.mdx +22 -0
  199. package/generated-docs/helpers/type-aliases/AttachmentType.mdx +10 -0
  200. package/generated-docs/helpers/type-aliases/FileType.mdx +61 -0
  201. package/generated-docs/helpers/type-aliases/Trigger.mdx +62 -0
  202. package/how-to-run-tests.md +10 -0
  203. package/intuned-runtime-setup.md +13 -0
  204. package/package.json +119 -0
  205. package/tsconfig.eslint.json +5 -0
  206. package/tsconfig.json +26 -0
@@ -0,0 +1,646 @@
1
+ "use strict";
2
+
3
+ var _extendedTest = require("../../common/extendedTest");
4
+ var _ = require("..");
5
+ var _playwrightCore = require("playwright-core");
6
+ var _dotenv = require("dotenv");
7
+ var _zod = require("zod");
8
+ (0, _dotenv.config)();
9
// HTML fixture: a ".product-list" wrapper holding three ".product" cards
// (iPhone 14 Pro, MacBook Air M2, AirPods Pro). Every card has a title, price,
// stock label, rating and a three-item feature list. Tests load it with
// page.setContent(); the markup is kept verbatim because assertions compare
// against its exact text.
const productListTemplate = `
<div class="product-list">
<div class="product" data-id="1">
<h2 class="title">iPhone 14 Pro</h2>
<div class="price">$999</div>
<div class="stock">In Stock</div>
<div class="rating">4.5</div>
<ul class="features">
<li>5G Capable</li>
<li>A16 Bionic</li>
<li>48MP Camera</li>
</ul>
</div>
<div class="product" data-id="2">
<h2 class="title">MacBook Air M2</h2>
<div class="price">$1199</div>
<div class="stock">Low Stock</div>
<div class="rating">4.8</div>
<ul class="features">
<li>M2 Chip</li>
<li>13.6" Display</li>
<li>18hr Battery</li>
</ul>
</div>
<div class="product" data-id="3">
<h2 class="title">AirPods Pro</h2>
<div class="price">$249</div>
<div class="stock">Out of Stock</div>
<div class="rating">4.7</div>
<ul class="features">
<li>Active Noise Cancellation</li>
<li>Spatial Audio</li>
<li>Water Resistant</li>
</ul>
</div>
</div>
`;
46
// HTML fixture: a single blog post ("The Future of AI in 2024") with author,
// date, read-time and three tags in the header, a short body with a bullet
// list, and like/comment/share counters in the footer. Tests load it with
// page.setContent(); the markup is kept verbatim because assertions compare
// against its exact text.
const articleTemplate = `
<article class="blog-post">
<header>
<h1>The Future of AI in 2024</h1>
<div class="metadata">
<span class="author">John Doe</span>
<time datetime="2024-03-15">March 15, 2024</time>
<span class="read-time">8 min read</span>
</div>
<div class="tags">
<span class="tag">AI</span>
<span class="tag">Technology</span>
<span class="tag">Future</span>
</div>
</header>
<div class="content">
<p>Artificial Intelligence has seen remarkable growth in recent years...</p>
<h2>Key Developments</h2>
<ul>
<li>Advanced Language Models</li>
<li>Computer Vision Breakthroughs</li>
<li>Ethical AI Guidelines</li>
</ul>
<p>These advancements are reshaping industries...</p>
</div>
<footer>
<div class="engagement">
<span class="likes">1.2k likes</span>
<span class="comments">83 comments</span>
<span class="shares">456 shares</span>
</div>
</footer>
</article>
`;
80
// HTML fixture: a user profile card for "Sarah Wilson" with a header
// (avatar, name, membership status), three stat counters (followers,
// following, posts), a details section and three badges. Tests load it with
// page.setContent(); the markup is kept verbatim because assertions compare
// against its exact text.
const userProfileTemplate = `
<div class="user-profile">
<div class="profile-header">
<img src="https://example.com/avatar.jpg" alt="User Avatar" class="avatar" />
<h1 class="name">Sarah Wilson</h1>
<div class="status">Premium Member</div>
</div>
<div class="profile-stats">
<div class="stat">
<span class="value">1,234</span>
<span class="label">Followers</span>
</div>
<div class="stat">
<span class="value">567</span>
<span class="label">Following</span>
</div>
<div class="stat">
<span class="value">89</span>
<span class="label">Posts</span>
</div>
</div>
<div class="profile-details">
<div class="location">📍 San Francisco, CA</div>
<div class="bio">Tech enthusiast & photographer 📱 📸</div>
<div class="joined-date">Joined: January 2020</div>
</div>
<div class="badges">
<span class="badge">🏆 Top Contributor</span>
<span class="badge">✨ Trending Creator</span>
<span class="badge">🎯 Pro User</span>
</div>
</div>
`;
113
+ _extendedTest.describe.skip("Extract data from page tests", () => {
114
+ let browser;
115
+ let page;
116
/**
 * Returns `basePrompt` with one randomly chosen, meaning-neutral suffix
 * (dots or spaces) appended.
 *
 * NOTE(review): presumably this exists so repeated runs do not send a
 * byte-identical prompt (e.g. to avoid being served a previously cached
 * extraction) - confirm against the cache implementation.
 *
 * @param {string} basePrompt - Prompt text to decorate.
 * @returns {string} The prompt followed by one of the suffixes below.
 */
const getPromptVariation = basePrompt => {
  // Every suffix must be distinct: a repeated entry makes two "variations"
  // produce the same prompt and skews the pick towards that value.
  const variations = [".", " ", "..", "  ", "..."];
  const pickedIndex = Math.floor(Math.random() * variations.length);
  return `${basePrompt}${variations[pickedIndex]}`;
};
121
// Base prompts shared by the tests below, keyed by scenario. Frozen so that a
// test cannot accidentally overwrite a prompt that later tests rely on.
// Notes from the visible tests: `htmlStrategy` is not referenced (the HTML
// strategy test passes its own inline prompt), and `imageStrategy` carries the
// same text as `markdownStrategy`.
const sharedPrompts = Object.freeze({
  htmlStrategy: "Extract product information including title, price, stock status, and rating",
  markdownStrategy: "Extract article metadata including title, author, date, read time, and tags",
  arrayStrings: "Extract all unique tags from the article",
  arrayObjects: "Extract each product's title and list of features",
  objectNested: "Extract user profile information with nested stats",
  objectConstraints: "Extract article metadata with specific string formats",
  domMatching: "Extract follower counts",
  cachingBasic: "Extract product title and price",
  imageStrategy: "Extract article metadata including title, author, date, read time, and tags"
});
132
// Suite lifecycle: one Chromium instance is shared by the whole suite and every
// test gets its own freshly opened page.
(0, _extendedTest.beforeAll)(async () => {
  // Launched with headless: false, i.e. a visible browser window is requested.
  browser = await _playwrightCore.chromium.launch({
    headless: false
  });
});
(0, _extendedTest.afterAll)(async () => {
  // `browser` stays undefined when launch() rejected; closing unconditionally
  // would raise a TypeError that hides the original launch failure.
  if (browser) {
    await browser.close();
  }
});
(0, _extendedTest.beforeEach)(async () => {
  page = await browser.newPage();
});
(0, _extendedTest.afterEach)(async () => {
  // Same reasoning as afterAll: only close a page that was actually opened,
  // and drop the reference so a later failed newPage() cannot reuse it.
  if (page) {
    await page.close();
    page = undefined;
  }
});
146
+ (0, _extendedTest.describe)("Strategy Tests", () => {
147
(0, _extendedTest.describe)("HTML Strategy", () => {
  // Loads the product-list fixture, extracts it as an array of string-typed
  // product objects with the "HTML" strategy, and checks the count plus the
  // first product's title, price and stock.
  (0, _extendedTest.test)("should extract product list using HTML strategy", async () => {
    const productArraySchema = {
      type: "array",
      items: {
        type: "object",
        properties: {
          price: { type: "string", description: "Product price" },
          title: { type: "string", description: "Product name" },
          stock: { type: "string", description: "Stock status" },
          rating: { type: "string", description: "Product rating" }
        },
        required: ["title", "price", "stock"]
      }
    };
    await page.setContent(productListTemplate);
    const products = await (0, _.extractStructuredData)({
      source: page,
      dataSchema: productArraySchema,
      prompt: "extract product information including title, price, stock status, and rating pls",
      enableDomMatching: true,
      strategy: "HTML",
      model: "o4-mini"
    });
    (0, _extendedTest.expect)(Array.isArray(products)).toBe(true);
    (0, _extendedTest.expect)(products.length).toBe(3);
    const firstProduct = products[0];
    (0, _extendedTest.expect)(firstProduct).toHaveProperty("title", "iPhone 14 Pro");
    (0, _extendedTest.expect)(firstProduct).toHaveProperty("price", "$999");
    (0, _extendedTest.expect)(firstProduct).toHaveProperty("stock", "In Stock");
  });
});
189
(0, _extendedTest.describe)("IMAGE Strategy", () => {
  // Loads the article fixture and extracts its metadata with the "IMAGE"
  // strategy. The schema declares no required fields; only title and author
  // are asserted.
  (0, _extendedTest.test)("should extract article data using IMAGE strategy", async () => {
    const articleSchema = {
      type: "object",
      properties: {
        title: { type: "string", description: "Article title" },
        author: { type: "string", description: "Author name" },
        date: { type: "string", description: "Publication date" },
        readTime: { type: "string", description: "Reading time" },
        tags: {
          type: "array",
          items: { type: "string" },
          description: "Article tags"
        }
      }
    };
    await page.setContent(articleTemplate);
    const article = await (0, _.extractStructuredData)({
      source: page,
      dataSchema: articleSchema,
      prompt: getPromptVariation(sharedPrompts.imageStrategy),
      enableDomMatching: false,
      strategy: "IMAGE",
      model: "claude-3-5-sonnet-20240620",
      apiKey: process.env.ANTHROPIC_API_KEY
    });
    (0, _extendedTest.expect)(article).toHaveProperty("title", "The Future of AI in 2024");
    (0, _extendedTest.expect)(article).toHaveProperty("author", "John Doe");
  });
});
232
(0, _extendedTest.describe)("MARKDOWN Strategy", () => {
  // Loads the article fixture and extracts its metadata with the "MARKDOWN"
  // strategy; title, author and tags are required by the schema and asserted.
  (0, _extendedTest.test)("should extract article data using MARKDOWN strategy", async () => {
    const articleSchema = {
      type: "object",
      properties: {
        title: { type: "string", description: "Article title" },
        author: { type: "string", description: "Author name" },
        date: { type: "string", description: "Publication date" },
        readTime: { type: "string", description: "Reading time" },
        tags: {
          type: "array",
          items: { type: "string" },
          description: "Article tags"
        }
      },
      required: ["title", "author", "tags"]
    };
    await page.setContent(articleTemplate);
    const article = await (0, _.extractStructuredData)({
      source: page,
      dataSchema: articleSchema,
      prompt: getPromptVariation(sharedPrompts.markdownStrategy),
      enableDomMatching: false,
      strategy: "MARKDOWN",
      model: "claude-3-5-sonnet-20240620",
      apiKey: process.env.ANTHROPIC_API_KEY
    });
    (0, _extendedTest.expect)(article).toHaveProperty("title", "The Future of AI in 2024");
    (0, _extendedTest.expect)(article).toHaveProperty("author", "John Doe");
    (0, _extendedTest.expect)(article.tags).toContain("AI");
    (0, _extendedTest.expect)(article.tags).toContain("Technology");
  });
});
278
+ });
279
+ (0, _extendedTest.describe)("Schema Tests", () => {
280
(0, _extendedTest.describe)("Array Schema", () => {
  // Array-of-strings schema with size and uniqueness constraints; the result
  // is checked against the same bounds and must contain two known tags.
  (0, _extendedTest.test)("should extract array of strings", async () => {
    const tagListSchema = {
      type: "array",
      items: { type: "string" },
      description: "List of article tags",
      minItems: 2,
      maxItems: 5,
      uniqueItems: true
    };
    await page.setContent(articleTemplate);
    const tags = await (0, _.extractStructuredData)({
      source: page,
      dataSchema: tagListSchema,
      prompt: getPromptVariation(sharedPrompts.arrayStrings),
      enableDomMatching: false,
      strategy: "HTML",
      model: "claude-3-5-sonnet-20240620",
      apiKey: process.env.ANTHROPIC_API_KEY
    });
    (0, _extendedTest.expect)(Array.isArray(tags)).toBe(true);
    (0, _extendedTest.expect)(tags.length).toBeGreaterThanOrEqual(2);
    (0, _extendedTest.expect)(tags.length).toBeLessThanOrEqual(5);
    // No duplicates: de-duplicating must not shrink the list.
    (0, _extendedTest.expect)(new Set(tags).size).toBe(tags.length);
    (0, _extendedTest.expect)(tags).toContain("AI");
    (0, _extendedTest.expect)(tags).toContain("Technology");
  });

  // Array-of-objects schema where each item nests a string array; only the
  // first product is inspected.
  (0, _extendedTest.test)("should extract array of objects", async () => {
    const productFeaturesSchema = {
      type: "array",
      items: {
        type: "object",
        properties: {
          title: { type: "string" },
          features: {
            type: "array",
            items: { type: "string" }
          }
        },
        required: ["title", "features"]
      },
      minItems: 1
    };
    await page.setContent(productListTemplate);
    const products = await (0, _.extractStructuredData)({
      source: page,
      dataSchema: productFeaturesSchema,
      prompt: getPromptVariation(sharedPrompts.arrayObjects),
      enableDomMatching: false,
      strategy: "HTML",
      model: "claude-3-5-sonnet-20240620",
      apiKey: process.env.ANTHROPIC_API_KEY
    });
    (0, _extendedTest.expect)(Array.isArray(products)).toBe(true);
    const firstProduct = products[0];
    (0, _extendedTest.expect)(firstProduct.title).toBe("iPhone 14 Pro");
    (0, _extendedTest.expect)(Array.isArray(firstProduct.features)).toBe(true);
    (0, _extendedTest.expect)(firstProduct.features).toContain("5G Capable");
  });
});
343
(0, _extendedTest.describe)("Object Schema", () => {
  // Nested object schema (user, stats) plus an optional badges array,
  // extracted from the user-profile fixture.
  (0, _extendedTest.test)("should extract nested object with mixed types", async () => {
    const profileSchema = {
      type: "object",
      properties: {
        user: {
          type: "object",
          properties: {
            name: { type: "string" },
            status: { type: "string" }
          },
          required: ["name", "status"]
        },
        stats: {
          type: "object",
          properties: {
            followers: { type: "string" },
            following: { type: "string" },
            posts: { type: "string" }
          },
          required: ["followers", "following", "posts"]
        },
        badges: {
          type: "array",
          items: { type: "string" }
        }
      },
      required: ["user", "stats"]
    };
    // If the first setContent() rejects, reload the page and try once more;
    // a second failure propagates and fails the test.
    try {
      await page.setContent(userProfileTemplate);
    } catch (error) {
      await page.reload();
      await page.setContent(userProfileTemplate);
    }
    const profile = await (0, _.extractStructuredData)({
      source: page,
      dataSchema: profileSchema,
      prompt: getPromptVariation(sharedPrompts.objectNested),
      enableDomMatching: false,
      strategy: "HTML",
      model: "claude-3-5-sonnet-20240620",
      apiKey: process.env.ANTHROPIC_API_KEY
    });
    (0, _extendedTest.expect)(profile.user.name).toBe("Sarah Wilson");
    (0, _extendedTest.expect)(profile.user.status).toBe("Premium Member");
    (0, _extendedTest.expect)(profile.stats.followers).toBe("1,234");
    (0, _extendedTest.expect)(Array.isArray(profile.badges)).toBe(true);
  });

  // String fields constrained by length and regex pattern; the assertions
  // re-check the same constraints on the returned values.
  (0, _extendedTest.test)("should extract object with string constraints", async () => {
    const constrainedArticleSchema = {
      type: "object",
      properties: {
        title: {
          type: "string",
          minLength: 10,
          maxLength: 100
        },
        author: {
          type: "string",
          pattern: "^[A-Za-z ]+$"
        },
        readTime: {
          type: "string",
          pattern: "^\\d+ min read$"
        }
      },
      required: ["title", "author", "readTime"]
    };
    await page.setContent(articleTemplate);
    const article = await (0, _.extractStructuredData)({
      source: page,
      dataSchema: constrainedArticleSchema,
      prompt: getPromptVariation(sharedPrompts.objectConstraints),
      enableDomMatching: false,
      strategy: "HTML",
      model: "claude-3-5-sonnet-20240620",
      apiKey: process.env.ANTHROPIC_API_KEY
    });
    (0, _extendedTest.expect)(article.title.length).toBeGreaterThanOrEqual(10);
    (0, _extendedTest.expect)(article.title.length).toBeLessThanOrEqual(100);
    (0, _extendedTest.expect)(article.author).toMatch(/^[A-Za-z ]+$/);
    (0, _extendedTest.expect)(article.readTime).toMatch(/^\d+ min read$/);
  });
});
438
+ });
439
+ (0, _extendedTest.describe)("DOM Matching and Caching Tests", () => {
440
(0, _extendedTest.describe)("DOM Matching", () => {
  // With enableDomMatching on, a schema containing "number" fields is
  // expected to be rejected with the message asserted below.
  (0, _extendedTest.test)("should fail when using non-string types with DOM matching enabled", async () => {
    const numericStatsSchema = {
      type: "object",
      properties: {
        followers: { type: "number" },
        following: { type: "number" }
      },
      required: ["followers", "following"]
    };
    // If the first setContent() rejects, reload the page and try once more.
    try {
      await page.setContent(userProfileTemplate);
    } catch (error) {
      await page.reload();
      await page.setContent(userProfileTemplate);
    }
    const extraction = (0, _.extractStructuredData)({
      source: page,
      dataSchema: numericStatsSchema,
      prompt: getPromptVariation(sharedPrompts.domMatching),
      enableDomMatching: true,
      strategy: "HTML",
      model: "claude-3-5-sonnet-20240620"
    });
    await (0, _extendedTest.expect)(extraction).rejects.toThrow("For DOM matching, all types of the extraction fields must be STRINGS");
  });
});
469
(0, _extendedTest.describe)("Caching with DOM Matching", () => {
  // Each test performs two extractions of the first ".product" card with DOM
  // matching enabled and compares the results.
  //
  // The prompt is drawn ONCE per test and reused for both extractions.
  // Drawing a fresh random suffix for every call meant the two requests
  // usually carried different prompts, so the pair was not a like-for-like
  // repeat of the same request.
  // NOTE(review): this assumes the prompt is part of what identifies a cached
  // extraction - confirm against the cache implementation.

  // Fresh schema object per test so no state can leak between tests.
  const buildTitlePriceSchema = () => ({
    type: "object",
    properties: {
      title: { type: "string" },
      price: { type: "string" }
    },
    required: ["title", "price"]
  });

  // Runs one extraction against whatever the first ".product" card currently is.
  const extractFirstProduct = (dataSchema, prompt) => (0, _.extractStructuredData)({
    source: page.locator(".product").first(),
    dataSchema,
    prompt,
    enableDomMatching: true,
    strategy: "HTML",
    model: "claude-3-5-sonnet-20240620",
    apiKey: process.env.ANTHROPIC_API_KEY
  });

  // Same page, same request twice: results must be equal.
  (0, _extendedTest.test)("should use cached result when DOM matches", async () => {
    await page.setContent(productListTemplate);
    const schema = buildTitlePriceSchema();
    const prompt = getPromptVariation(sharedPrompts.cachingBasic);
    const firstResult = await extractFirstProduct(schema, prompt);
    const secondResult = await extractFirstProduct(schema, prompt);
    (0, _extendedTest.expect)(secondResult).toEqual(firstResult);
  });

  // The page changes between the calls, but only in text unrelated to the
  // extracted title/price: results must still be equal.
  (0, _extendedTest.test)("should use cached result when DOM changes", async () => {
    const schema = buildTitlePriceSchema();
    const prompt = getPromptVariation(sharedPrompts.cachingBasic);
    await page.setContent(productListTemplate);
    const firstResult = await extractFirstProduct(schema, prompt);
    const modifiedTemplate = productListTemplate.replace("Water Resistant", "DOM HAS CHANGED BUT NOT THE PRICE OR TITLE");
    await page.setContent(modifiedTemplate);
    const secondResult = await extractFirstProduct(schema, prompt);
    (0, _extendedTest.expect)(secondResult).toEqual(firstResult);
    (0, _extendedTest.expect)(secondResult.title).toBe("iPhone 14 Pro");
    (0, _extendedTest.expect)(secondResult.price).toBe("$999");
  });

  // The extracted title and price themselves change between the calls: the
  // second result must reflect the new values.
  (0, _extendedTest.test)("should not use cache when relevant DOM changes", async () => {
    const schema = buildTitlePriceSchema();
    const prompt = getPromptVariation(sharedPrompts.cachingBasic);
    await page.setContent(productListTemplate);
    const firstResult = await extractFirstProduct(schema, prompt);
    const modifiedTemplate = productListTemplate.replace("iPhone 14 Pro", "iPhone 15 Pro").replace("$999", "$1099");
    await page.setContent(modifiedTemplate);
    const secondResult = await extractFirstProduct(schema, prompt);
    (0, _extendedTest.expect)(secondResult).not.toEqual(firstResult);
    (0, _extendedTest.expect)(secondResult.title).toBe("iPhone 15 Pro");
    (0, _extendedTest.expect)(secondResult.price).toBe("$1099");
  });
});
581
(0, _extendedTest.describe)("Caching without DOM Matching", () => {
  // Two extractions with DOM matching disabled; the title and price in the
  // page are changed between them and the second result must carry the new
  // values.
  //
  // NOTE(review): this test used to be titled "should use cached result
  // regardless of DOM changes when matching is disabled", which contradicted
  // its own assertions (they require the second result to differ from the
  // first). The title now states what is asserted. If reusing the cached
  // result is the intended behaviour, the assertions are what need to change -
  // please confirm.
  (0, _extendedTest.test)("should return fresh data after the DOM changes when matching is disabled", async () => {
    const schema = {
      type: "object",
      properties: {
        title: { type: "string" },
        price: { type: "string" }
      },
      required: ["title", "price"]
    };
    // Options are rebuilt for every call so each call gets its own prompt
    // variation, as before.
    const buildOptions = () => ({
      source: page.locator(".product").first(),
      dataSchema: schema,
      prompt: getPromptVariation(sharedPrompts.cachingBasic),
      enableDomMatching: false,
      strategy: "HTML",
      model: "claude-3-5-sonnet-20240620",
      apiKey: process.env.ANTHROPIC_API_KEY
    });
    await page.setContent(productListTemplate);
    const firstResult = await (0, _.extractStructuredData)(buildOptions());
    const modifiedTemplate = productListTemplate.replace("iPhone 14 Pro", "iPhone 15 Pro").replace("$999", "$1099");
    await page.setContent(modifiedTemplate);
    const secondResult = await (0, _.extractStructuredData)(buildOptions());
    (0, _extendedTest.expect)(secondResult).not.toEqual(firstResult);
    (0, _extendedTest.expect)(secondResult.title).toBe("iPhone 15 Pro");
    (0, _extendedTest.expect)(secondResult.price).toBe("$1099");
  });
});
621
+ });
622
(0, _extendedTest.describe)("Zod Schema Integration", () => {
  // Passes a Zod object schema (instead of a plain JSON-schema object) as
  // dataSchema and extracts the first product card.
  // NOTE(review): `title` is declared optional in the schema but is asserted
  // below as if it were always returned - confirm this is intentional.
  (0, _extendedTest.test)("should extract data using Zod schema directly", async () => {
    await page.setContent(productListTemplate);
    const { z } = _zod;
    const productZodSchema = z.object({
      title: z.string().describe("Product name").optional(),
      price: z.string().describe("Product price"),
      stock: z.string().describe("Stock status"),
      rating: z.string().describe("Product rating")
    });
    const firstProductCard = page.locator(".product").first();
    const product = await (0, _.extractStructuredData)({
      source: firstProductCard,
      dataSchema: productZodSchema,
      prompt: "Extract prsoduct information including title, price, stock status, and rating",
      enableDomMatching: true,
      strategy: "HTML",
      model: "claude-3-5-sonnet-20240620",
      apiKey: process.env.ANTHROPIC_API_KEY
    });
    (0, _extendedTest.expect)(product).toHaveProperty("title", "iPhone 14 Pro");
    (0, _extendedTest.expect)(product).toHaveProperty("price", "$999");
    (0, _extendedTest.expect)(product).toHaveProperty("stock", "In Stock");
    (0, _extendedTest.expect)(product).toHaveProperty("rating", "4.5");
  });
});
646
+ });