@intuned/browser-dev 0.1.4-dev.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (212) hide show
  1. package/.babelrc +21 -0
  2. package/.eslintignore +10 -0
  3. package/.eslintrc.js +39 -0
  4. package/BROWSER_SCRIPTS_SETUP.md +84 -0
  5. package/LICENSE +43 -0
  6. package/README.md +160 -0
  7. package/RELEASE.md +60 -0
  8. package/dist/ai/export.d.js +5 -0
  9. package/dist/ai/export.d.ts +641 -0
  10. package/dist/ai/extractStructuredData.js +320 -0
  11. package/dist/ai/extractStructuredDataUsingAi.js +142 -0
  12. package/dist/ai/extractionHelpers/screenshotHelpers.js +56 -0
  13. package/dist/ai/extractionHelpers/validateSchema.js +148 -0
  14. package/dist/ai/index.d.ts +641 -0
  15. package/dist/ai/index.js +19 -0
  16. package/dist/ai/isPageLoaded.js +80 -0
  17. package/dist/ai/prompt.js +39 -0
  18. package/dist/ai/tests/testCheckAllTypesAreStrings.spec.js +137 -0
  19. package/dist/ai/tests/testExtractFromContent.spec.js +372 -0
  20. package/dist/ai/tests/testExtractStructuredData.spec.js +646 -0
  21. package/dist/ai/tests/testIsPageLoaded.spec.js +277 -0
  22. package/dist/ai/tools/index.js +48 -0
  23. package/dist/ai/types/errors.js +67 -0
  24. package/dist/ai/types/models.js +45 -0
  25. package/dist/ai/types/types.js +48 -0
  26. package/dist/ai/validators.js +167 -0
  27. package/dist/common/Logger/index.js +60 -0
  28. package/dist/common/Logger/types.js +5 -0
  29. package/dist/common/SdkError.js +50 -0
  30. package/dist/common/aiModelsValidations.js +32 -0
  31. package/dist/common/ensureBrowserScripts.js +14 -0
  32. package/dist/common/extendedTest.js +157 -0
  33. package/dist/common/extractionHelpers.js +19 -0
  34. package/dist/common/formatZodError.js +18 -0
  35. package/dist/common/fuzzySearch/fuzzySearch.test.js +250 -0
  36. package/dist/common/fuzzySearch/levenshtein-search.js +298 -0
  37. package/dist/common/fuzzySearch/utils.js +23 -0
  38. package/dist/common/getModelProvider.js +18 -0
  39. package/dist/common/getSimplifiedHtml.js +122 -0
  40. package/dist/common/hashObject.js +32 -0
  41. package/dist/common/html2markdown/convertElementToMarkdown.js +469 -0
  42. package/dist/common/html2markdown/index.js +19 -0
  43. package/dist/common/jwtTokenManager.js +57 -0
  44. package/dist/common/loadRuntime.js +16 -0
  45. package/dist/common/locatorHelpers.js +41 -0
  46. package/dist/common/matching/collectStrings.js +32 -0
  47. package/dist/common/matching/levenshtein.js +40 -0
  48. package/dist/common/matching/matching.js +317 -0
  49. package/dist/common/matching/types.js +1 -0
  50. package/dist/common/noEmpty.js +9 -0
  51. package/dist/common/saveSnapshotWithExamples.js +60 -0
  52. package/dist/common/script.js +2602 -0
  53. package/dist/common/tests/testEnsureBrowserScript.spec.js +31 -0
  54. package/dist/common/xpathMapping.js +107 -0
  55. package/dist/helpers/clickUntilExhausted.js +85 -0
  56. package/dist/helpers/downloadFile.js +125 -0
  57. package/dist/helpers/export.d.js +5 -0
  58. package/dist/helpers/export.d.ts +1220 -0
  59. package/dist/helpers/extractMarkdown.js +35 -0
  60. package/dist/helpers/filterEmptyValues.js +54 -0
  61. package/dist/helpers/gotoUrl.js +98 -0
  62. package/dist/helpers/index.d.ts +1220 -0
  63. package/dist/helpers/index.js +122 -0
  64. package/dist/helpers/processDate.js +25 -0
  65. package/dist/helpers/resolveUrl.js +64 -0
  66. package/dist/helpers/sanitizeHtml.js +74 -0
  67. package/dist/helpers/saveFileToS3.js +50 -0
  68. package/dist/helpers/scrollToLoadContent.js +57 -0
  69. package/dist/helpers/tests/testClickUntilExhausted.spec.js +372 -0
  70. package/dist/helpers/tests/testDownloadFile.spec.js +206 -0
  71. package/dist/helpers/tests/testExtractMarkdown.spec.js +290 -0
  72. package/dist/helpers/tests/testFilterEmptyValues.spec.js +151 -0
  73. package/dist/helpers/tests/testGoToUrl.spec.js +37 -0
  74. package/dist/helpers/tests/testProcessDate.spec.js +13 -0
  75. package/dist/helpers/tests/testResolveUrl.spec.js +341 -0
  76. package/dist/helpers/tests/testSanitizeHtml.spec.js +330 -0
  77. package/dist/helpers/tests/testScrollToLoadContent.spec.js +163 -0
  78. package/dist/helpers/tests/testValidateDataUsingSchema.spec.js +342 -0
  79. package/dist/helpers/tests/testWithDomSettledWait.spec.js +164 -0
  80. package/dist/helpers/tests/testWithNetworkIdleWait.spec.js +114 -0
  81. package/dist/helpers/types/Attachment.js +115 -0
  82. package/dist/helpers/types/CustomTypeRegistry.js +48 -0
  83. package/dist/helpers/types/RunEnvironment.js +18 -0
  84. package/dist/helpers/types/ValidationError.js +17 -0
  85. package/dist/helpers/types/index.js +51 -0
  86. package/dist/helpers/uploadFileToS3.js +154 -0
  87. package/dist/helpers/utils/getS3Client.js +22 -0
  88. package/dist/helpers/utils/index.js +73 -0
  89. package/dist/helpers/utils/isDownload.js +10 -0
  90. package/dist/helpers/utils/isGenerateCodeMode.js +9 -0
  91. package/dist/helpers/utils/isLocator.js +9 -0
  92. package/dist/helpers/utils/jwtTokenManager.js +18 -0
  93. package/dist/helpers/validateDataUsingSchema.js +103 -0
  94. package/dist/helpers/waitForDomSettled.js +90 -0
  95. package/dist/helpers/withNetworkSettledWait.js +91 -0
  96. package/dist/index.d.js +16 -0
  97. package/dist/index.d.ts +10 -0
  98. package/dist/index.js +16 -0
  99. package/dist/intunedServices/ApiGateway/aiApiGateway.js +143 -0
  100. package/dist/intunedServices/ApiGateway/factory.js +16 -0
  101. package/dist/intunedServices/ApiGateway/providers/Anthropic.js +26 -0
  102. package/dist/intunedServices/ApiGateway/providers/Gemini.js +29 -0
  103. package/dist/intunedServices/ApiGateway/providers/OpenAI.js +29 -0
  104. package/dist/intunedServices/ApiGateway/tests/testApiGateway.spec.js +355 -0
  105. package/dist/intunedServices/ApiGateway/types.js +11 -0
  106. package/dist/intunedServices/cache/cache.js +61 -0
  107. package/dist/intunedServices/cache/index.js +12 -0
  108. package/dist/intunedServices/cache/tests/testCache.spec.js +117 -0
  109. package/dist/optimized-extractors/common/buildExamplesPrompt.js +12 -0
  110. package/dist/optimized-extractors/common/buildImagesFromPage.js +55 -0
  111. package/dist/optimized-extractors/common/extractStructuredDataUsingClaude.js +135 -0
  112. package/dist/optimized-extractors/common/extractStructuredDataUsingGoogle.js +37 -0
  113. package/dist/optimized-extractors/common/extractStructuredDataUsingOpenAi.js +132 -0
  114. package/dist/optimized-extractors/common/extractStrucutredDataUsingAiInstance.js +122 -0
  115. package/dist/optimized-extractors/common/findTableHeaders.js +162 -0
  116. package/dist/optimized-extractors/common/index.js +55 -0
  117. package/dist/optimized-extractors/common/isTableHeaderOrFooter.js +84 -0
  118. package/dist/optimized-extractors/common/matching/matching.js +212 -0
  119. package/dist/optimized-extractors/common/matching/matching.test.js +655 -0
  120. package/dist/optimized-extractors/common/matching/types.js +18 -0
  121. package/dist/optimized-extractors/common/matching/utils.js +184 -0
  122. package/dist/optimized-extractors/common/utils.js +58 -0
  123. package/dist/optimized-extractors/export.d.js +5 -0
  124. package/dist/optimized-extractors/export.d.ts +397 -0
  125. package/dist/optimized-extractors/extractArray.js +120 -0
  126. package/dist/optimized-extractors/extractObject.js +104 -0
  127. package/dist/optimized-extractors/index.d.ts +397 -0
  128. package/dist/optimized-extractors/index.js +31 -0
  129. package/dist/optimized-extractors/listExtractionHelpers/__tests__/dynamicListExtractor.spec.js +269 -0
  130. package/dist/optimized-extractors/listExtractionHelpers/__tests__/findSetOfXpathsToCreateAnArrayExtractor.test.js +22 -0
  131. package/dist/optimized-extractors/listExtractionHelpers/__tests__/getContainerElement.test.js +21 -0
  132. package/dist/optimized-extractors/listExtractionHelpers/__tests__/partOfSameArrayXpath.test.js +42 -0
  133. package/dist/optimized-extractors/listExtractionHelpers/__tests__/testArrayExtractorFromLocator.spec.js +146 -0
  134. package/dist/optimized-extractors/listExtractionHelpers/__tests__/testArrayExtractorFromPage.spec.js +130 -0
  135. package/dist/optimized-extractors/listExtractionHelpers/__tests__/verifyThatAllXpathsArePartOfSameArray.test.js +9 -0
  136. package/dist/optimized-extractors/listExtractionHelpers/dynamicListExtractor.js +160 -0
  137. package/dist/optimized-extractors/listExtractionHelpers/errors.js +46 -0
  138. package/dist/optimized-extractors/listExtractionHelpers/getListMatches.js +14 -0
  139. package/dist/optimized-extractors/listExtractionHelpers/runAiExtraction.js +243 -0
  140. package/dist/optimized-extractors/listExtractionHelpers/typesAndSchema.js +5 -0
  141. package/dist/optimized-extractors/listExtractionHelpers/utils/extractPropertiesUsingGPTFromArray.js +277 -0
  142. package/dist/optimized-extractors/listExtractionHelpers/utils/extractStructuredListUsingAi.js +44 -0
  143. package/dist/optimized-extractors/listExtractionHelpers/utils/getListContainerXpath.js +94 -0
  144. package/dist/optimized-extractors/listExtractionHelpers/utils/getRelativeContainerXpathSelector.js +20 -0
  145. package/dist/optimized-extractors/listExtractionHelpers/utils/getSimplifiedHtmlPerListItem.js +21 -0
  146. package/dist/optimized-extractors/listExtractionHelpers/utils/tablesUtils.js +48 -0
  147. package/dist/optimized-extractors/listExtractionHelpers/utils/validateOptions.js +52 -0
  148. package/dist/optimized-extractors/models/anthropicModel.js +23 -0
  149. package/dist/optimized-extractors/models/openaiModel.js +23 -0
  150. package/dist/optimized-extractors/objectExtractionHelpers/AIExtractors.js +73 -0
  151. package/dist/optimized-extractors/objectExtractionHelpers/__tests__/checksumUtils.test.js +103 -0
  152. package/dist/optimized-extractors/objectExtractionHelpers/__tests__/testObjectExtractorFromLocator.spec.js +107 -0
  153. package/dist/optimized-extractors/objectExtractionHelpers/__tests__/testObjectExtractorFromPage.spec.js +107 -0
  154. package/dist/optimized-extractors/objectExtractionHelpers/calculateObjectExampleHash.js +28 -0
  155. package/dist/optimized-extractors/objectExtractionHelpers/captureSnapshot.js +26 -0
  156. package/dist/optimized-extractors/objectExtractionHelpers/checksumUtils.js +32 -0
  157. package/dist/optimized-extractors/objectExtractionHelpers/constants.js +7 -0
  158. package/dist/optimized-extractors/objectExtractionHelpers/dynamicObjectExtractor.js +106 -0
  159. package/dist/optimized-extractors/objectExtractionHelpers/errors.js +42 -0
  160. package/dist/optimized-extractors/objectExtractionHelpers/findDomMatches.js +54 -0
  161. package/dist/optimized-extractors/objectExtractionHelpers/getSimplifiedHtml.js +122 -0
  162. package/dist/optimized-extractors/objectExtractionHelpers/typesAndSchemas.js +5 -0
  163. package/dist/optimized-extractors/objectExtractionHelpers/validateDynamicObjectExtractorOptions.js +52 -0
  164. package/dist/optimized-extractors/types/aiModelsValidation.js +45 -0
  165. package/dist/optimized-extractors/types/errors.js +42 -0
  166. package/dist/optimized-extractors/types/jsonSchema.d.js +5 -0
  167. package/dist/optimized-extractors/types/jsonSchema.d.ts +50 -0
  168. package/dist/optimized-extractors/types/types.js +5 -0
  169. package/dist/optimized-extractors/validators.js +152 -0
  170. package/dist/types/intuned-runtime.d.js +1 -0
  171. package/dist/types/intuned-runtime.d.ts +64 -0
  172. package/dist/vite-env.d.js +1 -0
  173. package/dist/vite-env.d.ts +9 -0
  174. package/generated-docs/ai/functions/extractStructuredData.mdx +255 -0
  175. package/generated-docs/ai/functions/isPageLoaded.mdx +88 -0
  176. package/generated-docs/ai/interfaces/ArraySchema.mdx +36 -0
  177. package/generated-docs/ai/interfaces/BasicSchema.mdx +14 -0
  178. package/generated-docs/ai/interfaces/BooleanSchema.mdx +28 -0
  179. package/generated-docs/ai/interfaces/ImageBufferContentItem.mdx +16 -0
  180. package/generated-docs/ai/interfaces/ImageUrlContentItem.mdx +16 -0
  181. package/generated-docs/ai/interfaces/NumberSchema.mdx +35 -0
  182. package/generated-docs/ai/interfaces/ObjectSchema.mdx +39 -0
  183. package/generated-docs/ai/interfaces/StringSchema.mdx +35 -0
  184. package/generated-docs/ai/interfaces/TextContentItem.mdx +14 -0
  185. package/generated-docs/ai/type-aliases/ContentItem.mdx +12 -0
  186. package/generated-docs/ai/type-aliases/JsonSchema.mdx +47 -0
  187. package/generated-docs/ai/type-aliases/SUPPORTED_MODELS.mdx +85 -0
  188. package/generated-docs/helpers/functions/downloadFile.mdx +99 -0
  189. package/generated-docs/helpers/functions/extractMarkdown.mdx +56 -0
  190. package/generated-docs/helpers/functions/filterEmptyValues.mdx +51 -0
  191. package/generated-docs/helpers/functions/goToUrl.mdx +124 -0
  192. package/generated-docs/helpers/functions/processDate.mdx +55 -0
  193. package/generated-docs/helpers/functions/resolveUrl.mdx +165 -0
  194. package/generated-docs/helpers/functions/sanitizeHtml.mdx +113 -0
  195. package/generated-docs/helpers/functions/saveFileToS3.mdx +127 -0
  196. package/generated-docs/helpers/functions/scrollToLoadContent.mdx +89 -0
  197. package/generated-docs/helpers/functions/uploadFileToS3.mdx +121 -0
  198. package/generated-docs/helpers/functions/validateDataUsingSchema.mdx +90 -0
  199. package/generated-docs/helpers/functions/waitForDomSettled.mdx +91 -0
  200. package/generated-docs/helpers/functions/withNetworkSettledWait.mdx +76 -0
  201. package/generated-docs/helpers/interfaces/Attachment.mdx +56 -0
  202. package/generated-docs/helpers/interfaces/S3Configs.mdx +52 -0
  203. package/generated-docs/helpers/interfaces/SanitizeHtmlOptions.mdx +22 -0
  204. package/generated-docs/helpers/type-aliases/AttachmentType.mdx +10 -0
  205. package/generated-docs/helpers/type-aliases/FileType.mdx +61 -0
  206. package/generated-docs/helpers/type-aliases/Trigger.mdx +62 -0
  207. package/how-to-generate-docs.md +61 -0
  208. package/how-to-run-tests.md +42 -0
  209. package/intuned-runtime-setup.md +13 -0
  210. package/package.json +124 -0
  211. package/tsconfig.eslint.json +5 -0
  212. package/tsconfig.json +26 -0
@@ -0,0 +1,269 @@
1
+ "use strict";
2
+
3
+ var _extendedTest = require("../../../common/extendedTest");
4
+ var _dynamicListExtractor = require("../dynamicListExtractor");
5
+ var _uuid = require("uuid");
6
+ var _dotenv = require("dotenv");
7
+ var _neverthrow = require("neverthrow");
8
+ var _Logger = require("../../../common/Logger");
9
// Interop helper (appears to be compiler-emitted, presumably by Babel — confirm against the build config).
// Wraps a CommonJS export `e` so it can be consumed like an ES module namespace:
//   - on first call it creates two WeakMap caches (when WeakMap exists) and replaces itself
//     with the inner implementation, so the caches are set up only once;
//   - returns `e` untouched when `t` is falsy and `e.__esModule` is truthy;
//   - otherwise builds a null-prototype object `f` with `default: e`; for null or
//     non-object/non-function `e` that object is returned immediately (and not cached);
//   - results are cached per `e` in one of the two WeakMaps, selected by `t`;
//   - every own enumerable key of `e` except "default" is copied onto `f`, re-defining
//     accessor properties (get/set) via Object.defineProperty instead of reading their value.
// Used further down to wrap `require("../runAiExtraction")`.
+ function _interopRequireWildcard(e, t) { if ("function" == typeof WeakMap) var r = new WeakMap(), n = new WeakMap(); return (_interopRequireWildcard = function (e, t) { if (!t && e && e.__esModule) return e; var o, i, f = { __proto__: null, default: e }; if (null === e || "object" != typeof e && "function" != typeof e) return f; if (o = t ? n : r) { if (o.has(e)) return o.get(e); o.set(e, f); } for (const t in e) "default" !== t && {}.hasOwnProperty.call(e, t) && ((i = (o = Object.defineProperty) && Object.getOwnPropertyDescriptor(e, t)) && (i.get || i.set) ? o(f, t, i) : f[t] = e[t]); return f; })(e, t); }
10
+ (0, _dotenv.config)();
11
// HTML fixture loaded into the page by the first caching test: a single `.books-list`
// container holding three `.book-page` entries, each with a title (`h1.book-title`),
// a price (`span.price`) and a description (`p.book-description`).
// The test derives its other page variants from this string via replace()/split(),
// so the class names and text values here are relied on by those calls.
+ const booksTemplate = `
12
+ <div class="books-list">
13
+ <div class="book-page">
14
+ <div class="book-info">
15
+ <h1 class="book-title">The Great Gatsby</h1>
16
+ <div class="price-container">
17
+ <span class="price">$15.99</span>
18
+ </div>
19
+ <div class="description">
20
+ <p class="book-description">A classic novel about the American Dream</p>
21
+ </div>
22
+ </div>
23
+ </div>
24
+
25
+ <div class="book-page">
26
+ <div class="book-info">
27
+ <h1 class="book-title">1984</h1>
28
+ <div class="price-container">
29
+ <span class="price">$12.99</span>
30
+ </div>
31
+ <div class="description">
32
+ <p class="book-description">A dystopian social science fiction novel</p>
33
+ </div>
34
+ </div>
35
+ </div>
36
+
37
+ <div class="book-page">
38
+ <div class="book-info">
39
+ <h1 class="book-title">To Kill a Mockingbird</h1>
40
+ <div class="price-container">
41
+ <span class="price">$14.99</span>
42
+ </div>
43
+ <div class="description">
44
+ <p class="book-description">A story of racial injustice and loss of innocence</p>
45
+ </div>
46
+ </div>
47
+ </div>
48
+ </div>
49
+ `;
50
+ _extendedTest.describe.skip("Dynamic List Extractor Caching Tests", () => {
51
+ (0, _extendedTest.describe)("DOM Changes and Cache Behavior", () => {
52
/**
 * Runs dynamicListExtractor six times against successive variants of the books
 * fixture, reusing one label/variantKey for every call, and checks the result
 * of each run:
 *   1. original fixture            -> exactly the three expected books
 *   2. extra class on the container -> same result as run 1
 *   3. first title/price changed    -> result differs and reflects the new values
 *   4. extra section inside the list -> result differs from run 3
 *   5. fourth book appended          -> four books
 *   6. second list outside container -> still four books
 * The suite name implies runs 2-6 exercise the extractor cache; this test only
 * asserts on returned data, not on whether a cache hit actually happened.
 */
(0, _extendedTest.test)("should demonstrate caching behavior with different types of DOM changes", async ({
  page
}) => {
  const expect = _extendedTest.expect;

  // A random label per run; presumably this keeps entries cached by earlier
  // runs from being reused (verify against the cache key construction).
  const testLabel = `books-cache-test-${(0, _uuid.v4)()}`;
  const variantKey = testLabel;
  const entitySchema = {
    type: "object",
    required: ["name"],
    properties: {
      name: {
        type: "string",
        description: "book name",
        primary: true
      },
      price: {
        type: "string",
        description: "book price"
      },
      description: {
        type: "string",
        description: "book description"
      }
    }
  };
  const extractionOptions = {
    itemEntityName: "book",
    label: testLabel,
    itemEntitySchema: entitySchema,
    strategy: {
      model: "claude-3-5-sonnet-20240620",
      type: "HTML"
    },
    variantKey,
    apiKey: process.env.ANTHROPIC_API_KEY
  };

  // Every scenario extracts from the same container with the same options.
  const extractBooks = () => (0, _dynamicListExtractor.dynamicListExtractor)(page, ".books-list", extractionOptions);

  // 1. Baseline extraction from the untouched fixture.
  await page.setContent(booksTemplate);
  const firstResult = await extractBooks();
  expect(firstResult.isOk()).toBe(true);
  const books = firstResult._unsafeUnwrap();
  const expectedResult = [{
    name: "The Great Gatsby",
    price: "$15.99",
    description: "A classic novel about the American Dream"
  }, {
    name: "1984",
    price: "$12.99",
    description: "A dystopian social science fiction novel"
  }, {
    name: "To Kill a Mockingbird",
    price: "$14.99",
    description: "A story of racial injustice and loss of innocence"
  }];
  expect(books).toHaveLength(3);
  // toEqual already covers every property, so no separate per-property checks are needed.
  expectedResult.forEach((expectedBook, index) => {
    expect(books[index]).toEqual(expectedBook);
  });

  // 2. Only the container's class attribute changes; the extracted data must not.
  const irrelevantChangeTemplate = booksTemplate.replace('class="books-list"', 'class="books-list featured-books"');
  await page.setContent(irrelevantChangeTemplate);
  const secondResult = await extractBooks();
  expect(secondResult.isOk()).toBe(true);
  expect(secondResult._unsafeUnwrap()).toEqual(books);

  // 3. Text of the first book changes; the new values must be returned.
  const modifiedTemplate = booksTemplate.replace("The Great Gatsby", "The Great Gatsby (Deluxe Edition)").replace("$15.99", "$19.99");
  await page.setContent(modifiedTemplate);
  const thirdResult = await extractBooks();
  expect(thirdResult.isOk()).toBe(true);
  const modifiedBooks = thirdResult._unsafeUnwrap();
  expect(modifiedBooks).not.toEqual(books);
  expect(modifiedBooks[0]).toHaveProperty("name", "The Great Gatsby (Deluxe Edition)");
  expect(modifiedBooks[0]).toHaveProperty("price", "$19.99");

  // 4. A non-book section is inserted at the top of the list (built from the
  //    original fixture, so the first book is back to its original text).
  const insertedTemplate = `
<div class="books-list">
<div class="featured-section">
<h2>Featured Books</h2>
</div>
${booksTemplate.split('<div class="books-list">')[1]}
`;
  await page.setContent(insertedTemplate);
  const fourthResult = await extractBooks();
  expect(fourthResult.isOk()).toBe(true);
  expect(fourthResult._unsafeUnwrap()).not.toEqual(modifiedBooks);

  // 5. Append a fourth book as the last child of the list.
  //    The insertion point is the final "</div>" (the list's own closing tag).
  //    Matching the literal "</div>\n</div>" instead depends on the template's
  //    exact whitespace and, being a string replace, only touches the first
  //    occurrence, which is not guaranteed to be the end of the list.
  const duneBookMarkup = `<div class="book-page">
<div class="book-info">
<h1 class="book-title">Dune</h1>
<div class="price-container">
<span class="price">$16.99</span>
</div>
<div class="description">
<p class="book-description">A science fiction masterpiece</p>
</div>
</div>
</div>
`;
  const listCloseIndex = insertedTemplate.lastIndexOf("</div>");
  expect(listCloseIndex).toBeGreaterThan(-1);
  const appendedTemplate = insertedTemplate.slice(0, listCloseIndex) + duneBookMarkup + insertedTemplate.slice(listCloseIndex);
  await page.setContent(appendedTemplate);
  const fifthResult = await extractBooks();
  expect(fifthResult.isOk()).toBe(true);
  const appendedBooks = fifthResult._unsafeUnwrap();
  expect(appendedBooks).toHaveLength(4);

  // 6. A look-alike list outside the ".books-list" container must not add items.
  const outsideTemplate = appendedTemplate + `
<div class="outside-books-list">
<div class="book-page">
<div class="book-info">
<h1 class="book-title">Dune</h1>
</div>
</div>
</div>
`;
  await page.setContent(outsideTemplate);
  const sixthResult = await extractBooks();
  expect(sixthResult.isOk()).toBe(true);
  const outsideBooks = sixthResult._unsafeUnwrap();
  expect(outsideBooks).toHaveLength(4);

  // Logged once, after the last scenario has actually passed.
  _Logger.logger.info("All cache behavior tests completed successfully!");
});
171
/**
 * Stubs runAiExtraction so it resolves with 1000 deliberately verbose rows, then
 * checks that dynamicListExtractor:
 *   - returns all 1000 rows on the first call and emits logger.warn messages
 *     containing "Results exceed cache limit" and "skipping caching";
 *   - on a second call still succeeds and emits a logger.warn message containing
 *     "exceeds cache limit and is not cacheable".
 * The fixture strings mention a 380KB limit; the actual threshold lives in the
 * extractor and is not visible from this test.
 * All spies are restored in a finally block so a failing assertion cannot leave
 * runAiExtraction stubbed (or the logger wrapped) for later tests.
 */
(0, _extendedTest.test)("should handle cache size limit correctly", async ({
  page
}) => {
  const expect = _extendedTest.expect;
  const testLabel = `books-cache-size-test-${(0, _uuid.v4)()}`;
  const variantKey = testLabel;
  const simpleTemplate = `
<div class="books-list">
<div class="book-page">
<div class="book-info">
<h1 class="book-title">Test Book</h1>
<div class="price-container">
<span class="price">$15.99</span>
</div>
<div class="description">
<p class="book-description">A test book</p>
</div>
</div>
</div>
</div>
`;
  const entitySchema = {
    type: "object",
    required: ["name"],
    properties: {
      name: {
        type: "string",
        description: "book name",
        primary: true
      },
      price: {
        type: "string",
        description: "book price"
      },
      description: {
        type: "string",
        description: "book description"
      }
    }
  };
  const extractionOptions = {
    itemEntityName: "book",
    label: testLabel,
    itemEntitySchema: entitySchema,
    strategy: {
      model: "claude-3-5-sonnet-20240620",
      type: "HTML"
    },
    variantKey,
    apiKey: process.env.ANTHROPIC_API_KEY
  };

  // 1000 rows with long text so the serialized result is large.
  const largeMockData = Array.from({
    length: 1000
  }, (_, i) => ({
    rowIndex: i,
    result: {
      name: {
        matchText: `Book Title ${i} - This is a very long book title with extensive descriptive text to make the cached data structure large enough to exceed the 380KB limit when we have many items like this in the response`,
        matchXpath: "/html/body/div[1]/div[1]/div[1]/h1",
        matchType: "direct-text"
      },
      price: {
        matchText: `$${(15 + i % 50).toFixed(2)}`,
        matchXpath: "/html/body/div[1]/div[1]/div[1]/div[1]/span",
        matchType: "direct-text"
      },
      description: {
        matchText: `A very detailed description of book ${i} with extensive content to make the cached data structure large enough to exceed the 380KB limit. This description contains multiple sentences and detailed information about the plot, characters, themes, and critical reception of the book. The description is intentionally verbose to increase the JSON size when serialized for caching purposes. Additional padding text to ensure we reach the size limit for testing cache overflow behavior.`,
        matchXpath: "/html/body/div[1]/div[1]/div[1]/div[2]/p",
        matchType: "direct-text"
      }
    }
  }));

  // Replace the AI extraction step with a canned successful result.
  const runAiExtractionModule = await Promise.resolve().then(() => _interopRequireWildcard(require("../runAiExtraction")));
  const runAiExtractionSpy = _extendedTest.vi.spyOn(runAiExtractionModule, "runAiExtraction").mockResolvedValue((0, _neverthrow.ok)({
    resultValues: largeMockData,
    containerPath: "/html/body/div[1]",
    matches: new Map(),
    fullContainerXpath: "/html/body/div[1]"
  }));
  // Pass-through spies on the logger; only the warn spy is asserted on.
  const loggerDebugSpy = _extendedTest.vi.spyOn(_Logger.logger, "debug");
  const loggerInfoSpy = _extendedTest.vi.spyOn(_Logger.logger, "info");
  const loggerWarnSpy = _extendedTest.vi.spyOn(_Logger.logger, "warn");
  try {
    await page.setContent(simpleTemplate);
    const firstResult = await (0, _dynamicListExtractor.dynamicListExtractor)(page, ".books-list", extractionOptions);
    expect(firstResult.isOk()).toBe(true);
    const books = firstResult._unsafeUnwrap();
    expect(books).toHaveLength(1000);
    expect(loggerWarnSpy).toHaveBeenCalledWith(expect.stringContaining("Results exceed cache limit"));
    expect(loggerWarnSpy).toHaveBeenCalledWith(expect.stringContaining("skipping caching"));

    // Same page, same options: the second call must report the entry as not cacheable.
    const secondResult = await (0, _dynamicListExtractor.dynamicListExtractor)(page, ".books-list", extractionOptions);
    expect(secondResult.isOk()).toBe(true);
    expect(loggerWarnSpy).toHaveBeenCalledWith(expect.stringContaining("exceeds cache limit and is not cacheable"));
  } finally {
    // Restore every spy, including on failure, so nothing leaks into other tests.
    [runAiExtractionSpy, loggerDebugSpy, loggerInfoSpy, loggerWarnSpy].forEach(spy => spy.mockRestore());
  }
  // Use the project logger, matching the sibling test, instead of console.log.
  _Logger.logger.info("Cache size limit test completed successfully!");
});
268
+ });
269
+ });
@@ -0,0 +1,22 @@
1
+ "use strict";
2
+
3
+ var _getListContainerXpath = require("../utils/getListContainerXpath");
4
+ var _vitest = require("vitest");
5
+ (0, _vitest.describe)("find xpaths to create an array", () => {
6
// Groups arrive in li[2], li[3], li[1], li[4] order and the smallest group holds a
// single xpath; the expected output is one @title xpath per list item, li[1]..li[4].
(0, _vitest.it)("basic case | 1 element in smallest", () => {
  const secondItemXpaths = [
    "html[1]/li[2]/article[1]/h3[1]",
    "html[1]/li[2]/article[1]/h3[1]/a[1]/@title",
    "html[1]/li[2]/article[1]/h3[1]/a[1]/text()"
  ];
  const thirdItemXpaths = [
    "html[1]/li[3]/article[1]/h3[1]/a[1]/@title",
    "html[1]/li[3]/article[1]/h3[1]/a[1]/@title"
  ];
  const firstItemXpaths = [
    "html[1]/li[1]/article[1]/h3[1]/a[1]/@title"
  ];
  const fourthItemXpaths = [
    "html[1]/li[4]/article[1]/h3[1]",
    "html[1]/li[4]/article[1]/h3[1]/a[1]/@title",
    "html[1]/li[4]/article[1]/h3[1]/a[1]/@title",
    "html[1]/li[4]/article[1]/h3[1]/a[1]/text()"
  ];
  const xpathGroups = [secondItemXpaths, thirdItemXpaths, firstItemXpaths, fourthItemXpaths];
  const expectedXpaths = [
    "html[1]/li[1]/article[1]/h3[1]/a[1]/@title",
    "html[1]/li[2]/article[1]/h3[1]/a[1]/@title",
    "html[1]/li[3]/article[1]/h3[1]/a[1]/@title",
    "html[1]/li[4]/article[1]/h3[1]/a[1]/@title"
  ];
  const actualXpaths = (0, _getListContainerXpath.findSetOfXpathsToCreateAnArrayExtractor)(xpathGroups);
  (0, _vitest.expect)(actualXpaths).toEqual(expectedXpaths);
});
10
// The smallest groups hold two xpaths each; the expected output is the @href
// xpath of every list item, li[1]..li[3].
(0, _vitest.it)("basic case | 2 elements in smallest", () => {
  const firstItemXpaths = [
    "html[1]/li[1]/article[1]/h3[1]",
    "html[1]/li[1]/article[1]/h3[1]/a[1]/@href"
  ];
  const secondItemXpaths = [
    "html[1]/li[2]/article[1]/h3[1]/a[1]/@href",
    "html[1]/li[2]/article[1]/h3[1]/a[1]/text()"
  ];
  const thirdItemXpaths = [
    "html[1]/li[3]/article[1]/h3[1]/a[1]/@href",
    "html[1]/li[3]/article[1]/h3[1]/a[1]/text()",
    "html[1]/li[3]/article[1]/h3[1]/a[1]/@title"
  ];
  const xpathGroups = [firstItemXpaths, secondItemXpaths, thirdItemXpaths];
  const expectedXpaths = [
    "html[1]/li[1]/article[1]/h3[1]/a[1]/@href",
    "html[1]/li[2]/article[1]/h3[1]/a[1]/@href",
    "html[1]/li[3]/article[1]/h3[1]/a[1]/@href"
  ];
  const actualXpaths = (0, _getListContainerXpath.findSetOfXpathsToCreateAnArrayExtractor)(xpathGroups);
  (0, _vitest.expect)(actualXpaths).toEqual(expectedXpaths);
});
14
// Both groups contain the same two xpaths at different depths (h3 and h3/a);
// the expected output is the deeper a[1] xpath of each list item.
(0, _vitest.it)("2 sets | 2 items different depth", () => {
  const firstItemXpaths = [
    "html[1]/li[1]/article[1]/h3[1]",
    "html[1]/li[1]/article[1]/h3[1]/a[1]"
  ];
  const secondItemXpaths = [
    "html[1]/li[2]/article[1]/h3[1]",
    "html[1]/li[2]/article[1]/h3[1]/a[1]"
  ];
  const xpathGroups = [firstItemXpaths, secondItemXpaths];
  const expectedXpaths = [
    "html[1]/li[1]/article[1]/h3[1]/a[1]",
    "html[1]/li[2]/article[1]/h3[1]/a[1]"
  ];
  const actualXpaths = (0, _getListContainerXpath.findSetOfXpathsToCreateAnArrayExtractor)(xpathGroups);
  (0, _vitest.expect)(actualXpaths).toEqual(expectedXpaths);
});
18
+ (0, _vitest.it)("Real world", () => {
19
+ const input = [["html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[1]/article[1]/h3[1]/a[1]/@title"], ["html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[2]/article[1]/h3[1]", "html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[2]/article[1]/h3[1]/a[1]/@title", "html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[2]/article[1]/h3[1]/a[1]/@title", "html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[2]/article[1]/h3[1]/a[1]/text()"], ["html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[3]/article[1]/h3[1]", "html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[3]/article[1]/h3[1]/a[1]/@title", "html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[3]/article[1]/h3[1]/a[1]/@title", "html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[3]/article[1]/h3[1]/a[1]/text()"], ["html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[4]/article[1]/h3[1]", "html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[4]/article[1]/h3[1]/a[1]/@title", "html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[4]/article[1]/h3[1]/a[1]/@title", "html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[4]/article[1]/h3[1]/a[1]/text()"], ["html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[5]/article[1]/h3[1]/a[1]/@title"], ["html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[6]/article[1]/h3[1]", "html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[6]/article[1]/h3[1]/a[1]/@title", "html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[6]/article[1]/h3[1]/a[1]/@title", "html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[6]/article[1]/h3[1]/a[1]/text()"], ["html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[7]/article[1]/h3[1]/a[1]/@title"], 
["html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[8]/article[1]/h3[1]/a[1]/@title"], ["html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[9]/article[1]/h3[1]/a[1]/@title"], ["html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[10]/article[1]/h3[1]", "html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[10]/article[1]/h3[1]/a[1]/@title", "html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[10]/article[1]/h3[1]/a[1]/@title", "html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[10]/article[1]/h3[1]/a[1]/text()"], ["html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[11]/article[1]/h3[1]/a[1]/@title"], ["html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[12]/article[1]/h3[1]", "html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[12]/article[1]/h3[1]/a[1]/@title", "html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[12]/article[1]/h3[1]/a[1]/@title", "html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[12]/article[1]/h3[1]/a[1]/text()"], ["html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[13]/article[1]/h3[1]", "html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[13]/article[1]/h3[1]/a[1]/@title", "html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[13]/article[1]/h3[1]/a[1]/@title", "html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[13]/article[1]/h3[1]/a[1]/text()"], ["html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[14]/article[1]/h3[1]/a[1]/@title"], ["html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[15]/article[1]/h3[1]/a[1]/@title"], ["html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[16]/article[1]/h3[1]/a[1]/@title"], 
["html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[17]/article[1]/h3[1]", "html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[17]/article[1]/h3[1]/a[1]/@title", "html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[17]/article[1]/h3[1]/a[1]/@title", "html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[17]/article[1]/h3[1]/a[1]/text()"], ["html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[18]/article[1]/h3[1]/a[1]/@title"], ["html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[19]/article[1]/h3[1]", "html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[19]/article[1]/h3[1]/a[1]/@title", "html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[19]/article[1]/h3[1]/a[1]/@title", "html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[19]/article[1]/h3[1]/a[1]/text()"], ["html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[20]/article[1]/h3[1]", "html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[20]/article[1]/h3[1]/a[1]/@title", "html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[20]/article[1]/h3[1]/a[1]/@title", "html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[20]/article[1]/h3[1]/a[1]/text()"]];
20
+ (0, _vitest.expect)((0, _getListContainerXpath.findSetOfXpathsToCreateAnArrayExtractor)(input)).toEqual(["html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[1]/article[1]/h3[1]/a[1]/@title", "html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[2]/article[1]/h3[1]/a[1]/@title", "html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[3]/article[1]/h3[1]/a[1]/@title", "html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[4]/article[1]/h3[1]/a[1]/@title", "html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[5]/article[1]/h3[1]/a[1]/@title", "html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[6]/article[1]/h3[1]/a[1]/@title", "html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[7]/article[1]/h3[1]/a[1]/@title", "html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[8]/article[1]/h3[1]/a[1]/@title", "html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[9]/article[1]/h3[1]/a[1]/@title", "html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[10]/article[1]/h3[1]/a[1]/@title", "html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[11]/article[1]/h3[1]/a[1]/@title", "html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[12]/article[1]/h3[1]/a[1]/@title", "html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[13]/article[1]/h3[1]/a[1]/@title", "html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[14]/article[1]/h3[1]/a[1]/@title", "html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[15]/article[1]/h3[1]/a[1]/@title", "html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[16]/article[1]/h3[1]/a[1]/@title", "html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[17]/article[1]/h3[1]/a[1]/@title", 
"html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[18]/article[1]/h3[1]/a[1]/@title", "html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[19]/article[1]/h3[1]/a[1]/@title", "html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[20]/article[1]/h3[1]/a[1]/@title"]);
21
+ });
22
+ });
@@ -0,0 +1,21 @@
1
+ "use strict";
2
+
3
+ var _getListContainerXpath = require("../utils/getListContainerXpath");
4
+ var _vitest = require("vitest");
5
+ (0, _vitest.describe)("getContainerElement", () => { // Suite for getContainerElement: given a list of item xpaths, checks the xpath it returns (or null).
6
+ (0, _vitest.it)("basic case", () => { // Four xpaths that differ only in the li index directly under html[1]; expects "html[1]".
7
+ (0, _vitest.expect)((0, _getListContainerXpath.getContainerElement)(["html[1]/li[1]/article[1]/h3[1]/a[1]/@title", "html[1]/li[2]/article[1]/h3[1]/a[1]/@title", "html[1]/li[3]/article[1]/h3[1]/a[1]/@title", "html[1]/li[4]/article[1]/h3[1]/a[1]/@title"])).toBe("html[1]");
8
+ });
9
+ (0, _vitest.it)("real case", () => { // Deep paths with non-contiguous li indices (1, 4, 7, ..., 19); expects the path up to and including ol[1].
10
+ (0, _vitest.expect)((0, _getListContainerXpath.getContainerElement)(["html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[1]/article[1]/h3[1]/a[1]/@title", "html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[4]/article[1]/h3[1]/a[1]/@title", "html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[7]/article[1]/h3[1]/a[1]/@title", "html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[10]/article[1]/h3[1]/a[1]/@title", "html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[13]/article[1]/h3[1]/a[1]/@title", "html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[16]/article[1]/h3[1]/a[1]/@title", "html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]/li[19]/article[1]/h3[1]/a[1]/@title"])).toBe("html[1]/body[1]/div[1]/div[1]/div[1]/div[1]/section[1]/div[2]/ol[1]");
11
+ });
12
+ (0, _vitest.it)("empty array", () => { // No xpaths supplied; expects null.
13
+ (0, _vitest.expect)((0, _getListContainerXpath.getContainerElement)([])).toBeNull();
14
+ });
15
+ (0, _vitest.it)("no common parent", () => { // The two paths already differ at the root segment (html[1] vs html[2]); expects null.
16
+ (0, _vitest.expect)((0, _getListContainerXpath.getContainerElement)(["html[1]/li[1]/article[1]/h3[1]/a[1]/@title", "html[2]/div[1]/span[1]/@text"])).toBeNull();
17
+ });
18
+ (0, _vitest.it)("should return the container element even if the list starts at 2", () => { // Varying indices are div[4], div[7], div[10]. NOTE(review): the title says "starts at 2" but the first index in the data is 4 - confirm intent.
19
+ (0, _vitest.expect)((0, _getListContainerXpath.getContainerElement)(["html[1]/body[1]/main[1]/div[3]/div[1]/div[4]/div[2]", "html[1]/body[1]/main[1]/div[3]/div[1]/div[7]/div[2]", "html[1]/body[1]/main[1]/div[3]/div[1]/div[10]/div[2]"])).toBe("html[1]/body[1]/main[1]/div[3]/div[1]");
20
+ });
21
+ });
@@ -0,0 +1,42 @@
1
+ "use strict";
2
+
3
+ var _getListContainerXpath = require("../utils/getListContainerXpath");
4
+ var _vitest = require("vitest");
5
+ (0, _vitest.describe)("XPath Matcher", () => {
6
+ (0, _vitest.it)("basic case", () => {
7
+ const xpaths = ["html[1]/li[11]/article[1]/h3[1]/a[1]/@href", "html[1]/li[22]/article[1]/h3[1]/a[1]/@href"];
8
+ (0, _vitest.expect)((0, _getListContainerXpath.partOfSameArrayXpath)(xpaths[0], xpaths[1])).toBe(true);
9
+ });
10
+ (0, _vitest.it)("different structure case", () => {
11
+ const xpaths = ["html[1]/li[1]/article[1]/h3[1]/a[1]/@href", "html[1]/li[2]/article[1]/h3[1]/a[1]/text()"];
12
+ (0, _vitest.expect)((0, _getListContainerXpath.partOfSameArrayXpath)(xpaths[0], xpaths[1])).toBe(false);
13
+ });
14
+ (0, _vitest.it)("multiple numbers in a segment", () => {
15
+ const xpaths = ["html[1]/li[3]/article[2]/h3[1]/a[1]/@href", "html[1]/li[3]/article[2]/h3[1]/a[2]/@href"];
16
+ (0, _vitest.expect)((0, _getListContainerXpath.partOfSameArrayXpath)(xpaths[0], xpaths[1])).toBe(true);
17
+ });
18
+ (0, _vitest.it)("two numeric differences", () => {
19
+ const xpaths = ["html[1]/li[1]/article[1]/h3[1]/a[1]/@href", "html[1]/li[2]/article[2]/h3[1]/a[1]/@href"];
20
+ (0, _vitest.expect)((0, _getListContainerXpath.partOfSameArrayXpath)(xpaths[0], xpaths[1])).toBe(false);
21
+ });
22
+ (0, _vitest.it)("non-numeric differences", () => {
23
+ const xpaths = ["html[1]/ul[1]/article[1]/h3[1]/a[1]/@href", "html[1]/li[1]/article[1]/h3[1]/a[1]/@href"];
24
+ (0, _vitest.expect)((0, _getListContainerXpath.partOfSameArrayXpath)(xpaths[0], xpaths[1])).toBe(false);
25
+ });
26
+ (0, _vitest.it)("one numeric difference, same structure", () => {
27
+ const xpaths = ["html[1]/li[1]/article[1]/h3[1]/a[1]/@href", "html[1]/li[2]/article[1]/h3[1]/a[1]/@href"];
28
+ (0, _vitest.expect)((0, _getListContainerXpath.partOfSameArrayXpath)(xpaths[0], xpaths[1])).toBe(true);
29
+ });
30
+ (0, _vitest.it)("exact same xpath", () => {
31
+ const xpaths = ["html[1]/li[1]/article[1]/h3[1]/a[1]/@href", "html[1]/li[1]/article[1]/h3[1]/a[1]/@href"];
32
+ (0, _vitest.expect)((0, _getListContainerXpath.partOfSameArrayXpath)(xpaths[0], xpaths[1])).toBe(false);
33
+ });
34
+ (0, _vitest.it)("non-numeric differences in numeric segment", () => {
35
+ const xpaths = ["html[1]/li[1a]/article[1]/h3[1]/a[1]/@href", "html[1]/li[1b]/article[1]/h3[1]/a[1]/@href"];
36
+ (0, _vitest.expect)((0, _getListContainerXpath.partOfSameArrayXpath)(xpaths[0], xpaths[1])).toBe(false);
37
+ });
38
+ (0, _vitest.it)("different lengths", () => {
39
+ const xpaths = ["html[1]/li[1]/article[1]/h3[1]/a[1]/@href", "html[1]/li[1]/article[1]/h3[1]/@href"];
40
+ (0, _vitest.expect)((0, _getListContainerXpath.partOfSameArrayXpath)(xpaths[0], xpaths[1])).toBe(false);
41
+ });
42
+ });
@@ -0,0 +1,146 @@
1
+ "use strict";
2
+
3
+ var _extendedTest = require("../../../common/extendedTest");
4
+ var _ = require("../..");
5
+ var _uuid = require("uuid");
6
+ const productListTemplate = `
7
+ <div class="products-container">
8
+ <div class="product-item">
9
+ <h2 class="product-title">iPhone 14 Pro</h2>
10
+ <div class="price-wrapper">
11
+ <span class="price">$999</span>
12
+ </div>
13
+ <div class="details">
14
+ <p class="product-description">Latest iPhone with advanced camera system</p>
15
+ </div>
16
+ </div>
17
+ <div class="product-item">
18
+ <h2 class="product-title">MacBook Air M2</h2>
19
+ <div class="price-wrapper">
20
+ <span class="price">$1199</span>
21
+ </div>
22
+ <div class="details">
23
+ <p class="product-description">Thin and light laptop with M2 chip</p>
24
+ </div>
25
+ </div>
26
+ <div class="product-item">
27
+ <h2 class="product-title">AirPods Pro</h2>
28
+ <div class="price-wrapper">
29
+ <span class="price">$249</span>
30
+ </div>
31
+ <div class="details">
32
+ <p class="product-description">Active noise cancellation earbuds</p>
33
+ </div>
34
+ </div>
35
+ <div class="additional-info">
36
+ <div class="shipping-notice">Free shipping on all orders</div>
37
+ <div class="return-policy">30-day return policy</div>
38
+ </div>
39
+ </div>
40
+ `; // productListTemplate: HTML fixture with a .products-container holding three .product-item entries (title, price, description) followed by an .additional-info block (shipping notice, return policy). The test below loads it with page.setContent and derives its variants via String.replace.
41
+ _extendedTest.describe.skip("Array Extractor Caching Tests", () => {
42
+ (0, _extendedTest.describe)("DOM Changes and Cache Behavior", () => {
43
+ (0, _extendedTest.test)("should demonstrate caching behavior with different types of DOM changes", async ({
44
+ page
45
+ }) => {
46
+ const testLabel = `product-list-cache-test-${(0, _uuid.v4)()}`;
47
+ const variantKey = testLabel;
48
+ const itemEntitySchema = {
49
+ type: "object",
50
+ required: ["title", "price"],
51
+ properties: {
52
+ title: {
53
+ type: "string",
54
+ description: "Product title",
55
+ primary: true
56
+ },
57
+ price: {
58
+ type: "string",
59
+ description: "Product price"
60
+ },
61
+ description: {
62
+ type: "string",
63
+ description: "Product description"
64
+ }
65
+ }
66
+ };
67
+ const extractionOptions = {
68
+ itemEntityName: "product",
69
+ label: testLabel,
70
+ itemEntitySchema,
71
+ strategy: {
72
+ model: "claude-3-5-sonnet-20240620",
73
+ type: "HTML"
74
+ },
75
+ variantKey: variantKey,
76
+ apiKey: process.env.ANTHROPIC_API_KEY
77
+ };
78
+ await page.setContent(productListTemplate);
79
+ const firstResult = await (0, _.extractArrayFromLocator)(page.locator(".products-container"), extractionOptions);
80
+ console.log("First extraction result:", firstResult);
81
+ (0, _extendedTest.expect)(firstResult).toHaveLength(3);
82
+ (0, _extendedTest.expect)(firstResult[0]).toHaveProperty("title", "iPhone 14 Pro");
83
+ (0, _extendedTest.expect)(firstResult[0]).toHaveProperty("price", "$999");
84
+ (0, _extendedTest.expect)(firstResult[1]).toHaveProperty("title", "MacBook Air M2");
85
+ (0, _extendedTest.expect)(firstResult[1]).toHaveProperty("price", "$1199");
86
+ (0, _extendedTest.expect)(firstResult[2]).toHaveProperty("title", "AirPods Pro");
87
+ (0, _extendedTest.expect)(firstResult[2]).toHaveProperty("price", "$249");
88
+ const modifiedTemplate = productListTemplate.replace("iPhone 14 Pro", "iPhone 15 Pro Max").replace("$999", "$1099").replace("MacBook Air M2", "MacBook Pro M3").replace("$1199", "$1999");
89
+ await page.setContent(modifiedTemplate);
90
+ const secondResult = await (0, _.extractArrayFromLocator)(page.locator(".products-container"), extractionOptions);
91
+ console.log("Second extraction result (after relevant change):", secondResult);
92
+ (0, _extendedTest.expect)(secondResult).not.toEqual(firstResult);
93
+ (0, _extendedTest.expect)(secondResult).toHaveLength(3);
94
+ (0, _extendedTest.expect)(secondResult[0]).toHaveProperty("title", "iPhone 15 Pro Max");
95
+ (0, _extendedTest.expect)(secondResult[0]).toHaveProperty("price", "$1099");
96
+ (0, _extendedTest.expect)(secondResult[1]).toHaveProperty("title", "MacBook Pro M3");
97
+ (0, _extendedTest.expect)(secondResult[1]).toHaveProperty("price", "$1999");
98
+ (0, _extendedTest.expect)(secondResult[2]).toHaveProperty("title", "AirPods Pro");
99
+ (0, _extendedTest.expect)(secondResult[2]).toHaveProperty("price", "$249");
100
+ const irrelevantChangeTemplate = modifiedTemplate.replace("Free shipping on all orders", "Express shipping available").replace("30-day return policy", "60-day return policy");
101
+ await page.setContent(irrelevantChangeTemplate);
102
+ const thirdResult = await (0, _.extractArrayFromLocator)(page.locator(".products-container"), extractionOptions);
103
+ console.log("Third extraction result (after irrelevant change):", thirdResult);
104
+ (0, _extendedTest.expect)(thirdResult).toEqual(secondResult);
105
+ (0, _extendedTest.expect)(thirdResult).toHaveLength(3);
106
+ (0, _extendedTest.expect)(thirdResult[0]).toHaveProperty("title", "iPhone 15 Pro Max");
107
+ (0, _extendedTest.expect)(thirdResult[0]).toHaveProperty("price", "$1099");
108
+ const appendedTemplate = irrelevantChangeTemplate.replace('<div class="return-policy">60-day return policy</div>', `<div class="return-policy">60-day return policy</div>
109
+ <div class="newly-added-section">
110
+ <div class="customer-service">
111
+ <h3>Customer Support</h3>
112
+ <p>24/7 support available</p>
113
+ </div>
114
+ <div class="social-media">
115
+ <button class="share-facebook">Share on Facebook</button>
116
+ <button class="share-twitter">Share on Twitter</button>
117
+ </div>
118
+ </div>`);
119
+ await page.setContent(appendedTemplate);
120
+ const fourthResult = await (0, _.extractArrayFromLocator)(page.locator(".products-container"), extractionOptions);
121
+ console.log("Fourth extraction result (after appending content):", fourthResult);
122
+ (0, _extendedTest.expect)(fourthResult).toEqual(thirdResult);
123
+ (0, _extendedTest.expect)(fourthResult).toHaveLength(3);
124
+ (0, _extendedTest.expect)(fourthResult[0]).toHaveProperty("title", "iPhone 15 Pro Max");
125
+ (0, _extendedTest.expect)(fourthResult[0]).toHaveProperty("price", "$1099");
126
+ const newItemTemplate = appendedTemplate.replace('<div class="additional-info">', `<div class="product-item">
127
+ <h2 class="product-title">iPad Pro</h2>
128
+ <div class="price-wrapper">
129
+ <span class="price">$799</span>
130
+ </div>
131
+ <div class="details">
132
+ <p class="product-description">Powerful tablet with M2 chip</p>
133
+ </div>
134
+ </div>
135
+ <div class="additional-info">`);
136
+ await page.setContent(newItemTemplate);
137
+ const fifthResult = await (0, _.extractArrayFromLocator)(page.locator(".products-container"), extractionOptions);
138
+ console.log("Fifth extraction result (after adding new item):", fifthResult);
139
+ (0, _extendedTest.expect)(fifthResult).not.toEqual(fourthResult);
140
+ (0, _extendedTest.expect)(fifthResult).toHaveLength(4);
141
+ (0, _extendedTest.expect)(fifthResult[3]).toHaveProperty("title", "iPad Pro");
142
+ (0, _extendedTest.expect)(fifthResult[3]).toHaveProperty("price", "$799");
143
+ console.log("All cache behavior tests completed successfully!");
144
+ });
145
+ });
146
+ });