@intuned/browser-dev 0.1.4-dev.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (212) hide show
  1. package/.babelrc +21 -0
  2. package/.eslintignore +10 -0
  3. package/.eslintrc.js +39 -0
  4. package/BROWSER_SCRIPTS_SETUP.md +84 -0
  5. package/LICENSE +43 -0
  6. package/README.md +160 -0
  7. package/RELEASE.md +60 -0
  8. package/dist/ai/export.d.js +5 -0
  9. package/dist/ai/export.d.ts +641 -0
  10. package/dist/ai/extractStructuredData.js +320 -0
  11. package/dist/ai/extractStructuredDataUsingAi.js +142 -0
  12. package/dist/ai/extractionHelpers/screenshotHelpers.js +56 -0
  13. package/dist/ai/extractionHelpers/validateSchema.js +148 -0
  14. package/dist/ai/index.d.ts +641 -0
  15. package/dist/ai/index.js +19 -0
  16. package/dist/ai/isPageLoaded.js +80 -0
  17. package/dist/ai/prompt.js +39 -0
  18. package/dist/ai/tests/testCheckAllTypesAreStrings.spec.js +137 -0
  19. package/dist/ai/tests/testExtractFromContent.spec.js +372 -0
  20. package/dist/ai/tests/testExtractStructuredData.spec.js +646 -0
  21. package/dist/ai/tests/testIsPageLoaded.spec.js +277 -0
  22. package/dist/ai/tools/index.js +48 -0
  23. package/dist/ai/types/errors.js +67 -0
  24. package/dist/ai/types/models.js +45 -0
  25. package/dist/ai/types/types.js +48 -0
  26. package/dist/ai/validators.js +167 -0
  27. package/dist/common/Logger/index.js +60 -0
  28. package/dist/common/Logger/types.js +5 -0
  29. package/dist/common/SdkError.js +50 -0
  30. package/dist/common/aiModelsValidations.js +32 -0
  31. package/dist/common/ensureBrowserScripts.js +14 -0
  32. package/dist/common/extendedTest.js +157 -0
  33. package/dist/common/extractionHelpers.js +19 -0
  34. package/dist/common/formatZodError.js +18 -0
  35. package/dist/common/fuzzySearch/fuzzySearch.test.js +250 -0
  36. package/dist/common/fuzzySearch/levenshtein-search.js +298 -0
  37. package/dist/common/fuzzySearch/utils.js +23 -0
  38. package/dist/common/getModelProvider.js +18 -0
  39. package/dist/common/getSimplifiedHtml.js +122 -0
  40. package/dist/common/hashObject.js +32 -0
  41. package/dist/common/html2markdown/convertElementToMarkdown.js +469 -0
  42. package/dist/common/html2markdown/index.js +19 -0
  43. package/dist/common/jwtTokenManager.js +57 -0
  44. package/dist/common/loadRuntime.js +16 -0
  45. package/dist/common/locatorHelpers.js +41 -0
  46. package/dist/common/matching/collectStrings.js +32 -0
  47. package/dist/common/matching/levenshtein.js +40 -0
  48. package/dist/common/matching/matching.js +317 -0
  49. package/dist/common/matching/types.js +1 -0
  50. package/dist/common/noEmpty.js +9 -0
  51. package/dist/common/saveSnapshotWithExamples.js +60 -0
  52. package/dist/common/script.js +2602 -0
  53. package/dist/common/tests/testEnsureBrowserScript.spec.js +31 -0
  54. package/dist/common/xpathMapping.js +107 -0
  55. package/dist/helpers/clickUntilExhausted.js +85 -0
  56. package/dist/helpers/downloadFile.js +125 -0
  57. package/dist/helpers/export.d.js +5 -0
  58. package/dist/helpers/export.d.ts +1220 -0
  59. package/dist/helpers/extractMarkdown.js +35 -0
  60. package/dist/helpers/filterEmptyValues.js +54 -0
  61. package/dist/helpers/gotoUrl.js +98 -0
  62. package/dist/helpers/index.d.ts +1220 -0
  63. package/dist/helpers/index.js +122 -0
  64. package/dist/helpers/processDate.js +25 -0
  65. package/dist/helpers/resolveUrl.js +64 -0
  66. package/dist/helpers/sanitizeHtml.js +74 -0
  67. package/dist/helpers/saveFileToS3.js +50 -0
  68. package/dist/helpers/scrollToLoadContent.js +57 -0
  69. package/dist/helpers/tests/testClickUntilExhausted.spec.js +372 -0
  70. package/dist/helpers/tests/testDownloadFile.spec.js +206 -0
  71. package/dist/helpers/tests/testExtractMarkdown.spec.js +290 -0
  72. package/dist/helpers/tests/testFilterEmptyValues.spec.js +151 -0
  73. package/dist/helpers/tests/testGoToUrl.spec.js +37 -0
  74. package/dist/helpers/tests/testProcessDate.spec.js +13 -0
  75. package/dist/helpers/tests/testResolveUrl.spec.js +341 -0
  76. package/dist/helpers/tests/testSanitizeHtml.spec.js +330 -0
  77. package/dist/helpers/tests/testScrollToLoadContent.spec.js +163 -0
  78. package/dist/helpers/tests/testValidateDataUsingSchema.spec.js +342 -0
  79. package/dist/helpers/tests/testWithDomSettledWait.spec.js +164 -0
  80. package/dist/helpers/tests/testWithNetworkIdleWait.spec.js +114 -0
  81. package/dist/helpers/types/Attachment.js +115 -0
  82. package/dist/helpers/types/CustomTypeRegistry.js +48 -0
  83. package/dist/helpers/types/RunEnvironment.js +18 -0
  84. package/dist/helpers/types/ValidationError.js +17 -0
  85. package/dist/helpers/types/index.js +51 -0
  86. package/dist/helpers/uploadFileToS3.js +154 -0
  87. package/dist/helpers/utils/getS3Client.js +22 -0
  88. package/dist/helpers/utils/index.js +73 -0
  89. package/dist/helpers/utils/isDownload.js +10 -0
  90. package/dist/helpers/utils/isGenerateCodeMode.js +9 -0
  91. package/dist/helpers/utils/isLocator.js +9 -0
  92. package/dist/helpers/utils/jwtTokenManager.js +18 -0
  93. package/dist/helpers/validateDataUsingSchema.js +103 -0
  94. package/dist/helpers/waitForDomSettled.js +90 -0
  95. package/dist/helpers/withNetworkSettledWait.js +91 -0
  96. package/dist/index.d.js +16 -0
  97. package/dist/index.d.ts +10 -0
  98. package/dist/index.js +16 -0
  99. package/dist/intunedServices/ApiGateway/aiApiGateway.js +143 -0
  100. package/dist/intunedServices/ApiGateway/factory.js +16 -0
  101. package/dist/intunedServices/ApiGateway/providers/Anthropic.js +26 -0
  102. package/dist/intunedServices/ApiGateway/providers/Gemini.js +29 -0
  103. package/dist/intunedServices/ApiGateway/providers/OpenAI.js +29 -0
  104. package/dist/intunedServices/ApiGateway/tests/testApiGateway.spec.js +355 -0
  105. package/dist/intunedServices/ApiGateway/types.js +11 -0
  106. package/dist/intunedServices/cache/cache.js +61 -0
  107. package/dist/intunedServices/cache/index.js +12 -0
  108. package/dist/intunedServices/cache/tests/testCache.spec.js +117 -0
  109. package/dist/optimized-extractors/common/buildExamplesPrompt.js +12 -0
  110. package/dist/optimized-extractors/common/buildImagesFromPage.js +55 -0
  111. package/dist/optimized-extractors/common/extractStructuredDataUsingClaude.js +135 -0
  112. package/dist/optimized-extractors/common/extractStructuredDataUsingGoogle.js +37 -0
  113. package/dist/optimized-extractors/common/extractStructuredDataUsingOpenAi.js +132 -0
  114. package/dist/optimized-extractors/common/extractStrucutredDataUsingAiInstance.js +122 -0
  115. package/dist/optimized-extractors/common/findTableHeaders.js +162 -0
  116. package/dist/optimized-extractors/common/index.js +55 -0
  117. package/dist/optimized-extractors/common/isTableHeaderOrFooter.js +84 -0
  118. package/dist/optimized-extractors/common/matching/matching.js +212 -0
  119. package/dist/optimized-extractors/common/matching/matching.test.js +655 -0
  120. package/dist/optimized-extractors/common/matching/types.js +18 -0
  121. package/dist/optimized-extractors/common/matching/utils.js +184 -0
  122. package/dist/optimized-extractors/common/utils.js +58 -0
  123. package/dist/optimized-extractors/export.d.js +5 -0
  124. package/dist/optimized-extractors/export.d.ts +397 -0
  125. package/dist/optimized-extractors/extractArray.js +120 -0
  126. package/dist/optimized-extractors/extractObject.js +104 -0
  127. package/dist/optimized-extractors/index.d.ts +397 -0
  128. package/dist/optimized-extractors/index.js +31 -0
  129. package/dist/optimized-extractors/listExtractionHelpers/__tests__/dynamicListExtractor.spec.js +269 -0
  130. package/dist/optimized-extractors/listExtractionHelpers/__tests__/findSetOfXpathsToCreateAnArrayExtractor.test.js +22 -0
  131. package/dist/optimized-extractors/listExtractionHelpers/__tests__/getContainerElement.test.js +21 -0
  132. package/dist/optimized-extractors/listExtractionHelpers/__tests__/partOfSameArrayXpath.test.js +42 -0
  133. package/dist/optimized-extractors/listExtractionHelpers/__tests__/testArrayExtractorFromLocator.spec.js +146 -0
  134. package/dist/optimized-extractors/listExtractionHelpers/__tests__/testArrayExtractorFromPage.spec.js +130 -0
  135. package/dist/optimized-extractors/listExtractionHelpers/__tests__/verifyThatAllXpathsArePartOfSameArray.test.js +9 -0
  136. package/dist/optimized-extractors/listExtractionHelpers/dynamicListExtractor.js +160 -0
  137. package/dist/optimized-extractors/listExtractionHelpers/errors.js +46 -0
  138. package/dist/optimized-extractors/listExtractionHelpers/getListMatches.js +14 -0
  139. package/dist/optimized-extractors/listExtractionHelpers/runAiExtraction.js +243 -0
  140. package/dist/optimized-extractors/listExtractionHelpers/typesAndSchema.js +5 -0
  141. package/dist/optimized-extractors/listExtractionHelpers/utils/extractPropertiesUsingGPTFromArray.js +277 -0
  142. package/dist/optimized-extractors/listExtractionHelpers/utils/extractStructuredListUsingAi.js +44 -0
  143. package/dist/optimized-extractors/listExtractionHelpers/utils/getListContainerXpath.js +94 -0
  144. package/dist/optimized-extractors/listExtractionHelpers/utils/getRelativeContainerXpathSelector.js +20 -0
  145. package/dist/optimized-extractors/listExtractionHelpers/utils/getSimplifiedHtmlPerListItem.js +21 -0
  146. package/dist/optimized-extractors/listExtractionHelpers/utils/tablesUtils.js +48 -0
  147. package/dist/optimized-extractors/listExtractionHelpers/utils/validateOptions.js +52 -0
  148. package/dist/optimized-extractors/models/anthropicModel.js +23 -0
  149. package/dist/optimized-extractors/models/openaiModel.js +23 -0
  150. package/dist/optimized-extractors/objectExtractionHelpers/AIExtractors.js +73 -0
  151. package/dist/optimized-extractors/objectExtractionHelpers/__tests__/checksumUtils.test.js +103 -0
  152. package/dist/optimized-extractors/objectExtractionHelpers/__tests__/testObjectExtractorFromLocator.spec.js +107 -0
  153. package/dist/optimized-extractors/objectExtractionHelpers/__tests__/testObjectExtractorFromPage.spec.js +107 -0
  154. package/dist/optimized-extractors/objectExtractionHelpers/calculateObjectExampleHash.js +28 -0
  155. package/dist/optimized-extractors/objectExtractionHelpers/captureSnapshot.js +26 -0
  156. package/dist/optimized-extractors/objectExtractionHelpers/checksumUtils.js +32 -0
  157. package/dist/optimized-extractors/objectExtractionHelpers/constants.js +7 -0
  158. package/dist/optimized-extractors/objectExtractionHelpers/dynamicObjectExtractor.js +106 -0
  159. package/dist/optimized-extractors/objectExtractionHelpers/errors.js +42 -0
  160. package/dist/optimized-extractors/objectExtractionHelpers/findDomMatches.js +54 -0
  161. package/dist/optimized-extractors/objectExtractionHelpers/getSimplifiedHtml.js +122 -0
  162. package/dist/optimized-extractors/objectExtractionHelpers/typesAndSchemas.js +5 -0
  163. package/dist/optimized-extractors/objectExtractionHelpers/validateDynamicObjectExtractorOptions.js +52 -0
  164. package/dist/optimized-extractors/types/aiModelsValidation.js +45 -0
  165. package/dist/optimized-extractors/types/errors.js +42 -0
  166. package/dist/optimized-extractors/types/jsonSchema.d.js +5 -0
  167. package/dist/optimized-extractors/types/jsonSchema.d.ts +50 -0
  168. package/dist/optimized-extractors/types/types.js +5 -0
  169. package/dist/optimized-extractors/validators.js +152 -0
  170. package/dist/types/intuned-runtime.d.js +1 -0
  171. package/dist/types/intuned-runtime.d.ts +64 -0
  172. package/dist/vite-env.d.js +1 -0
  173. package/dist/vite-env.d.ts +9 -0
  174. package/generated-docs/ai/functions/extractStructuredData.mdx +255 -0
  175. package/generated-docs/ai/functions/isPageLoaded.mdx +88 -0
  176. package/generated-docs/ai/interfaces/ArraySchema.mdx +36 -0
  177. package/generated-docs/ai/interfaces/BasicSchema.mdx +14 -0
  178. package/generated-docs/ai/interfaces/BooleanSchema.mdx +28 -0
  179. package/generated-docs/ai/interfaces/ImageBufferContentItem.mdx +16 -0
  180. package/generated-docs/ai/interfaces/ImageUrlContentItem.mdx +16 -0
  181. package/generated-docs/ai/interfaces/NumberSchema.mdx +35 -0
  182. package/generated-docs/ai/interfaces/ObjectSchema.mdx +39 -0
  183. package/generated-docs/ai/interfaces/StringSchema.mdx +35 -0
  184. package/generated-docs/ai/interfaces/TextContentItem.mdx +14 -0
  185. package/generated-docs/ai/type-aliases/ContentItem.mdx +12 -0
  186. package/generated-docs/ai/type-aliases/JsonSchema.mdx +47 -0
  187. package/generated-docs/ai/type-aliases/SUPPORTED_MODELS.mdx +85 -0
  188. package/generated-docs/helpers/functions/downloadFile.mdx +99 -0
  189. package/generated-docs/helpers/functions/extractMarkdown.mdx +56 -0
  190. package/generated-docs/helpers/functions/filterEmptyValues.mdx +51 -0
  191. package/generated-docs/helpers/functions/goToUrl.mdx +124 -0
  192. package/generated-docs/helpers/functions/processDate.mdx +55 -0
  193. package/generated-docs/helpers/functions/resolveUrl.mdx +165 -0
  194. package/generated-docs/helpers/functions/sanitizeHtml.mdx +113 -0
  195. package/generated-docs/helpers/functions/saveFileToS3.mdx +127 -0
  196. package/generated-docs/helpers/functions/scrollToLoadContent.mdx +89 -0
  197. package/generated-docs/helpers/functions/uploadFileToS3.mdx +121 -0
  198. package/generated-docs/helpers/functions/validateDataUsingSchema.mdx +90 -0
  199. package/generated-docs/helpers/functions/waitForDomSettled.mdx +91 -0
  200. package/generated-docs/helpers/functions/withNetworkSettledWait.mdx +76 -0
  201. package/generated-docs/helpers/interfaces/Attachment.mdx +56 -0
  202. package/generated-docs/helpers/interfaces/S3Configs.mdx +52 -0
  203. package/generated-docs/helpers/interfaces/SanitizeHtmlOptions.mdx +22 -0
  204. package/generated-docs/helpers/type-aliases/AttachmentType.mdx +10 -0
  205. package/generated-docs/helpers/type-aliases/FileType.mdx +61 -0
  206. package/generated-docs/helpers/type-aliases/Trigger.mdx +62 -0
  207. package/how-to-generate-docs.md +61 -0
  208. package/how-to-run-tests.md +42 -0
  209. package/intuned-runtime-setup.md +13 -0
  210. package/package.json +124 -0
  211. package/tsconfig.eslint.json +5 -0
  212. package/tsconfig.json +26 -0
@@ -0,0 +1,130 @@
1
+ "use strict";
2
+
3
+ var _extendedTest = require("../../../common/extendedTest");
4
+ var _ = require("../..");
5
+ var _uuid = require("uuid");
6
// HTML fixture used by the caching suite below: a "products-container" holding three
// "product-item" blocks (title, price, description) followed by an "additional-info"
// block with a shipping notice and a return policy that are not product items.
+ const productListTemplate = `
7
+ <div class="products-container">
8
+ <div class="product-item">
9
+ <h2 class="product-title">iPhone 14 Pro</h2>
10
+ <div class="price-wrapper">
11
+ <span class="price">$999</span>
12
+ </div>
13
+ <div class="details">
14
+ <p class="product-description">Latest iPhone with advanced camera system</p>
15
+ </div>
16
+ </div>
17
+ <div class="product-item">
18
+ <h2 class="product-title">MacBook Air M2</h2>
19
+ <div class="price-wrapper">
20
+ <span class="price">$1199</span>
21
+ </div>
22
+ <div class="details">
23
+ <p class="product-description">Thin and light laptop with M2 chip</p>
24
+ </div>
25
+ </div>
26
+ <div class="product-item">
27
+ <h2 class="product-title">AirPods Pro</h2>
28
+ <div class="price-wrapper">
29
+ <span class="price">$249</span>
30
+ </div>
31
+ <div class="details">
32
+ <p class="product-description">Active noise cancellation earbuds</p>
33
+ </div>
34
+ </div>
35
+ <div class="additional-info">
36
+ <div class="shipping-notice">Free shipping on all orders</div>
37
+ <div class="return-policy">30-day return policy</div>
38
+ </div>
39
+ </div>
40
+ `;
41
// NOTE: this suite is registered with describe.skip, so it does not run by default.
// It calls extractArrayFromPage four times with the same options against successively
// modified versions of productListTemplate and asserts on each returned array.
+ _extendedTest.describe.skip("Array Extractor from Page Caching Tests", () => {
42
+ (0, _extendedTest.describe)("DOM Changes and Cache Behavior", () => {
43
+ (0, _extendedTest.test)("should demonstrate caching behavior with different types of DOM changes", async ({
44
+ page
45
+ }) => {
46
// A fresh UUID per run is embedded in the label, which is also reused as the variantKey.
+ const testLabel = `product-list-page-cache-test-${(0, _uuid.v4)()}`;
47
+ const variantKey = testLabel;
48
+ const itemEntitySchema = {
49
+ type: "object",
50
+ required: ["title", "price"],
51
+ properties: {
52
+ title: {
53
+ type: "string",
54
+ description: "Product title",
55
+ primary: true
56
+ },
57
+ price: {
58
+ type: "string",
59
+ description: "Product price"
60
+ },
61
+ description: {
62
+ type: "string",
63
+ description: "Product description"
64
+ }
65
+ }
66
+ };
67
// The same options object is passed to all four extraction calls below.
// The API key is read from the ANTHROPIC_API_KEY environment variable.
+ const extractionOptions = {
68
+ itemEntityName: "product",
69
+ label: testLabel,
70
+ itemEntitySchema,
71
+ strategy: {
72
+ model: "claude-3-5-sonnet-20240620",
73
+ type: "HTML"
74
+ },
75
+ variantKey: variantKey,
76
+ apiKey: process.env.ANTHROPIC_API_KEY
77
+ };
78
// Step 1: unmodified fixture; expects the three original products in document order.
+ await page.setContent(productListTemplate);
79
+ const firstResult = await (0, _.extractArrayFromPage)(page, extractionOptions);
80
+ console.log("First extraction result:", firstResult);
81
+ (0, _extendedTest.expect)(firstResult).toHaveLength(3);
82
+ (0, _extendedTest.expect)(firstResult[0]).toHaveProperty("title", "iPhone 14 Pro");
83
+ (0, _extendedTest.expect)(firstResult[0]).toHaveProperty("price", "$999");
84
+ (0, _extendedTest.expect)(firstResult[1]).toHaveProperty("title", "MacBook Air M2");
85
+ (0, _extendedTest.expect)(firstResult[1]).toHaveProperty("price", "$1199");
86
+ (0, _extendedTest.expect)(firstResult[2]).toHaveProperty("title", "AirPods Pro");
87
+ (0, _extendedTest.expect)(firstResult[2]).toHaveProperty("price", "$249");
88
// Step 2: change title and price of the first two products; the result must differ
// from step 1 and carry the new values, while the third product stays unchanged.
+ const modifiedTemplate = productListTemplate.replace("iPhone 14 Pro", "iPhone 15 Pro Max").replace("$999", "$1099").replace("MacBook Air M2", "MacBook Pro M3").replace("$1199", "$1999");
89
+ await page.setContent(modifiedTemplate);
90
+ const secondResult = await (0, _.extractArrayFromPage)(page, extractionOptions);
91
+ console.log("Second extraction result (after relevant change):", secondResult);
92
+ (0, _extendedTest.expect)(secondResult).not.toEqual(firstResult);
93
+ (0, _extendedTest.expect)(secondResult).toHaveLength(3);
94
+ (0, _extendedTest.expect)(secondResult[0]).toHaveProperty("title", "iPhone 15 Pro Max");
95
+ (0, _extendedTest.expect)(secondResult[0]).toHaveProperty("price", "$1099");
96
+ (0, _extendedTest.expect)(secondResult[1]).toHaveProperty("title", "MacBook Pro M3");
97
+ (0, _extendedTest.expect)(secondResult[1]).toHaveProperty("price", "$1999");
98
+ (0, _extendedTest.expect)(secondResult[2]).toHaveProperty("title", "AirPods Pro");
99
+ (0, _extendedTest.expect)(secondResult[2]).toHaveProperty("price", "$249");
100
// Step 3: only the shipping/return texts (outside the product items) change;
// the result is expected to equal the step 2 result.
+ const irrelevantChangeTemplate = modifiedTemplate.replace("Free shipping on all orders", "Express shipping available").replace("30-day return policy", "60-day return policy");
101
+ await page.setContent(irrelevantChangeTemplate);
102
+ const thirdResult = await (0, _.extractArrayFromPage)(page, extractionOptions);
103
+ console.log("Third extraction result (after irrelevant change):", thirdResult);
104
+ (0, _extendedTest.expect)(thirdResult).toEqual(secondResult);
105
+ (0, _extendedTest.expect)(thirdResult).toHaveLength(3);
106
+ (0, _extendedTest.expect)(thirdResult[0]).toHaveProperty("title", "iPhone 15 Pro Max");
107
+ (0, _extendedTest.expect)(thirdResult[0]).toHaveProperty("price", "$1099");
108
// Step 4: append an unrelated section after the existing markup;
// the result is expected to equal the step 3 result.
+ const appendedTemplate = irrelevantChangeTemplate + `
109
+ <div class="newly-added-section">
110
+ <div class="customer-service">
111
+ <h3>Customer Support</h3>
112
+ <p>24/7 support available</p>
113
+ </div>
114
+ <div class="social-media">
115
+ <button class="share-facebook">Share on Facebook</button>
116
+ <button class="share-twitter">Share on Twitter</button>
117
+ </div>
118
+ </div>
119
+ `;
120
+ await page.setContent(appendedTemplate);
121
+ const fourthResult = await (0, _.extractArrayFromPage)(page, extractionOptions);
122
+ console.log("Fourth extraction result (after appending content):", fourthResult);
123
+ (0, _extendedTest.expect)(fourthResult).toEqual(thirdResult);
124
+ (0, _extendedTest.expect)(fourthResult).toHaveLength(3);
125
+ (0, _extendedTest.expect)(fourthResult[0]).toHaveProperty("title", "iPhone 15 Pro Max");
126
+ (0, _extendedTest.expect)(fourthResult[0]).toHaveProperty("price", "$1099");
127
+ console.log("All cache behavior tests completed successfully!");
128
+ });
129
+ });
130
+ });
@@ -0,0 +1,9 @@
1
+ "use strict";
2
+
3
+ var _getListContainerXpath = require("../utils/getListContainerXpath");
4
+ var _vitest = require("vitest");
5
// Expects verifyThatAllXpathsArePartOfSameArray to return true for four XPaths
// that are identical except for the index of their <li> step.
(0, _vitest.describe)("verifyThatAllXpathsArePartOfSameArray", () => {
  (0, _vitest.it)("basic case", () => {
    const titleXpaths = [1, 2, 3, 4].map(
      position => `html[1]/li[${position}]/article[1]/h3[1]/a[1]/@title`
    );
    const verdict = (0, _getListContainerXpath.verifyThatAllXpathsArePartOfSameArray)(titleXpaths);
    (0, _vitest.expect)(verdict).toBe(true);
  });
});
@@ -0,0 +1,160 @@
1
+ "use strict";
2
+
3
+ Object.defineProperty(exports, "__esModule", {
4
+ value: true
5
+ });
6
+ exports.dynamicListExtractor = dynamicListExtractor;
7
+ var _neverthrow = require("neverthrow");
8
+ var _captureSnapshot = require("../objectExtractionHelpers/captureSnapshot");
9
+ var _checksumUtils = require("../objectExtractionHelpers/checksumUtils");
10
+ var _runAiExtraction = require("./runAiExtraction");
11
+ var _validateOptions = require("./utils/validateOptions");
12
+ var _cache = require("../../intunedServices/cache");
13
+ var _xpathMapping = require("../../common/xpathMapping");
14
+ var _Logger = require("../../common/Logger");
15
/**
 * Extracts a list of entities from a page, reusing a cached result when possible.
 *
 * Flow:
 *  1. Validate the options; a failed validation Result is returned unchanged.
 *  2. Hash the extraction inputs to obtain the cache key.
 *  3. Capture a snapshot of the page / search region. Only its error state is
 *     inspected here; the snapshot value itself is not used by this function.
 *  4. Look up the cache. A cached entry is reused only when its XPath mapping
 *     still validates against the page AND the container's current child count,
 *     minus the cached count of non-item children, equals the cached result length.
 *  5. Otherwise run the AI extraction, then cache either the result or an
 *     `{ exceedsLimit: true }` marker when the serialized payload is too large.
 *
 * @param page - Page object; must expose `evaluate` (used to count container children).
 * @param identifier - Forwarded to option validation and to the AI extraction.
 * @param options - Raw extractor options, validated by validateDynamicListExtractorOptions.
 * @returns A neverthrow Result: `ok` with one plain object per list item
 *   (property name -> matched text), or an `err` from validation, snapshot
 *   capture, or AI extraction.
 */
async function dynamicListExtractor(page, identifier, options) {
  const inputValidation = await (0, _validateOptions.validateDynamicListExtractorOptions)(page, identifier, options);
  if (inputValidation.isErr()) {
    return inputValidation;
  }
  const {
    pageUrl,
    searchRegion,
    searchRegionHandler,
    itemEntityName,
    itemEntitySchema,
    variantKey,
    primaryProperty,
    hasSearchRegionContainer,
    searchRegionKey,
    strategy,
    prompt,
    apiKey
  } = inputValidation.value;
  const extractorInputHash = (0, _checksumUtils.hashObject)({
    itemEntityName,
    itemEntitySchema,
    variantKey,
    currentPageUrl: pageUrl,
    strategy,
    searchRegionKey,
    prompt
  }, true);
  const pageAndSearchRegion = {
    page,
    searchRegion,
    searchRegionHandler
  };
  const snapshot = await (0, _captureSnapshot.captureSnapshot)(pageAndSearchRegion);
  if (snapshot.isErr()) {
    return (0, _neverthrow.err)(snapshot.error);
  }
  _Logger.logger.info("Looking for value in the cache...");
  const cachedResult = await _cache.cache.get(extractorInputHash);
  if (cachedResult) {
    _Logger.logger.info("Found value in cache");
    if (cachedResult.exceedsLimit) {
      _Logger.logger.warn(`Cache key ${extractorInputHash} exceeds cache limit and is not cacheable`);
    } else {
      const isValid = await (0, _xpathMapping.validateXPathMapping)(page, cachedResult.matchesMapping, cachedResult.containerPath);
      if (isValid) {
        const currentChildrenCount = await countContainerChildren(page, cachedResult.fullContainerXpath);
        if (currentChildrenCount - cachedResult.nonRelatedChildrenCount === cachedResult.result.length) {
          _Logger.logger.info("The values in the cache are the same as the current page, returning the cached result");
          return (0, _neverthrow.ok)(cachedResult.result);
        }
      }
      // Logged exactly once, whether the mapping failed validation or the
      // item count changed (previously the mismatch path logged it twice).
      _Logger.logger.info("The values in the cache are different from the current page, running AI extraction");
    }
  } else {
    _Logger.logger.info("No value found in the cache, running AI extraction");
  }
  const aiExtractionResult = await (0, _runAiExtraction.runAiExtraction)({
    pageAndSearchRegion,
    itemEntityName,
    itemEntitySchema,
    primaryProperty,
    hasSearchRegionContainer,
    strategy,
    identifier,
    prompt,
    examples: [],
    apiKey
  });
  if (aiExtractionResult.isErr()) {
    return (0, _neverthrow.err)(aiExtractionResult.error);
  }
  const xpathsMapping = buildXpathsMapping(aiExtractionResult.value);
  const resultsToReturn = getResultToReturn(aiExtractionResult.value.resultValues);
  const allContainerChildrenCount = await countContainerChildren(page, aiExtractionResult.value.fullContainerXpath);
  // Children of the container that are not list items; stored so a later run can
  // tell whether the number of items changed without re-running the extraction.
  const nonRelatedChildrenCount = allContainerChildrenCount - resultsToReturn.length;
  const resultsToCache = {
    result: resultsToReturn,
    matchesMapping: xpathsMapping,
    containerPath: aiExtractionResult.value.containerPath,
    fullContainerXpath: aiExtractionResult.value.fullContainerXpath,
    nonRelatedChildrenCount
  };
  // NOTE(review): `.length` counts UTF-16 code units, while the warning below
  // reports "bytes"; the two differ for non-ASCII content. Confirm which unit
  // the cache backend's limit is expressed in.
  const cacheDataSize = JSON.stringify(resultsToCache).length;
  const CACHE_SIZE_LIMIT = 380 * 1024;
  if (cacheDataSize > CACHE_SIZE_LIMIT) {
    _Logger.logger.warn(`Results exceed cache limit (${cacheDataSize} bytes > ${CACHE_SIZE_LIMIT} bytes), skipping caching`);
    await _cache.cache.set(extractorInputHash, {
      exceedsLimit: true
    });
  } else {
    _Logger.logger.debug("Caching results...");
    await _cache.cache.set(extractorInputHash, resultsToCache);
    _Logger.logger.debug("Results cached");
  }
  return (0, _neverthrow.ok)(resultsToReturn);
}

/**
 * Counts the element children of the first node matched by an XPath in the page.
 * Resolves to 0 when the XPath is empty or matches nothing.
 */
async function countContainerChildren(page, fullContainerXpath) {
  // The callback is evaluated in the page context, so it must be self-contained.
  return page.evaluate(xpath => {
    if (!xpath) return 0;
    const containerResult = document.evaluate(xpath, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null);
    const containerElement = containerResult.singleNodeValue;
    if (!containerElement) return 0;
    return containerElement.children.length;
  }, fullContainerXpath);
}
129
/**
 * Groups the matched XPaths of an extraction by the text they matched.
 *
 * For every extracted value that has a `matchXpath`, the first occurrence of
 * `<containerPath>/` is removed from that XPath and the entry
 * `{ xpath, matchType }` is appended to the list stored under the value's
 * `matchText`. Values without a `matchXpath` are ignored.
 *
 * Entries are accumulated in a Map and converted to an object at the end, so
 * texts that collide with inherited Object.prototype members ("constructor",
 * "toString", "__proto__", ...) become ordinary own keys instead of throwing
 * or altering the prototype.
 *
 * @param results - Object with `containerPath` (string) and `resultValues`
 *   (array of `{ result: { [property]: { matchText, matchXpath, matchType } } }`).
 * @returns Plain object mapping match text -> array of `{ xpath, matchType }`.
 */
function buildXpathsMapping(results) {
  const containerPrefix = results.containerPath + "/";
  const entriesByText = new Map();
  for (const item of results.resultValues) {
    for (const value of Object.values(item.result)) {
      const matchedXpath = value.matchXpath;
      if (!matchedXpath) {
        continue;
      }
      // Stringify the key so grouping matches plain-object key semantics
      // (e.g. 1 and "1" end up in the same bucket).
      const textKey = String(value.matchText);
      let entries = entriesByText.get(textKey);
      if (!entries) {
        entries = [];
        entriesByText.set(textKey, entries);
      }
      entries.push({
        xpath: matchedXpath.replace(containerPrefix, ""),
        matchType: value.matchType
      });
    }
  }
  return Object.fromEntries(entriesByText);
}
151
/**
 * Reduces each extraction item to a plain record of `property -> matchText`,
 * dropping the per-value match metadata.
 *
 * @param extractionResult - Array of `{ result: { [property]: { matchText, ... } } }`.
 * @returns Array with one plain object per input item, in the same order.
 */
function getResultToReturn(extractionResult) {
  const toPlainRecord = ({ result: fields }) =>
    Object.keys(fields).reduce((record, propertyName) => {
      record[propertyName] = fields[propertyName].matchText;
      return record;
    }, {});
  return extractionResult.map(item => toPlainRecord(item));
}
@@ -0,0 +1,46 @@
1
+ "use strict";
2
+
3
+ Object.defineProperty(exports, "__esModule", {
4
+ value: true
5
+ });
6
+ exports.requiredPropertyNotExtracted = exports.other = exports.invalidSearchRegion = exports.invalidList = exports.invalidInput = exports.invalidExtractionResult = exports.invalidAddressUrl = exports.insufficientAiCredits = void 0;
7
+ const other = (context, error) => ({
8
+ type: "Other",
9
+ context,
10
+ error
11
+ });
12
+ exports.other = other;
13
+ const invalidSearchRegion = () => ({
14
+ type: "InvalidSearchRegion"
15
+ });
16
+ exports.invalidSearchRegion = invalidSearchRegion;
17
+ const invalidList = () => ({
18
+ type: "InvalidList",
19
+ context: "Failed to get container path, please try to choose another primary property and make sure all list items are direct child of a common container"
20
+ });
21
+ exports.invalidList = invalidList;
22
+ const invalidInput = context => ({
23
+ type: "InvalidInput",
24
+ context
25
+ });
26
+ exports.invalidInput = invalidInput;
27
+ const invalidExtractionResult = context => ({
28
+ type: "InvalidExtractionResult",
29
+ context
30
+ });
31
+ exports.invalidExtractionResult = invalidExtractionResult;
32
+ const invalidAddressUrl = context => ({
33
+ type: "InvalidAddressUrl",
34
+ context
35
+ });
36
+ exports.invalidAddressUrl = invalidAddressUrl;
37
+ const requiredPropertyNotExtracted = context => ({
38
+ type: "RequiredPropertyNotExtracted",
39
+ context
40
+ });
41
+ exports.requiredPropertyNotExtracted = requiredPropertyNotExtracted;
42
+ const insufficientAiCredits = context => ({
43
+ type: "InsufficientAiCredits",
44
+ context
45
+ });
46
+ exports.insufficientAiCredits = insufficientAiCredits;
@@ -0,0 +1,14 @@
1
+ "use strict";
2
+
3
+ Object.defineProperty(exports, "__esModule", {
4
+ value: true
5
+ });
6
+ exports.getListMatches = getListMatches;
7
+ var _noEmpty = _interopRequireDefault(require("../../common/noEmpty"));
8
+ var _findDomMatches = require("../objectExtractionHelpers/findDomMatches");
9
/**
 * Interop helper for default imports: a truthy value flagged with `__esModule`
 * is returned unchanged; anything else is wrapped as `{ default: value }`.
 */
function _interopRequireDefault(e) {
  if (e && e.__esModule) {
    return e;
  }
  return { default: e };
}
10
/**
 * Collects every extracted value of every list item and resolves their DOM matches.
 *
 * @param page - Forwarded to getDomMatches.
 * @param searchRegionHandler - Forwarded to getDomMatches.
 * @param results - Array of items, each with a `result` object whose values are collected.
 * @returns Whatever getDomMatches resolves to for the flattened list of values.
 */
async function getListMatches(page, searchRegionHandler, results) {
  const valuesPerItem = results.map(item => Object.values(item.result));
  // NOTE(review): the noEmpty filter runs on the per-item arrays (before flat()),
  // not on the individual values — presumably it was meant to drop empty values;
  // confirm against noEmpty's definition before changing the order.
  const keptGroups = valuesPerItem.filter(_noEmpty.default);
  const allExtractedValues = keptGroups.flat();
  return await (0, _findDomMatches.getDomMatches)(page, searchRegionHandler, allExtractedValues);
}
@@ -0,0 +1,243 @@
1
+ "use strict";
2
+
3
+ Object.defineProperty(exports, "__esModule", {
4
+ value: true
5
+ });
6
+ exports.runAiExtraction = runAiExtraction;
7
+ var _neverthrow = require("neverthrow");
8
+ var _extractionHelpers = require("../../common/extractionHelpers");
9
+ var _findDomMatches = require("../objectExtractionHelpers/findDomMatches");
10
+ var _getSimplifiedHtml = require("../objectExtractionHelpers/getSimplifiedHtml");
11
+ var Errors = _interopRequireWildcard(require("./errors"));
12
+ var _getListMatches = require("./getListMatches");
13
+ var _extractPropertiesUsingGPTFromArray = require("./utils/extractPropertiesUsingGPTFromArray");
14
+ var _extractStructuredListUsingAi = require("./utils/extractStructuredListUsingAi");
15
+ var _getListContainerXpath = require("./utils/getListContainerXpath");
16
+ var _getRelativeContainerXpathSelector = require("./utils/getRelativeContainerXpathSelector");
17
+ var _getSimplifiedHtmlPerListItem = require("./utils/getSimplifiedHtmlPerListItem");
18
+ var _tablesUtils = require("./utils/tablesUtils");
19
+ var _buildImagesFromPage = require("../common/buildImagesFromPage");
20
+ var _findTableHeaders = require("../common/findTableHeaders");
21
+ var _Logger = require("../../common/Logger");
22
+ var _utils = require("../common/matching/utils");
23
/**
 * Babel interop helper for `import * as ns` over CommonJS modules.
 *
 * On first call it creates two WeakMap caches (when WeakMap exists), replaces
 * itself with the real implementation, and then delegates to it.
 *
 * @param {*} e - Value returned by `require`.
 * @param {*} [t] - When truthy, an `__esModule` module is still wrapped and the second cache is used.
 * @returns {*} `e` itself for `__esModule` modules (when `t` is falsy), otherwise a
 *   null-prototype namespace object with `default: e` plus copies of `e`'s own enumerable keys.
 */
function _interopRequireWildcard(e, t) {
  let defaultCache;
  let nodeInteropCache;
  if (typeof WeakMap === "function") {
    defaultCache = new WeakMap();
    nodeInteropCache = new WeakMap();
  }
  _interopRequireWildcard = function (mod, nodeInterop) {
    if (!nodeInterop && mod && mod.__esModule) {
      return mod;
    }
    const namespace = { __proto__: null, default: mod };
    if (mod === null || (typeof mod !== "object" && typeof mod !== "function")) {
      return namespace;
    }
    const cache = nodeInterop ? nodeInteropCache : defaultCache;
    if (cache) {
      if (cache.has(mod)) {
        return cache.get(mod);
      }
      // Registered before copying so later lookups of the same module reuse it.
      cache.set(mod, namespace);
    }
    for (const key in mod) {
      if (key === "default" || !{}.hasOwnProperty.call(mod, key)) {
        continue;
      }
      const descriptor = Object.getOwnPropertyDescriptor(mod, key);
      if (descriptor && (descriptor.get || descriptor.set)) {
        // Keep accessors live instead of snapshotting their current value.
        Object.defineProperty(namespace, key, descriptor);
      } else {
        namespace[key] = mod[key];
      }
    }
    return namespace;
  };
  return _interopRequireWildcard(e, t);
}
24
/**
 * Exported entry point for AI list extraction; delegates to
 * `handleNewAiExtraction` with the same parameter object.
 *
 * @param {Object} params - Extraction parameters, passed through untouched.
 * @returns {Promise<*>} The value `handleNewAiExtraction` resolves to.
 */
async function runAiExtraction(params) {
  const outcome = await handleNewAiExtraction(params);
  return outcome;
}
27
/**
 * Extracts a structured list in up to two AI passes.
 *
 * 1. Asks the AI for the primary property only.
 * 2. With fewer than two primary values, asks the AI for the full schema and
 *    returns those rows directly, without a list container.
 * 3. Otherwise locates the primary values in the DOM, derives the list
 *    container XPath from those matches and extracts each list item
 *    separately via `splitDomAndExtractData`.
 *
 * @param {Object} params
 * @param {string} params.itemEntityName - Entity name forwarded to the AI extractors.
 * @param {Object} params.itemEntitySchema - Full item schema.
 * @param {Object} params.pageAndSearchRegion - Provides `page`, `searchRegionHandler` and `searchRegion`.
 * @param {Array} params.primaryProperty - `[name, schema]` pair of the primary property.
 * @param {boolean} params.hasSearchRegionContainer - When truthy, the container path is made relative to the search region.
 * @param {Object} params.strategy - Extraction strategy, forwarded to the extractors.
 * @param {*} params.identifier - Forwarded to the extractors.
 * @param {*} params.prompt - Forwarded to `extractStructuredListUsingAi`.
 * @param {*} params.examples - Forwarded to `splitDomAndExtractData`.
 * @param {*} params.apiKey - Forwarded to the extractors.
 * @returns {Promise<*>} neverthrow result: `ok({ resultValues, containerPath, fullContainerXpath, matches })`
 *   or `err(...)` carrying the first failure encountered.
 */
async function handleNewAiExtraction(params) {
  const {
    itemEntityName,
    itemEntitySchema,
    pageAndSearchRegion,
    primaryProperty,
    hasSearchRegionContainer,
    strategy,
    identifier,
    prompt,
    examples,
    apiKey
  } = params;
  const [primaryPropertyName, primaryPropertyValue] = primaryProperty;

  const aiExtractionData = await getAiExtractionData(strategy, pageAndSearchRegion, hasSearchRegionContainer);
  if (aiExtractionData.isErr()) {
    return (0, _neverthrow.err)(aiExtractionData.error);
  }

  // First pass: request only the primary property.
  const primaryOnlySchema = {
    type: "object",
    properties: {
      [primaryPropertyName]: primaryPropertyValue
    },
    required: [primaryPropertyName]
  };
  const primaryData = await (0, _extractStructuredListUsingAi.extractStructuredListUsingAi)(itemEntityName, primaryOnlySchema, aiExtractionData.value, identifier, strategy, prompt, apiKey);
  if (primaryData.isErr()) {
    return (0, _neverthrow.err)(primaryData.error);
  }
  if (primaryData.value.length === 0) {
    _Logger.logger.debug(`the ai couldn't find any item with the ${primaryPropertyName} property`);
    return (0, _neverthrow.ok)({
      resultValues: [],
      containerPath: null,
      fullContainerXpath: null,
      matches: new Map()
    });
  }

  const primaryValues = primaryData.value.map(item => item[primaryPropertyName]);
  _Logger.logger.debug(`we were able to detect ${primaryValues.length} items with ${primaryPropertyName} property: ${JSON.stringify(primaryValues)}`);

  if (primaryValues.length < 2) {
    // With fewer than two primary values, take the full-schema AI answer as-is
    // and skip container detection.
    const allData = await (0, _extractStructuredListUsingAi.extractStructuredListUsingAi)(itemEntityName, itemEntitySchema, aiExtractionData.value, identifier, strategy, prompt, apiKey);
    if (allData.isErr()) {
      return (0, _neverthrow.err)(allData.error);
    }
    allData.value.forEach((row, index) => {
      _Logger.logger.debug(`ai extraction result for row ${index}: ${JSON.stringify(row)}`);
    });
    const resultValues = allData.value.map((row, index) => ({
      result: row,
      rowIndex: index
    }));
    const matches = await (0, _getListMatches.getListMatches)(pageAndSearchRegion.page, pageAndSearchRegion.searchRegionHandler, resultValues);
    return (0, _neverthrow.ok)({
      resultValues,
      containerPath: null,
      fullContainerXpath: null,
      matches
    });
  }

  const primaryValuesDomMatches = await (0, _findDomMatches.getDomMatches)(pageAndSearchRegion.page, pageAndSearchRegion.searchRegionHandler, primaryValues);
  const valuesDoesNotExistInDOM = Array.from(primaryValuesDomMatches.entries())
    .filter(([, found]) => found.length === 0)
    .map(([value]) => value);
  if (valuesDoesNotExistInDOM.length > 0) {
    _Logger.logger.debug(`the following values returned by AI does not exist in the page dom, [${valuesDoesNotExistInDOM}] , this will cause issues finding the list container`);
  }

  const fullContainerXpath = await (0, _getListContainerXpath.getListContainerXpath)(primaryValuesDomMatches);
  let containerPath = fullContainerXpath;
  if (hasSearchRegionContainer && containerPath) {
    containerPath = await (0, _getRelativeContainerXpathSelector.getRelativeContainerXpathSelector)(pageAndSearchRegion.searchRegion, containerPath);
  }
  if (!containerPath) {
    return (0, _neverthrow.err)(Errors.invalidList());
  }

  const containerLocators = await (0, _extractionHelpers.selectLocatorsUsingXpath)(pageAndSearchRegion.page, fullContainerXpath);
  const listItemsContainerLocator = containerLocators[0];
  if (!listItemsContainerLocator) {
    // The XPath resolved to nothing; report it as an invalid list instead of
    // handing `undefined` to the item splitter.
    return (0, _neverthrow.err)(Errors.invalidList());
  }

  const extractedData = await splitDomAndExtractData({
    listItemsContainerLocator,
    itemEntityName,
    itemEntitySchema,
    pageAndSearchRegion,
    primaryPropertyName,
    strategy,
    identifier,
    examples,
    apiKey
  });
  if (extractedData.isErr()) {
    return (0, _neverthrow.err)(extractedData.error);
  }
  const {
    matches,
    resultValues
  } = extractedData.value;
  return (0, _neverthrow.ok)({
    resultValues,
    containerPath,
    fullContainerXpath,
    matches
  });
}
125
/**
 * Takes a PNG screenshot of each locator's element, one after another, and
 * wraps every buffer as an image part.
 *
 * @param {Iterable<Object>} locators - Objects exposing `elementHandle()`, whose result exposes `screenshot(options)`.
 * @returns {Promise<Array<{type: string, buffer: *}>>} One `{ type: "image", buffer }` entry per locator, in input order.
 */
async function buildImagesForItemsHandles(locators) {
  const imageParts = [];
  for (const itemLocator of locators) {
    const handle = await itemLocator.elementHandle();
    const buffer = await handle.screenshot({ type: "png" });
    imageParts.push({ type: "image", buffer });
  }
  return imageParts;
}
139
/**
 * Splits the list container into item locators, asks the AI to extract the
 * schema properties per item, and keeps only values that can be matched back
 * to the item's own DOM.
 *
 * Rows whose primary value is missing or has no DOM match are dropped. Any
 * other value without a DOM match is removed from its row. After that, a
 * required non-primary property missing from any kept row fails the extraction.
 *
 * @param {Object} args
 * @param {Object} args.listItemsContainerLocator - Locator of the list container.
 * @param {string} args.itemEntityName - Forwarded to the AI extractor.
 * @param {Object} args.itemEntitySchema - Item schema; `properties` is read, `required` is optional.
 * @param {Object} args.pageAndSearchRegion - Provides `page` and `searchRegionHandler`.
 * @param {string} args.primaryPropertyName - Key of the primary property in each row.
 * @param {Object} args.strategy - `type === "HTML"` sends simplified HTML, otherwise item screenshots.
 * @param {*} args.identifier - Forwarded to the AI helpers.
 * @param {*} args.examples - Forwarded to the AI extractor.
 * @param {*} args.apiKey - Forwarded to the AI extractor.
 * @returns {Promise<*>} neverthrow result: `ok({ resultValues, matches })` or the first `err`.
 */
async function splitDomAndExtractData({
  listItemsContainerLocator,
  itemEntityName,
  itemEntitySchema,
  pageAndSearchRegion,
  primaryPropertyName,
  strategy,
  identifier,
  examples,
  apiKey
}) {
  const itemsLocators = await (0, _extractionHelpers.splitContainerIntoListLocators)(listItemsContainerLocator);
  const itemsSimplifiedHtml = await (0, _getSimplifiedHtmlPerListItem.getSimplifiedHtmlPerListItem)(itemsLocators);
  const {
    isTable,
    tableLocater
  } = await (0, _tablesUtils.isListTable)(listItemsContainerLocator, itemsSimplifiedHtml);
  const tableAsJsonArray = isTable ? await (0, _tablesUtils.createJsonFromTable)(pageAndSearchRegion.page) : [];
  const tableHeaders = tableLocater ? await (0, _findTableHeaders.getTableHeadersUsingAi)(tableLocater, identifier) : undefined;
  if (tableHeaders && tableHeaders.isErr()) {
    return (0, _neverthrow.err)(tableHeaders.error);
  }

  // Headers are forwarded only when at least one was detected.
  const detectedHeaders = tableHeaders && tableHeaders.value.headers.length ? tableHeaders.value.headers : undefined;
  const items = strategy.type === "HTML"
    ? itemsSimplifiedHtml.map(html => ({ type: "text", text: html }))
    : await buildImagesForItemsHandles(itemsLocators);

  const extractedData = await (0, _extractPropertiesUsingGPTFromArray.extractPropertiesUsingGPTFromArray)({
    itemEntityName,
    itemEntitySchema,
    itemsSimplifiedHtml,
    tableAsJsonArray,
    strategy,
    tableHeaders: detectedHeaders,
    items,
    identifier,
    examples,
    apiKey
  });
  if (extractedData.isErr()) {
    return extractedData;
  }

  const resultValues = [];
  for (let i = 0; i < extractedData.value.length; i++) {
    const rowValues = extractedData.value[i] ?? {};
    const rowLocator = itemsLocators[i];
    const primaryValue = rowValues[primaryPropertyName];
    if (primaryValue === null || primaryValue === undefined) {
      continue;
    }
    const rowValuesMatches = await (0, _findDomMatches.getDomMatchesFromItemsHandles)(pageAndSearchRegion.page, await rowLocator.elementHandle(), Object.values(rowValues));

    // Keep only values that can be found in the item's own DOM.
    const rowValuesWithMatchesOnly = {};
    for (const [key, value] of Object.entries(rowValues)) {
      const valueMatches = rowValuesMatches.get(value);
      const bestMatch = (0, _utils.selectBestMatch)(value, valueMatches ?? []);
      if (valueMatches && valueMatches.length > 0 && bestMatch) {
        rowValuesWithMatchesOnly[key] = {
          matchText: bestMatch.matchText,
          matchXpath: bestMatch.matchXpath,
          matchType: bestMatch.matchType
        };
      } else {
        _Logger.logger.debug(`value "${value}" for key "${key}" in row ${i + 1} does not have any matches in the item's html, dropped for hallucination protection`);
      }
    }

    // A row is kept only when its primary value was matched in the DOM.
    const primaryMatches = rowValuesMatches.get(primaryValue);
    if (primaryMatches && primaryMatches.length > 0) {
      resultValues.push({
        rowIndex: i,
        result: rowValuesWithMatchesOnly
      });
    }
  }

  // `required` may be omitted from the schema; treat that as "nothing required"
  // instead of throwing a TypeError.
  const requiredKeys = itemEntitySchema.required ?? [];
  for (const propertyKey of Object.keys(itemEntitySchema.properties)) {
    const isRequired = requiredKeys.includes(propertyKey);
    const isPrimary = itemEntitySchema.properties[propertyKey].primary;
    if (isPrimary || !isRequired) {
      continue;
    }
    const missingInSomeRow = resultValues.some(row => row.result[propertyKey] === null || row.result[propertyKey] === undefined);
    if (missingInSomeRow) {
      return (0, _neverthrow.err)(Errors.invalidExtractionResult(`Required property ${propertyKey} not found in all rows`));
    }
  }

  const matches = await (0, _getListMatches.getListMatches)(pageAndSearchRegion.page, pageAndSearchRegion.searchRegionHandler, resultValues.map(row => ({
    rowIndex: row.rowIndex,
    result: Object.fromEntries(Object.entries(row.result).map(([key, value]) => [key, value.matchText]))
  })));
  return (0, _neverthrow.ok)({
    resultValues,
    matches
  });
}
232
/**
 * Prepares the AI input for the chosen strategy.
 *
 * For the `"HTML"` strategy this is the simplified HTML of the search region.
 * For any other strategy it is a set of images built from the page, scoped to
 * the search-region handle only when a search-region container exists.
 *
 * @param {Object} strategy - Only `strategy.type` is read.
 * @param {Object} pageAndSearchRegion - Provides `page` and `searchRegionHandler`.
 * @param {boolean} hasSearchRegionContainer - Whether to pass the search-region handle to the image builder.
 * @returns {Promise<*>} neverthrow result: `ok({ text })`, `ok({ images })`, or
 *   `err(Errors.other(...))` when image building fails.
 */
async function getAiExtractionData(strategy, pageAndSearchRegion, hasSearchRegionContainer) {
  if (strategy.type !== "HTML") {
    const screenshotScope = hasSearchRegionContainer ? pageAndSearchRegion.searchRegionHandler : undefined;
    const images = await (0, _buildImagesFromPage.buildImagesFromPageOrHandle)(pageAndSearchRegion.page, screenshotScope);
    if (images.isErr()) {
      return (0, _neverthrow.err)(Errors.other(images.error.context));
    }
    return (0, _neverthrow.ok)({
      images: images.value
    });
  }
  const text = await (0, _getSimplifiedHtml.getSimplifiedHtml)(pageAndSearchRegion.searchRegionHandler);
  return (0, _neverthrow.ok)({
    text
  });
}
@@ -0,0 +1,5 @@
1
+ "use strict";
2
+
3
+ Object.defineProperty(exports, "__esModule", {
4
+ value: true
5
+ });