@intuned/browser-dev 2.2.3-unify-sdks.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (195)
  1. package/.babelrc +21 -0
  2. package/.eslintignore +10 -0
  3. package/.eslintrc.js +39 -0
  4. package/LICENSE +43 -0
  5. package/dist/ai-extractors/AnthropicClient/index.js +23 -0
  6. package/dist/ai-extractors/export.d.js +5 -0
  7. package/dist/ai-extractors/export.d.ts +422 -0
  8. package/dist/ai-extractors/extractStructuredData.js +79 -0
  9. package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/constants.js +7 -0
  10. package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/errors.js +42 -0
  11. package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/extractStructuredDataUsingClaude.js +149 -0
  12. package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/extractStructuredDataUsingGoogle.js +37 -0
  13. package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/extractStructuredDataUsingOpenAi.js +144 -0
  14. package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/extractStrucutredDataUsingAiInstance.js +123 -0
  15. package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/index.js +55 -0
  16. package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/isItemTableHeaderOrFooter.js +96 -0
  17. package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/screenshotHelpers.js +55 -0
  18. package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/types.js +5 -0
  19. package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/utils.js +53 -0
  20. package/dist/ai-extractors/extractionHelpers/types.js +5 -0
  21. package/dist/ai-extractors/fileExtractors.js +176 -0
  22. package/dist/ai-extractors/index.js +31 -0
  23. package/dist/ai-extractors/jsonSchema.d.js +5 -0
  24. package/dist/ai-extractors/jsonSchema.d.ts +49 -0
  25. package/dist/ai-extractors/openAiClients/index.js +23 -0
  26. package/dist/ai-extractors/validators.js +239 -0
  27. package/dist/browser/ai/export.d.js +3 -0
  28. package/dist/browser/ai/export.d.ts +587 -0
  29. package/dist/browser/ai/extractMarkdown.js +15 -0
  30. package/dist/browser/ai/extractStructuredData.js +231 -0
  31. package/dist/browser/ai/extractStructuredDataUsingAi.js +140 -0
  32. package/dist/browser/ai/extractionHelpers/screenshotHelpers.js +55 -0
  33. package/dist/browser/ai/extractionHelpers/validateSchema.js +148 -0
  34. package/dist/browser/ai/index.d.ts +587 -0
  35. package/dist/browser/ai/index.js +19 -0
  36. package/dist/browser/ai/isPageLoaded.js +67 -0
  37. package/dist/browser/ai/prompt.js +39 -0
  38. package/dist/browser/ai/tests/testCheckAllTypesAreStrings.spec.js +143 -0
  39. package/dist/browser/ai/tests/testExtractStructuredData.spec.js +622 -0
  40. package/dist/browser/ai/tools/index.js +48 -0
  41. package/dist/browser/ai/types/errors.js +67 -0
  42. package/dist/browser/ai/types/models.js +45 -0
  43. package/dist/browser/ai/types/types.js +48 -0
  44. package/dist/browser/ai/validators.js +136 -0
  45. package/dist/common/Logger/index.js +60 -0
  46. package/dist/common/Logger/types.js +5 -0
  47. package/dist/common/SdkError.js +50 -0
  48. package/dist/common/aiModelsValidations.js +50 -0
  49. package/dist/common/browser_scripts.js +2596 -0
  50. package/dist/common/ensureBrowserScripts.js +17 -0
  51. package/dist/common/environmentVariables.js +16 -0
  52. package/dist/common/eventTracking/getAiTrackingHeaders.js +31 -0
  53. package/dist/common/eventTracking/getFileTrackingHeaders.js +23 -0
  54. package/dist/common/extendedTest.js +148 -0
  55. package/dist/common/extractionHelpers.js +19 -0
  56. package/dist/common/formatZodError.js +18 -0
  57. package/dist/common/fuzzySearch/fuzzySearch.test.js +250 -0
  58. package/dist/common/fuzzySearch/levenshtein-search.js +298 -0
  59. package/dist/common/fuzzySearch/utils.js +23 -0
  60. package/dist/common/getModelProvider.js +18 -0
  61. package/dist/common/getSimplifiedHtml.js +122 -0
  62. package/dist/common/hashObject.js +32 -0
  63. package/dist/common/html2markdown/convertElementToMarkdown.js +469 -0
  64. package/dist/common/html2markdown/index.js +19 -0
  65. package/dist/common/jwtTokenManager.js +18 -0
  66. package/dist/common/loadRuntime.js +16 -0
  67. package/dist/common/locatorHelpers.js +41 -0
  68. package/dist/common/matching/collectStrings.js +32 -0
  69. package/dist/common/matching/levenshtein.js +40 -0
  70. package/dist/common/matching/matching.js +317 -0
  71. package/dist/common/matching/types.js +1 -0
  72. package/dist/common/noEmpty.js +9 -0
  73. package/dist/common/saveSnapshotWithExamples.js +60 -0
  74. package/dist/common/tests/testEnsureBrowserScript.spec.js +31 -0
  75. package/dist/common/xpathMapping.js +107 -0
  76. package/dist/helpers/downloadFile.js +125 -0
  77. package/dist/helpers/export.d.js +1 -0
  78. package/dist/helpers/export.d.ts +1294 -0
  79. package/dist/helpers/extractMarkdown.js +35 -0
  80. package/dist/helpers/filterEmptyValues.js +54 -0
  81. package/dist/helpers/gotoUrl.js +93 -0
  82. package/dist/helpers/index.d.ts +1294 -0
  83. package/dist/helpers/index.js +115 -0
  84. package/dist/helpers/processDate.js +25 -0
  85. package/dist/helpers/resolveUrl.js +63 -0
  86. package/dist/helpers/sanitizeHtml.js +73 -0
  87. package/dist/helpers/saveFileToS3.js +46 -0
  88. package/dist/helpers/scrollToLoadContent.js +50 -0
  89. package/dist/helpers/tests/extendedTest.js +130 -0
  90. package/dist/helpers/tests/testDownloadFile.spec.js +197 -0
  91. package/dist/helpers/tests/testFilterEmptyValues.spec.js +151 -0
  92. package/dist/helpers/tests/testGoToUrl.spec.js +37 -0
  93. package/dist/helpers/tests/testIsPageLoaded.spec.js +285 -0
  94. package/dist/helpers/tests/testProcessDate.spec.js +13 -0
  95. package/dist/helpers/tests/testResolveUrl.spec.js +341 -0
  96. package/dist/helpers/tests/testSanitizeHtml.spec.js +330 -0
  97. package/dist/helpers/tests/testSimplifyHtml.spec.js +251 -0
  98. package/dist/helpers/tests/testValidateDataUsingSchema.spec.js +380 -0
  99. package/dist/helpers/tests/testWaitForDomSettled.spec.js +169 -0
  100. package/dist/helpers/tests/testWaitForNetworkIdle.spec.js +115 -0
  101. package/dist/helpers/types/Attachment.js +81 -0
  102. package/dist/helpers/types/CustomTypeRegistry.js +48 -0
  103. package/dist/helpers/types/RunEnvironment.js +18 -0
  104. package/dist/helpers/types/ValidationError.js +17 -0
  105. package/dist/helpers/types/index.js +51 -0
  106. package/dist/helpers/uploadFileToS3.js +153 -0
  107. package/dist/helpers/utils/getS3Client.js +21 -0
  108. package/dist/helpers/utils/index.js +73 -0
  109. package/dist/helpers/utils/isDownload.js +10 -0
  110. package/dist/helpers/utils/isGenerateCodeMode.js +9 -0
  111. package/dist/helpers/utils/isLocator.js +9 -0
  112. package/dist/helpers/utils/jwtTokenManager.js +18 -0
  113. package/dist/helpers/validateDataUsingSchema.js +119 -0
  114. package/dist/helpers/waitForDomSettled.js +182 -0
  115. package/dist/helpers/waitForNetworkIdle.js +191 -0
  116. package/dist/index.d.js +82 -0
  117. package/dist/index.d.ts +11 -0
  118. package/dist/index.js +84 -0
  119. package/dist/intunedServices/ApiGateway/aiApiGateway.js +87 -0
  120. package/dist/intunedServices/ApiGateway/factory.js +13 -0
  121. package/dist/intunedServices/ApiGateway/providers/Anthropic.js +26 -0
  122. package/dist/intunedServices/ApiGateway/providers/Gemini.js +29 -0
  123. package/dist/intunedServices/ApiGateway/providers/OpenAI.js +29 -0
  124. package/dist/intunedServices/ApiGateway/tests/testApiGateway.spec.js +221 -0
  125. package/dist/intunedServices/ApiGateway/types.js +11 -0
  126. package/dist/intunedServices/cache/cache.js +61 -0
  127. package/dist/intunedServices/cache/index.js +12 -0
  128. package/dist/intunedServices/cache/tests/testCache.spec.js +117 -0
  129. package/dist/optimized-extractors/common/buildExamplesPrompt.js +12 -0
  130. package/dist/optimized-extractors/common/buildImagesFromPage.js +55 -0
  131. package/dist/optimized-extractors/common/extractStructuredDataUsingClaude.js +149 -0
  132. package/dist/optimized-extractors/common/extractStructuredDataUsingGoogle.js +37 -0
  133. package/dist/optimized-extractors/common/extractStructuredDataUsingOpenAi.js +145 -0
  134. package/dist/optimized-extractors/common/extractStrucutredDataUsingAiInstance.js +122 -0
  135. package/dist/optimized-extractors/common/findTableHeaders.js +175 -0
  136. package/dist/optimized-extractors/common/index.js +55 -0
  137. package/dist/optimized-extractors/common/isTableHeaderOrFooter.js +97 -0
  138. package/dist/optimized-extractors/common/matching/matching.js +212 -0
  139. package/dist/optimized-extractors/common/matching/matching.test.js +655 -0
  140. package/dist/optimized-extractors/common/matching/types.js +18 -0
  141. package/dist/optimized-extractors/common/matching/utils.js +184 -0
  142. package/dist/optimized-extractors/common/utils.js +58 -0
  143. package/dist/optimized-extractors/export.d.js +5 -0
  144. package/dist/optimized-extractors/export.d.ts +397 -0
  145. package/dist/optimized-extractors/extractArray.js +120 -0
  146. package/dist/optimized-extractors/extractObject.js +104 -0
  147. package/dist/optimized-extractors/index.d.ts +397 -0
  148. package/dist/optimized-extractors/index.js +31 -0
  149. package/dist/optimized-extractors/listExtractionHelpers/__tests__/dynamicListExtractor.spec.js +312 -0
  150. package/dist/optimized-extractors/listExtractionHelpers/__tests__/findSetOfXpathsToCreateAnArrayExtractor.test.js +22 -0
  151. package/dist/optimized-extractors/listExtractionHelpers/__tests__/getContainerElement.test.js +21 -0
  152. package/dist/optimized-extractors/listExtractionHelpers/__tests__/partOfSameArrayXpath.test.js +42 -0
  153. package/dist/optimized-extractors/listExtractionHelpers/__tests__/verifyThatAllXpathsArePartOfSameArray.test.js +9 -0
  154. package/dist/optimized-extractors/listExtractionHelpers/dynamicListExtractor.js +152 -0
  155. package/dist/optimized-extractors/listExtractionHelpers/errors.js +46 -0
  156. package/dist/optimized-extractors/listExtractionHelpers/getListMatches.js +14 -0
  157. package/dist/optimized-extractors/listExtractionHelpers/runAiExtraction.js +240 -0
  158. package/dist/optimized-extractors/listExtractionHelpers/typesAndSchema.js +5 -0
  159. package/dist/optimized-extractors/listExtractionHelpers/utils/extractPropertiesUsingGPTFromArray.js +277 -0
  160. package/dist/optimized-extractors/listExtractionHelpers/utils/extractStructuredListUsingAi.js +44 -0
  161. package/dist/optimized-extractors/listExtractionHelpers/utils/getListContainerXpath.js +94 -0
  162. package/dist/optimized-extractors/listExtractionHelpers/utils/getRelativeContainerXpathSelector.js +20 -0
  163. package/dist/optimized-extractors/listExtractionHelpers/utils/getSimplifiedHtmlPerListItem.js +21 -0
  164. package/dist/optimized-extractors/listExtractionHelpers/utils/tablesUtils.js +48 -0
  165. package/dist/optimized-extractors/listExtractionHelpers/utils/validateOptions.js +52 -0
  166. package/dist/optimized-extractors/models/anthropicModel.js +23 -0
  167. package/dist/optimized-extractors/models/openaiModel.js +23 -0
  168. package/dist/optimized-extractors/objectExtractionHelpers/AIExtractors.js +73 -0
  169. package/dist/optimized-extractors/objectExtractionHelpers/__tests__/checksumUtils.test.js +103 -0
  170. package/dist/optimized-extractors/objectExtractionHelpers/__tests__/testObjectExtractorFromLocator.spec.js +107 -0
  171. package/dist/optimized-extractors/objectExtractionHelpers/__tests__/testObjectExtractorFromPage.spec.js +107 -0
  172. package/dist/optimized-extractors/objectExtractionHelpers/calculateObjectExampleHash.js +28 -0
  173. package/dist/optimized-extractors/objectExtractionHelpers/captureSnapshot.js +26 -0
  174. package/dist/optimized-extractors/objectExtractionHelpers/checksumUtils.js +32 -0
  175. package/dist/optimized-extractors/objectExtractionHelpers/constants.js +7 -0
  176. package/dist/optimized-extractors/objectExtractionHelpers/dynamicObjectExtractor.js +106 -0
  177. package/dist/optimized-extractors/objectExtractionHelpers/errors.js +42 -0
  178. package/dist/optimized-extractors/objectExtractionHelpers/findDomMatches.js +54 -0
  179. package/dist/optimized-extractors/objectExtractionHelpers/getSimplifiedHtml.js +122 -0
  180. package/dist/optimized-extractors/objectExtractionHelpers/typesAndSchemas.js +5 -0
  181. package/dist/optimized-extractors/objectExtractionHelpers/validateDynamicObjectExtractorOptions.js +52 -0
  182. package/dist/optimized-extractors/types/aiModelsValidation.js +45 -0
  183. package/dist/optimized-extractors/types/errors.js +42 -0
  184. package/dist/optimized-extractors/types/jsonSchema.d.js +5 -0
  185. package/dist/optimized-extractors/types/jsonSchema.d.ts +50 -0
  186. package/dist/optimized-extractors/types/types.js +5 -0
  187. package/dist/optimized-extractors/validators.js +152 -0
  188. package/dist/vite-env.d.js +1 -0
  189. package/dist/vite-env.d.ts +9 -0
  190. package/docs.md +14 -0
  191. package/how-to-run-tests.md +10 -0
  192. package/intuned-runtime-setup.md +13 -0
  193. package/package.json +124 -0
  194. package/tsconfig.eslint.json +5 -0
  195. package/tsconfig.json +26 -0
@@ -0,0 +1,106 @@
1
+ "use strict";
2
+
3
+ Object.defineProperty(exports, "__esModule", {
4
+ value: true
5
+ });
6
+ exports.dynamicObjectExtractor = dynamicObjectExtractor;
7
+ var _neverthrow = require("neverthrow");
8
+ var Errors = _interopRequireWildcard(require("./errors"));
9
+ var _AIExtractors = require("./AIExtractors");
10
+ var _captureSnapshot = require("./captureSnapshot");
11
+ var _validateDynamicObjectExtractorOptions = require("./validateDynamicObjectExtractorOptions");
12
+ var _calculateObjectExampleHash = require("./calculateObjectExampleHash");
13
+ var _Logger = require("../../common/Logger");
14
+ var _cache = require("../../intunedServices/cache");
15
+ var _xpathMapping = require("../../common/xpathMapping");
16
+ function _interopRequireWildcard(e, t) { if ("function" == typeof WeakMap) var r = new WeakMap(), n = new WeakMap(); return (_interopRequireWildcard = function (e, t) { if (!t && e && e.__esModule) return e; var o, i, f = { __proto__: null, default: e }; if (null === e || "object" != typeof e && "function" != typeof e) return f; if (o = t ? n : r) { if (o.has(e)) return o.get(e); o.set(e, f); } for (const t in e) "default" !== t && {}.hasOwnProperty.call(e, t) && ((i = (o = Object.defineProperty) && Object.getOwnPropertyDescriptor(e, t)) && (i.get || i.set) ? o(f, t, i) : f[t] = e[t]); return f; })(e, t); }
17
/**
 * Extracts one structured object from the page, reusing a cached result when
 * it still matches the page.
 *
 * Steps: validate the options, look the request up in the cache, and on a miss
 * (or a stale hit) capture a snapshot, run the AI extraction, verify the
 * schema's required properties, then cache and return the result.
 *
 * @param page - Page passed to option validation, cache validation and the AI extraction.
 * @param identifier - Caller-supplied identifier forwarded to validation and to the AI extractor.
 * @param options - Raw extractor options; validated by `validateDynamicObjectExtractorOptions`.
 * @returns A neverthrow result: `ok(object)` on success, `ok(null)` when none of
 *   the required properties could be extracted, or `err(...)` when validation,
 *   snapshot capture or extraction fails, or when only some of the required
 *   properties are missing.
 */
async function dynamicObjectExtractor(page, identifier, options) {
  const validation = await (0, _validateDynamicObjectExtractorOptions.validateDynamicObjectExtractorOptions)(page, identifier, options);
  if (validation.isErr()) {
    return (0, _neverthrow.err)(validation.error);
  }
  const {
    entityName,
    entitySchema,
    entityDescription,
    searchRegion,
    searchRegionHandler,
    variantKey,
    currentPageUrl,
    searchRegionKey,
    strategy,
    hasSearchRegionContainer,
    prompt,
    apiKey
  } = validation.value;

  // The hash identifies this extraction request and is used as the cache key.
  const extractionExampleHash = (0, _calculateObjectExampleHash.calculateObjectExampleHash)({
    currentPageUrl,
    entityName,
    entityDescription,
    entitySchema,
    variantKey,
    strategy,
    searchRegionLocator: searchRegionKey,
    prompt
  });

  const cachedResult = await _cache.cache.get(extractionExampleHash);
  if (cachedResult) {
    _Logger.logger.info("Found value in cache");
    const isValid = await (0, _xpathMapping.validateXPathMapping)(page, cachedResult.matchesMapping);
    if (isValid) {
      _Logger.logger.info("The values in the cache are the same as the current page, returning the cached result");
      return (0, _neverthrow.ok)(cachedResult.result);
    }
    _Logger.logger.info("The values in the cache are different from the current page, running AI extraction");
  } else {
    // Only log a cache miss when there really was no entry; a stale entry is
    // already reported above.
    _Logger.logger.info("No value found in the cache, running AI extraction");
  }

  const pageAndSearchRegion = {
    page,
    searchRegion,
    searchRegionHandler: searchRegionHandler
  };
  const snapshot = await (0, _captureSnapshot.captureSnapshot)(pageAndSearchRegion);
  if (snapshot.isErr()) {
    return (0, _neverthrow.err)(snapshot.error);
  }

  const extractionResultWithMatches = await (0, _AIExtractors.runAIExtraction)(pageAndSearchRegion, {
    entityName,
    entityDescription,
    entitySchema,
    identifier
  }, snapshot.value, strategy, hasSearchRegionContainer, prompt, apiKey);
  if (extractionResultWithMatches.isErr()) {
    return (0, _neverthrow.err)(extractionResultWithMatches.error);
  }

  const resultToReturn = extractionResultWithMatches.value.extractionResultObject;

  // A schema may omit `required`; treat that the same as an empty list.
  const requiredProperties = entitySchema.required ?? [];
  const requiredPropsNotExtracted = requiredProperties.filter(propertyName => !resultToReturn[propertyName]);
  // "Nothing found" only makes sense when there is at least one required
  // property; otherwise 0 === 0 would discard every successful extraction.
  const shouldReturnNull = requiredProperties.length > 0 && requiredPropsNotExtracted.length === requiredProperties.length;
  if (!shouldReturnNull && requiredPropsNotExtracted.length > 0) {
    return (0, _neverthrow.err)(Errors.requiredPropertyNotExtracted(`${requiredPropsNotExtracted.toString()} not found.`));
  }
  if (shouldReturnNull) {
    _Logger.logger.warn("All required properties not found, returning null as extraction result");
    return (0, _neverthrow.ok)(null);
  }

  // Re-key the AI matches by matched text; this mapping is what gets handed to
  // validateXPathMapping on a later cache hit.
  const xpathMappingFromAI = extractionResultWithMatches.value.xpathMapping;
  const xpathMapping = {};
  for (const { matchXpath, matchText, matchType } of Object.values(xpathMappingFromAI)) {
    xpathMapping[matchText] = [{
      xpath: matchXpath,
      matchType
    }];
  }

  const resultsToCache = {
    result: resultToReturn,
    matchesMapping: xpathMapping
  };
  _Logger.logger.debug("Caching results...");
  await _cache.cache.set(extractionExampleHash, resultsToCache);
  _Logger.logger.debug("Results cached");
  return (0, _neverthrow.ok)(resultToReturn);
}
@@ -0,0 +1,42 @@
1
+ "use strict";
2
+
3
+ Object.defineProperty(exports, "__esModule", {
4
+ value: true
5
+ });
6
+ exports.requiredPropertyNotExtracted = exports.other = exports.invalidSearchRegion = exports.invalidPageState = exports.invalidInput = exports.invalidExtractionResult = exports.insufficientAiCredits = void 0;
7
/**
 * Builds an "InvalidPageState" error value.
 * @param context - Detail describing what was wrong with the page state.
 */
const invalidPageState = (context) => {
  const errorValue = { type: "InvalidPageState", context: context };
  return errorValue;
};
11
+ exports.invalidPageState = invalidPageState;
12
/**
 * Builds an "Other" error value for failures without a dedicated type.
 * @param context - Detail describing where the failure happened.
 * @param error - The underlying error, carried along unchanged.
 */
const other = (context, error) => {
  const errorValue = { type: "Other", context: context, error: error };
  return errorValue;
};
17
+ exports.other = other;
18
/**
 * Builds an "InvalidSearchRegion" error value.
 * @param context - Detail describing why the search region is unusable.
 */
const invalidSearchRegion = (context) => {
  const errorValue = { type: "InvalidSearchRegion", context: context };
  return errorValue;
};
22
+ exports.invalidSearchRegion = invalidSearchRegion;
23
/**
 * Builds an "InvalidInput" error value.
 * @param context - Detail describing which input was rejected.
 */
const invalidInput = (context) => {
  const errorValue = { type: "InvalidInput", context: context };
  return errorValue;
};
27
+ exports.invalidInput = invalidInput;
28
/**
 * Builds an "InvalidExtractionResult" error value.
 * @param context - Detail describing what was wrong with the extraction output.
 */
const invalidExtractionResult = (context) => {
  const errorValue = { type: "InvalidExtractionResult", context: context };
  return errorValue;
};
32
+ exports.invalidExtractionResult = invalidExtractionResult;
33
/**
 * Builds a "RequiredPropertyNotExtracted" error value.
 * @param context - Detail naming the required properties that were not found.
 */
const requiredPropertyNotExtracted = (context) => {
  const errorValue = { type: "RequiredPropertyNotExtracted", context: context };
  return errorValue;
};
37
+ exports.requiredPropertyNotExtracted = requiredPropertyNotExtracted;
38
/**
 * Builds an "InsufficientAiCredits" error value.
 * @param context - Detail accompanying the credits failure.
 */
const insufficientAiCredits = (context) => {
  const errorValue = { type: "InsufficientAiCredits", context: context };
  return errorValue;
};
42
+ exports.insufficientAiCredits = insufficientAiCredits;
@@ -0,0 +1,54 @@
1
+ "use strict";
2
+
3
+ Object.defineProperty(exports, "__esModule", {
4
+ value: true
5
+ });
6
+ exports.getDomMatches = getDomMatches;
7
+ exports.getDomMatchesFromItemsHandles = getDomMatchesFromItemsHandles;
8
+ var _types = require("../common/matching/types");
9
+ var _ensureBrowserScripts = require("../../common/ensureBrowserScripts");
10
/**
 * Runs the in-page string matcher for `searchTexts` inside `container` and
 * converts its raw records into the match shape used by the extractors.
 *
 * @param page - Page on which the browser scripts are ensured and the matcher is evaluated.
 * @param container - Passed through to `window.__INTUNED__.matchStringsWithDomContent`.
 * @param searchTexts - Texts to look for; passed through to the in-page matcher.
 * @returns A Map from each search text to its list of converted matches.
 */
async function getDomMatches(page, container, searchTexts) {
  await (0, _ensureBrowserScripts.ensureBrowserScripts)(page);

  const rawMatchesByText = await page.evaluate(async (args) => {
    const found = await window.__INTUNED__.matchStringsWithDomContent(args.container, args.searchTexts);
    return found;
  }, {
    container,
    searchTexts
  });

  // Attribute matches get an `[@name]` suffix on the node xpath; direct-text
  // and any other match sources use the node xpath as-is.
  const buildMatchXpath = (raw) => {
    let suffix = "";
    if (raw.match_source === _types.MatchSource.DIRECT_TEXT_NODE) {
      suffix = "";
    } else if (raw.match_source === _types.MatchSource.ATTRIBUTE) {
      suffix = `[@${raw.attribute}]`;
    }
    return raw.xpath + suffix;
  };

  const buildMatchType = (raw) => {
    if (raw.match_source === _types.MatchSource.TEXT_CONTENT) {
      return "all-text";
    }
    if (raw.match_source === _types.MatchSource.DIRECT_TEXT_NODE) {
      return "direct-text";
    }
    return {
      attribute: raw.attribute
    };
  };

  const matchesByText = new Map();
  for (const [searchText, rawMatches] of Object.entries(rawMatchesByText)) {
    const converted = rawMatches.map((raw) => {
      const matchXpath = buildMatchXpath(raw);
      return {
        sourceText: raw.matched_source_value,
        matchText: raw.matched_value,
        nodeXpath: raw.xpath,
        matchXpath,
        exact: raw.match_mode === _types.MatchMode.FULL,
        matchType: buildMatchType(raw),
        isFuzzy: raw.match_mode === _types.MatchMode.FUZZY,
        fuzzyDistance: raw.fuzzy_distance ?? undefined
      };
    });
    matchesByText.set(searchText, converted);
  }
  return matchesByText;
}
41
/**
 * Collects the DOM matches for `searchTexts` inside a single handle.
 *
 * @param page - Page forwarded to `getDomMatches`.
 * @param handle - Container forwarded to `getDomMatches`.
 * @param searchTexts - Texts forwarded to `getDomMatches`.
 * @returns A new Map from search text to a fresh array of that text's matches.
 */
async function getDomMatchesFromItemsHandles(page, handle, searchTexts) {
  const perHandleMatches = [await getDomMatches(page, handle, searchTexts)];

  const combined = new Map();
  perHandleMatches.forEach((handleMatches) => {
    handleMatches.forEach((found, searchText) => {
      // Append to the list already collected for this text, or start a new one.
      const collected = combined.get(searchText) || [];
      collected.push(...found);
      combined.set(searchText, collected);
    });
  });
  return combined;
}
@@ -0,0 +1,122 @@
1
+ "use strict";
2
+
3
+ Object.defineProperty(exports, "__esModule", {
4
+ value: true
5
+ });
6
+ exports.getSimplifiedHtml = getSimplifiedHtml;
7
+ var _constants = require("./constants");
8
/**
 * Produces a simplified HTML string for the element behind `containerHandle`.
 *
 * The simplification runs inside the page: invisible elements are dropped
 * (unless `keepOnlyVisibleElements` is false), attributes outside
 * `ALLOWED_ATTRIBUTES` are stripped, and wrapper elements that add nothing
 * (single child, no allowed attribute) are collapsed into their child.
 *
 * @param containerHandle - Element handle of the root element to simplify.
 * @param options - Optional overrides for the defaults below
 *   (`shouldIncludeOnClick`, `shouldIncludeContentAsProp`,
 *   `keepOnlyVisibleElements`, `shouldReturnFullHtml`, `shouldIncludeIframes`).
 * @returns The simplified `outerHTML`, or "" when nothing is left after simplification.
 */
async function getSimplifiedHtml(containerHandle, options) {
  // Read the tag name directly as a string; no intermediate JSHandle is created.
  const tagName = await containerHandle.evaluate(element => element.tagName.toLowerCase());
  const shouldReturnFullHtml = tagName === "html";
  const optionsWithDefault = {
    shouldIncludeOnClick: false,
    shouldIncludeContentAsProp: false,
    keepOnlyVisibleElements: true,
    shouldReturnFullHtml,
    ...options
  };
  const simplifiedHtml = await containerHandle.evaluate((element, {
    optionsWithDefault,
    ALLOWED_ATTRIBUTES
  }) => {
    function truthyFilter(value) {
      return Boolean(value);
    }
    function isElementNode(node) {
      return node.nodeType === node.ELEMENT_NODE;
    }
    // True when the element carries at least one attribute from the allow-list
    // (entries may be plain names or regular expressions).
    const hasAnyAllowedAttribute = (element, allowedAttributes) => {
      const elementAttributes = element.getAttributeNames();
      const hasAllowedAttribute = !!allowedAttributes.some(attr => {
        if (typeof attr === "string") {
          return element.hasAttribute(attr);
        } else if (attr instanceof RegExp) {
          return elementAttributes.some(attrName => attr.test(attrName));
        }
      });
      return hasAllowedAttribute;
    };
    function isElementVisible(style) {
      return style.opacity !== "" && style.display !== "none" && style.visibility !== "hidden" && style.opacity !== "0";
    }
    function isElementInteractive(element, style) {
      return element.tagName === "A" || element.tagName === "INPUT" || element.tagName === "BUTTON" || element.tagName === "SELECT" || element.tagName === "TEXTAREA" || element.hasAttribute("onclick") || element.hasAttribute("onmousedown") || element.hasAttribute("onmouseup") || element.hasAttribute("onkeydown") || element.hasAttribute("onkeyup") || style.cursor === "pointer";
    }
    // Accessing a cross-origin iframe's document throws; treat that as "no document".
    function getDocumentFromIframeElementSafely(element) {
      try {
        if (element.contentWindow && element.contentWindow.document) {
          return element.contentWindow.document.documentElement;
        }
      } catch (error) {
        return undefined;
      }
    }
    function isInputWithValue(element) {
      return element.tagName === "INPUT" && element.value && element.value.trim();
    }
    function generateSimplifiedDom(element, interactiveElements, document, allowedAttributes, shouldIncludeContentAsProp, keepOnlyVisibleElements) {
      var _element$textContent;
      // Non-empty text nodes are kept, with a trailing space as separator.
      if (element.nodeType === 3 && (_element$textContent = element.textContent) !== null && _element$textContent !== void 0 && _element$textContent.trim()) {
        return document.createTextNode(element.textContent + " ");
      }
      if (!isElementNode(element)) {
        return null;
      }
      // Structural tags bypass the visibility/inclusion checks. The tag list is
      // chosen first and THEN tested: writing `cond ? [...] : [...].includes(x)`
      // would yield an always-truthy array whenever iframes are enabled.
      const tagsSkippingChecks = optionsWithDefault.shouldIncludeIframes ? ["BODY", "HTML", "IFRAME"] : ["BODY", "HTML"];
      const shouldSkipElementChecks = tagsSkippingChecks.includes(element.nodeName);
      const style = window.getComputedStyle(element);
      const isVisible = isElementVisible(style) || shouldSkipElementChecks;
      if (keepOnlyVisibleElements && !isVisible && !isInputWithValue(element)) {
        return null;
      }
      // NOTE(review): for iframes the frame's live documentElement is used as
      // the child without being simplified or cloned — confirm this is intended.
      let children = optionsWithDefault.shouldIncludeIframes && element.nodeName === "IFRAME" ? [getDocumentFromIframeElementSafely(element)].filter(Boolean) : Array.from(element.childNodes).map(c => generateSimplifiedDom(c, interactiveElements, document, allowedAttributes, shouldIncludeContentAsProp, keepOnlyVisibleElements)).filter(truthyFilter);
      if (element.tagName === "BODY") children = children.filter(c => c.nodeType !== 3);
      const interactive = isElementInteractive(element, style) || element.hasAttribute("role");
      const hasLabel = element.hasAttribute("aria-label") || element.hasAttribute("name");
      const hasAllowedAttribute = hasAnyAllowedAttribute(element, allowedAttributes);
      let includeNode = interactive || hasLabel || hasAllowedAttribute || shouldSkipElementChecks;
      // Empty element without any allowed attribute: nothing worth keeping.
      if (children.length === 0 && !hasAllowedAttribute) {
        return null;
      }
      // Single element child and no allowed attribute: collapse the wrapper,
      // except for the structural root tags when the full HTML was requested.
      if (children.length === 1 && !hasAllowedAttribute && children[0].nodeType !== 3 && !(shouldSkipElementChecks && optionsWithDefault.shouldReturnFullHtml)) {
        return children[0];
      }
      // An element that directly holds text must be kept to preserve that text.
      if (!includeNode && children.some(c => c.nodeType === 3)) {
        includeNode = true;
      }
      if (!includeNode && children.length === 1) {
        return children[0];
      }
      const container = element.cloneNode();
      const allAttributes = element.getAttributeNames();
      const listOfAttributesToRemove = allAttributes.filter(attr => {
        const isAllowedString = allowedAttributes.includes(attr);
        const isAllowedRegExp = allowedAttributes.some(regex => regex instanceof RegExp && regex.test(attr));
        return !isAllowedString && !isAllowedRegExp;
      });
      for (const attr of listOfAttributesToRemove) {
        container.removeAttribute(attr);
      }
      if (interactive) {
        interactiveElements.push(element);
      }
      if (shouldIncludeContentAsProp && element.textContent) {
        container.setAttribute("content", element.textContent);
      }
      children.forEach(child => container.appendChild(child));
      return container;
    }
    function getSimplifiedDomFromElement(htmlElement, shouldIncludeOnClick, shouldIncludeContentAsProp, keepOnlyVisibleElements) {
      const interactiveElements = [];
      const allowedAttributes = shouldIncludeOnClick ? [...ALLOWED_ATTRIBUTES, "onclick"] : ALLOWED_ATTRIBUTES;
      const simplifiedDom = generateSimplifiedDom(htmlElement, interactiveElements, htmlElement.ownerDocument, allowedAttributes, shouldIncludeContentAsProp, keepOnlyVisibleElements);
      if (!simplifiedDom) return "";
      return simplifiedDom.outerHTML;
    }
    return getSimplifiedDomFromElement(element, optionsWithDefault.shouldIncludeOnClick, optionsWithDefault.shouldIncludeContentAsProp, optionsWithDefault.keepOnlyVisibleElements);
  }, {
    optionsWithDefault,
    ALLOWED_ATTRIBUTES: _constants.ALLOWED_ATTRIBUTES
  });
  return simplifiedHtml;
}
@@ -0,0 +1,5 @@
1
+ "use strict";
2
+
3
+ Object.defineProperty(exports, "__esModule", {
4
+ value: true
5
+ });
@@ -0,0 +1,52 @@
1
+ "use strict";
2
+
3
+ Object.defineProperty(exports, "__esModule", {
4
+ value: true
5
+ });
6
+ exports.validateDynamicObjectExtractorOptions = validateDynamicObjectExtractorOptions;
7
+ var _neverthrow = require("neverthrow");
8
+ var Errors = _interopRequireWildcard(require("./errors"));
9
+ var z = _interopRequireWildcard(require("zod"));
10
+ var _locatorHelpers = require("../../common/locatorHelpers");
11
+ function _interopRequireWildcard(e, t) { if ("function" == typeof WeakMap) var r = new WeakMap(), n = new WeakMap(); return (_interopRequireWildcard = function (e, t) { if (!t && e && e.__esModule) return e; var o, i, f = { __proto__: null, default: e }; if (null === e || "object" != typeof e && "function" != typeof e) return f; if (o = t ? n : r) { if (o.has(e)) return o.get(e); o.set(e, f); } for (const t in e) "default" !== t && {}.hasOwnProperty.call(e, t) && ((i = (o = Object.defineProperty) && Object.getOwnPropertyDescriptor(e, t)) && (i.get || i.set) ? o(f, t, i) : f[t] = e[t]); return f; })(e, t); }
12
/**
 * Validates the object-extractor options against the current page and
 * resolves the values the extractor needs.
 *
 * @param page - Page used to resolve the search region and read the current URL.
 * @param label - Caller-supplied label, returned unchanged in the result.
 * @param options - Raw extractor options. `searchRegion` defaults to the
 *   page's `html` locator and `variantKey` defaults to the page URL's origin.
 * @returns A neverthrow result: `ok(resolvedOptions)`, or `err(...)` with an
 *   `InvalidSearchRegion` error (region missing, hidden, or without a handle)
 *   or an `InvalidPageState` error (empty page URL, or origin cannot be derived).
 */
async function validateDynamicObjectExtractorOptions(page, label, options) {
  const invalidate = options.optionalPropertiesInvalidator ?? (_extractionResult => []);
  const searchRegion = options.searchRegion ?? page.locator("html");

  // Check existence first so a missing region is reported without an extra
  // visibility round-trip.
  const elementExist = (await searchRegion.count()) > 0;
  if (!elementExist) {
    return (0, _neverthrow.err)(Errors.invalidSearchRegion("Element not found"));
  }
  const isElementVisible = await searchRegion.isVisible();
  if (!isElementVisible) {
    return (0, _neverthrow.err)(Errors.invalidSearchRegion("Element is not visible"));
  }
  const searchRegionHandler = await searchRegion.elementHandle();
  if (!searchRegionHandler) {
    // Always give the error a context so callers can tell the failures apart.
    return (0, _neverthrow.err)(Errors.invalidSearchRegion("Cannot get element handle for the search region"));
  }

  const validationResult = z.string().nonempty().safeParse(page.url());
  if (!validationResult.success) {
    return (0, _neverthrow.err)(Errors.invalidPageState("Cannot get page url."));
  }
  const currentPageUrl = validationResult.data;

  // `new URL(...)` throws on URLs it cannot parse; convert that into an error result.
  const getPageUrlOrigin = () => new URL(page.url()).origin;
  const safeGetPageUrlOrigin = (0, _neverthrow.fromThrowable)(getPageUrlOrigin, () => Errors.invalidPageState("Cannot get page url origin."));
  const variantKey = options.variantKey ? (0, _neverthrow.ok)(options.variantKey) : safeGetPageUrlOrigin();
  if (variantKey.isErr()) {
    return (0, _neverthrow.err)(variantKey.error);
  }

  return (0, _neverthrow.ok)({
    entityName: options.entityName,
    entitySchema: options.entitySchema,
    // dynamicObjectExtractor destructures `entityDescription` from this value;
    // forward it so it is not silently dropped (stays undefined when the
    // caller does not supply one).
    entityDescription: options.entityDescription,
    invalidate,
    searchRegion,
    searchRegionHandler,
    variantKey: variantKey.value,
    currentPageUrl,
    searchRegionKey: options.searchRegion ? (0, _locatorHelpers.getLocatorInternalKey)(options.searchRegion) : null,
    hasSearchRegionContainer: !!options.searchRegion,
    label,
    strategy: options.strategy,
    prompt: options.prompt,
    searchRegionXpath: options.searchRegion ? await (0, _locatorHelpers.findXPathForLocator)(options.searchRegion) : undefined,
    apiKey: options.apiKey
  });
}
@@ -0,0 +1,45 @@
1
+ "use strict";
2
+
3
+ Object.defineProperty(exports, "__esModule", {
4
+ value: true
5
+ });
6
exports.SUPPORTED_VISION_MODELS = exports.SUPPORTED_TEXT_MODELS = exports.SUPPORTED_GPT_MODELS = exports.SUPPORTED_GOOGLE_MODELS = exports.SUPPORTED_CLAUDE_MODELS = exports.MODELS_MAPPINGS = exports.MAX_TOKENS_OVERRIDES = exports.GPT_MODELS_MAPPINGS = exports.GOOGLE_MODELS_MAPPINGS = exports.CLAUDE_VISION_SUPPORTED_MODELS = exports.CLAUDE_ONLY_TEXT_MODELS = exports.CLAUDE_MODELS_MAPPINGS = void 0;

// ---------------------------------------------------------------------------
// Claude
// ---------------------------------------------------------------------------

/** Claude model identifiers listed as text-only. */
const CLAUDE_ONLY_TEXT_MODELS = [
  "claude-3-5-haiku",
  "claude-3-5-haiku-20241022"
];

/** Claude model identifiers listed as vision-capable. */
const CLAUDE_VISION_SUPPORTED_MODELS = [
  "claude-3-haiku",
  "claude-3-haiku-20240307",
  "claude-3.5-sonnet",
  "claude-3-5-sonnet-20240620",
  "claude-3-5-sonnet-20241022",
  "claude-opus-4",
  "claude-opus-4-20250514",
  "claude-sonnet-4",
  "claude-sonnet-4-20250514"
];

/** Every Claude identifier: text-only entries first, then vision entries. */
const SUPPORTED_CLAUDE_MODELS = CLAUDE_ONLY_TEXT_MODELS.concat(CLAUDE_VISION_SUPPORTED_MODELS);

/** Short Claude alias -> dated model identifier. */
const CLAUDE_MODELS_MAPPINGS = {
  "claude-3-haiku": "claude-3-haiku-20240307",
  "claude-3-5-haiku": "claude-3-5-haiku-20241022",
  "claude-3.5-sonnet": "claude-3-5-sonnet-20241022",
  "claude-opus-4": "claude-opus-4-20250514",
  "claude-sonnet-4": "claude-sonnet-4-20250514"
};

// ---------------------------------------------------------------------------
// GPT (the two source lists are module-private; only the union is exported)
// ---------------------------------------------------------------------------

const GPT_ONLY_TEXT_GPT_MODELS = [
  "gpt3.5-turbo",
  "gpt-3.5-turbo-0125"
];

const GPT_VISION_SUPPORTED_MODELS = [
  "gpt4-turbo",
  "gpt-4-turbo-2024-04-09",
  "gpt-4o",
  "gpt-4o-2024-05-13",
  "gpt-4o-mini",
  "gpt-4o-mini-2024-07-18"
];

/** Every GPT identifier: text-only entries first, then vision entries. */
const SUPPORTED_GPT_MODELS = GPT_ONLY_TEXT_GPT_MODELS.concat(GPT_VISION_SUPPORTED_MODELS);

/** Short GPT alias -> dated model identifier. */
const GPT_MODELS_MAPPINGS = {
  "gpt4-turbo": "gpt-4-turbo-2024-04-09",
  "gpt3.5-turbo": "gpt-3.5-turbo-0125",
  "gpt-4o": "gpt-4o-2024-05-13",
  "gpt-4o-mini": "gpt-4o-mini-2024-07-18"
};

// ---------------------------------------------------------------------------
// Google
// ---------------------------------------------------------------------------

/** Google model identifiers; the same list is used for both text and vision. */
const SUPPORTED_GOOGLE_MODELS = [
  "gemini-1.5-pro",
  "gemini-1.5-pro-002",
  "gemini-1.5-flash-8b",
  "gemini-1.5-flash-8b-002",
  "gemini-1.5-flash",
  "gemini-1.5-flash-002",
  "gemini-2.0-flash-exp"
];

/** Short Google alias -> versioned model identifier. */
const GOOGLE_MODELS_MAPPINGS = {
  "gemini-1.5-pro": "gemini-1.5-pro-002",
  "gemini-1.5-flash-8b": "gemini-1.5-flash-8b-002",
  "gemini-1.5-flash": "gemini-1.5-flash-002"
};

// ---------------------------------------------------------------------------
// Cross-provider aggregates (order: Claude, GPT, Google)
// ---------------------------------------------------------------------------

const SUPPORTED_TEXT_MODELS = [].concat(
  SUPPORTED_CLAUDE_MODELS,
  SUPPORTED_GPT_MODELS,
  SUPPORTED_GOOGLE_MODELS
);

const SUPPORTED_VISION_MODELS = [].concat(
  CLAUDE_VISION_SUPPORTED_MODELS,
  GPT_VISION_SUPPORTED_MODELS,
  SUPPORTED_GOOGLE_MODELS
);

/** All alias mappings merged; key order is GPT, then Claude, then Google. */
const MODELS_MAPPINGS = Object.assign(
  {},
  GPT_MODELS_MAPPINGS,
  CLAUDE_MODELS_MAPPINGS,
  GOOGLE_MODELS_MAPPINGS
);

/**
 * Per-model numeric overrides keyed by versioned model identifier.
 * NOTE(review): presumably a max-output-token limit per model; confirm
 * against the code that reads this table.
 */
const MAX_TOKENS_OVERRIDES = {
  "claude-3-5-sonnet-20240620": 8192,
  "gemini-1.5-pro-002": 8192,
  "gemini-1.5-flash-8b-002": 8192,
  "gemini-1.5-flash-002": 8192,
  "gemini-2.0-flash-exp": 8192
};

// Publish the public constants on the CommonJS exports object.
exports.CLAUDE_ONLY_TEXT_MODELS = CLAUDE_ONLY_TEXT_MODELS;
exports.CLAUDE_VISION_SUPPORTED_MODELS = CLAUDE_VISION_SUPPORTED_MODELS;
exports.SUPPORTED_CLAUDE_MODELS = SUPPORTED_CLAUDE_MODELS;
exports.CLAUDE_MODELS_MAPPINGS = CLAUDE_MODELS_MAPPINGS;
exports.SUPPORTED_GPT_MODELS = SUPPORTED_GPT_MODELS;
exports.GPT_MODELS_MAPPINGS = GPT_MODELS_MAPPINGS;
exports.SUPPORTED_GOOGLE_MODELS = SUPPORTED_GOOGLE_MODELS;
exports.GOOGLE_MODELS_MAPPINGS = GOOGLE_MODELS_MAPPINGS;
exports.SUPPORTED_TEXT_MODELS = SUPPORTED_TEXT_MODELS;
exports.SUPPORTED_VISION_MODELS = SUPPORTED_VISION_MODELS;
exports.MODELS_MAPPINGS = MODELS_MAPPINGS;
exports.MAX_TOKENS_OVERRIDES = MAX_TOKENS_OVERRIDES;
@@ -0,0 +1,42 @@
1
+ "use strict";
2
+
3
+ Object.defineProperty(exports, "__esModule", {
4
+ value: true
5
+ });
6
exports.other = exports.invalidInput = exports.invalidExtractionResult = exports.insufficientAiCredits = exports.NoToolUsage = exports.NoDataFound = exports.AiCallFailed = void 0;

// Factories for plain error records. Each returns a fresh object whose `type`
// field is a fixed tag and whose `context` field is the caller's argument.
// The tag strings are reproduced exactly, including their mixed casing.

/** Builds a `{ type: "NoDataFound", context }` record. */
const NoDataFound = (context) => {
  return { type: "NoDataFound", context: context };
};

/** Builds a `{ type: "NoToolUsage", context }` record. */
const NoToolUsage = (context) => {
  return { type: "NoToolUsage", context: context };
};

/** Builds a `{ type: "AiCallFailed", context, error }` record. */
const AiCallFailed = (context, error) => {
  return { type: "AiCallFailed", context: context, error: error };
};

/** Builds a `{ type: "InvalidExtractionResult", context }` record. */
const invalidExtractionResult = (context) => {
  return { type: "InvalidExtractionResult", context: context };
};

/** Builds a `{ type: "invalidInput", context }` record. */
const invalidInput = (context) => {
  return { type: "invalidInput", context: context };
};

/** Builds a `{ type: "other", context }` record. */
const other = (context) => {
  return { type: "other", context: context };
};

/** Builds a `{ type: "InsufficientAiCredits", context }` record. */
const insufficientAiCredits = (context) => {
  return { type: "InsufficientAiCredits", context: context };
};

// Publish the factories on the CommonJS exports object.
exports.NoDataFound = NoDataFound;
exports.NoToolUsage = NoToolUsage;
exports.AiCallFailed = AiCallFailed;
exports.invalidExtractionResult = invalidExtractionResult;
exports.invalidInput = invalidInput;
exports.other = other;
exports.insufficientAiCredits = insufficientAiCredits;
@@ -0,0 +1,5 @@
1
+ "use strict";
2
+
3
+ Object.defineProperty(exports, "__esModule", {
4
+ value: true
5
+ });
@@ -0,0 +1,50 @@
1
/** Fields shared by every schema node: a `type` tag and an optional description. */
export interface BasicSchema {
  type: string;
  description?: string;
}

/** Schema node whose `type` is `"string"`. */
export interface StringSchema extends BasicSchema {
  type: "string";
  enum?: string[];
  pattern?: string;
  minLength?: number;
  maxLength?: number;
}

/** Schema node whose `type` is `"number"` or `"integer"`. */
export interface NumberSchema extends BasicSchema {
  type: "number" | "integer";
  minimum?: number;
  exclusiveMinimum?: number;
  maximum?: number;
  exclusiveMaximum?: number;
  multipleOf?: number;
}

/** Schema node whose `type` is `"boolean"`; it adds no further fields. */
export interface BooleanSchema extends BasicSchema {
  type: "boolean";
}

/** Schema node whose `type` is `"array"`; `items` is the element schema. */
export interface ArraySchema extends BasicSchema {
  type: "array";
  items: JsonSchema;
  minItems?: number;
  maxItems?: number;
  uniqueItems?: boolean;
}

/** Schema node whose `type` is `"object"`; `properties` maps names to schemas. */
export interface ObjectSchema extends BasicSchema {
  type: "object";
  properties: Record<string, JsonSchema>;
  required?: string[];
  minProperties?: number;
  maxProperties?: number;
}

/** Any schema node; the `type` field discriminates between the variants. */
export type JsonSchema = StringSchema | NumberSchema | BooleanSchema | ArraySchema | ObjectSchema;
50
+
@@ -0,0 +1,5 @@
1
+ "use strict";
2
+
3
+ Object.defineProperty(exports, "__esModule", {
4
+ value: true
5
+ });