@intuned/browser-dev 2.2.3-test-build.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (206):
  1. package/.babelrc +21 -0
  2. package/.eslintignore +10 -0
  3. package/.eslintrc.js +39 -0
  4. package/LICENSE +43 -0
  5. package/dist/ai/export.d.js +5 -0
  6. package/dist/ai/export.d.ts +641 -0
  7. package/dist/ai/extractStructuredData.js +320 -0
  8. package/dist/ai/extractStructuredDataUsingAi.js +139 -0
  9. package/dist/ai/extractionHelpers/screenshotHelpers.js +56 -0
  10. package/dist/ai/extractionHelpers/validateSchema.js +148 -0
  11. package/dist/ai/index.d.ts +641 -0
  12. package/dist/ai/index.js +19 -0
  13. package/dist/ai/isPageLoaded.js +77 -0
  14. package/dist/ai/prompt.js +39 -0
  15. package/dist/ai/tests/testCheckAllTypesAreStrings.spec.js +137 -0
  16. package/dist/ai/tests/testExtractFromContent.spec.js +372 -0
  17. package/dist/ai/tests/testExtractStructuredData.spec.js +646 -0
  18. package/dist/ai/tests/testIsPageLoaded.spec.js +277 -0
  19. package/dist/ai/tools/index.js +48 -0
  20. package/dist/ai/types/errors.js +67 -0
  21. package/dist/ai/types/models.js +45 -0
  22. package/dist/ai/types/types.js +48 -0
  23. package/dist/ai/validators.js +167 -0
  24. package/dist/common/Logger/index.js +60 -0
  25. package/dist/common/Logger/types.js +5 -0
  26. package/dist/common/SdkError.js +50 -0
  27. package/dist/common/aiModelsValidations.js +32 -0
  28. package/dist/common/browser_scripts.js +2596 -0
  29. package/dist/common/ensureBrowserScripts.js +18 -0
  30. package/dist/common/extendedTest.js +148 -0
  31. package/dist/common/extractionHelpers.js +19 -0
  32. package/dist/common/formatZodError.js +18 -0
  33. package/dist/common/fuzzySearch/fuzzySearch.test.js +250 -0
  34. package/dist/common/fuzzySearch/levenshtein-search.js +298 -0
  35. package/dist/common/fuzzySearch/utils.js +23 -0
  36. package/dist/common/getModelProvider.js +18 -0
  37. package/dist/common/getSimplifiedHtml.js +122 -0
  38. package/dist/common/hashObject.js +32 -0
  39. package/dist/common/html2markdown/convertElementToMarkdown.js +469 -0
  40. package/dist/common/html2markdown/index.js +19 -0
  41. package/dist/common/jwtTokenManager.js +18 -0
  42. package/dist/common/loadRuntime.js +16 -0
  43. package/dist/common/locatorHelpers.js +41 -0
  44. package/dist/common/matching/collectStrings.js +32 -0
  45. package/dist/common/matching/levenshtein.js +40 -0
  46. package/dist/common/matching/matching.js +317 -0
  47. package/dist/common/matching/types.js +1 -0
  48. package/dist/common/noEmpty.js +9 -0
  49. package/dist/common/saveSnapshotWithExamples.js +60 -0
  50. package/dist/common/tests/testEnsureBrowserScript.spec.js +31 -0
  51. package/dist/common/xpathMapping.js +107 -0
  52. package/dist/helpers/clickUntilExhausted.js +85 -0
  53. package/dist/helpers/downloadFile.js +125 -0
  54. package/dist/helpers/export.d.js +5 -0
  55. package/dist/helpers/export.d.ts +1220 -0
  56. package/dist/helpers/extractMarkdown.js +35 -0
  57. package/dist/helpers/filterEmptyValues.js +54 -0
  58. package/dist/helpers/gotoUrl.js +98 -0
  59. package/dist/helpers/index.d.ts +1220 -0
  60. package/dist/helpers/index.js +128 -0
  61. package/dist/helpers/processDate.js +25 -0
  62. package/dist/helpers/resolveUrl.js +64 -0
  63. package/dist/helpers/sanitizeHtml.js +74 -0
  64. package/dist/helpers/saveFileToS3.js +50 -0
  65. package/dist/helpers/scrollToLoadContent.js +57 -0
  66. package/dist/helpers/tests/extendedTest.js +130 -0
  67. package/dist/helpers/tests/testClickUntilExhausted.spec.js +387 -0
  68. package/dist/helpers/tests/testDownloadFile.spec.js +204 -0
  69. package/dist/helpers/tests/testExtractMarkdown.spec.js +290 -0
  70. package/dist/helpers/tests/testFilterEmptyValues.spec.js +151 -0
  71. package/dist/helpers/tests/testGoToUrl.spec.js +37 -0
  72. package/dist/helpers/tests/testProcessDate.spec.js +13 -0
  73. package/dist/helpers/tests/testResolveUrl.spec.js +341 -0
  74. package/dist/helpers/tests/testSanitizeHtml.spec.js +330 -0
  75. package/dist/helpers/tests/testScrollToLoadContent.spec.js +163 -0
  76. package/dist/helpers/tests/testValidateDataUsingSchema.spec.js +342 -0
  77. package/dist/helpers/tests/testWithDomSettledWait.spec.js +164 -0
  78. package/dist/helpers/tests/testWithNetworkIdleWait.spec.js +114 -0
  79. package/dist/helpers/types/Attachment.js +115 -0
  80. package/dist/helpers/types/CustomTypeRegistry.js +48 -0
  81. package/dist/helpers/types/RunEnvironment.js +18 -0
  82. package/dist/helpers/types/ValidationError.js +17 -0
  83. package/dist/helpers/types/index.js +51 -0
  84. package/dist/helpers/uploadFileToS3.js +154 -0
  85. package/dist/helpers/utils/getS3Client.js +22 -0
  86. package/dist/helpers/utils/index.js +73 -0
  87. package/dist/helpers/utils/isDownload.js +10 -0
  88. package/dist/helpers/utils/isGenerateCodeMode.js +9 -0
  89. package/dist/helpers/utils/isLocator.js +9 -0
  90. package/dist/helpers/utils/jwtTokenManager.js +18 -0
  91. package/dist/helpers/validateDataUsingSchema.js +103 -0
  92. package/dist/helpers/waitForDomSettled.js +90 -0
  93. package/dist/helpers/withNetworkSettledWait.js +91 -0
  94. package/dist/index.d.js +16 -0
  95. package/dist/index.d.ts +10 -0
  96. package/dist/index.js +16 -0
  97. package/dist/intunedServices/ApiGateway/aiApiGateway.js +99 -0
  98. package/dist/intunedServices/ApiGateway/factory.js +13 -0
  99. package/dist/intunedServices/ApiGateway/providers/Anthropic.js +26 -0
  100. package/dist/intunedServices/ApiGateway/providers/Gemini.js +29 -0
  101. package/dist/intunedServices/ApiGateway/providers/OpenAI.js +29 -0
  102. package/dist/intunedServices/ApiGateway/tests/testApiGateway.spec.js +224 -0
  103. package/dist/intunedServices/ApiGateway/types.js +11 -0
  104. package/dist/intunedServices/cache/cache.js +61 -0
  105. package/dist/intunedServices/cache/index.js +12 -0
  106. package/dist/intunedServices/cache/tests/testCache.spec.js +117 -0
  107. package/dist/optimized-extractors/common/buildExamplesPrompt.js +12 -0
  108. package/dist/optimized-extractors/common/buildImagesFromPage.js +55 -0
  109. package/dist/optimized-extractors/common/extractStructuredDataUsingClaude.js +135 -0
  110. package/dist/optimized-extractors/common/extractStructuredDataUsingGoogle.js +37 -0
  111. package/dist/optimized-extractors/common/extractStructuredDataUsingOpenAi.js +132 -0
  112. package/dist/optimized-extractors/common/extractStrucutredDataUsingAiInstance.js +122 -0
  113. package/dist/optimized-extractors/common/findTableHeaders.js +162 -0
  114. package/dist/optimized-extractors/common/index.js +55 -0
  115. package/dist/optimized-extractors/common/isTableHeaderOrFooter.js +84 -0
  116. package/dist/optimized-extractors/common/matching/matching.js +212 -0
  117. package/dist/optimized-extractors/common/matching/matching.test.js +655 -0
  118. package/dist/optimized-extractors/common/matching/types.js +18 -0
  119. package/dist/optimized-extractors/common/matching/utils.js +184 -0
  120. package/dist/optimized-extractors/common/utils.js +58 -0
  121. package/dist/optimized-extractors/export.d.js +5 -0
  122. package/dist/optimized-extractors/export.d.ts +397 -0
  123. package/dist/optimized-extractors/extractArray.js +120 -0
  124. package/dist/optimized-extractors/extractObject.js +104 -0
  125. package/dist/optimized-extractors/index.d.ts +397 -0
  126. package/dist/optimized-extractors/index.js +31 -0
  127. package/dist/optimized-extractors/listExtractionHelpers/__tests__/dynamicListExtractor.spec.js +312 -0
  128. package/dist/optimized-extractors/listExtractionHelpers/__tests__/findSetOfXpathsToCreateAnArrayExtractor.test.js +22 -0
  129. package/dist/optimized-extractors/listExtractionHelpers/__tests__/getContainerElement.test.js +21 -0
  130. package/dist/optimized-extractors/listExtractionHelpers/__tests__/partOfSameArrayXpath.test.js +42 -0
  131. package/dist/optimized-extractors/listExtractionHelpers/__tests__/verifyThatAllXpathsArePartOfSameArray.test.js +9 -0
  132. package/dist/optimized-extractors/listExtractionHelpers/dynamicListExtractor.js +152 -0
  133. package/dist/optimized-extractors/listExtractionHelpers/errors.js +46 -0
  134. package/dist/optimized-extractors/listExtractionHelpers/getListMatches.js +14 -0
  135. package/dist/optimized-extractors/listExtractionHelpers/runAiExtraction.js +240 -0
  136. package/dist/optimized-extractors/listExtractionHelpers/typesAndSchema.js +5 -0
  137. package/dist/optimized-extractors/listExtractionHelpers/utils/extractPropertiesUsingGPTFromArray.js +277 -0
  138. package/dist/optimized-extractors/listExtractionHelpers/utils/extractStructuredListUsingAi.js +44 -0
  139. package/dist/optimized-extractors/listExtractionHelpers/utils/getListContainerXpath.js +94 -0
  140. package/dist/optimized-extractors/listExtractionHelpers/utils/getRelativeContainerXpathSelector.js +20 -0
  141. package/dist/optimized-extractors/listExtractionHelpers/utils/getSimplifiedHtmlPerListItem.js +21 -0
  142. package/dist/optimized-extractors/listExtractionHelpers/utils/tablesUtils.js +48 -0
  143. package/dist/optimized-extractors/listExtractionHelpers/utils/validateOptions.js +52 -0
  144. package/dist/optimized-extractors/models/anthropicModel.js +23 -0
  145. package/dist/optimized-extractors/models/openaiModel.js +23 -0
  146. package/dist/optimized-extractors/objectExtractionHelpers/AIExtractors.js +73 -0
  147. package/dist/optimized-extractors/objectExtractionHelpers/__tests__/checksumUtils.test.js +103 -0
  148. package/dist/optimized-extractors/objectExtractionHelpers/__tests__/testObjectExtractorFromLocator.spec.js +107 -0
  149. package/dist/optimized-extractors/objectExtractionHelpers/__tests__/testObjectExtractorFromPage.spec.js +107 -0
  150. package/dist/optimized-extractors/objectExtractionHelpers/calculateObjectExampleHash.js +28 -0
  151. package/dist/optimized-extractors/objectExtractionHelpers/captureSnapshot.js +26 -0
  152. package/dist/optimized-extractors/objectExtractionHelpers/checksumUtils.js +32 -0
  153. package/dist/optimized-extractors/objectExtractionHelpers/constants.js +7 -0
  154. package/dist/optimized-extractors/objectExtractionHelpers/dynamicObjectExtractor.js +106 -0
  155. package/dist/optimized-extractors/objectExtractionHelpers/errors.js +42 -0
  156. package/dist/optimized-extractors/objectExtractionHelpers/findDomMatches.js +54 -0
  157. package/dist/optimized-extractors/objectExtractionHelpers/getSimplifiedHtml.js +122 -0
  158. package/dist/optimized-extractors/objectExtractionHelpers/typesAndSchemas.js +5 -0
  159. package/dist/optimized-extractors/objectExtractionHelpers/validateDynamicObjectExtractorOptions.js +52 -0
  160. package/dist/optimized-extractors/types/aiModelsValidation.js +45 -0
  161. package/dist/optimized-extractors/types/errors.js +42 -0
  162. package/dist/optimized-extractors/types/jsonSchema.d.js +5 -0
  163. package/dist/optimized-extractors/types/jsonSchema.d.ts +50 -0
  164. package/dist/optimized-extractors/types/types.js +5 -0
  165. package/dist/optimized-extractors/validators.js +152 -0
  166. package/dist/vite-env.d.js +1 -0
  167. package/dist/vite-env.d.ts +9 -0
  168. package/docs.md +14 -0
  169. package/generated-docs/ai/functions/extractStructuredData.mdx +255 -0
  170. package/generated-docs/ai/functions/isPageLoaded.mdx +88 -0
  171. package/generated-docs/ai/interfaces/ArraySchema.mdx +36 -0
  172. package/generated-docs/ai/interfaces/BasicSchema.mdx +14 -0
  173. package/generated-docs/ai/interfaces/BooleanSchema.mdx +28 -0
  174. package/generated-docs/ai/interfaces/ImageBufferContentItem.mdx +16 -0
  175. package/generated-docs/ai/interfaces/ImageUrlContentItem.mdx +16 -0
  176. package/generated-docs/ai/interfaces/NumberSchema.mdx +35 -0
  177. package/generated-docs/ai/interfaces/ObjectSchema.mdx +39 -0
  178. package/generated-docs/ai/interfaces/StringSchema.mdx +35 -0
  179. package/generated-docs/ai/interfaces/TextContentItem.mdx +14 -0
  180. package/generated-docs/ai/type-aliases/ContentItem.mdx +12 -0
  181. package/generated-docs/ai/type-aliases/JsonSchema.mdx +47 -0
  182. package/generated-docs/ai/type-aliases/SUPPORTED_MODELS.mdx +85 -0
  183. package/generated-docs/helpers/functions/downloadFile.mdx +99 -0
  184. package/generated-docs/helpers/functions/extractMarkdown.mdx +56 -0
  185. package/generated-docs/helpers/functions/filterEmptyValues.mdx +51 -0
  186. package/generated-docs/helpers/functions/goToUrl.mdx +124 -0
  187. package/generated-docs/helpers/functions/processDate.mdx +55 -0
  188. package/generated-docs/helpers/functions/resolveUrl.mdx +165 -0
  189. package/generated-docs/helpers/functions/sanitizeHtml.mdx +113 -0
  190. package/generated-docs/helpers/functions/saveFileToS3.mdx +127 -0
  191. package/generated-docs/helpers/functions/scrollToLoadContent.mdx +89 -0
  192. package/generated-docs/helpers/functions/uploadFileToS3.mdx +121 -0
  193. package/generated-docs/helpers/functions/validateDataUsingSchema.mdx +90 -0
  194. package/generated-docs/helpers/functions/waitForDomSettled.mdx +91 -0
  195. package/generated-docs/helpers/functions/withNetworkSettledWait.mdx +76 -0
  196. package/generated-docs/helpers/interfaces/Attachment.mdx +56 -0
  197. package/generated-docs/helpers/interfaces/S3Configs.mdx +52 -0
  198. package/generated-docs/helpers/interfaces/SanitizeHtmlOptions.mdx +22 -0
  199. package/generated-docs/helpers/type-aliases/AttachmentType.mdx +10 -0
  200. package/generated-docs/helpers/type-aliases/FileType.mdx +61 -0
  201. package/generated-docs/helpers/type-aliases/Trigger.mdx +62 -0
  202. package/how-to-run-tests.md +10 -0
  203. package/intuned-runtime-setup.md +13 -0
  204. package/package.json +119 -0
  205. package/tsconfig.eslint.json +5 -0
  206. package/tsconfig.json +26 -0
@@ -0,0 +1,120 @@
1
+ "use strict";
2
+
3
+ Object.defineProperty(exports, "__esModule", {
4
+ value: true
5
+ });
6
+ exports.extractArrayFromPage = exports.extractArrayFromLocator = void 0;
7
+ var _validators = require("./validators");
8
+ var _formatZodError = require("../common/formatZodError");
9
+ var _dynamicListExtractor = require("./listExtractionHelpers/dynamicListExtractor");
10
+ var _SdkError = require("../common/SdkError");
11
+ var _Logger = require("../common/Logger");
12
/**
 * Extracts an array of entities from a page by delegating to `dynamicListExtractor`.
 *
 * Flow: capture the caller's stack, validate `page` and `options`, run the
 * extractor, then translate any error result into a logged message plus an
 * `SdkError` whose message is the error type.
 *
 * @param page - Page to extract from; a falsy value is rejected up front.
 * @param options - Raw user options; validated with `extractArrayOptimizedInputSchema`.
 * @returns The extractor's success value. For the `InsufficientAiCredits` error
 *   type an empty array is returned instead of throwing.
 *   NOTE(review): `extractArrayFromLocator` throws for that same error type —
 *   confirm which behaviour is intended.
 * @throws SdkError when `page` is falsy, when `options` fail validation, or when
 *   the extractor reports any other error type.
 */
const extractArrayFromPage = async (page, options) => {
  // Captured first so every SdkError below carries the caller's stack.
  const originalPositionStack = (0, _SdkError.captureUserStack)();
  if (!page) {
    throw new _SdkError.SdkError("Invalid page object, page must be an instance of playwright page", originalPositionStack);
  }
  const optionsValidationResults = _validators.extractArrayOptimizedInputSchema.safeParse(options);
  if (!optionsValidationResults.success) {
    const errors = (0, _formatZodError.formatZodError)(optionsValidationResults.error);
    const message = `invalid extractArrayFromPage input: ${errors.join("\n")}`;
    throw new _SdkError.SdkError(message, originalPositionStack);
  }
  const validOptions = optionsValidationResults.data;
  // Resolve the name once so the extractor call and the log lines agree
  // (logging the raw option printed "undefined" whenever the default was used).
  const itemEntityName = validOptions.itemEntityName ?? "data";
  const result = await (0, _dynamicListExtractor.dynamicListExtractor)(page, validOptions.label, {
    itemEntityName,
    itemEntitySchema: validOptions.itemEntitySchema,
    strategy: validOptions.strategy,
    optionalPropertiesInvalidator: validOptions.optionalPropertiesInvalidator,
    variantKey: validOptions.variantKey,
    prompt: validOptions.prompt,
    apiKey: validOptions.apiKey
  });
  // `result` is presumably a neverthrow-style Result (isErr / error / value) — TODO confirm.
  if (result.isErr()) {
    const logPrefix = `Optimized array extractor ${itemEntityName}`;
    switch (result.error.type) {
      case "InvalidSearchRegion":
        _Logger.logger.error(`${logPrefix} - Search region is invalid. Please make sure container is correct. Object extraction relies on the container when passed.`);
        throw new _SdkError.SdkError("InvalidSearchRegion", originalPositionStack);
      case "InvalidInput":
        _Logger.logger.error(`${logPrefix} - Invalid input: ${result.error.context}`);
        throw new _SdkError.SdkError("InvalidInput", originalPositionStack);
      case "InvalidAddressUrl":
        _Logger.logger.error(`${logPrefix} - Invalid address url: ${result.error.context}`);
        throw new _SdkError.SdkError("InvalidAddressUrl", originalPositionStack);
      case "InvalidExtractionResult":
        _Logger.logger.error(`${logPrefix} - Invalid extraction result: ${result.error.context}`);
        throw new _SdkError.SdkError("InvalidExtractionResult", originalPositionStack);
      case "InvalidList":
        _Logger.logger.error(`${logPrefix} - Invalid list: ${result.error.context}`);
        throw new _SdkError.SdkError("InvalidList", originalPositionStack);
      case "NoResultsFound":
        _Logger.logger.error(`${logPrefix} - Invalid results: No results found were found. It's probably that the page is empty.`);
        throw new _SdkError.SdkError("NoResultsFound", originalPositionStack);
      case "RequiredPropertyNotExtracted":
        _Logger.logger.error(`${logPrefix} - Required property not extracted: ${result.error.context}`);
        throw new _SdkError.SdkError("RequiredPropertyNotExtracted", originalPositionStack);
      case "Other":
        // Print the raw error object as well; the template string below only shows its string form.
        console.log(result.error.error ?? "");
        _Logger.logger.error(`${logPrefix} - Other error: ${result.error.error ?? result.error.context}`);
        throw new _SdkError.SdkError("Other", originalPositionStack);
      case "InsufficientAiCredits":
        // Deliberately non-fatal here: log the context (if any) and return no items.
        result.error.context && _Logger.logger.error(result.error.context);
        return [];
      default:
        // An unrecognised error type used to fall through and return `undefined`.
        _Logger.logger.error(`${logPrefix} - Unhandled error type: ${result.error.type}`);
        throw new _SdkError.SdkError(String(result.error.type ?? "Other"), originalPositionStack);
    }
  }
  return result.value;
};
67
+ exports.extractArrayFromPage = extractArrayFromPage;
68
/**
 * Extracts an array of entities from the region covered by a locator by
 * delegating to `dynamicListExtractor` with the locator as `searchRegion`.
 *
 * Flow: capture the caller's stack, validate `locator` and `options`, run the
 * extractor on the locator's page, then translate any error result into a
 * logged message plus an `SdkError` whose message is the error type.
 *
 * @param locator - Locator bounding the search region; a falsy value is rejected up front.
 * @param options - Raw user options; validated with `extractArrayOptimizedInputSchema`.
 * @returns The extractor's success value.
 * @throws SdkError when `locator` is falsy, when `options` fail validation, or
 *   when the extractor reports an error of any type.
 */
const extractArrayFromLocator = async (locator, options) => {
  // Captured first so every SdkError below carries the caller's stack.
  const originalPositionStack = (0, _SdkError.captureUserStack)();
  if (!locator) {
    throw new _SdkError.SdkError("Invalid locator object, locator must be an instance of playwright locator", originalPositionStack);
  }
  const optionsValidationResults = _validators.extractArrayOptimizedInputSchema.safeParse(options);
  if (!optionsValidationResults.success) {
    const errors = (0, _formatZodError.formatZodError)(optionsValidationResults.error);
    const message = `invalid extractArrayFromLocator input: ${errors.join("\n")}`;
    throw new _SdkError.SdkError(message, originalPositionStack);
  }
  const validOptions = optionsValidationResults.data;
  // Same default as the page variant, and reused in the log lines below.
  const itemEntityName = validOptions.itemEntityName ?? "data";
  // Playwright's Locator.page() is synchronous, so no await is needed.
  const page = locator.page();
  const result = await (0, _dynamicListExtractor.dynamicListExtractor)(page, validOptions.label, {
    itemEntityName,
    itemEntitySchema: validOptions.itemEntitySchema,
    strategy: validOptions.strategy,
    optionalPropertiesInvalidator: validOptions.optionalPropertiesInvalidator,
    variantKey: validOptions.variantKey,
    searchRegion: locator,
    // Previously validated but never forwarded, so user-supplied values were ignored.
    prompt: validOptions.prompt,
    apiKey: validOptions.apiKey
  });
  // `result` is presumably a neverthrow-style Result (isErr / error / value) — TODO confirm.
  if (result.isErr()) {
    const logPrefix = `Optimized array extractor ${itemEntityName}`;
    switch (result.error.type) {
      case "InvalidSearchRegion":
        _Logger.logger.error(`${logPrefix} - Search region is invalid. Please make sure container is correct. Object extraction relies on the container when passed.`);
        // SdkError (not a bare Error) so the user stack is attached like every other case.
        throw new _SdkError.SdkError("InvalidSearchRegion", originalPositionStack);
      case "InvalidInput":
        _Logger.logger.error(`${logPrefix} - Invalid input: ${result.error.context}`);
        throw new _SdkError.SdkError("InvalidInput", originalPositionStack);
      case "InvalidAddressUrl":
        _Logger.logger.error(`${logPrefix} - Invalid address url: ${result.error.context}`);
        throw new _SdkError.SdkError("InvalidAddressUrl", originalPositionStack);
      case "InvalidExtractionResult":
        _Logger.logger.error(`${logPrefix} - Invalid extraction result: ${result.error.context}`);
        throw new _SdkError.SdkError("InvalidExtractionResult", originalPositionStack);
      case "InvalidList":
        _Logger.logger.error(`${logPrefix} - Invalid list: ${result.error.context}`);
        throw new _SdkError.SdkError("InvalidList", originalPositionStack);
      case "NoResultsFound":
        _Logger.logger.error(`${logPrefix} - Invalid results: No results found were found. It's probably that the page is empty.`);
        throw new _SdkError.SdkError("NoResultsFound", originalPositionStack);
      case "RequiredPropertyNotExtracted":
        _Logger.logger.error(`${logPrefix} - Required property not extracted: ${result.error.context}`);
        throw new _SdkError.SdkError("RequiredPropertyNotExtracted", originalPositionStack);
      case "Other":
        // Print the raw error object as well; the template string below only shows its string form.
        console.log(result.error.error ?? "");
        _Logger.logger.error(`${logPrefix} - Other error: ${result.error.error ?? result.error.context}`);
        throw new _SdkError.SdkError("Other", originalPositionStack);
      case "InsufficientAiCredits":
        // Log the context through the logger; the old console.log also dumped the captured stack.
        result.error.context && _Logger.logger.error(result.error.context);
        throw new _SdkError.SdkError("InsufficientAiCredits", originalPositionStack);
      default:
        // An unrecognised error type used to fall through and return `undefined`.
        _Logger.logger.error(`${logPrefix} - Unhandled error type: ${result.error.type}`);
        throw new _SdkError.SdkError(String(result.error.type ?? "Other"), originalPositionStack);
    }
  }
  return result.value;
};
120
+ exports.extractArrayFromLocator = extractArrayFromLocator;
@@ -0,0 +1,104 @@
1
+ "use strict";
2
+
3
+ Object.defineProperty(exports, "__esModule", {
4
+ value: true
5
+ });
6
+ exports.extractObjectFromPage = exports.extractObjectFromLocator = void 0;
7
+ var _validators = require("./validators");
8
+ var _formatZodError = require("../common/formatZodError");
9
+ var _dynamicObjectExtractor = require("./objectExtractionHelpers/dynamicObjectExtractor");
10
+ var _SdkError = require("../common/SdkError");
11
/**
 * Extracts a single entity from a page by delegating to `dynamicObjectExtractor`.
 *
 * Flow: capture the caller's stack, validate `page` and `options`, run the
 * extractor, then translate any error result into a console message plus an
 * `SdkError`.
 *
 * @param page - Page to extract from; a falsy value is rejected up front.
 * @param options - Raw user options; validated with `extractObjectOptimizedInputSchema`.
 * @returns The extractor's success value.
 * @throws SdkError when `page` is falsy, when `options` fail validation, or when
 *   the extractor reports an error. Note that the `InvalidSearchRegion` error
 *   type surfaces with the message "InvalidContainer".
 */
const extractObjectFromPage = async (page, options) => {
  // Captured first so every SdkError below carries the caller's stack.
  const originalPositionStack = (0, _SdkError.captureUserStack)();
  if (!page) {
    throw new _SdkError.SdkError("Invalid page object, page must be an instance of playwright page", originalPositionStack);
  }
  const optionsValidationResults = _validators.extractObjectOptimizedInputSchema.safeParse(options);
  if (!optionsValidationResults.success) {
    const errors = (0, _formatZodError.formatZodError)(optionsValidationResults.error);
    const message = `invalid extractObjectFromPage input: ${errors.join("\n")}`;
    throw new _SdkError.SdkError(message, originalPositionStack);
  }
  const parsedOptions = optionsValidationResults.data;
  // Resolve the name once so the extractor call and the log lines agree.
  const entityName = parsedOptions.entityName ?? "data";
  // Use the validated label rather than the raw input, like every other field.
  const result = await (0, _dynamicObjectExtractor.dynamicObjectExtractor)(page, parsedOptions.label, {
    entityName,
    entitySchema: parsedOptions.entitySchema,
    strategy: parsedOptions.strategy,
    optionalPropertiesInvalidator: parsedOptions.optionalPropertiesInvalidator,
    variantKey: parsedOptions.variantKey,
    prompt: parsedOptions.prompt,
    apiKey: parsedOptions.apiKey
  });
  // `result` is presumably a neverthrow-style Result (isErr / error / value) — TODO confirm.
  if (result.isErr()) {
    const logPrefix = `Object Extractor ${entityName}`;
    switch (result.error.type) {
      case "InvalidSearchRegion":
        console.log(`${logPrefix} - Search region is invalid. Please make sure container is correct. Object extraction relies on the container when passed.`);
        throw new _SdkError.SdkError("InvalidContainer", originalPositionStack);
      case "InvalidInput":
        console.log(`${logPrefix} - Invalid input: ${result.error.context}`);
        throw new _SdkError.SdkError("InvalidInput", originalPositionStack);
      case "InvalidExtractionResult":
        console.log(`${logPrefix} - Invalid extraction result: ${result.error.context}`);
        throw new _SdkError.SdkError("InvalidExtractionResult", originalPositionStack);
      case "InvalidPageState":
        console.log(`${logPrefix} - Invalid page state: ${result.error.context}`);
        throw new _SdkError.SdkError("InvalidPageState", originalPositionStack);
      case "Other":
        console.log(`${logPrefix} - Other error: ${result.error.context}`);
        throw new _SdkError.SdkError("Other", originalPositionStack);
      case "RequiredPropertyNotExtracted":
        console.log(`${logPrefix} - Required property not extracted: ${result.error.context}`);
        throw new _SdkError.SdkError("RequiredPropertyNotExtracted", originalPositionStack);
      case "InsufficientAiCredits":
        console.log(result.error.context);
        throw new _SdkError.SdkError("InsufficientAiCredits", originalPositionStack);
      default:
        // An unrecognised error type used to fall through and return `undefined`.
        console.log(`${logPrefix} - Unhandled error type: ${result.error.type}`);
        throw new _SdkError.SdkError(String(result.error.type ?? "Other"), originalPositionStack);
    }
  }
  return result.value;
};
56
+ exports.extractObjectFromPage = extractObjectFromPage;
57
/**
 * Extracts a single entity from the region covered by a locator by delegating
 * to `dynamicObjectExtractor` with the locator as `searchRegion`.
 *
 * Flow: capture the caller's stack, validate `locator` and `options`, run the
 * extractor on the locator's page, then translate any error result into a
 * console message plus an `SdkError`.
 *
 * @param locator - Locator bounding the search region; a falsy value is rejected up front.
 * @param options - Raw user options; validated with `extractObjectOptimizedInputSchema`.
 * @returns The extractor's success value.
 * @throws SdkError when `locator` is falsy, when `options` fail validation, or
 *   when the extractor reports an error. Note that the `InvalidSearchRegion`
 *   error type surfaces with the message "InvalidContainer".
 */
const extractObjectFromLocator = async (locator, options) => {
  // Captured first so every SdkError below carries the caller's stack.
  const originalPositionStack = (0, _SdkError.captureUserStack)();
  if (!locator) {
    throw new _SdkError.SdkError("Invalid locator object, locator must be an instance of playwright locator", originalPositionStack);
  }
  const optionsValidationResults = _validators.extractObjectOptimizedInputSchema.safeParse(options);
  if (!optionsValidationResults.success) {
    const errors = (0, _formatZodError.formatZodError)(optionsValidationResults.error);
    const message = `invalid extractObjectFromLocator input: ${errors.join("\n")}`;
    throw new _SdkError.SdkError(message, originalPositionStack);
  }
  const data = optionsValidationResults.data;
  // Resolve the name once so the extractor call and the log lines agree.
  const entityName = data.entityName ?? "data";
  const page = locator.page();
  // Use the validated label rather than the raw input, like every other field.
  const result = await (0, _dynamicObjectExtractor.dynamicObjectExtractor)(page, data.label, {
    entityName,
    entitySchema: data.entitySchema,
    strategy: data.strategy,
    optionalPropertiesInvalidator: data.optionalPropertiesInvalidator,
    variantKey: data.variantKey,
    searchRegion: locator,
    prompt: data.prompt,
    apiKey: data.apiKey
  });
  // `result` is presumably a neverthrow-style Result (isErr / error / value) — TODO confirm.
  if (result.isErr()) {
    const logPrefix = `Object Extractor ${entityName}`;
    switch (result.error.type) {
      case "InvalidSearchRegion":
        console.log(`${logPrefix} - Search region is invalid. Please make sure container is correct. Object extraction relies on the container when passed.`);
        throw new _SdkError.SdkError("InvalidContainer", originalPositionStack);
      case "InvalidInput":
        console.log(`${logPrefix} - Invalid input: ${result.error.context}`);
        throw new _SdkError.SdkError("InvalidInput", originalPositionStack);
      case "InvalidExtractionResult":
        console.log(`${logPrefix} - Invalid extraction result: ${result.error.context}`);
        throw new _SdkError.SdkError("InvalidExtractionResult", originalPositionStack);
      case "InvalidPageState":
        console.log(`${logPrefix} - Invalid page state: ${result.error.context}`);
        throw new _SdkError.SdkError("InvalidPageState", originalPositionStack);
      case "Other":
        console.log(`${logPrefix} - Other error: ${result.error.context}`);
        throw new _SdkError.SdkError("Other", originalPositionStack);
      case "RequiredPropertyNotExtracted":
        console.log(`${logPrefix} - Required property not extracted: ${result.error.context}`);
        throw new _SdkError.SdkError("RequiredPropertyNotExtracted", originalPositionStack);
      case "InsufficientAiCredits":
        console.log(result.error.context);
        throw new _SdkError.SdkError("InsufficientAiCredits", originalPositionStack);
      default:
        // An unrecognised error type used to fall through and return `undefined`.
        console.log(`${logPrefix} - Unhandled error type: ${result.error.type}`);
        throw new _SdkError.SdkError(String(result.error.type ?? "Other"), originalPositionStack);
    }
  }
  return result.value;
};
104
+ exports.extractObjectFromLocator = extractObjectFromLocator;
@@ -0,0 +1,397 @@
1
+ import { Locator, Page } from "playwright-core";
2
+ import { BasicSchema } from "./types/jsonSchema";
3
+
4
+ /**
5
+ * This strategy uses a screenshot of the page/locator, with some processing, to extract the needed data.
6
+ * Use it when the information you are trying to extract is not present in the DOM as text but can be identified visually.
7
+ * @interface
8
+ * @property model - the model to use in the extraction process.
9
+ * @property type - the type of the strategy
10
+ */
11
+ export interface ImageStrategy {
12
+ model:
13
+ | "claude-3-haiku"
14
+ | "claude-3-haiku-20240307"
15
+ | "claude-3.5-sonnet"
16
+ | "claude-3-5-sonnet-20240620"
17
+ | "claude-3-5-sonnet-20241022"
18
+ | "claude-opus-4"
19
+ | "claude-opus-4-20250514"
20
+ | "claude-sonnet-4"
21
+ | "claude-sonnet-4-20250514"
22
+ | "gpt4-turbo"
23
+ | "gpt-4-turbo-2024-04-09"
24
+ | "gpt-4o"
25
+ | "gpt-4o-2024-05-13"
26
+ | "gpt-4o-mini"
27
+ | "gpt-4o-mini-2024-07-18"
28
+ | "gemini-1.5-pro"
29
+ | "gemini-1.5-pro-002"
30
+ | "gemini-1.5-flash-8b"
31
+ | "gemini-1.5-flash-8b-002"
32
+ | "gemini-1.5-flash"
33
+ | "gemini-1.5-flash-002"
34
+ | "gemini-2.0-flash-exp";
35
+ type: "IMAGE";
36
+ }
37
+ /**
38
+ * This strategy uses the HTML of the page/locator to extract the needed data. Some attributes are filtered out to reduce context.
39
+ * Only the following attributes are included: `aria-label`, `data-name`, `name`, `type`, `placeholder`, `value`, `role`, `title`, `href`, `id`, `alt`.
40
+ *
41
+ * @interface
42
+ * @property model - the model to use in the extraction process
43
+ * @property type - the type of the strategy
44
+ */
45
+ export interface HtmlStrategy {
46
+ model:
47
+ | "claude-3-haiku"
48
+ | "claude-3-haiku-20240307"
49
+ | "claude-3-5-haiku"
50
+ | "claude-3-5-haiku-20241022"
51
+ | "claude-3.5-sonnet"
52
+ | "claude-3-5-sonnet-20240620"
53
+ | "claude-3-5-sonnet-20241022"
54
+ | "claude-opus-4"
55
+ | "claude-opus-4-20250514"
56
+ | "claude-sonnet-4"
57
+ | "claude-sonnet-4-20250514"
58
+ | "gpt4-turbo"
59
+ | "gpt-4-turbo-2024-04-09"
60
+ | "gpt3.5-turbo"
61
+ | "gpt-3.5-turbo-0125"
62
+ | "gpt-4o"
63
+ | "gpt-4o-2024-05-13"
64
+ | "gpt-4o-mini"
65
+ | "gpt-4o-mini-2024-07-18"
66
+ | "gemini-1.5-pro"
67
+ | "gemini-1.5-pro-002"
68
+ | "gemini-1.5-flash-8b"
69
+ | "gemini-1.5-flash-8b-002"
70
+ | "gemini-1.5-flash"
71
+ | "gemini-1.5-flash-002"
72
+ | "gemini-2.0-flash-exp";
73
+ type: "HTML";
74
+ }
75
+ /**
76
+ * Extracts an array of structured data from a web page in an optimized way. This function uses AI for the first n times, until it collects multiple examples;
77
+ * it then builds reliable selectors in the background to make the process more efficient.
78
+ * @deprecated This function is deprecated and will be removed in the future.
79
+ * @param page - The Playwright Page object from which to extract the data.
80
+ * @param options.label - A label for this extraction process, used for billing and monitoring.
81
+ * @param options.itemEntityName - The name of the entity items being extracted. It must be between 1 and 50 characters long and can only contain letters, digits, periods, underscores, and hyphens.
82
+ * @param options.itemEntitySchema - The schema of the entity items being extracted.
83
+ * @param options.strategy - Optional. The strategy to use for extraction. If not provided, the HTML strategy with Claude Haiku is used.
84
+ * @param options.prompt - Optional. A prompt to guide the extraction process.
85
+ * @param options.optionalPropertiesInvalidator - Optional. A function to invalidate optional properties.
86
+ * @param options.variantKey - Optional. A variant key for the extraction process, use this when the page has multiple variants/shapes.
87
+ * @param options.apiKey - Optional. An API key to use for the AI extraction. Extractions made with your API key will not be billed to your account.
88
+ * @returns A promise that resolves to a list of extracted data.
89
+ *
90
+ * @example
91
+ * ```typescript extractArrayFromPage
92
+ * import { extractArrayFromPage } from "@intuned/sdk/optimized-extractors";
93
+ *
94
+ * await page.goto("https://books.toscrape.com/")
95
+ * const books = await extractArrayFromPage(page,
96
+ * {
97
+ * strategy: {
98
+ * model: "gpt4-turbo",
99
+ * type: "HTML"
100
+ * },
101
+ * itemEntityName: "book",
102
+ * label: "books-extraction",
103
+ * itemEntitySchema: {
104
+ * type: "object",
105
+ * required: ["name"],
106
+ * properties: {
107
+ * name: {
108
+ * type: "string",
109
+ * description: "book name",
110
+ * primary: true
111
+ * }
112
+ * }
113
+ * }
114
+ * },
115
+ * )
116
+ *
117
+ * console.log(books)
118
+ *
119
+ * // output:
120
+ * // [
121
+ * // ...
122
+ * // { name: 'Olio' },
123
+ * // { name: 'Mesaerion: The Best Science Fiction Stories 1800-1849' },
124
+ * // { name: 'Libertarianism for Beginners' },
125
+ * // { name: "It's Only the Himalayas" }
126
+ * // ...
127
+ * // ]
128
+ *
129
+ * ```
130
+ */
131
+ export declare function extractArrayFromPage(
132
+ page: Page,
133
+ options: {
134
+ label: string;
135
+ itemEntityName: string;
136
+ itemEntitySchema: SimpleArrayItemSchema;
137
+ strategy?: ImageStrategy | HtmlStrategy;
138
+ prompt?: string;
139
+ optionalPropertiesInvalidator?: (
140
+ result: Record<string, string>[]
141
+ ) => string[];
142
+ variantKey?: string;
143
+ apiKey?: string;
144
+ }
145
+ ): Promise<Record<string, string>[]>;
146
+
147
+ /**
148
+ * Extracts an array of structured data from a locator.
149
+ * @deprecated This function is deprecated and will be removed in the future.
150
+ * @param locator - The Playwright Locator object from which to extract the data.
151
+ * @param options.label - A label for this extraction process, used for billing and monitoring.
152
+ * @param options.itemEntityName - The name of the entity items being extracted. It must be between 1 and 50 characters long and can only contain letters, digits, periods, underscores, and hyphens.
153
+ * @param options.itemEntitySchema - The schema of the entity items being extracted.
154
+ * @param options.strategy - Optional. The strategy to use for extraction. If not provided, the HTML strategy with Claude Haiku is used.
155
+ * @param options.prompt - Optional. A prompt to guide the extraction process.
156
+ * @param options.optionalPropertiesInvalidator - Optional. A function to invalidate optional properties.
157
+ * @param options.variantKey - Optional. A variant key for the extraction process.
158
+ * @param options.apiKey - Optional. An API key to use for the AI extraction. Extractions made with your API key will not be billed to your account.
159
+ * @returns A promise that resolves to a list of extracted data.
160
+ *
161
+ * @example
162
+ * ```typescript extractArrayFromLocator
163
+ * import { extractArrayFromLocator } from "@intuned/sdk/optimized-extractors";
164
+ *
165
+ * await page.goto("https://books.toscrape.com/")
166
+ * const books = await extractArrayFromLocator(page.locator("section"),
167
+ * {
168
+ * itemEntityName: "book",
169
+ * label: "books-extraction",
170
+ * itemEntitySchema: {
171
+ * type: "object",
172
+ * required: ["name"],
173
+ * properties: {
174
+ * name: {
175
+ * type: "string",
176
+ * description: "book name",
177
+ * primary: true
178
+ * }
179
+ * }
180
+ * }
181
+ * },
182
+ * )
183
+ *
184
+ * console.log(books)
185
+ *
186
+ * // output:
187
+ * // [
188
+ * // ...
189
+ * // { name: 'Olio' },
190
+ * // { name: 'Mesaerion: The Best Science Fiction Stories 1800-1849' },
191
+ * // { name: 'Libertarianism for Beginners' },
192
+ * // { name: "It's Only the Himalayas" }
193
+ * // ...
194
+ * // ]
195
+ *
196
+ * ```
197
+ */
198
+ export declare function extractArrayFromLocator(
199
+ locator: Locator,
200
+ options: {
201
+ label: string;
202
+ itemEntityName: string;
203
+ itemEntitySchema: SimpleArrayItemSchema;
204
+ strategy?: ImageStrategy | HtmlStrategy;
205
+ prompt?: string;
206
+ optionalPropertiesInvalidator?: (
207
+ result: Record<string, string>[]
208
+ ) => string[];
209
+ variantKey?: string;
210
+ apiKey?: string;
211
+ }
212
+ ): Promise<Record<string, string>[]>;
213
+
214
+ /**
215
+ * Schema of a single string property inside a SimpleObjectSchema.
216
+ * @interface SimpleObjectStringSchema
217
+ * @extends BasicSchema
218
+ * @property type - The type of the schema, which is always "string".
219
+ */
220
+ interface SimpleObjectStringSchema extends BasicSchema {
221
+ type: "string";
222
+ }
223
+
224
+ /**
225
+ * Schema of a single string property inside a SimpleArrayItemSchema.
226
+ * @interface SimpleArrayStringSchema
227
+ * @extends BasicSchema
228
+ * @property type - The type of the schema, which is always "string".
229
+ * @property [primary] - Optional. Indicates whether this is a primary property.
230
+ */
231
+ interface SimpleArrayStringSchema extends BasicSchema {
232
+ type: "string";
233
+ primary?: boolean;
234
+ }
235
+
236
+ /**
237
+ * A simple object schema with properties.
238
+ * @interface SimpleObjectSchema
239
+ * @extends BasicSchema
240
+ * @property type - The type of the schema, which is always "object".
241
+ * @property properties - The properties of the object.
242
+ * @property required - The required properties of the object.
243
+ */
244
+ export interface SimpleObjectSchema extends BasicSchema {
245
+ type: "object";
246
+ properties: Record<string, SimpleObjectStringSchema>;
247
+ required: string[];
248
+ }
249
+
250
+ /**
251
+ * A simple array item schema with properties.
252
+ * @interface SimpleArrayItemSchema
253
+ * @extends BasicSchema
254
+ * @property type - The type of the schema, which is always "object".
255
+ * @property properties - The properties of the array item.
256
+ * @property required - The required properties of the array item.
257
+ */
258
+ export interface SimpleArrayItemSchema extends BasicSchema {
259
+ type: "object";
260
+ properties: Record<string, SimpleArrayStringSchema>;
261
+ required: string[];
262
+ }
263
+
264
+ /**
265
+ * Extracts a structured object from a web page.
266
+ * @deprecated This function is deprecated and will be removed in the future.
267
+ * @param page - The Playwright Page object from which to extract the data.
268
+ * @param options.label - A label for this extraction process, used for billing and monitoring.
269
+ * @param options.entityName - The name of the entity being extracted. It must be between 1 and 50 characters long and can only contain letters, digits, periods, underscores, and hyphens.
270
+ * @param options.entitySchema - The schema of the entity being extracted.
271
+ * @param options.strategy - Optional. The strategy to use for extraction. If not provided, the HTML strategy with Claude Haiku is used.
272
+ * @param options.prompt - Optional. A prompt to guide the extraction process.
273
+ * @param options.optionalPropertiesInvalidator - Optional. A function to invalidate optional properties.
274
+ * @param options.variantKey - Optional. A variant key for the extraction process.
275
+ * @param options.apiKey - Optional. An API key to use for the AI extraction. Extractions made with your API key will not be billed to your account.
276
+ * @returns A promise that resolves to the extracted object.
277
+ * @example
278
+ * ```typescript extractObjectFromPage
279
+ * import { extractObjectFromPage } from "@intuned/sdk/optimized-extractors";
280
+ *
281
+ * await page.goto("https://books.toscrape.com/catalogue/a-light-in-the-attic_1000/index.html")
282
+ * const book = await extractObjectFromPage(page,
283
+ * {
284
+ * entityName: "book",
285
+ * label: "book-extraction",
286
+ * entitySchema: {
287
+ * type: "object",
288
+ * required: ["name","price","reviews"],
289
+ * properties: {
290
+ * name: {
291
+ * type: "string",
292
+ * description: "book name",
293
+ * },
294
+ * price: {
295
+ * type: "string",
296
+ * description: "book price"
297
+ * },
298
+ * reviews: {
299
+ * type: "string",
300
+ * description: "Number of reviews"
301
+ * }
302
+ *
303
+ * }
304
+ * }
305
+ * },
306
+ * )
307
+ *
308
+ * console.log(book)
309
+ *
310
+ * // output:
311
+ * // { name: 'A Light in the Attic', price: '£51.77', reviews: '0' }
312
+ *
313
+ * ```
314
+ */
315
+ export declare function extractObjectFromPage(
316
+ page: Page,
317
+ options: {
318
+ label: string;
319
+ entityName: string;
320
+ entitySchema: SimpleObjectSchema;
321
+ strategy?: ImageStrategy | HtmlStrategy;
322
+ prompt?: string;
323
+ optionalPropertiesInvalidator?: (
324
+ result: Record<string, string | null> | null
325
+ ) => string[];
326
+ variantKey?: string;
327
+ apiKey?: string;
328
+ }
329
+ ): Promise<Record<string, string | null> | null>;
330
+
331
+ /**
332
+ * Extracts a structured object from a locator.
333
+ * @deprecated This function is deprecated and will be removed in the future.
334
+ * @param locator - The Playwright Locator object from which to extract the data.
335
+ * @param options.label - A label for this extraction process, used for billing and monitoring.
336
+ * @param options.entityName - The name of the entity being extracted. It must be between 1 and 50 characters long and can only contain letters, digits, periods, underscores, and hyphens.
337
+ * @param options.entitySchema - The schema of the entity being extracted.
338
+ * @param options.strategy - Optional. The strategy to use for extraction. If not provided, the HTML strategy with Claude Haiku is used.
339
+ * @param options.prompt - Optional. A prompt to guide the extraction process.
340
+ * @param options.optionalPropertiesInvalidator - Optional. A function to invalidate optional properties.
341
+ * @param options.variantKey - Optional. A variant key for the extraction process.
342
+ * @param options.apiKey - Optional. An API key to use for the AI extraction. Extractions made with your API key will not be billed to your account.
343
+ * @returns A promise that resolves to the extracted object.
344
+ *
345
+ * @example
346
+ * ```typescript extractObjectFromLocator
347
+ * import { extractObjectFromLocator } from "@intuned/sdk/optimized-extractors";
348
+ *
349
+ * await page.goto("https://books.toscrape.com/catalogue/a-light-in-the-attic_1000/index.html")
350
+ * const book = await extractObjectFromLocator(page.locator(".page_inner"),
351
+ * {
352
+ * entityName: "book",
353
+ * label: "book-extraction",
354
+ * entitySchema: {
355
+ * type: "object",
356
+ * required: ["name","price","reviews"],
357
+ * properties: {
358
+ * name: {
359
+ * type: "string",
360
+ * description: "book name",
361
+ * },
362
+ * price: {
363
+ * type: "string",
364
+ * description: "book price"
365
+ * },
366
+ * reviews: {
367
+ * type: "string",
368
+ * description: "Number of reviews"
369
+ * }
370
+ *
371
+ * }
372
+ * }
373
+ * },
374
+ * )
375
+ *
376
+ * console.log(book)
377
+ *
378
+ * // output:
379
+ * // { name: 'A Light in the Attic', price: '£51.77', reviews: '0' }
380
+ *
381
+ * ```
382
+ */
383
+ export declare function extractObjectFromLocator(
384
+ locator: Locator,
385
+ options: {
386
+ label: string;
387
+ entityName: string;
388
+ entitySchema: SimpleObjectSchema;
389
+ strategy?: ImageStrategy | HtmlStrategy;
390
+ prompt?: string;
391
+ optionalPropertiesInvalidator?: (
392
+ result: Record<string, string | null> | null
393
+ ) => string[];
394
+ variantKey?: string;
395
+ apiKey?: string;
396
+ }
397
+ ): Promise<Record<string, string | null> | null>;
@@ -0,0 +1,31 @@
1
+ "use strict";
2
+
3
+ Object.defineProperty(exports, "__esModule", {
4
+ value: true
5
+ });
6
+ Object.defineProperty(exports, "extractArrayFromLocator", {
7
+ enumerable: true,
8
+ get: function () {
9
+ return _extractArray.extractArrayFromLocator;
10
+ }
11
+ });
12
+ Object.defineProperty(exports, "extractArrayFromPage", {
13
+ enumerable: true,
14
+ get: function () {
15
+ return _extractArray.extractArrayFromPage;
16
+ }
17
+ });
18
+ Object.defineProperty(exports, "extractObjectFromLocator", {
19
+ enumerable: true,
20
+ get: function () {
21
+ return _extractObject.extractObjectFromLocator;
22
+ }
23
+ });
24
+ Object.defineProperty(exports, "extractObjectFromPage", {
25
+ enumerable: true,
26
+ get: function () {
27
+ return _extractObject.extractObjectFromPage;
28
+ }
29
+ });
30
+ var _extractArray = require("./extractArray");
31
+ var _extractObject = require("./extractObject");