@intuned/browser-dev 2.2.3-test-build.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (206) hide show
  1. package/.babelrc +21 -0
  2. package/.eslintignore +10 -0
  3. package/.eslintrc.js +39 -0
  4. package/LICENSE +43 -0
  5. package/dist/ai/export.d.js +5 -0
  6. package/dist/ai/export.d.ts +641 -0
  7. package/dist/ai/extractStructuredData.js +320 -0
  8. package/dist/ai/extractStructuredDataUsingAi.js +139 -0
  9. package/dist/ai/extractionHelpers/screenshotHelpers.js +56 -0
  10. package/dist/ai/extractionHelpers/validateSchema.js +148 -0
  11. package/dist/ai/index.d.ts +641 -0
  12. package/dist/ai/index.js +19 -0
  13. package/dist/ai/isPageLoaded.js +77 -0
  14. package/dist/ai/prompt.js +39 -0
  15. package/dist/ai/tests/testCheckAllTypesAreStrings.spec.js +137 -0
  16. package/dist/ai/tests/testExtractFromContent.spec.js +372 -0
  17. package/dist/ai/tests/testExtractStructuredData.spec.js +646 -0
  18. package/dist/ai/tests/testIsPageLoaded.spec.js +277 -0
  19. package/dist/ai/tools/index.js +48 -0
  20. package/dist/ai/types/errors.js +67 -0
  21. package/dist/ai/types/models.js +45 -0
  22. package/dist/ai/types/types.js +48 -0
  23. package/dist/ai/validators.js +167 -0
  24. package/dist/common/Logger/index.js +60 -0
  25. package/dist/common/Logger/types.js +5 -0
  26. package/dist/common/SdkError.js +50 -0
  27. package/dist/common/aiModelsValidations.js +32 -0
  28. package/dist/common/browser_scripts.js +2596 -0
  29. package/dist/common/ensureBrowserScripts.js +18 -0
  30. package/dist/common/extendedTest.js +148 -0
  31. package/dist/common/extractionHelpers.js +19 -0
  32. package/dist/common/formatZodError.js +18 -0
  33. package/dist/common/fuzzySearch/fuzzySearch.test.js +250 -0
  34. package/dist/common/fuzzySearch/levenshtein-search.js +298 -0
  35. package/dist/common/fuzzySearch/utils.js +23 -0
  36. package/dist/common/getModelProvider.js +18 -0
  37. package/dist/common/getSimplifiedHtml.js +122 -0
  38. package/dist/common/hashObject.js +32 -0
  39. package/dist/common/html2markdown/convertElementToMarkdown.js +469 -0
  40. package/dist/common/html2markdown/index.js +19 -0
  41. package/dist/common/jwtTokenManager.js +18 -0
  42. package/dist/common/loadRuntime.js +16 -0
  43. package/dist/common/locatorHelpers.js +41 -0
  44. package/dist/common/matching/collectStrings.js +32 -0
  45. package/dist/common/matching/levenshtein.js +40 -0
  46. package/dist/common/matching/matching.js +317 -0
  47. package/dist/common/matching/types.js +1 -0
  48. package/dist/common/noEmpty.js +9 -0
  49. package/dist/common/saveSnapshotWithExamples.js +60 -0
  50. package/dist/common/tests/testEnsureBrowserScript.spec.js +31 -0
  51. package/dist/common/xpathMapping.js +107 -0
  52. package/dist/helpers/clickUntilExhausted.js +85 -0
  53. package/dist/helpers/downloadFile.js +125 -0
  54. package/dist/helpers/export.d.js +5 -0
  55. package/dist/helpers/export.d.ts +1220 -0
  56. package/dist/helpers/extractMarkdown.js +35 -0
  57. package/dist/helpers/filterEmptyValues.js +54 -0
  58. package/dist/helpers/gotoUrl.js +98 -0
  59. package/dist/helpers/index.d.ts +1220 -0
  60. package/dist/helpers/index.js +128 -0
  61. package/dist/helpers/processDate.js +25 -0
  62. package/dist/helpers/resolveUrl.js +64 -0
  63. package/dist/helpers/sanitizeHtml.js +74 -0
  64. package/dist/helpers/saveFileToS3.js +50 -0
  65. package/dist/helpers/scrollToLoadContent.js +57 -0
  66. package/dist/helpers/tests/extendedTest.js +130 -0
  67. package/dist/helpers/tests/testClickUntilExhausted.spec.js +387 -0
  68. package/dist/helpers/tests/testDownloadFile.spec.js +204 -0
  69. package/dist/helpers/tests/testExtractMarkdown.spec.js +290 -0
  70. package/dist/helpers/tests/testFilterEmptyValues.spec.js +151 -0
  71. package/dist/helpers/tests/testGoToUrl.spec.js +37 -0
  72. package/dist/helpers/tests/testProcessDate.spec.js +13 -0
  73. package/dist/helpers/tests/testResolveUrl.spec.js +341 -0
  74. package/dist/helpers/tests/testSanitizeHtml.spec.js +330 -0
  75. package/dist/helpers/tests/testScrollToLoadContent.spec.js +163 -0
  76. package/dist/helpers/tests/testValidateDataUsingSchema.spec.js +342 -0
  77. package/dist/helpers/tests/testWithDomSettledWait.spec.js +164 -0
  78. package/dist/helpers/tests/testWithNetworkIdleWait.spec.js +114 -0
  79. package/dist/helpers/types/Attachment.js +115 -0
  80. package/dist/helpers/types/CustomTypeRegistry.js +48 -0
  81. package/dist/helpers/types/RunEnvironment.js +18 -0
  82. package/dist/helpers/types/ValidationError.js +17 -0
  83. package/dist/helpers/types/index.js +51 -0
  84. package/dist/helpers/uploadFileToS3.js +154 -0
  85. package/dist/helpers/utils/getS3Client.js +22 -0
  86. package/dist/helpers/utils/index.js +73 -0
  87. package/dist/helpers/utils/isDownload.js +10 -0
  88. package/dist/helpers/utils/isGenerateCodeMode.js +9 -0
  89. package/dist/helpers/utils/isLocator.js +9 -0
  90. package/dist/helpers/utils/jwtTokenManager.js +18 -0
  91. package/dist/helpers/validateDataUsingSchema.js +103 -0
  92. package/dist/helpers/waitForDomSettled.js +90 -0
  93. package/dist/helpers/withNetworkSettledWait.js +91 -0
  94. package/dist/index.d.js +16 -0
  95. package/dist/index.d.ts +10 -0
  96. package/dist/index.js +16 -0
  97. package/dist/intunedServices/ApiGateway/aiApiGateway.js +99 -0
  98. package/dist/intunedServices/ApiGateway/factory.js +13 -0
  99. package/dist/intunedServices/ApiGateway/providers/Anthropic.js +26 -0
  100. package/dist/intunedServices/ApiGateway/providers/Gemini.js +29 -0
  101. package/dist/intunedServices/ApiGateway/providers/OpenAI.js +29 -0
  102. package/dist/intunedServices/ApiGateway/tests/testApiGateway.spec.js +224 -0
  103. package/dist/intunedServices/ApiGateway/types.js +11 -0
  104. package/dist/intunedServices/cache/cache.js +61 -0
  105. package/dist/intunedServices/cache/index.js +12 -0
  106. package/dist/intunedServices/cache/tests/testCache.spec.js +117 -0
  107. package/dist/optimized-extractors/common/buildExamplesPrompt.js +12 -0
  108. package/dist/optimized-extractors/common/buildImagesFromPage.js +55 -0
  109. package/dist/optimized-extractors/common/extractStructuredDataUsingClaude.js +135 -0
  110. package/dist/optimized-extractors/common/extractStructuredDataUsingGoogle.js +37 -0
  111. package/dist/optimized-extractors/common/extractStructuredDataUsingOpenAi.js +132 -0
  112. package/dist/optimized-extractors/common/extractStrucutredDataUsingAiInstance.js +122 -0
  113. package/dist/optimized-extractors/common/findTableHeaders.js +162 -0
  114. package/dist/optimized-extractors/common/index.js +55 -0
  115. package/dist/optimized-extractors/common/isTableHeaderOrFooter.js +84 -0
  116. package/dist/optimized-extractors/common/matching/matching.js +212 -0
  117. package/dist/optimized-extractors/common/matching/matching.test.js +655 -0
  118. package/dist/optimized-extractors/common/matching/types.js +18 -0
  119. package/dist/optimized-extractors/common/matching/utils.js +184 -0
  120. package/dist/optimized-extractors/common/utils.js +58 -0
  121. package/dist/optimized-extractors/export.d.js +5 -0
  122. package/dist/optimized-extractors/export.d.ts +397 -0
  123. package/dist/optimized-extractors/extractArray.js +120 -0
  124. package/dist/optimized-extractors/extractObject.js +104 -0
  125. package/dist/optimized-extractors/index.d.ts +397 -0
  126. package/dist/optimized-extractors/index.js +31 -0
  127. package/dist/optimized-extractors/listExtractionHelpers/__tests__/dynamicListExtractor.spec.js +312 -0
  128. package/dist/optimized-extractors/listExtractionHelpers/__tests__/findSetOfXpathsToCreateAnArrayExtractor.test.js +22 -0
  129. package/dist/optimized-extractors/listExtractionHelpers/__tests__/getContainerElement.test.js +21 -0
  130. package/dist/optimized-extractors/listExtractionHelpers/__tests__/partOfSameArrayXpath.test.js +42 -0
  131. package/dist/optimized-extractors/listExtractionHelpers/__tests__/verifyThatAllXpathsArePartOfSameArray.test.js +9 -0
  132. package/dist/optimized-extractors/listExtractionHelpers/dynamicListExtractor.js +152 -0
  133. package/dist/optimized-extractors/listExtractionHelpers/errors.js +46 -0
  134. package/dist/optimized-extractors/listExtractionHelpers/getListMatches.js +14 -0
  135. package/dist/optimized-extractors/listExtractionHelpers/runAiExtraction.js +240 -0
  136. package/dist/optimized-extractors/listExtractionHelpers/typesAndSchema.js +5 -0
  137. package/dist/optimized-extractors/listExtractionHelpers/utils/extractPropertiesUsingGPTFromArray.js +277 -0
  138. package/dist/optimized-extractors/listExtractionHelpers/utils/extractStructuredListUsingAi.js +44 -0
  139. package/dist/optimized-extractors/listExtractionHelpers/utils/getListContainerXpath.js +94 -0
  140. package/dist/optimized-extractors/listExtractionHelpers/utils/getRelativeContainerXpathSelector.js +20 -0
  141. package/dist/optimized-extractors/listExtractionHelpers/utils/getSimplifiedHtmlPerListItem.js +21 -0
  142. package/dist/optimized-extractors/listExtractionHelpers/utils/tablesUtils.js +48 -0
  143. package/dist/optimized-extractors/listExtractionHelpers/utils/validateOptions.js +52 -0
  144. package/dist/optimized-extractors/models/anthropicModel.js +23 -0
  145. package/dist/optimized-extractors/models/openaiModel.js +23 -0
  146. package/dist/optimized-extractors/objectExtractionHelpers/AIExtractors.js +73 -0
  147. package/dist/optimized-extractors/objectExtractionHelpers/__tests__/checksumUtils.test.js +103 -0
  148. package/dist/optimized-extractors/objectExtractionHelpers/__tests__/testObjectExtractorFromLocator.spec.js +107 -0
  149. package/dist/optimized-extractors/objectExtractionHelpers/__tests__/testObjectExtractorFromPage.spec.js +107 -0
  150. package/dist/optimized-extractors/objectExtractionHelpers/calculateObjectExampleHash.js +28 -0
  151. package/dist/optimized-extractors/objectExtractionHelpers/captureSnapshot.js +26 -0
  152. package/dist/optimized-extractors/objectExtractionHelpers/checksumUtils.js +32 -0
  153. package/dist/optimized-extractors/objectExtractionHelpers/constants.js +7 -0
  154. package/dist/optimized-extractors/objectExtractionHelpers/dynamicObjectExtractor.js +106 -0
  155. package/dist/optimized-extractors/objectExtractionHelpers/errors.js +42 -0
  156. package/dist/optimized-extractors/objectExtractionHelpers/findDomMatches.js +54 -0
  157. package/dist/optimized-extractors/objectExtractionHelpers/getSimplifiedHtml.js +122 -0
  158. package/dist/optimized-extractors/objectExtractionHelpers/typesAndSchemas.js +5 -0
  159. package/dist/optimized-extractors/objectExtractionHelpers/validateDynamicObjectExtractorOptions.js +52 -0
  160. package/dist/optimized-extractors/types/aiModelsValidation.js +45 -0
  161. package/dist/optimized-extractors/types/errors.js +42 -0
  162. package/dist/optimized-extractors/types/jsonSchema.d.js +5 -0
  163. package/dist/optimized-extractors/types/jsonSchema.d.ts +50 -0
  164. package/dist/optimized-extractors/types/types.js +5 -0
  165. package/dist/optimized-extractors/validators.js +152 -0
  166. package/dist/vite-env.d.js +1 -0
  167. package/dist/vite-env.d.ts +9 -0
  168. package/docs.md +14 -0
  169. package/generated-docs/ai/functions/extractStructuredData.mdx +255 -0
  170. package/generated-docs/ai/functions/isPageLoaded.mdx +88 -0
  171. package/generated-docs/ai/interfaces/ArraySchema.mdx +36 -0
  172. package/generated-docs/ai/interfaces/BasicSchema.mdx +14 -0
  173. package/generated-docs/ai/interfaces/BooleanSchema.mdx +28 -0
  174. package/generated-docs/ai/interfaces/ImageBufferContentItem.mdx +16 -0
  175. package/generated-docs/ai/interfaces/ImageUrlContentItem.mdx +16 -0
  176. package/generated-docs/ai/interfaces/NumberSchema.mdx +35 -0
  177. package/generated-docs/ai/interfaces/ObjectSchema.mdx +39 -0
  178. package/generated-docs/ai/interfaces/StringSchema.mdx +35 -0
  179. package/generated-docs/ai/interfaces/TextContentItem.mdx +14 -0
  180. package/generated-docs/ai/type-aliases/ContentItem.mdx +12 -0
  181. package/generated-docs/ai/type-aliases/JsonSchema.mdx +47 -0
  182. package/generated-docs/ai/type-aliases/SUPPORTED_MODELS.mdx +85 -0
  183. package/generated-docs/helpers/functions/downloadFile.mdx +99 -0
  184. package/generated-docs/helpers/functions/extractMarkdown.mdx +56 -0
  185. package/generated-docs/helpers/functions/filterEmptyValues.mdx +51 -0
  186. package/generated-docs/helpers/functions/goToUrl.mdx +124 -0
  187. package/generated-docs/helpers/functions/processDate.mdx +55 -0
  188. package/generated-docs/helpers/functions/resolveUrl.mdx +165 -0
  189. package/generated-docs/helpers/functions/sanitizeHtml.mdx +113 -0
  190. package/generated-docs/helpers/functions/saveFileToS3.mdx +127 -0
  191. package/generated-docs/helpers/functions/scrollToLoadContent.mdx +89 -0
  192. package/generated-docs/helpers/functions/uploadFileToS3.mdx +121 -0
  193. package/generated-docs/helpers/functions/validateDataUsingSchema.mdx +90 -0
  194. package/generated-docs/helpers/functions/waitForDomSettled.mdx +91 -0
  195. package/generated-docs/helpers/functions/withNetworkSettledWait.mdx +76 -0
  196. package/generated-docs/helpers/interfaces/Attachment.mdx +56 -0
  197. package/generated-docs/helpers/interfaces/S3Configs.mdx +52 -0
  198. package/generated-docs/helpers/interfaces/SanitizeHtmlOptions.mdx +22 -0
  199. package/generated-docs/helpers/type-aliases/AttachmentType.mdx +10 -0
  200. package/generated-docs/helpers/type-aliases/FileType.mdx +61 -0
  201. package/generated-docs/helpers/type-aliases/Trigger.mdx +62 -0
  202. package/how-to-run-tests.md +10 -0
  203. package/intuned-runtime-setup.md +13 -0
  204. package/package.json +119 -0
  205. package/tsconfig.eslint.json +5 -0
  206. package/tsconfig.json +26 -0
@@ -0,0 +1,77 @@
1
+ "use strict";
2
+
3
+ Object.defineProperty(exports, "__esModule", {
4
+ value: true
5
+ });
6
+ exports.isPageLoaded = void 0;
7
+ var _Logger = require("../common/Logger");
8
+ var _utils = require("../helpers/utils");
9
+ var _ai = require("ai");
10
/**
 * Reads the verdict out of the model's answer.
 *
 * The system prompt asks the model to START its answer with 'True', 'False' or
 * 'Dont know', so the beginning of the first line is authoritative. Searching the
 * whole answer first would let a word in the reason line (e.g. "False\nit is not
 * True that ...") override the actual verdict.
 *
 * @param {string} llmResult - Model answer with blank lines already removed.
 * @returns {"loaded" | "loading" | "unknown" | null} The verdict, or null when none is found.
 */
const parsePageLoadVerdict = llmResult => {
  const firstLine = llmResult.split("\n")[0];
  // Skip leading quotes / markdown markers, then match the verdict case-insensitively.
  const leading = /^[^A-Za-z]*(true|false|don['’]?t know)\b/i.exec(firstLine);
  if (leading) {
    const verdict = leading[1].toLowerCase();
    if (verdict === "true") {
      return "loaded";
    }
    return verdict === "false" ? "loading" : "unknown";
  }
  // The answer does not lead with a verdict: keep the previous lenient whole-text
  // search (same precedence as before) so such answers are still accepted.
  if (llmResult.includes("True")) {
    return "loaded";
  }
  if (llmResult.includes("False")) {
    return "loading";
  }
  if (llmResult.includes("Dont know") || llmResult.includes("Don't know")) {
    return "unknown";
  }
  return null;
};

/**
 * Takes a viewport screenshot of `options.page` and asks an LLM whether the page
 * has finished loading.
 *
 * @param {object} options
 * @param {object} options.page - Object exposing `screenshot()`; a PNG viewport screenshot is requested.
 * @param {number} [options.timeoutInMs=10000] - Timeout passed to `page.screenshot`.
 * @param {string} [options.model="gpt-4o-2024-08-06"] - Model name passed to the AI gateway.
 * @param {string} [options.apiKey] - API key passed to the AI gateway.
 * @returns {Promise<boolean>} `true` when the model says the page is loaded; `false` when it
 *   says the page is still loading or that it does not know.
 * @throws {Error} "LLM response is empty" when the answer is blank, and
 *   "LLM result is not valid" when no verdict can be found in the answer.
 */
const isPageLoaded = async options => {
  const { page } = options;
  const timeoutInMs = options.timeoutInMs || 10000;
  const screenshotBytes = await page.screenshot({
    fullPage: false,
    type: "png",
    timeout: timeoutInMs
  });
  const gateway = _utils.GatewayFactory.createAIGateway();
  const gatewayModel = gateway.getModel(options.model ?? "gpt-4o-2024-08-06", options.apiKey);
  const base64Image = Buffer.from(screenshotBytes).toString("base64");
  const response = await (0, _ai.generateText)({
    model: gatewayModel,
    messages: [{
      role: "system",
      content: `You are a helpful assistant that determines if a webpage finished loading. If the page finished loading, start your answer with 'True'. If the page is loading, start your answer with 'False'. If you are not sure, start your answer with 'Dont know'. In a new line, add a reason to your response.

Some good cues for determining if a page is loading:
- Loading spinner
- Page is blank
- Some content looks like it's missing
- Not on splash screen`
    }, {
      role: "user",
      content: [{
        type: "text",
        text: "Look at the screenshot and tell me, is the page loading or has it finished loading?"
      }, {
        type: "image",
        image: base64Image
      }]
    }]
  });

  // Log the cost header when the response carries one, otherwise the token usage.
  const costInCents = response?.response?.headers?.["x-ai-cost-in-cents"];
  if (costInCents) {
    _Logger.logger.info(`Total LLM Cost In Cents: ${costInCents}`);
  } else {
    _Logger.logger.info(`Total LLM Tokens: ${response.usage.totalTokens}`);
  }

  let llmResult = response.text.trim();
  if (!llmResult) {
    throw new Error("LLM response is empty");
  }
  // Drop blank lines so the reason is always the second line.
  llmResult = llmResult.split("\n").filter(line => line.trim() !== "").join("\n");
  const lines = llmResult.split("\n");
  const reason = lines.length > 1 ? lines[1] : null;

  const verdict = parsePageLoadVerdict(llmResult);
  let result;
  if (verdict === "loaded") {
    _Logger.logger.info(`Page is loaded.`);
    result = true;
  } else if (verdict === "loading") {
    _Logger.logger.info(`Page is not loaded.`);
    result = false;
  } else if (verdict === "unknown") {
    // An unsure model is treated the same as "still loading".
    _Logger.logger.info(`Page loading status is unknown.`);
    result = false;
  } else {
    throw new Error("LLM result is not valid");
  }
  _Logger.logger.info(`LLM Reason: ${reason}`);
  return result;
};
77
+ exports.isPageLoaded = isPageLoaded;
@@ -0,0 +1,39 @@
1
+ "use strict";
2
+
3
+ Object.defineProperty(exports, "__esModule", {
4
+ value: true
5
+ });
6
+ exports.getMessages = getMessages;
7
/**
 * Builds the chat messages for a structured-data extraction request.
 *
 * @param {object} input
 * @param {string} input.prompt - Extra instructions appended to the system message.
 * @param {string} input.content - Text sent as the first part of the user message.
 * @param {Array<{image_type: string, data: {toString(encoding: string): string}}>} [input.images]
 *   Optional images; each one is appended to the user message as a base64 data URL.
 * @returns {Array<object>} A system message followed by a single user message.
 */
function getMessages(input) {
  const { prompt, content, images } = input;

  const systemMessage = {
    role: "system",
    content: `You are a data analyst whose job is to extract structured data from an HTML page.
Please ensure that the data is extracted exactly as it appears in the HTML, without any additional formatting or alterations.
Extract the structured data exactly as it is in the HTML.
If you don't find a specific field just don't return the field.
Call extract_data_from_content tool with the extracted data as the argument of this tool.
${prompt}`
  };

  // Encodes one image as an inline data URL part.
  const toImagePart = image => ({
    type: "image",
    image: `data:image/${image.image_type};base64,${image.data.toString("base64")}`
  });

  // The text part always comes first; image parts follow in their original order.
  const userParts = [{ type: "text", text: content }];
  if (images && images.length > 0) {
    images.forEach(image => {
      userParts.push(toImagePart(image));
    });
  }

  return [systemMessage, { role: "user", content: userParts }];
}
@@ -0,0 +1,137 @@
1
+ "use strict";
2
+
3
+ var _extendedTest = require("../../helpers/tests/extendedTest");
4
+ var _validators = require("../validators");
5
// Table-driven suite for checkAllTypesAreStrings: every case builds a fresh schema
// and states the boolean the validator is expected to return for it.
(0, _extendedTest.describe)("Check All Types Are Strings Tests", () => {
  const cases = [{
    title: "should return true for string schema",
    expected: true,
    buildSchema: () => ({
      type: "string",
      description: "A simple string field"
    })
  }, {
    title: "should return false for number schema",
    expected: false,
    buildSchema: () => ({
      type: "number",
      description: "A number field"
    })
  }, {
    title: "should return false for boolean schema",
    expected: false,
    buildSchema: () => ({
      type: "boolean",
      description: "A boolean field"
    })
  }, {
    title: "should return true for array of strings",
    expected: true,
    buildSchema: () => ({
      type: "array",
      items: { type: "string" }
    })
  }, {
    // NOTE(review): the title says "no items constraint" but the schema declares
    // string items, exactly like the previous case - confirm which one is intended.
    title: "should return true for array with no items constraint",
    expected: true,
    buildSchema: () => ({
      type: "array",
      items: { type: "string" }
    })
  }, {
    title: "should return true for object with all string properties",
    expected: true,
    buildSchema: () => ({
      type: "object",
      properties: {
        name: { type: "string" },
        description: { type: "string" }
      },
      required: ["name", "description"]
    })
  }, {
    title: "should return false for object with mixed property types",
    expected: false,
    buildSchema: () => ({
      type: "object",
      properties: {
        name: { type: "string" },
        age: { type: "number" }
      },
      required: ["name", "age"]
    })
  }, {
    title: "should return true for object with no properties",
    expected: true,
    buildSchema: () => ({
      type: "object",
      properties: {},
      required: []
    })
  }, {
    title: "should return true for nested object with all string properties",
    expected: true,
    buildSchema: () => ({
      type: "object",
      properties: {
        user: {
          type: "object",
          properties: {
            firstName: { type: "string" },
            lastName: { type: "string" }
          },
          required: ["firstName", "lastName"]
        },
        metadata: {
          type: "object",
          properties: {
            tags: {
              type: "array",
              items: { type: "string" }
            }
          },
          required: ["tags"]
        }
      },
      required: ["user", "metadata"]
    })
  }, {
    title: "should return false for nested object with non-string properties",
    expected: false,
    buildSchema: () => ({
      type: "object",
      properties: {
        user: {
          type: "object",
          properties: {
            firstName: { type: "string" },
            age: { type: "number" }
          },
          required: ["firstName", "age"]
        }
      },
      required: ["user"]
    })
  }];

  // Register the cases in declaration order, one test per case.
  for (const { title, expected, buildSchema } of cases) {
    (0, _extendedTest.test)(title, async () => {
      const schema = buildSchema();
      (0, _extendedTest.expect)((0, _validators.checkAllTypesAreStrings)(schema)).toBe(expected);
    });
  }
});
@@ -0,0 +1,372 @@
1
+ "use strict";
2
+
3
+ var _extendedTest = require("../../common/extendedTest");
4
+ var _ = require("..");
5
+ var _playwrightCore = require("playwright-core");
6
+ var _dotenv = require("dotenv");
7
// Load environment variables from .env before the suite is registered.
(0, _dotenv.config)();

// NOTE(review): the suite is registered through describe.skip, so these tests are
// currently not executed.
_extendedTest.describe.skip("Extract data from content tests", () => {
  const { describe, test, expect, beforeAll, afterAll, beforeEach, afterEach } = _extendedTest;
  const MODEL = "gpt-4o";

  // Small builders for the content items and schema fragments used below.
  const textItem = data => ({ type: "text", data });
  const pngUrlItem = data => ({ type: "image-url", image_type: "png", data });
  const stringField = description => ({ type: "string", description });
  const objectSchema = (properties, required) => ({
    type: "object",
    properties,
    ...(required ? { required } : {})
  });
  const extract = args => (0, _.extractStructuredData)(args);

  let browser;
  let page;

  // One headed Chromium instance for the whole suite, one fresh page per test.
  beforeAll(async () => {
    browser = await _playwrightCore.chromium.launch({ headless: false });
  });
  afterAll(async () => {
    await browser.close();
  });
  beforeEach(async () => {
    page = await browser.newPage();
  });
  afterEach(async () => {
    await page.close();
  });

  describe("Text content extraction", () => {
    test("should extract structured data from single text content", async () => {
      const textContent = textItem("John Doe, age 30, works as a Software Engineer at Tech Corp. His email is john.doe@techcorp.com and phone number is +1-555-0123.");
      const personSchema = objectSchema({
        name: stringField("Person's full name"),
        age: { type: "number", description: "Person's age" },
        occupation: stringField("Person's job title"),
        company: stringField("Company name"),
        email: stringField("Email address"),
        phone: stringField("Phone number")
      }, ["name"]);

      const result = await extract({
        content: textContent,
        model: MODEL,
        dataSchema: personSchema,
        prompt: "Extract person information from the text"
      });

      expect(result).toBeDefined();
      expect(result.name).toBe("John Doe");
      expect(result.age).toBe(30);
      expect(result.occupation).toContain("Software Engineer");
      expect(result.company).toContain("Tech Corp");
      expect(result.email).toBe("john.doe@techcorp.com");
      expect(result.phone).toContain("555-0123");
    });

    test("should extract array data from text content", async () => {
      // Leading and trailing empty entries keep the surrounding newlines of the listing.
      const textContent = textItem([
        "",
        "Product List:",
        "1. iPhone 15 - $999 - Apple",
        "2. Samsung Galaxy S24 - $899 - Samsung",
        "3. Google Pixel 8 - $699 - Google",
        ""
      ].join("\n"));
      const productsSchema = objectSchema({
        products: {
          type: "array",
          items: objectSchema({
            name: stringField("Product name"),
            price: stringField("Product price"),
            brand: stringField("Product brand")
          }, ["name", "price", "brand"])
        }
      }, ["products"]);

      const result = await extract({
        content: textContent,
        model: MODEL,
        dataSchema: productsSchema,
        prompt: "Extract all products with their details"
      });

      expect(result).toBeDefined();
      expect(result.products).toHaveLength(3);
      const [firstProduct] = result.products;
      expect(firstProduct.name).toContain("iPhone");
      expect(firstProduct.price).toContain("999");
      expect(firstProduct.brand).toBe("Apple");
    });

    test("should handle multiple text content items", async () => {
      const textContents = [
        textItem("Customer: Alice Johnson"),
        textItem("Order ID: ORD-12345"),
        textItem("Total: $156.78")
      ];
      const orderSchema = objectSchema({
        customer: stringField("Customer name"),
        orderId: stringField("Order identifier"),
        total: stringField("Order total amount")
      }, ["customer", "orderId", "total"]);

      const result = await extract({
        content: textContents,
        model: MODEL,
        dataSchema: orderSchema,
        prompt: "Extract order information from the text fragments"
      });

      expect(result).toBeDefined();
      expect(result.customer).toBe("Alice Johnson");
      expect(result.orderId).toBe("ORD-12345");
      expect(result.total).toBe("$156.78");
    });
  });

  describe("Image URL content extraction", () => {
    test("should extract data from image URL content", async () => {
      const imageContent = pngUrlItem("https://cdn-dynmedia-1.microsoft.com/is/image/microsoftcorp/2-accordion-3-800x513-1?resMode=sharp2&op_usm=1.5,0.65,15,0&wid=1664&hei=1062&qlt=100&fmt=png-alpha&fit=constrain");
      const imageSchema = {
        type: "array",
        items: stringField("Todo item shown in the middle panel")
      };

      const result = await extract({
        content: imageContent,
        model: MODEL,
        dataSchema: imageSchema,
        prompt: "Extract todo items from the image, they are shown in the middle panel"
      });

      expect(result).toBeDefined();
      expect(result[0]).toContain("Yoga");
    });

    test("should handle invalid image URLs gracefully", async () => {
      const imageContent = pngUrlItem("https://invalid-url-com/image.png");
      const schema = objectSchema({
        description: { type: "string" }
      });

      const pending = extract({
        content: imageContent,
        model: MODEL,
        dataSchema: schema
      });
      await expect(pending).rejects.toThrow();
    });
  });

  describe("Mixed content extraction", () => {
    test("should extract data from mixed text and image content", async () => {
      const mixedContent = [
        textItem("Product: iPhone 15 Pro Max"),
        textItem("Price: $1,199"),
        pngUrlItem("https://cdn11.bigcommerce.com/s-scmrv6kkrz/images/stencil/1280x1280/products/192719/166712/1891-clear-14-vdc-miniature-lb93__16581.1568390384.jpg?c=2")
      ];
      const productSchema = objectSchema({
        name: stringField("Product name"),
        price: stringField("Product price"),
        imageDescription: stringField("Description of the product image")
      }, ["name", "price"]);

      const result = await extract({
        content: mixedContent,
        model: MODEL,
        dataSchema: productSchema,
        prompt: "Extract product information from both text and image"
      });

      expect(result).toBeDefined();
      expect(result.name).toContain("iPhone");
      expect(result.price).toContain("1,199");
      expect(result.imageDescription).toBeDefined();
    });
  });

  describe("Input validation and error handling", () => {
    // Schema shared by the two "empty content" cases; built fresh for each test.
    const messageSchema = () => objectSchema({
      message: stringField("Any message found")
    });

    test("should validate data schema", async () => {
      const textContent = textItem("test data");
      const pending = extract({
        content: textContent,
        dataSchema: "invalid schema",
        model: MODEL
      });
      await expect(pending).rejects.toThrow();
    });

    test("should validate model parameter", async () => {
      const textContent = textItem("test data");
      const pending = extract({
        content: textContent,
        dataSchema: objectSchema({}),
        model: "invalid-model"
      });
      await expect(pending).rejects.toThrow();
    });

    test("should handle empty content gracefully", async () => {
      const textContent = textItem("");
      const schema = messageSchema();

      const result = await extract({
        content: textContent,
        model: MODEL,
        dataSchema: schema,
        prompt: "Extract any information from the content"
      });

      expect(result).toBeDefined();
    });

    test("should handle empty array content", async () => {
      const schema = messageSchema();
      const pending = extract({
        content: [],
        model: MODEL,
        dataSchema: schema
      });
      await expect(pending).rejects.toThrow();
    });
  });

  describe("Caching behavior", () => {
    test("should support cache enabling/disabling", async () => {
      const textContent = textItem("Test content for caching");
      const schema = objectSchema({
        content: stringField("Content summary")
      });

      // Run with the cache on, then off; both calls must yield a value.
      const result1 = await extract({
        content: textContent,
        model: MODEL,
        dataSchema: schema,
        enableCache: true
      });
      const result2 = await extract({
        content: textContent,
        model: MODEL,
        dataSchema: schema,
        enableCache: false
      });

      expect(result1).toBeDefined();
      expect(result2).toBeDefined();
    });
  });

  describe("Retry behavior", () => {
    test("should support maxRetries parameter", async () => {
      const textContent = textItem("Test content for retry behavior");
      const schema = objectSchema({
        content: stringField("Content summary")
      });

      const result = await extract({
        content: textContent,
        model: "o4-mini",
        dataSchema: schema,
        maxRetries: 1
      });

      expect(result).toBeDefined();
    });
  });
});